summaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
authorTimothy Pearson <tpearson@raptorengineering.com>2017-08-23 14:45:25 -0500
committerTimothy Pearson <tpearson@raptorengineering.com>2017-08-23 14:45:25 -0500
commitfcbb27b0ec6dcbc5a5108cb8fb19eae64593d204 (patch)
tree22962a4387943edc841c72a4e636a068c66d58fd /net/netfilter
downloadast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.zip
ast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.tar.gz
Initial import of modified Linux 2.6.28 tree
Original upstream URL: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git | branch linux-2.6.28.y
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/Kconfig861
-rw-r--r--net/netfilter/Makefile94
-rw-r--r--net/netfilter/core.c283
-rw-r--r--net/netfilter/ipvs/Kconfig241
-rw-r--r--net/netfilter/ipvs/Makefile33
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c622
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c1110
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c1542
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c3443
-rw-r--r--net/netfilter/ipvs/ip_vs_dh.c261
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c166
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c410
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c555
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c755
-rw-r--r--net/netfilter/ipvs/ip_vs_lc.c103
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c138
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c288
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_ah_esp.c235
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c732
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c533
-rw-r--r--net/netfilter/ipvs/ip_vs_rr.c112
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c251
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c140
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c258
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c942
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c128
-rw-r--r--net/netfilter/ipvs/ip_vs_wrr.c237
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c1005
-rw-r--r--net/netfilter/nf_conntrack_acct.c148
-rw-r--r--net/netfilter/nf_conntrack_amanda.c235
-rw-r--r--net/netfilter/nf_conntrack_core.c1280
-rw-r--r--net/netfilter/nf_conntrack_ecache.c128
-rw-r--r--net/netfilter/nf_conntrack_expect.c612
-rw-r--r--net/netfilter/nf_conntrack_extend.c191
-rw-r--r--net/netfilter/nf_conntrack_ftp.c594
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c888
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c1833
-rw-r--r--net/netfilter/nf_conntrack_h323_types.c1922
-rw-r--r--net/netfilter/nf_conntrack_helper.c213
-rw-r--r--net/netfilter/nf_conntrack_irc.c290
-rw-r--r--net/netfilter/nf_conntrack_l3proto_generic.c74
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c126
-rw-r--r--net/netfilter/nf_conntrack_netlink.c1838
-rw-r--r--net/netfilter/nf_conntrack_pptp.c629
-rw-r--r--net/netfilter/nf_conntrack_proto.c370
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c816
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c109
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c353
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c746
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c1440
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c231
-rw-r--r--net/netfilter/nf_conntrack_proto_udplite.c242
-rw-r--r--net/netfilter/nf_conntrack_sane.c236
-rw-r--r--net/netfilter/nf_conntrack_sip.c1377
-rw-r--r--net/netfilter/nf_conntrack_standalone.c519
-rw-r--r--net/netfilter/nf_conntrack_tftp.c152
-rw-r--r--net/netfilter/nf_internals.h38
-rw-r--r--net/netfilter/nf_log.c172
-rw-r--r--net/netfilter/nf_queue.c357
-rw-r--r--net/netfilter/nf_sockopt.c169
-rw-r--r--net/netfilter/nf_tproxy_core.c95
-rw-r--r--net/netfilter/nfnetlink.c207
-rw-r--r--net/netfilter/nfnetlink_log.c982
-rw-r--r--net/netfilter/nfnetlink_queue.c943
-rw-r--r--net/netfilter/x_tables.c1078
-rw-r--r--net/netfilter/xt_CLASSIFY.c61
-rw-r--r--net/netfilter/xt_CONNMARK.c225
-rw-r--r--net/netfilter/xt_CONNSECMARK.c144
-rw-r--r--net/netfilter/xt_DSCP.c210
-rw-r--r--net/netfilter/xt_MARK.c201
-rw-r--r--net/netfilter/xt_NFLOG.c71
-rw-r--r--net/netfilter/xt_NFQUEUE.c69
-rw-r--r--net/netfilter/xt_NOTRACK.c53
-rw-r--r--net/netfilter/xt_RATEEST.c183
-rw-r--r--net/netfilter/xt_SECMARK.c146
-rw-r--r--net/netfilter/xt_TCPMSS.c314
-rw-r--r--net/netfilter/xt_TCPOPTSTRIP.c143
-rw-r--r--net/netfilter/xt_TPROXY.c102
-rw-r--r--net/netfilter/xt_TRACE.c40
-rw-r--r--net/netfilter/xt_comment.c45
-rw-r--r--net/netfilter/xt_connbytes.c145
-rw-r--r--net/netfilter/xt_connlimit.c296
-rw-r--r--net/netfilter/xt_connmark.c166
-rw-r--r--net/netfilter/xt_conntrack.c385
-rw-r--r--net/netfilter/xt_dccp.c181
-rw-r--r--net/netfilter/xt_dscp.c132
-rw-r--r--net/netfilter/xt_esp.c113
-rw-r--r--net/netfilter/xt_hashlimit.c1020
-rw-r--r--net/netfilter/xt_helper.c97
-rw-r--r--net/netfilter/xt_iprange.c174
-rw-r--r--net/netfilter/xt_length.c70
-rw-r--r--net/netfilter/xt_limit.c190
-rw-r--r--net/netfilter/xt_mac.c61
-rw-r--r--net/netfilter/xt_mark.c119
-rw-r--r--net/netfilter/xt_multiport.c248
-rw-r--r--net/netfilter/xt_owner.c188
-rw-r--r--net/netfilter/xt_physdev.c136
-rw-r--r--net/netfilter/xt_pkttype.c65
-rw-r--r--net/netfilter/xt_policy.c191
-rw-r--r--net/netfilter/xt_quota.c72
-rw-r--r--net/netfilter/xt_rateest.c156
-rw-r--r--net/netfilter/xt_realm.c54
-rw-r--r--net/netfilter/xt_recent.c687
-rw-r--r--net/netfilter/xt_sctp.c196
-rw-r--r--net/netfilter/xt_socket.c185
-rw-r--r--net/netfilter/xt_state.c87
-rw-r--r--net/netfilter/xt_statistic.c84
-rw-r--r--net/netfilter/xt_string.c113
-rw-r--r--net/netfilter/xt_tcpmss.c110
-rw-r--r--net/netfilter/xt_tcpudp.c240
-rw-r--r--net/netfilter/xt_time.c260
-rw-r--r--net/netfilter/xt_u32.c124
112 files changed, 45463 insertions, 0 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
new file mode 100644
index 0000000..25dcef9
--- /dev/null
+++ b/net/netfilter/Kconfig
@@ -0,0 +1,861 @@
+menu "Core Netfilter Configuration"
+ depends on NET && INET && NETFILTER
+
+config NETFILTER_NETLINK
+ tristate
+
+config NETFILTER_NETLINK_QUEUE
+ tristate "Netfilter NFQUEUE over NFNETLINK interface"
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_NETLINK
+ help
+ If this option is enabled, the kernel will include support
+ for queueing packets via NFNETLINK.
+
+config NETFILTER_NETLINK_LOG
+ tristate "Netfilter LOG over NFNETLINK interface"
+ default m if NETFILTER_ADVANCED=n
+ select NETFILTER_NETLINK
+ help
+ If this option is enabled, the kernel will include support
+ for logging packets via NFNETLINK.
+
+ This obsoletes the existing ipt_ULOG and ebt_ulog mechanisms,
+ and is also scheduled to replace the old syslog-based ipt_LOG
+ and ip6t_LOG modules.
+
+config NF_CONNTRACK
+ tristate "Netfilter connection tracking support"
+ default m if NETFILTER_ADVANCED=n
+ help
+ Connection tracking keeps a record of what packets have passed
+ through your machine, in order to figure out how they are related
+ into connections.
+
+ This is required to do Masquerading or other kinds of Network
+ Address Translation. It can also be used to enhance packet
+ filtering (see `Connection state match support' below).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+if NF_CONNTRACK
+
+config NF_CT_ACCT
+ bool "Connection tracking flow accounting"
+ depends on NETFILTER_ADVANCED
+ help
+ If this option is enabled, the connection tracking code will
+ keep per-flow packet and byte counters.
+
+ Those counters can be used for flow-based accounting or the
+ `connbytes' match.
+
+ Please note that currently this option only sets a default state.
+ You may change it at boot time with nf_conntrack.acct=0/1 kernel
+ parameter or by loading the nf_conntrack module with acct=0/1.
+
+ You may also disable/enable it on a running system with:
+ sysctl net.netfilter.nf_conntrack_acct=0/1
+
+ This option will be removed in 2.6.29.
+
+ If unsure, say `N'.
+
+config NF_CONNTRACK_MARK
+ bool 'Connection mark tracking support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option enables support for connection marks, used by the
+ `CONNMARK' target and `connmark' match. Similar to the mark value
+ of packets, but this mark value is kept in the conntrack session
+ instead of the individual packets.
+
+config NF_CONNTRACK_SECMARK
+ bool 'Connection tracking security mark support'
+ depends on NETWORK_SECMARK
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option enables security markings to be applied to
+ connections. Typically they are copied to connections from
+ packets using the CONNSECMARK target and copied back from
+ connections to packets with the same target, with the packets
+ being originally labeled via SECMARK.
+
+ If unsure, say 'N'.
+
+config NF_CONNTRACK_EVENTS
+ bool "Connection tracking events"
+ depends on NETFILTER_ADVANCED
+ help
+ If this option is enabled, the connection tracking code will
+ provide a notifier chain that can be used by other kernel code
+ to get notified about changes in the connection tracking state.
+
+ If unsure, say `N'.
+
+config NF_CT_PROTO_DCCP
+ tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
+ depends on EXPERIMENTAL
+ depends on NETFILTER_ADVANCED
+ default IP_DCCP
+ help
+ With this option enabled, the layer 3 independent connection
+ tracking code will be able to do state tracking on DCCP connections.
+
+ If unsure, say 'N'.
+
+config NF_CT_PROTO_GRE
+ tristate
+
+config NF_CT_PROTO_SCTP
+ tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
+ depends on EXPERIMENTAL
+ depends on NETFILTER_ADVANCED
+ default IP_SCTP
+ help
+ With this option enabled, the layer 3 independent connection
+ tracking code will be able to do state tracking on SCTP connections.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+config NF_CT_PROTO_UDPLITE
+ tristate 'UDP-Lite protocol connection tracking support'
+ depends on NETFILTER_ADVANCED
+ help
+ With this option enabled, the layer 3 independent connection
+ tracking code will be able to do state tracking on UDP-Lite
+ connections.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_AMANDA
+ tristate "Amanda backup protocol support"
+ depends on NETFILTER_ADVANCED
+ select TEXTSEARCH
+ select TEXTSEARCH_KMP
+ help
+ If you are running the Amanda backup package <http://www.amanda.org/>
+ on this machine or machines that will be MASQUERADED through this
+ machine, then you may want to enable this feature. This allows the
+ connection tracking and natting code to allow the sub-channels that
+ Amanda requires for communication of the backup data, messages and
+ index.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_FTP
+ tristate "FTP protocol support"
+ default m if NETFILTER_ADVANCED=n
+ help
+ Tracking FTP connections is problematic: special helpers are
+ required for tracking them, and doing masquerading and other forms
+ of Network Address Translation on them.
+
+ This is FTP support on Layer 3 independent connection tracking.
+ Layer 3 independent connection tracking is an experimental scheme
+ which generalizes ip_conntrack to support other layer 3 protocols.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_H323
+ tristate "H.323 protocol support"
+ depends on (IPV6 || IPV6=n)
+ depends on NETFILTER_ADVANCED
+ help
+ H.323 is a VoIP signalling protocol from ITU-T. As one of the most
+ important VoIP protocols, it is widely used by voice hardware and
+ software including voice gateways, IP phones, Netmeeting, OpenPhone,
+ Gnomemeeting, etc.
+
+ With this module you can support H.323 on a connection tracking/NAT
+ firewall.
+
+ This module supports RAS, Fast Start, H.245 Tunnelling, Call
+ Forwarding, RTP/RTCP and T.120 based audio, video, fax, chat,
+ whiteboard, file transfer, etc. For more information, please
+ visit http://nath323.sourceforge.net/.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_IRC
+ tristate "IRC protocol support"
+ default m if NETFILTER_ADVANCED=n
+ help
+ There is a commonly-used extension to IRC called
+ Direct Client-to-Client Protocol (DCC). This enables users to send
+ files to each other, and also chat to each other without the need
+ of a server. DCC Sending is used anywhere you send files over IRC,
+ and DCC Chat is most commonly used by Eggdrop bots. If you are
+ using NAT, this extension will enable you to send files and initiate
+ chats. Note that you do NOT need this extension to get files or
+ have others initiate chats, or everything else in IRC.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_NETBIOS_NS
+ tristate "NetBIOS name service protocol support"
+ depends on NETFILTER_ADVANCED
+ help
+ NetBIOS name service requests are sent as broadcast messages from an
+ unprivileged port and responded to with unicast messages to the
+ same port. This makes them hard to firewall properly because connection
+ tracking doesn't deal with broadcasts. This helper tracks locally
+ originating NetBIOS name service requests and the corresponding
+ responses. It relies on correct IP address configuration, specifically
+ netmask and broadcast address. When properly configured, the output
+ of "ip address show" should look similar to this:
+
+ $ ip -4 address show eth0
+ 4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000
+ inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_PPTP
+ tristate "PPtP protocol support"
+ depends on NETFILTER_ADVANCED
+ select NF_CT_PROTO_GRE
+ help
+ This module adds support for PPTP (Point to Point Tunnelling
+ Protocol, RFC2637) connection tracking and NAT.
+
+ If you are running PPTP sessions over a stateful firewall or NAT
+ box, you may want to enable this feature.
+
+ Please note that not all PPTP modes of operation are supported yet.
+ Specifically these limitations exist:
+ - Blindly assumes that control connections are always established
+ in PNS->PAC direction. This is a violation of RFC2637.
+ - Only supports a single call within each session
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_SANE
+ tristate "SANE protocol support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ depends on NETFILTER_ADVANCED
+ help
+ SANE is a protocol for remote access to scanners as implemented
+ by the 'saned' daemon. Like FTP, it uses separate control and
+ data connections.
+
+ With this module you can support SANE on a connection tracking
+ firewall.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_SIP
+ tristate "SIP protocol support"
+ default m if NETFILTER_ADVANCED=n
+ help
+ SIP is an application-layer control protocol that can establish,
+ modify, and terminate multimedia sessions (conferences) such as
+ Internet telephony calls. With the ip_conntrack_sip and
+ the nf_nat_sip modules you can support the protocol on a connection
+ tracking/NATing firewall.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_TFTP
+ tristate "TFTP protocol support"
+ depends on NETFILTER_ADVANCED
+ help
+ TFTP connection tracking helper, this is required depending
+ on how restrictive your ruleset is.
+ If you are using a tftp client behind -j SNAT or -j MASQUERADING
+ you will need this.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CT_NETLINK
+ tristate 'Connection tracking netlink interface'
+ select NETFILTER_NETLINK
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option enables support for a netlink-based userspace interface.
+
+# transparent proxy support
+config NETFILTER_TPROXY
+ tristate "Transparent proxying support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ depends on IP_NF_MANGLE
+ depends on NETFILTER_ADVANCED
+ help
+ This option enables transparent proxying support, that is,
+ support for handling non-locally bound IPv4 TCP and UDP sockets.
+ For it to work you will have to configure certain iptables rules
+ and use policy routing. For more information on how to set it up
+ see Documentation/networking/tproxy.txt.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+endif # NF_CONNTRACK
+
+config NETFILTER_XTABLES
+ tristate "Netfilter Xtables support (required for ip_tables)"
+ default m if NETFILTER_ADVANCED=n
+ help
+ This is required if you intend to use any of ip_tables,
+ ip6_tables or arp_tables.
+
+if NETFILTER_XTABLES
+
+# alphabetically ordered list of targets
+
+config NETFILTER_XT_TARGET_CLASSIFY
+ tristate '"CLASSIFY" target support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `CLASSIFY' target, which enables the user to set
+ the priority of a packet. Some qdiscs can use this value for
+ classification, among these are:
+
+ atm, cbq, dsmark, pfifo_fast, htb, prio
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_CONNMARK
+ tristate '"CONNMARK" target support'
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ select NF_CONNTRACK_MARK
+ help
+ This option adds a `CONNMARK' target, which allows one to manipulate
+ the connection mark value. Similar to the MARK target, but
+ affects the connection mark value rather than the packet mark value.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. The module will be called
+ ipt_CONNMARK.ko. If unsure, say `N'.
+
+config NETFILTER_XT_TARGET_CONNSECMARK
+ tristate '"CONNSECMARK" target support'
+ depends on NF_CONNTRACK && NF_CONNTRACK_SECMARK
+ default m if NETFILTER_ADVANCED=n
+ help
+ The CONNSECMARK target copies security markings from packets
+ to connections, and restores security markings from connections
+ to packets (if the packets are not already marked). This would
+ normally be used in conjunction with the SECMARK target.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_DSCP
+ tristate '"DSCP" and "TOS" target support'
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `DSCP' target, which allows you to manipulate
+ the IPv4/IPv6 header DSCP field (differentiated services codepoint).
+
+ The DSCP field can have any value between 0x0 and 0x3f inclusive.
+
+ It also adds the "TOS" target, which allows you to create rules in
+ the "mangle" table which alter the Type Of Service field of an IPv4
+ or the Priority field of an IPv6 packet, prior to routing.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_MARK
+ tristate '"MARK" target support'
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option adds a `MARK' target, which allows you to create rules
+ in the `mangle' table which alter the netfilter mark (nfmark) field
+ associated with the packet prior to routing. This can change
+ the routing method (see `Use netfilter MARK value as routing
+ key') and can also be used by other subsystems to change their
+ behavior.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_NFLOG
+ tristate '"NFLOG" target support'
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option enables the NFLOG target, which allows to LOG
+ messages through the netfilter logging API, which can use
+ either the old LOG target, the old ULOG target or nfnetlink_log
+ as backend.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_NFQUEUE
+ tristate '"NFQUEUE" target Support'
+ depends on NETFILTER_ADVANCED
+ help
+ This target replaced the old obsolete QUEUE target.
+
+ As opposed to QUEUE, it supports 65535 different queues,
+ not just one.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_NOTRACK
+ tristate '"NOTRACK" target support'
+ depends on IP_NF_RAW || IP6_NF_RAW
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ help
+ The NOTRACK target allows a select rule to specify
+ which packets *not* to enter the conntrack/NAT
+ subsystem with all the consequences (no ICMP error tracking,
+ no protocol helpers for the selected packets).
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+config NETFILTER_XT_TARGET_RATEEST
+ tristate '"RATEEST" target support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `RATEEST' target, which allows to measure
+ rates similar to TC estimators. The `rateest' match can be
+ used to match on the measured rates.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_TPROXY
+ tristate '"TPROXY" target support (EXPERIMENTAL)'
+ depends on EXPERIMENTAL
+ depends on NETFILTER_TPROXY
+ depends on NETFILTER_XTABLES
+ depends on NETFILTER_ADVANCED
+ select NF_DEFRAG_IPV4
+ help
+ This option adds a `TPROXY' target, which is somewhat similar to
+ REDIRECT. It can only be used in the mangle table and is useful
+ to redirect traffic to a transparent proxy. It does _not_ depend
+ on Netfilter connection tracking and NAT, unlike REDIRECT.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_TRACE
+ tristate '"TRACE" target support'
+ depends on IP_NF_RAW || IP6_NF_RAW
+ depends on NETFILTER_ADVANCED
+ help
+ The TRACE target allows you to mark packets so that the kernel
+ will log every rule which matches the packets as those traverse
+ the tables, chains, rules.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+config NETFILTER_XT_TARGET_SECMARK
+ tristate '"SECMARK" target support'
+ depends on NETWORK_SECMARK
+ default m if NETFILTER_ADVANCED=n
+ help
+ The SECMARK target allows security marking of network
+ packets, for use with security subsystems.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_TCPMSS
+ tristate '"TCPMSS" target support'
+ depends on (IPV6 || IPV6=n)
+ default m if NETFILTER_ADVANCED=n
+ ---help---
+ This option adds a `TCPMSS' target, which allows you to alter the
+ MSS value of TCP SYN packets, to control the maximum size for that
+ connection (usually limiting it to your outgoing interface's MTU
+ minus 40).
+
+ This is used to overcome criminally braindead ISPs or servers which
+ block ICMP Fragmentation Needed packets. The symptoms of this
+ problem are that everything works fine from your Linux
+ firewall/router, but machines behind it can never exchange large
+ packets:
+ 1) Web browsers connect, then hang with no data received.
+ 2) Small mail works fine, but large emails hang.
+ 3) ssh works fine, but scp hangs after initial handshaking.
+
+ Workaround: activate this option and add a rule to your firewall
+ configuration like:
+
+ iptables -A FORWARD -p tcp --tcp-flags SYN,RST SYN \
+ -j TCPMSS --clamp-mss-to-pmtu
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_TARGET_TCPOPTSTRIP
+ tristate '"TCPOPTSTRIP" target support (EXPERIMENTAL)'
+ depends on EXPERIMENTAL
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a "TCPOPTSTRIP" target, which allows you to strip
+ TCP options from TCP packets.
+
+config NETFILTER_XT_MATCH_COMMENT
+ tristate '"comment" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `comment' dummy-match, which allows you to put
+ comments in your iptables ruleset.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_CONNBYTES
+ tristate '"connbytes" per-connection counter match support'
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ select NF_CT_ACCT
+ help
+ This option adds a `connbytes' match, which allows you to match the
+ number of bytes and/or packets for each direction within a connection.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_CONNLIMIT
+ tristate '"connlimit" match support"'
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ ---help---
+ This match allows you to match against the number of parallel
+ connections to a server per client IP address (or address block).
+
+config NETFILTER_XT_MATCH_CONNMARK
+ tristate '"connmark" connection mark match support'
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ select NF_CONNTRACK_MARK
+ help
+ This option adds a `connmark' match, which allows you to match the
+ connection mark value previously set for the session by `CONNMARK'.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. The module will be called
+ ipt_connmark.ko. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_CONNTRACK
+ tristate '"conntrack" connection tracking match support'
+ depends on NF_CONNTRACK
+ default m if NETFILTER_ADVANCED=n
+ help
+ This is a general conntrack match module, a superset of the state match.
+
+ It allows matching on additional conntrack information, which is
+ useful in complex configurations, such as NAT gateways with multiple
+ internet links or tunnels.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_DCCP
+ tristate '"dccp" protocol match support'
+ depends on NETFILTER_ADVANCED
+ default IP_DCCP
+ help
+ With this option enabled, you will be able to use the iptables
+ `dccp' match in order to match on DCCP source/destination ports
+ and DCCP flags.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_DSCP
+ tristate '"dscp" and "tos" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `DSCP' match, which allows you to match against
+ the IPv4/IPv6 header DSCP field (differentiated services codepoint).
+
+ The DSCP field can have any value between 0x0 and 0x3f inclusive.
+
+ It will also add a "tos" match, which allows you to match packets
+ based on the Type Of Service fields of the IPv4 packet (which share
+ the same bits as DSCP).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_ESP
+ tristate '"esp" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This match extension allows you to match a range of SPIs
+ inside ESP header of IPSec packets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_HASHLIMIT
+ tristate '"hashlimit" match support'
+ depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `hashlimit' match.
+
+ As opposed to `limit', this match dynamically creates a hash table
+ of limit buckets, based on your selection of source/destination
+ addresses and/or ports.
+
+ It enables you to express policies like `10kpps for any given
+ destination address' or `500pps from any given source address'
+ with a single rule.
+
+config NETFILTER_XT_MATCH_HELPER
+ tristate '"helper" match support'
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ help
+ Helper matching allows you to match packets in dynamic connections
+ tracked by a conntrack-helper, ie. ip_conntrack_ftp
+
+ To compile it as a module, choose M here. If unsure, say Y.
+
+config NETFILTER_XT_MATCH_IPRANGE
+ tristate '"iprange" address range match support'
+ depends on NETFILTER_ADVANCED
+ ---help---
+ This option adds a "iprange" match, which allows you to match based on
+ an IP address range. (Normal iptables only matches on single addresses
+ with an optional mask.)
+
+ If unsure, say M.
+
+config NETFILTER_XT_MATCH_LENGTH
+ tristate '"length" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option allows you to match the length of a packet against a
+ specific value or range of values.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_LIMIT
+ tristate '"limit" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ limit matching allows you to control the rate at which a rule can be
+ matched: mainly useful in combination with the LOG target ("LOG
+ target support", below) and to avoid some Denial of Service attacks.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_MAC
+ tristate '"mac" address match support'
+ depends on NETFILTER_ADVANCED
+ help
+ MAC matching allows you to match packets based on the source
+ Ethernet address of the packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_MARK
+ tristate '"mark" match support'
+ default m if NETFILTER_ADVANCED=n
+ help
+ Netfilter mark matching allows you to match packets based on the
+ `nfmark' value in the packet. This can be set by the MARK target
+ (see below).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_MULTIPORT
+ tristate '"multiport" Multiple port match support'
+ depends on NETFILTER_ADVANCED
+ help
+ Multiport matching allows you to match TCP or UDP packets based on
+ a series of source or destination ports: normally a rule can only
+ match a single range of ports.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_OWNER
+ tristate '"owner" match support'
+ depends on NETFILTER_ADVANCED
+ ---help---
+ Socket owner matching allows you to match locally-generated packets
+ based on who created the socket: the user or group. It is also
+ possible to check whether a socket actually exists.
+
+config NETFILTER_XT_MATCH_POLICY
+ tristate 'IPsec "policy" match support'
+ depends on XFRM
+ default m if NETFILTER_ADVANCED=n
+ help
+ Policy matching allows you to match packets based on the
+ IPsec policy that was used during decapsulation/will
+ be used during encapsulation.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_PHYSDEV
+ tristate '"physdev" match support'
+ depends on BRIDGE && BRIDGE_NETFILTER
+ depends on NETFILTER_ADVANCED
+ help
+ Physdev packet matching matches against the physical bridge ports
+ the IP packet arrived on or will leave by.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_PKTTYPE
+ tristate '"pkttype" packet type match support'
+ depends on NETFILTER_ADVANCED
+ help
+ Packet type matching allows you to match a packet by
+ its "class", eg. BROADCAST, MULTICAST, ...
+
+ Typical usage:
+ iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_QUOTA
+ tristate '"quota" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `quota' match, which allows to match on a
+ byte counter.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_RATEEST
+ tristate '"rateest" match support'
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_TARGET_RATEEST
+ help
+ This option adds a `rateest' match, which allows to match on the
+ rate estimated by the RATEEST target.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_REALM
+ tristate '"realm" match support'
+ depends on NETFILTER_ADVANCED
+ select NET_CLS_ROUTE
+ help
+ This option adds a `realm' match, which allows you to use the realm
+ key from the routing subsystem inside iptables.
+
+ This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option
+ in tc world.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_RECENT
+ tristate '"recent" match support'
+ depends on NETFILTER_ADVANCED
+ ---help---
+ This match is used for creating one or many lists of recently
+ used addresses and then matching against that/those list(s).
+
+ Short options are available by using 'iptables -m recent -h'
+ Official Website: <http://snowman.net/projects/ipt_recent/>
+
+config NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+ bool 'Enable obsolete /proc/net/ipt_recent'
+ depends on NETFILTER_XT_MATCH_RECENT && PROC_FS
+ ---help---
+ This option enables the old /proc/net/ipt_recent interface,
+ which has been obsoleted by /proc/net/xt_recent.
+
+config NETFILTER_XT_MATCH_SCTP
+ tristate '"sctp" protocol match support (EXPERIMENTAL)'
+ depends on EXPERIMENTAL
+ depends on NETFILTER_ADVANCED
+ default IP_SCTP
+ help
+ With this option enabled, you will be able to use the
+ `sctp' match in order to match on SCTP source/destination ports
+ and SCTP chunk types.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_SOCKET
+ tristate '"socket" match support (EXPERIMENTAL)'
+ depends on EXPERIMENTAL
+ depends on NETFILTER_TPROXY
+ depends on NETFILTER_XTABLES
+ depends on NETFILTER_ADVANCED
+ select NF_DEFRAG_IPV4
+ help
+ This option adds a `socket' match, which can be used to match
+ packets for which a TCP or UDP socket lookup finds a valid socket.
+ It can be used in combination with the MARK target and policy
+ routing to implement full featured non-locally bound sockets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_STATE
+ tristate '"state" match support'
+ depends on NF_CONNTRACK
+ default m if NETFILTER_ADVANCED=n
+ help
+ Connection state matching allows you to match packets based on their
+ relationship to a tracked connection (ie. previous packets). This
+ is a powerful tool for packet classification.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_STATISTIC
+ tristate '"statistic" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `statistic' match, which allows you to match
+ on packets periodically or randomly with a given percentage.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_STRING
+ tristate '"string" match support'
+ depends on NETFILTER_ADVANCED
+ select TEXTSEARCH
+ select TEXTSEARCH_KMP
+ select TEXTSEARCH_BM
+ select TEXTSEARCH_FSM
+ help
+ This option adds a `string' match, which allows you to look for
+ pattern matchings in packets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_TCPMSS
+ tristate '"tcpmss" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `tcpmss' match, which allows you to examine the
+ MSS value of TCP SYN packets, which control the maximum packet size
+ for that connection.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NETFILTER_XT_MATCH_TIME
+ tristate '"time" match support'
+ depends on NETFILTER_ADVANCED
+ ---help---
+ This option adds a "time" match, which allows you to match based on
+ the packet arrival time (at the machine which netfilter is running
+ on) or departure time/date (for locally generated packets).
+
+ If you say Y here, try `iptables -m time --help` for
+ more information.
+
+ If you want to compile it as a module, say M here.
+ If unsure, say N.
+
+config NETFILTER_XT_MATCH_U32
+ tristate '"u32" match support'
+ depends on NETFILTER_ADVANCED
+ ---help---
+ u32 allows you to extract quantities of up to 4 bytes from a packet,
+ AND them with specified masks, shift them by specified amounts and
+ test whether the results are in any of a set of specified ranges.
+ The specification of what to extract is general enough to skip over
+ headers with lengths stored in the packet, as in IP or TCP header
+ lengths.
+
+ Details and examples are in the kernel module source.
+
+endif # NETFILTER_XTABLES
+
+endmenu
+
+source "net/netfilter/ipvs/Kconfig"
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
new file mode 100644
index 0000000..da3d909
--- /dev/null
+++ b/net/netfilter/Makefile
@@ -0,0 +1,94 @@
+netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
+
+nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
+
+obj-$(CONFIG_NETFILTER) = netfilter.o
+
+obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
+obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
+obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
+
+# connection tracking
+obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
+
+# protocol-specific connection tracking (DCCP, GRE, SCTP, UDP-Lite)
+obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
+obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
+obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
+obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o
+
+# netlink interface for nf_conntrack
+obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o
+
+# connection tracking helpers
+nf_conntrack_h323-objs := nf_conntrack_h323_main.o nf_conntrack_h323_asn1.o
+
+obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o
+obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
+obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o
+obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o
+obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o
+obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o
+obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
+obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
+obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
+
+# transparent proxy support
+obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
+
+# generic X tables
+obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
+
+# targets
+obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
+
+# matches
+obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o
+
+# IPVS
+obj-$(CONFIG_IP_VS) += ipvs/
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
new file mode 100644
index 0000000..a90ac83
--- /dev/null
+++ b/net/netfilter/core.c
@@ -0,0 +1,283 @@
+/* netfilter.c: look after the filters for various protocols.
+ * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
+ *
+ * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
+ * way.
+ *
+ * Rusty Russell (C)2000 -- This code is GPL.
+ */
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/wait.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/if.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/mutex.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+#include "nf_internals.h"
+
+static DEFINE_MUTEX(afinfo_mutex);
+
+const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
+EXPORT_SYMBOL(nf_afinfo);
+
+/*
+ * Publish @afinfo in the nf_afinfo[] slot selected by afinfo->family.
+ *
+ * The slot is written with rcu_assign_pointer() while afinfo_mutex is
+ * held.  Returns 0 on success, or the negative value returned by
+ * mutex_lock_interruptible() when the lock could not be taken.
+ */
+int nf_register_afinfo(const struct nf_afinfo *afinfo)
+{
+	int rc = mutex_lock_interruptible(&afinfo_mutex);
+
+	if (rc >= 0) {
+		rcu_assign_pointer(nf_afinfo[afinfo->family], afinfo);
+		mutex_unlock(&afinfo_mutex);
+		rc = 0;
+	}
+	return rc;
+}
+EXPORT_SYMBOL_GPL(nf_register_afinfo);
+/*
+ * Clear the nf_afinfo[] slot selected by afinfo->family.
+ *
+ * The slot is set to NULL under afinfo_mutex; synchronize_rcu() is then
+ * called (after the mutex is dropped) before the function returns.
+ */
+void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
+{
+ mutex_lock(&afinfo_mutex);
+ rcu_assign_pointer(nf_afinfo[afinfo->family], NULL);
+ mutex_unlock(&afinfo_mutex);
+ synchronize_rcu(); /* wait out RCU readers before the caller proceeds */
+}
+EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
+
+struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
+EXPORT_SYMBOL(nf_hooks);
+static DEFINE_MUTEX(nf_hook_mutex);
+/*
+ * Insert @reg into the hook list nf_hooks[reg->pf][reg->hooknum].
+ *
+ * The list is scanned for the first entry whose priority is strictly
+ * greater than reg->priority and @reg is linked in just before it, so
+ * entries stay in ascending priority order and a new entry goes after
+ * existing entries of equal priority.  If no such entry exists the loop
+ * cursor ends on the list head and @reg is appended at the tail.
+ *
+ * Returns 0 on success, or the negative value returned by
+ * mutex_lock_interruptible() when nf_hook_mutex could not be taken.
+ */
+int nf_register_hook(struct nf_hook_ops *reg)
+{
+ struct nf_hook_ops *elem;
+ int err;
+
+ err = mutex_lock_interruptible(&nf_hook_mutex);
+ if (err < 0)
+ return err;
+ list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
+ if (reg->priority < elem->priority)
+ break;
+ }
+ list_add_rcu(&reg->list, elem->list.prev); /* link in just before elem */
+ mutex_unlock(&nf_hook_mutex);
+ return 0;
+}
+EXPORT_SYMBOL(nf_register_hook);
+/*
+ * Remove @reg from the hook list it was linked into.
+ *
+ * The entry is unlinked with list_del_rcu() under nf_hook_mutex, and
+ * synchronize_net() is called after the mutex is released and before
+ * returning.
+ */
+void nf_unregister_hook(struct nf_hook_ops *reg)
+{
+ mutex_lock(&nf_hook_mutex);
+ list_del_rcu(&reg->list);
+ mutex_unlock(&nf_hook_mutex);
+
+ synchronize_net();
+}
+EXPORT_SYMBOL(nf_unregister_hook);
+
+/*
+ * Register the @n hook descriptors in the array @reg, in array order.
+ *
+ * If registering one of them fails, every descriptor registered so far
+ * by this call is unregistered again and the failing error code is
+ * returned.  Returns 0 when all @n registrations succeed (or @n is 0).
+ */
+int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
+{
+	unsigned int done = 0;
+	int rc = 0;
+
+	while (done < n) {
+		rc = nf_register_hook(reg + done);
+		if (rc != 0) {
+			/* roll back the entries that did get registered */
+			if (done != 0)
+				nf_unregister_hooks(reg, done);
+			break;
+		}
+		done++;
+	}
+	return rc;
+}
+EXPORT_SYMBOL(nf_register_hooks);
+
+/*
+ * Unregister the first @n hook descriptors of the array @reg, one at a
+ * time and in array order, via nf_unregister_hook().
+ */
+void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
+{
+	struct nf_hook_ops *cur = reg;
+	struct nf_hook_ops *const end = reg + n;
+
+	while (cur != end)
+		nf_unregister_hook(cur++);
+}
+EXPORT_SYMBOL(nf_unregister_hooks);
+/*
+ * Run the hooks on @head that follow the cursor *@i and return the
+ * first decisive verdict.
+ *
+ * @head:        hook list being traversed
+ * @skb, @hook, @indev, @outdev, @okfn: passed through to each hook
+ * @i:           in/out traversal cursor; iteration continues with the
+ *               entry after *@i, and *@i is left on the entry that
+ *               produced the returned verdict
+ * @hook_thresh: hooks whose priority is below this value are skipped
+ *
+ * A hook returning NF_ACCEPT lets traversal continue.  NF_REPEAT moves
+ * the cursor back one entry so the same hook is invoked again.  Any
+ * other verdict is returned to the caller.  With CONFIG_NETFILTER_DEBUG,
+ * a verdict whose masked value exceeds NF_MAX_VERDICT is logged and
+ * ignored.  Returns NF_ACCEPT when the list is exhausted.
+ */
+unsigned int nf_iterate(struct list_head *head,
+ struct sk_buff *skb,
+ unsigned int hook,
+ const struct net_device *indev,
+ const struct net_device *outdev,
+ struct list_head **i,
+ int (*okfn)(struct sk_buff *),
+ int hook_thresh)
+{
+ unsigned int verdict;
+
+ /*
+ * The caller must not block between calls to this
+ * function because of risk of continuing from deleted element.
+ */
+ list_for_each_continue_rcu(*i, head) {
+ struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
+ /* NOTE(review): the cast presumes the list_head is the first member of struct nf_hook_ops - confirm */
+ if (hook_thresh > elem->priority)
+ continue;
+
+ /* Optimization: we don't need to hold module
+ reference here, since function can't sleep. --RR */
+ verdict = elem->hook(hook, skb, indev, outdev, okfn);
+ if (verdict != NF_ACCEPT) {
+#ifdef CONFIG_NETFILTER_DEBUG
+ if (unlikely((verdict & NF_VERDICT_MASK)
+ > NF_MAX_VERDICT)) {
+ NFDEBUG("Evil return from %p(%u).\n",
+ elem->hook, hook);
+ continue;
+ }
+#endif
+ if (verdict != NF_REPEAT)
+ return verdict;
+ *i = (*i)->prev; /* NF_REPEAT: step back so the loop revisits this hook */
+ }
+ }
+ return NF_ACCEPT;
+}
+
+
+/*
+ * Traverse the hooks registered for (@pf, @hook) and act on the verdict.
+ *
+ * Returns 1 if okfn() needs to be executed by the caller,
+ * -EPERM for NF_DROP, 0 otherwise.
+ *
+ * Verdict handling, all under rcu_read_lock():
+ *  - NF_ACCEPT / NF_STOP: return 1.
+ *  - NF_DROP: the skb is freed with kfree_skb() and -EPERM is returned.
+ *  - NF_QUEUE: the skb is handed to nf_queue() with the queue number
+ *    taken from the bits above NF_VERDICT_BITS; if nf_queue() returns 0
+ *    traversal resumes after the hook that asked for queueing, otherwise
+ *    0 is returned.
+ *  - anything else: return 0.
+ */
+int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
+ struct net_device *indev,
+ struct net_device *outdev,
+ int (*okfn)(struct sk_buff *),
+ int hook_thresh)
+{
+ struct list_head *elem;
+ unsigned int verdict;
+ int ret = 0;
+
+ /* We may already have this, but read-locks nest anyway */
+ rcu_read_lock();
+
+ elem = &nf_hooks[pf][hook];
+next_hook:
+ verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
+ outdev, &elem, okfn, hook_thresh);
+ if (verdict == NF_ACCEPT || verdict == NF_STOP) {
+ ret = 1;
+ goto unlock;
+ } else if (verdict == NF_DROP) {
+ kfree_skb(skb);
+ ret = -EPERM;
+ } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
+ if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+ verdict >> NF_VERDICT_BITS))
+ goto next_hook; /* elem still marks the current hook, so nf_iterate() resumes after it */
+ }
+unlock:
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL(nf_hook_slow);
+
+/*
+ * Try to make the first @writable_len bytes of @skb safe to modify.
+ *
+ * Returns 0 if @writable_len exceeds skb->len.  Returns 1 immediately
+ * when the skb is not cloned and the range already lies within the
+ * linear head, or when it is cloned and skb_clone_writable() accepts
+ * the range.  Otherwise __pskb_pull_tail() is called for the part of
+ * the range beyond the linear head (0 bytes if the range already fits)
+ * and the result is 1 if that call returned non-NULL, else 0.
+ */
+int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
+{
+ if (writable_len > skb->len)
+ return 0;
+
+ /* Not exclusive use of packet? Must copy. */
+ if (!skb_cloned(skb)) {
+ if (writable_len <= skb_headlen(skb))
+ return 1;
+ } else if (skb_clone_writable(skb, writable_len))
+ return 1;
+
+ if (writable_len <= skb_headlen(skb))
+ writable_len = 0;
+ else
+ writable_len -= skb_headlen(skb); /* only the part past the linear head */
+
+ return !!__pskb_pull_tail(skb, writable_len);
+}
+EXPORT_SYMBOL(skb_make_writable);
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+/* This does not belong here, but locally generated errors need it if connection
+ tracking in use: without this, connection may not be in hash table, and hence
+ manufactured ICMP or RST packets will not be associated with it. */
+void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+EXPORT_SYMBOL(ip_ct_attach);
+
+/*
+ * Invoke the registered ip_ct_attach callback for (@new, @skb).
+ *
+ * Nothing happens when @skb carries no nfct pointer or when no callback
+ * is currently installed.  The callback pointer is read and called
+ * inside an RCU read-side critical section.
+ */
+void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
+{
+	void (*attach_fn)(struct sk_buff *, struct sk_buff *);
+
+	if (!skb->nfct)
+		return;
+
+	rcu_read_lock();
+	attach_fn = rcu_dereference(ip_ct_attach);
+	if (attach_fn != NULL)
+		attach_fn(new, skb);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_ct_attach);
+
+void (*nf_ct_destroy)(struct nf_conntrack *);
+EXPORT_SYMBOL(nf_ct_destroy);
+/*
+ * Call the installed nf_ct_destroy callback on @nfct.
+ *
+ * The callback pointer is read and invoked inside an RCU read-side
+ * critical section.  A NULL callback is treated as a fatal bug
+ * (BUG_ON).
+ */
+void nf_conntrack_destroy(struct nf_conntrack *nfct)
+{
+ void (*destroy)(struct nf_conntrack *);
+
+ rcu_read_lock();
+ destroy = rcu_dereference(nf_ct_destroy);
+ BUG_ON(destroy == NULL);
+ destroy(nfct);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_conntrack_destroy);
+#endif /* CONFIG_NF_CONNTRACK */
+
+#ifdef CONFIG_PROC_FS
+struct proc_dir_entry *proc_net_netfilter;
+EXPORT_SYMBOL(proc_net_netfilter);
+#endif
+
+/*
+ * Boot-time setup of the netfilter core.
+ *
+ * Initialises every list head in nf_hooks[][], creates the "netfilter"
+ * directory under init_net.proc_net when CONFIG_PROC_FS is set, and
+ * runs the nf_queue and nf_log initialisers.  Any failure panics.
+ */
+void __init netfilter_init(void)
+{
+	int pf, hooknum;
+
+	for (pf = 0; pf < ARRAY_SIZE(nf_hooks); pf++)
+		for (hooknum = 0; hooknum < NF_MAX_HOOKS; hooknum++)
+			INIT_LIST_HEAD(&nf_hooks[pf][hooknum]);
+
+#ifdef CONFIG_PROC_FS
+	proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net);
+	if (proc_net_netfilter == NULL)
+		panic("cannot create netfilter proc entry");
+#endif
+
+	if (netfilter_queue_init() < 0)
+		panic("cannot initialize nf_queue");
+	if (netfilter_log_init() < 0)
+		panic("cannot initialize nf_log");
+}
+/*
+ * ctl_path with the two components "net" and "netfilter", followed by
+ * an empty entry.  Exported (GPL-only) and built only when
+ * CONFIG_SYSCTL is set.
+ */
+#ifdef CONFIG_SYSCTL
+struct ctl_path nf_net_netfilter_sysctl_path[] = {
+ { .procname = "net", .ctl_name = CTL_NET, },
+ { .procname = "netfilter", .ctl_name = NET_NETFILTER, },
+ { } /* presumably the end-of-path sentinel - confirm against ctl_path users */
+};
+EXPORT_SYMBOL_GPL(nf_net_netfilter_sysctl_path);
+#endif /* CONFIG_SYSCTL */
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
new file mode 100644
index 0000000..79a6980
--- /dev/null
+++ b/net/netfilter/ipvs/Kconfig
@@ -0,0 +1,241 @@
+#
+# IP Virtual Server configuration
+#
+menuconfig IP_VS
+ tristate "IP virtual server support"
+ depends on NET && INET && NETFILTER
+ ---help---
+ IP Virtual Server support will let you build a high-performance
+ virtual server based on cluster of two or more real servers. This
+ option must be enabled for at least one of the clustered computers
+ that will take care of intercepting incoming connections to a
+ single IP address and scheduling them to real servers.
+
+ Three request dispatching techniques are implemented, they are
+ virtual server via NAT, virtual server via tunneling and virtual
+ server via direct routing. The several scheduling algorithms can
+ be used to choose which server the connection is directed to,
+ thus load balancing can be achieved among the servers. For more
+ information and its administration program, please visit the
+ following URL: <http://www.linuxvirtualserver.org/>.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+if IP_VS
+
+config IP_VS_IPV6
+ bool "IPv6 support for IPVS"
+ depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
+ ---help---
+ Add IPv6 support to IPVS. This is incomplete and might be dangerous.
+
+ See http://www.mindbasket.com/ipvs for more information.
+
+ Say N if unsure.
+
+config IP_VS_DEBUG
+ bool "IP virtual server debugging"
+ ---help---
+ Say Y here if you want to get additional messages useful in
+ debugging the IP virtual server code. You can change the debug
+ level in /proc/sys/net/ipv4/vs/debug_level
+
+config IP_VS_TAB_BITS
+ int "IPVS connection table size (the Nth power of 2)"
+ range 8 20
+ default 12
+ ---help---
+ The IPVS connection hash table uses the chaining scheme to handle
+ hash collisions. Using a big IPVS connection hash table will greatly
+ reduce conflicts when there are hundreds of thousands of connections
+ in the hash table.
+
+ Note the table size must be a power of 2. The table size will be
+ 2 raised to the power of the number you enter. The number to choose is
+ from 8 to 20, the default number is 12, which means the table size
+ is 4096. Don't input the number too small, otherwise you will lose
+ performance on it. You can adapt the table size yourself, according
+ to your virtual server application. It is good to set the table size
+ not far less than the number of connections per second multiplying
+ average lasting time of connection in the table. For example, your
+ virtual server gets 200 connections per second, the connection lasts
+ for 200 seconds in average in the connection table, the table size
+ should be not far less than 200x200, it is good to set the table
+ size 32768 (2**15).
+
+ Another note that each connection occupies 128 bytes effectively and
+ each hash entry uses 8 bytes, so you can estimate how much memory is
+ needed for your box.
+
+comment "IPVS transport protocol load balancing support"
+
+config IP_VS_PROTO_TCP
+ bool "TCP load balancing support"
+ ---help---
+ This option enables support for load balancing TCP transport
+ protocol. Say Y if unsure.
+
+config IP_VS_PROTO_UDP
+ bool "UDP load balancing support"
+ ---help---
+ This option enables support for load balancing UDP transport
+ protocol. Say Y if unsure.
+
+config IP_VS_PROTO_AH_ESP
+ bool
+ depends on UNDEFINED
+
+config IP_VS_PROTO_ESP
+ bool "ESP load balancing support"
+ select IP_VS_PROTO_AH_ESP
+ ---help---
+ This option enables support for load balancing ESP (Encapsulation
+ Security Payload) transport protocol. Say Y if unsure.
+
+config IP_VS_PROTO_AH
+ bool "AH load balancing support"
+ select IP_VS_PROTO_AH_ESP
+ ---help---
+ This option enables support for load balancing AH (Authentication
+ Header) transport protocol. Say Y if unsure.
+
+comment "IPVS scheduler"
+
+config IP_VS_RR
+ tristate "round-robin scheduling"
+ ---help---
+ The round-robin scheduling algorithm simply directs network
+ connections to different real servers in a round-robin manner.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_WRR
+ tristate "weighted round-robin scheduling"
+ ---help---
+ The weighted round-robin scheduling algorithm directs network
+ connections to different real servers based on server weights
+ in a round-robin manner. Servers with higher weights receive
+ new connections before those with lower weights, servers
+ with higher weights get more connections than those with lower
+ weights, and servers with equal weights get equal connections.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_LC
+ tristate "least-connection scheduling"
+ ---help---
+ The least-connection scheduling algorithm directs network
+ connections to the server with the least number of active
+ connections.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_WLC
+ tristate "weighted least-connection scheduling"
+ ---help---
+ The weighted least-connection scheduling algorithm directs network
+ connections to the server with the least active connections
+ normalized by the server weight.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_LBLC
+ tristate "locality-based least-connection scheduling"
+ ---help---
+ The locality-based least-connection scheduling algorithm is for
+ destination IP load balancing. It is usually used in cache cluster.
+ This algorithm usually directs packet destined for an IP address to
+ its server if the server is alive and under load. If the server is
+ overloaded (its active connection numbers is larger than its weight)
+ and there is a server in its half load, then allocate the weighted
+ least-connection server to this IP address.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_LBLCR
+ tristate "locality-based least-connection with replication scheduling"
+ ---help---
+ The locality-based least-connection with replication scheduling
+ algorithm is also for destination IP load balancing. It is
+ usually used in cache cluster. It differs from the LBLC scheduling
+ as follows: the load balancer maintains mappings from a target
+ to a set of server nodes that can serve the target. Requests for
+ a target are assigned to the least-connection node in the target's
+ server set. If all the nodes in the server set are overloaded,
+ it picks a least-connection node in the cluster and adds it
+ to the server set for the target. If the server set has not been
+ modified for the specified time, the most loaded node is removed
+ from the server set, in order to avoid high degree of replication.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_DH
+ tristate "destination hashing scheduling"
+ ---help---
+ The destination hashing scheduling algorithm assigns network
+ connections to the servers through looking up a statically assigned
+ hash table by their destination IP addresses.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_SH
+ tristate "source hashing scheduling"
+ ---help---
+ The source hashing scheduling algorithm assigns network
+ connections to the servers through looking up a statically assigned
+ hash table by their source IP addresses.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_SED
+ tristate "shortest expected delay scheduling"
+ ---help---
+ The shortest expected delay scheduling algorithm assigns network
+ connections to the server with the shortest expected delay. The
+ expected delay that the job will experience is (Ci + 1) / Ui if
+ sent to the ith server, in which Ci is the number of connections
+ on the ith server and Ui is the fixed service rate (weight)
+ of the ith server.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_NQ
+ tristate "never queue scheduling"
+ ---help---
+ The never queue scheduling algorithm adopts a two-speed model.
+ When there is an idle server available, the job will be sent to
+ the idle server, instead of waiting for a fast one. When there
+ is no idle server available, the job will be sent to the server
+ that minimize its expected delay (The Shortest Expected Delay
+ scheduling algorithm).
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+comment 'IPVS application helper'
+
+config IP_VS_FTP
+ tristate "FTP protocol helper"
+ depends on IP_VS_PROTO_TCP
+ ---help---
+ FTP is a protocol that transfers IP address and/or port number in
+ the payload. In the virtual server via Network Address Translation,
+ the IP address and port number of real servers cannot be sent to
+ clients in ftp connections directly, so FTP protocol helper is
+ required for tracking the connection and mangling it back to that of
+ virtual service.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+endif # IP_VS
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
new file mode 100644
index 0000000..73a46fe
--- /dev/null
+++ b/net/netfilter/ipvs/Makefile
@@ -0,0 +1,33 @@
+#
+# Makefile for the IPVS modules on top of IPv4.
+#
+
+# IPVS transport protocol load balancing support
+ip_vs_proto-objs-y :=
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
+
+ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
+ ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
+ ip_vs_est.o ip_vs_proto.o \
+ $(ip_vs_proto-objs-y)
+
+
+# IPVS core
+obj-$(CONFIG_IP_VS) += ip_vs.o
+
+# IPVS schedulers
+obj-$(CONFIG_IP_VS_RR) += ip_vs_rr.o
+obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
+obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
+obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
+obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
+obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
+obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
+obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
+obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
+obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
+
+# IPVS application helpers
+obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
new file mode 100644
index 0000000..201b8ea
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -0,0 +1,622 @@
+/*
+ * ip_vs_app.c: Application module support for IPVS
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
+ * is that ip_vs_app module handles the reverse direction (incoming requests
+ * and outgoing responses).
+ *
+ * IP_MASQ_APP application masquerading module
+ *
+ * Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <net/net_namespace.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <asm/system.h>
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/mutex.h>
+
+#include <net/ip_vs.h>
+
+EXPORT_SYMBOL(register_ip_vs_app);
+EXPORT_SYMBOL(unregister_ip_vs_app);
+EXPORT_SYMBOL(register_ip_vs_app_inc);
+
+/* ipvs application list head */
+static LIST_HEAD(ip_vs_app_list);
+static DEFINE_MUTEX(__ip_vs_app_mutex);
+
+
+/*
+ * Get an ip_vs_app object
+ *
+ * Takes a reference on app->module via try_module_get() and returns
+ * that call's result unchanged; ip_vs_app_inc_get() treats any value
+ * other than 1 as failure.
+ */
+static inline int ip_vs_app_get(struct ip_vs_app *app)
+{
+ return try_module_get(app->module);
+}
+
+/*
+ * Put an ip_vs_app object: drops the reference on app->module with
+ * module_put(), the counterpart of ip_vs_app_get().
+ */
+static inline void ip_vs_app_put(struct ip_vs_app *app)
+{
+ module_put(app->module);
+}
+
+
+/*
+ * Allocate/initialize app incarnation and register it in proto apps.
+ *
+ * @app:   template application; the incarnation starts as a copy of it
+ * @proto: protocol number looked up with ip_vs_proto_get()
+ * @port:  port in host byte order; stored in inc->port in network order
+ *
+ * On success the incarnation is linked onto app->incs_list and 0 is
+ * returned.  Errors:
+ *   -EPROTONOSUPPORT  no protocol handler for @proto
+ *   -EOPNOTSUPP       the protocol lacks register_app or unregister_app
+ *   -ENOMEM           allocation of the incarnation or its timeout
+ *                     table failed
+ *   other             whatever pp->register_app() returned
+ */
+static int
+ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
+{
+	struct ip_vs_protocol *pp;
+	struct ip_vs_app *inc;
+	int ret;
+
+	pp = ip_vs_proto_get(proto);
+	if (!pp)
+		return -EPROTONOSUPPORT;
+
+	/*
+	 * Both callbacks are required: register_app is called below, and
+	 * unregister_app is what ip_vs_app_inc_release() uses to undo it.
+	 * Checking only unregister_app would allow a NULL call here.
+	 */
+	if (!pp->register_app || !pp->unregister_app)
+		return -EOPNOTSUPP;
+
+	inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
+	if (!inc)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&inc->p_list);
+	INIT_LIST_HEAD(&inc->incs_list);
+	inc->app = app;
+	inc->port = htons(port);
+	atomic_set(&inc->usecnt, 0);
+
+	if (app->timeouts) {
+		inc->timeout_table =
+			ip_vs_create_timeout_table(app->timeouts,
+						   app->timeouts_size);
+		if (!inc->timeout_table) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	ret = pp->register_app(inc);
+	if (ret)
+		goto out;
+
+	list_add(&inc->a_list, &app->incs_list);
+	/* inc->port is in network byte order; convert it for display */
+	IP_VS_DBG(9, "%s application %s:%u registered\n",
+		  pp->name, inc->name, ntohs(inc->port));
+
+	return 0;
+
+  out:
+	/*
+	 * NOTE(review): when app->timeouts is NULL, inc->timeout_table is
+	 * whatever kmemdup() copied from @app - presumably NULL; confirm.
+	 */
+	kfree(inc->timeout_table);
+	kfree(inc);
+	return ret;
+}
+
+
+/*
+ * Release app incarnation
+ *
+ * Looks up the protocol handler for inc->protocol, calls its
+ * unregister_app callback if it has one, unlinks @inc from its
+ * a_list and frees both the timeout table and the incarnation.
+ *
+ * NOTE(review): when the protocol lookup fails the function returns
+ * early, leaving @inc linked and unfreed.
+ */
+static void
+ip_vs_app_inc_release(struct ip_vs_app *inc)
+{
+	struct ip_vs_protocol *pp;
+
+	pp = ip_vs_proto_get(inc->protocol);
+	if (!pp)
+		return;
+
+	if (pp->unregister_app)
+		pp->unregister_app(inc);
+
+	/* inc->port is kept in network byte order; convert it for display */
+	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
+		  pp->name, inc->name, ntohs(inc->port));
+
+	list_del(&inc->a_list);
+
+	kfree(inc->timeout_table);
+	kfree(inc);
+}
+
+
+/*
+ * Get reference to app inc (only called from softirq)
+ *
+ * Increments inc->usecnt, then takes a module reference on the parent
+ * application with ip_vs_app_get().  If that does not return 1 the
+ * usecnt increment is undone.  Returns the ip_vs_app_get() result, so
+ * 1 means both references are held.
+ */
+int ip_vs_app_inc_get(struct ip_vs_app *inc)
+{
+ int result;
+
+ atomic_inc(&inc->usecnt);
+ if (unlikely((result = ip_vs_app_get(inc->app)) != 1))
+ atomic_dec(&inc->usecnt);
+ return result;
+}
+
+
+/*
+ * Put the app inc (only called from timer or net softirq)
+ *
+ * Drops the module reference on the parent application, then
+ * decrements inc->usecnt; the counterpart of ip_vs_app_inc_get().
+ */
+void ip_vs_app_inc_put(struct ip_vs_app *inc)
+{
+ ip_vs_app_put(inc->app);
+ atomic_dec(&inc->usecnt);
+}
+
+
+/*
+ * Register an application incarnation in protocol applications
+ *
+ * Calls ip_vs_app_inc_new(@app, @proto, @port) with __ip_vs_app_mutex
+ * held and returns its result (0 on success, negative error code
+ * otherwise).
+ */
+int
+register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
+{
+ int result;
+
+ mutex_lock(&__ip_vs_app_mutex);
+
+ result = ip_vs_app_inc_new(app, proto, port);
+
+ mutex_unlock(&__ip_vs_app_mutex);
+
+ return result;
+}
+
+
+/*
+ * ip_vs_app registration routine
+ *
+ * Calls ip_vs_use_count_inc() and links @app onto ip_vs_app_list under
+ * __ip_vs_app_mutex.  Always returns 0.
+ */
+int register_ip_vs_app(struct ip_vs_app *app)
+{
+ /* increase the module use count */
+ ip_vs_use_count_inc();
+
+ mutex_lock(&__ip_vs_app_mutex);
+
+ list_add(&app->a_list, &ip_vs_app_list);
+
+ mutex_unlock(&__ip_vs_app_mutex);
+
+ return 0;
+}
+
+
+/*
+ * ip_vs_app unregistration routine
+ * We are sure there are no app incarnations attached to services
+ *
+ * Under __ip_vs_app_mutex: releases every incarnation on
+ * app->incs_list with ip_vs_app_inc_release(), then unlinks @app from
+ * its a_list.  After the mutex is dropped, ip_vs_use_count_dec() is
+ * called, mirroring register_ip_vs_app().
+ */
+void unregister_ip_vs_app(struct ip_vs_app *app)
+{
+ struct ip_vs_app *inc, *nxt;
+
+ mutex_lock(&__ip_vs_app_mutex);
+
+ list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
+ ip_vs_app_inc_release(inc); /* unlinks and frees inc, hence the _safe iterator */
+ }
+
+ list_del(&app->a_list);
+
+ mutex_unlock(&__ip_vs_app_mutex);
+
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
+}
+
+
+/*
+ * Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
+ *
+ * Delegates to the protocol's app_conn_bind callback and returns its
+ * result.  The callback is invoked without a NULL check, so the caller
+ * must make sure @pp and pp->app_conn_bind are set.
+ */
+int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
+{
+ return pp->app_conn_bind(cp);
+}
+
+
+/*
+ * Detach connection @cp from the application incarnation it is bound
+ * to (called by cp destructor).
+ *
+ * Does nothing when cp->app is NULL.  Otherwise the incarnation's
+ * optional unbind_conn and done_conn callbacks are run in that order,
+ * the incarnation reference is dropped and cp->app is cleared.
+ */
+void ip_vs_unbind_app(struct ip_vs_conn *cp)
+{
+	struct ip_vs_app *bound = cp->app;
+
+	if (bound != NULL) {
+		if (bound->unbind_conn != NULL)
+			bound->unbind_conn(bound, cp);
+		if (bound->done_conn != NULL)
+			bound->done_conn(bound, cp);
+		ip_vs_app_inc_put(bound);
+		cp->app = NULL;
+	}
+}
+
+
+/*
+ * Rewrite th->seq according to the offsets recorded in @vseq.
+ *
+ * Nothing is changed while both delta and previous_delta are zero.
+ * Sequence numbers after vseq->init_seq (the most recent resized
+ * packet) are shifted by delta; all others by previous_delta.
+ */
+static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
+{
+	__u32 cur;
+
+	if (!vseq->delta && !vseq->previous_delta)
+		return;
+
+	cur = ntohl(th->seq);
+	if (!after(cur, vseq->init_seq)) {
+		/* at or before the last resized packet: use the old offset */
+		th->seq = htonl(cur + vseq->previous_delta);
+		IP_VS_DBG(9, "vs_fix_seq(): added previous_delta "
+			  "(%d) to seq\n", vseq->previous_delta);
+		return;
+	}
+
+	th->seq = htonl(cur + vseq->delta);
+	IP_VS_DBG(9, "vs_fix_seq(): added delta (%d) to seq\n",
+		  vseq->delta);
+}
+
+
+/*
+ * Rewrite th->ack_seq from the sequence-adjustment record @vseq.
+ *
+ * ack_seq names the next octet the sender expects, so it is compared
+ * against init_seq + delta: values after that point have vseq->delta
+ * subtracted, all others have vseq->previous_delta subtracted.
+ * The header is left untouched while both deltas are zero.
+ */
+static inline void
+vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
+{
+	__u32 ack_seq = ntohl(th->ack_seq);
+
+	if (!vseq->delta && !vseq->previous_delta)
+		return;
+
+	if (!after(ack_seq, vseq->init_seq+vseq->delta)) {
+		th->ack_seq = htonl(ack_seq - vseq->previous_delta);
+		IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted "
+			  "previous_delta (%d) from ack_seq\n",
+			  vseq->previous_delta);
+		return;
+	}
+
+	th->ack_seq = htonl(ack_seq - vseq->delta);
+	IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted delta "
+		  "(%d) from ack_seq\n", vseq->delta);
+}
+
+
+/*
+ * Record that a packet was resized by @diff bytes.
+ * The caller has already checked proto == IPPROTO_TCP and diff != 0.
+ *
+ * The record is updated when @flag is not yet set on the connection,
+ * or when @seq lies after the recorded init_seq: the current delta
+ * becomes previous_delta, @diff is accumulated into delta, init_seq
+ * becomes @seq and @flag is set in cp->flags.
+ */
+static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
+				 unsigned flag, __u32 seq, int diff)
+{
+	/* cp->lock keeps the update of cp->flags atomic */
+	spin_lock(&cp->lock);
+
+	if ((cp->flags & flag) && !after(seq, vseq->init_seq))
+		goto unlock;
+
+	vseq->previous_delta = vseq->delta;
+	vseq->delta += diff;
+	vseq->init_seq = seq;
+	cp->flags |= flag;
+
+unlock:
+	spin_unlock(&cp->lock);
+}
+
+/*
+ * TCP flavour of the output hook.
+ *
+ * Makes the TCP header writable, applies any pending seq/ack_seq
+ * adjustments, runs the application's pkt_out hook (if any) and, when
+ * the hook reports a length change, records it in cp->out_seq.
+ *
+ * Returns 0 if the header cannot be made writable or the hook returns
+ * 0; returns 1 otherwise.
+ */
+static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
+				  struct ip_vs_app *app)
+{
+	const unsigned int tcp_offset = ip_hdrlen(skb);
+	struct tcphdr *th;
+	__u32 orig_seq;
+	int diff;
+
+	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
+		return 0;
+
+	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
+
+	/* sampled before any fix-up, in case this packet gets resized */
+	orig_seq = ntohl(th->seq);
+
+	/* apply the adjustments that earlier resizes have flagged */
+	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
+		vs_fix_seq(&cp->out_seq, th);
+	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
+		vs_fix_ack_seq(&cp->in_seq, th);
+
+	/* private output hook is optional */
+	if (!app->pkt_out)
+		return 1;
+	if (!app->pkt_out(app, cp, skb, &diff))
+		return 0;
+
+	/* remember the shift if the hook changed the length */
+	if (diff)
+		vs_seq_update(cp, &cp->out_seq,
+			      IP_VS_CONN_F_OUT_SEQ, orig_seq, diff);
+
+	return 1;
+}
+
+/*
+ * Output packet hook, called by the ipvs packet handler with a
+ * non-NULL @cp.  Dispatches to the application bound to the
+ * connection, if there is one.
+ *
+ * Returns 1 when no application or no pkt_out hook is bound;
+ * otherwise the hook's (or the TCP path's) result, where false means
+ * the packet could not be handled (oom).
+ */
+int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_app *app = cp->app;
+
+	/* no application module bound to this connection */
+	if (!app)
+		return 1;
+
+	/* TCP needs sequence number bookkeeping */
+	if (cp->protocol == IPPROTO_TCP)
+		return app_tcp_pkt_out(cp, skb, app);
+
+	if (!app->pkt_out)
+		return 1;
+
+	/* other protocols: no length diff is reported back */
+	return app->pkt_out(app, cp, skb, NULL);
+}
+
+
+/*
+ * TCP flavour of the input hook.
+ *
+ * Makes the TCP header writable, applies any pending seq/ack_seq
+ * adjustments, runs the application's pkt_in hook (if any) and, when
+ * the hook reports a length change, records it in cp->in_seq.
+ *
+ * Returns 0 if the header cannot be made writable or the hook returns
+ * 0; returns 1 otherwise.
+ */
+static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
+				 struct ip_vs_app *app)
+{
+	const unsigned int tcp_offset = ip_hdrlen(skb);
+	struct tcphdr *th;
+	__u32 orig_seq;
+	int diff;
+
+	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
+		return 0;
+
+	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
+
+	/* sampled before any fix-up, in case this packet gets resized */
+	orig_seq = ntohl(th->seq);
+
+	/* apply the adjustments that earlier resizes have flagged */
+	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
+		vs_fix_seq(&cp->in_seq, th);
+	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
+		vs_fix_ack_seq(&cp->out_seq, th);
+
+	/* private input hook is optional */
+	if (!app->pkt_in)
+		return 1;
+	if (!app->pkt_in(app, cp, skb, &diff))
+		return 0;
+
+	/* remember the shift if the hook changed the length */
+	if (diff)
+		vs_seq_update(cp, &cp->in_seq,
+			      IP_VS_CONN_F_IN_SEQ, orig_seq, diff);
+
+	return 1;
+}
+
+/*
+ * Input packet hook, called by the ipvs packet handler with a
+ * non-NULL @cp.  Dispatches to the application bound to the
+ * connection, if there is one.
+ *
+ * Returns 1 when no application or no pkt_in hook is bound;
+ * otherwise the hook's (or the TCP path's) result, where false means
+ * the packet could not be handled (oom).
+ */
+int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_app *app = cp->app;
+
+	/* no application module bound to this connection */
+	if (!app)
+		return 1;
+
+	/* TCP needs sequence number bookkeeping */
+	if (cp->protocol == IPPROTO_TCP)
+		return app_tcp_pkt_in(cp, skb, app);
+
+	if (!app->pkt_in)
+		return 1;
+
+	/* other protocols: no length diff is reported back */
+	return app->pkt_in(app, cp, skb, NULL);
+}
+
+
+#ifdef CONFIG_PROC_FS
+/*
+ * /proc/net/ip_vs_app entry function
+ */
+
+/*
+ * Return the incarnation at zero-based position @pos, counting across
+ * the incs_list of every application on ip_vs_app_list in list order,
+ * or NULL if there are not that many incarnations.
+ */
+static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
+{
+	struct ip_vs_app *app, *inc;
+
+	list_for_each_entry(app, &ip_vs_app_list, a_list) {
+		list_for_each_entry(inc, &app->incs_list, a_list) {
+			if (pos == 0)
+				return inc;
+			pos--;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * seq_file ->start: take __ip_vs_app_mutex (released in ->stop) and
+ * position the iterator.  Position 0 is the header token; position N
+ * maps to incarnation N - 1.
+ */
+static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	mutex_lock(&__ip_vs_app_mutex);
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	return ip_vs_app_idx(*pos - 1);
+}
+
+/*
+ * seq_file ->next: advance *pos and return the incarnation following
+ * @v.  After the header token this is the very first incarnation;
+ * otherwise it is the next entry on the same application's incs_list
+ * or, once that list is exhausted, the first incarnation of the next
+ * application that has any.  Returns NULL at the end.
+ */
+static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ip_vs_app *inc, *app;
+	struct list_head *e;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ip_vs_app_idx(0);
+
+	inc = v;
+	app = inc->app;
+
+	/* more incarnations of the same application? */
+	e = inc->a_list.next;
+	if (e != &app->incs_list)
+		return list_entry(e, struct ip_vs_app, a_list);
+
+	/* move on to the following applications */
+	for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
+		app = list_entry(e, struct ip_vs_app, a_list);
+		if (!list_empty(&app->incs_list))
+			return list_entry(app->incs_list.next,
+					  struct ip_vs_app, a_list);
+	}
+
+	return NULL;
+}
+
+/* seq_file ->stop: release __ip_vs_app_mutex taken in ip_vs_app_seq_start() */
+static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
+{
+	mutex_unlock(&__ip_vs_app_mutex);
+}
+
+/*
+ * seq_file ->show: print the column header for the start token, or
+ * one line (protocol, port, use count, name) for an incarnation.
+ * Always returns 0.
+ */
+static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
+{
+	const struct ip_vs_app *inc = v;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, "prot port usecnt name\n");
+		return 0;
+	}
+
+	seq_printf(seq, "%-3s %-7u %-6d %-17s\n",
+		   ip_vs_proto_name(inc->protocol),
+		   ntohs(inc->port),
+		   atomic_read(&inc->usecnt),
+		   inc->name);
+	return 0;
+}
+
+/* seq_file iterator callbacks for the ip_vs_app listing */
+static const struct seq_operations ip_vs_app_seq_ops = {
+	.start	= ip_vs_app_seq_start,
+	.next	= ip_vs_app_seq_next,
+	.stop	= ip_vs_app_seq_stop,
+	.show	= ip_vs_app_seq_show,
+};
+
+/* ->open for the ip_vs_app proc file: attach the seq_file iterator */
+static int ip_vs_app_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	ret = seq_open(file, &ip_vs_app_seq_ops);
+	return ret;
+}
+
+/* file operations for the ip_vs_app proc entry (plain seq_file plumbing) */
+static const struct file_operations ip_vs_app_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ip_vs_app_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+#endif
+
+
+/*
+ * Replace the @o_len bytes at @o_buf inside the skb data with the
+ * @n_len bytes at @n_buf, moving the data that follows and resizing
+ * the skb by the difference.
+ *
+ * When the skb has to grow beyond its tailroom the head is
+ * reallocated with pskb_expand_head() using allocation flags @pri;
+ * from then on only offsets relative to skb->data are used, not
+ * @o_buf.  The IP header's tot_len is rewritten from skb->len.
+ *
+ * Returns 0 on success or -ENOMEM if the head cannot be expanded.
+ */
+int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
+		      char *o_buf, int o_len, char *n_buf, int n_len)
+{
+	int diff;
+	int o_offset;
+	int o_left;
+
+	EnterFunction(9);
+
+	diff = n_len - o_len;
+	o_offset = o_buf - (char *)skb->data;
+	/* number of data bytes that follow o_buf+o_len in the skb */
+	o_left = skb->len - (o_offset + o_len);
+
+	if (diff > 0 && diff > skb_tailroom(skb)) {
+		/* not enough tailroom: the data area gets reallocated */
+		if (pskb_expand_head(skb, skb_headroom(skb), diff, pri))
+			return -ENOMEM;
+		skb_put(skb, diff);
+		memmove(skb->data + o_offset + n_len,
+			skb->data + o_offset + o_len, o_left);
+		skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
+	} else {
+		/* in place: grow first, or trim once the data has moved */
+		if (diff > 0)
+			skb_put(skb, diff);
+		memmove(o_buf + n_len, o_buf + o_len, o_left);
+		memcpy(o_buf, n_buf, n_len);
+		if (diff <= 0)
+			skb_trim(skb, skb->len + diff);
+	}
+
+	/* must update the iph total length here */
+	ip_hdr(skb)->tot_len = htons(skb->len);
+
+	LeaveFunction(9);
+	return 0;
+}
+
+
+/*
+ * Module init hook for the application helper layer: creates the
+ * "ip_vs_app" proc entry in init_net.  The result of the creation is
+ * not checked; the function always returns 0.
+ */
+int __init ip_vs_app_init(void)
+{
+	/* we will replace it with proc_net_ipvs_create() soon */
+	proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
+
+	return 0;
+}
+
+
+/* Undo ip_vs_app_init(): remove the "ip_vs_app" proc entry from init_net */
+void ip_vs_app_cleanup(void)
+{
+	proc_net_remove(&init_net, "ip_vs_app");
+}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
new file mode 100644
index 0000000..9a24332
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -0,0 +1,1110 @@
+/*
+ * IPVS An implementation of the IP virtual server support for the
+ * LINUX operating system. IPVS is now implemented as a module
+ * over the Netfilter framework. IPVS can be used to build a
+ * high-performance and highly available server based on a
+ * cluster of servers.
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Peter Kese <peter.kese@ijs.si>
+ * Julian Anastasov <ja@ssi.bg>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
+ * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
+ * and others. Much of the code here is taken from the IP MASQ code of kernel 2.2.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/net.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/proc_fs.h> /* for proc_net_* */
+#include <linux/seq_file.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
+
+#include <net/net_namespace.h>
+#include <net/ip_vs.h>
+
+
+/*
+ * Connection hash table: used for lookups of IPVS connections for both
+ * input and output packets.  Each slot is a list head; the slot index
+ * comes from ip_vs_conn_hashkey().
+ */
+static struct list_head *ip_vs_conn_tab;
+
+/* SLAB cache from which struct ip_vs_conn entries are allocated */
+static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
+
+/* counter for current IPVS connections (inc on create, dec on expiry) */
+static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
+
+/* counter for connections still flagged IP_VS_CONN_F_NO_CPORT */
+static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
+
+/* random value mixed into the hash by ip_vs_conn_hashkey() */
+static unsigned int ip_vs_conn_rnd;
+
+/*
+ * Fine locking granularity for big connection hash table:
+ * CT_LOCKARRAY_SIZE (1 << CT_LOCKARRAY_BITS, i.e. 16) rwlocks are
+ * shared by all hash slots.  The ct_*_lock helpers pick a slot's lock
+ * by masking its hash key with CT_LOCKARRAY_MASK.
+ */
+#define CT_LOCKARRAY_BITS 4
+#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
+#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
+
+struct ip_vs_aligned_lock
+{
+ rwlock_t l;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+/* lock array for conn table; each element is aligned to SMP_CACHE_BYTES */
+static struct ip_vs_aligned_lock
+__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
+
+/* Read-lock the conn table lock selected by hash key @key */
+static inline void ct_read_lock(unsigned key)
+{
+	unsigned slot = key & CT_LOCKARRAY_MASK;
+
+	read_lock(&__ip_vs_conntbl_lock_array[slot].l);
+}
+
+/* Release the read lock selected by hash key @key */
+static inline void ct_read_unlock(unsigned key)
+{
+	unsigned slot = key & CT_LOCKARRAY_MASK;
+
+	read_unlock(&__ip_vs_conntbl_lock_array[slot].l);
+}
+
+/* Write-lock the conn table lock selected by hash key @key */
+static inline void ct_write_lock(unsigned key)
+{
+	unsigned slot = key & CT_LOCKARRAY_MASK;
+
+	write_lock(&__ip_vs_conntbl_lock_array[slot].l);
+}
+
+/* Release the write lock selected by hash key @key */
+static inline void ct_write_unlock(unsigned key)
+{
+	unsigned slot = key & CT_LOCKARRAY_MASK;
+
+	write_unlock(&__ip_vs_conntbl_lock_array[slot].l);
+}
+
+/* Read-lock (_bh variant) the conn table lock selected by @key */
+static inline void ct_read_lock_bh(unsigned key)
+{
+	unsigned slot = key & CT_LOCKARRAY_MASK;
+
+	read_lock_bh(&__ip_vs_conntbl_lock_array[slot].l);
+}
+
+/* Release the read lock (_bh variant) selected by @key */
+static inline void ct_read_unlock_bh(unsigned key)
+{
+	unsigned slot = key & CT_LOCKARRAY_MASK;
+
+	read_unlock_bh(&__ip_vs_conntbl_lock_array[slot].l);
+}
+
+/* Write-lock (_bh variant) the conn table lock selected by @key */
+static inline void ct_write_lock_bh(unsigned key)
+{
+	unsigned slot = key & CT_LOCKARRAY_MASK;
+
+	write_lock_bh(&__ip_vs_conntbl_lock_array[slot].l);
+}
+
+/* Release the write lock (_bh variant) selected by @key */
+static inline void ct_write_unlock_bh(unsigned key)
+{
+	unsigned slot = key & CT_LOCKARRAY_MASK;
+
+	write_unlock_bh(&__ip_vs_conntbl_lock_array[slot].l);
+}
+
+
+/*
+ * Compute the connection table slot for (@proto, @addr, @port).
+ *
+ * The address contributes one 32-bit word: addr->ip as is or, for
+ * AF_INET6 when IPv6 support is built in, a jhash over the 16 address
+ * bytes.  That word, the port and the protocol are mixed with
+ * ip_vs_conn_rnd by jhash_3words() and masked to the table size.
+ */
+static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+				       const union nf_inet_addr *addr,
+				       __be16 port)
+{
+	u32 addr_word = (__force u32)addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		addr_word = jhash(addr, 16, ip_vs_conn_rnd);
+#endif
+	return jhash_3words(addr_word, (__force u32)port, proto,
+			    ip_vs_conn_rnd)
+		& IP_VS_CONN_TAB_MASK;
+}
+
+
+/*
+ * Insert @cp into ip_vs_conn_tab, keyed by protocol, client address
+ * and client port.  On success the entry is flagged
+ * IP_VS_CONN_F_HASHED and the table takes a reference on it.
+ *
+ * Returns 1 on success, 0 (after logging an error) if the entry was
+ * already hashed.
+ */
+static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
+{
+	unsigned hash;
+	int ret = 0;
+
+	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
+
+	ct_write_lock(hash);
+
+	if (cp->flags & IP_VS_CONN_F_HASHED) {
+		IP_VS_ERR("ip_vs_conn_hash(): request for already hashed, "
+			  "called from %p\n", __builtin_return_address(0));
+	} else {
+		list_add(&cp->c_list, &ip_vs_conn_tab[hash]);
+		cp->flags |= IP_VS_CONN_F_HASHED;
+		atomic_inc(&cp->refcnt);
+		ret = 1;
+	}
+
+	ct_write_unlock(hash);
+
+	return ret;
+}
+
+
+/*
+ * Remove @cp from ip_vs_conn_tab.  If the entry was hashed it is
+ * unlinked, IP_VS_CONN_F_HASHED is cleared and the table's reference
+ * is dropped.
+ *
+ * Returns 1 if the entry was unhashed, 0 if it was not hashed.
+ */
+static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
+{
+	unsigned hash;
+	int ret = 0;
+
+	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
+
+	ct_write_lock(hash);
+
+	if (cp->flags & IP_VS_CONN_F_HASHED) {
+		list_del(&cp->c_list);
+		cp->flags &= ~IP_VS_CONN_F_HASHED;
+		atomic_dec(&cp->refcnt);
+		ret = 1;
+	}
+
+	ct_write_unlock(hash);
+
+	return ret;
+}
+
+
+/*
+ * Look up a connection for a packet travelling OUTside-to-INside.
+ *   s_addr, s_port: packet source (foreign host)
+ *   d_addr, d_port: packet destination (load balancer)
+ *
+ * The source must match the entry's client address/port and the
+ * destination its virtual address/port.  A zero @s_port only matches
+ * entries flagged IP_VS_CONN_F_NO_CPORT, a non-zero one only entries
+ * without that flag.  On a hit a reference is taken on the entry
+ * before the table lock is released.  Returns NULL on a miss.
+ */
+static inline struct ip_vs_conn *__ip_vs_conn_in_get
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
+{
+	unsigned hash;
+	struct ip_vs_conn *cp, *found = NULL;
+
+	hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
+
+	ct_read_lock(hash);
+
+	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (cp->af != af)
+			continue;
+		if (!ip_vs_addr_equal(af, s_addr, &cp->caddr) ||
+		    !ip_vs_addr_equal(af, d_addr, &cp->vaddr))
+			continue;
+		if (s_port != cp->cport || d_port != cp->vport)
+			continue;
+		if (!((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))))
+			continue;
+		if (protocol != cp->protocol)
+			continue;
+
+		/* HIT */
+		atomic_inc(&cp->refcnt);
+		found = cp;
+		break;
+	}
+
+	ct_read_unlock(hash);
+
+	return found;
+}
+
+/*
+ * Look up a connection for an incoming packet.  The fully addressed
+ * lookup is tried first; on a miss, and only while connections
+ * without a client port exist, the lookup is repeated with source
+ * port 0.  A returned entry carries a reference for the caller.
+ */
+struct ip_vs_conn *ip_vs_conn_in_get
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
+{
+	struct ip_vs_conn *cp;
+
+	cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port);
+	if (cp == NULL) {
+		if (atomic_read(&ip_vs_conn_no_cport_cnt))
+			cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0,
+						 d_addr, d_port);
+	}
+
+	IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
+		      ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+		      IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+		      cp ? "hit" : "not hit");
+
+	return cp;
+}
+
+/*
+ * Get a reference to a connection template.
+ * Same matching as the incoming lookup (source against client
+ * address/port, destination against virtual address/port, protocol),
+ * but only entries flagged IP_VS_CONN_F_TEMPLATE qualify.
+ * Returns NULL if no template matches.
+ */
+struct ip_vs_conn *ip_vs_ct_in_get
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
+{
+	unsigned hash;
+	struct ip_vs_conn *cp, *ct = NULL;
+
+	hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
+
+	ct_read_lock(hash);
+
+	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (cp->af != af)
+			continue;
+		if (!ip_vs_addr_equal(af, s_addr, &cp->caddr) ||
+		    !ip_vs_addr_equal(af, d_addr, &cp->vaddr))
+			continue;
+		if (s_port != cp->cport || d_port != cp->vport)
+			continue;
+		if (!(cp->flags & IP_VS_CONN_F_TEMPLATE))
+			continue;
+		if (protocol != cp->protocol)
+			continue;
+
+		/* HIT */
+		atomic_inc(&cp->refcnt);
+		ct = cp;
+		break;
+	}
+
+	ct_read_unlock(hash);
+
+	IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
+		      ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+		      IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+		      ct ? "hit" : "not hit");
+
+	return ct;
+}
+
+/*
+ * Look up a connection for a packet travelling inside-to-OUTside.
+ *   s_addr, s_port: packet source (inside host)
+ *   d_addr, d_port: packet destination (foreign host)
+ *
+ * The table is keyed by the client side, so the slot is computed from
+ * the packet's destination; the destination must match the entry's
+ * client address/port and the source its dest address/port.
+ * On a hit a reference is taken.  Returns NULL on a miss.
+ */
+struct ip_vs_conn *ip_vs_conn_out_get
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
+{
+	unsigned hash;
+	struct ip_vs_conn *cp, *ret = NULL;
+
+	/* only "full" addressed entries are checked */
+	hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port);
+
+	ct_read_lock(hash);
+
+	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (cp->af != af)
+			continue;
+		if (!ip_vs_addr_equal(af, d_addr, &cp->caddr) ||
+		    !ip_vs_addr_equal(af, s_addr, &cp->daddr))
+			continue;
+		if (d_port != cp->cport || s_port != cp->dport)
+			continue;
+		if (protocol != cp->protocol)
+			continue;
+
+		/* HIT */
+		atomic_inc(&cp->refcnt);
+		ret = cp;
+		break;
+	}
+
+	ct_read_unlock(hash);
+
+	IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
+		      ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+		      IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+		      ret ? "hit" : "not hit");
+
+	return ret;
+}
+
+
+/*
+ * Release a connection obtained from one of the lookups: its timer
+ * is re-armed to fire cp->timeout jiffies from now, then the
+ * reference is dropped via __ip_vs_conn_put().
+ */
+void ip_vs_conn_put(struct ip_vs_conn *cp)
+{
+	unsigned long expires = jiffies+cp->timeout;
+
+	mod_timer(&cp->timer, expires);
+
+	__ip_vs_conn_put(cp);
+}
+
+
+/*
+ * Give a no-client-port connection its client port.
+ * The entry is taken out of the table, updated under cp->lock if it
+ * is still flagged IP_VS_CONN_F_NO_CPORT, and then hashed again.
+ * Nothing happens if the entry was not hashed.
+ */
+void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
+{
+	if (!ip_vs_conn_unhash(cp))
+		return;
+
+	spin_lock(&cp->lock);
+	if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
+		atomic_dec(&ip_vs_conn_no_cport_cnt);
+		cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
+		cp->cport = cport;
+	}
+	spin_unlock(&cp->lock);
+
+	/* re-insert: the hash key is computed from cp->cport */
+	ip_vs_conn_hash(cp);
+}
+
+
+/*
+ * Select the IPv4 transmit routine matching the connection's
+ * forwarding method and store it in cp->packet_xmit.
+ * Called by ip_vs_conn_new.
+ */
+static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
+{
+	switch (IP_VS_FWD_METHOD(cp)) {
+	case IP_VS_CONN_F_MASQ:
+		cp->packet_xmit = ip_vs_nat_xmit;
+		break;
+	case IP_VS_CONN_F_TUNNEL:
+		cp->packet_xmit = ip_vs_tunnel_xmit;
+		break;
+	case IP_VS_CONN_F_DROUTE:
+		cp->packet_xmit = ip_vs_dr_xmit;
+		break;
+	case IP_VS_CONN_F_LOCALNODE:
+		cp->packet_xmit = ip_vs_null_xmit;
+		break;
+	case IP_VS_CONN_F_BYPASS:
+		cp->packet_xmit = ip_vs_bypass_xmit;
+		break;
+	default:
+		/* any other method: cp->packet_xmit is left as it was */
+		break;
+	}
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * IPv6 counterpart of ip_vs_bind_xmit(): select the transmit routine
+ * matching the connection's forwarding method.  The local-node method
+ * uses the same ip_vs_null_xmit as the IPv4 variant.
+ */
+static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
+{
+	switch (IP_VS_FWD_METHOD(cp)) {
+	case IP_VS_CONN_F_MASQ:
+		cp->packet_xmit = ip_vs_nat_xmit_v6;
+		break;
+	case IP_VS_CONN_F_TUNNEL:
+		cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+		break;
+	case IP_VS_CONN_F_DROUTE:
+		cp->packet_xmit = ip_vs_dr_xmit_v6;
+		break;
+	case IP_VS_CONN_F_LOCALNODE:
+		cp->packet_xmit = ip_vs_null_xmit;
+		break;
+	case IP_VS_CONN_F_BYPASS:
+		cp->packet_xmit = ip_vs_bypass_xmit_v6;
+		break;
+	default:
+		/* any other method: cp->packet_xmit is left as it was */
+		break;
+	}
+}
+#endif
+
+
+/* Sum of the active and inactive connection counters of @dest */
+static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
+{
+	int active = atomic_read(&dest->activeconns);
+	int inactive = atomic_read(&dest->inactconns);
+
+	return active + inactive;
+}
+
+/*
+ * Bind a connection entry to a virtual service destination.
+ * Called just after a new connection entry is created.
+ *
+ * Does nothing for a NULL @dest.  Otherwise a reference is taken on
+ * @dest, its conn_flags are merged into cp->flags, the matching
+ * per-destination connection counter is bumped and the destination
+ * is flagged overloaded once its upper threshold is reached.
+ */
+static inline void
+ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
+{
+	int keep_activity;
+	int conn_flags;
+
+	if (!dest)
+		return;
+
+	atomic_inc(&dest->refcnt);
+
+	/*
+	 * A non-template connection created by sync keeps its own
+	 * activity flag, so INACTIVE is not inherited from the dest.
+	 */
+	keep_activity = (cp->flags & IP_VS_CONN_F_SYNC) &&
+			(!(cp->flags & IP_VS_CONN_F_TEMPLATE));
+	conn_flags = atomic_read(&dest->conn_flags);
+	if (keep_activity)
+		conn_flags &= (~IP_VS_CONN_F_INACTIVE);
+	cp->flags |= conn_flags;
+	cp->dest = dest;
+
+	IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
+		      "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+		      "dest->refcnt:%d\n",
+		      ip_vs_proto_name(cp->protocol),
+		      IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+		      ip_vs_fwd_tag(cp), cp->state,
+		      cp->flags, atomic_read(&cp->refcnt),
+		      atomic_read(&dest->refcnt));
+
+	/* update the connection counters */
+	if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
+		/* persistent connection/template */
+		atomic_inc(&dest->persistconns);
+	} else if ((cp->flags & IP_VS_CONN_F_SYNC) &&
+		   (!(cp->flags & IP_VS_CONN_F_INACTIVE))) {
+		/* synced connection that is marked active */
+		atomic_inc(&dest->activeconns);
+	} else {
+		/*
+		 * normal connection: it starts out inactive (TCP
+		 * SYNRECV or another protocol's inactive state)
+		 */
+		atomic_inc(&dest->inactconns);
+	}
+
+	if (dest->u_threshold != 0 &&
+	    ip_vs_dest_totalconns(dest) >= dest->u_threshold)
+		dest->flags |= IP_VS_DEST_F_OVERLOAD;
+}
+
+
+/*
+ * If @cp has no destination yet, look one up from the connection's
+ * dest/virtual address, ports and protocol and bind @cp to it.
+ *
+ * Returns the destination found (possibly NULL), or NULL when @cp is
+ * NULL or already has a destination.
+ */
+struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
+{
+	struct ip_vs_dest *dest;
+
+	if (!cp || cp->dest)
+		return NULL;
+
+	dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
+			       &cp->vaddr, cp->vport,
+			       cp->protocol);
+	ip_vs_bind_dest(cp, dest);
+
+	return dest;
+}
+
+
+/*
+ * Unbind a connection entry from its virtual service destination.
+ * Called by the ip_vs_conn_expire function.
+ *
+ * Decrements the per-destination counter the connection was counted
+ * in, re-evaluates the overload flag and drops the reference that
+ * the connection held on the destination.  No-op if cp->dest is NULL.
+ */
+static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
+{
+	struct ip_vs_dest *dest = cp->dest;
+
+	if (!dest)
+		return;
+
+	IP_VS_DBG_BUF(7, "Unbind-dest %s c:%s:%d v:%s:%d "
+		      "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+		      "dest->refcnt:%d\n",
+		      ip_vs_proto_name(cp->protocol),
+		      IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+		      ip_vs_fwd_tag(cp), cp->state,
+		      cp->flags, atomic_read(&cp->refcnt),
+		      atomic_read(&dest->refcnt));
+
+	/* update the connection counters */
+	if (cp->flags & IP_VS_CONN_F_TEMPLATE)
+		atomic_dec(&dest->persistconns);
+	else if (cp->flags & IP_VS_CONN_F_INACTIVE)
+		atomic_dec(&dest->inactconns);
+	else
+		atomic_dec(&dest->activeconns);
+
+	/*
+	 * Clear the overload flag: below the lower threshold if one is
+	 * set, else below 3/4 of the upper threshold if one is set,
+	 * else whenever the flag is found set.
+	 */
+	if (dest->l_threshold != 0) {
+		if (ip_vs_dest_totalconns(dest) < dest->l_threshold)
+			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+	} else if (dest->u_threshold != 0) {
+		if (ip_vs_dest_totalconns(dest) * 4 < dest->u_threshold * 3)
+			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+	} else if (dest->flags & IP_VS_DEST_F_OVERLOAD) {
+		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+	}
+
+	/*
+	 * A plain decrement is enough: the dest stays either in the
+	 * service's destination list or in the trash.
+	 */
+	atomic_dec(&dest->refcnt);
+}
+
+
+/*
+ * Check whether the destination of connection template @ct is usable.
+ *
+ * Returns 1 if it is.  Otherwise (no dest, dest not flagged
+ * available, or dest weight 0 while
+ * sysctl_ip_vs_expire_quiescent_template is set) the template is
+ * invalidated, the reference on it is dropped without restarting its
+ * timer, and 0 is returned.
+ */
+int ip_vs_check_template(struct ip_vs_conn *ct)
+{
+	struct ip_vs_dest *dest = ct->dest;
+
+	if (dest != NULL &&
+	    (dest->flags & IP_VS_DEST_F_AVAILABLE) &&
+	    !(sysctl_ip_vs_expire_quiescent_template &&
+	      (atomic_read(&dest->weight) == 0)))
+		return 1;
+
+	IP_VS_DBG_BUF(9, "check_template: dest not available for "
+		      "protocol %s s:%s:%d v:%s:%d "
+		      "-> d:%s:%d\n",
+		      ip_vs_proto_name(ct->protocol),
+		      IP_VS_DBG_ADDR(ct->af, &ct->caddr),
+		      ntohs(ct->cport),
+		      IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
+		      ntohs(ct->vport),
+		      IP_VS_DBG_ADDR(ct->af, &ct->daddr),
+		      ntohs(ct->dport));
+
+	/*
+	 * Invalidate the template unless that was already done (vport
+	 * 0xffff marks an invalidated one): rewrite its ports and hash
+	 * it again under the new key.
+	 */
+	if (ct->vport != htons(0xffff) && ip_vs_conn_unhash(ct)) {
+		ct->dport = htons(0xffff);
+		ct->vport = htons(0xffff);
+		ct->cport = 0;
+		ip_vs_conn_hash(ct);
+	}
+
+	/* drop the reference only; the timer is not restarted */
+	atomic_dec(&ct->refcnt);
+	return 0;
+}
+
+/*
+ * Timer callback of a connection; @data is the struct ip_vs_conn.
+ *
+ * The connection is destroyed when it controls no other connection,
+ * could be unhashed, and this handler then holds the only reference.
+ * In every other case it is kept (hashed back if needed) and its
+ * timer is re-armed with a 60 second timeout.
+ */
+static void ip_vs_conn_expire(unsigned long data)
+{
+	struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+
+	/* timeout that applies if the entry has to be kept */
+	cp->timeout = 60*HZ;
+
+	/* hold a reference while working on the entry */
+	atomic_inc(&cp->refcnt);
+
+	/* still controlling someone, or not in the table: try later */
+	if (atomic_read(&cp->n_control) || !ip_vs_conn_unhash(cp))
+		goto expire_later;
+
+	/* refcnt != 1 means somebody besides us still refers to it */
+	if (unlikely(atomic_read(&cp->refcnt) != 1)) {
+		/* hash it back to the table */
+		ip_vs_conn_hash(cp);
+		goto expire_later;
+	}
+
+	/* delete the timer if it is activated by other users */
+	if (timer_pending(&cp->timer))
+		del_timer(&cp->timer);
+
+	/* detach from a controlling connection, if any */
+	if (cp->control)
+		ip_vs_control_del(cp);
+
+	if (unlikely(cp->app != NULL))
+		ip_vs_unbind_app(cp);
+	ip_vs_unbind_dest(cp);
+	if (cp->flags & IP_VS_CONN_F_NO_CPORT)
+		atomic_dec(&ip_vs_conn_no_cport_cnt);
+	atomic_dec(&ip_vs_conn_count);
+
+	kmem_cache_free(ip_vs_conn_cachep, cp);
+	return;
+
+expire_later:
+	IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",
+		  atomic_read(&cp->refcnt)-1,
+		  atomic_read(&cp->n_control));
+
+	/* drops our reference and restarts the timer */
+	ip_vs_conn_put(cp);
+}
+
+
+/*
+ * Make a connection's timer fire immediately.  The timer is only
+ * re-armed (at the current jiffies) if del_timer() reports that it
+ * was pending.
+ */
+void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
+{
+	if (!del_timer(&cp->timer))
+		return;
+
+	mod_timer(&cp->timer, jiffies);
+}
+
+
+/*
+ * Create a new connection entry and hash it into ip_vs_conn_tab.
+ *
+ * The entry is allocated with GFP_ATOMIC, filled from the client,
+ * virtual and destination address/port arguments and @flags, bound
+ * to @dest (may be NULL), to a transmit routine and, if the protocol
+ * has applications registered, to an application helper.
+ *
+ * Returns the new entry, holding one reference for the caller, or
+ * NULL if the allocation fails.
+ */
+struct ip_vs_conn *
+ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
+	       const union nf_inet_addr *vaddr, __be16 vport,
+	       const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
+	       struct ip_vs_dest *dest)
+{
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
+
+	cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
+	if (!cp) {
+		IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&cp->c_list);
+	setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+	cp->af = af;
+	cp->protocol = proto;
+	ip_vs_addr_copy(af, &cp->caddr, caddr);
+	cp->cport = cport;
+	ip_vs_addr_copy(af, &cp->vaddr, vaddr);
+	cp->vport = vport;
+	ip_vs_addr_copy(af, &cp->daddr, daddr);
+	cp->dport = dport;
+	cp->flags = flags;
+	spin_lock_init(&cp->lock);
+
+	/*
+	 * The caller's reference is set before the entry is hashed, so
+	 * that another thread running ip_vs_random_dropentry cannot
+	 * drop this entry.
+	 */
+	atomic_set(&cp->refcnt, 1);
+
+	atomic_set(&cp->n_control, 0);
+	atomic_set(&cp->in_pkts, 0);
+
+	/* global accounting */
+	atomic_inc(&ip_vs_conn_count);
+	if (flags & IP_VS_CONN_F_NO_CPORT)
+		atomic_inc(&ip_vs_conn_no_cport_cnt);
+
+	/* bind the connection with a destination server */
+	ip_vs_bind_dest(cp, dest);
+
+	/* initial state and timeout */
+	cp->state = 0;
+	cp->timeout = 3*HZ;
+
+	/* bind its packet transmitter */
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		ip_vs_bind_xmit_v6(cp);
+	else
+#endif
+		ip_vs_bind_xmit(cp);
+
+	/* only try an app binding if the protocol has apps registered */
+	if (unlikely(pp && atomic_read(&pp->appcnt)))
+		ip_vs_bind_app(cp, pp);
+
+	/* finally make it visible in ip_vs_conn_tab */
+	ip_vs_conn_hash(cp);
+
+	return cp;
+}
+
+
+/*
+ * /proc/net/ip_vs_conn entries
+ */
+#ifdef CONFIG_PROC_FS
+
+/*
+ * Return the connection at zero-based position @pos, counting through
+ * the hash slots in index order, or NULL if there is none.
+ *
+ * On a hit the read lock of the slot is left held and the slot's
+ * list head is stored in seq->private so that the seq_file ->next
+ * and ->stop callbacks can release it.
+ */
+static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
+{
+	int idx;
+	struct ip_vs_conn *cp;
+
+	for (idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
+		ct_read_lock_bh(idx);
+		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+			if (pos == 0) {
+				/* return with this slot's lock held */
+				seq->private = &ip_vs_conn_tab[idx];
+				return cp;
+			}
+			pos--;
+		}
+		ct_read_unlock_bh(idx);
+	}
+
+	return NULL;
+}
+
+/*
+ * seq_file ->start: forget any remembered slot, then position the
+ * iterator.  Position 0 is the header token; position N maps to
+ * connection N - 1.
+ */
+static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	seq->private = NULL;
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	return ip_vs_conn_array(seq, *pos - 1);
+}
+
+/*
+ * seq_file ->next: advance *pos and return the connection after @v.
+ *
+ * While entries remain on the current hash chain the slot lock stays
+ * held.  At the end of a chain that lock is released and the
+ * following slots are scanned; the first non-empty one is left
+ * locked and remembered in seq->private.  Returns NULL, with no lock
+ * held and seq->private cleared, when the table is exhausted.
+ */
+static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ip_vs_conn *cp = v;
+	struct list_head *e, *l = seq->private;
+	int idx;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ip_vs_conn_array(seq, 0);
+
+	/* more on same hash chain? */
+	e = cp->c_list.next;
+	if (e != l)
+		return list_entry(e, struct ip_vs_conn, c_list);
+
+	/* chain finished: hand the lock over to the next used slot */
+	idx = l - ip_vs_conn_tab;
+	ct_read_unlock_bh(idx);
+
+	for (idx++; idx < IP_VS_CONN_TAB_SIZE; idx++) {
+		struct list_head *slot = &ip_vs_conn_tab[idx];
+
+		ct_read_lock_bh(idx);
+		if (!list_empty(slot)) {
+			seq->private = slot;
+			return list_entry(slot->next,
+					  struct ip_vs_conn, c_list);
+		}
+		ct_read_unlock_bh(idx);
+	}
+
+	seq->private = NULL;
+	return NULL;
+}
+
+/*
+ * seq_file ->stop: if a hash slot is remembered in seq->private, its
+ * read lock is still held by the iterator; release it.
+ */
+static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
+{
+	struct list_head *l = seq->private;
+
+	if (!l)
+		return;
+
+	ct_read_unlock_bh(l - ip_vs_conn_tab);
+}
+
+/*
+ * seq_file ->show for the connection listing: print the column header
+ * for the start token, or one line per connection with protocol,
+ * client/virtual/dest address and port (hex), state name and the
+ * seconds left until the timer expires.  Always returns 0.
+ */
+static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
+{
+	const struct ip_vs_conn *cp = v;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq,
+ "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires\n");
+		return 0;
+	}
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		seq_printf(seq,
+			   "%-3s " NIP6_FMT " %04X " NIP6_FMT
+			   " %04X " NIP6_FMT " %04X %-11s %7lu\n",
+			   ip_vs_proto_name(cp->protocol),
+			   NIP6(cp->caddr.in6), ntohs(cp->cport),
+			   NIP6(cp->vaddr.in6), ntohs(cp->vport),
+			   NIP6(cp->daddr.in6), ntohs(cp->dport),
+			   ip_vs_state_name(cp->protocol, cp->state),
+			   (cp->timer.expires-jiffies)/HZ);
+	else
+#endif
+		seq_printf(seq,
+			   "%-3s %08X %04X %08X %04X"
+			   " %08X %04X %-11s %7lu\n",
+			   ip_vs_proto_name(cp->protocol),
+			   ntohl(cp->caddr.ip), ntohs(cp->cport),
+			   ntohl(cp->vaddr.ip), ntohs(cp->vport),
+			   ntohl(cp->daddr.ip), ntohs(cp->dport),
+			   ip_vs_state_name(cp->protocol, cp->state),
+			   (cp->timer.expires-jiffies)/HZ);
+
+	return 0;
+}
+
+/* seq_file iterator callbacks for the plain connection listing */
+static const struct seq_operations ip_vs_conn_seq_ops = {
+	.start	= ip_vs_conn_seq_start,
+	.next	= ip_vs_conn_seq_next,
+	.stop	= ip_vs_conn_seq_stop,
+	.show	= ip_vs_conn_seq_show,
+};
+
+/* ->open for the connection listing: attach ip_vs_conn_seq_ops */
+static int ip_vs_conn_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	ret = seq_open(file, &ip_vs_conn_seq_ops);
+	return ret;
+}
+
+/* file operations for the connection listing (plain seq_file plumbing) */
+static const struct file_operations ip_vs_conn_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ip_vs_conn_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+/*
+ * Name the origin of a connection from its flags: "SYNC" if
+ * IP_VS_CONN_F_SYNC is set, "LOCAL" otherwise.
+ */
+static const char *ip_vs_origin_name(unsigned flags)
+{
+	return (flags & IP_VS_CONN_F_SYNC) ? "SYNC" : "LOCAL";
+}
+
+/*
+ * seq_file ->show for the sync-aware connection listing: same columns
+ * as ip_vs_conn_seq_show() plus an Origin column (see
+ * ip_vs_origin_name()) before the expiry time.  Always returns 0.
+ */
+static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
+{
+	const struct ip_vs_conn *cp = v;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq,
+ "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
+		return 0;
+	}
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		seq_printf(seq,
+			   "%-3s " NIP6_FMT " %04X " NIP6_FMT
+			   " %04X " NIP6_FMT " %04X %-11s %-6s %7lu\n",
+			   ip_vs_proto_name(cp->protocol),
+			   NIP6(cp->caddr.in6), ntohs(cp->cport),
+			   NIP6(cp->vaddr.in6), ntohs(cp->vport),
+			   NIP6(cp->daddr.in6), ntohs(cp->dport),
+			   ip_vs_state_name(cp->protocol, cp->state),
+			   ip_vs_origin_name(cp->flags),
+			   (cp->timer.expires-jiffies)/HZ);
+	else
+#endif
+		seq_printf(seq,
+			   "%-3s %08X %04X %08X %04X "
+			   "%08X %04X %-11s %-6s %7lu\n",
+			   ip_vs_proto_name(cp->protocol),
+			   ntohl(cp->caddr.ip), ntohs(cp->cport),
+			   ntohl(cp->vaddr.ip), ntohs(cp->vport),
+			   ntohl(cp->daddr.ip), ntohs(cp->dport),
+			   ip_vs_state_name(cp->protocol, cp->state),
+			   ip_vs_origin_name(cp->flags),
+			   (cp->timer.expires-jiffies)/HZ);
+
+	return 0;
+}
+
+/*
+ * seq_file callbacks for the sync-aware listing: the iteration is
+ * shared with ip_vs_conn_seq_ops, only ->show differs.
+ */
+static const struct seq_operations ip_vs_conn_sync_seq_ops = {
+	.start	= ip_vs_conn_seq_start,
+	.next	= ip_vs_conn_seq_next,
+	.stop	= ip_vs_conn_seq_stop,
+	.show	= ip_vs_conn_sync_seq_show,
+};
+
+/* ->open for the sync-aware listing: attach ip_vs_conn_sync_seq_ops */
+static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	ret = seq_open(file, &ip_vs_conn_sync_seq_ops);
+	return ret;
+}
+
+/*
+ * File operations registered for the "ip_vs_conn_sync" proc entry in
+ * ip_vs_conn_init(); everything except open is the generic seq_file
+ * implementation.
+ */
+static const struct file_operations ip_vs_conn_sync_fops = {
+ .owner = THIS_MODULE,
+ .open = ip_vs_conn_sync_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+#endif
+
+
+/*
+ *	Randomly drop connection entries before running out of memory
+ *
+ *	Returns 1 when @cp should be dropped now, 0 when it should be kept.
+ *	Entries that have been alive for less than 60 seconds, or whose
+ *	incoming packet count is outside [0, 8], are never dropped.  For
+ *	the rest, a per-packet-count countdown lets roughly one in
+ *	todrop_rate[in_pkts] candidates through.
+ */
+static inline int todrop_entry(struct ip_vs_conn *cp)
+{
+	/*
+	 * The drop rate array needs tuning for real environments.
+	 * Called from timer bh only => no locking
+	 *
+	 * Both tables are explicitly "signed char": plain char is unsigned
+	 * on some architectures (ARM, PowerPC), where decrementing a zero
+	 * counter would wrap to 255 instead of going negative and would
+	 * postpone the first drop by a couple of hundred calls.
+	 */
+	static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+	static signed char todrop_counter[9] = {0};
+	int i;
+
+	/* if the conn entry hasn't lasted for 60 seconds, don't drop it.
+	   This will leave enough time for normal connection to get
+	   through. */
+	if (time_before(cp->timeout + jiffies, cp->timer.expires + 60*HZ))
+		return 0;
+
+	/* Don't drop the entry if its number of incoming packets is not
+	   located in [0, 8] */
+	i = atomic_read(&cp->in_pkts);
+	if (i > 8 || i < 0)
+		return 0;
+
+	/* a rate of zero means "never drop" for this packet count */
+	if (!todrop_rate[i])
+		return 0;
+	if (--todrop_counter[i] > 0)
+		return 0;
+
+	/* countdown expired: re-arm it and drop this entry */
+	todrop_counter[i] = todrop_rate[i];
+	return 1;
+}
+
+/*
+ * Called from keventd and must protect itself from softirqs.
+ *
+ * Picks IP_VS_CONN_TAB_SIZE/32 random buckets and, in each, expires
+ * TCP connections in SYN_RECV/SYNACK state plus any other non-template
+ * connection selected by todrop_entry().  The controlling template of a
+ * dropped connection is expired as well.
+ */
+void ip_vs_random_dropentry(void)
+{
+	struct ip_vs_conn *conn;
+	int round;
+
+	/*
+	 * Randomly scan 1/32 of the whole table every second
+	 */
+	for (round = 0; round < (IP_VS_CONN_TAB_SIZE >> 5); round++) {
+		unsigned bucket = net_random() & IP_VS_CONN_TAB_MASK;
+
+		/*
+		 *  Lock is actually needed in this loop.
+		 */
+		ct_write_lock_bh(bucket);
+
+		list_for_each_entry(conn, &ip_vs_conn_tab[bucket], c_list) {
+			/* connection templates are skipped here */
+			if (conn->flags & IP_VS_CONN_F_TEMPLATE)
+				continue;
+
+			if (conn->protocol != IPPROTO_TCP) {
+				if (!todrop_entry(conn))
+					continue;
+			} else if (conn->state == IP_VS_TCP_S_ESTABLISHED) {
+				if (!todrop_entry(conn))
+					continue;
+			} else if (conn->state != IP_VS_TCP_S_SYN_RECV &&
+				   conn->state != IP_VS_TCP_S_SYNACK) {
+				/* other TCP states are left alone */
+				continue;
+			}
+
+			IP_VS_DBG(4, "del connection\n");
+			ip_vs_conn_expire_now(conn);
+			if (conn->control) {
+				IP_VS_DBG(4, "del conn template\n");
+				ip_vs_conn_expire_now(conn->control);
+			}
+		}
+		ct_write_unlock_bh(bucket);
+	}
+}
+
+
+/*
+ *      Flush all the connection entries in the ip_vs_conn_tab
+ *
+ *      Every entry (and its controlling template, if any) is asked to
+ *      expire immediately; the whole table is rescanned until
+ *      ip_vs_conn_count reaches zero.
+ */
+static void ip_vs_conn_flush(void)
+{
+	struct ip_vs_conn *conn;
+	int bucket;
+
+	for (;;) {
+		for (bucket = 0; bucket < IP_VS_CONN_TAB_SIZE; bucket++) {
+			/*
+			 *  Lock is actually needed in this loop.
+			 */
+			ct_write_lock_bh(bucket);
+
+			list_for_each_entry(conn, &ip_vs_conn_tab[bucket],
+					    c_list) {
+				IP_VS_DBG(4, "del connection\n");
+				ip_vs_conn_expire_now(conn);
+				if (conn->control) {
+					IP_VS_DBG(4, "del conn template\n");
+					ip_vs_conn_expire_now(conn->control);
+				}
+			}
+			ct_write_unlock_bh(bucket);
+		}
+
+		/* the counter may still be non-zero, because some conn
+		   entries may be run by a slow timer handler or be
+		   unhashed but still referred */
+		if (atomic_read(&ip_vs_conn_count) == 0)
+			break;
+
+		schedule();
+	}
+}
+
+
+/*
+ * Set up the connection subsystem: allocate and initialise the hash
+ * table, create the ip_vs_conn slab cache, initialise the table locks,
+ * register the two proc entries and seed the hash random value.
+ *
+ * Returns 0 on success or -ENOMEM if the table or the cache cannot be
+ * allocated.
+ */
+int __init ip_vs_conn_init(void)
+{
+	const size_t tab_bytes = IP_VS_CONN_TAB_SIZE*sizeof(struct list_head);
+	int i;
+
+	/*
+	 * Allocate the connection hash table and initialize its list heads
+	 */
+	ip_vs_conn_tab = vmalloc(tab_bytes);
+	if (ip_vs_conn_tab == NULL)
+		return -ENOMEM;
+
+	/* Allocate ip_vs_conn slab cache */
+	ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
+					      sizeof(struct ip_vs_conn), 0,
+					      SLAB_HWCACHE_ALIGN, NULL);
+	if (ip_vs_conn_cachep == NULL) {
+		vfree(ip_vs_conn_tab);
+		return -ENOMEM;
+	}
+
+	IP_VS_INFO("Connection hash table configured "
+		   "(size=%d, memory=%ldKbytes)\n",
+		   IP_VS_CONN_TAB_SIZE,
+		   (long)tab_bytes/1024);
+	IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n",
+		  sizeof(struct ip_vs_conn));
+
+	for (i = 0; i < IP_VS_CONN_TAB_SIZE; i++)
+		INIT_LIST_HEAD(&ip_vs_conn_tab[i]);
+
+	for (i = 0; i < CT_LOCKARRAY_SIZE; i++)
+		rwlock_init(&__ip_vs_conntbl_lock_array[i].l);
+
+	proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+	proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+
+	/* calculate the random value for connection hash */
+	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
+
+	return 0;
+}
+
+
+/*
+ * Undo ip_vs_conn_init(): flush every connection, destroy the slab
+ * cache, remove both proc entries and free the hash table.
+ */
+void ip_vs_conn_cleanup(void)
+{
+ /* flush all the connection entries first */
+ ip_vs_conn_flush();
+
+ /* Release the empty cache */
+ kmem_cache_destroy(ip_vs_conn_cachep);
+ proc_net_remove(&init_net, "ip_vs_conn");
+ proc_net_remove(&init_net, "ip_vs_conn_sync");
+ vfree(ip_vs_conn_tab);
+}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
new file mode 100644
index 0000000..958abf3
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -0,0 +1,1542 @@
+/*
+ * IPVS An implementation of the IP virtual server support for the
+ * LINUX operating system. IPVS is now implemented as a module
+ * over the Netfilter framework. IPVS can be used to build a
+ * high-performance and highly available server based on a
+ * cluster of servers.
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Peter Kese <peter.kese@ijs.si>
+ * Julian Anastasov <ja@ssi.bg>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
+ * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
+ * and others.
+ *
+ * Changes:
+ * Paul `Rusty' Russell properly handle non-linear skbs
+ * Harald Welte don't use nfcache
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/icmp.h>
+
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h> /* for icmp_send */
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h> /* for csum_ipv6_magic */
+#include <linux/netfilter_ipv6.h>
+#endif
+
+#include <net/ip_vs.h>
+
+
+/*
+ * Symbols made available to other kernel modules.  Two of them are only
+ * exported when the matching config option (TCP protocol support, IPVS
+ * debugging) is enabled.
+ */
+EXPORT_SYMBOL(register_ip_vs_scheduler);
+EXPORT_SYMBOL(unregister_ip_vs_scheduler);
+EXPORT_SYMBOL(ip_vs_skb_replace);
+EXPORT_SYMBOL(ip_vs_proto_name);
+EXPORT_SYMBOL(ip_vs_conn_new);
+EXPORT_SYMBOL(ip_vs_conn_in_get);
+EXPORT_SYMBOL(ip_vs_conn_out_get);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+EXPORT_SYMBOL(ip_vs_tcp_conn_listen);
+#endif
+EXPORT_SYMBOL(ip_vs_conn_put);
+#ifdef CONFIG_IP_VS_DEBUG
+EXPORT_SYMBOL(ip_vs_get_debug_level);
+#endif
+
+
+/*
+ * ID used in ICMP lookups.  The argument is parenthesized in both
+ * macros so that any pointer expression can be passed safely.
+ */
+#define icmp_id(icmph)          (((icmph)->un).echo.id)
+#define icmpv6_id(icmph)        ((icmph)->icmp6_dataun.u_echo.identifier)
+
+/*
+ * Return a printable name for IP protocol number @proto.
+ *
+ * Known protocols map to string literals.  Anything else is formatted
+ * as "IP_<number>" into a static buffer, so that result is overwritten
+ * by the next such call and must not be kept by the caller.
+ */
+const char *ip_vs_proto_name(unsigned proto)
+{
+	static char buf[20];
+
+	switch (proto) {
+	case IPPROTO_IP:
+		return "IP";
+	case IPPROTO_UDP:
+		return "UDP";
+	case IPPROTO_TCP:
+		return "TCP";
+	case IPPROTO_ICMP:
+		return "ICMP";
+#ifdef CONFIG_IP_VS_IPV6
+	case IPPROTO_ICMPV6:
+		return "ICMPv6";
+#endif
+	default:
+		/* proto is unsigned: use %u and bound the write */
+		snprintf(buf, sizeof(buf), "IP_%u", proto);
+		return buf;
+	}
+}
+
+/*
+ * Initialise the first @rows list heads of @table as empty lists.
+ * Nothing is done when @rows is zero or negative.
+ */
+void ip_vs_init_hash_table(struct list_head *table, int rows)
+{
+	int row;
+
+	for (row = rows - 1; row >= 0; row--)
+		INIT_LIST_HEAD(&table[row]);
+}
+
+/*
+ * Account one incoming packet of skb->len bytes against the
+ * destination of @cp, that destination's service and the global IPVS
+ * statistics.  Nothing is counted when the connection has no
+ * destination or the destination is not flagged as available.
+ */
+static inline void
+ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest = cp->dest;
+
+	if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE))
+		return;
+
+	/* per-destination counters */
+	spin_lock(&dest->stats.lock);
+	dest->stats.ustats.inpkts++;
+	dest->stats.ustats.inbytes += skb->len;
+	spin_unlock(&dest->stats.lock);
+
+	/* per-service counters */
+	spin_lock(&dest->svc->stats.lock);
+	dest->svc->stats.ustats.inpkts++;
+	dest->svc->stats.ustats.inbytes += skb->len;
+	spin_unlock(&dest->svc->stats.lock);
+
+	/* global counters */
+	spin_lock(&ip_vs_stats.lock);
+	ip_vs_stats.ustats.inpkts++;
+	ip_vs_stats.ustats.inbytes += skb->len;
+	spin_unlock(&ip_vs_stats.lock);
+}
+
+
+/*
+ * Account one outgoing packet of skb->len bytes against the
+ * destination of @cp, that destination's service and the global IPVS
+ * statistics.  Nothing is counted when the connection has no
+ * destination or the destination is not flagged as available.
+ */
+static inline void
+ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest = cp->dest;
+
+	if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE))
+		return;
+
+	/* per-destination counters */
+	spin_lock(&dest->stats.lock);
+	dest->stats.ustats.outpkts++;
+	dest->stats.ustats.outbytes += skb->len;
+	spin_unlock(&dest->stats.lock);
+
+	/* per-service counters */
+	spin_lock(&dest->svc->stats.lock);
+	dest->svc->stats.ustats.outpkts++;
+	dest->svc->stats.ustats.outbytes += skb->len;
+	spin_unlock(&dest->svc->stats.lock);
+
+	/* global counters */
+	spin_lock(&ip_vs_stats.lock);
+	ip_vs_stats.ustats.outpkts++;
+	ip_vs_stats.ustats.outbytes += skb->len;
+	spin_unlock(&ip_vs_stats.lock);
+}
+
+
+/*
+ * Count one new connection against the destination of @cp, against
+ * @svc and against the global IPVS statistics.  cp->dest is
+ * dereferenced without a NULL check.
+ */
+static inline void
+ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *dest = cp->dest;
+
+	spin_lock(&dest->stats.lock);
+	dest->stats.ustats.conns++;
+	spin_unlock(&dest->stats.lock);
+
+	spin_lock(&svc->stats.lock);
+	svc->stats.ustats.conns++;
+	spin_unlock(&svc->stats.lock);
+
+	spin_lock(&ip_vs_stats.lock);
+	ip_vs_stats.ustats.conns++;
+	spin_unlock(&ip_vs_stats.lock);
+}
+
+
+/*
+ * Run the protocol's state_transition hook for @cp, if the protocol
+ * provides one, and return its result; return 0 when it does not.
+ */
+static inline int
+ip_vs_set_state(struct ip_vs_conn *cp, int direction,
+		const struct sk_buff *skb,
+		struct ip_vs_protocol *pp)
+{
+	if (likely(pp->state_transition))
+		return pp->state_transition(cp, direction, skb, pp);
+
+	return 0;
+}
+
+
+/*
+ * IPVS persistent scheduling function
+ * It creates a connection entry according to its template if exists,
+ * or selects a server and creates a connection entry plus a template.
+ * Locking: we are svc user (svc->refcnt), so we hold all dests too
+ * Protocols supported: TCP, UDP
+ *
+ * @svc: virtual service the packet was matched to
+ * @skb: packet being scheduled
+ * @ports: ports[0] becomes the client port and ports[1] the virtual
+ * port of the new connection entry
+ *
+ * Returns the new connection entry, or NULL when no usable template
+ * exists for a non-zero-port service, no destination could be selected,
+ * or a template/connection entry could not be created.
+ */
+static struct ip_vs_conn *
+ip_vs_sched_persist(struct ip_vs_service *svc,
+ const struct sk_buff *skb,
+ __be16 ports[2])
+{
+ struct ip_vs_conn *cp = NULL;
+ struct ip_vs_iphdr iph;
+ struct ip_vs_dest *dest;
+ struct ip_vs_conn *ct;
+ __be16 dport; /* destination port to forward */
+ union nf_inet_addr snet; /* source network of the client,
+ after masking */
+
+ ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+
+ /* Mask saddr with the netmask to adjust template granularity */
+#ifdef CONFIG_IP_VS_IPV6
+ if (svc->af == AF_INET6)
+ ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask);
+ else
+#endif
+ snet.ip = iph.saddr.ip & svc->netmask;
+
+ IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
+ "mnet %s\n",
+ IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
+ IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+ IP_VS_DBG_ADDR(svc->af, &snet));
+
+ /*
+ * As far as we know, FTP is a very complicated network protocol, and
+ * it uses control connection and data connections. For active FTP,
+ * FTP server initialize data connection to the client, its source port
+ * is often 20. For passive FTP, FTP server tells the clients the port
+ * that it passively listens to, and the client issues the data
+ * connection. In the tunneling or direct routing mode, the load
+ * balancer is on the client-to-server half of connection, the port
+ * number is unknown to the load balancer. So, a conn template like
+ * <caddr, 0, vaddr, 0, daddr, 0> is created for persistent FTP
+ * service, and a template like <caddr, 0, vaddr, vport, daddr, dport>
+ * is created for other persistent services.
+ */
+ if (ports[1] == svc->port) {
+ /* Check if a template already exists */
+ if (svc->port != FTPPORT)
+ ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
+ &iph.daddr, ports[1]);
+ else
+ ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
+ &iph.daddr, 0);
+
+ /* NOTE(review): ip_vs_check_template() is defined elsewhere;
+ * presumably it releases the template itself when it rejects
+ * it, since ct is simply overwritten below - confirm. */
+ if (!ct || !ip_vs_check_template(ct)) {
+ /*
+ * No template found or the dest of the connection
+ * template is not available.
+ */
+ dest = svc->scheduler->schedule(svc, skb);
+ if (dest == NULL) {
+ IP_VS_DBG(1, "p-schedule: no dest found.\n");
+ return NULL;
+ }
+
+ /*
+ * Create a template like <protocol,caddr,0,
+ * vaddr,vport,daddr,dport> for non-ftp service,
+ * and <protocol,caddr,0,vaddr,0,daddr,0>
+ * for ftp service.
+ */
+ if (svc->port != FTPPORT)
+ ct = ip_vs_conn_new(svc->af, iph.protocol,
+ &snet, 0,
+ &iph.daddr,
+ ports[1],
+ &dest->addr, dest->port,
+ IP_VS_CONN_F_TEMPLATE,
+ dest);
+ else
+ ct = ip_vs_conn_new(svc->af, iph.protocol,
+ &snet, 0,
+ &iph.daddr, 0,
+ &dest->addr, 0,
+ IP_VS_CONN_F_TEMPLATE,
+ dest);
+ if (ct == NULL)
+ return NULL;
+
+ /* the template uses the service's persistence timeout */
+ ct->timeout = svc->timeout;
+ } else {
+ /* set destination with the found template */
+ dest = ct->dest;
+ }
+ /* service port matched: forward to the real server's port */
+ dport = dest->port;
+ } else {
+ /*
+ * Note: persistent fwmark-based services and persistent
+ * port zero service are handled here.
+ * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
+ * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
+ */
+ if (svc->fwmark) {
+ /* the fwmark takes the place of the virtual address */
+ union nf_inet_addr fwmark = {
+ .all = { 0, 0, 0, htonl(svc->fwmark) }
+ };
+
+ ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
+ &fwmark, 0);
+ } else
+ ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
+ &iph.daddr, 0);
+
+ if (!ct || !ip_vs_check_template(ct)) {
+ /*
+ * If it is not persistent port zero, return NULL,
+ * otherwise create a connection template.
+ */
+ if (svc->port)
+ return NULL;
+
+ dest = svc->scheduler->schedule(svc, skb);
+ if (dest == NULL) {
+ IP_VS_DBG(1, "p-schedule: no dest found.\n");
+ return NULL;
+ }
+
+ /*
+ * Create a template according to the service
+ */
+ if (svc->fwmark) {
+ union nf_inet_addr fwmark = {
+ .all = { 0, 0, 0, htonl(svc->fwmark) }
+ };
+
+ ct = ip_vs_conn_new(svc->af, IPPROTO_IP,
+ &snet, 0,
+ &fwmark, 0,
+ &dest->addr, 0,
+ IP_VS_CONN_F_TEMPLATE,
+ dest);
+ } else
+ ct = ip_vs_conn_new(svc->af, iph.protocol,
+ &snet, 0,
+ &iph.daddr, 0,
+ &dest->addr, 0,
+ IP_VS_CONN_F_TEMPLATE,
+ dest);
+ if (ct == NULL)
+ return NULL;
+
+ ct->timeout = svc->timeout;
+ } else {
+ /* set destination with the found template */
+ dest = ct->dest;
+ }
+ /* fwmark / port-zero service: keep the packet's own port */
+ dport = ports[1];
+ }
+
+ /*
+ * Create a new connection according to the template
+ */
+ cp = ip_vs_conn_new(svc->af, iph.protocol,
+ &iph.saddr, ports[0],
+ &iph.daddr, ports[1],
+ &dest->addr, dport,
+ 0,
+ dest);
+ if (cp == NULL) {
+ /* drop our reference on the template before bailing out */
+ ip_vs_conn_put(ct);
+ return NULL;
+ }
+
+ /*
+ * Add its control
+ */
+ ip_vs_control_add(cp, ct);
+ ip_vs_conn_put(ct);
+
+ ip_vs_conn_stats(cp, svc);
+ return cp;
+}
+
+
+/*
+ *  IPVS main scheduling function
+ *  It selects a server according to the virtual service, and
+ *  creates a connection entry.
+ *  Protocols supported: TCP, UDP
+ *
+ *  Persistent services are handed to ip_vs_sched_persist().  Returns
+ *  the new connection entry, or NULL when the port pair cannot be read,
+ *  the destination port does not belong to a non-fwmark service, no
+ *  destination is found, or the entry cannot be created.
+ */
+struct ip_vs_conn *
+ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_conn *conn;
+	struct ip_vs_iphdr iph;
+	struct ip_vs_dest *dest;
+	__be16 port_buf[2], *ports;
+	__be16 dport;
+
+	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+	ports = skb_header_pointer(skb, iph.len, sizeof(port_buf), port_buf);
+	if (!ports)
+		return NULL;
+
+	/*
+	 *    Persistent service
+	 */
+	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+		return ip_vs_sched_persist(svc, skb, ports);
+
+	/*
+	 *    Non-persistent service
+	 */
+	if (!svc->fwmark && ports[1] != svc->port) {
+		if (!svc->port)
+			IP_VS_ERR("Schedule: port zero only supported "
+				  "in persistent services, "
+				  "check your ipvs configuration\n");
+		return NULL;
+	}
+
+	dest = svc->scheduler->schedule(svc, skb);
+	if (!dest) {
+		IP_VS_DBG(1, "Schedule: no dest found.\n");
+		return NULL;
+	}
+
+	/* a destination port of zero means "keep the packet's port" */
+	dport = dest->port ? dest->port : ports[1];
+
+	/*
+	 *    Create a connection entry.
+	 */
+	conn = ip_vs_conn_new(svc->af, iph.protocol,
+			      &iph.saddr, ports[0],
+			      &iph.daddr, ports[1],
+			      &dest->addr, dport,
+			      0,
+			      dest);
+	if (!conn)
+		return NULL;
+
+	IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
+		      "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
+		      ip_vs_fwd_tag(conn),
+		      IP_VS_DBG_ADDR(svc->af, &conn->caddr), ntohs(conn->cport),
+		      IP_VS_DBG_ADDR(svc->af, &conn->vaddr), ntohs(conn->vport),
+		      IP_VS_DBG_ADDR(svc->af, &conn->daddr), ntohs(conn->dport),
+		      conn->flags, atomic_read(&conn->refcnt));
+
+	ip_vs_conn_stats(conn, svc);
+	return conn;
+}
+
+
+/*
+ *  Pass or drop the packet.
+ *  Called by ip_vs_in, when the virtual service is available but
+ *  no destination is available for a new connection.
+ *
+ *  The reference on @svc is dropped on every path.  Returns a netfilter
+ *  verdict: the result of the transmit hook for a cache_bypass entry,
+ *  NF_ACCEPT for non-FTP traffic hitting a virtual FTP service, and
+ *  NF_DROP otherwise (after sending a port-unreachable ICMP error where
+ *  the ports could be read).
+ */
+int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
+		struct ip_vs_protocol *pp)
+{
+	__be16 _ports[2], *pptr;
+	struct ip_vs_iphdr iph;
+	/* cached so that svc is never dereferenced after it is put */
+	const int af = svc->af;
+	int unicast;
+
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
+	if (pptr == NULL) {
+		ip_vs_service_put(svc);
+		return NF_DROP;
+	}
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
+	else
+#endif
+		unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+
+	/* if it is fwmark-based service, the cache_bypass sysctl is up
+	   and the destination is a non-local unicast, then create
+	   a cache_bypass connection entry */
+	if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
+		int ret;
+		struct ip_vs_conn *cp;
+		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };
+
+		ip_vs_service_put(svc);
+
+		/* create a new connection entry */
+		IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n");
+		cp = ip_vs_conn_new(af, iph.protocol,
+				    &iph.saddr, pptr[0],
+				    &iph.daddr, pptr[1],
+				    &daddr, 0,
+				    IP_VS_CONN_F_BYPASS,
+				    NULL);
+		if (cp == NULL)
+			return NF_DROP;
+
+		/* statistics */
+		ip_vs_in_stats(cp, skb);
+
+		/* set state */
+		ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+
+		/* transmit the first SYN packet */
+		ret = cp->packet_xmit(skb, cp, pp);
+		/* do not touch skb anymore */
+
+		atomic_inc(&cp->in_pkts);
+		ip_vs_conn_put(cp);
+		return ret;
+	}
+
+	/*
+	 * When the virtual ftp service is presented, packets destined
+	 * for other services on the VIP may get here (except services
+	 * listed in the ipvs table), pass the packets, because it is
+	 * not ipvs job to decide to drop the packets.
+	 */
+	if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) {
+		ip_vs_service_put(svc);
+		return NF_ACCEPT;
+	}
+
+	ip_vs_service_put(svc);
+
+	/*
+	 * Notify the client that the destination is unreachable, and
+	 * release the socket buffer.
+	 * Since it is in IP layer, the TCP socket is not actually
+	 * created, the TCP RST packet cannot be sent, instead that
+	 * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
+	 */
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0,
+			    skb->dev);
+	else
+#endif
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+	return NF_DROP;
+}
+
+
+/*
+ *      It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
+ *      chain, and is used for VS/NAT.
+ *      It detects packets for VS/NAT connections and sends the packets
+ *      immediately, which keeps iptable_nat from mangling packets that
+ *      IPVS has already handled.
+ *
+ *      Returns NF_STOP for packets marked with skb->ipvs_property and
+ *      NF_ACCEPT for everything else.
+ */
+static unsigned int ip_vs_post_routing(unsigned int hooknum,
+				       struct sk_buff *skb,
+				       const struct net_device *in,
+				       const struct net_device *out,
+				       int (*okfn)(struct sk_buff *))
+{
+	/* The packet was sent from IPVS, exit this chain */
+	if (skb->ipvs_property)
+		return NF_STOP;
+
+	return NF_ACCEPT;
+}
+
+/*
+ * Checksum the packet data from @offset to the end of @skb and return
+ * the folded 16-bit result.
+ */
+__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
+{
+	__wsum sum = skb_checksum(skb, offset, skb->len - offset, 0);
+
+	return csum_fold(sum);
+}
+
+/*
+ * Hand @skb to ip_defrag() for reassembly queue @user.  When that
+ * returns 0 the IPv4 header checksum is recomputed; the ip_defrag()
+ * result is returned either way.
+ */
+static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
+{
+	int err = ip_defrag(skb, user);
+
+	if (err)
+		return err;
+
+	ip_send_check(ip_hdr(skb));
+	return 0;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * Placeholder for IPv6 fragment reassembly: no reassembly is done and 0
+ * is always returned, so callers carry on with the packet as it is.
+ */
+static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
+{
+ /* TODO IPv6: Find out what to do here for IPv6 */
+ return 0;
+}
+#endif
+
+/*
+ *	Packet has been made sufficiently writable in caller
+ *	- inout: 1=in->out, 0=out->in
+ *
+ *	Rewrites the addresses of an IPv4 ICMP packet and of the IP header
+ *	embedded in it: in->out puts the virtual address/port in place,
+ *	out->in the real server's.  Both IP header checksums and the ICMP
+ *	checksum are recomputed.
+ */
+void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
+		    struct ip_vs_conn *cp, int inout)
+{
+	struct iphdr *outer = ip_hdr(skb);
+	unsigned int icmp_offset = outer->ihl*4;
+	struct icmphdr *icmph = (struct icmphdr *)(skb_network_header(skb) +
+						   icmp_offset);
+	/* the embedded IP header follows the ICMP header directly */
+	struct iphdr *inner = (struct iphdr *)(icmph + 1);
+
+	if (inout) {
+		outer->saddr = cp->vaddr.ip;
+		ip_send_check(outer);
+		inner->daddr = cp->vaddr.ip;
+		ip_send_check(inner);
+	} else {
+		outer->daddr = cp->daddr.ip;
+		ip_send_check(outer);
+		inner->saddr = cp->daddr.ip;
+		ip_send_check(inner);
+	}
+
+	/* the TCP/UDP port */
+	switch (inner->protocol) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP: {
+		__be16 *ports = (void *)inner + inner->ihl*4;
+
+		if (inout)
+			ports[1] = cp->vport;
+		else
+			ports[0] = cp->dport;
+		break;
+	}
+	default:
+		break;
+	}
+
+	/* And finally the ICMP checksum */
+	icmph->checksum = 0;
+	icmph->checksum = ip_vs_checksum_complete(skb, icmp_offset);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (inout)
+		IP_VS_DBG_PKT(11, pp, skb, (void *)inner - (void *)outer,
+			"Forwarding altered outgoing ICMP");
+	else
+		IP_VS_DBG_PKT(11, pp, skb, (void *)inner - (void *)outer,
+			"Forwarding altered incoming ICMP");
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ *	IPv6 counterpart of ip_vs_nat_icmp().
+ *	Packet has been made sufficiently writable in caller
+ *	- inout: 1=in->out, 0=out->in
+ *
+ *	Rewrites the addresses of an ICMPv6 packet and of the IPv6 header
+ *	embedded in it, patches the embedded TCP/UDP port and recomputes
+ *	the ICMPv6 checksum.  The ICMPv6 header is taken to follow the
+ *	fixed IPv6 header directly; extension headers are not skipped.
+ */
+void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
+		    struct ip_vs_conn *cp, int inout)
+{
+	struct ipv6hdr *iph	 = ipv6_hdr(skb);
+	unsigned int icmp_offset = sizeof(struct ipv6hdr);
+	unsigned int icmp_len	 = skb->len - icmp_offset;
+	struct icmp6hdr *icmph	 = (struct icmp6hdr *)(skb_network_header(skb) +
+						      icmp_offset);
+	struct ipv6hdr *ciph	 = (struct ipv6hdr *)(icmph + 1);
+
+	if (inout) {
+		iph->saddr = cp->vaddr.in6;
+		ciph->daddr = cp->vaddr.in6;
+	} else {
+		iph->daddr = cp->daddr.in6;
+		ciph->saddr = cp->daddr.in6;
+	}
+
+	/* the TCP/UDP port */
+	if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) {
+		__be16 *ports = (void *)ciph + sizeof(struct ipv6hdr);
+
+		if (inout)
+			ports[1] = cp->vport;
+		else
+			ports[0] = cp->dport;
+	}
+
+	/*
+	 * And finally the ICMPv6 checksum.  Unlike ICMPv4 it covers the
+	 * IPv6 pseudo-header, so fold the (already rewritten) addresses in
+	 * with csum_ipv6_magic() and store the result in the header.
+	 */
+	icmph->icmp6_cksum = 0;
+	icmph->icmp6_cksum = csum_ipv6_magic(&iph->saddr, &iph->daddr,
+					     icmp_len, IPPROTO_ICMPV6,
+					     skb_checksum(skb, icmp_offset,
+							  icmp_len, 0));
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (inout)
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			"Forwarding altered outgoing ICMPv6");
+	else
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			"Forwarding altered incoming ICMPv6");
+}
+#endif
+
+/* Handle relevant response ICMP messages - forward to the right
+ * destination host. Used for NAT and local client.
+ *
+ * Verifies the ICMP checksum, makes the packet writable up to @offset
+ * (plus the port pair for TCP/UDP), rewrites it in the in->out
+ * direction and updates the outgoing statistics.  The reference on @cp
+ * is released on every path.  Returns NF_ACCEPT on success and NF_DROP
+ * when the checksum is bad or the packet cannot be made writable.
+ */
+static int handle_response_icmp(int af, struct sk_buff *skb,
+				union nf_inet_addr *snet,
+				__u8 protocol, struct ip_vs_conn *cp,
+				struct ip_vs_protocol *pp,
+				unsigned int offset, unsigned int ihl)
+{
+	unsigned int verdict = NF_DROP;
+	int has_ports = (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol);
+
+	if (IP_VS_FWD_METHOD(cp) != 0) {
+		IP_VS_ERR("shouldn't reach here, because the box is on the "
+			  "half connection in the tun/dr module.\n");
+	}
+
+	/* Ensure the checksum is correct */
+	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
+		/* Failed checksum! */
+		IP_VS_DBG_BUF(1, "Forward ICMP: failed checksum from %s!\n",
+			      IP_VS_DBG_ADDR(af, snet));
+		goto put_conn;
+	}
+
+	/* the embedded port pair is rewritten too, so cover it as well */
+	if (has_ports)
+		offset += 2 * sizeof(__u16);
+	if (!skb_make_writable(skb, offset))
+		goto put_conn;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		ip_vs_nat_icmp_v6(skb, pp, cp, 1);
+	else
+#endif
+		ip_vs_nat_icmp(skb, pp, cp, 1);
+
+	/* do the statistics and put it back */
+	ip_vs_out_stats(cp, skb);
+
+	skb->ipvs_property = 1;
+	verdict = NF_ACCEPT;
+
+put_conn:
+	__ip_vs_conn_put(cp);
+
+	return verdict;
+}
+
+/*
+ * Handle ICMP messages in the inside-to-outside direction (outgoing).
+ * Find any that might be relevant, check against existing connections.
+ * Currently handles error types - unreachable, quench, ttl exceeded.
+ *
+ * @related is set to 1 on entry and reset to 0 only when the ICMP type
+ * is not one of the handled error types.  Returns a netfilter verdict.
+ */
+static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
+{
+ struct iphdr *iph;
+ struct icmphdr _icmph, *ic;
+ struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
+ struct ip_vs_iphdr ciph;
+ struct ip_vs_conn *cp;
+ struct ip_vs_protocol *pp;
+ unsigned int offset, ihl;
+ union nf_inet_addr snet;
+
+ *related = 1;
+
+ /* reassemble IP fragments */
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+ if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
+ return NF_STOLEN;
+ }
+
+ /* re-read the header only now, after the defragmentation step */
+ iph = ip_hdr(skb);
+ offset = ihl = iph->ihl * 4;
+ ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ if (ic == NULL)
+ return NF_DROP;
+
+ IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
+ ic->type, ntohs(icmp_id(ic)),
+ NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+
+ /*
+ * Work through seeing if this is for us.
+ * These checks are supposed to be in an order that means easy
+ * things are checked first to speed up processing.... however
+ * this means that some packets will manage to get a long way
+ * down this stack and then be rejected, but that's life.
+ */
+ if ((ic->type != ICMP_DEST_UNREACH) &&
+ (ic->type != ICMP_SOURCE_QUENCH) &&
+ (ic->type != ICMP_TIME_EXCEEDED)) {
+ *related = 0;
+ return NF_ACCEPT;
+ }
+
+ /* Now find the contained IP header */
+ offset += sizeof(_icmph);
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+ if (cih == NULL)
+ return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+ /* no IPVS handler for the embedded protocol: not ours */
+ pp = ip_vs_proto_get(cih->protocol);
+ if (!pp)
+ return NF_ACCEPT;
+
+ /* Is the embedded protocol header present? */
+ if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
+ pp->dont_defrag))
+ return NF_ACCEPT;
+
+ IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for");
+
+ /* step over the embedded IP header to its transport header */
+ offset += cih->ihl * 4;
+
+ ip_vs_fill_iphdr(AF_INET, cih, &ciph);
+ /* The embedded headers contain source and dest in reverse order */
+ cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+ if (!cp)
+ return NF_ACCEPT;
+
+ snet.ip = iph->saddr;
+ return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp,
+ pp, offset, ihl);
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * IPv6 counterpart of ip_vs_out_icmp(): handle outgoing ICMPv6 error
+ * messages (destination unreachable, packet too big, time exceeded)
+ * that refer to an existing connection.
+ *
+ * @related is set to 1 on entry and reset to 0 only when the ICMPv6
+ * type is not one of the handled error types.  Header offsets are
+ * computed with the fixed IPv6 header size; extension headers are not
+ * skipped.  Returns a netfilter verdict.
+ */
+static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
+{
+ struct ipv6hdr *iph;
+ struct icmp6hdr _icmph, *ic;
+ struct ipv6hdr _ciph, *cih; /* The ip header contained
+ within the ICMP */
+ struct ip_vs_iphdr ciph;
+ struct ip_vs_conn *cp;
+ struct ip_vs_protocol *pp;
+ unsigned int offset;
+ union nf_inet_addr snet;
+
+ *related = 1;
+
+ /* reassemble IP fragments */
+ if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
+ if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT))
+ return NF_STOLEN;
+ }
+
+ iph = ipv6_hdr(skb);
+ offset = sizeof(struct ipv6hdr);
+ ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ if (ic == NULL)
+ return NF_DROP;
+
+ IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
+ ic->icmp6_type, ntohs(icmpv6_id(ic)),
+ NIP6(iph->saddr), NIP6(iph->daddr));
+
+ /*
+ * Work through seeing if this is for us.
+ * These checks are supposed to be in an order that means easy
+ * things are checked first to speed up processing.... however
+ * this means that some packets will manage to get a long way
+ * down this stack and then be rejected, but that's life.
+ */
+ if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
+ (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
+ (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
+ *related = 0;
+ return NF_ACCEPT;
+ }
+
+ /* Now find the contained IP header */
+ offset += sizeof(_icmph);
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+ if (cih == NULL)
+ return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+ /* no IPVS handler for the embedded protocol: not ours */
+ pp = ip_vs_proto_get(cih->nexthdr);
+ if (!pp)
+ return NF_ACCEPT;
+
+ /* Is the embedded protocol header present? */
+ /* TODO: we don't support fragmentation at the moment anyways */
+ if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
+ return NF_ACCEPT;
+
+ IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for");
+
+ /* step over the embedded IPv6 header to its transport header */
+ offset += sizeof(struct ipv6hdr);
+
+ ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
+ /* The embedded headers contain source and dest in reverse order */
+ cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+ if (!cp)
+ return NF_ACCEPT;
+
+ ipv6_addr_copy(&snet.in6, &iph->saddr);
+ return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp,
+ pp, offset, sizeof(struct ipv6hdr));
+}
+#endif
+
+/*
+ * Return the RST flag of the TCP header found @nh_len bytes into @skb,
+ * or 0 when the header cannot be read.
+ */
+static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
+{
+	struct tcphdr hdr_buf;
+	const struct tcphdr *tcph;
+
+	tcph = skb_header_pointer(skb, nh_len, sizeof(hdr_buf), &hdr_buf);
+
+	return tcph ? tcph->rst : 0;
+}
+
+/* Handle response packets: rewrite addresses and send away...
+ * Used for NAT and local client.
+ *
+ * @ihl is the number of leading bytes that must be writable.  The
+ * reference on @cp is released on every path.  Returns NF_ACCEPT after
+ * a successful rewrite; on failure the skb is freed here and NF_STOLEN
+ * is returned.
+ */
+static unsigned int
+handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, int ihl)
+{
+ IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
+
+ if (!skb_make_writable(skb, ihl))
+ goto drop;
+
+ /* mangle the packet */
+ if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
+ goto drop;
+
+ /* source address becomes the virtual address */
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ ipv6_hdr(skb)->saddr = cp->vaddr.in6;
+ else
+#endif
+ {
+ ip_hdr(skb)->saddr = cp->vaddr.ip;
+ ip_send_check(ip_hdr(skb));
+ }
+
+ /* For policy routing, packets originating from this
+ * machine itself may be routed differently to packets
+ * passing through. We want this packet to be routed as
+ * if it came from this machine itself. So re-compute
+ * the routing information.
+ */
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (ip6_route_me_harder(skb) != 0)
+ goto drop;
+ } else
+#endif
+ if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
+ goto drop;
+
+ IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
+
+ ip_vs_out_stats(cp, skb);
+ ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+ ip_vs_conn_put(cp);
+
+ /* mark the packet as handled by IPVS (checked in ip_vs_out and
+ * ip_vs_post_routing) */
+ skb->ipvs_property = 1;
+
+ LeaveFunction(11);
+ return NF_ACCEPT;
+
+drop:
+ ip_vs_conn_put(cp);
+ kfree_skb(skb);
+ return NF_STOLEN;
+}
+
+/*
+ *	It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
+ *	Check if outgoing packet belongs to the established ip_vs_conn.
+ *
+ *	Related ICMP/ICMPv6 errors are handled by ip_vs_out_icmp() and
+ *	ip_vs_out_icmp_v6().  Packets that match a connection are rewritten
+ *	by handle_response().  Packets without a connection are accepted
+ *	unchanged, except that a port-unreachable error may be sent (and the
+ *	packet dropped) when they come from a known real service and the
+ *	nat_icmp_send sysctl is set.
+ */
+static unsigned int
+ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
+	  const struct net_device *in, const struct net_device *out,
+	  int (*okfn)(struct sk_buff *))
+{
+	struct ip_vs_iphdr iph;
+	struct ip_vs_protocol *pp;
+	struct ip_vs_conn *cp;
+	int af;
+
+	EnterFunction(11);
+
+	af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
+
+	/* already handled by IPVS */
+	if (skb->ipvs_property)
+		return NF_ACCEPT;
+
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6) {
+		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
+			int related, verdict = ip_vs_out_icmp_v6(skb, &related);
+
+			if (related)
+				return verdict;
+			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+		}
+	} else
+#endif
+		if (unlikely(iph.protocol == IPPROTO_ICMP)) {
+			int related, verdict = ip_vs_out_icmp(skb, &related);
+
+			if (related)
+				return verdict;
+			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+		}
+
+	pp = ip_vs_proto_get(iph.protocol);
+	if (unlikely(!pp))
+		return NF_ACCEPT;
+
+	/*
+	 * reassemble IP fragments (IPv4 only: there is no IPv6
+	 * reassembly support here, and ICMPv6 has already been dealt
+	 * with above, so nothing is repeated for AF_INET6)
+	 */
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET)
+#endif
+		if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
+			     !pp->dont_defrag)) {
+			if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
+				return NF_STOLEN;
+
+			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+		}
+
+	/*
+	 * Check if the packet belongs to an existing entry
+	 */
+	cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+
+	if (unlikely(!cp)) {
+		if (sysctl_ip_vs_nat_icmp_send &&
+		    (pp->protocol == IPPROTO_TCP ||
+		     pp->protocol == IPPROTO_UDP)) {
+			__be16 _ports[2], *pptr;
+
+			pptr = skb_header_pointer(skb, iph.len,
+						  sizeof(_ports), _ports);
+			if (pptr == NULL)
+				return NF_ACCEPT;	/* Not for me */
+			if (ip_vs_lookup_real_service(af, iph.protocol,
+						      &iph.saddr,
+						      pptr[0])) {
+				/*
+				 * Notify the real server: there is no
+				 * existing entry if it is not RST
+				 * packet or not TCP packet.
+				 */
+				if (iph.protocol != IPPROTO_TCP
+				    || !is_tcp_reset(skb, iph.len)) {
+#ifdef CONFIG_IP_VS_IPV6
+					if (af == AF_INET6)
+						icmpv6_send(skb,
+							    ICMPV6_DEST_UNREACH,
+							    ICMPV6_PORT_UNREACH,
+							    0, skb->dev);
+					else
+#endif
+						icmp_send(skb,
+							  ICMP_DEST_UNREACH,
+							  ICMP_PORT_UNREACH, 0);
+					return NF_DROP;
+				}
+			}
+		}
+		IP_VS_DBG_PKT(12, pp, skb, 0,
+			      "packet continues traversal as normal");
+		return NF_ACCEPT;
+	}
+
+	return handle_response(af, skb, pp, cp, iph.len);
+}
+
+
+/*
+ * Handle ICMP messages in the outside-to-inside direction (incoming).
+ * Find any that might be relevant, check against existing connections,
+ * forward to the right destination host if relevant.
+ * Currently handles error types - unreachable, quench, ttl exceeded.
+ *
+ * @skb: IPv4 packet to inspect
+ * @related: set to 1 on entry and reset to 0 only when the ICMP type is
+ * not one of the handled error types
+ * @hooknum: netfilter hook number; only used to choose the defrag user
+ *
+ * Returns a netfilter verdict (NF_ACCEPT, NF_DROP, NF_STOLEN or whatever
+ * the transmit/response helper returns).
+ */
+static int
+ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
+{
+ struct iphdr *iph;
+ struct icmphdr _icmph, *ic;
+ struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
+ struct ip_vs_iphdr ciph;
+ struct ip_vs_conn *cp;
+ struct ip_vs_protocol *pp;
+ unsigned int offset, ihl, verdict;
+ union nf_inet_addr snet;
+
+ *related = 1;
+
+ /* reassemble IP fragments */
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+ /* a non-zero return means the skb is no longer ours to handle */
+ if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ?
+ IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
+ return NF_STOLEN;
+ }
+
+ /* header pointer is fetched only after the defrag call above */
+ iph = ip_hdr(skb);
+ offset = ihl = iph->ihl * 4;
+ ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ if (ic == NULL)
+ return NF_DROP;
+
+ IP_VS_DBG(12, "Incoming ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
+ ic->type, ntohs(icmp_id(ic)),
+ NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+
+ /*
+ * Work through seeing if this is for us.
+ * These checks are supposed to be in an order that means easy
+ * things are checked first to speed up processing.... however
+ * this means that some packets will manage to get a long way
+ * down this stack and then be rejected, but that's life.
+ */
+ if ((ic->type != ICMP_DEST_UNREACH) &&
+ (ic->type != ICMP_SOURCE_QUENCH) &&
+ (ic->type != ICMP_TIME_EXCEEDED)) {
+ *related = 0;
+ return NF_ACCEPT;
+ }
+
+ /* Now find the contained IP header */
+ offset += sizeof(_icmph);
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+ if (cih == NULL)
+ return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+ pp = ip_vs_proto_get(cih->protocol);
+ if (!pp)
+ return NF_ACCEPT;
+
+ /* Is the embedded protocol header present? */
+ if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
+ pp->dont_defrag))
+ return NF_ACCEPT;
+
+ IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for");
+
+ /* offset now points past the embedded IP header */
+ offset += cih->ihl * 4;
+
+ ip_vs_fill_iphdr(AF_INET, cih, &ciph);
+ /* The embedded headers contain source and dest in reverse order */
+ cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
+ if (!cp) {
+ /* The packet could also belong to a local client */
+ cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+ if (cp) {
+ snet.ip = iph->saddr;
+ return handle_response_icmp(AF_INET, skb, &snet,
+ cih->protocol, cp, pp,
+ offset, ihl);
+ }
+ return NF_ACCEPT;
+ }
+
+ /* default verdict for the checksum-failure path below */
+ verdict = NF_DROP;
+
+ /* Ensure the checksum is correct */
+ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
+ /* Failed checksum! */
+ IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",
+ NIPQUAD(iph->saddr));
+ goto out;
+ }
+
+ /* do the statistics and put it back */
+ ip_vs_in_stats(cp, skb);
+ /* for TCP/UDP advance 4 more bytes (presumably the two port fields) */
+ if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
+ offset += 2 * sizeof(__u16);
+ verdict = ip_vs_icmp_xmit(skb, cp, pp, offset);
+ /* do not touch skb anymore */
+
+ out:
+ /* release the connection found by conn_in_get() */
+ __ip_vs_conn_put(cp);
+
+ return verdict;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * IPv6 counterpart of ip_vs_in_icmp(): handle incoming ICMPv6 error
+ * messages (destination unreachable, packet too big, time exceeded)
+ * that refer to an IPVS connection.
+ *
+ * @skb: IPv6 packet to inspect
+ * @related: set to 1 on entry and reset to 0 only when the ICMPv6 type
+ * is not one of the handled error types
+ * @hooknum: netfilter hook number; only used to choose the defrag user
+ *
+ * Returns a netfilter verdict.
+ */
+static int
+ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
+{
+ struct ipv6hdr *iph;
+ struct icmp6hdr _icmph, *ic;
+ struct ipv6hdr _ciph, *cih; /* The ip header contained
+ within the ICMP */
+ struct ip_vs_iphdr ciph;
+ struct ip_vs_conn *cp;
+ struct ip_vs_protocol *pp;
+ unsigned int offset, verdict;
+ union nf_inet_addr snet;
+
+ *related = 1;
+
+ /* reassemble IP fragments */
+ if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
+ if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ?
+ IP_DEFRAG_VS_IN :
+ IP_DEFRAG_VS_FWD))
+ return NF_STOLEN;
+ }
+
+ iph = ipv6_hdr(skb);
+ /* fixed-size header only: extension headers are not walked here */
+ offset = sizeof(struct ipv6hdr);
+ ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ if (ic == NULL)
+ return NF_DROP;
+
+ IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
+ ic->icmp6_type, ntohs(icmpv6_id(ic)),
+ NIP6(iph->saddr), NIP6(iph->daddr));
+
+ /*
+ * Work through seeing if this is for us.
+ * These checks are supposed to be in an order that means easy
+ * things are checked first to speed up processing.... however
+ * this means that some packets will manage to get a long way
+ * down this stack and then be rejected, but that's life.
+ */
+ if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
+ (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
+ (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
+ *related = 0;
+ return NF_ACCEPT;
+ }
+
+ /* Now find the contained IP header */
+ offset += sizeof(_icmph);
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+ if (cih == NULL)
+ return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+ pp = ip_vs_proto_get(cih->nexthdr);
+ if (!pp)
+ return NF_ACCEPT;
+
+ /* Is the embedded protocol header present? */
+ /* TODO: we don't support fragmentation at the moment anyways */
+ if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
+ return NF_ACCEPT;
+
+ IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for");
+
+ /* offset now points past the embedded (fixed-size) IPv6 header */
+ offset += sizeof(struct ipv6hdr);
+
+ ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
+ /* The embedded headers contain source and dest in reverse order */
+ cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
+ if (!cp) {
+ /* The packet could also belong to a local client */
+ cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+ if (cp) {
+ ipv6_addr_copy(&snet.in6, &iph->saddr);
+ return handle_response_icmp(AF_INET6, skb, &snet,
+ cih->nexthdr,
+ cp, pp, offset,
+ sizeof(struct ipv6hdr));
+ }
+ return NF_ACCEPT;
+ }
+
+ /* NOTE(review): this value is always overwritten below; unlike the
+ * IPv4 variant no checksum verification happens in between */
+ verdict = NF_DROP;
+
+ /* do the statistics and put it back */
+ ip_vs_in_stats(cp, skb);
+ /* for TCP/UDP advance 4 more bytes (presumably the two port fields) */
+ if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr)
+ offset += 2 * sizeof(__u16);
+ verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
+ /* do not touch skb anymore */
+
+ __ip_vs_conn_put(cp);
+
+ return verdict;
+}
+#endif
+
+
+/*
+ * Check if it's for virtual services, look it up,
+ * and send it on its way...
+ */
+static unsigned int
+ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct ip_vs_iphdr iph;
+ struct ip_vs_protocol *pp;
+ struct ip_vs_conn *cp;
+ int ret, restart, af;
+
+ af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
+
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+ /*
+ * Big tappo: only PACKET_HOST, including loopback for local client
+ * Don't handle local packets on IPv6 for now
+ */
+ if (unlikely(skb->pkt_type != PACKET_HOST)) {
+ IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
+ skb->pkt_type,
+ iph.protocol,
+ IP_VS_DBG_ADDR(af, &iph.daddr));
+ return NF_ACCEPT;
+ }
+
+ if (unlikely(iph.protocol == IPPROTO_ICMP)) {
+ int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
+
+ if (related)
+ return verdict;
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ }
+
+ /* Protocol supported? */
+ pp = ip_vs_proto_get(iph.protocol);
+ if (unlikely(!pp))
+ return NF_ACCEPT;
+
+ /*
+ * Check if the packet belongs to an existing connection entry
+ */
+ cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
+
+ if (unlikely(!cp)) {
+ int v;
+
+ /* For local client packets, it could be a response */
+ cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+ if (cp)
+ return handle_response(af, skb, pp, cp, iph.len);
+
+ if (!pp->conn_schedule(af, skb, pp, &v, &cp))
+ return v;
+ }
+
+ if (unlikely(!cp)) {
+ /* sorry, all this trouble for a no-hit :) */
+ IP_VS_DBG_PKT(12, pp, skb, 0,
+ "packet continues traversal as normal");
+ return NF_ACCEPT;
+ }
+
+ IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet");
+
+ /* Check the server status */
+ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+ /* the destination server is not available */
+
+ if (sysctl_ip_vs_expire_nodest_conn) {
+ /* try to expire the connection immediately */
+ ip_vs_conn_expire_now(cp);
+ }
+ /* don't restart its timer, and silently
+ drop the packet. */
+ __ip_vs_conn_put(cp);
+ return NF_DROP;
+ }
+
+ ip_vs_in_stats(cp, skb);
+ restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+ if (cp->packet_xmit)
+ ret = cp->packet_xmit(skb, cp, pp);
+ /* do not touch skb anymore */
+ else {
+ IP_VS_DBG_RL("warning: packet_xmit is null");
+ ret = NF_ACCEPT;
+ }
+
+ /* Increase its packet counter and check if it is needed
+ * to be synchronized
+ *
+ * Sync connection if it is about to close to
+ * encorage the standby servers to update the connections timeout
+ */
+ atomic_inc(&cp->in_pkts);
+ if (af == AF_INET &&
+ (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+ (((cp->protocol != IPPROTO_TCP ||
+ cp->state == IP_VS_TCP_S_ESTABLISHED) &&
+ (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
+ == sysctl_ip_vs_sync_threshold[0])) ||
+ ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
+ ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
+ (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
+ (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
+ ip_vs_sync_conn(cp);
+ cp->old_state = cp->state;
+
+ ip_vs_conn_put(cp);
+ return ret;
+}
+
+
+/*
+ * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
+ * related packets destined for 0.0.0.0/0.
+ * When fwmark-based virtual service is used, such as transparent
+ * cache cluster, TCP packets can be marked and routed to ip_vs_in,
+ * but ICMP destined for 0.0.0.0/0 cannot be easily marked and
+ * sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain
+ * and send them to ip_vs_in_icmp.
+ */
+static unsigned int
+ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
+		   const struct net_device *in, const struct net_device *out,
+		   int (*okfn)(struct sk_buff *))
+{
+	int related;	/* filled in by ip_vs_in_icmp(), not consulted here */
+
+	/* Only IPv4 ICMP is of interest on this hook; pass the rest on. */
+	if (ip_hdr(skb)->protocol == IPPROTO_ICMP)
+		return ip_vs_in_icmp(skb, &related, hooknum);
+
+	return NF_ACCEPT;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ *	IPv6 variant of the forward-chain ICMP hook: hand ICMPv6 packets to
+ *	ip_vs_in_icmp_v6() and accept everything else untouched.
+ */
+static unsigned int
+ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
+		      const struct net_device *in, const struct net_device *out,
+		      int (*okfn)(struct sk_buff *))
+{
+	int related;	/* filled in by ip_vs_in_icmp_v6(), not consulted here */
+
+	if (ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6)
+		return ip_vs_in_icmp_v6(skb, &related, hooknum);
+
+	return NF_ACCEPT;
+}
+#endif
+
+
+/*
+ * Netfilter hooks registered by ip_vs_init(): four for IPv4 and, when
+ * CONFIG_IP_VS_IPV6 is set, the same four for IPv6.
+ */
+static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
+ /* After packet filtering, forward packet through VS/DR, VS/TUN,
+ * or VS/NAT(change destination), so that filtering rules can be
+ * applied to IPVS. */
+ {
+ .hook = ip_vs_in,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = 100,
+ },
+ /* After packet filtering, change source only for VS/NAT */
+ {
+ .hook = ip_vs_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 100,
+ },
+ /* After packet filtering (but before ip_vs_out_icmp), catch icmp
+ * destined for 0.0.0.0/0, which is for incoming IPVS connections */
+ {
+ .hook = ip_vs_forward_icmp,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 99,
+ },
+ /* Before the netfilter connection tracking, exit from POST_ROUTING */
+ {
+ .hook = ip_vs_post_routing,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_NAT_SRC-1,
+ },
+#ifdef CONFIG_IP_VS_IPV6
+ /* After packet filtering, forward packet through VS/DR, VS/TUN,
+ * or VS/NAT(change destination), so that filtering rules can be
+ * applied to IPVS. */
+ {
+ .hook = ip_vs_in,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = 100,
+ },
+ /* After packet filtering, change source only for VS/NAT */
+ {
+ .hook = ip_vs_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 100,
+ },
+ /* After packet filtering (but before ip_vs_out, priority 100, which
+ * calls ip_vs_out_icmp_v6), catch ICMPv6 on the forward chain; this
+ * is the IPv6 counterpart of the ip_vs_forward_icmp entry above */
+ {
+ .hook = ip_vs_forward_icmp_v6,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 99,
+ },
+ /* Before the netfilter connection tracking, exit from POST_ROUTING */
+ {
+ .hook = ip_vs_post_routing,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_NAT_SRC-1,
+ },
+#endif
+};
+
+
+/*
+ * Initialize IP Virtual Server
+ *
+ * Brings the subsystems up in order (estimator, control, protocol, app,
+ * connection table) and registers the netfilter hooks last. On failure
+ * the goto labels undo, in reverse order, whatever was already set up.
+ * Returns the value of the last init call: negative on failure, otherwise
+ * the result of nf_register_hooks().
+ */
+static int __init ip_vs_init(void)
+{
+ int ret;
+
+ ip_vs_estimator_init();
+
+ ret = ip_vs_control_init();
+ if (ret < 0) {
+ IP_VS_ERR("can't setup control.\n");
+ goto cleanup_estimator;
+ }
+
+ ip_vs_protocol_init();
+
+ ret = ip_vs_app_init();
+ if (ret < 0) {
+ IP_VS_ERR("can't setup application helper.\n");
+ goto cleanup_protocol;
+ }
+
+ ret = ip_vs_conn_init();
+ if (ret < 0) {
+ IP_VS_ERR("can't setup connection table.\n");
+ goto cleanup_app;
+ }
+
+ ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+ if (ret < 0) {
+ IP_VS_ERR("can't register hooks.\n");
+ goto cleanup_conn;
+ }
+
+ IP_VS_INFO("ipvs loaded.\n");
+ return ret;
+
+ /* error unwinding: each label falls through to the ones below it */
+ cleanup_conn:
+ ip_vs_conn_cleanup();
+ cleanup_app:
+ ip_vs_app_cleanup();
+ cleanup_protocol:
+ ip_vs_protocol_cleanup();
+ /* control was initialised before protocol, so it is torn down after */
+ ip_vs_control_cleanup();
+ cleanup_estimator:
+ ip_vs_estimator_cleanup();
+ return ret;
+}
+
+/*
+ * Module exit: unregister the netfilter hooks first, then tear the
+ * subsystems down in the reverse of the order used by ip_vs_init().
+ */
+static void __exit ip_vs_cleanup(void)
+{
+ nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+ ip_vs_conn_cleanup();
+ ip_vs_app_cleanup();
+ ip_vs_protocol_cleanup();
+ ip_vs_control_cleanup();
+ ip_vs_estimator_cleanup();
+ IP_VS_INFO("ipvs unloaded.\n");
+}
+
+module_init(ip_vs_init);
+module_exit(ip_vs_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
new file mode 100644
index 0000000..0302cf3
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -0,0 +1,3443 @@
+/*
+ * IPVS An implementation of the IP virtual server support for the
+ * LINUX operating system. IPVS is now implemented as a module
+ * over the NetFilter framework. IPVS can be used to build a
+ * high-performance and highly available server based on a
+ * cluster of servers.
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Peter Kese <peter.kese@ijs.si>
+ * Julian Anastasov <ja@ssi.bg>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/capability.h>
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/workqueue.h>
+#include <linux/swap.h>
+#include <linux/seq_file.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/mutex.h>
+
+#include <net/net_namespace.h>
+#include <net/ip.h>
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#endif
+#include <net/route.h>
+#include <net/sock.h>
+#include <net/genetlink.h>
+
+#include <asm/uaccess.h>
+
+#include <net/ip_vs.h>
+
+/* mutex for IPVS sockopts; a sleeping lock, because [gs]etsockopt may sleep. */
+static DEFINE_MUTEX(__ip_vs_mutex);
+
+/* lock for service table */
+static DEFINE_RWLOCK(__ip_vs_svc_lock);
+
+/* lock for table with the real services */
+static DEFINE_RWLOCK(__ip_vs_rs_lock);
+
+/* lock for state and timeout tables */
+static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
+
+/* lock for drop entry handling */
+static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
+
+/* lock for drop packet handling */
+static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
+
+/* 1/rate drop and drop-entry variables */
+int ip_vs_drop_rate = 0;
+int ip_vs_drop_counter = 0;
+/* non-zero while defense_work_handler() should call ip_vs_random_dropentry() */
+static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
+
+/* number of virtual services */
+static int ip_vs_num_services = 0;
+
+/* sysctl variables */
+/* drop_entry, drop_packet and secure_tcp each take a mode 0..3 that is
+ * interpreted (and flipped between 1 and 2) by update_defense_level() */
+static int sysctl_ip_vs_drop_entry = 0;
+static int sysctl_ip_vs_drop_packet = 0;
+static int sysctl_ip_vs_secure_tcp = 0;
+/* threshold compared against free + buffer memory in update_defense_level() */
+static int sysctl_ip_vs_amemthresh = 1024;
+/* drop rate used when drop_packet mode is 3 */
+static int sysctl_ip_vs_am_droprate = 10;
+int sysctl_ip_vs_cache_bypass = 0;
+int sysctl_ip_vs_expire_nodest_conn = 0;
+int sysctl_ip_vs_expire_quiescent_template = 0;
+/* ip_vs_in() syncs a connection when in_pkts % [1] == [0] */
+int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
+int sysctl_ip_vs_nat_icmp_send = 0;
+
+
+#ifdef CONFIG_IP_VS_DEBUG
+static int sysctl_ip_vs_debug_level = 0;
+
+/* Accessor for the file-local debug level (only built with CONFIG_IP_VS_DEBUG). */
+int ip_vs_get_debug_level(void)
+{
+ return sysctl_ip_vs_debug_level;
+}
+#endif
+
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
+ *
+ * Returns 1 when the route towards @addr goes out through a loopback
+ * device (i.e. the address is treated as local), 0 otherwise.
+ *
+ * ip6_route_output() hands back a referenced dst entry, so the reference
+ * has to be dropped again before returning; otherwise every call leaks
+ * one route reference.
+ */
+static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+{
+	struct dst_entry *dst;
+	struct rt6_info *rt;
+	int is_local = 0;
+	struct flowi fl = {
+		.oif = 0,
+		.nl_u = {
+			.ip6_u = {
+				.daddr = *addr,
+				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
+	};
+
+	dst = ip6_route_output(&init_net, NULL, &fl);
+	rt = (struct rt6_info *)dst;
+	if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
+		is_local = 1;
+
+	/* drop the reference taken by the route lookup */
+	if (dst)
+		dst_release(dst);
+
+	return is_local;
+}
+#endif
+/*
+ *	update_defense_level is called from keventd and from sysctl,
+ *	so it needs to protect itself from softirqs
+ *
+ *	Re-evaluates the three defense strategies (drop_entry, drop_packet,
+ *	secure_tcp) against the amount of free + buffer memory.  For each
+ *	strategy, sysctl modes 1 and 2 are the automatic modes: the code
+ *	moves the sysctl from 1 to 2 when memory is short and back to 1
+ *	when it is not; 0 and 3 force the strategy off and on.
+ */
+static void update_defense_level(void)
+{
+	struct sysinfo i;
+	static int old_secure_tcp = 0;
+	int availmem;
+	int amemthresh;
+	int nomem;
+	int to_change = -1;
+
+	/* we only count free and buffered memory (in pages) */
+	si_meminfo(&i);
+	availmem = i.freeram + i.bufferram;
+	/* however in linux 2.5 the i.bufferram is total page cache size,
+	   we need adjust it */
+	/* si_swapinfo(&i); */
+	/* availmem = availmem - (i.totalswap - i.freeswap); */
+
+	/*
+	 * Read the threshold exactly once.  It can be rewritten through
+	 * sysctl at any time; if nomem and the divisions below used
+	 * different readings, (threshold - availmem) could become zero
+	 * and the drop-rate computation would divide by zero.
+	 */
+	amemthresh = ACCESS_ONCE(sysctl_ip_vs_amemthresh);
+	nomem = (availmem < amemthresh);
+
+	local_bh_disable();
+
+	/* drop_entry */
+	spin_lock(&__ip_vs_dropentry_lock);
+	switch (sysctl_ip_vs_drop_entry) {
+	case 0:
+		atomic_set(&ip_vs_dropentry, 0);
+		break;
+	case 1:
+		if (nomem) {
+			atomic_set(&ip_vs_dropentry, 1);
+			sysctl_ip_vs_drop_entry = 2;
+		} else {
+			atomic_set(&ip_vs_dropentry, 0);
+		}
+		break;
+	case 2:
+		if (nomem) {
+			atomic_set(&ip_vs_dropentry, 1);
+		} else {
+			atomic_set(&ip_vs_dropentry, 0);
+			sysctl_ip_vs_drop_entry = 1;
+		}
+		break;
+	case 3:
+		atomic_set(&ip_vs_dropentry, 1);
+		break;
+	}
+	spin_unlock(&__ip_vs_dropentry_lock);
+
+	/* drop_packet */
+	spin_lock(&__ip_vs_droppacket_lock);
+	switch (sysctl_ip_vs_drop_packet) {
+	case 0:
+		ip_vs_drop_rate = 0;
+		break;
+	case 1:
+		if (nomem) {
+			/* nomem guarantees amemthresh - availmem >= 1 */
+			ip_vs_drop_rate = ip_vs_drop_counter
+				= amemthresh /
+				(amemthresh-availmem);
+			sysctl_ip_vs_drop_packet = 2;
+		} else {
+			ip_vs_drop_rate = 0;
+		}
+		break;
+	case 2:
+		if (nomem) {
+			ip_vs_drop_rate = ip_vs_drop_counter
+				= amemthresh /
+				(amemthresh-availmem);
+		} else {
+			ip_vs_drop_rate = 0;
+			sysctl_ip_vs_drop_packet = 1;
+		}
+		break;
+	case 3:
+		ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+		break;
+	}
+	spin_unlock(&__ip_vs_droppacket_lock);
+
+	/* secure_tcp */
+	write_lock(&__ip_vs_securetcp_lock);
+	switch (sysctl_ip_vs_secure_tcp) {
+	case 0:
+		if (old_secure_tcp >= 2)
+			to_change = 0;
+		break;
+	case 1:
+		if (nomem) {
+			if (old_secure_tcp < 2)
+				to_change = 1;
+			sysctl_ip_vs_secure_tcp = 2;
+		} else {
+			if (old_secure_tcp >= 2)
+				to_change = 0;
+		}
+		break;
+	case 2:
+		if (nomem) {
+			if (old_secure_tcp < 2)
+				to_change = 1;
+		} else {
+			if (old_secure_tcp >= 2)
+				to_change = 0;
+			sysctl_ip_vs_secure_tcp = 1;
+		}
+		break;
+	case 3:
+		if (old_secure_tcp < 2)
+			to_change = 1;
+		break;
+	}
+	old_secure_tcp = sysctl_ip_vs_secure_tcp;
+	/* only touch the protocol timeouts when the mode crossed the 1/2
+	 * boundary since the previous run */
+	if (to_change >= 0)
+		ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
+	write_unlock(&__ip_vs_securetcp_lock);
+
+	local_bh_enable();
+}
+
+
+/*
+ *	Timer for checking the defense
+ *
+ *	The period is parenthesized so the macro stays one operand wherever
+ *	it is expanded.
+ */
+#define DEFENSE_TIMER_PERIOD	(1*HZ)
+static void defense_work_handler(struct work_struct *work);
+static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
+
+/*
+ *	Periodic work item: refresh the defense level, randomly drop
+ *	connection entries while the drop-entry defense is active, then
+ *	re-arm itself for the next period.
+ */
+static void defense_work_handler(struct work_struct *work)
+{
+	update_defense_level();
+	if (atomic_read(&ip_vs_dropentry))
+		ip_vs_random_dropentry();
+
+	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+}
+
+/* Take a reference on this module; returns the result of try_module_get(). */
+int
+ip_vs_use_count_inc(void)
+{
+ return try_module_get(THIS_MODULE);
+}
+
+/* Drop a module reference taken with ip_vs_use_count_inc(). */
+void
+ip_vs_use_count_dec(void)
+{
+ module_put(THIS_MODULE);
+}
+
+
+/*
+ * Hash table: for virtual service lookups
+ * (1 << 8 = 256 buckets per table)
+ */
+#define IP_VS_SVC_TAB_BITS 8
+#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
+#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
+
+/* the service table hashed by <protocol, addr, port> */
+static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
+/* the service table hashed by fwmark */
+static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
+
+/*
+ * Hash table: for real service lookups
+ * (1 << 4 = 16 buckets)
+ */
+#define IP_VS_RTAB_BITS 4
+#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+
+static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
+
+/*
+ * Trash for destinations
+ */
+static LIST_HEAD(ip_vs_dest_trash);
+
+/*
+ * FTP & NULL virtual service counters
+ * (ip_vs_service_get() only tries the FTP and port-zero fallbacks while
+ * the matching counter is non-zero)
+ */
+static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
+static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
+
+
+/*
+ * Returns hash value for virtual service
+ */
+static __inline__ unsigned
+ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
+		  __be16 port)
+{
+	unsigned host_port = ntohs(port);
+	__be32 folded;
+
+	/* IPv6 addresses are XOR-folded down to 32 bits before mixing */
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		folded = addr->ip6[0] ^ addr->ip6[1] ^
+			 addr->ip6[2] ^ addr->ip6[3];
+	else
+#endif
+		folded = addr->ip;
+
+	return (proto ^ ntohl(folded) ^
+		(host_port >> IP_VS_SVC_TAB_BITS) ^ host_port)
+	       & IP_VS_SVC_TAB_MASK;
+}
+
+/*
+ * Returns hash value of fwmark for virtual service lookup
+ */
+static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
+{
+	/* the bucket is simply the low bits of the firewall mark */
+	unsigned bucket = fwmark & IP_VS_SVC_TAB_MASK;
+
+	return bucket;
+}
+
+/*
+ * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
+ * or in the ip_vs_svc_fwm_table by fwmark.
+ * Should be called with locked tables.
+ */
+static int ip_vs_svc_hash(struct ip_vs_service *svc)
+{
+	struct list_head *bucket;
+
+	/* refuse to hash a service twice */
+	if (svc->flags & IP_VS_SVC_F_HASHED) {
+		IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	if (svc->fwmark != 0) {
+		/* fwmark services live in ip_vs_svc_fwm_table */
+		bucket = &ip_vs_svc_fwm_table[ip_vs_svc_fwm_hashkey(svc->fwmark)];
+		list_add(&svc->f_list, bucket);
+	} else {
+		/* everything else is keyed by <protocol,addr,port> */
+		bucket = &ip_vs_svc_table[ip_vs_svc_hashkey(svc->af,
+							    svc->protocol,
+							    &svc->addr,
+							    svc->port)];
+		list_add(&svc->s_list, bucket);
+	}
+
+	svc->flags |= IP_VS_SVC_F_HASHED;
+	/* the table itself now holds a reference on the service */
+	atomic_inc(&svc->refcnt);
+	return 1;
+}
+
+
+/*
+ * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
+ * Should be called with locked tables.
+ */
+static int ip_vs_svc_unhash(struct ip_vs_service *svc)
+{
+	struct list_head *node;
+
+	if ((svc->flags & IP_VS_SVC_F_HASHED) == 0) {
+		IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	/* pick the link that matches the table the service was hashed in */
+	node = svc->fwmark ? &svc->f_list : &svc->s_list;
+	list_del(node);
+
+	svc->flags &= ~IP_VS_SVC_F_HASHED;
+	/* give back the reference the table was holding */
+	atomic_dec(&svc->refcnt);
+	return 1;
+}
+
+
+/*
+ * Get service by {proto,addr,port} in the service table.
+ */
+static inline struct ip_vs_service *
+__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
+		    __be16 vport)
+{
+	struct ip_vs_service *entry;
+	struct list_head *bucket;
+
+	/* only "full" <protocol,addr,port> entries live in this table */
+	bucket = &ip_vs_svc_table[ip_vs_svc_hashkey(af, protocol, vaddr, vport)];
+
+	list_for_each_entry(entry, bucket, s_list) {
+		if (entry->af != af)
+			continue;
+		if (!ip_vs_addr_equal(af, &entry->addr, vaddr))
+			continue;
+		if (entry->port != vport)
+			continue;
+		if (entry->protocol != protocol)
+			continue;
+
+		/* match: account for the new user before handing it out */
+		atomic_inc(&entry->usecnt);
+		return entry;
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Get service by {fwmark} in the service table.
+ */
+static inline struct ip_vs_service *
+__ip_vs_svc_fwm_get(int af, __u32 fwmark)
+{
+	struct ip_vs_service *entry;
+	struct list_head *bucket;
+
+	bucket = &ip_vs_svc_fwm_table[ip_vs_svc_fwm_hashkey(fwmark)];
+
+	list_for_each_entry(entry, bucket, f_list) {
+		if (entry->fwmark != fwmark)
+			continue;
+		if (entry->af != af)
+			continue;
+
+		/* match: account for the new user before handing it out */
+		atomic_inc(&entry->usecnt);
+		return entry;
+	}
+
+	return NULL;
+}
+
+/*
+ *	Look up a virtual service under the service-table read lock.
+ *	Lookup order: fwmark table (only when @fwmark is non-zero), exact
+ *	<protocol,addr,port>, the FTP control-port service (for TCP packets
+ *	that may be FTP data), then the catch-all port-zero service.
+ *	Returns the service found by the helpers (which bump its usecnt)
+ *	or NULL.
+ */
+struct ip_vs_service *
+ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+		  const union nf_inet_addr *vaddr, __be16 vport)
+{
+	struct ip_vs_service *svc = NULL;
+
+	read_lock(&__ip_vs_svc_lock);
+
+	/* 1. fwmark-hashed table */
+	if (fwmark)
+		svc = __ip_vs_svc_fwm_get(af, fwmark);
+	if (svc)
+		goto out;
+
+	/* 2. fully addressed entry */
+	svc = __ip_vs_service_get(af, protocol, vaddr, vport);
+	if (svc)
+		goto out;
+
+	/* 3. the packet might belong to an FTP data connection */
+	if (protocol == IPPROTO_TCP &&
+	    atomic_read(&ip_vs_ftpsvc_counter) &&
+	    (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
+		svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
+		if (svc)
+			goto out;
+	}
+
+	/* 4. catch-all service on port zero */
+	if (atomic_read(&ip_vs_nullsvc_counter))
+		svc = __ip_vs_service_get(af, protocol, vaddr, 0);
+
+  out:
+	read_unlock(&__ip_vs_svc_lock);
+
+	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
+		      fwmark, ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
+		      svc ? "hit" : "not hit");
+
+	return svc;
+}
+
+
+/* Point @dest at @svc and take a reference on the service for it. */
+static inline void
+__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+ atomic_inc(&svc->refcnt);
+ dest->svc = svc;
+}
+
+/*
+ * Detach @dest from its service and drop the reference taken by
+ * __ip_vs_bind_svc(); the service is freed when that was the last one.
+ * dest->svc is dereferenced unconditionally, so it must not be NULL.
+ */
+static inline void
+__ip_vs_unbind_svc(struct ip_vs_dest *dest)
+{
+ struct ip_vs_service *svc = dest->svc;
+
+ dest->svc = NULL;
+ if (atomic_dec_and_test(&svc->refcnt))
+ kfree(svc);
+}
+
+
+/*
+ * Returns hash value for real service
+ */
+static inline unsigned ip_vs_rs_hashkey(int af,
+					const union nf_inet_addr *addr,
+					__be16 port)
+{
+	unsigned host_port = ntohs(port);
+	__be32 folded;
+
+	/* IPv6 addresses are XOR-folded down to 32 bits before mixing */
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		folded = addr->ip6[0] ^ addr->ip6[1] ^
+			 addr->ip6[2] ^ addr->ip6[3];
+	else
+#endif
+		folded = addr->ip;
+
+	return (ntohl(folded) ^ (host_port >> IP_VS_RTAB_BITS) ^ host_port)
+	       & IP_VS_RTAB_MASK;
+}
+
+/*
+ * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
+ * should be called with locked tables.
+ */
+static int ip_vs_rs_hash(struct ip_vs_dest *dest)
+{
+	struct list_head *bucket;
+
+	/* a non-empty d_list means the dest is already linked somewhere */
+	if (!list_empty(&dest->d_list))
+		return 0;
+
+	/* the bucket is chosen from the real server's family, address, port */
+	bucket = &ip_vs_rtable[ip_vs_rs_hashkey(dest->af, &dest->addr,
+						dest->port)];
+	list_add(&dest->d_list, bucket);
+
+	return 1;
+}
+
+/*
+ * UNhashes ip_vs_dest from ip_vs_rtable.
+ * should be called with locked tables.
+ */
+static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
+{
+	/* nothing to do when the dest is not linked into ip_vs_rtable */
+	if (list_empty(&dest->d_list))
+		return 1;
+
+	list_del(&dest->d_list);
+	/* re-initialise so a later list_empty() check sees it as unhashed */
+	INIT_LIST_HEAD(&dest->d_list);
+
+	return 1;
+}
+
+/*
+ * Lookup real service by <proto,addr,port> in the real service table.
+ */
+struct ip_vs_dest *
+ip_vs_lookup_real_service(int af, __u16 protocol,
+			  const union nf_inet_addr *daddr,
+			  __be16 dport)
+{
+	struct ip_vs_dest *cur;
+	struct ip_vs_dest *found = NULL;
+	unsigned bucket = ip_vs_rs_hashkey(af, daddr, dport);
+
+	/* walk one bucket under the read lock; the first match wins */
+	read_lock(&__ip_vs_rs_lock);
+	list_for_each_entry(cur, &ip_vs_rtable[bucket], d_list) {
+		if (cur->af != af)
+			continue;
+		if (!ip_vs_addr_equal(af, &cur->addr, daddr))
+			continue;
+		if (cur->port != dport)
+			continue;
+		/* protocol must match unless the dest belongs to a
+		 * fwmark service */
+		if (cur->protocol != protocol && !cur->vfwmark)
+			continue;
+
+		found = cur;
+		break;
+	}
+	read_unlock(&__ip_vs_rs_lock);
+
+	/* note: no reference is taken on the returned entry */
+	return found;
+}
+
+/*
+ * Lookup destination by {addr,port} in the given service
+ */
+static struct ip_vs_dest *
+ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
+		  __be16 dport)
+{
+	struct ip_vs_dest *cur;
+
+	/* linear scan of the service's own destination list */
+	list_for_each_entry(cur, &svc->destinations, n_list) {
+		if (cur->af != svc->af)
+			continue;
+		if (!ip_vs_addr_equal(svc->af, &cur->addr, daddr))
+			continue;
+		if (cur->port != dport)
+			continue;
+
+		return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Find destination by {daddr,dport,vaddr,protocol}
+ * Created to be used in ip_vs_process_message() in
+ * the backup synchronization daemon. It finds the
+ * destination to be bound to the received connection
+ * on the backup.
+ *
+ * ip_vs_lookup_real_service() looked promising, but
+ * seems not working as expected.
+ *
+ * A reference is taken on the returned destination; NULL is returned
+ * when either the service or the destination cannot be found.
+ */
+struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
+				   __be16 dport,
+				   const union nf_inet_addr *vaddr,
+				   __be16 vport, __u16 protocol)
+{
+	struct ip_vs_dest *dest = NULL;
+	struct ip_vs_service *svc;
+
+	/* fwmark 0: look the service up by address only */
+	svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
+	if (svc) {
+		dest = ip_vs_lookup_dest(svc, daddr, dport);
+		if (dest)
+			atomic_inc(&dest->refcnt);
+		ip_vs_service_put(svc);
+	}
+
+	return dest;
+}
+
+/*
+ * Lookup dest by {svc,addr,port} in the destination trash.
+ * The destination trash is used to hold the destinations that are removed
+ * from the service table but are still referenced by some conn entries.
+ * The reason to add the destination trash is when the dest is temporary
+ * down (either by administrator or by monitor program), the dest can be
+ * picked back from the trash, the remaining connections to the dest can
+ * continue, and the counting information of the dest is also useful for
+ * scheduling.
+ */
+/*
+ * Returns the trashed destination matching @svc, @daddr and @dport, or
+ * NULL. As a side effect, every non-matching entry visited before a hit
+ * whose refcnt is 1 is unlinked and freed.
+ */
+static struct ip_vs_dest *
+ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
+ __be16 dport)
+{
+ struct ip_vs_dest *dest, *nxt;
+
+ /*
+ * Find the destination in trash
+ */
+ /* _safe iteration: entries may be deleted inside the loop body */
+ list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+ IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
+ "dest->refcnt=%d\n",
+ dest->vfwmark,
+ IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ ntohs(dest->port),
+ atomic_read(&dest->refcnt));
+ /* for non-fwmark services the virtual address and port must
+ * match as well */
+ if (dest->af == svc->af &&
+ ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
+ dest->port == dport &&
+ dest->vfwmark == svc->fwmark &&
+ dest->protocol == svc->protocol &&
+ (svc->fwmark ||
+ (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
+ dest->vport == svc->port))) {
+ /* HIT */
+ return dest;
+ }
+
+ /*
+ * Try to purge the destination from trash if not referenced
+ */
+ /* refcnt == 1 presumably means only the trash list holds it */
+ if (atomic_read(&dest->refcnt) == 1) {
+ IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
+ "from trash\n",
+ dest->vfwmark,
+ IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ ntohs(dest->port));
+ list_del(&dest->n_list);
+ ip_vs_dst_reset(dest);
+ __ip_vs_unbind_svc(dest);
+ kfree(dest);
+ }
+ }
+
+ return NULL;
+}
+
+
+/*
+ * Clean up all the destinations in the trash
+ * Called by the ip_vs_control_cleanup()
+ *
+ * When ip_vs_control_cleanup() is invoked at ipvs module exit,
+ * the service tables must have been flushed and all the connections
+ * are expired, and the refcnt of each destination in the trash must
+ * be 1, so we simply release them here.
+ */
+static void ip_vs_trash_cleanup(void)
+{
+ struct ip_vs_dest *dest, *nxt;
+
+ /* _safe iteration: every entry is unlinked and freed as we go */
+ list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+ list_del(&dest->n_list);
+ ip_vs_dst_reset(dest);
+ __ip_vs_unbind_svc(dest);
+ kfree(dest);
+ }
+}
+
+
+/*
+ * Reset the user-visible counters and the estimator state of @stats
+ * while holding the stats lock with bottom halves disabled.
+ */
+static void
+ip_vs_zero_stats(struct ip_vs_stats *stats)
+{
+ spin_lock_bh(&stats->lock);
+
+ memset(&stats->ustats, 0, sizeof(stats->ustats));
+ ip_vs_zero_estimator(stats);
+
+ spin_unlock_bh(&stats->lock);
+}
+
+/*
+ * Update a destination in the given service
+ */
+/*
+ * Apply the user-supplied settings in @udest (weight, connection flags,
+ * thresholds) to @dest, bind @dest to @svc if it is not already bound to
+ * it, and mark the destination available.
+ */
+static void
+__ip_vs_update_dest(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
+{
+ int conn_flags;
+
+ /* set the weight and the flags */
+ atomic_set(&dest->weight, udest->weight);
+ conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
+
+ /* check if local node and update the flags */
+#ifdef CONFIG_IP_VS_IPV6
+ if (svc->af == AF_INET6) {
+ if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
+ conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
+ | IP_VS_CONN_F_LOCALNODE;
+ }
+ } else
+#endif
+ if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
+ conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
+ | IP_VS_CONN_F_LOCALNODE;
+ }
+
+ /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
+ if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
+ conn_flags |= IP_VS_CONN_F_NOOUTPUT;
+ } else {
+ /*
+ * Put the real service in ip_vs_rtable if not present.
+ * For now only for NAT!
+ */
+ write_lock_bh(&__ip_vs_rs_lock);
+ ip_vs_rs_hash(dest);
+ write_unlock_bh(&__ip_vs_rs_lock);
+ }
+ atomic_set(&dest->conn_flags, conn_flags);
+
+ /* bind the service */
+ if (!dest->svc) {
+ __ip_vs_bind_svc(dest, svc);
+ } else {
+ /* bound to a different service: rebind and restart its stats */
+ if (dest->svc != svc) {
+ __ip_vs_unbind_svc(dest);
+ ip_vs_zero_stats(&dest->stats);
+ __ip_vs_bind_svc(dest, svc);
+ }
+ }
+
+ /* set the dest status flags */
+ dest->flags |= IP_VS_DEST_F_AVAILABLE;
+
+ /* clear the overload mark when the upper threshold is removed (0)
+ * or raised above its previous value */
+ if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ dest->u_threshold = udest->u_threshold;
+ dest->l_threshold = udest->l_threshold;
+}
+
+
+/*
+ * Allocate and initialise a destination for @svc from the request @udest.
+ *
+ * The address must be unicast and not link-local, or else local (IPv6),
+ * or of type RTN_LOCAL/RTN_UNICAST (IPv4); otherwise -EINVAL is returned.
+ * Returns -ENOMEM if the allocation fails. On success the new destination
+ * is stored in *dest_p with refcnt 0 and 0 is returned.
+ */
+static int
+ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
+	       struct ip_vs_dest **dest_p)
+{
+	struct ip_vs_dest *fresh;
+	unsigned addr_kind;
+
+	EnterFunction(2);
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6) {
+		addr_kind = ipv6_addr_type(&udest->addr.in6);
+		if ((!(addr_kind & IPV6_ADDR_UNICAST) ||
+		     addr_kind & IPV6_ADDR_LINKLOCAL) &&
+		    !__ip_vs_addr_is_local_v6(&udest->addr.in6))
+			return -EINVAL;
+	} else
+#endif
+	{
+		addr_kind = inet_addr_type(&init_net, udest->addr.ip);
+		if (!(addr_kind == RTN_LOCAL || addr_kind == RTN_UNICAST))
+			return -EINVAL;
+	}
+
+	fresh = kzalloc(sizeof(*fresh), GFP_ATOMIC);
+	if (!fresh) {
+		IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
+		return -ENOMEM;
+	}
+
+	/* identity of the virtual service this destination belongs to */
+	fresh->af = svc->af;
+	fresh->protocol = svc->protocol;
+	fresh->vaddr = svc->addr;
+	fresh->vport = svc->port;
+	fresh->vfwmark = svc->fwmark;
+
+	/* identity of the real server itself */
+	ip_vs_addr_copy(svc->af, &fresh->addr, &udest->addr);
+	fresh->port = udest->port;
+
+	/* counters all start from zero */
+	atomic_set(&fresh->activeconns, 0);
+	atomic_set(&fresh->inactconns, 0);
+	atomic_set(&fresh->persistconns, 0);
+	atomic_set(&fresh->refcnt, 0);
+
+	INIT_LIST_HEAD(&fresh->d_list);
+	spin_lock_init(&fresh->dst_lock);
+	spin_lock_init(&fresh->stats.lock);
+	__ip_vs_update_dest(svc, fresh, udest);
+	ip_vs_new_estimator(&fresh->stats);
+
+	*dest_p = fresh;
+
+	LeaveFunction(2);
+	return 0;
+}
+
+
+/*
+ * Link @dest into the destination list of @svc.
+ *
+ * Takes __ip_vs_svc_lock for writing, waits until the caller is the only
+ * user of @svc, adds @dest to svc->destinations, bumps svc->num_dests and
+ * lets the scheduler react through its update_service hook, if any.
+ */
+static void __ip_vs_link_dest(struct ip_vs_service *svc,
+			      struct ip_vs_dest *dest)
+{
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 * Wait until all other svc users go away.
+	 */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	list_add(&dest->n_list, &svc->destinations);
+	svc->num_dests++;
+
+	/* call the update_service function of its scheduler */
+	if (svc->scheduler->update_service)
+		svc->scheduler->update_service(svc);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+}
+
+
+/*
+ * Add a destination into an existing service
+ *
+ * Returns 0 on success, -ERANGE for a negative weight or a lower
+ * threshold above the upper one, -EEXIST if @svc already has this
+ * destination, or the error from ip_vs_new_dest().
+ *
+ * A matching destination found in the trash is taken out of it and
+ * reused; otherwise a new one is allocated. Both paths link the
+ * destination through __ip_vs_link_dest().
+ */
+static int
+ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+{
+	struct ip_vs_dest *dest;
+	union nf_inet_addr daddr;
+	__be16 dport = udest->port;
+	int ret;
+
+	EnterFunction(2);
+
+	if (udest->weight < 0) {
+		IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
+		return -ERANGE;
+	}
+
+	if (udest->l_threshold > udest->u_threshold) {
+		IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
+			  "upper threshold\n");
+		return -ERANGE;
+	}
+
+	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+
+	/*
+	 * Check if the dest already exists in the list
+	 */
+	dest = ip_vs_lookup_dest(svc, &daddr, dport);
+
+	if (dest != NULL) {
+		IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
+		return -EEXIST;
+	}
+
+	/*
+	 * Check if the dest already exists in the trash and
+	 * is from the same service
+	 */
+	dest = ip_vs_trash_get_dest(svc, &daddr, dport);
+
+	if (dest != NULL) {
+		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
+			      "dest->refcnt=%d, service %u/%s:%u\n",
+			      IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
+			      atomic_read(&dest->refcnt),
+			      dest->vfwmark,
+			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
+			      ntohs(dest->vport));
+
+		__ip_vs_update_dest(svc, dest, udest);
+
+		/*
+		 * Get the destination from the trash
+		 */
+		list_del(&dest->n_list);
+
+		ip_vs_new_estimator(&dest->stats);
+
+		/* the reference held while in the trash carries over */
+		__ip_vs_link_dest(svc, dest);
+
+		LeaveFunction(2);
+		return 0;
+	}
+
+	/*
+	 * Allocate and initialize the dest structure
+	 */
+	ret = ip_vs_new_dest(svc, udest, &dest);
+	if (ret) {
+		return ret;
+	}
+
+	/*
+	 * Add the dest entry into the list
+	 */
+	atomic_inc(&dest->refcnt);
+
+	__ip_vs_link_dest(svc, dest);
+
+	LeaveFunction(2);
+
+	return 0;
+}
+
+
+/*
+ * Change the settings of a destination that already belongs to @svc.
+ *
+ * Returns 0 on success, -ERANGE for a negative weight or a lower
+ * threshold above the upper one, and -ENOENT when @svc has no destination
+ * with the requested address and port.
+ */
+static int
+ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+{
+	union nf_inet_addr target;
+	struct ip_vs_dest *found;
+
+	EnterFunction(2);
+
+	if (udest->weight < 0) {
+		IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
+		return -ERANGE;
+	}
+
+	if (udest->l_threshold > udest->u_threshold) {
+		IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
+			  "upper threshold\n");
+		return -ERANGE;
+	}
+
+	/* find the destination on the service's list */
+	ip_vs_addr_copy(svc->af, &target, &udest->addr);
+	found = ip_vs_lookup_dest(svc, &target, udest->port);
+	if (!found) {
+		IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
+		return -ENOENT;
+	}
+
+	__ip_vs_update_dest(svc, found, udest);
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/* we must be the only user of svc before the scheduler is poked */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	/* the weight may have changed, so let the scheduler resync */
+	if (svc->scheduler->update_service)
+		svc->scheduler->update_service(svc);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	LeaveFunction(2);
+
+	return 0;
+}
+
+
+/*
+ * Delete a destination (must be already unlinked from the service)
+ *
+ * Kills the estimator, unhashes the real server, then drops one
+ * reference. If that was the last one the destination is freed;
+ * otherwise it is put on ip_vs_dest_trash and its refcnt is raised again.
+ */
+static void __ip_vs_del_dest(struct ip_vs_dest *dest)
+{
+ ip_vs_kill_estimator(&dest->stats);
+
+ /*
+ * Remove it from the d-linked list with the real services.
+ */
+ write_lock_bh(&__ip_vs_rs_lock);
+ ip_vs_rs_unhash(dest);
+ write_unlock_bh(&__ip_vs_rs_lock);
+
+ /*
+ * Decrease the refcnt of the dest, and free the dest
+ * if nobody refers to it (refcnt=0). Otherwise, throw
+ * the destination into the trash.
+ */
+ if (atomic_dec_and_test(&dest->refcnt)) {
+ ip_vs_dst_reset(dest);
+ /* simply decrease svc->refcnt here, let the caller check
+ and release the service if nobody refers to it.
+ Only user context can release destination and service,
+ and only one user context can update virtual service at a
+ time, so the operation here is OK */
+ atomic_dec(&dest->svc->refcnt);
+ kfree(dest);
+ } else {
+ IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
+ "dest->refcnt=%d\n",
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
+ ntohs(dest->port),
+ atomic_read(&dest->refcnt));
+ list_add(&dest->n_list, &ip_vs_dest_trash);
+ atomic_inc(&dest->refcnt); /* restores the count dropped by the test above while dest sits in the trash */
+ }
+}
+
+
+/*
+ * Take @dest off the destination list of @svc.
+ *
+ * Clears the AVAILABLE flag of @dest and decrements svc->num_dests.
+ * When @svcupd is non-zero the scheduler's update_service hook, if it
+ * has one, is called afterwards.
+ */
+static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
+				struct ip_vs_dest *dest,
+				int svcupd)
+{
+	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
+
+	/* drop it from the service's d-linked destination list */
+	list_del(&dest->n_list);
+	svc->num_dests--;
+
+	if (!svcupd)
+		return;
+
+	/* tell the scheduler that the set of destinations changed */
+	if (svc->scheduler->update_service)
+		svc->scheduler->update_service(svc);
+}
+
+
+/*
+ * Remove one destination server from @svc.
+ *
+ * Returns 0 on success or -ENOENT when @svc has no destination with the
+ * address and port given in @udest.
+ */
+static int
+ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+{
+	struct ip_vs_dest *victim;
+
+	EnterFunction(2);
+
+	victim = ip_vs_lookup_dest(svc, &udest->addr, udest->port);
+	if (!victim) {
+		IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
+		return -ENOENT;
+	}
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/* we must be the only user of svc before its list is changed */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	/* unlink from the service and let the scheduler know */
+	__ip_vs_unlink_dest(svc, victim, 1);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	/* the destination itself is disposed of outside the lock */
+	__ip_vs_del_dest(victim);
+
+	LeaveFunction(2);
+
+	return 0;
+}
+
+
+/*
+ * Add a service into the service hash table
+ *
+ * On success the new service is stored in *svc_p and 0 is returned; the
+ * module use count taken at entry is kept. On failure a negative errno
+ * is returned (-ENOENT: scheduler not found, -EAFNOSUPPORT / -EINVAL:
+ * IPv6 checks, -ENOMEM, or the ip_vs_bind_scheduler() error) and the
+ * module use count is dropped again.
+ */
+static int
+ip_vs_add_service(struct ip_vs_service_user_kern *u,
+ struct ip_vs_service **svc_p)
+{
+ int ret = 0;
+ struct ip_vs_scheduler *sched = NULL;
+ struct ip_vs_service *svc = NULL;
+
+ /* increase the module use count */
+ ip_vs_use_count_inc();
+
+ /* Lookup the scheduler by 'u->sched_name' */
+ sched = ip_vs_scheduler_get(u->sched_name);
+ if (sched == NULL) {
+ IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
+ u->sched_name);
+ ret = -ENOENT;
+ goto out_mod_dec;
+ }
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (u->af == AF_INET6) {
+ if (!sched->supports_ipv6) {
+ ret = -EAFNOSUPPORT;
+ goto out_err;
+ }
+ if ((u->netmask < 1) || (u->netmask > 128)) {
+ ret = -EINVAL;
+ goto out_err;
+ }
+ }
+#endif
+
+ svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
+ if (svc == NULL) {
+ IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ /* I'm the first user of the service */
+ atomic_set(&svc->usecnt, 1);
+ atomic_set(&svc->refcnt, 0);
+
+ svc->af = u->af;
+ svc->protocol = u->protocol;
+ ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
+ svc->port = u->port;
+ svc->fwmark = u->fwmark;
+ svc->flags = u->flags;
+ svc->timeout = u->timeout * HZ; /* stored scaled by HZ */
+ svc->netmask = u->netmask;
+
+ INIT_LIST_HEAD(&svc->destinations);
+ rwlock_init(&svc->sched_lock);
+ spin_lock_init(&svc->stats.lock);
+
+ /* Bind the scheduler */
+ ret = ip_vs_bind_scheduler(svc, sched);
+ if (ret)
+ goto out_err;
+ sched = NULL; /* scheduler is bound to svc from here on */
+
+ /* Update the virtual service counters */
+ if (svc->port == FTPPORT)
+ atomic_inc(&ip_vs_ftpsvc_counter);
+ else if (svc->port == 0)
+ atomic_inc(&ip_vs_nullsvc_counter);
+
+ ip_vs_new_estimator(&svc->stats);
+
+ /* Count only IPv4 services for old get/setsockopt interface */
+ if (svc->af == AF_INET)
+ ip_vs_num_services++;
+
+ /* Hash the service into the service table */
+ write_lock_bh(&__ip_vs_svc_lock);
+ ip_vs_svc_hash(svc);
+ write_unlock_bh(&__ip_vs_svc_lock);
+
+ *svc_p = svc;
+ return 0;
+
+ out_err: /* undo whatever was set up on svc, then put the scheduler */
+ if (svc != NULL) {
+ if (svc->scheduler)
+ ip_vs_unbind_scheduler(svc);
+ if (svc->inc) {
+ local_bh_disable();
+ ip_vs_app_inc_put(svc->inc);
+ local_bh_enable();
+ }
+ kfree(svc);
+ }
+ ip_vs_scheduler_put(sched);
+
+ out_mod_dec:
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
+
+ return ret;
+}
+
+
+/*
+ * Edit a service and bind it with a new scheduler
+ *
+ * Returns 0 on success, -ENOENT if the scheduler named in @u is not
+ * found, -EAFNOSUPPORT / -EINVAL from the IPv6 checks, or the error from
+ * unbinding/binding the scheduler.
+ *
+ * old_sched names the scheduler whose reference is dropped on exit: the
+ * replaced one after a successful switch, otherwise the one just looked
+ * up (on failure, or when the service already uses that scheduler).
+ */
+static int
+ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
+{
+ struct ip_vs_scheduler *sched, *old_sched;
+ int ret = 0;
+
+ /*
+ * Lookup the scheduler, by 'u->sched_name'
+ */
+ sched = ip_vs_scheduler_get(u->sched_name);
+ if (sched == NULL) {
+ IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
+ u->sched_name);
+ return -ENOENT;
+ }
+ old_sched = sched; /* so the early 'goto out' paths put the looked-up scheduler */
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (u->af == AF_INET6) {
+ if (!sched->supports_ipv6) {
+ ret = -EAFNOSUPPORT;
+ goto out;
+ }
+ if ((u->netmask < 1) || (u->netmask > 128)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+#endif
+
+ write_lock_bh(&__ip_vs_svc_lock);
+
+ /*
+ * Wait until all other svc users go away.
+ */
+ IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+ /*
+ * Set the flags and timeout value
+ */
+ svc->flags = u->flags | IP_VS_SVC_F_HASHED;
+ svc->timeout = u->timeout * HZ;
+ svc->netmask = u->netmask;
+
+ old_sched = svc->scheduler;
+ if (sched != old_sched) {
+ /*
+ * Unbind the old scheduler
+ */
+ if ((ret = ip_vs_unbind_scheduler(svc))) {
+ old_sched = sched; /* failed: put the new scheduler instead */
+ goto out_unlock;
+ }
+
+ /*
+ * Bind the new scheduler
+ */
+ if ((ret = ip_vs_bind_scheduler(svc, sched))) {
+ /*
+ * If ip_vs_bind_scheduler fails, restore the old
+ * scheduler.
+ * The main reason of failure is out of memory.
+ *
+ * The question is if the old scheduler can be
+ * restored all the time. TODO: if it cannot be
+ * restored some time, we must delete the service,
+ * otherwise the system may crash.
+ */
+ ip_vs_bind_scheduler(svc, old_sched);
+ old_sched = sched;
+ goto out_unlock;
+ }
+ }
+
+ out_unlock:
+ write_unlock_bh(&__ip_vs_svc_lock);
+#ifdef CONFIG_IP_VS_IPV6
+ out:
+#endif
+
+ if (old_sched)
+ ip_vs_scheduler_put(old_sched);
+
+ return ret;
+}
+
+
+/*
+ * Delete a service from the service list
+ * - The service must be unlinked, unlocked and not referenced!
+ * - We are called under _bh lock
+ *
+ * Kills the estimator, unbinds and puts the scheduler, puts the app
+ * incarnation, unlinks and deletes every destination, adjusts the
+ * ftp/null service counters, frees @svc if its refcnt is 0, and drops
+ * one module use count.
+ */
+static void __ip_vs_del_service(struct ip_vs_service *svc)
+{
+ struct ip_vs_dest *dest, *nxt;
+ struct ip_vs_scheduler *old_sched;
+
+ /* Count only IPv4 services for old get/setsockopt interface */
+ if (svc->af == AF_INET)
+ ip_vs_num_services--;
+
+ ip_vs_kill_estimator(&svc->stats);
+
+ /* Unbind scheduler */
+ old_sched = svc->scheduler; /* remembered before the unbind so it can still be put */
+ ip_vs_unbind_scheduler(svc);
+ if (old_sched)
+ ip_vs_scheduler_put(old_sched);
+
+ /* Unbind app inc */
+ if (svc->inc) {
+ ip_vs_app_inc_put(svc->inc);
+ svc->inc = NULL;
+ }
+
+ /*
+ * Unlink the whole destination list
+ */
+ list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
+ __ip_vs_unlink_dest(svc, dest, 0); /* 0: skip the per-destination scheduler update */
+ __ip_vs_del_dest(dest);
+ }
+
+ /*
+ * Update the virtual service counters
+ */
+ if (svc->port == FTPPORT)
+ atomic_dec(&ip_vs_ftpsvc_counter);
+ else if (svc->port == 0)
+ atomic_dec(&ip_vs_nullsvc_counter);
+
+ /*
+ * Free the service if nobody refers to it
+ */
+ if (atomic_read(&svc->refcnt) == 0)
+ kfree(svc);
+
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
+}
+
+/*
+ * Unhash @svc from the service table and delete it.
+ *
+ * Returns 0 on success, or -EEXIST when @svc is NULL.
+ */
+static int ip_vs_del_service(struct ip_vs_service *svc)
+{
+	if (!svc)
+		return -EEXIST;
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/* take it out of the table so no new user can find it */
+	ip_vs_svc_unhash(svc);
+
+	/* then wait until we are the only user left */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	__ip_vs_del_service(svc);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	return 0;
+}
+
+
+/*
+ * Flush all the virtual services
+ *
+ * Walks both service hash tables and, for every service, unhashes it,
+ * waits for its users to go away and deletes it, all under
+ * __ip_vs_svc_lock. Always returns 0.
+ *
+ * Note the wait condition here is usecnt > 0, whereas
+ * ip_vs_del_service() waits while usecnt > 1.
+ * NOTE(review): presumably because no reference to the service is held
+ * on this path - confirm against the callers.
+ */
+static int ip_vs_flush(void)
+{
+ int idx;
+ struct ip_vs_service *svc, *nxt;
+
+ /*
+ * Flush the service table hashed by <protocol,addr,port>
+ */
+ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+ list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
+ write_lock_bh(&__ip_vs_svc_lock);
+ ip_vs_svc_unhash(svc);
+ /*
+ * Wait until all the svc users go away.
+ */
+ IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+ __ip_vs_del_service(svc);
+ write_unlock_bh(&__ip_vs_svc_lock);
+ }
+ }
+
+ /*
+ * Flush the service table hashed by fwmark
+ */
+ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+ list_for_each_entry_safe(svc, nxt,
+ &ip_vs_svc_fwm_table[idx], f_list) {
+ write_lock_bh(&__ip_vs_svc_lock);
+ ip_vs_svc_unhash(svc);
+ /*
+ * Wait until all the svc users go away.
+ */
+ IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+ __ip_vs_del_service(svc);
+ write_unlock_bh(&__ip_vs_svc_lock);
+ }
+ }
+
+ return 0;
+}
+
+
+/*
+ * Zero the counters of one service: the stats of each of its
+ * destinations first, then the stats of the service itself.
+ * Runs under __ip_vs_svc_lock held for writing. Always returns 0.
+ */
+static int ip_vs_zero_service(struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *rs;
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	list_for_each_entry(rs, &svc->destinations, n_list)
+		ip_vs_zero_stats(&rs->stats);
+	ip_vs_zero_stats(&svc->stats);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	return 0;
+}
+
+/*
+ * Zero the counters of every service in both hash tables, then the
+ * global ip_vs_stats. Always returns 0.
+ */
+static int ip_vs_zero_all(void)
+{
+	struct ip_vs_service *cur;
+	int bucket;
+
+	/* services hashed by <protocol,addr,port> */
+	for (bucket = 0; bucket < IP_VS_SVC_TAB_SIZE; bucket++)
+		list_for_each_entry(cur, &ip_vs_svc_table[bucket], s_list)
+			ip_vs_zero_service(cur);
+
+	/* services hashed by fwmark */
+	for (bucket = 0; bucket < IP_VS_SVC_TAB_SIZE; bucket++)
+		list_for_each_entry(cur, &ip_vs_svc_fwm_table[bucket], f_list)
+			ip_vs_zero_service(cur);
+
+	ip_vs_zero_stats(&ip_vs_stats);
+
+	return 0;
+}
+
+
+/*
+ * sysctl handler for the defense-mode knobs.
+ *
+ * Lets proc_dointvec() do the read or write. After a write that changed
+ * the value, a result outside 0..3 is rolled back to the previous value;
+ * any other change triggers update_defense_level(). Returns whatever
+ * proc_dointvec() returned.
+ */
+static int
+proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
+		     void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *slot = table->data;
+	int before = *slot;
+	int rc;
+
+	rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+
+	/* nothing more to do for reads or for writes of the same value */
+	if (!write || *slot == before)
+		return rc;
+
+	if (*slot < 0 || *slot > 3) {
+		/* out of range: put the old value back */
+		*slot = before;
+		return rc;
+	}
+
+	update_defense_level();
+	return rc;
+}
+
+
+/*
+ * sysctl handler for the two-integer sync threshold.
+ *
+ * Lets proc_dointvec() do the read or write. After a write, the pair is
+ * rolled back to its previous contents if either value is negative or the
+ * first is not strictly smaller than the second. Returns whatever
+ * proc_dointvec() returned.
+ */
+static int
+proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
+		       void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *pair = table->data;
+	int saved[2];
+	int rc;
+
+	/* remember the current pair before it can be overwritten */
+	saved[0] = pair[0];
+	saved[1] = pair[1];
+
+	rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+
+	if (write && (pair[0] < 0 || pair[1] < 0 || pair[0] >= pair[1])) {
+		/* invalid combination: restore what was there */
+		pair[0] = saved[0];
+		pair[1] = saved[1];
+	}
+
+	return rc;
+}
+
+
+/*
+ * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
+ *
+ * drop_entry, drop_packet and secure_tcp are handled by
+ * proc_do_defense_mode(), sync_threshold by proc_do_sync_threshold();
+ * every other entry is a plain integer handled by proc_dointvec().
+ * debug_level exists only with CONFIG_IP_VS_DEBUG.
+ *
+ * The table ends with an entry whose ctl_name is 0.
+ */
+
+static struct ctl_table vs_vars[] = {
+	{
+		.procname	= "amemthresh",
+		.data		= &sysctl_ip_vs_amemthresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#ifdef CONFIG_IP_VS_DEBUG
+	{
+		.procname	= "debug_level",
+		.data		= &sysctl_ip_vs_debug_level,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "am_droprate",
+		.data		= &sysctl_ip_vs_am_droprate,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.procname	= "drop_entry",
+		.data		= &sysctl_ip_vs_drop_entry,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_defense_mode,
+	},
+	{
+		.procname	= "drop_packet",
+		.data		= &sysctl_ip_vs_drop_packet,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_defense_mode,
+	},
+	{
+		.procname	= "secure_tcp",
+		.data		= &sysctl_ip_vs_secure_tcp,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_defense_mode,
+	},
+	{
+		.procname	= "cache_bypass",
+		.data		= &sysctl_ip_vs_cache_bypass,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.procname	= "expire_nodest_conn",
+		.data		= &sysctl_ip_vs_expire_nodest_conn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.procname	= "expire_quiescent_template",
+		.data		= &sysctl_ip_vs_expire_quiescent_template,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.procname	= "sync_threshold",
+		.data		= &sysctl_ip_vs_sync_threshold,
+		.maxlen		= sizeof(sysctl_ip_vs_sync_threshold),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_sync_threshold,
+	},
+	{
+		.procname	= "nat_icmp_send",
+		.data		= &sysctl_ip_vs_nat_icmp_send,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
+};
+
+const struct ctl_path net_vs_ctl_path[] = { /* sysctl directory path net/ipv4/vs; exported GPL-only below */
+ { .procname = "net", .ctl_name = CTL_NET, },
+ { .procname = "ipv4", .ctl_name = NET_IPV4, },
+ { .procname = "vs", },
+ { } /* empty entry ends the path */
+};
+EXPORT_SYMBOL_GPL(net_vs_ctl_path);
+
+static struct ctl_table_header * sysctl_header; /* NOTE(review): presumably the handle from registering vs_vars - confirm at the registration site */
+
+#ifdef CONFIG_PROC_FS
+
+struct ip_vs_iter { /* cursor kept in seq->private while listing services */
+ struct list_head *table; /* ip_vs_svc_table or ip_vs_svc_fwm_table */
+ int bucket; /* index of the bucket holding the current service */
+};
+
+/*
+ * Write the contents of the VS rule table to a PROCfs file.
+ * (It is kept just for backward compatibility)
+ */
+/*
+ * Map the forwarding-method bits of @flags to the label shown in the
+ * service listing. Anything other than LOCALNODE, TUNNEL or DROUTE is
+ * reported as "Masq".
+ */
+static inline const char *ip_vs_fwd_name(unsigned flags)
+{
+	unsigned method = flags & IP_VS_CONN_F_FWD_MASK;
+
+	if (method == IP_VS_CONN_F_LOCALNODE)
+		return "Local";
+	if (method == IP_VS_CONN_F_TUNNEL)
+		return "Tunnel";
+	if (method == IP_VS_CONN_F_DROUTE)
+		return "Route";
+
+	return "Masq";
+}
+
+
+/*
+ * Return the service at zero-based position @pos, counting through every
+ * bucket of ip_vs_svc_table first and of ip_vs_svc_fwm_table second.
+ * The table and bucket of the hit are recorded in the iterator stored in
+ * seq->private. Returns NULL when the position is past the last service.
+ */
+static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
+{
+	struct ip_vs_iter *cursor = seq->private;
+	struct ip_vs_service *svc;
+	loff_t left = pos;
+	int b;
+
+	/* services hashed by protocol come first */
+	for (b = 0; b < IP_VS_SVC_TAB_SIZE; b++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[b], s_list) {
+			if (left == 0) {
+				cursor->table = ip_vs_svc_table;
+				cursor->bucket = b;
+				return svc;
+			}
+			left--;
+		}
+	}
+
+	/* then the ones hashed by fwmark */
+	for (b = 0; b < IP_VS_SVC_TAB_SIZE; b++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[b], f_list) {
+			if (left == 0) {
+				cursor->table = ip_vs_svc_fwm_table;
+				cursor->bucket = b;
+				return svc;
+			}
+			left--;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * seq_file ->start: take __ip_vs_svc_lock for reading and position the
+ * iterator. Position 0 is the header token; position N > 0 is the
+ * service with index N - 1.
+ */
+static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
+__acquires(__ip_vs_svc_lock)
+{
+	read_lock_bh(&__ip_vs_svc_lock);
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	return ip_vs_info_array(seq, *pos - 1);
+}
+
+
+static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* seq_file ->next: advance *pos and return the service after @v, or NULL at the end */
+{
+ struct list_head *e;
+ struct ip_vs_iter *iter;
+ struct ip_vs_service *svc;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN) /* header was just shown: begin with the first service */
+ return ip_vs_info_array(seq,0);
+
+ svc = v;
+ iter = seq->private;
+
+ if (iter->table == ip_vs_svc_table) {
+ /* next service in table hashed by protocol */
+ if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
+ return list_entry(e, struct ip_vs_service, s_list);
+
+
+ while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
+ list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
+ s_list) {
+ return svc; /* first entry of the next non-empty bucket */
+ }
+ }
+
+ iter->table = ip_vs_svc_fwm_table;
+ iter->bucket = -1; /* the pre-increment at scan_fwmark starts at bucket 0 */
+ goto scan_fwmark;
+ }
+
+ /* next service in hashed by fwmark */
+ if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
+ return list_entry(e, struct ip_vs_service, f_list);
+
+ scan_fwmark:
+ while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
+ list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
+ f_list)
+ return svc; /* first entry of the next non-empty bucket */
+ }
+
+ return NULL;
+}
+
+static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) /* seq_file ->stop */
+__releases(__ip_vs_svc_lock)
+{
+ read_unlock_bh(&__ip_vs_svc_lock); /* pairs with the read_lock_bh() in ip_vs_info_seq_start() */
+}
+
+
+static int ip_vs_info_seq_show(struct seq_file *seq, void *v) /* seq_file ->show: the header for the start token, else one service line plus one line per destination */
+{
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(seq,
+ "IP Virtual Server version %d.%d.%d (size=%d)\n",
+ NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+ seq_puts(seq,
+ "Prot LocalAddress:Port Scheduler Flags\n");
+ seq_puts(seq,
+ " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
+ } else {
+ const struct ip_vs_service *svc = v;
+ const struct ip_vs_iter *iter = seq->private;
+ const struct ip_vs_dest *dest;
+
+ if (iter->table == ip_vs_svc_table) { /* addressed service: protocol, address and port in hex */
+#ifdef CONFIG_IP_VS_IPV6
+ if (svc->af == AF_INET6)
+ seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ",
+ ip_vs_proto_name(svc->protocol),
+ NIP6(svc->addr.in6),
+ ntohs(svc->port),
+ svc->scheduler->name);
+ else
+#endif
+ seq_printf(seq, "%s %08X:%04X %s ",
+ ip_vs_proto_name(svc->protocol),
+ ntohl(svc->addr.ip),
+ ntohs(svc->port),
+ svc->scheduler->name);
+ } else { /* fwmark service: identified by the mark only */
+ seq_printf(seq, "FWM %08X %s ",
+ svc->fwmark, svc->scheduler->name);
+ }
+
+ if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+ seq_printf(seq, "persistent %d %08X\n",
+ svc->timeout, /* NOTE(review): ip_vs_add_service() stores this as u->timeout * HZ, so the HZ-scaled value is printed */
+ ntohl(svc->netmask));
+ else
+ seq_putc(seq, '\n');
+
+ list_for_each_entry(dest, &svc->destinations, n_list) {
+#ifdef CONFIG_IP_VS_IPV6
+ if (dest->af == AF_INET6)
+ seq_printf(seq,
+ " -> [" NIP6_FMT "]:%04X"
+ " %-7s %-6d %-10d %-10d\n",
+ NIP6(dest->addr.in6),
+ ntohs(dest->port),
+ ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+ atomic_read(&dest->weight),
+ atomic_read(&dest->activeconns),
+ atomic_read(&dest->inactconns));
+ else
+#endif
+ seq_printf(seq,
+ " -> %08X:%04X "
+ "%-7s %-6d %-10d %-10d\n",
+ ntohl(dest->addr.ip),
+ ntohs(dest->port),
+ ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+ atomic_read(&dest->weight),
+ atomic_read(&dest->activeconns),
+ atomic_read(&dest->inactconns));
+
+ }
+ }
+ return 0;
+}
+
+static const struct seq_operations ip_vs_info_seq_ops = { /* iterator callbacks for the service listing */
+ .start = ip_vs_info_seq_start,
+ .next = ip_vs_info_seq_next,
+ .stop = ip_vs_info_seq_stop,
+ .show = ip_vs_info_seq_show,
+};
+
+static int ip_vs_info_open(struct inode *inode, struct file *file) /* open: set up the seq_file with a private struct ip_vs_iter */
+{
+ return seq_open_private(file, &ip_vs_info_seq_ops,
+ sizeof(struct ip_vs_iter));
+}
+
+static const struct file_operations ip_vs_info_fops = { /* file operations for the service listing */
+ .owner = THIS_MODULE,
+ .open = ip_vs_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private, /* matches seq_open_private() in ip_vs_info_open() */
+};
+
+#endif
+
+struct ip_vs_stats ip_vs_stats = { /* global statistics block; only its spinlock needs a static initializer */
+ .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
+};
+
+#ifdef CONFIG_PROC_FS
+static int ip_vs_stats_show(struct seq_file *seq, void *v) /* print the global ip_vs_stats totals and rates in hex; always returns 0 */
+{
+
+/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+ seq_puts(seq,
+ " Total Incoming Outgoing Incoming Outgoing\n");
+ seq_printf(seq,
+ " Conns Packets Packets Bytes Bytes\n");
+
+ spin_lock_bh(&ip_vs_stats.lock); /* both value rows are printed under the stats lock */
+ seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
+ ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
+ (unsigned long long) ip_vs_stats.ustats.inbytes,
+ (unsigned long long) ip_vs_stats.ustats.outbytes);
+
+/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+ seq_puts(seq,
+ " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
+ seq_printf(seq,"%8X %8X %8X %16X %16X\n",
+ ip_vs_stats.ustats.cps,
+ ip_vs_stats.ustats.inpps,
+ ip_vs_stats.ustats.outpps,
+ ip_vs_stats.ustats.inbps,
+ ip_vs_stats.ustats.outbps);
+ spin_unlock_bh(&ip_vs_stats.lock);
+
+ return 0;
+}
+
+static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) /* open: single-record seq_file rendered by ip_vs_stats_show() */
+{
+ return single_open(file, ip_vs_stats_show, NULL);
+}
+
+static const struct file_operations ip_vs_stats_fops = { /* file operations for the global statistics file */
+ .owner = THIS_MODULE,
+ .open = ip_vs_stats_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release, /* matches single_open() in ip_vs_stats_seq_open() */
+};
+
+#endif
+
+/*
+ * Set timeout values for tcp tcpfin udp in the timeout_table.
+ *
+ * Each field of @u is multiplied by HZ before being stored; a field of
+ * zero leaves the corresponding timeout unchanged.
+ *
+ * Returns 0 on success, or -EINVAL when a timeout is negative or so
+ * large that multiplying it by HZ would overflow an int. All fields are
+ * validated before anything is stored, so a rejected request changes
+ * nothing.
+ */
+static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+{
+	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
+		  u->tcp_timeout,
+		  u->tcp_fin_timeout,
+		  u->udp_timeout);
+
+	/* reject values whose conversion below would overflow */
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) ||
+	    u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ))
+		return -EINVAL;
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ))
+		return -EINVAL;
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	if (u->tcp_timeout) {
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
+			= u->tcp_timeout * HZ;
+	}
+
+	if (u->tcp_fin_timeout) {
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+			= u->tcp_fin_timeout * HZ;
+	}
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	if (u->udp_timeout) {
+		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
+			= u->udp_timeout * HZ;
+	}
+#endif
+	return 0;
+}
+
+
+#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL) /* index of a set command in set_arglen[] */
+#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
+#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
+ sizeof(struct ip_vs_dest_user)) /* service struct immediately followed by a dest struct */
+#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
+#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
+#define MAX_ARG_LEN SVCDEST_ARG_LEN /* size of the on-stack argument buffer in do_ip_vs_set_ctl() */
+
+static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = { /* exact argument length required per set command; NOTE(review): unsigned char, so each length must stay below 256 */
+ [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
+ [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
+ [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
+ [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
+ [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
+ [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
+ [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
+ [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
+ [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
+ [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
+ [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
+};
+
+/*
+ * Fill the extended in-kernel service request @usvc from the compat
+ * userspace layout @usvc_compat. The address family is always set to
+ * AF_INET and only the IPv4 member of the address is written.
+ */
+static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
+				   struct ip_vs_service_user *usvc_compat)
+{
+	/* what identifies the service */
+	usvc->af	= AF_INET;
+	usvc->protocol	= usvc_compat->protocol;
+	usvc->addr.ip	= usvc_compat->addr;
+	usvc->port	= usvc_compat->port;
+	usvc->fwmark	= usvc_compat->fwmark;
+
+	/* the name is shared by pointer; no deep copy is needed here */
+	usvc->sched_name = usvc_compat->sched_name;
+
+	/* remaining settings */
+	usvc->flags	= usvc_compat->flags;
+	usvc->timeout	= usvc_compat->timeout;
+	usvc->netmask	= usvc_compat->netmask;
+}
+
+/*
+ * Fill the extended in-kernel destination request @udest from the compat
+ * userspace layout @udest_compat. Only the IPv4 member of the address is
+ * written.
+ */
+static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
+				    struct ip_vs_dest_user *udest_compat)
+{
+	/* real server address and port */
+	udest->addr.ip		= udest_compat->addr;
+	udest->port		= udest_compat->port;
+
+	/* forwarding flags, weight and connection thresholds */
+	udest->conn_flags	= udest_compat->conn_flags;
+	udest->weight		= udest_compat->weight;
+	udest->u_threshold	= udest_compat->u_threshold;
+	udest->l_threshold	= udest_compat->l_threshold;
+}
+
+/*
+ * setsockopt() handler of the legacy IPVS control interface.
+ *
+ * @sk:   socket the option was set on (unused here)
+ * @cmd:  one of the IP_VS_SO_SET_* commands
+ * @user: userspace argument buffer
+ * @len:  size of @user; must match set_arglen[] for @cmd exactly
+ *
+ * Returns 0 on success or a negative errno.  Requires CAP_NET_ADMIN.
+ * The module use count is raised and __ip_vs_mutex is held while the
+ * command runs.
+ */
+static int
+do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+	int ret;
+	unsigned char arg[MAX_ARG_LEN];
+	struct ip_vs_service_user *usvc_compat;
+	struct ip_vs_service_user_kern usvc;
+	struct ip_vs_service *svc;
+	struct ip_vs_dest_user *udest_compat;
+	struct ip_vs_dest_user_kern udest;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	/*
+	 * cmd indexes set_arglen[] and len bounds the copy into arg[], so
+	 * validate both here instead of relying on the sockopt core and on
+	 * the table contents to keep them in range.
+	 */
+	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
+		return -EINVAL;
+	if (len > MAX_ARG_LEN)
+		return -EINVAL;
+
+	if (len != set_arglen[SET_CMDID(cmd)]) {
+		IP_VS_ERR("set_ctl: len %u != %u\n",
+			  len, set_arglen[SET_CMDID(cmd)]);
+		return -EINVAL;
+	}
+
+	if (copy_from_user(arg, user, len) != 0)
+		return -EFAULT;
+
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	if (mutex_lock_interruptible(&__ip_vs_mutex)) {
+		ret = -ERESTARTSYS;
+		goto out_dec;
+	}
+
+	if (cmd == IP_VS_SO_SET_FLUSH) {
+		/* Flush the virtual service */
+		ret = ip_vs_flush();
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
+		/* Set timeout values for (tcp tcpfin udp) */
+		ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
+		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
+
+		/*
+		 * The interface name is handed on as a C string; refuse a
+		 * name that userspace did not NUL-terminate inside its field.
+		 */
+		if (strnlen(dm->mcast_ifn, sizeof(dm->mcast_ifn)) ==
+		    sizeof(dm->mcast_ifn)) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
+		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
+		ret = stop_sync_thread(dm->state);
+		goto out_unlock;
+	}
+
+	/* The destination descriptor, if any, directly follows the service */
+	usvc_compat = (struct ip_vs_service_user *)arg;
+	udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
+
+	/* We only use the new structs internally, so copy userspace compat
+	 * structs to extended internal versions */
+	ip_vs_copy_usvc_compat(&usvc, usvc_compat);
+	ip_vs_copy_udest_compat(&udest, udest_compat);
+
+	if (cmd == IP_VS_SO_SET_ZERO) {
+		/* if no service address is set, zero counters in all */
+		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
+			ret = ip_vs_zero_all();
+			goto out_unlock;
+		}
+	}
+
+	/*
+	 * Adding or editing a service consumes the scheduler name as a C
+	 * string, so it must be terminated within its fixed-size field.
+	 */
+	if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) &&
+	    strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) ==
+	    IP_VS_SCHEDNAME_MAXLEN) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
+	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
+		IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
+			  usvc.protocol, NIPQUAD(usvc.addr.ip),
+			  ntohs(usvc.port), usvc.sched_name);
+		ret = -EFAULT;
+		goto out_unlock;
+	}
+
+	/* Lookup the exact service by <protocol, addr, port> or fwmark */
+	if (usvc.fwmark == 0)
+		svc = __ip_vs_service_get(usvc.af, usvc.protocol,
+					  &usvc.addr, usvc.port);
+	else
+		svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
+
+	/* Everything except "add" operates on an existing service */
+	if (cmd != IP_VS_SO_SET_ADD
+	    && (svc == NULL || svc->protocol != usvc.protocol)) {
+		ret = -ESRCH;
+		goto out_unlock;
+	}
+
+	switch (cmd) {
+	case IP_VS_SO_SET_ADD:
+		if (svc != NULL)
+			ret = -EEXIST;
+		else
+			ret = ip_vs_add_service(&usvc, &svc);
+		break;
+	case IP_VS_SO_SET_EDIT:
+		ret = ip_vs_edit_service(svc, &usvc);
+		break;
+	case IP_VS_SO_SET_DEL:
+		ret = ip_vs_del_service(svc);
+		/* after a successful delete svc must not be touched again */
+		if (!ret)
+			goto out_unlock;
+		break;
+	case IP_VS_SO_SET_ZERO:
+		ret = ip_vs_zero_service(svc);
+		break;
+	case IP_VS_SO_SET_ADDDEST:
+		ret = ip_vs_add_dest(svc, &udest);
+		break;
+	case IP_VS_SO_SET_EDITDEST:
+		ret = ip_vs_edit_dest(svc, &udest);
+		break;
+	case IP_VS_SO_SET_DELDEST:
+		ret = ip_vs_del_dest(svc, &udest);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (svc)
+		ip_vs_service_put(svc);
+
+  out_unlock:
+	mutex_unlock(&__ip_vs_mutex);
+  out_dec:
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+
+	return ret;
+}
+
+
+/*
+ * Copy the user-visible statistics block of @src into @dst.  The copy is
+ * made with src->lock held and bottom halves disabled.
+ */
+static void
+ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
+{
+	const void *from = &src->ustats;
+
+	spin_lock_bh(&src->lock);
+	memcpy(dst, from, sizeof(*dst));
+	spin_unlock_bh(&src->lock);
+}
+
+/*
+ * Fill the legacy sockopt service entry @dst from the kernel service @src.
+ * Only the IPv4 member of the service address is exported, and the timeout
+ * is divided by HZ before being reported.
+ */
+static void
+ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
+{
+	/* identification */
+	dst->fwmark	= src->fwmark;
+	dst->protocol	= src->protocol;
+	dst->addr	= src->addr.ip;
+	dst->port	= src->port;
+
+	/* configuration */
+	strlcpy(dst->sched_name, src->scheduler->name,
+		sizeof(dst->sched_name));
+	dst->flags	= src->flags;
+	dst->timeout	= src->timeout / HZ;
+	dst->netmask	= src->netmask;
+
+	/* state */
+	dst->num_dests	= src->num_dests;
+	ip_vs_copy_stats(&dst->stats, &src->stats);
+}
+
+/*
+ * Copy up to get->num_services IPv4 service entries to the userspace table
+ * uptr->entrytable[].  The <protocol, addr, port> hash table is walked
+ * first, then the fwmark hash table.
+ *
+ * Returns 0 when the walk finished or the requested number of entries was
+ * reached, -EFAULT when writing to userspace failed.
+ */
+static inline int
+__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+			    struct ip_vs_get_services __user *uptr)
+{
+	struct ip_vs_service_entry entry;
+	struct ip_vs_service *svc;
+	int copied = 0;
+	int bucket;
+
+	for (bucket = 0; bucket < IP_VS_SVC_TAB_SIZE; bucket++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[bucket], s_list) {
+			/* Only expose IPv4 entries to old interface */
+			if (svc->af != AF_INET)
+				continue;
+
+			if (copied >= get->num_services)
+				return 0;
+
+			/* clear first so no stale stack bytes are exported */
+			memset(&entry, 0, sizeof(entry));
+			ip_vs_copy_service(&entry, svc);
+			if (copy_to_user(&uptr->entrytable[copied],
+					 &entry, sizeof(entry)))
+				return -EFAULT;
+			copied++;
+		}
+	}
+
+	for (bucket = 0; bucket < IP_VS_SVC_TAB_SIZE; bucket++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[bucket], f_list) {
+			/* Only expose IPv4 entries to old interface */
+			if (svc->af != AF_INET)
+				continue;
+
+			if (copied >= get->num_services)
+				return 0;
+
+			memset(&entry, 0, sizeof(entry));
+			ip_vs_copy_service(&entry, svc);
+			if (copy_to_user(&uptr->entrytable[copied],
+					 &entry, sizeof(entry)))
+				return -EFAULT;
+			copied++;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Copy up to get->num_dests destination entries of one IPv4 service to the
+ * userspace table uptr->entrytable[].  The service is looked up by fwmark
+ * when get->fwmark is set, otherwise by <protocol, addr, port>.
+ *
+ * Returns 0 on success, -ESRCH when the service does not exist and -EFAULT
+ * when writing to userspace failed.
+ */
+static inline int
+__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+			 struct ip_vs_get_dests __user *uptr)
+{
+	struct ip_vs_service *svc;
+	union nf_inet_addr addr = { .ip = get->addr };
+	int ret = 0;
+
+	if (get->fwmark)
+		svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
+	else
+		svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
+					  get->port);
+
+	if (svc) {
+		int count = 0;
+		struct ip_vs_dest *dest;
+		struct ip_vs_dest_entry entry;
+
+		list_for_each_entry(dest, &svc->destinations, n_list) {
+			if (count >= get->num_dests)
+				break;
+
+			/*
+			 * The whole structure is copied to userspace below,
+			 * so clear it first: bytes not covered by the member
+			 * assignments must not expose kernel stack contents.
+			 */
+			memset(&entry, 0, sizeof(entry));
+
+			entry.addr = dest->addr.ip;
+			entry.port = dest->port;
+			entry.conn_flags = atomic_read(&dest->conn_flags);
+			entry.weight = atomic_read(&dest->weight);
+			entry.u_threshold = dest->u_threshold;
+			entry.l_threshold = dest->l_threshold;
+			entry.activeconns = atomic_read(&dest->activeconns);
+			entry.inactconns = atomic_read(&dest->inactconns);
+			entry.persistconns = atomic_read(&dest->persistconns);
+			ip_vs_copy_stats(&entry.stats, &dest->stats);
+			if (copy_to_user(&uptr->entrytable[count],
+					 &entry, sizeof(entry))) {
+				ret = -EFAULT;
+				break;
+			}
+			count++;
+		}
+		/* drop the reference taken by the lookup */
+		ip_vs_service_put(svc);
+	} else
+		ret = -ESRCH;
+	return ret;
+}
+
+/*
+ * Report the current TCP established, TCP FIN-wait and UDP timeouts in
+ * seconds.
+ *
+ * A field whose protocol support is compiled out is reported as 0.  The
+ * structure is cleared up front because callers copy it to userspace as a
+ * whole, and must not hand out uninitialised stack bytes.
+ */
+static inline void
+__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+{
+	memset(u, 0, sizeof(*u));
+
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	u->tcp_timeout =
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+	u->tcp_fin_timeout =
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	u->udp_timeout =
+		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+#endif
+}
+
+
+/* Index of a getsockopt command inside get_arglen[] */
+#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
+#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
+#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
+#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
+#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
+#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
+#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
+
+/*
+ * Minimum userspace buffer size per getsockopt command, and the number of
+ * bytes do_ip_vs_get_ctl() copies in before dispatching.  For the services
+ * and destinations commands this covers the fixed header only.
+ */
+static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
+	[GET_CMDID(IP_VS_SO_GET_VERSION)]	= 64,
+	[GET_CMDID(IP_VS_SO_GET_INFO)]		= GET_INFO_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_SERVICES)]	= GET_SERVICES_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_SERVICE)]	= GET_SERVICE_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_DESTS)]		= GET_DESTS_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_TIMEOUT)]	= GET_TIMEOUT_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_DAEMON)]	= GET_DAEMON_ARG_LEN,
+};
+
+/*
+ * getsockopt() handler of the legacy IPVS control interface.
+ *
+ * @sk:   socket the option was read from (unused here)
+ * @cmd:  one of the IP_VS_SO_GET_* commands
+ * @user: userspace buffer; its fixed-size head is read as the request and
+ *        the reply is written back into it
+ * @len:  in: size of @user; updated only by IP_VS_SO_GET_VERSION
+ *
+ * Returns 0 on success or a negative errno.  Requires CAP_NET_ADMIN and
+ * runs with __ip_vs_mutex held.
+ */
+static int
+do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	unsigned char arg[128];
+	unsigned int copylen;
+	int ret = 0;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	/*
+	 * cmd indexes get_arglen[] and the table entry bounds the copy into
+	 * arg[]; check both explicitly so that neither a stray command nor
+	 * a grown request structure can overrun the stack buffer.
+	 */
+	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
+		return -EINVAL;
+
+	copylen = get_arglen[GET_CMDID(cmd)];
+	if (copylen > sizeof(arg))
+		return -EINVAL;
+
+	if (*len < (int) copylen) {
+		IP_VS_ERR("get_ctl: len %u < %u\n", *len, copylen);
+		return -EINVAL;
+	}
+
+	if (copy_from_user(arg, user, copylen) != 0)
+		return -EFAULT;
+
+	if (mutex_lock_interruptible(&__ip_vs_mutex))
+		return -ERESTARTSYS;
+
+	switch (cmd) {
+	case IP_VS_SO_GET_VERSION:
+	{
+		char buf[64];
+
+		/* bounded formatting: buf can never be overrun */
+		snprintf(buf, sizeof(buf),
+			 "IP Virtual Server version %d.%d.%d (size=%d)",
+			 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
+			ret = -EFAULT;
+			goto out;
+		}
+		*len = strlen(buf)+1;
+	}
+	break;
+
+	case IP_VS_SO_GET_INFO:
+	{
+		struct ip_vs_getinfo info;
+		info.version = IP_VS_VERSION_CODE;
+		info.size = IP_VS_CONN_TAB_SIZE;
+		info.num_services = ip_vs_num_services;
+		if (copy_to_user(user, &info, sizeof(info)) != 0)
+			ret = -EFAULT;
+	}
+	break;
+
+	case IP_VS_SO_GET_SERVICES:
+	{
+		struct ip_vs_get_services *get;
+		int size;
+
+		/* the buffer must hold the header plus num_services entries */
+		get = (struct ip_vs_get_services *)arg;
+		size = sizeof(*get) +
+			sizeof(struct ip_vs_service_entry) * get->num_services;
+		if (*len != size) {
+			IP_VS_ERR("length: %u != %u\n", *len, size);
+			ret = -EINVAL;
+			goto out;
+		}
+		ret = __ip_vs_get_service_entries(get, user);
+	}
+	break;
+
+	case IP_VS_SO_GET_SERVICE:
+	{
+		struct ip_vs_service_entry *entry;
+		struct ip_vs_service *svc;
+		union nf_inet_addr addr;
+
+		entry = (struct ip_vs_service_entry *)arg;
+		addr.ip = entry->addr;
+		if (entry->fwmark)
+			svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
+		else
+			svc = __ip_vs_service_get(AF_INET, entry->protocol,
+						  &addr, entry->port);
+		if (svc) {
+			/* the request buffer is reused for the reply */
+			ip_vs_copy_service(entry, svc);
+			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
+				ret = -EFAULT;
+			ip_vs_service_put(svc);
+		} else
+			ret = -ESRCH;
+	}
+	break;
+
+	case IP_VS_SO_GET_DESTS:
+	{
+		struct ip_vs_get_dests *get;
+		int size;
+
+		/* the buffer must hold the header plus num_dests entries */
+		get = (struct ip_vs_get_dests *)arg;
+		size = sizeof(*get) +
+			sizeof(struct ip_vs_dest_entry) * get->num_dests;
+		if (*len != size) {
+			IP_VS_ERR("length: %u != %u\n", *len, size);
+			ret = -EINVAL;
+			goto out;
+		}
+		ret = __ip_vs_get_dest_entries(get, user);
+	}
+	break;
+
+	case IP_VS_SO_GET_TIMEOUT:
+	{
+		struct ip_vs_timeout_user t;
+
+		__ip_vs_get_timeouts(&t);
+		if (copy_to_user(user, &t, sizeof(t)) != 0)
+			ret = -EFAULT;
+	}
+	break;
+
+	case IP_VS_SO_GET_DAEMON:
+	{
+		/* slot 0 describes the master daemon, slot 1 the backup */
+		struct ip_vs_daemon_user d[2];
+
+		memset(&d, 0, sizeof(d));
+		if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
+			d[0].state = IP_VS_STATE_MASTER;
+			strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
+			d[0].syncid = ip_vs_master_syncid;
+		}
+		if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
+			d[1].state = IP_VS_STATE_BACKUP;
+			strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
+			d[1].syncid = ip_vs_backup_syncid;
+		}
+		if (copy_to_user(user, &d, sizeof(d)) != 0)
+			ret = -EFAULT;
+	}
+	break;
+
+	default:
+		ret = -EINVAL;
+	}
+
+  out:
+	mutex_unlock(&__ip_vs_mutex);
+	return ret;
+}
+
+
+static struct nf_sockopt_ops ip_vs_sockopts = { /* legacy [gs]etsockopt control interface */
+ .pf = PF_INET,
+ .set_optmin = IP_VS_BASE_CTL,
+ .set_optmax = IP_VS_SO_SET_MAX+1, /* one past the last set command */
+ .set = do_ip_vs_set_ctl,
+ .get_optmin = IP_VS_BASE_CTL,
+ .get_optmax = IP_VS_SO_GET_MAX+1, /* one past the last get command */
+ .get = do_ip_vs_get_ctl,
+ .owner = THIS_MODULE,
+};
+
+/*
+ * Generic Netlink interface
+ */
+
+/*
+ * IPVS genetlink family.
+ *
+ * maxattr is the highest top-level attribute type, so it has to be
+ * IPVS_CMD_ATTR_MAX: ip_vs_cmd_policy[] holds IPVS_CMD_ATTR_MAX + 1 entries
+ * and is indexed by attribute type.  Using the command count
+ * (IPVS_CMD_MAX) here would let attribute parsing index past the end of
+ * that policy table.
+ */
+static struct genl_family ip_vs_genl_family = {
+	.id		= GENL_ID_GENERATE,
+	.hdrsize	= 0,
+	.name		= IPVS_GENL_NAME,
+	.version	= IPVS_GENL_VERSION,
+	.maxattr	= IPVS_CMD_ATTR_MAX,
+};
+
+/* Policy used for first-level command attributes.
+ *
+ * Service, destination and daemon descriptions arrive as nested attribute
+ * sets; the parsers in this file validate each of them against its own
+ * policy table.  The three timeout values are plain 32-bit integers.
+ */
+static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
+ [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
+ [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
+ [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
+ [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
+ [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
+ [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON.
+ *
+ * The multicast interface name is a NUL-terminated string limited by
+ * IP_VS_IFNAME_MAXLEN; state and sync id are 32-bit integers.
+ */
+static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
+ [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
+ [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
+ .len = IP_VS_IFNAME_MAXLEN },
+ [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE.
+ *
+ * Address and flags are binary blobs; the .len given for them is
+ * sizeof(union nf_inet_addr) and sizeof(struct ip_vs_flags) respectively.
+ * NOTE(review): for NLA_BINARY the length is presumably an upper bound
+ * only, so consumers should not assume a full-size payload -- confirm
+ * against the netlink attribute documentation.
+ */
+static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
+ [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
+ [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
+ [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
+ .len = sizeof(union nf_inet_addr) },
+ [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
+ [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
+ [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
+ .len = IP_VS_SCHEDNAME_MAXLEN },
+ [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
+ .len = sizeof(struct ip_vs_flags) },
+ [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
+ [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST.
+ *
+ * The address is a binary blob with .len = sizeof(union nf_inet_addr); the
+ * port is 16 bit wide and all remaining scalar values are 32-bit integers.
+ * Statistics are carried as a further nested attribute set.
+ */
+static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
+ [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
+ .len = sizeof(union nf_inet_addr) },
+ [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
+ [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
+};
+
+/*
+ * Append the counters and rate values of @stats to @skb, wrapped in one
+ * nested attribute of type @container_type.  The values are read with
+ * stats->lock held.
+ *
+ * Returns 0 on success.  When the message runs out of room the partially
+ * written nest is cancelled and -EMSGSIZE is returned.
+ */
+static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
+				 struct ip_vs_stats *stats)
+{
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, container_type);
+	if (nest == NULL)
+		return -EMSGSIZE;
+
+	spin_lock_bh(&stats->lock);
+
+	/* totals; the NLA_PUT_* macros jump to nla_put_failure on overflow */
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
+
+	/* rates */
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
+
+	spin_unlock_bh(&stats->lock);
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	spin_unlock_bh(&stats->lock);
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+/*
+ * Append a full description of @svc to @skb as one nested
+ * IPVS_CMD_ATTR_SERVICE attribute.  A fwmark service is identified by its
+ * mark, any other service by <protocol, addr, port>.
+ *
+ * Returns 0 on success, -EMSGSIZE when the message has no room left; in
+ * that case the partially written nest is cancelled.
+ */
+static int ip_vs_genl_fill_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc)
+{
+	/* report every flag bit as valid */
+	struct ip_vs_flags flags = { .mask = ~0,
+				     .flags = svc->flags };
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
+	if (nest == NULL)
+		return -EMSGSIZE;
+
+	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
+
+	if (!svc->fwmark) {
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
+		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
+	} else {
+		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
+	}
+
+	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
+	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+/*
+ * Emit one IPVS_CMD_NEW_SERVICE message describing @svc as part of the
+ * multi-part dump driven by @cb.
+ *
+ * Returns the result of genlmsg_end() on success; on lack of space the
+ * started message is cancelled and -EMSGSIZE is returned.
+ */
+static int ip_vs_genl_dump_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc,
+				   struct netlink_callback *cb)
+{
+	void *hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid,
+				cb->nlh->nlmsg_seq, &ip_vs_genl_family,
+				NLM_F_MULTI, IPVS_CMD_NEW_SERVICE);
+
+	if (hdr == NULL)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_service(skb, svc) >= 0)
+		return genlmsg_end(skb, hdr);
+
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+/*
+ * Netlink dump callback listing all services: first those in the
+ * <protocol, addr, port> table, then those in the fwmark table.
+ *
+ * cb->args[0] holds the number of services already delivered by earlier
+ * invocations; those are skipped, and the counter is stored back before
+ * returning so the next invocation resumes after the last service that
+ * fitted into @skb.  Returns skb->len.
+ */
+static int ip_vs_genl_dump_services(struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	const int already_sent = cb->args[0];
+	struct ip_vs_service *svc;
+	int seen = 0;
+	int bucket;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	for (bucket = 0; bucket < IP_VS_SVC_TAB_SIZE; bucket++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[bucket], s_list) {
+			seen++;
+			if (seen <= already_sent)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				/* did not fit: retry it on the next call */
+				seen--;
+				goto done;
+			}
+		}
+	}
+
+	for (bucket = 0; bucket < IP_VS_SVC_TAB_SIZE; bucket++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[bucket], f_list) {
+			seen++;
+			if (seen <= already_sent)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				seen--;
+				goto done;
+			}
+		}
+	}
+
+done:
+	mutex_unlock(&__ip_vs_mutex);
+	cb->args[0] = seen;
+
+	return skb->len;
+}
+
+/*
+ * Parse the nested service attribute @nla into @usvc.
+ *
+ * @usvc:       result; cleared before any field is filled in
+ * @nla:        IPVS_CMD_ATTR_SERVICE attribute, may be NULL
+ * @full_entry: when non-zero, scheduler name, flags, timeout and netmask
+ *              are required as well; otherwise only the identifying
+ *              fields (af plus either fwmark or protocol/addr/port)
+ *
+ * Returns 0 on success, -EINVAL for a missing or malformed attribute and
+ * -EAFNOSUPPORT for an address family that is not supported.
+ *
+ * usvc->sched_name points into the attribute payload, so @usvc must not
+ * outlive the message @nla belongs to.
+ */
+static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
+				    struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
+	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
+
+	/* Parse mandatory identifying service fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
+		return -EINVAL;
+
+	nla_af		= attrs[IPVS_SVC_ATTR_AF];
+	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
+	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
+	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
+	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
+
+	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
+		return -EINVAL;
+
+	/*
+	 * Start from a clean slate: nla_memcpy() below copies at most the
+	 * attribute's own length, and several fields are only set on some
+	 * paths.  Nothing in usvc may be left holding stack garbage.
+	 */
+	memset(usvc, 0, sizeof(*usvc));
+
+	usvc->af = nla_get_u16(nla_af);
+#ifdef CONFIG_IP_VS_IPV6
+	if (usvc->af != AF_INET && usvc->af != AF_INET6)
+#else
+	if (usvc->af != AF_INET)
+#endif
+		return -EAFNOSUPPORT;
+
+	if (nla_fwmark) {
+		usvc->protocol = IPPROTO_TCP;
+		usvc->fwmark = nla_get_u32(nla_fwmark);
+	} else {
+		usvc->protocol = nla_get_u16(nla_protocol);
+		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
+		usvc->port = nla_get_u16(nla_port);
+		usvc->fwmark = 0;
+	}
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
+			      *nla_netmask;
+		struct ip_vs_flags flags;
+		struct ip_vs_service *svc;
+
+		nla_sched	= attrs[IPVS_SVC_ATTR_SCHED_NAME];
+		nla_flags	= attrs[IPVS_SVC_ATTR_FLAGS];
+		nla_timeout	= attrs[IPVS_SVC_ATTR_TIMEOUT];
+		nla_netmask	= attrs[IPVS_SVC_ATTR_NETMASK];
+
+		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
+			return -EINVAL;
+
+		/*
+		 * The policy only caps the size of the flags blob.  A shorter
+		 * one would leave part of 'flags' (possibly the mask)
+		 * uninitialised, so insist on a complete structure.
+		 */
+		if (nla_len(nla_flags) < sizeof(flags))
+			return -EINVAL;
+
+		nla_memcpy(&flags, nla_flags, sizeof(flags));
+
+		/* prefill flags from service if it already exists */
+		if (usvc->fwmark)
+			svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
+		else
+			svc = __ip_vs_service_get(usvc->af, usvc->protocol,
+						  &usvc->addr, usvc->port);
+		if (svc) {
+			usvc->flags = svc->flags;
+			ip_vs_service_put(svc);
+		} else
+			usvc->flags = 0;
+
+		/* set new flags from userland */
+		usvc->flags = (usvc->flags & ~flags.mask) |
+			      (flags.flags & flags.mask);
+		usvc->sched_name = nla_data(nla_sched);
+		usvc->timeout = nla_get_u32(nla_timeout);
+		usvc->netmask = nla_get_u32(nla_netmask);
+	}
+
+	return 0;
+}
+
+/*
+ * Look up the service identified by the nested service attribute @nla.
+ *
+ * Returns an ERR_PTR() when the attribute cannot be parsed; otherwise the
+ * result of the fwmark or <protocol, addr, port> lookup, which callers in
+ * this file treat as NULL when nothing matches and release with
+ * ip_vs_service_put() when it is non-NULL.
+ */
+static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+{
+	struct ip_vs_service_user_kern key;
+	int err = ip_vs_genl_parse_service(&key, nla, 0);
+
+	if (err)
+		return ERR_PTR(err);
+
+	if (!key.fwmark)
+		return __ip_vs_service_get(key.af, key.protocol,
+					   &key.addr, key.port);
+
+	return __ip_vs_svc_fwm_get(key.af, key.fwmark);
+}
+
+/*
+ * Append a description of @dest to @skb as one nested IPVS_CMD_ATTR_DEST
+ * attribute: address, port, forwarding method, weight, thresholds,
+ * connection counters and statistics.
+ *
+ * Returns 0 on success, -EMSGSIZE when the message has no room left; in
+ * that case the partially written nest is cancelled.
+ */
+static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
+{
+	struct nlattr *nest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
+
+	if (nest == NULL)
+		return -EMSGSIZE;
+
+	/* identification */
+	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
+	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
+
+	/* configuration; only the forwarding bits of conn_flags are exported */
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
+		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
+
+	/* current connection counters */
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
+		    atomic_read(&dest->activeconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
+		    atomic_read(&dest->inactconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
+		    atomic_read(&dest->persistconns));
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+/*
+ * Emit one IPVS_CMD_NEW_DEST message describing @dest as part of the
+ * multi-part dump driven by @cb.
+ *
+ * Returns the result of genlmsg_end() on success; on lack of space the
+ * started message is cancelled and -EMSGSIZE is returned.
+ */
+static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
+				struct netlink_callback *cb)
+{
+	void *hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid,
+				cb->nlh->nlmsg_seq, &ip_vs_genl_family,
+				NLM_F_MULTI, IPVS_CMD_NEW_DEST);
+
+	if (hdr == NULL)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_dest(skb, dest) >= 0)
+		return genlmsg_end(skb, hdr);
+
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+/*
+ * Netlink dump callback listing the destinations of the service named in
+ * the request.
+ *
+ * cb->args[0] holds the number of destinations already delivered; those
+ * are skipped and the counter is stored back afterwards.  If the request
+ * cannot be parsed or the service does not exist, nothing is added to
+ * @skb.  Returns skb->len in every case.
+ */
+static int ip_vs_genl_dump_dests(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+	const int already_sent = cb->args[0];
+	struct ip_vs_service *svc;
+	struct ip_vs_dest *dest;
+	int seen = 0;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	/* Try to find the service for which to dump destinations */
+	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
+			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
+		goto unlock;
+
+	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+	if (svc == NULL || IS_ERR(svc))
+		goto unlock;
+
+	/* Dump the destinations */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		seen++;
+		if (seen <= already_sent)
+			continue;
+		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
+			/* did not fit: retry it on the next call */
+			seen--;
+			break;
+		}
+	}
+
+	cb->args[0] = seen;
+	ip_vs_service_put(svc);
+
+unlock:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+/*
+ * Parse the nested destination attribute @nla into @udest.
+ *
+ * @udest:      result; cleared before any field is filled in
+ * @nla:        IPVS_CMD_ATTR_DEST attribute, may be NULL
+ * @full_entry: when non-zero, forwarding method, weight and both
+ *              thresholds are required in addition to address and port
+ *
+ * Returns 0 on success, -EINVAL for a missing or malformed attribute.
+ */
+static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
+				 struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
+	struct nlattr *nla_addr, *nla_port;
+
+	/* Parse mandatory identifying destination fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
+		return -EINVAL;
+
+	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
+	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
+
+	if (!(nla_addr && nla_port))
+		return -EINVAL;
+
+	/*
+	 * nla_memcpy() copies at most the attribute's own length and the
+	 * remaining fields are only set for a full entry, so clear the
+	 * result first to keep stack garbage out of it.
+	 */
+	memset(udest, 0, sizeof(*udest));
+
+	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
+	udest->port = nla_get_u16(nla_port);
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
+			      *nla_l_thresh;
+
+		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
+		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
+		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
+		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
+
+		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
+			return -EINVAL;
+
+		/* only the forwarding-method bits are taken from userspace */
+		udest->conn_flags = nla_get_u32(nla_fwd)
+				    & IP_VS_CONN_F_FWD_MASK;
+		udest->weight = nla_get_u32(nla_weight);
+		udest->u_threshold = nla_get_u32(nla_u_thresh);
+		udest->l_threshold = nla_get_u32(nla_l_thresh);
+	}
+
+	return 0;
+}
+
+/*
+ * Append a sync daemon description (state, multicast interface name and
+ * sync id) to @skb as one nested IPVS_CMD_ATTR_DAEMON attribute.
+ *
+ * Returns 0 on success, -EMSGSIZE when the message has no room left; in
+ * that case the partially written nest is cancelled.
+ */
+static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid)
+{
+	struct nlattr *nest = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
+
+	if (nest == NULL)
+		return -EMSGSIZE;
+
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
+	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+/*
+ * Emit one IPVS_CMD_NEW_DAEMON message describing a sync daemon as part of
+ * the multi-part dump driven by @cb.
+ *
+ * Returns the result of genlmsg_end() on success; on lack of space the
+ * started message is cancelled and -EMSGSIZE is returned.
+ */
+static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid,
+				  struct netlink_callback *cb)
+{
+	void *hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid,
+				cb->nlh->nlmsg_seq, &ip_vs_genl_family,
+				NLM_F_MULTI, IPVS_CMD_NEW_DAEMON);
+
+	if (hdr == NULL)
+		return -EMSGSIZE;
+
+	if (!ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
+		return genlmsg_end(skb, hdr);
+
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+/*
+ * Netlink dump callback listing the running sync daemons.
+ *
+ * cb->args[0] and cb->args[1] record whether the master and the backup
+ * daemon, respectively, have already been reported, so that each one is
+ * emitted at most once per dump.  Returns skb->len.
+ */
+static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	mutex_lock(&__ip_vs_mutex);
+
+	if (!cb->args[0] && (ip_vs_sync_state & IP_VS_STATE_MASTER)) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
+					   ip_vs_master_mcast_ifn,
+					   ip_vs_master_syncid, cb) < 0)
+			goto unlock;
+
+		cb->args[0] = 1;
+	}
+
+	if (!cb->args[1] && (ip_vs_sync_state & IP_VS_STATE_BACKUP)) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
+					   ip_vs_backup_mcast_ifn,
+					   ip_vs_backup_syncid, cb) < 0)
+			goto unlock;
+
+		cb->args[1] = 1;
+	}
+
+unlock:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+/*
+ * Start a sync daemon from already parsed daemon attributes.  State,
+ * multicast interface name and sync id are all mandatory.
+ *
+ * Returns -EINVAL when one of them is missing, otherwise the result of
+ * start_sync_thread().
+ */
+static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+{
+	struct nlattr *state	= attrs[IPVS_DAEMON_ATTR_STATE];
+	struct nlattr *ifname	= attrs[IPVS_DAEMON_ATTR_MCAST_IFN];
+	struct nlattr *sync_id	= attrs[IPVS_DAEMON_ATTR_SYNC_ID];
+
+	if (!state || !ifname || !sync_id)
+		return -EINVAL;
+
+	return start_sync_thread(nla_get_u32(state), nla_data(ifname),
+				 nla_get_u32(sync_id));
+}
+
+/*
+ * Stop a sync daemon; only the state attribute is needed to select it.
+ *
+ * Returns -EINVAL when the state attribute is missing, otherwise the
+ * result of stop_sync_thread().
+ */
+static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+{
+	struct nlattr *state = attrs[IPVS_DAEMON_ATTR_STATE];
+
+	if (state == NULL)
+		return -EINVAL;
+
+	return stop_sync_thread(nla_get_u32(state));
+}
+
+/*
+ * Apply the timeout attributes of an IPVS_CMD_SET_CONFIG request.
+ *
+ * The current timeouts are fetched first, so a timeout whose attribute is
+ * absent is passed to ip_vs_set_timeout() with the value that was read.
+ * Returns the result of ip_vs_set_timeout().
+ */
+static int ip_vs_genl_set_config(struct nlattr **attrs)
+{
+	struct nlattr *tcp	= attrs[IPVS_CMD_ATTR_TIMEOUT_TCP];
+	struct nlattr *tcp_fin	= attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN];
+	struct nlattr *udp	= attrs[IPVS_CMD_ATTR_TIMEOUT_UDP];
+	struct ip_vs_timeout_user t;
+
+	__ip_vs_get_timeouts(&t);
+
+	if (tcp)
+		t.tcp_timeout = nla_get_u32(tcp);
+	if (tcp_fin)
+		t.tcp_fin_timeout = nla_get_u32(tcp_fin);
+	if (udp)
+		t.udp_timeout = nla_get_u32(udp);
+
+	return ip_vs_set_timeout(&t);
+}
+
+/*
+ * Generic Netlink handler for every command that modifies IPVS state:
+ * flush, global timeouts, sync daemons, zeroing counters, and adding,
+ * editing or deleting services and destinations.
+ *
+ * Runs with __ip_vs_mutex held.  Returns 0 on success or a negative errno
+ * (-EINVAL for malformed requests, -ESRCH when the referenced service does
+ * not exist, -EEXIST when adding a service that already exists, or whatever
+ * the underlying operation reports).
+ */
+static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ip_vs_service *svc = NULL;
+	struct ip_vs_service_user_kern usvc;
+	struct ip_vs_dest_user_kern udest;
+	int ret = 0, cmd;
+	int need_full_svc = 0, need_full_dest = 0;
+
+	cmd = info->genlhdr->cmd;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	/* Commands that do not operate on one particular service */
+	if (cmd == IPVS_CMD_FLUSH) {
+		ret = ip_vs_flush();
+		goto out;
+	} else if (cmd == IPVS_CMD_SET_CONFIG) {
+		ret = ip_vs_genl_set_config(info->attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
+		   cmd == IPVS_CMD_DEL_DAEMON) {
+
+		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
+
+		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
+		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
+				     info->attrs[IPVS_CMD_ATTR_DAEMON],
+				     ip_vs_daemon_policy)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (cmd == IPVS_CMD_NEW_DAEMON)
+			ret = ip_vs_genl_new_daemon(daemon_attrs);
+		else
+			ret = ip_vs_genl_del_daemon(daemon_attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_ZERO &&
+		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
+		/* no service given: zero the counters of all of them */
+		ret = ip_vs_zero_all();
+		goto out;
+	}
+
+	/* All following commands require a service argument, so check if we
+	 * received a valid one. We need a full service specification when
+	 * adding / editing a service. Only identifying members otherwise. */
+	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
+		need_full_svc = 1;
+
+	ret = ip_vs_genl_parse_service(&usvc,
+				       info->attrs[IPVS_CMD_ATTR_SERVICE],
+				       need_full_svc);
+	if (ret)
+		goto out;
+
+	/* Lookup the exact service by <protocol, addr, port> or fwmark */
+	if (usvc.fwmark == 0)
+		svc = __ip_vs_service_get(usvc.af, usvc.protocol,
+					  &usvc.addr, usvc.port);
+	else
+		svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
+
+	/* Unless we're adding a new service, the service must already exist */
+	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
+		ret = -ESRCH;
+		goto out;
+	}
+
+	/* Destination commands require a valid destination argument. For
+	 * adding / editing a destination, we need a full destination
+	 * specification. */
+	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
+	    cmd == IPVS_CMD_DEL_DEST) {
+		if (cmd != IPVS_CMD_DEL_DEST)
+			need_full_dest = 1;
+
+		ret = ip_vs_genl_parse_dest(&udest,
+					    info->attrs[IPVS_CMD_ATTR_DEST],
+					    need_full_dest);
+		if (ret)
+			goto out;
+	}
+
+	switch (cmd) {
+	case IPVS_CMD_NEW_SERVICE:
+		if (svc == NULL)
+			ret = ip_vs_add_service(&usvc, &svc);
+		else
+			ret = -EEXIST;
+		break;
+	case IPVS_CMD_SET_SERVICE:
+		ret = ip_vs_edit_service(svc, &usvc);
+		break;
+	case IPVS_CMD_DEL_SERVICE:
+		ret = ip_vs_del_service(svc);
+		/*
+		 * After a successful delete the service must not be touched
+		 * again, so forget the pointer and skip the put at 'out'.
+		 * The sockopt handler in this file treats a successful
+		 * delete the same way.
+		 */
+		if (!ret)
+			svc = NULL;
+		break;
+	case IPVS_CMD_NEW_DEST:
+		ret = ip_vs_add_dest(svc, &udest);
+		break;
+	case IPVS_CMD_SET_DEST:
+		ret = ip_vs_edit_dest(svc, &udest);
+		break;
+	case IPVS_CMD_DEL_DEST:
+		ret = ip_vs_del_dest(svc, &udest);
+		break;
+	case IPVS_CMD_ZERO:
+		ret = ip_vs_zero_service(svc);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+out:
+	/* drop the reference taken by the lookup (or by a successful add) */
+	if (svc)
+		ip_vs_service_put(svc);
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+/*
+ * Generic Netlink handler for the single-reply queries: one service, the
+ * global timeout configuration, or general IPVS information.
+ *
+ * A reply message is built under __ip_vs_mutex and unicast to the
+ * requester.  Returns the result of genlmsg_unicast() on success,
+ * -EINVAL for an unknown command, -ENOMEM when no reply buffer could be
+ * allocated, -EMSGSIZE when the reply did not fit, and for service
+ * queries the parse error or -ESRCH when the service does not exist.
+ */
+static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	const int cmd = info->genlhdr->cmd;
+	struct sk_buff *msg;
+	int reply_cmd;
+	void *reply;
+	int ret;
+
+	/* pick the command code the reply is labelled with */
+	switch (cmd) {
+	case IPVS_CMD_GET_SERVICE:
+		reply_cmd = IPVS_CMD_NEW_SERVICE;
+		break;
+	case IPVS_CMD_GET_INFO:
+		reply_cmd = IPVS_CMD_SET_INFO;
+		break;
+	case IPVS_CMD_GET_CONFIG:
+		reply_cmd = IPVS_CMD_SET_CONFIG;
+		break;
+	default:
+		IP_VS_ERR("unknown Generic Netlink command\n");
+		return -EINVAL;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (msg == NULL)
+		return -ENOMEM;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
+	if (!reply)
+		goto nla_put_failure;
+
+	switch (cmd) {
+	case IPVS_CMD_GET_SERVICE:
+	{
+		struct ip_vs_service *svc;
+
+		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+		if (IS_ERR(svc)) {
+			ret = PTR_ERR(svc);
+			goto out_err;
+		}
+		if (svc == NULL) {
+			ret = -ESRCH;
+			goto out_err;
+		}
+
+		ret = ip_vs_genl_fill_service(msg, svc);
+		ip_vs_service_put(svc);
+		if (ret)
+			goto nla_put_failure;
+
+		break;
+	}
+
+	case IPVS_CMD_GET_CONFIG:
+	{
+		struct ip_vs_timeout_user t;
+
+		__ip_vs_get_timeouts(&t);
+		/* only timeouts of compiled-in protocols are reported */
+#ifdef CONFIG_IP_VS_PROTO_TCP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
+			    t.tcp_fin_timeout);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
+#endif
+
+		break;
+	}
+
+	case IPVS_CMD_GET_INFO:
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
+			    IP_VS_CONN_TAB_SIZE);
+		break;
+	}
+
+	genlmsg_end(msg, reply);
+	ret = genlmsg_unicast(msg, info->snd_pid);
+	goto out;
+
+nla_put_failure:
+	IP_VS_ERR("not enough space in Netlink message\n");
+	ret = -EMSGSIZE;
+
+out_err:
+	/* the reply was never sent, so it is still ours to free */
+	nlmsg_free(msg);
+out:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+
+static struct genl_ops ip_vs_genl_ops[] __read_mostly = { /* one entry per IPVS command; all need admin permission */
+ { /* add a service */
+ .cmd = IPVS_CMD_NEW_SERVICE,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ { /* edit a service */
+ .cmd = IPVS_CMD_SET_SERVICE,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ { /* delete a service */
+ .cmd = IPVS_CMD_DEL_SERVICE,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ { /* query one service (doit) or list all services (dumpit) */
+ .cmd = IPVS_CMD_GET_SERVICE,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_get_cmd,
+ .dumpit = ip_vs_genl_dump_services,
+ .policy = ip_vs_cmd_policy,
+ },
+ { /* add a destination to a service */
+ .cmd = IPVS_CMD_NEW_DEST,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ { /* edit a destination */
+ .cmd = IPVS_CMD_SET_DEST,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ { /* delete a destination */
+ .cmd = IPVS_CMD_DEL_DEST,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ { /* list the destinations of one service; dump only */
+ .cmd = IPVS_CMD_GET_DEST,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .dumpit = ip_vs_genl_dump_dests,
+ },
+ { /* start a sync daemon */
+ .cmd = IPVS_CMD_NEW_DAEMON,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ { /* stop a sync daemon */
+ .cmd = IPVS_CMD_DEL_DAEMON,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ { /* list the running sync daemons; dump only */
+ .cmd = IPVS_CMD_GET_DAEMON,
+ .flags = GENL_ADMIN_PERM,
+ .dumpit = ip_vs_genl_dump_daemons,
+ },
+ { /* set the global timeouts */
+ .cmd = IPVS_CMD_SET_CONFIG,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ { /* read the global timeouts */
+ .cmd = IPVS_CMD_GET_CONFIG,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_get_cmd,
+ },
+ { /* read version and connection table size */
+ .cmd = IPVS_CMD_GET_INFO,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_get_cmd,
+ },
+ { /* zero the counters of one service, or of all if none is given */
+ .cmd = IPVS_CMD_ZERO,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ { /* flush: takes no attributes */
+ .cmd = IPVS_CMD_FLUSH,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_set_cmd,
+ },
+};
+
+/*
+ * Register the IPVS Generic Netlink family and then each operation in
+ * ip_vs_genl_ops[], in array order.
+ *
+ * Returns 0 on success.  If registering an operation fails, the family is
+ * unregistered again and the error is returned.
+ */
+static int __init ip_vs_genl_register(void)
+{
+	int err, n;
+
+	err = genl_register_family(&ip_vs_genl_family);
+	if (err)
+		return err;
+
+	for (n = 0; n < ARRAY_SIZE(ip_vs_genl_ops); n++) {
+		err = genl_register_ops(&ip_vs_genl_family,
+					&ip_vs_genl_ops[n]);
+		if (err) {
+			genl_unregister_family(&ip_vs_genl_family);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static void ip_vs_genl_unregister(void) /* counterpart of ip_vs_genl_register() */
+{
+ genl_unregister_family(&ip_vs_genl_family); /* the operations are not unregistered one by one here */
+}
+
+/* End of Generic Netlink interface definitions */
+
+
+/*
+ * Set up the IPVS control plane: hash tables, the sockopt and Generic
+ * Netlink interfaces, /proc entries, sysctls, the global estimator and
+ * the periodic defense work.
+ *
+ * Returns 0 on success, or the error from registering the sockopt or
+ * netlink interface.
+ */
+int __init ip_vs_control_init(void)
+{
+	int ret;
+	int idx;
+
+	EnterFunction(2);
+
+	/*
+	 * Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable
+	 * before any interface is registered: the sockopt and netlink
+	 * handlers walk these lists and may be invoked as soon as they
+	 * become reachable from userspace.
+	 */
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
+		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
+		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
+	}
+	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
+		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
+	}
+
+	ret = nf_register_sockopt(&ip_vs_sockopts);
+	if (ret) {
+		IP_VS_ERR("cannot register sockopt.\n");
+		return ret;
+	}
+
+	ret = ip_vs_genl_register();
+	if (ret) {
+		IP_VS_ERR("cannot register Generic Netlink interface.\n");
+		nf_unregister_sockopt(&ip_vs_sockopts);
+		return ret;
+	}
+
+	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
+	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
+
+	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
+
+	ip_vs_new_estimator(&ip_vs_stats);
+
+	/* Hook the defense timer */
+	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+
+	LeaveFunction(2);
+	return 0;
+}
+
+
+void ip_vs_control_cleanup(void) /* tear down what ip_vs_control_init() set up */
+{
+ EnterFunction(2);
+ ip_vs_trash_cleanup();
+ cancel_rearming_delayed_work(&defense_work); /* stop the periodic defense work */
+ cancel_work_sync(&defense_work.work);
+ ip_vs_kill_estimator(&ip_vs_stats);
+ unregister_sysctl_table(sysctl_header);
+ proc_net_remove(&init_net, "ip_vs_stats");
+ proc_net_remove(&init_net, "ip_vs");
+ ip_vs_genl_unregister(); /* userspace interfaces are removed last */
+ nf_unregister_sockopt(&ip_vs_sockopts);
+ LeaveFunction(2);
+}
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
new file mode 100644
index 0000000..a16943f
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -0,0 +1,261 @@
+/*
+ * IPVS: Destination Hashing scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@gnuchina.org>
+ *
+ * Inspired by the consistent hashing scheduler patch from
+ * Thomas Proell <proellt@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The dh algorithm is to select server by the hash key of destination IP
+ * address. The pseudo code is as follows:
+ *
+ * n <- servernode[dest_ip];
+ * if (n is dead) OR
+ * (n is overloaded) OR (n.weight <= 0) then
+ * return NULL;
+ *
+ * return n;
+ *
+ * Note that servernode is a 256-bucket hash table that maps the hash
+ * index derived from packet destination IP address to the current server
+ * array. If the dh scheduler is used in cache cluster, it is good to
+ * combine it with cache_bypass feature. When the statically assigned
+ * server is dead or overloaded, the load balancer can bypass the cache
+ * server and send requests to the original server directly.
+ *
+ */
+
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * IPVS DH bucket
+ */
+struct ip_vs_dh_bucket {
+ struct ip_vs_dest *dest; /* real server (cache) */
+};
+
+/*
+ * for IPVS DH entry hash table
+ */
+#ifndef CONFIG_IP_VS_DH_TAB_BITS
+#define CONFIG_IP_VS_DH_TAB_BITS 8
+#endif
+#define IP_VS_DH_TAB_BITS CONFIG_IP_VS_DH_TAB_BITS
+#define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS)
+#define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1)
+
+
+/*
+ *	Map a destination address to its slot in the DH table: the address
+ *	(in host byte order) is multiplied by 2654435761 and the result is
+ *	reduced with IP_VS_DH_TAB_MASK.
+ */
+static inline unsigned ip_vs_dh_hashkey(__be32 addr)
+{
+	unsigned long product = ntohl(addr) * 2654435761UL;
+
+	return product & IP_VS_DH_TAB_MASK;
+}
+
+
+/*
+ *	Look up the destination stored in the bucket that "addr" hashes to.
+ *	The result is NULL when that bucket holds no destination.
+ */
+static inline struct ip_vs_dest *
+ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __be32 addr)
+{
+	struct ip_vs_dh_bucket *slot = &tbl[ip_vs_dh_hashkey(addr)];
+
+	return slot->dest;
+}
+
+
+/*
+ *	Fill every bucket of the table from the service's destination list,
+ *	walking the list round-robin and taking one reference on a
+ *	destination for each bucket it is stored in.  When the list is
+ *	empty all buckets are set to NULL.  Always returns 0.
+ */
+static int
+ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
+{
+	struct list_head *head = &svc->destinations;
+	struct list_head *pos = head;
+	int idx;
+
+	for (idx = 0; idx < IP_VS_DH_TAB_SIZE; idx++) {
+		struct ip_vs_dest *server;
+
+		if (list_empty(pos)) {
+			tbl[idx].dest = NULL;
+			continue;
+		}
+
+		/* the list head is not a destination: step over it */
+		if (pos == head)
+			pos = pos->next;
+
+		server = list_entry(pos, struct ip_vs_dest, n_list);
+		atomic_inc(&server->refcnt);
+		tbl[idx].dest = server;
+
+		pos = pos->next;
+	}
+	return 0;
+}
+
+
+/*
+ *	Empty the table: drop the reference held by each occupied bucket
+ *	and reset the bucket to NULL.
+ */
+static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
+{
+	struct ip_vs_dh_bucket *end = tbl + IP_VS_DH_TAB_SIZE;
+	struct ip_vs_dh_bucket *slot;
+
+	for (slot = tbl; slot != end; slot++) {
+		if (!slot->dest)
+			continue;
+
+		atomic_dec(&slot->dest->refcnt);
+		slot->dest = NULL;
+	}
+}
+
+
+/*
+ *	init_service hook: allocate the DH bucket table for the service,
+ *	attach it as svc->sched_data and populate it from the current
+ *	destination list.  Returns 0, or -ENOMEM if allocation fails.
+ */
+static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
+{
+	const size_t tbl_bytes =
+		sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE;
+	struct ip_vs_dh_bucket *tbl = kmalloc(tbl_bytes, GFP_ATOMIC);
+
+	if (!tbl) {
+		IP_VS_ERR("ip_vs_dh_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
+		  "current service\n",
+		  tbl_bytes);
+
+	/* every bucket is written by the assignment pass */
+	ip_vs_dh_assign(tbl, svc);
+
+	return 0;
+}
+
+
+/*
+ *	done_service hook: release the references held by the buckets and
+ *	free the table itself.  Always returns 0.
+ */
+static int ip_vs_dh_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_dh_bucket *buckets = svc->sched_data;
+
+	/* drop the per-bucket destination references first */
+	ip_vs_dh_flush(buckets);
+
+	/* then give the memory back */
+	kfree(buckets);
+	IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
+		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
+
+	return 0;
+}
+
+
+/*
+ *	update_service hook: rebuild the bucket table so that it reflects
+ *	the service's current destination list.  Always returns 0.
+ */
+static int ip_vs_dh_update_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_dh_bucket *buckets = svc->sched_data;
+
+	/* forget the old assignment ... */
+	ip_vs_dh_flush(buckets);
+
+	/* ... and redo it; ip_vs_dh_assign() always returns 0 */
+	return ip_vs_dh_assign(buckets, svc);
+}
+
+
+/*
+ *      A destination counts as overloaded when IP_VS_DEST_F_OVERLOAD is
+ *      set in its flags.  The result is non-zero in that case, 0 otherwise.
+ */
+static inline int is_overloaded(struct ip_vs_dest *dest)
+{
+	return (dest->flags & IP_VS_DEST_F_OVERLOAD);
+}
+
+
+/*
+ *      Destination hashing scheduling: pick the destination stored in the
+ *      bucket selected by the packet's destination address.  Returns NULL
+ *      when that bucket is empty or its destination is unavailable, has a
+ *      weight <= 0, or is overloaded.
+ */
+static struct ip_vs_dest *
+ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct iphdr *iph = ip_hdr(skb);
+	struct ip_vs_dh_bucket *tbl = svc->sched_data;
+	struct ip_vs_dest *dest;
+
+	IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
+
+	dest = ip_vs_dh_get(tbl, iph->daddr);
+	if (!dest)
+		return NULL;
+	if (!(dest->flags & IP_VS_DEST_F_AVAILABLE))
+		return NULL;
+	if (atomic_read(&dest->weight) <= 0)
+		return NULL;
+	if (is_overloaded(dest))
+		return NULL;
+
+	IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->daddr),
+		  NIPQUAD(dest->addr.ip),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ * IPVS DH Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_dh_scheduler =
+{
+ .name = "dh",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 0,
+#endif
+ .init_service = ip_vs_dh_init_svc,
+ .done_service = ip_vs_dh_done_svc,
+ .update_service = ip_vs_dh_update_svc,
+ .schedule = ip_vs_dh_schedule,
+};
+
+
+/*
+ * Module entry point: register the "dh" scheduler with IPVS and return
+ * the registration result.
+ */
+static int __init ip_vs_dh_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
+}
+
+
+/*
+ * Module exit point: unregister the "dh" scheduler.
+ */
+static void __exit ip_vs_dh_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
+}
+
+
+module_init(ip_vs_dh_init);
+module_exit(ip_vs_dh_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
new file mode 100644
index 0000000..2eb2860
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -0,0 +1,166 @@
+/*
+ * ip_vs_est.c: simple rate estimator for IPVS
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/sysctl.h>
+#include <linux/list.h>
+
+#include <net/ip_vs.h>
+
+/*
+ This code estimates rates over a short interval (such as 8
+ seconds) for virtual services and real servers. To measure rates over
+ a long interval, it is easy to implement a user-level daemon which
+ periodically reads the statistical counters and computes the rates.
+
+ Currently, the measurement is activated by slow timer handler. Hope
+ this measurement will not introduce too much load.
+
+ We measure rate during the last 8 seconds every 2 seconds:
+
+ avgrate = avgrate*(1-W) + rate*W
+
+ where W = 2^(-2)
+
+ NOTES.
+
+ * The stored value for average bps is scaled by 2^5, so that maximal
+ rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
+
+ * A lot of code is taken from net/sched/estimator.c
+ */
+
+
+static void estimation_timer(unsigned long arg);
+
+static LIST_HEAD(est_list);
+static DEFINE_SPINLOCK(est_lock);
+static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
+
+/*
+ * Timer handler, re-armed every 2 seconds.  For each registered
+ * estimator it takes the counter delta since the previous run, turns it
+ * into a scaled rate and folds it into the running average:
+ *
+ *	avg += (rate - avg) >> 2
+ *
+ * The user-visible rate in ustats is the rounded, unscaled average.
+ */
+static void estimation_timer(unsigned long arg)
+{
+	struct ip_vs_estimator *cur;
+	struct ip_vs_stats *stats;
+	u32 count32;
+	u64 count64;
+	u32 rate;
+
+	spin_lock(&est_lock);
+	list_for_each_entry(cur, &est_list, list) {
+		stats = container_of(cur, struct ip_vs_stats, est);
+
+		spin_lock(&stats->lock);
+
+		/*
+		 * connections and packets:
+		 * scaled by 2^10, but divided 2 seconds
+		 */
+		count32 = stats->ustats.conns;
+		rate = (count32 - cur->last_conns)<<9;
+		cur->last_conns = count32;
+		cur->cps += ((long)rate - (long)cur->cps)>>2;
+		stats->ustats.cps = (cur->cps+0x1FF)>>10;
+
+		count32 = stats->ustats.inpkts;
+		rate = (count32 - cur->last_inpkts)<<9;
+		cur->last_inpkts = count32;
+		cur->inpps += ((long)rate - (long)cur->inpps)>>2;
+		stats->ustats.inpps = (cur->inpps+0x1FF)>>10;
+
+		count32 = stats->ustats.outpkts;
+		rate = (count32 - cur->last_outpkts)<<9;
+		cur->last_outpkts = count32;
+		cur->outpps += ((long)rate - (long)cur->outpps)>>2;
+		stats->ustats.outpps = (cur->outpps+0x1FF)>>10;
+
+		/* bytes: scaled by 2^5, but divided 2 seconds */
+		count64 = stats->ustats.inbytes;
+		rate = (count64 - cur->last_inbytes)<<4;
+		cur->last_inbytes = count64;
+		cur->inbps += ((long)rate - (long)cur->inbps)>>2;
+		stats->ustats.inbps = (cur->inbps+0xF)>>5;
+
+		count64 = stats->ustats.outbytes;
+		rate = (count64 - cur->last_outbytes)<<4;
+		cur->last_outbytes = count64;
+		cur->outbps += ((long)rate - (long)cur->outbps)>>2;
+		stats->ustats.outbps = (cur->outbps+0xF)>>5;
+
+		spin_unlock(&stats->lock);
+	}
+	spin_unlock(&est_lock);
+	mod_timer(&est_timer, jiffies + 2*HZ);
+}
+
+/*
+ * Seed the estimator embedded in "stats" from the current counters and
+ * rates, then link it into est_list so the estimation timer starts
+ * updating it.
+ */
+void ip_vs_new_estimator(struct ip_vs_stats *stats)
+{
+	struct ip_vs_estimator *est = &stats->est;
+
+	INIT_LIST_HEAD(&est->list);
+
+	/* counter snapshots the next timer run computes its deltas from */
+	est->last_conns = stats->ustats.conns;
+	est->last_inpkts = stats->ustats.inpkts;
+	est->last_outpkts = stats->ustats.outpkts;
+	est->last_inbytes = stats->ustats.inbytes;
+	est->last_outbytes = stats->ustats.outbytes;
+
+	/* averages: cps/pps are kept scaled by 2^10, bps by 2^5 */
+	est->cps = stats->ustats.cps<<10;
+	est->inpps = stats->ustats.inpps<<10;
+	est->outpps = stats->ustats.outpps<<10;
+	est->inbps = stats->ustats.inbps<<5;
+	est->outbps = stats->ustats.outbps<<5;
+
+	spin_lock_bh(&est_lock);
+	list_add(&est->list, &est_list);
+	spin_unlock_bh(&est_lock);
+}
+
+/*
+ * Unlink the estimator embedded in "stats" from est_list, under
+ * est_lock, so the estimation timer no longer visits it.
+ */
+void ip_vs_kill_estimator(struct ip_vs_stats *stats)
+{
+	spin_lock_bh(&est_lock);
+	list_del(&stats->est.list);
+	spin_unlock_bh(&est_lock);
+}
+
+/*
+ * Reset the estimator state of "stats": all counter snapshots and all
+ * scaled averages go back to zero.  The caller must hold stats->lock.
+ */
+void ip_vs_zero_estimator(struct ip_vs_stats *stats)
+{
+	struct ip_vs_estimator *est = &stats->est;
+
+	/* counter snapshots */
+	est->last_conns = 0;
+	est->last_inpkts = 0;
+	est->last_outpkts = 0;
+	est->last_inbytes = 0;
+	est->last_outbytes = 0;
+
+	/* running averages */
+	est->cps = 0;
+	est->inpps = 0;
+	est->outpps = 0;
+	est->inbps = 0;
+	est->outbps = 0;
+}
+
+/*
+ * Arm the estimation timer so that it first fires 2 seconds from now.
+ * Always returns 0.
+ */
+int __init ip_vs_estimator_init(void)
+{
+ mod_timer(&est_timer, jiffies + 2 * HZ);
+ return 0;
+}
+
+/*
+ * Stop the estimation timer, waiting for a running handler to finish.
+ */
+void ip_vs_estimator_cleanup(void)
+{
+ del_timer_sync(&est_timer);
+}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
new file mode 100644
index 0000000..2e7dbd8
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -0,0 +1,410 @@
+/*
+ * ip_vs_ftp.c: IPVS ftp application module
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * Changes:
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Most code here is taken from ip_masq_ftp.c in kernel 2.2. The difference
+ * is that ip_vs_ftp module handles the reverse direction to ip_masq_ftp.
+ *
+ * IP_MASQ_FTP ftp masquerading module
+ *
+ * Version: @(#)ip_masq_ftp.c 0.04 02/05/96
+ *
+ * Author: Wouter Gadeyne
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <asm/unaligned.h>
+
+#include <net/ip_vs.h>
+
+
+#define SERVER_STRING "227 Entering Passive Mode ("
+#define CLIENT_STRING "PORT "
+
+
+/*
+ * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
+ * First port is set to the default port.
+ */
+static unsigned short ports[IP_VS_APP_MAX_PORTS] = {21, 0};
+module_param_array(ports, ushort, NULL, 0);
+MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands");
+
+
+/* Dummy variable */
+static int ip_vs_ftp_pasv;
+
+
+/*
+ * init_conn hook of the ftp helper: nothing to set up, always returns 0.
+ */
+static int
+ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
+{
+ return 0;
+}
+
+
+/*
+ * done_conn hook of the ftp helper: nothing to release, always returns 0.
+ */
+static int
+ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
+{
+ return 0;
+}
+
+
+/*
+ * Get <addr,port> from the string "xxx,xxx,xxx,xxx,ppp,ppp", started
+ * with the "pattern" and terminated with the "term" character.
+ * <addr,port> is in network order.
+ *
+ * data/data_limit delimit the payload; data_limit points one past the
+ * last valid byte and is never dereferenced.  On success *start and
+ * *end delimit the six comma separated numbers (*end points at the
+ * terminator).
+ *
+ * Returns:
+ *	 1  pattern found, *addr, *port, *start and *end are valid
+ *	 0  payload does not start with the pattern
+ *	-1  partial pattern, missing terminator or malformed numbers
+ */
+static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
+				  const char *pattern, size_t plen, char term,
+				  __be32 *addr, __be16 *port,
+				  char **start, char **end)
+{
+	unsigned char p[6];
+	unsigned int val;
+	int i = 0;
+
+	/*
+	 * A bogus TCP data offset can make the payload pointer lie beyond
+	 * the packet tail.  Without this check the (negative) length below
+	 * would be converted to a huge unsigned value.
+	 */
+	if (data > data_limit)
+		return 0;
+
+	if ((size_t)(data_limit - data) < plen) {
+		/* check if there is partial match */
+		if (strnicmp(data, pattern, data_limit - data) == 0)
+			return -1;
+		else
+			return 0;
+	}
+
+	if (strnicmp(data, pattern, plen) != 0) {
+		return 0;
+	}
+	*start = data + plen;
+
+	/*
+	 * Search for the terminator.  The bound is tested before the
+	 * byte is read, so nothing at or past data_limit is touched.
+	 */
+	for (data = *start; ; data++) {
+		if (data == data_limit)
+			return -1;
+		if (*data == term)
+			break;
+	}
+	*end = data;
+
+	memset(p, 0, sizeof(p));
+	for (data = *start; data != *end; data++) {
+		if (*data >= '0' && *data <= '9') {
+			val = p[i]*10 + *data - '0';
+			/* each field is one byte: do not let it wrap */
+			if (val > 255)
+				return -1;
+			p[i] = val;
+		} else if (*data == ',' && i < 5) {
+			i++;
+		} else {
+			/* unexpected character */
+			return -1;
+		}
+	}
+
+	/* exactly six fields are required */
+	if (i != 5)
+		return -1;
+
+	*addr = get_unaligned((__be32 *)p);
+	*port = get_unaligned((__be16 *)(p + 4));
+	return 1;
+}
+
+
+/*
+ * Look at outgoing ftp packets to catch the response to a PASV command
+ * from the server (inside-to-outside).
+ * When we see one, we build a connection entry with the client address,
+ * client port 0 (unknown at the moment), the server address and the
+ * server port. Mark the current connection entry as a control channel
+ * of the new entry. All this work is just to make sure the data
+ * connection can be scheduled to the right server later.
+ *
+ * The outgoing packet should be something like
+ *   "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)".
+ * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
+ * The address/port in the payload is rewritten to the virtual address
+ * and port of the data connection; *diff receives the resulting change
+ * in payload length.
+ *
+ * Returns 1 when the packet may go on (rewritten or left untouched) and
+ * 0 on failure (packet not writable, no connection entry, or the payload
+ * replacement failed).
+ */
+static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
+			 struct sk_buff *skb, int *diff)
+{
+	struct iphdr *iph;
+	struct tcphdr *th;
+	char *data, *data_limit;
+	char *start, *end;
+	union nf_inet_addr from;
+	__be16 port;
+	struct ip_vs_conn *n_cp;
+	char buf[24];		/* xxx.xxx.xxx.xxx,ppp,ppp\000 */
+	unsigned buf_len;
+	int ret;
+
+#ifdef CONFIG_IP_VS_IPV6
+	/* This application helper doesn't work with IPv6 yet,
+	 * so turn this into a no-op for IPv6 packets
+	 */
+	if (cp->af == AF_INET6)
+		return 1;
+#endif
+
+	*diff = 0;
+
+	/* Only useful for established sessions */
+	if (cp->state != IP_VS_TCP_S_ESTABLISHED)
+		return 1;
+
+	/* Linear packets are much easier to deal with. */
+	if (!skb_make_writable(skb, skb->len))
+		return 0;
+
+	/* only the reply to a PASV command seen by ip_vs_ftp_in() matters */
+	if (cp->app_data != &ip_vs_ftp_pasv)
+		return 1;
+
+	iph = ip_hdr(skb);
+	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
+	data = (char *)th + (th->doff << 2);
+	data_limit = skb_tail_pointer(skb);
+
+	/* th->doff comes from the wire: never parse beyond the packet */
+	if (data > data_limit)
+		return 1;
+
+	if (ip_vs_ftp_get_addrport(data, data_limit,
+				   SERVER_STRING,
+				   sizeof(SERVER_STRING)-1, ')',
+				   &from.ip, &port,
+				   &start, &end) != 1)
+		return 1;
+
+	IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> "
+		  "%u.%u.%u.%u:%d detected\n",
+		  NIPQUAD(from.ip), ntohs(port),
+		  NIPQUAD(cp->caddr.ip), 0);
+
+	/*
+	 * Now update or create a connection entry for it
+	 */
+	n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port,
+				  &cp->caddr, 0);
+	if (!n_cp) {
+		n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
+				      &cp->caddr, 0,
+				      &cp->vaddr, port,
+				      &from, port,
+				      IP_VS_CONN_F_NO_CPORT,
+				      cp->dest);
+		if (!n_cp)
+			return 0;
+
+		/* add its controller */
+		ip_vs_control_add(n_cp, cp);
+	}
+
+	/*
+	 * Replace the old passive address with the new one.
+	 * The output is bounded by the buffer size instead of relying on
+	 * the buffer being exactly large enough.
+	 */
+	from.ip = n_cp->vaddr.ip;
+	port = n_cp->vport;
+	snprintf(buf, sizeof(buf), "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip),
+		 (ntohs(port)>>8)&255, ntohs(port)&255);
+	buf_len = strlen(buf);
+
+	/*
+	 * Calculate required delta-offset to keep TCP happy
+	 */
+	*diff = buf_len - (end-start);
+
+	if (*diff == 0) {
+		/* simply replace it with new passive address */
+		memcpy(start, buf, buf_len);
+		ret = 1;
+	} else {
+		ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start,
+					 end-start, buf, buf_len);
+	}
+
+	/* the PASV reply has been handled: leave passive-detection mode */
+	cp->app_data = NULL;
+	ip_vs_tcp_conn_listen(n_cp);
+	ip_vs_conn_put(n_cp);
+	return ret;
+}
+
+
+/*
+ * Look at incoming ftp packets to catch the PASV/PORT command
+ * (outside-to-inside).
+ *
+ * A "PASV\r\n" anywhere in the payload only marks the control connection
+ * (cp->app_data) so that ip_vs_ftp_out() looks for the server's reply.
+ *
+ * The incoming packet having the PORT command should be something like
+ *      "PORT xxx,xxx,xxx,xxx,ppp,ppp\n".
+ * xxx,xxx,xxx,xxx is the client address, ppp,ppp is the client port number.
+ * In this case, we create a connection entry using the client address and
+ * port, so that the active ftp data connection from the server can reach
+ * the client.
+ *
+ * Returns 1 when the packet may go on and 0 on failure (packet not
+ * writable or no connection entry could be created).  *diff is always
+ * set to 0 for IPv4 packets: incoming payload is never resized.
+ */
+static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
+			struct sk_buff *skb, int *diff)
+{
+	struct iphdr *iph;
+	struct tcphdr *th;
+	char *data, *data_start, *data_limit;
+	char *start, *end;
+	union nf_inet_addr to;
+	__be16 port;
+	struct ip_vs_conn *n_cp;
+
+#ifdef CONFIG_IP_VS_IPV6
+	/* This application helper doesn't work with IPv6 yet,
+	 * so turn this into a no-op for IPv6 packets
+	 */
+	if (cp->af == AF_INET6)
+		return 1;
+#endif
+
+	/* no diff required for incoming packets */
+	*diff = 0;
+
+	/* Only useful for established sessions */
+	if (cp->state != IP_VS_TCP_S_ESTABLISHED)
+		return 1;
+
+	/* Linear packets are much easier to deal with. */
+	if (!skb_make_writable(skb, skb->len))
+		return 0;
+
+	/*
+	 * Detecting whether it is passive
+	 */
+	iph = ip_hdr(skb);
+	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
+
+	/* Since there may be OPTIONS in the TCP packet and the HLEN is
+	   the length of the header in 32-bit multiples, it is accurate
+	   to calculate data address by th+HLEN*4 */
+	data = data_start = (char *)th + (th->doff << 2);
+	data_limit = skb_tail_pointer(skb);
+
+	/* th->doff comes from the wire: never parse beyond the packet */
+	if (data > data_limit)
+		return 1;
+
+	while (data <= data_limit - 6) {
+		if (strnicmp(data, "PASV\r\n", 6) == 0) {
+			/* Passive mode on */
+			IP_VS_DBG(7, "got PASV at %td of %td\n",
+				  data - data_start,
+				  data_limit - data_start);
+			cp->app_data = &ip_vs_ftp_pasv;
+			return 1;
+		}
+		data++;
+	}
+
+	/*
+	 * To support virtual FTP server, the scenario is as follows:
+	 *       FTP client ----> Load Balancer ----> FTP server
+	 * First detect the port number in the application data,
+	 * then create a new connection entry for the coming data
+	 * connection.
+	 */
+	if (ip_vs_ftp_get_addrport(data_start, data_limit,
+				   CLIENT_STRING, sizeof(CLIENT_STRING)-1,
+				   '\r', &to.ip, &port,
+				   &start, &end) != 1)
+		return 1;
+
+	IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n",
+		  NIPQUAD(to.ip), ntohs(port));
+
+	/* Passive mode off */
+	cp->app_data = NULL;
+
+	/*
+	 * Now update or create a connection entry for it
+	 */
+	IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
+		  ip_vs_proto_name(iph->protocol),
+		  NIPQUAD(to.ip), ntohs(port), NIPQUAD(cp->vaddr.ip), 0);
+
+	/* the data connection uses the control port minus one on both sides */
+	n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol,
+				 &to, port,
+				 &cp->vaddr, htons(ntohs(cp->vport)-1));
+	if (!n_cp) {
+		n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
+				      &to, port,
+				      &cp->vaddr, htons(ntohs(cp->vport)-1),
+				      &cp->daddr, htons(ntohs(cp->dport)-1),
+				      0,
+				      cp->dest);
+		if (!n_cp)
+			return 0;
+
+		/* add its controller */
+		ip_vs_control_add(n_cp, cp);
+	}
+
+	/*
+	 *	Move tunnel to listen state
+	 */
+	ip_vs_tcp_conn_listen(n_cp);
+	ip_vs_conn_put(n_cp);
+
+	return 1;
+}
+
+
+static struct ip_vs_app ip_vs_ftp = {
+ .name = "ftp",
+ .type = IP_VS_APP_TYPE_FTP,
+ .protocol = IPPROTO_TCP,
+ .module = THIS_MODULE,
+ .incs_list = LIST_HEAD_INIT(ip_vs_ftp.incs_list),
+ .init_conn = ip_vs_ftp_init_conn,
+ .done_conn = ip_vs_ftp_done_conn,
+ .bind_conn = NULL,
+ .unbind_conn = NULL,
+ .pkt_out = ip_vs_ftp_out,
+ .pkt_in = ip_vs_ftp_in,
+};
+
+
+/*
+ *	Module entry point: register the ftp helper, then one incarnation
+ *	for every non-zero entry of the "ports" array.  If any port fails
+ *	to register the helper is unregistered again and the error is
+ *	returned; otherwise the result is 0.
+ */
+static int __init ip_vs_ftp_init(void)
+{
+	struct ip_vs_app *app = &ip_vs_ftp;
+	int idx;
+	int err;
+
+	err = register_ip_vs_app(app);
+	if (err)
+		return err;
+
+	for (idx = 0; idx < IP_VS_APP_MAX_PORTS; idx++) {
+		/* unused slot */
+		if (ports[idx] == 0)
+			continue;
+
+		err = register_ip_vs_app_inc(app, app->protocol, ports[idx]);
+		if (err) {
+			unregister_ip_vs_app(app);
+			return err;
+		}
+		IP_VS_INFO("%s: loaded support on port[%d] = %d\n",
+			   app->name, idx, ports[idx]);
+	}
+
+	return err;
+}
+
+
+/*
+ * ip_vs_ftp finish: module exit point, unregisters the ftp helper that
+ * ip_vs_ftp_init() registered.
+ */
+static void __exit ip_vs_ftp_exit(void)
+{
+ unregister_ip_vs_app(&ip_vs_ftp);
+}
+
+
+module_init(ip_vs_ftp_init);
+module_exit(ip_vs_ftp_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
new file mode 100644
index 0000000..6ecef35
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -0,0 +1,555 @@
+/*
+ * IPVS: Locality-Based Least-Connection scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@gnuchina.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * Martin Hamilton : fixed the terrible locking bugs
+ * *lock(tbl->lock) ==> *lock(&tbl->lock)
+ * Wensong Zhang : fixed the uninitilized tbl->lock bug
+ * Wensong Zhang : added doing full expiration check to
+ * collect stale entries of 24+ hours when
+ * no partial expire check in a half hour
+ * Julian Anastasov : replaced del_timer call with del_timer_sync
+ * to avoid the possible race between timer
+ * handler and del_timer thread in SMP
+ *
+ */
+
+/*
+ * The lblc algorithm is as follows (pseudo code):
+ *
+ * if cachenode[dest_ip] is null then
+ * n, cachenode[dest_ip] <- {weighted least-conn node};
+ * else
+ * n <- cachenode[dest_ip];
+ * if (n is dead) OR
+ * (n.conns>n.weight AND
+ * there is a node m with m.conns<m.weight/2) then
+ * n, cachenode[dest_ip] <- {weighted least-conn node};
+ *
+ * return n;
+ *
+ * Thanks must go to Wenzhuo Zhang for talking WCCP to me and pushing
+ * me to write this module.
+ */
+
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/jiffies.h>
+
+/* for sysctl */
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * It is for garbage collection of stale IPVS lblc entries,
+ * when the table is full.
+ */
+#define CHECK_EXPIRE_INTERVAL (60*HZ)
+#define ENTRY_TIMEOUT (6*60*HZ)
+
+/*
+ * It is for full expiration check.
+ * When there is no partial expiration check (garbage collection)
+ * in a half hour, do a full expiration check to collect stale
+ * entries that haven't been touched for a day.
+ */
+#define COUNT_FOR_FULL_EXPIRATION 30
+static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
+
+
+/*
+ * for IPVS lblc entry hash table
+ */
+#ifndef CONFIG_IP_VS_LBLC_TAB_BITS
+#define CONFIG_IP_VS_LBLC_TAB_BITS 10
+#endif
+#define IP_VS_LBLC_TAB_BITS CONFIG_IP_VS_LBLC_TAB_BITS
+#define IP_VS_LBLC_TAB_SIZE (1 << IP_VS_LBLC_TAB_BITS)
+#define IP_VS_LBLC_TAB_MASK (IP_VS_LBLC_TAB_SIZE - 1)
+
+
+/*
+ * IPVS lblc entry represents an association between destination
+ * IP address and its destination server
+ */
+struct ip_vs_lblc_entry {
+ struct list_head list;
+ __be32 addr; /* destination IP address */
+ struct ip_vs_dest *dest; /* real server (cache) */
+ unsigned long lastuse; /* last used time */
+};
+
+
+/*
+ * IPVS lblc hash table
+ */
+struct ip_vs_lblc_table {
+ struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
+ atomic_t entries; /* number of entries */
+ int max_size; /* maximum size of entries */
+ struct timer_list periodic_timer; /* collect stale entries */
+ int rover; /* rover for expire check */
+ int counter; /* counter for no expire */
+};
+
+
+/*
+ * IPVS LBLC sysctl table
+ */
+
+static ctl_table vs_vars_table[] = {
+ {
+ .procname = "lblc_expiration",
+ .data = &sysctl_ip_vs_lblc_expiration,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ { .ctl_name = 0 }
+};
+
+static struct ctl_table_header * sysctl_header;
+
+/*
+ *	Unlink an entry from its bucket, drop the reference it holds on
+ *	its destination and free the entry.  The table's entry counter is
+ *	left to the caller.
+ */
+static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
+{
+	struct ip_vs_dest *dest = en->dest;
+
+	list_del(&en->list);
+
+	/*
+	 * Only the reference is dropped; the dest itself is not freed
+	 * here because it is still referred to either by its service or
+	 * by the trash dest list.
+	 */
+	atomic_dec(&dest->refcnt);
+
+	kfree(en);
+}
+
+
+/*
+ *	Map a destination address to its LBLC bucket index: the address
+ *	(in host byte order) is multiplied by 2654435761 and the result is
+ *	reduced with IP_VS_LBLC_TAB_MASK.
+ */
+static inline unsigned ip_vs_lblc_hashkey(__be32 addr)
+{
+	unsigned long product = ntohl(addr) * 2654435761UL;
+
+	return product & IP_VS_LBLC_TAB_MASK;
+}
+
+
+/*
+ *	Link an entry into the bucket selected by its address and account
+ *	for it in the table's entry counter.
+ */
+static void
+ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
+{
+	struct list_head *chain = &tbl->bucket[ip_vs_lblc_hashkey(en->addr)];
+
+	list_add(&en->list, chain);
+	atomic_inc(&tbl->entries);
+}
+
+
+/*
+ *  Find the entry recorded for "addr", or NULL if there is none.
+ *  Called under read lock.
+ */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
+{
+	struct list_head *chain = &tbl->bucket[ip_vs_lblc_hashkey(addr)];
+	struct ip_vs_lblc_entry *pos;
+
+	list_for_each_entry(pos, chain, list) {
+		if (pos->addr == addr)
+			return pos;
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Bind destination address "daddr" to server "dest".  An existing entry
+ * is re-pointed (moving the reference from the old dest to the new one);
+ * otherwise a new entry is allocated and hashed.  Returns the entry, or
+ * NULL if a new entry could not be allocated.  Called under write lock.
+ */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr,
+	       struct ip_vs_dest *dest)
+{
+	struct ip_vs_lblc_entry *en = ip_vs_lblc_get(tbl, daddr);
+
+	if (en) {
+		/* known address: switch servers only if it changed */
+		if (en->dest != dest) {
+			atomic_dec(&en->dest->refcnt);
+			atomic_inc(&dest->refcnt);
+			en->dest = dest;
+		}
+		return en;
+	}
+
+	en = kmalloc(sizeof(*en), GFP_ATOMIC);
+	if (!en) {
+		IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
+		return NULL;
+	}
+
+	en->addr = daddr;
+	en->lastuse = jiffies;
+
+	/* the entry keeps its own reference on the server */
+	atomic_inc(&dest->refcnt);
+	en->dest = dest;
+
+	ip_vs_lblc_hash(tbl, en);
+
+	return en;
+}
+
+
+/*
+ *      Remove and free every entry of the table, bucket by bucket,
+ *      keeping the entry counter in step.
+ */
+static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
+{
+	struct ip_vs_lblc_entry *cur, *tmp;
+	struct list_head *chain;
+	int idx;
+
+	for (idx = 0; idx < IP_VS_LBLC_TAB_SIZE; idx++) {
+		chain = &tbl->bucket[idx];
+
+		list_for_each_entry_safe(cur, tmp, chain, list) {
+			ip_vs_lblc_free(cur);
+			atomic_dec(&tbl->entries);
+		}
+	}
+}
+
+
+/*
+ *	Full expiration pass: visit every bucket once, starting after the
+ *	rover position, and free each entry whose last use is at least
+ *	sysctl_ip_vs_lblc_expiration jiffies old.  The scheduler write lock
+ *	is taken per bucket.
+ */
+static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
+{
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
+	struct ip_vs_lblc_entry *cur, *tmp;
+	unsigned long now = jiffies;
+	int slot = tbl->rover;
+	int visited;
+
+	for (visited = 0; visited < IP_VS_LBLC_TAB_SIZE; visited++) {
+		slot = (slot + 1) & IP_VS_LBLC_TAB_MASK;
+
+		write_lock(&svc->sched_lock);
+		list_for_each_entry_safe(cur, tmp, &tbl->bucket[slot], list) {
+			/* still fresh enough: keep it */
+			if (time_before(now,
+					cur->lastuse + sysctl_ip_vs_lblc_expiration))
+				continue;
+
+			ip_vs_lblc_free(cur);
+			atomic_dec(&tbl->entries);
+		}
+		write_unlock(&svc->sched_lock);
+	}
+	tbl->rover = slot;
+}
+
+
+/*
+ *      Periodic timer handler of an LBLC table ("data" is the service).
+ *
+ *      Every COUNT_FOR_FULL_EXPIRATION-th run without garbage collection
+ *      performs a full expiration check.  Otherwise, if the table holds
+ *      more than max_size entries, entries idle for ENTRY_TIMEOUT are
+ *      collected, continuing from the rover position, until about 4/3
+ *      of the excess (at most max_size/2) has been freed.
+ *
+ *      The handler always re-arms itself CHECK_EXPIRE_INTERVAL ahead.
+ */
+static void ip_vs_lblc_check_expire(unsigned long data)
+{
+	struct ip_vs_service *svc = (struct ip_vs_service *) data;
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
+	struct ip_vs_lblc_entry *cur, *tmp;
+	unsigned long now = jiffies;
+	int visited, slot;
+	int goal;
+
+	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
+		/* do full expiration check */
+		ip_vs_lblc_full_check(svc);
+		tbl->counter = 1;
+	} else if (atomic_read(&tbl->entries) <= tbl->max_size) {
+		/* nothing to collect this time */
+		tbl->counter++;
+	} else {
+		goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
+		if (goal > tbl->max_size/2)
+			goal = tbl->max_size/2;
+
+		slot = tbl->rover;
+		for (visited = 0; visited < IP_VS_LBLC_TAB_SIZE; visited++) {
+			slot = (slot + 1) & IP_VS_LBLC_TAB_MASK;
+
+			write_lock(&svc->sched_lock);
+			list_for_each_entry_safe(cur, tmp,
+						 &tbl->bucket[slot], list) {
+				if (time_before(now,
+						cur->lastuse + ENTRY_TIMEOUT))
+					continue;
+
+				ip_vs_lblc_free(cur);
+				atomic_dec(&tbl->entries);
+				goal--;
+			}
+			write_unlock(&svc->sched_lock);
+
+			/* the goal is only tested between buckets */
+			if (goal <= 0)
+				break;
+		}
+		tbl->rover = slot;
+	}
+
+	mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
+}
+
+
+/*
+ *	init_service hook: allocate and initialise the per-service LBLC
+ *	table, attach it as svc->sched_data and start its periodic
+ *	garbage-collection timer.
+ *
+ *	Returns 0 on success or -ENOMEM if the table cannot be allocated.
+ */
+static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_lblc_table *tbl;
+
+	/*
+	 *    Allocate the ip_vs_lblc_table for this service
+	 */
+	tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for "
+		  "current service\n", sizeof(*tbl));
+
+	/*
+	 *    Initialize the hash buckets
+	 */
+	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		INIT_LIST_HEAD(&tbl->bucket[i]);
+	}
+	tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
+	tbl->rover = 0;
+	tbl->counter = 1;
+
+	/*
+	 *    The table comes from kmalloc() and is not zeroed, so the entry
+	 *    counter must be set explicitly: the expiry timer compares it
+	 *    with max_size to decide whether to collect entries.
+	 */
+	atomic_set(&tbl->entries, 0);
+
+	/*
+	 *    Hook periodic timer for garbage collection
+	 */
+	setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
+			(unsigned long)svc);
+	mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
+
+	return 0;
+}
+
+
+/*
+ *	done_service hook: stop the garbage-collection timer, free all
+ *	entries and then the table itself.  Always returns 0.
+ */
+static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_lblc_table *table = svc->sched_data;
+
+	/* the timer handler uses the table: make sure it is not running */
+	del_timer_sync(&table->periodic_timer);
+
+	/* release the cached entries ... */
+	ip_vs_lblc_flush(table);
+
+	/* ... and finally the table */
+	kfree(table);
+	IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
+		  sizeof(struct ip_vs_lblc_table));
+
+	return 0;
+}
+
+
+/*
+ *	Weighted least-connection selection over the service's
+ *	destinations.  Returns NULL when no destination is both not
+ *	overloaded and of positive weight.
+ */
+static inline struct ip_vs_dest *
+__ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_dest *least = NULL;
+	int loh, doh;
+
+	/*
+	 * The cost of a destination is estimated as
+	 *		activeconns*50 + inactconns
+	 * (an active connection is assumed to be about fifty times as
+	 * expensive as an inactive one) and its load as cost / weight.
+	 *
+	 * No floats in kernel mode, so loads are compared by cross
+	 * multiplication: h1*w2 > h2*w1 is equivalent to h1/w1 > h2/w2
+	 * as long as every weight is larger than zero.
+	 *
+	 * A server with weight=0 is quiesced and will not receive any
+	 * new connection.
+	 */
+
+	/* first candidate: the first usable destination in the list */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+		if (atomic_read(&dest->weight) > 0) {
+			least = dest;
+			loh = atomic_read(&least->activeconns) * 50
+				+ atomic_read(&least->inactconns);
+			break;
+		}
+	}
+	if (!least)
+		return NULL;
+
+	/*
+	 *    Compare the remaining destinations against the candidate.
+	 */
+	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr.ip), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+/*
+ *   Returns 1 when "dest" has more active connections than its weight
+ *   and some destination of the service has fewer than half its weight
+ *   in active connections; returns 0 otherwise.
+ */
+static inline int
+is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *d;
+
+	/* not above its own weight: cannot be overloaded */
+	if (atomic_read(&dest->activeconns) <= atomic_read(&dest->weight))
+		return 0;
+
+	list_for_each_entry(d, &svc->destinations, n_list) {
+		if (atomic_read(&d->activeconns)*2
+		    < atomic_read(&d->weight))
+			return 1;
+	}
+
+	return 0;
+}
+
+
+/*
+ *    Locality-Based (weighted) Least-Connection scheduling.
+ *
+ *    The server cached for the packet's destination address is reused
+ *    when it is available, has a positive weight and is not overloaded.
+ *    Otherwise a server is chosen by weighted least-connection and
+ *    recorded in the cache.  Returns NULL if no server can be chosen.
+ */
+static struct ip_vs_dest *
+ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
+	struct iphdr *iph = ip_hdr(skb);
+	struct ip_vs_lblc_entry *hit;
+	struct ip_vs_dest *dest = NULL;
+
+	IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
+
+	/* First look in our cache */
+	read_lock(&svc->sched_lock);
+	hit = ip_vs_lblc_get(tbl, iph->daddr);
+	if (hit) {
+		/* We only hold a read lock, but this is atomic */
+		hit->lastuse = jiffies;
+
+		/*
+		 * A destination that is not available is in the trash and
+		 * must be ignored: it may be removed from under our feet
+		 * if someone drops our reference count.  Our caller only
+		 * guarantees that destinations outside the trash are not
+		 * moved there while we are scheduling; entries in the
+		 * trash can be freed at any time.
+		 */
+		if (hit->dest->flags & IP_VS_DEST_F_AVAILABLE)
+			dest = hit->dest;
+	}
+	read_unlock(&svc->sched_lock);
+
+	/* Keep the cached server only if it has a weight and is not overloaded */
+	if (!(dest && atomic_read(&dest->weight) > 0 &&
+	      !is_overloaded(dest, svc))) {
+		/* No cache entry or it is invalid, time to schedule */
+		dest = __ip_vs_lblc_schedule(svc, iph);
+		if (!dest) {
+			IP_VS_DBG(1, "no destination available\n");
+			return NULL;
+		}
+
+		/* If the cache entry cannot be created, the dest is used anyway */
+		write_lock(&svc->sched_lock);
+		ip_vs_lblc_new(tbl, iph->daddr, dest);
+		write_unlock(&svc->sched_lock);
+	}
+
+	IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->daddr),
+		  NIPQUAD(dest->addr.ip),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ * IPVS LBLC Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_lblc_scheduler =
+{
+ .name = "lblc",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 0,
+#endif
+ .init_service = ip_vs_lblc_init_svc,
+ .done_service = ip_vs_lblc_done_svc,
+ .schedule = ip_vs_lblc_schedule,
+};
+
+
+/*
+ *	Module entry point: register the lblc sysctl table and the "lblc"
+ *	scheduler.  If the scheduler cannot be registered the sysctl table
+ *	is removed again and the error is returned.
+ */
+static int __init ip_vs_lblc_init(void)
+{
+	int err;
+
+	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+
+	err = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+	if (!err)
+		return 0;
+
+	unregister_sysctl_table(sysctl_header);
+	return err;
+}
+
+
+/*
+ * Module exit point: remove the lblc sysctl table and unregister the
+ * "lblc" scheduler.
+ */
+static void __exit ip_vs_lblc_cleanup(void)
+{
+ unregister_sysctl_table(sysctl_header);
+ unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+}
+
+
+module_init(ip_vs_lblc_init);
+module_exit(ip_vs_lblc_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
new file mode 100644
index 0000000..1f75ea8
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -0,0 +1,755 @@
+/*
+ * IPVS: Locality-Based Least-Connection with Replication scheduler
+ *
+ * Authors: Wensong Zhang <wensong@gnuchina.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * Julian Anastasov : Added the missing (dest->weight>0)
+ * condition in the ip_vs_dest_set_max.
+ *
+ */
+
+/*
+ * The lblc/r algorithm is as follows (pseudo code):
+ *
+ * if serverSet[dest_ip] is null then
+ * n, serverSet[dest_ip] <- {weighted least-conn node};
+ * else
+ * n <- {least-conn (alive) node in serverSet[dest_ip]};
+ * if (n is null) OR
+ * (n.conns>n.weight AND
+ * there is a node m with m.conns<m.weight/2) then
+ * n <- {weighted least-conn node};
+ * add n to serverSet[dest_ip];
+ * if |serverSet[dest_ip]| > 1 AND
+ * now - serverSet[dest_ip].lastMod > T then
+ * m <- {most conn node in serverSet[dest_ip]};
+ * remove m from serverSet[dest_ip];
+ * if serverSet[dest_ip] changed then
+ * serverSet[dest_ip].lastMod <- now;
+ *
+ * return n;
+ *
+ */
+
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/jiffies.h>
+
+/* for sysctl */
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+#include <net/net_namespace.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * It is for garbage collection of stale IPVS lblcr entries,
+ * when the table is full.
+ */
+#define CHECK_EXPIRE_INTERVAL (60*HZ)
+#define ENTRY_TIMEOUT (6*60*HZ)
+
+/*
+ * It is for full expiration check.
+ * When there is no partial expiration check (garbage collection)
+ * in a half hour, do a full expiration check to collect stale
+ * entries that haven't been touched for a day.
+ */
+#define COUNT_FOR_FULL_EXPIRATION 30
+static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
+
+
+/*
+ * for IPVS lblcr entry hash table
+ */
+#ifndef CONFIG_IP_VS_LBLCR_TAB_BITS
+#define CONFIG_IP_VS_LBLCR_TAB_BITS 10
+#endif
+#define IP_VS_LBLCR_TAB_BITS CONFIG_IP_VS_LBLCR_TAB_BITS
+#define IP_VS_LBLCR_TAB_SIZE (1 << IP_VS_LBLCR_TAB_BITS)
+#define IP_VS_LBLCR_TAB_MASK (IP_VS_LBLCR_TAB_SIZE - 1)
+
+
+/*
+ * IPVS destination set structure and operations
+ *
+ * A destination set is a singly linked list of ip_vs_dest_list nodes.
+ * Each node takes a reference on its destination when it is inserted
+ * (ip_vs_dest_set_insert) and drops it when the node is removed.
+ */
+struct ip_vs_dest_list {
+ struct ip_vs_dest_list *next; /* list link */
+ struct ip_vs_dest *dest; /* destination server */
+};
+
+/*
+ * size counts the nodes on list; lastmod is the jiffies value recorded
+ * by the last insert or erase; lock guards list.
+ */
+struct ip_vs_dest_set {
+ atomic_t size; /* set size */
+ unsigned long lastmod; /* last modified time */
+ struct ip_vs_dest_list *list; /* destination list */
+ rwlock_t lock; /* lock for this list */
+};
+
+
+/*
+ * Add dest to the set unless it is already a member.
+ *
+ * Returns the newly linked node, or NULL when dest was already present
+ * or the node could not be allocated. On success the node holds a
+ * reference on dest, the set size is bumped and lastmod is refreshed.
+ * The callers in this file hold set->lock for writing.
+ */
+static struct ip_vs_dest_list *
+ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
+{
+	struct ip_vs_dest_list *node = set->list;
+
+	/* refuse duplicates */
+	while (node != NULL) {
+		if (node->dest == dest)
+			return NULL;
+		node = node->next;
+	}
+
+	node = kmalloc(sizeof(*node), GFP_ATOMIC);
+	if (!node) {
+		IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n");
+		return NULL;
+	}
+
+	/* the node pins the destination for as long as it exists */
+	atomic_inc(&dest->refcnt);
+	node->dest = dest;
+
+	/* push onto the head of the list */
+	node->next = set->list;
+	set->list = node;
+	atomic_inc(&set->size);
+	set->lastmod = jiffies;
+
+	return node;
+}
+
+/*
+ * Remove the first node referring to dest from the set, if any.
+ * The node's reference on dest is dropped, the set size is decremented
+ * and lastmod is refreshed. Nothing happens when dest is not a member.
+ */
+static void
+ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
+{
+	struct ip_vs_dest_list **link = &set->list;
+	struct ip_vs_dest_list *node;
+
+	while ((node = *link) != NULL) {
+		if (node->dest != dest) {
+			link = &node->next;
+			continue;
+		}
+
+		/* found it: unlink, account, release */
+		*link = node->next;
+		atomic_dec(&set->size);
+		set->lastmod = jiffies;
+		atomic_dec(&node->dest->refcnt);
+		kfree(node);
+		return;
+	}
+}
+
+/*
+ * Empty the set under its write lock, dropping every node's reference
+ * on its destination. set->size and set->lastmod are left untouched.
+ */
+static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
+{
+	struct ip_vs_dest_list *node;
+
+	write_lock(&set->lock);
+	while ((node = set->list) != NULL) {
+		set->list = node->next;
+		/*
+		 * The destination itself is not freed here: it is still
+		 * referred to either by its service or by the trash
+		 * dest list.
+		 */
+		atomic_dec(&node->dest->refcnt);
+		kfree(node);
+	}
+	write_unlock(&set->lock);
+}
+
+/*
+ * get weighted least-connection node in the destination set
+ *
+ * Overhead of a destination is activeconns*50 + inactconns; loads are
+ * compared as overhead/weight by cross-multiplication.
+ * Returns NULL when set is NULL or when no entry is non-overloaded,
+ * available and has weight > 0. The caller in this file holds
+ * set->lock for reading.
+ */
+static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
+{
+ register struct ip_vs_dest_list *e;
+ struct ip_vs_dest *dest, *least;
+ int loh, doh;
+
+ if (set == NULL)
+ return NULL;
+
+ /* select the first destination server, whose weight > 0 */
+ for (e=set->list; e!=NULL; e=e->next) {
+ least = e->dest;
+ if (least->flags & IP_VS_DEST_F_OVERLOAD)
+ continue;
+
+ if ((atomic_read(&least->weight) > 0)
+ && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
+ loh = atomic_read(&least->activeconns) * 50
+ + atomic_read(&least->inactconns);
+ goto nextstage;
+ }
+ }
+ return NULL;
+
+ /* find the destination with the weighted least load */
+ nextstage:
+ /* e still points at the entry chosen above; continue after it */
+ for (e=e->next; e!=NULL; e=e->next) {
+ dest = e->dest;
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+ continue;
+
+ doh = atomic_read(&dest->activeconns) * 50
+ + atomic_read(&dest->inactconns);
+ /* loh/lw > doh/dw rewritten as loh*dw > doh*lw (no division) */
+ if ((loh * atomic_read(&dest->weight) >
+ doh * atomic_read(&least->weight))
+ && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+ least = dest;
+ loh = doh;
+ }
+ }
+
+ IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
+ "activeconns %d refcnt %d weight %d overhead %d\n",
+ NIPQUAD(least->addr.ip), ntohs(least->port),
+ atomic_read(&least->activeconns),
+ atomic_read(&least->refcnt),
+ atomic_read(&least->weight), loh);
+ return least;
+}
+
+
+/*
+ * get weighted most-connection node in the destination set
+ *
+ * Mirror image of ip_vs_dest_set_min(): picks the entry with the
+ * highest overhead/weight ratio among entries whose weight is > 0.
+ * Unlike the min variant it does not look at the OVERLOAD or AVAILABLE
+ * flags. Returns NULL when set is NULL or no entry has weight > 0.
+ * The caller in this file holds set->lock for writing.
+ */
+static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
+{
+ register struct ip_vs_dest_list *e;
+ struct ip_vs_dest *dest, *most;
+ int moh, doh;
+
+ if (set == NULL)
+ return NULL;
+
+ /* select the first destination server, whose weight > 0 */
+ for (e=set->list; e!=NULL; e=e->next) {
+ most = e->dest;
+ if (atomic_read(&most->weight) > 0) {
+ moh = atomic_read(&most->activeconns) * 50
+ + atomic_read(&most->inactconns);
+ goto nextstage;
+ }
+ }
+ return NULL;
+
+ /* find the destination with the weighted most load */
+ nextstage:
+ /* e still points at the entry chosen above; continue after it */
+ for (e=e->next; e!=NULL; e=e->next) {
+ dest = e->dest;
+ doh = atomic_read(&dest->activeconns) * 50
+ + atomic_read(&dest->inactconns);
+ /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
+ if ((moh * atomic_read(&dest->weight) <
+ doh * atomic_read(&most->weight))
+ && (atomic_read(&dest->weight) > 0)) {
+ most = dest;
+ moh = doh;
+ }
+ }
+
+ IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
+ "activeconns %d refcnt %d weight %d overhead %d\n",
+ NIPQUAD(most->addr.ip), ntohs(most->port),
+ atomic_read(&most->activeconns),
+ atomic_read(&most->refcnt),
+ atomic_read(&most->weight), moh);
+ return most;
+}
+
+
+/*
+ * IPVS lblcr entry represents an association between destination
+ * IP address and its destination server set
+ *
+ * Entries are chained into a hash bucket of ip_vs_lblcr_table through
+ * list; addr is the lookup key; lastuse is a jiffies timestamp that
+ * the expiry code compares against its timeouts.
+ */
+struct ip_vs_lblcr_entry {
+ struct list_head list;
+ __be32 addr; /* destination IP address */
+ struct ip_vs_dest_set set; /* destination server set */
+ unsigned long lastuse; /* last used time */
+};
+
+
+/*
+ * IPVS lblcr hash table
+ *
+ * One table is allocated per service (ip_vs_lblcr_init_svc) and stored
+ * in svc->sched_data. rover remembers the bucket at which the last
+ * expiry scan stopped; counter counts timer runs in which the table
+ * was not over max_size and triggers a full scan when it reaches a
+ * multiple of COUNT_FOR_FULL_EXPIRATION.
+ */
+struct ip_vs_lblcr_table {
+ struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */
+ atomic_t entries; /* number of entries */
+ int max_size; /* maximum size of entries */
+ struct timer_list periodic_timer; /* collect stale entries */
+ int rover; /* rover for expire check */
+ int counter; /* counter for no expire */
+};
+
+
+/*
+ * IPVS LBLCR sysctl table
+ *
+ * A single integer sysctl, "lblcr_expiration" (mode 0644), backed by
+ * sysctl_ip_vs_lblcr_expiration and handled by proc_dointvec_jiffies.
+ * NOTE(review): proc_dointvec_jiffies presumably converts between
+ * seconds in user space and jiffies in the variable -- confirm.
+ * The trailing element with ctl_name 0 terminates the table.
+ */
+
+static ctl_table vs_vars_table[] = {
+ {
+ .procname = "lblcr_expiration",
+ .data = &sysctl_ip_vs_lblcr_expiration,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ { .ctl_name = 0 }
+};
+
+static struct ctl_table_header * sysctl_header;
+
+/*
+ * Unlink an entry from its hash bucket, release every destination in
+ * its set and free the entry. The table's entries counter is NOT
+ * adjusted here; callers that track it decrement it themselves.
+ */
+static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
+{
+ list_del(&en->list);
+ ip_vs_dest_set_eraseall(&en->set);
+ kfree(en);
+}
+
+
+/*
+ * Map a destination address to a bucket index of the LBLCR table:
+ * multiplicative hash of the host-order address, masked to the table
+ * size.
+ */
+static inline unsigned ip_vs_lblcr_hashkey(__be32 addr)
+{
+	unsigned long product = ntohl(addr) * 2654435761UL;
+
+	return product & IP_VS_LBLCR_TAB_MASK;
+}
+
+
+/*
+ * Hash an entry in the ip_vs_lblcr_table.
+ * The entry is added at the head of the bucket selected by its address
+ * and the table's entry counter is incremented. Returns nothing.
+ */
+static void
+ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
+{
+ unsigned hash = ip_vs_lblcr_hashkey(en->addr);
+
+ list_add(&en->list, &tbl->bucket[hash]);
+ atomic_inc(&tbl->entries);
+}
+
+
+/*
+ * Look up the cache entry for a destination address.
+ * Returns the matching entry or NULL if the address is not cached.
+ * Called under read lock.
+ */
+static inline struct ip_vs_lblcr_entry *
+ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr)
+{
+	struct list_head *chain = &tbl->bucket[ip_vs_lblcr_hashkey(addr)];
+	struct ip_vs_lblcr_entry *en;
+
+	list_for_each_entry(en, chain, list) {
+		if (en->addr != addr)
+			continue;
+		return en;
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Make sure a cache entry exists for daddr and add dest to its server
+ * set. An existing entry is reused; otherwise a new one is allocated,
+ * initialised and hashed into the table.
+ * Returns the entry, or NULL if a new entry could not be allocated.
+ * Called under write lock.
+ */
+static inline struct ip_vs_lblcr_entry *
+ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, __be32 daddr,
+		struct ip_vs_dest *dest)
+{
+	struct ip_vs_lblcr_entry *en = ip_vs_lblcr_get(tbl, daddr);
+
+	if (en)
+		goto add_dest;
+
+	en = kmalloc(sizeof(*en), GFP_ATOMIC);
+	if (en == NULL) {
+		IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
+		return NULL;
+	}
+
+	en->addr = daddr;
+	en->lastuse = jiffies;
+
+	/* start with an empty destination set */
+	atomic_set(&en->set.size, 0);
+	en->set.list = NULL;
+	rwlock_init(&en->set.lock);
+
+	ip_vs_lblcr_hash(tbl, en);
+
+add_dest:
+	write_lock(&en->set.lock);
+	ip_vs_dest_set_insert(&en->set, dest);
+	write_unlock(&en->set.lock);
+
+	return en;
+}
+
+
+/*
+ * Free every entry in every bucket of the table.
+ * No locking is done: this is only called during cleanup. The table's
+ * entries counter is not updated.
+ */
+static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
+{
+	struct ip_vs_lblcr_entry *en, *tmp;
+	struct list_head *chain;
+
+	for (chain = tbl->bucket;
+	     chain < tbl->bucket + IP_VS_LBLCR_TAB_SIZE; chain++)
+		list_for_each_entry_safe(en, tmp, chain, list)
+			ip_vs_lblcr_free(en);
+}
+
+
+/*
+ * Full expiration scan: visit every bucket once, starting just after
+ * tbl->rover, and free each entry whose lastuse is at least
+ * sysctl_ip_vs_lblcr_expiration jiffies in the past. svc->sched_lock
+ * is taken for writing per bucket, so it is released between buckets.
+ */
+static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
+{
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
+ unsigned long now = jiffies;
+ int i, j;
+ struct ip_vs_lblcr_entry *en, *nxt;
+
+ for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+ j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
+
+ write_lock(&svc->sched_lock);
+ list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+ /* entry has not reached its expiration time yet: keep it */
+ if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
+ now))
+ continue;
+
+ ip_vs_lblcr_free(en);
+ atomic_dec(&tbl->entries);
+ }
+ write_unlock(&svc->sched_lock);
+ }
+ tbl->rover = j;
+}
+
+
+/*
+ * Periodical timer handler for IPVS lblcr table
+ * It is used to collect stale entries when the number of entries
+ * exceeds the maximum size of the table.
+ *
+ * Fixme: we probably need more complicated algorithm to collect
+ * entries that have not been used for a long time even
+ * if the number of entries doesn't exceed the maximum size
+ * of the table.
+ * The full expiration check is for this purpose now.
+ *
+ * data is the owning struct ip_vs_service cast to unsigned long (see
+ * setup_timer() in ip_vs_lblcr_init_svc). Each run does exactly one of:
+ * - a full scan, when counter is a multiple of
+ * COUNT_FOR_FULL_EXPIRATION (counter is then reset to 1);
+ * - nothing but counter++, when the table is not over max_size;
+ * - a partial scan that frees entries idle for ENTRY_TIMEOUT until
+ * "goal" entries are gone or every bucket has been visited.
+ * The timer is always re-armed CHECK_EXPIRE_INTERVAL jiffies ahead.
+ */
+static void ip_vs_lblcr_check_expire(unsigned long data)
+{
+ struct ip_vs_service *svc = (struct ip_vs_service *) data;
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
+ unsigned long now = jiffies;
+ int goal;
+ int i, j;
+ struct ip_vs_lblcr_entry *en, *nxt;
+
+ if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
+ /* do full expiration check */
+ ip_vs_lblcr_full_check(svc);
+ tbl->counter = 1;
+ goto out;
+ }
+
+ if (atomic_read(&tbl->entries) <= tbl->max_size) {
+ tbl->counter++;
+ goto out;
+ }
+
+ /* aim to free 4/3 of the excess, capped at half of max_size */
+ goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
+ if (goal > tbl->max_size/2)
+ goal = tbl->max_size/2;
+
+ for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+ j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
+
+ write_lock(&svc->sched_lock);
+ list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+ if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
+ continue;
+
+ ip_vs_lblcr_free(en);
+ atomic_dec(&tbl->entries);
+ goal--;
+ }
+ write_unlock(&svc->sched_lock);
+ /* goal is only tested between buckets, so a bucket is always finished */
+ if (goal <= 0)
+ break;
+ }
+ tbl->rover = j;
+
+ out:
+ mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
+}
+
+/*
+ * Per-service initialisation for the LBLCR scheduler.
+ *
+ * Allocates the hash table, stores it in svc->sched_data, initialises
+ * every field the rest of this file reads (buckets, entry counter,
+ * limits, rover, counter) and starts the periodic expiry timer.
+ *
+ * Returns 0 on success or -ENOMEM if the table cannot be allocated.
+ */
+static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_lblcr_table *tbl;
+
+	/*
+	 *    Allocate the ip_vs_lblcr_table for this service
+	 */
+	tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
+		  "current service\n", sizeof(*tbl));
+
+	/*
+	 *    Initialize the hash buckets
+	 */
+	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		INIT_LIST_HEAD(&tbl->bucket[i]);
+	}
+	/*
+	 *    kmalloc() does not zero the allocation, so the entry counter
+	 *    must be set explicitly: ip_vs_lblcr_hash() increments it and
+	 *    the expiry timer compares it against max_size.
+	 */
+	atomic_set(&tbl->entries, 0);
+	tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
+	tbl->rover = 0;
+	tbl->counter = 1;
+
+	/*
+	 *    Hook periodic timer for garbage collection
+	 */
+	setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
+			(unsigned long)svc);
+	mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
+
+	return 0;
+}
+
+
+/*
+ * Per-service teardown: stop the expiry timer (waiting for a running
+ * handler via del_timer_sync), free all cached entries, then free the
+ * table itself. Always returns 0.
+ */
+static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
+{
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
+
+ /* remove periodic timer */
+ del_timer_sync(&tbl->periodic_timer);
+
+ /* got to clean up table entries here */
+ ip_vs_lblcr_flush(tbl);
+
+ /* release the table itself */
+ kfree(tbl);
+ /* sizeof(*tbl) only uses the type; the freed table is not accessed */
+ IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
+ sizeof(*tbl));
+
+ return 0;
+}
+
+
+/*
+ * Pick the weighted least-loaded destination among all destinations of
+ * the service (not just those cached for one address). Destinations
+ * flagged IP_VS_DEST_F_OVERLOAD are skipped. Returns NULL when no
+ * non-overloaded destination with weight > 0 exists.
+ * The iph argument is not used by this function.
+ */
+static inline struct ip_vs_dest *
+__ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+ struct ip_vs_dest *dest, *least;
+ int loh, doh;
+
+ /*
+ * We think the overhead of processing active connections is fifty
+ * times higher than that of inactive connections in average. (This
+ * fifty times might not be accurate, we will change it later.) We
+ * use the following formula to estimate the overhead:
+ * dest->activeconns*50 + dest->inactconns
+ * and the load:
+ * (dest overhead) / dest->weight
+ *
+ * Remember -- no floats in kernel mode!!!
+ * The comparison of h1*w2 > h2*w1 is equivalent to that of
+ * h1/w1 > h2/w2
+ * if every weight is larger than zero.
+ *
+ * The server with weight=0 is quiesced and will not receive any
+ * new connection.
+ */
+ /* stage 1: first usable destination becomes the initial candidate */
+ list_for_each_entry(dest, &svc->destinations, n_list) {
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+ continue;
+
+ if (atomic_read(&dest->weight) > 0) {
+ least = dest;
+ loh = atomic_read(&least->activeconns) * 50
+ + atomic_read(&least->inactconns);
+ goto nextstage;
+ }
+ }
+ return NULL;
+
+ /*
+ * Find the destination with the least load.
+ */
+ nextstage:
+ /* stage 2: resume after the candidate and look for a lighter one */
+ list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+ continue;
+
+ doh = atomic_read(&dest->activeconns) * 50
+ + atomic_read(&dest->inactconns);
+ if (loh * atomic_read(&dest->weight) >
+ doh * atomic_read(&least->weight)) {
+ least = dest;
+ loh = doh;
+ }
+ }
+
+ IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d "
+ "activeconns %d refcnt %d weight %d overhead %d\n",
+ NIPQUAD(least->addr.ip), ntohs(least->port),
+ atomic_read(&least->activeconns),
+ atomic_read(&least->refcnt),
+ atomic_read(&least->weight), loh);
+
+ return least;
+}
+
+
+/*
+ * Tell whether dest should be bypassed: returns 1 when dest has more
+ * active connections than its weight AND some destination of the
+ * service is running below half of its own weight; returns 0 otherwise.
+ */
+static inline int
+is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *d;
+
+	/* dest itself is within its weight: nothing to do */
+	if (!(atomic_read(&dest->activeconns) > atomic_read(&dest->weight)))
+		return 0;
+
+	/* look for any destination that is less than half loaded */
+	list_for_each_entry(d, &svc->destinations, n_list) {
+		if (atomic_read(&d->activeconns)*2 < atomic_read(&d->weight))
+			return 1;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Locality-Based (weighted) Least-Connection scheduling
+ * (with replication: one destination address maps to a set of servers)
+ *
+ * Looks up the server set cached for the packet's destination address.
+ * If an entry exists, its least-loaded member is used unless it is
+ * missing or overloaded, in which case a destination is picked from
+ * the whole service and added to the set. If no entry exists, a
+ * destination is picked from the whole service and a new entry is
+ * created for it.
+ * Returns the chosen destination or NULL if none is available.
+ */
+static struct ip_vs_dest *
+ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
+ struct iphdr *iph = ip_hdr(skb);
+ struct ip_vs_dest *dest = NULL;
+ struct ip_vs_lblcr_entry *en;
+
+ IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
+
+ /* First look in our cache */
+ read_lock(&svc->sched_lock);
+ en = ip_vs_lblcr_get(tbl, iph->daddr);
+ if (en) {
+ /* We only hold a read lock, but this is atomic */
+ en->lastuse = jiffies;
+
+ /* Get the least loaded destination */
+ read_lock(&en->set.lock);
+ dest = ip_vs_dest_set_min(&en->set);
+ read_unlock(&en->set.lock);
+
+ /* More than one destination + enough time passed by, cleanup */
+ if (atomic_read(&en->set.size) > 1 &&
+ time_after(jiffies, en->set.lastmod +
+ sysctl_ip_vs_lblcr_expiration)) {
+ struct ip_vs_dest *m;
+
+ /* drop the most loaded member; dest chosen above is not re-evaluated */
+ write_lock(&en->set.lock);
+ m = ip_vs_dest_set_max(&en->set);
+ if (m)
+ ip_vs_dest_set_erase(&en->set, m);
+ write_unlock(&en->set.lock);
+ }
+
+ /* If the destination is not overloaded, use it */
+ if (dest && !is_overloaded(dest, svc)) {
+ read_unlock(&svc->sched_lock);
+ goto out;
+ }
+
+ /* The cache entry is invalid, time to schedule */
+ dest = __ip_vs_lblcr_schedule(svc, iph);
+ if (!dest) {
+ IP_VS_DBG(1, "no destination available\n");
+ read_unlock(&svc->sched_lock);
+ return NULL;
+ }
+
+ /* Update our cache entry */
+ write_lock(&en->set.lock);
+ ip_vs_dest_set_insert(&en->set, dest);
+ write_unlock(&en->set.lock);
+ }
+ read_unlock(&svc->sched_lock);
+
+ /* dest is non-NULL here only if a cache entry existed and was handled above */
+ if (dest)
+ goto out;
+
+ /* No cache entry, time to schedule */
+ dest = __ip_vs_lblcr_schedule(svc, iph);
+ if (!dest) {
+ IP_VS_DBG(1, "no destination available\n");
+ return NULL;
+ }
+
+ /* If we fail to create a cache entry, we'll just use the valid dest */
+ write_lock(&svc->sched_lock);
+ ip_vs_lblcr_new(tbl, iph->daddr, dest);
+ write_unlock(&svc->sched_lock);
+
+out:
+ IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
+ "--> server %u.%u.%u.%u:%d\n",
+ NIPQUAD(iph->daddr),
+ NIPQUAD(dest->addr.ip),
+ ntohs(dest->port));
+
+ return dest;
+}
+
+
+/*
+ * IPVS LBLCR Scheduler structure
+ *
+ * Descriptor registered by ip_vs_lblcr_init(). It names the scheduler
+ * "lblcr" and points at this file's per-service init/done hooks and
+ * schedule callback. When CONFIG_IP_VS_IPV6 is set the scheduler
+ * declares no IPv6 support (supports_ipv6 = 0).
+ */
+static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
+{
+ .name = "lblcr",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 0,
+#endif
+ .init_service = ip_vs_lblcr_init_svc,
+ .done_service = ip_vs_lblcr_done_svc,
+ .schedule = ip_vs_lblcr_schedule,
+};
+
+
+/*
+ * Module entry point: publish the LBLCR sysctl table, then register
+ * the scheduler. If scheduler registration fails the sysctl table is
+ * withdrawn again and the error code is returned.
+ */
+static int __init ip_vs_lblcr_init(void)
+{
+	int err;
+
+	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+
+	err = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+	if (err == 0)
+		return 0;
+
+	/* registration failed: undo the sysctl publication */
+	unregister_sysctl_table(sysctl_header);
+	return err;
+}
+
+
+/*
+ * Module exit: undo both registrations made by ip_vs_lblcr_init(),
+ * removing the sysctl table first and the scheduler second.
+ */
+static void __exit ip_vs_lblcr_cleanup(void)
+{
+ unregister_sysctl_table(sysctl_header);
+ unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+}
+
+
+module_init(ip_vs_lblcr_init);
+module_exit(ip_vs_lblcr_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
new file mode 100644
index 0000000..b69f808
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -0,0 +1,103 @@
+/*
+ * IPVS: Least-Connection Scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * Wensong Zhang : added the ip_vs_lc_update_svc
+ * Wensong Zhang : added any dest with weight=0 is quiesced
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * Estimated load of a destination for the LC scheduler.
+ *
+ * An active connection is assumed to cost 256 times as much as an
+ * inactive one (a rough figure that may be tuned later), hence:
+ *		activeconns*256 + inactconns
+ */
+static inline unsigned int
+ip_vs_lc_dest_overhead(struct ip_vs_dest *dest)
+{
+	int active = atomic_read(&dest->activeconns);
+	int inactive = atomic_read(&dest->inactconns);
+
+	return (active << 8) + inactive;
+}
+
+
+/*
+ *	Least Connection scheduling
+ *
+ *	Selects the destination with the smallest overhead as computed by
+ *	ip_vs_lc_dest_overhead(), i.e. (activeconns<<8) + inactconns.
+ *	Destinations that are flagged overloaded or whose weight is zero
+ *	are skipped: a zero weight means the server is quiesced, so its
+ *	existing connections are still served but it gets no new ones.
+ *	Returns NULL when no destination qualifies.
+ */
+static struct ip_vs_dest *
+ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_dest *least = NULL;
+	unsigned int loh = 0;
+
+	IP_VS_DBG(6, "ip_vs_lc_schedule(): Scheduling...\n");
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		unsigned int doh;
+
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+		if (atomic_read(&dest->weight) == 0)
+			continue;
+
+		doh = ip_vs_lc_dest_overhead(dest);
+		/* keep the current best unless this one is strictly lighter */
+		if (least && doh >= loh)
+			continue;
+
+		least = dest;
+		loh = doh;
+	}
+
+	if (!least)
+		return NULL;
+
+	IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d inactconns %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+		      atomic_read(&least->activeconns),
+		      atomic_read(&least->inactconns));
+
+	return least;
+}
+
+
+/*
+ * LC scheduler descriptor, registered by ip_vs_lc_init(). Only the
+ * schedule callback is provided (no per-service init/done hooks), and
+ * IPv6 is declared supported when CONFIG_IP_VS_IPV6 is set.
+ */
+static struct ip_vs_scheduler ip_vs_lc_scheduler = {
+ .name = "lc",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 1,
+#endif
+ .schedule = ip_vs_lc_schedule,
+};
+
+
+/* Module entry point: register the LC scheduler and return the result. */
+static int __init ip_vs_lc_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ;
+}
+
+/* Module exit: unregister the LC scheduler. */
+static void __exit ip_vs_lc_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_lc_scheduler);
+}
+
+module_init(ip_vs_lc_init);
+module_exit(ip_vs_lc_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
new file mode 100644
index 0000000..9a2d803
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -0,0 +1,138 @@
+/*
+ * IPVS: Never Queue scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The NQ algorithm adopts a two-speed model. When there is an idle server
+ * available, the job will be sent to the idle server, instead of waiting
+ * for a fast one. When there is no idle server available, the job will be
+ * sent to the server that minimize its expected delay (The Shortest
+ * Expected Delay scheduling algorithm).
+ *
+ * See the following paper for more information:
+ * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
+ * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
+ * pages 986-994, 1988.
+ *
+ * Thanks must go to Marko Buuri <marko@buuri.name> for talking NQ to me.
+ *
+ * The difference between NQ and SED is that NQ can improve overall
+ * system utilization.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * Cost of sending one more job to dest for the NQ scheduler: only the
+ * number of active connections is considered, plus one for the new job.
+ */
+static inline unsigned int
+ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
+{
+	int active = atomic_read(&dest->activeconns);
+
+	return active + 1;
+}
+
+
+/*
+ * Never Queue scheduling
+ *
+ * Returns the first eligible destination that has no active
+ * connections; if there is none, returns the eligible destination with
+ * the smallest (activeconns + 1) / weight. Destinations that are
+ * flagged overloaded or have weight 0 are not eligible. Returns NULL
+ * when no destination is eligible.
+ */
+static struct ip_vs_dest *
+ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+ struct ip_vs_dest *dest, *least = NULL;
+ unsigned int loh = 0, doh;
+
+ IP_VS_DBG(6, "ip_vs_nq_schedule(): Scheduling...\n");
+
+ /*
+ * We calculate the load of each dest server as follows:
+ * (server expected overhead) / dest->weight
+ *
+ * Remember -- no floats in kernel mode!!!
+ * The comparison of h1*w2 > h2*w1 is equivalent to that of
+ * h1/w1 > h2/w2
+ * if every weight is larger than zero.
+ *
+ * The server with weight=0 is quiesced and will not receive any
+ * new connections.
+ */
+
+ list_for_each_entry(dest, &svc->destinations, n_list) {
+
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD ||
+ !atomic_read(&dest->weight))
+ continue;
+
+ doh = ip_vs_nq_dest_overhead(dest);
+
+ /* return the server directly if it is idle */
+ if (atomic_read(&dest->activeconns) == 0) {
+ least = dest;
+ loh = doh;
+ goto out;
+ }
+
+ /* first candidate, or strictly lower load than the current best */
+ if (!least ||
+ (loh * atomic_read(&dest->weight) >
+ doh * atomic_read(&least->weight))) {
+ least = dest;
+ loh = doh;
+ }
+ }
+
+ if (!least)
+ return NULL;
+
+ out:
+ IP_VS_DBG_BUF(6, "NQ: server %s:%u "
+ "activeconns %d refcnt %d weight %d overhead %d\n",
+ IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ atomic_read(&least->activeconns),
+ atomic_read(&least->refcnt),
+ atomic_read(&least->weight), loh);
+
+ return least;
+}
+
+
+/*
+ * NQ scheduler descriptor, registered by ip_vs_nq_init(). Only the
+ * schedule callback is provided (no per-service init/done hooks), and
+ * IPv6 is declared supported when CONFIG_IP_VS_IPV6 is set.
+ */
+static struct ip_vs_scheduler ip_vs_nq_scheduler =
+{
+ .name = "nq",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 1,
+#endif
+ .schedule = ip_vs_nq_schedule,
+};
+
+
+/* Module entry point: register the NQ scheduler and return the result. */
+static int __init ip_vs_nq_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_nq_scheduler);
+}
+
+/* Module exit: unregister the NQ scheduler. */
+static void __exit ip_vs_nq_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
+}
+
+module_init(ip_vs_nq_init);
+module_exit(ip_vs_nq_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
new file mode 100644
index 0000000..0791f9e
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -0,0 +1,288 @@
+/*
+ * ip_vs_proto.c: transport protocol load balancing support for IPVS
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Julian Anastasov <ja@ssi.bg>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <asm/system.h>
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * IPVS protocols can only be registered/unregistered when the ipvs
+ * module is loaded/unloaded, so no lock is needed in accessing the
+ * ipvs protocol table.
+ */
+
+#define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
+#define IP_VS_PROTO_HASH(proto) ((proto) & (IP_VS_PROTO_TAB_SIZE-1))
+
+static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
+
+
+/*
+ *	register an ipvs protocol
+ *
+ *	Pushes pp onto the head of its hash chain in ip_vs_proto_table and
+ *	runs the protocol's init hook if it has one. Always returns 0.
+ */
+static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
+{
+	struct ip_vs_protocol **slot =
+		&ip_vs_proto_table[IP_VS_PROTO_HASH(pp->protocol)];
+
+	pp->next = *slot;
+	*slot = pp;
+
+	if (pp->init)
+		pp->init(pp);
+
+	return 0;
+}
+
+
+/*
+ *	unregister an ipvs protocol
+ *
+ *	Unlinks pp from its hash chain and runs the protocol's exit hook if
+ *	it has one. Returns 0 on success, -ESRCH if pp is not registered.
+ */
+static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
+{
+	struct ip_vs_protocol **link =
+		&ip_vs_proto_table[IP_VS_PROTO_HASH(pp->protocol)];
+
+	while (*link != NULL) {
+		if (*link != pp) {
+			link = &(*link)->next;
+			continue;
+		}
+
+		*link = pp->next;
+		if (pp->exit)
+			pp->exit(pp);
+		return 0;
+	}
+
+	return -ESRCH;
+}
+
+
+/*
+ *	get ip_vs_protocol object by its proto.
+ *
+ *	Returns the registered protocol whose number equals proto, or NULL
+ *	if no such protocol is registered.
+ */
+struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
+{
+	struct ip_vs_protocol *pp = ip_vs_proto_table[IP_VS_PROTO_HASH(proto)];
+
+	/* walk the chain until a match or the end (NULL) is reached */
+	while (pp != NULL && pp->protocol != proto)
+		pp = pp->next;
+
+	return pp;
+}
+
+
+/*
+ *	Propagate event for state change to all protocols
+ *
+ *	Calls the timeout_change hook, with the given flags, of every
+ *	registered protocol that provides one.
+ */
+void ip_vs_protocol_timeout_change(int flags)
+{
+	int slot;
+
+	for (slot = 0; slot < IP_VS_PROTO_TAB_SIZE; slot++) {
+		struct ip_vs_protocol *pp = ip_vs_proto_table[slot];
+
+		while (pp) {
+			if (pp->timeout_change)
+				pp->timeout_change(pp, flags);
+			pp = pp->next;
+		}
+	}
+}
+
+
+/*
+ * Return a freshly allocated copy (kmemdup, GFP_ATOMIC) of the first
+ * size bytes of table; the result is whatever kmemdup() returns.
+ * NOTE(review): size is passed straight to kmemdup, so it is a byte
+ * count, not an element count -- confirm callers pass sizeof().
+ */
+int *
+ip_vs_create_timeout_table(int *table, int size)
+{
+ return kmemdup(table, size, GFP_ATOMIC);
+}
+
+
+/*
+ *	Set timeout value for state specified by name
+ *
+ *	Finds the first of the num entries in names equal to name and stores
+ *	to * HZ in the matching slot of table.
+ *	Returns 0 on success, -EINVAL if table or name is NULL or to is 0,
+ *	and -ENOENT if no state has that name.
+ */
+int
+ip_vs_set_state_timeout(int *table, int num, char **names, char *name, int to)
+{
+	int idx;
+
+	if (table == NULL || name == NULL || to == 0)
+		return -EINVAL;
+
+	for (idx = 0; idx < num; idx++) {
+		if (strcmp(names[idx], name) == 0) {
+			table[idx] = to * HZ;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+
+/*
+ * Name of a connection state for the given protocol.
+ * Delegates to the protocol's state_name hook when the protocol is
+ * registered and has one; otherwise returns "NONE" for IPPROTO_IP and
+ * "ERR!" for anything else.
+ */
+const char * ip_vs_state_name(__u16 proto, int state)
+{
+	struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
+
+	if (pp && pp->state_name)
+		return pp->state_name(state);
+
+	if (IPPROTO_IP == proto)
+		return "NONE";
+
+	return "ERR!";
+}
+
+
+/*
+ * Log a one-line KERN_DEBUG description of the IPv4 packet found at
+ * offset in skb, prefixed by msg. Depending on how much of the packet
+ * can be read the line shows: "TRUNCATED" (no IP header), addresses
+ * plus "frag" (non-zero fragment offset), addresses only (ports not
+ * readable), or addresses with source and destination ports.
+ */
+static void
+ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
+ const struct sk_buff *skb,
+ int offset,
+ const char *msg)
+{
+ char buf[128];
+ struct iphdr _iph, *ih;
+
+ ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+ if (ih == NULL)
+ sprintf(buf, "%s TRUNCATED", pp->name);
+ else if (ih->frag_off & htons(IP_OFFSET))
+ sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
+ pp->name, NIPQUAD(ih->saddr),
+ NIPQUAD(ih->daddr));
+ else {
+ __be16 _ports[2], *pptr
+;
+ /* the two 16-bit ports sit right after the IP header (ihl 32-bit words) */
+ pptr = skb_header_pointer(skb, offset + ih->ihl*4,
+ sizeof(_ports), _ports);
+ if (pptr == NULL)
+ sprintf(buf, "%s TRUNCATED %u.%u.%u.%u->%u.%u.%u.%u",
+ pp->name,
+ NIPQUAD(ih->saddr),
+ NIPQUAD(ih->daddr));
+ else
+ sprintf(buf, "%s %u.%u.%u.%u:%u->%u.%u.%u.%u:%u",
+ pp->name,
+ NIPQUAD(ih->saddr),
+ ntohs(pptr[0]),
+ NIPQUAD(ih->daddr),
+ ntohs(pptr[1]));
+ }
+
+ printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * IPv6 counterpart of ip_vs_tcpudp_debug_packet_v4(): log a one-line
+ * KERN_DEBUG description of the IPv6 packet found at offset in skb.
+ * A packet whose next header is IPPROTO_FRAGMENT is reported as "frag";
+ * otherwise the ports are read immediately after the fixed IPv6 header.
+ */
+static void
+ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
+ const struct sk_buff *skb,
+ int offset,
+ const char *msg)
+{
+ char buf[192];
+ struct ipv6hdr _iph, *ih;
+
+ ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+ if (ih == NULL)
+ sprintf(buf, "%s TRUNCATED", pp->name);
+ else if (ih->nexthdr == IPPROTO_FRAGMENT)
+ sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT " frag",
+ pp->name, NIP6(ih->saddr),
+ NIP6(ih->daddr));
+ else {
+ __be16 _ports[2], *pptr;
+
+ /* no extension-header walk: ports are assumed to follow the fixed header */
+ pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
+ sizeof(_ports), _ports);
+ if (pptr == NULL)
+ sprintf(buf, "%s TRUNCATED " NIP6_FMT "->" NIP6_FMT,
+ pp->name,
+ NIP6(ih->saddr),
+ NIP6(ih->daddr));
+ else
+ sprintf(buf, "%s " NIP6_FMT ":%u->" NIP6_FMT ":%u",
+ pp->name,
+ NIP6(ih->saddr),
+ ntohs(pptr[0]),
+ NIP6(ih->daddr),
+ ntohs(pptr[1]));
+ }
+
+ printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+#endif
+
+
+/*
+ * Debug-print a TCP/UDP packet: dispatch to the IPv6 printer when IPv6
+ * support is compiled in and skb->protocol is ETH_P_IPV6, otherwise to
+ * the IPv4 printer.
+ */
+void
+ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
+ const struct sk_buff *skb,
+ int offset,
+ const char *msg)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb->protocol == htons(ETH_P_IPV6))
+ ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
+ else
+#endif
+ /* without CONFIG_IP_VS_IPV6 this is the only statement; with it, the else branch */
+ ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
+}
+
+
+/*
+ * Register every protocol handler that is enabled in the kernel
+ * configuration and log the list of their names. Always returns 0.
+ */
+int __init ip_vs_protocol_init(void)
+{
+ char protocols[64];
+/* register p and append ", <name>" to the protocols string */
+#define REGISTER_PROTOCOL(p) \
+ do { \
+ register_ip_vs_protocol(p); \
+ strcat(protocols, ", "); \
+ strcat(protocols, (p)->name); \
+ } while (0)
+
+ /*
+ * The list is printed from &protocols[2] to skip the leading ", ".
+ * Terminating index 2 as well keeps that pointer a valid empty
+ * string when no protocol is configured at all.
+ */
+ protocols[0] = '\0';
+ protocols[2] = '\0';
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ REGISTER_PROTOCOL(&ip_vs_protocol_tcp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ REGISTER_PROTOCOL(&ip_vs_protocol_udp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+ REGISTER_PROTOCOL(&ip_vs_protocol_ah);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+ REGISTER_PROTOCOL(&ip_vs_protocol_esp);
+#endif
+ IP_VS_INFO("Registered protocols (%s)\n", &protocols[2]);
+
+ return 0;
+}
+
+
+/*
+ * Unregister every protocol still present in ip_vs_proto_table.
+ * Each chain is drained from its head: unregistering the head entry
+ * unlinks it, so the next entry becomes the new head.
+ */
+void ip_vs_protocol_cleanup(void)
+{
+	int slot;
+
+	/* unregister all the ipvs protocols */
+	for (slot = 0; slot < IP_VS_PROTO_TAB_SIZE; slot++) {
+		struct ip_vs_protocol *pp;
+
+		for (pp = ip_vs_proto_table[slot]; pp != NULL;
+		     pp = ip_vs_proto_table[slot])
+			unregister_ip_vs_protocol(pp);
+	}
+}
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
new file mode 100644
index 0000000..80ab0c8
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -0,0 +1,235 @@
+/*
+ * ip_vs_proto_ah_esp.c: AH/ESP IPSec load balancing support for IPVS
+ *
+ * Authors: Julian Anastasov <ja@ssi.bg>, February 2002
+ * Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation;
+ *
+ */
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+/* TODO:
+
+struct isakmp_hdr {
+ __u8 icookie[8];
+ __u8 rcookie[8];
+ __u8 np;
+ __u8 version;
+ __u8 xchgtype;
+ __u8 flags;
+ __u32 msgid;
+ __u32 length;
+};
+
+*/
+
+#define PORT_ISAKMP 500
+
+
+/*
+ * Look up the connection for an AH/ESP packet by searching for the UDP
+ * entry with port PORT_ISAKMP on both ends.  With 'inverse' set the
+ * source and destination addresses are swapped for the lookup.
+ * Returns the connection found, or NULL.
+ */
+static struct ip_vs_conn *
+ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		   const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		   int inverse)
+{
+	const union nf_inet_addr *from = &iph->saddr;
+	const union nf_inet_addr *to = &iph->daddr;
+	struct ip_vs_conn *conn;
+
+	if (unlikely(inverse)) {
+		from = &iph->daddr;
+		to = &iph->saddr;
+	}
+
+	conn = ip_vs_conn_in_get(af, IPPROTO_UDP,
+				 from, htons(PORT_ISAKMP),
+				 to, htons(PORT_ISAKMP));
+	if (conn)
+		return conn;
+
+	/*
+	 * No ISAKMP entry: we cannot tell whether the packet belongs to
+	 * one of our services, so the conn_schedule hook is expected to
+	 * return NF_ACCEPT for it.
+	 */
+	IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
+		      "%s%s %s->%s\n",
+		      inverse ? "ICMP+" : "",
+		      pp->name,
+		      IP_VS_DBG_ADDR(af, &iph->saddr),
+		      IP_VS_DBG_ADDR(af, &iph->daddr));
+
+	return NULL;
+}
+
+
+/*
+ * Outgoing-direction counterpart of the AH/ESP lookup: search for the
+ * UDP entry with port PORT_ISAKMP on both ends via ip_vs_conn_out_get().
+ * With 'inverse' set the addresses are swapped for the lookup.
+ * Returns the connection found, or NULL.
+ */
+static struct ip_vs_conn *
+ah_esp_conn_out_get(int af, const struct sk_buff *skb,
+		    struct ip_vs_protocol *pp,
+		    const struct ip_vs_iphdr *iph,
+		    unsigned int proto_off,
+		    int inverse)
+{
+	const union nf_inet_addr *from = &iph->saddr;
+	const union nf_inet_addr *to = &iph->daddr;
+	struct ip_vs_conn *conn;
+
+	if (unlikely(inverse)) {
+		from = &iph->daddr;
+		to = &iph->saddr;
+	}
+
+	conn = ip_vs_conn_out_get(af, IPPROTO_UDP,
+				  from, htons(PORT_ISAKMP),
+				  to, htons(PORT_ISAKMP));
+	if (conn)
+		return conn;
+
+	IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
+		      "%s%s %s->%s\n",
+		      inverse ? "ICMP+" : "",
+		      pp->name,
+		      IP_VS_DBG_ADDR(af, &iph->saddr),
+		      IP_VS_DBG_ADDR(af, &iph->daddr));
+
+	return NULL;
+}
+
+
+/*
+ * Scheduling hook for AH/ESP.  These protocols are handled only as
+ * traffic related to an existing entry, so no connection is created:
+ * the verdict is set to NF_ACCEPT and 0 is returned.
+ */
+static int
+ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+		     int *verdict, struct ip_vs_conn **cpp)
+{
+	/* Let the IP stack deal with the packet. */
+	*verdict = NF_ACCEPT;
+
+	return 0;
+}
+
+
+/*
+ * Print "<msg>: <proto> <saddr>-><daddr>" for the IPv4 header found at
+ * 'offset' in the skb, or "<proto> TRUNCATED" when the header cannot be
+ * read.
+ */
+static void
+ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		       int offset, const char *msg)
+{
+	struct iphdr hdr_copy, *iph;
+	char buf[256];
+
+	iph = skb_header_pointer(skb, offset, sizeof(hdr_copy), &hdr_copy);
+	if (iph != NULL)
+		sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u",
+			pp->name, NIPQUAD(iph->saddr),
+			NIPQUAD(iph->daddr));
+	else
+		sprintf(buf, "%s TRUNCATED", pp->name);
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * IPv6 variant of the AH/ESP debug printer: prints the protocol name
+ * and both addresses of the IPv6 header at 'offset', or
+ * "<proto> TRUNCATED" when the header cannot be read.
+ */
+static void
+ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		       int offset, const char *msg)
+{
+	struct ipv6hdr hdr_copy, *ip6h;
+	char buf[256];
+
+	ip6h = skb_header_pointer(skb, offset, sizeof(hdr_copy), &hdr_copy);
+	if (ip6h != NULL)
+		sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT,
+			pp->name, NIP6(ip6h->saddr),
+			NIP6(ip6h->daddr));
+	else
+		sprintf(buf, "%s TRUNCATED", pp->name);
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+#endif
+
+/*
+ * debug_packet hook for AH/ESP: use the IPv6 printer for ETH_P_IPV6
+ * skbs (when IPv6 support is compiled in), the IPv4 printer otherwise.
+ */
+static void
+ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		    int offset, const char *msg)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (skb->protocol == htons(ETH_P_IPV6)) {
+		ah_esp_debug_packet_v6(pp, skb, offset, msg);
+		return;
+	}
+#endif
+	ah_esp_debug_packet_v4(pp, skb, offset, msg);
+}
+
+
+/* init hook shared by the AH and ESP protocol entries; currently a no-op. */
+static void ah_esp_init(struct ip_vs_protocol *pp)
+{
+ /* nothing to do now */
+}
+
+
+/* exit hook shared by the AH and ESP protocol entries; currently a no-op. */
+static void ah_esp_exit(struct ip_vs_protocol *pp)
+{
+ /* nothing to do now */
+}
+
+
+#ifdef CONFIG_IP_VS_PROTO_AH
+/*
+ * Protocol entry for AH (built only with CONFIG_IP_VS_PROTO_AH).
+ * Only the init/exit, scheduling, connection lookup and debug hooks are
+ * provided; the NAT, checksum, state, application and timeout hooks are
+ * left NULL.
+ */
+struct ip_vs_protocol ip_vs_protocol_ah = {
+ .name = "AH",
+ .protocol = IPPROTO_AH,
+ .num_states = 1,
+ .dont_defrag = 1,
+ .init = ah_esp_init,
+ .exit = ah_esp_exit,
+ .conn_schedule = ah_esp_conn_schedule,
+ .conn_in_get = ah_esp_conn_in_get,
+ .conn_out_get = ah_esp_conn_out_get,
+ .snat_handler = NULL,
+ .dnat_handler = NULL,
+ .csum_check = NULL,
+ .state_transition = NULL,
+ .register_app = NULL,
+ .unregister_app = NULL,
+ .app_conn_bind = NULL,
+ .debug_packet = ah_esp_debug_packet,
+ .timeout_change = NULL, /* ISAKMP */
+ .set_state_timeout = NULL,
+};
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_ESP
+/*
+ * Protocol entry for ESP (built only with CONFIG_IP_VS_PROTO_ESP).
+ * It uses the same ah_esp_* hooks as the AH entry; members not listed
+ * here (e.g. set_state_timeout) are implicitly zero-initialized.
+ */
+struct ip_vs_protocol ip_vs_protocol_esp = {
+ .name = "ESP",
+ .protocol = IPPROTO_ESP,
+ .num_states = 1,
+ .dont_defrag = 1,
+ .init = ah_esp_init,
+ .exit = ah_esp_exit,
+ .conn_schedule = ah_esp_conn_schedule,
+ .conn_in_get = ah_esp_conn_in_get,
+ .conn_out_get = ah_esp_conn_out_get,
+ .snat_handler = NULL,
+ .dnat_handler = NULL,
+ .csum_check = NULL,
+ .state_transition = NULL,
+ .register_app = NULL,
+ .unregister_app = NULL,
+ .app_conn_bind = NULL,
+ .debug_packet = ah_esp_debug_packet,
+ .timeout_change = NULL, /* ISAKMP */
+};
+#endif
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
new file mode 100644
index 0000000..dd4566e
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -0,0 +1,732 @@
+/*
+ * ip_vs_proto_tcp.c: TCP load balancing support for IPVS
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Julian Anastasov <ja@ssi.bg>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/tcp.h> /* for tcphdr */
+#include <net/ip.h>
+#include <net/tcp.h> /* for csum_tcpudp_magic */
+#include <net/ip6_checksum.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * Find the connection entry for a TCP packet via ip_vs_conn_in_get().
+ * The source/destination ports are read at 'proto_off'; with 'inverse'
+ * set, addresses and ports are swapped for the lookup.
+ * Returns NULL when the ports cannot be read or nothing matches.
+ */
+static struct ip_vs_conn *
+tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		int inverse)
+{
+	__be16 port_copy[2], *ports;
+
+	ports = skb_header_pointer(skb, proto_off, sizeof(port_copy),
+				   port_copy);
+	if (!ports)
+		return NULL;
+
+	if (unlikely(inverse))
+		return ip_vs_conn_in_get(af, iph->protocol,
+					 &iph->daddr, ports[1],
+					 &iph->saddr, ports[0]);
+
+	return ip_vs_conn_in_get(af, iph->protocol,
+				 &iph->saddr, ports[0],
+				 &iph->daddr, ports[1]);
+}
+
+/*
+ * Find the connection entry for a TCP packet via ip_vs_conn_out_get().
+ * The source/destination ports are read at 'proto_off'; with 'inverse'
+ * set, addresses and ports are swapped for the lookup.
+ * Returns NULL when the ports cannot be read or nothing matches.
+ */
+static struct ip_vs_conn *
+tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		 int inverse)
+{
+	__be16 port_copy[2], *ports;
+
+	ports = skb_header_pointer(skb, proto_off, sizeof(port_copy),
+				   port_copy);
+	if (!ports)
+		return NULL;
+
+	if (unlikely(inverse))
+		return ip_vs_conn_out_get(af, iph->protocol,
+					  &iph->daddr, ports[1],
+					  &iph->saddr, ports[0]);
+
+	return ip_vs_conn_out_get(af, iph->protocol,
+				  &iph->saddr, ports[0],
+				  &iph->daddr, ports[1]);
+}
+
+
+/*
+ * Scheduling hook for TCP.
+ *
+ * Only a SYN addressed to a known virtual service creates a connection;
+ * any other packet falls through with return value 1 and *cpp untouched.
+ * Returns 0 with *verdict set when the packet is to be dropped or was
+ * handed to ip_vs_leave(); returns 1 otherwise.
+ */
+static int
+tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+		  int *verdict, struct ip_vs_conn **cpp)
+{
+	struct ip_vs_iphdr iph;
+	struct tcphdr hdr_copy, *tcph;
+	struct ip_vs_service *svc;
+
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+	tcph = skb_header_pointer(skb, iph.len, sizeof(hdr_copy), &hdr_copy);
+	if (!tcph) {
+		*verdict = NF_DROP;
+		return 0;
+	}
+
+	if (!tcph->syn)
+		return 1;
+
+	svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
+				tcph->dest);
+	if (!svc)
+		return 1;
+
+	if (ip_vs_todrop()) {
+		/* The system looks overloaded: give up on this packet. */
+		ip_vs_service_put(svc);
+		*verdict = NF_DROP;
+		return 0;
+	}
+
+	/*
+	 * Ask the virtual service to pick a real server and create the
+	 * connection entry for the new connection.
+	 */
+	*cpp = ip_vs_schedule(svc, skb);
+	if (!*cpp) {
+		*verdict = ip_vs_leave(svc, skb, pp);
+		return 0;
+	}
+
+	ip_vs_service_put(svc);
+	return 1;
+}
+
+
+/*
+ * Incrementally update tcph->check after one address (oldip -> newip)
+ * and one port (oldport -> newport) were rewritten, without summing the
+ * payload again.
+ */
+static inline void
+tcp_fast_csum_update(int af, struct tcphdr *tcph,
+		     const union nf_inet_addr *oldip,
+		     const union nf_inet_addr *newip,
+		     __be16 oldport, __be16 newport)
+{
+	/* Port delta applied to the complemented stored checksum. */
+	__wsum sum = ip_vs_check_diff2(oldport, newport,
+				       ~csum_unfold(tcph->check));
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		sum = ip_vs_check_diff16(oldip->ip6, newip->ip6, sum);
+	else
+#endif
+		sum = ip_vs_check_diff4(oldip->ip, newip->ip, sum);
+
+	tcph->check = csum_fold(sum);
+}
+
+
+/*
+ * Update tcph->check for a CHECKSUM_PARTIAL skb after the address
+ * (oldip -> newip) and the TCP length (oldlen -> newlen, 16-bit network
+ * order) used in the pseudo-header changed.
+ *
+ * With CHECKSUM_PARTIAL the check field holds the folded, NOT
+ * complemented, pseudo-header sum that the device completes later.  The
+ * deltas are therefore added to the stored value as it is and the result
+ * is stored un-complemented again (~csum_fold), in contrast to the
+ * fast-update helper, which works on a finished (complemented) checksum.
+ */
+static inline void
+tcp_partial_csum_update(int af, struct tcphdr *tcph,
+			const union nf_inet_addr *oldip,
+			const union nf_inet_addr *newip,
+			__be16 oldlen, __be16 newlen)
+{
+	__wsum sum = ip_vs_check_diff2(oldlen, newlen,
+				       csum_unfold(tcph->check));
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		sum = ip_vs_check_diff16(oldip->ip6, newip->ip6, sum);
+	else
+#endif
+		sum = ip_vs_check_diff4(oldip->ip, newip->ip, sum);
+
+	tcph->check = ~csum_fold(sum);
+}
+
+
+/*
+ * SNAT hook for TCP: rewrite the TCP source port to cp->vport and bring
+ * the TCP checksum up to date for the daddr -> vaddr address change.
+ *
+ * Returns 1 on success; 0 when the skb cannot be made writable, the
+ * checksum check fails or the application helper rejects the packet.
+ */
+static int
+tcp_snat_handler(struct sk_buff *skb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct tcphdr *tcph;
+	unsigned int tcphoff;
+	int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		tcphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		tcphoff = ip_hdrlen(skb);
+	/* TCP length before an application helper may resize the packet */
+	oldlen = skb->len - tcphoff;
+
+	/* csum_check requires unshared skb */
+	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+			return 0;
+
+		/* Call application helper if needed */
+		if (!ip_vs_app_pkt_out(cp, skb))
+			return 0;
+	}
+
+	tcph = (void *)skb_network_header(skb) + tcphoff;
+	tcph->source = cp->vport;
+
+	/* Adjust TCP checksums */
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		/*
+		 * The helper takes 16-bit network-order lengths, so the
+		 * conversion must be htons(); htonl() would be truncated
+		 * when narrowed to __be16.
+		 */
+		tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+					htons(oldlen),
+					htons(skb->len - tcphoff));
+	} else if (!cp->app) {
+		/* Only port and addr are changed, do fast csum update */
+		tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+				     cp->dport, cp->vport);
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		tcph->check = 0;
+		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
+						      &cp->caddr.in6,
+						      skb->len - tcphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
+							cp->caddr.ip,
+							skb->len - tcphoff,
+							cp->protocol,
+							skb->csum);
+
+		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
+			  pp->name, tcph->check,
+			  (char*)&(tcph->check) - (char*)tcph);
+	}
+	return 1;
+}
+
+
+/*
+ * DNAT hook for TCP: rewrite the TCP destination port to cp->dport and
+ * bring the TCP checksum up to date for the vaddr -> daddr address
+ * change.
+ *
+ * Returns 1 on success; 0 when the skb cannot be made writable, the
+ * checksum check fails or the application helper rejects the packet.
+ */
+static int
+tcp_dnat_handler(struct sk_buff *skb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct tcphdr *tcph;
+	unsigned int tcphoff;
+	int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		tcphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		tcphoff = ip_hdrlen(skb);
+	/* TCP length before an application helper may resize the packet */
+	oldlen = skb->len - tcphoff;
+
+	/* csum_check requires unshared skb */
+	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+			return 0;
+
+		/*
+		 *	Attempt ip_vs_app call.
+		 *	It will fix ip_vs_conn and iph ack_seq stuff
+		 */
+		if (!ip_vs_app_pkt_in(cp, skb))
+			return 0;
+	}
+
+	tcph = (void *)skb_network_header(skb) + tcphoff;
+	tcph->dest = cp->dport;
+
+	/*
+	 *	Adjust TCP checksums
+	 */
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		/*
+		 * Old address is vaddr, new address is daddr (same order
+		 * as the fast update below).  The helper takes 16-bit
+		 * network-order lengths, hence htons() and not htonl().
+		 */
+		tcp_partial_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
+					htons(oldlen),
+					htons(skb->len - tcphoff));
+	} else if (!cp->app) {
+		/* Only port and addr are changed, do fast csum update */
+		tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
+				     cp->vport, cp->dport);
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		tcph->check = 0;
+		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			tcph->check = csum_ipv6_magic(&cp->caddr.in6,
+						      &cp->daddr.in6,
+						      skb->len - tcphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			tcph->check = csum_tcpudp_magic(cp->caddr.ip,
+							cp->daddr.ip,
+							skb->len - tcphoff,
+							cp->protocol,
+							skb->csum);
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+	return 1;
+}
+
+
+/*
+ * Verify the TCP checksum of skb.
+ *
+ * For CHECKSUM_NONE the sum over the TCP segment is first computed into
+ * skb->csum; that case then shares the CHECKSUM_COMPLETE code, which
+ * combines skb->csum with the pseudo-header and expects a zero result.
+ * Any other ip_summed value is accepted without verification.
+ *
+ * Returns 1 when the checksum is valid or needs no check, 0 on failure.
+ */
+static int
+tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+ unsigned int tcphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ tcphoff = sizeof(struct ipv6hdr);
+ else
+#endif
+ tcphoff = ip_hdrlen(skb);
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_NONE:
+ skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
+ /* fall through: verify the sum that was just computed */
+ case CHECKSUM_COMPLETE:
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len - tcphoff,
+ ipv6_hdr(skb)->nexthdr,
+ skb->csum)) {
+ IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+ "Failed checksum for");
+ return 0;
+ }
+ } else
+#endif
+ if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr,
+ skb->len - tcphoff,
+ ip_hdr(skb)->protocol,
+ skb->csum)) {
+ IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+ "Failed checksum for");
+ return 0;
+ }
+ break;
+ default:
+ /* No need to checksum. */
+ break;
+ }
+
+ return 1;
+}
+
+
+/*
+ * Row offsets into the state transition tables: each direction owns a
+ * group of four consecutive rows (one per flag index returned by
+ * tcp_state_idx()), so the offsets are multiples of 4.
+ */
+#define TCP_DIR_INPUT 0
+#define TCP_DIR_OUTPUT 4
+#define TCP_DIR_INPUT_ONLY 8
+
+/* Maps an IP_VS_DIR_* direction to its row offset above. */
+static const int tcp_state_off[IP_VS_DIR_LAST] = {
+ [IP_VS_DIR_INPUT] = TCP_DIR_INPUT,
+ [IP_VS_DIR_OUTPUT] = TCP_DIR_OUTPUT,
+ [IP_VS_DIR_INPUT_ONLY] = TCP_DIR_INPUT_ONLY,
+};
+
+/*
+ * Timeout table[state]
+ *
+ * Per-state connection timeouts, written as multiples of HZ.
+ * ip_vs_tcp_init() installs this array as the protocol's timeout_table.
+ */
+static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
+ [IP_VS_TCP_S_NONE] = 2*HZ,
+ [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ,
+ [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ,
+ [IP_VS_TCP_S_SYN_RECV] = 1*60*HZ,
+ [IP_VS_TCP_S_FIN_WAIT] = 2*60*HZ,
+ [IP_VS_TCP_S_TIME_WAIT] = 2*60*HZ,
+ [IP_VS_TCP_S_CLOSE] = 10*HZ,
+ [IP_VS_TCP_S_CLOSE_WAIT] = 60*HZ,
+ [IP_VS_TCP_S_LAST_ACK] = 30*HZ,
+ [IP_VS_TCP_S_LISTEN] = 2*60*HZ,
+ [IP_VS_TCP_S_SYNACK] = 120*HZ,
+ [IP_VS_TCP_S_LAST] = 2*HZ,
+};
+
+/*
+ * Printable name for each TCP state.  Used by tcp_state_name() and
+ * passed to ip_vs_set_state_timeout() by tcp_set_state_timeout().
+ */
+static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
+ [IP_VS_TCP_S_NONE] = "NONE",
+ [IP_VS_TCP_S_ESTABLISHED] = "ESTABLISHED",
+ [IP_VS_TCP_S_SYN_SENT] = "SYN_SENT",
+ [IP_VS_TCP_S_SYN_RECV] = "SYN_RECV",
+ [IP_VS_TCP_S_FIN_WAIT] = "FIN_WAIT",
+ [IP_VS_TCP_S_TIME_WAIT] = "TIME_WAIT",
+ [IP_VS_TCP_S_CLOSE] = "CLOSE",
+ [IP_VS_TCP_S_CLOSE_WAIT] = "CLOSE_WAIT",
+ [IP_VS_TCP_S_LAST_ACK] = "LAST_ACK",
+ [IP_VS_TCP_S_LISTEN] = "LISTEN",
+ [IP_VS_TCP_S_SYNACK] = "SYNACK",
+ [IP_VS_TCP_S_LAST] = "BUG!",
+};
+
+/* Short aliases for the TCP states, used to keep the transition tables compact. */
+#define sNO IP_VS_TCP_S_NONE
+#define sES IP_VS_TCP_S_ESTABLISHED
+#define sSS IP_VS_TCP_S_SYN_SENT
+#define sSR IP_VS_TCP_S_SYN_RECV
+#define sFW IP_VS_TCP_S_FIN_WAIT
+#define sTW IP_VS_TCP_S_TIME_WAIT
+#define sCL IP_VS_TCP_S_CLOSE
+#define sCW IP_VS_TCP_S_CLOSE_WAIT
+#define sLA IP_VS_TCP_S_LAST_ACK
+#define sLI IP_VS_TCP_S_LISTEN
+#define sSA IP_VS_TCP_S_SYNACK
+
+/* One row of a transition table: the next state for each current state. */
+struct tcp_states_t {
+ int next_state[IP_VS_TCP_S_LAST];
+};
+
+/*
+ * Return the printable name of a TCP state: "ERR!" for a state at or
+ * beyond IP_VS_TCP_S_LAST, "?" for a state without a table entry.
+ */
+static const char * tcp_state_name(int state)
+{
+	const char *name;
+
+	if (state >= IP_VS_TCP_S_LAST)
+		return "ERR!";
+
+	name = tcp_state_name_table[state];
+	if (name == NULL)
+		name = "?";
+
+	return name;
+}
+
+/*
+ * Default state transition table.  set_tcp_state() selects the row with
+ * (direction offset + flag index) and the column with the connection's
+ * current state; the entry is the next state.
+ */
+static struct tcp_states_t tcp_states [] = {
+/* INPUT */
+/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
+/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
+
+/* OUTPUT */
+/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
+/*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
+/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
+/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
+
+/* INPUT-ONLY */
+/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
+/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+};
+
+/*
+ * Alternative transition table with the same layout as tcp_states;
+ * tcp_timeout_change() switches to it when the secure_tcp flag is set.
+ */
+static struct tcp_states_t tcp_states_dos [] = {
+/* INPUT */
+/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
+/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
+/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+
+/* OUTPUT */
+/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
+/*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
+/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
+/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
+
+/* INPUT-ONLY */
+/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
+/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
+/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+};
+
+/* Transition table currently in use; switched by tcp_timeout_change(). */
+static struct tcp_states_t *tcp_state_table = tcp_states;
+
+
+/*
+ * timeout_change hook: bit 0 of 'flags' (secure_tcp) selects which
+ * transition table is active - tcp_states_dos when set, tcp_states
+ * when clear.
+ */
+static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
+{
+	/*
+	** FIXME: secure_tcp should become an independent sysctl
+	** variable, or be made per-service or per-app, because it is
+	** valid for most if not for all of the applications.  Something
+	** like "capabilities" (flags) for each object.
+	*/
+	if (flags & 1)
+		tcp_state_table = tcp_states_dos;
+	else
+		tcp_state_table = tcp_states;
+}
+
+/*
+ * set_state_timeout hook: forwards to ip_vs_set_state_timeout() with the
+ * protocol's timeout table and the TCP state name table, and returns its
+ * result.
+ */
+static int
+tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
+{
+ return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
+ tcp_state_name_table, sname, to);
+}
+
+/*
+ * Map the flags of a TCP header to a row index within a direction
+ * group of the transition tables: syn=0, fin=1, ack=2, rst=3.
+ * RST is tested first, then SYN, FIN and ACK.  Returns -1 when none of
+ * these flags is set.
+ */
+static inline int tcp_state_idx(struct tcphdr *th)
+{
+	int idx = -1;
+
+	if (th->rst)
+		idx = 3;
+	else if (th->syn)
+		idx = 0;
+	else if (th->fin)
+		idx = 1;
+	else if (th->ack)
+		idx = 2;
+
+	return idx;
+}
+
+/*
+ * Move connection 'cp' to the state dictated by the active transition
+ * table for the packet direction and the flags in 'th', keep the
+ * destination's active/inactive connection counters in step with the
+ * IP_VS_CONN_F_INACTIVE flag, and load the timeout of the new state.
+ * A header with none of the recognised flags leads to IP_VS_TCP_S_CLOSE.
+ */
+static inline void
+set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+	      int direction, struct tcphdr *th)
+{
+	int next = IP_VS_TCP_S_CLOSE;
+	int dir_off = tcp_state_off[direction];
+	int flag_idx;
+
+	/*
+	 * While no output packet has been seen, input uses the
+	 * INPUT_ONLY rows; the first output packet clears NO_OUTPUT.
+	 */
+	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
+		if (dir_off != TCP_DIR_OUTPUT)
+			dir_off = TCP_DIR_INPUT_ONLY;
+		else
+			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
+	}
+
+	flag_idx = tcp_state_idx(th);
+	if (flag_idx >= 0) {
+		next = tcp_state_table[dir_off + flag_idx].next_state[cp->state];
+	} else {
+		IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", flag_idx);
+	}
+
+	if (next != cp->state) {
+		struct ip_vs_dest *dest = cp->dest;
+
+		IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
+			      "%s:%d state: %s->%s conn->refcnt:%d\n",
+			      pp->name,
+			      ((dir_off == TCP_DIR_OUTPUT) ?
+			       "output " : "input "),
+			      th->syn ? 'S' : '.',
+			      th->fin ? 'F' : '.',
+			      th->ack ? 'A' : '.',
+			      th->rst ? 'R' : '.',
+			      IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+			      ntohs(cp->dport),
+			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+			      ntohs(cp->cport),
+			      tcp_state_name(cp->state),
+			      tcp_state_name(next),
+			      atomic_read(&cp->refcnt));
+
+		if (dest) {
+			int established = (next == IP_VS_TCP_S_ESTABLISHED);
+			int inactive =
+				(cp->flags & IP_VS_CONN_F_INACTIVE) != 0;
+
+			if (!inactive && !established) {
+				/* active connection leaves ESTABLISHED */
+				atomic_dec(&dest->activeconns);
+				atomic_inc(&dest->inactconns);
+				cp->flags |= IP_VS_CONN_F_INACTIVE;
+			} else if (inactive && established) {
+				/* inactive connection becomes ESTABLISHED */
+				atomic_inc(&dest->activeconns);
+				atomic_dec(&dest->inactconns);
+				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+			}
+		}
+	}
+
+	cp->state = next;
+	cp->timeout = pp->timeout_table[next];
+}
+
+
+/*
+ * state_transition hook for TCP: read the TCP header that follows the
+ * IP header and update the connection state under cp->lock.
+ * Returns 0 when the TCP header cannot be read, 1 otherwise.
+ */
+static int
+tcp_state_transition(struct ip_vs_conn *cp, int direction,
+		     const struct sk_buff *skb,
+		     struct ip_vs_protocol *pp)
+{
+	struct tcphdr hdr_copy, *tcph;
+	int ihl;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af != AF_INET)
+		ihl = sizeof(struct ipv6hdr);
+	else
+#endif
+		ihl = ip_hdrlen(skb);
+
+	tcph = skb_header_pointer(skb, ihl, sizeof(hdr_copy), &hdr_copy);
+	if (!tcph)
+		return 0;
+
+	spin_lock(&cp->lock);
+	set_tcp_state(pp, cp, direction, tcph);
+	spin_unlock(&cp->lock);
+
+	return 1;
+}
+
+
+/*
+ * Hash table for TCP application incarnations
+ *
+ * 2^TCP_APP_TAB_BITS list heads indexed by tcp_app_hashkey(port);
+ * accesses in this file take tcp_app_lock.
+ */
+#define TCP_APP_TAB_BITS 4
+#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
+#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
+
+static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
+static DEFINE_SPINLOCK(tcp_app_lock);
+
+/*
+ * Hash a port (taken as a raw 16-bit value) into a tcp_apps bucket
+ * index: the port XORed with itself shifted right by TCP_APP_TAB_BITS,
+ * masked to the table size.
+ */
+static inline __u16 tcp_app_hashkey(__be16 port)
+{
+	u16 raw = (__force u16)port;
+
+	return ((raw >> TCP_APP_TAB_BITS) ^ raw) & TCP_APP_TAB_MASK;
+}
+
+
+/*
+ * Add an application incarnation to the TCP application hash table and
+ * count it in ip_vs_protocol_tcp.appcnt.
+ * Returns 0 on success, -EEXIST when an incarnation with the same port
+ * is already in the table.
+ */
+static int tcp_register_app(struct ip_vs_app *inc)
+{
+	__be16 port = inc->port;
+	__u16 bucket = tcp_app_hashkey(port);
+	struct ip_vs_app *cur;
+	int ret = 0;
+
+	spin_lock_bh(&tcp_app_lock);
+
+	list_for_each_entry(cur, &tcp_apps[bucket], p_list) {
+		if (cur->port == port) {
+			ret = -EEXIST;
+			break;
+		}
+	}
+
+	if (ret == 0) {
+		list_add(&inc->p_list, &tcp_apps[bucket]);
+		atomic_inc(&ip_vs_protocol_tcp.appcnt);
+	}
+
+	spin_unlock_bh(&tcp_app_lock);
+	return ret;
+}
+
+
+/*
+ * Remove an application incarnation from the TCP application hash table
+ * and decrement ip_vs_protocol_tcp.appcnt, both under tcp_app_lock.
+ */
+static void
+tcp_unregister_app(struct ip_vs_app *inc)
+{
+ spin_lock_bh(&tcp_app_lock);
+ atomic_dec(&ip_vs_protocol_tcp.appcnt);
+ list_del(&inc->p_list);
+ spin_unlock_bh(&tcp_app_lock);
+}
+
+
+/*
+ * Bind connection 'cp' to the application incarnation registered for
+ * its virtual port, if there is one.  Only connections using the
+ * masquerading (NAT) forwarding method are bound.
+ *
+ * Returns the result of the incarnation's init_conn callback when it
+ * has one, 0 in every other case (including "nothing to bind").
+ */
+static int
+tcp_app_conn_bind(struct ip_vs_conn *cp)
+{
+	struct ip_vs_app *inc;
+	struct ip_vs_app *match = NULL;
+	int result = 0;
+	int hash;
+
+	/* Default binding: bind app only for NAT */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+		return 0;
+
+	/* Search the bucket of the virtual port for an incarnation */
+	hash = tcp_app_hashkey(cp->vport);
+
+	spin_lock(&tcp_app_lock);
+	list_for_each_entry(inc, &tcp_apps[hash], p_list) {
+		if (inc->port != cp->vport)
+			continue;
+		/* keep the match only if a reference could be taken */
+		if (likely(ip_vs_app_inc_get(inc)))
+			match = inc;
+		break;
+	}
+	spin_unlock(&tcp_app_lock);
+
+	if (!match)
+		return result;
+
+	IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
+		      "%s:%u to app %s on port %u\n",
+		      __func__,
+		      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+		      ntohs(cp->cport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+		      ntohs(cp->vport),
+		      match->name, ntohs(match->port));
+
+	cp->app = match;
+	if (match->init_conn)
+		result = match->init_conn(match, cp);
+
+	return result;
+}
+
+
+/*
+ * Set LISTEN timeout. (ip_vs_conn_put will setup timer)
+ *
+ * Puts the connection into IP_VS_TCP_S_LISTEN and loads the matching
+ * timeout from the TCP protocol's timeout table, under cp->lock.
+ */
+void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
+{
+ spin_lock(&cp->lock);
+ cp->state = IP_VS_TCP_S_LISTEN;
+ cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
+ spin_unlock(&cp->lock);
+}
+
+
+/*
+ * init hook for TCP: initialize the application hash table and install
+ * tcp_timeouts as the protocol's timeout table.
+ */
+static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
+{
+ IP_VS_INIT_HASH_TABLE(tcp_apps);
+ pp->timeout_table = tcp_timeouts;
+}
+
+
+/* exit hook for TCP; nothing needs to be released. */
+static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
+{
+}
+
+
+/*
+ * Protocol entry for TCP, wiring the hooks defined in this file.
+ * timeout_table is not set here; ip_vs_tcp_init() fills it in.
+ */
+struct ip_vs_protocol ip_vs_protocol_tcp = {
+ .name = "TCP",
+ .protocol = IPPROTO_TCP,
+ .num_states = IP_VS_TCP_S_LAST,
+ .dont_defrag = 0,
+ .appcnt = ATOMIC_INIT(0),
+ .init = ip_vs_tcp_init,
+ .exit = ip_vs_tcp_exit,
+ .register_app = tcp_register_app,
+ .unregister_app = tcp_unregister_app,
+ .conn_schedule = tcp_conn_schedule,
+ .conn_in_get = tcp_conn_in_get,
+ .conn_out_get = tcp_conn_out_get,
+ .snat_handler = tcp_snat_handler,
+ .dnat_handler = tcp_dnat_handler,
+ .csum_check = tcp_csum_check,
+ .state_name = tcp_state_name,
+ .state_transition = tcp_state_transition,
+ .app_conn_bind = tcp_app_conn_bind,
+ .debug_packet = ip_vs_tcpudp_debug_packet,
+ .timeout_change = tcp_timeout_change,
+ .set_state_timeout = tcp_set_state_timeout,
+};
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
new file mode 100644
index 0000000..6eb6039
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -0,0 +1,533 @@
+/*
+ * ip_vs_proto_udp.c: UDP load balancing support for IPVS
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Julian Anastasov <ja@ssi.bg>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/udp.h>
+
+#include <net/ip_vs.h>
+#include <net/ip.h>
+#include <net/ip6_checksum.h>
+
+/*
+ * Find the connection entry for a UDP packet via ip_vs_conn_in_get().
+ * The source/destination ports are read at 'proto_off'; with 'inverse'
+ * set, addresses and ports are swapped for the lookup.
+ * Returns NULL when the ports cannot be read or nothing matches.
+ */
+static struct ip_vs_conn *
+udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		int inverse)
+{
+	__be16 port_copy[2], *ports;
+
+	ports = skb_header_pointer(skb, proto_off, sizeof(port_copy),
+				   port_copy);
+	if (!ports)
+		return NULL;
+
+	if (unlikely(inverse))
+		return ip_vs_conn_in_get(af, iph->protocol,
+					 &iph->daddr, ports[1],
+					 &iph->saddr, ports[0]);
+
+	return ip_vs_conn_in_get(af, iph->protocol,
+				 &iph->saddr, ports[0],
+				 &iph->daddr, ports[1]);
+}
+
+
+/*
+ * Find the connection entry for a UDP packet via ip_vs_conn_out_get().
+ * The source/destination ports are read at 'proto_off'; with 'inverse'
+ * set, addresses and ports are swapped for the lookup.
+ * Returns NULL when the ports cannot be read or nothing matches.
+ */
+static struct ip_vs_conn *
+udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		 int inverse)
+{
+	__be16 port_copy[2], *ports;
+
+	ports = skb_header_pointer(skb, proto_off, sizeof(port_copy),
+				   port_copy);
+	if (!ports)
+		return NULL;
+
+	if (unlikely(inverse))
+		return ip_vs_conn_out_get(af, iph->protocol,
+					  &iph->daddr, ports[1],
+					  &iph->saddr, ports[0]);
+
+	return ip_vs_conn_out_get(af, iph->protocol,
+				  &iph->saddr, ports[0],
+				  &iph->daddr, ports[1]);
+}
+
+
+/*
+ * Scheduling hook for UDP.
+ *
+ * A packet addressed to a known virtual service gets a connection
+ * created for it; a packet for no known service falls through with
+ * return value 1 and *cpp untouched.  Returns 0 with *verdict set when
+ * the packet is to be dropped or was handed to ip_vs_leave().
+ */
+static int
+udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+		  int *verdict, struct ip_vs_conn **cpp)
+{
+	struct ip_vs_iphdr iph;
+	struct udphdr hdr_copy, *udph;
+	struct ip_vs_service *svc;
+
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+	udph = skb_header_pointer(skb, iph.len, sizeof(hdr_copy), &hdr_copy);
+	if (!udph) {
+		*verdict = NF_DROP;
+		return 0;
+	}
+
+	svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+				&iph.daddr, udph->dest);
+	if (!svc)
+		return 1;
+
+	if (ip_vs_todrop()) {
+		/* The system looks overloaded: give up on this packet. */
+		ip_vs_service_put(svc);
+		*verdict = NF_DROP;
+		return 0;
+	}
+
+	/*
+	 * Ask the virtual service to pick a real server and create the
+	 * connection entry for the new connection.
+	 */
+	*cpp = ip_vs_schedule(svc, skb);
+	if (!*cpp) {
+		*verdict = ip_vs_leave(svc, skb, pp);
+		return 0;
+	}
+
+	ip_vs_service_put(svc);
+	return 1;
+}
+
+
+/*
+ * Incrementally update uhdr->check after one address (oldip -> newip)
+ * and one port (oldport -> newport) were rewritten.  A result of zero
+ * is stored as CSUM_MANGLED_0.
+ */
+static inline void
+udp_fast_csum_update(int af, struct udphdr *uhdr,
+		     const union nf_inet_addr *oldip,
+		     const union nf_inet_addr *newip,
+		     __be16 oldport, __be16 newport)
+{
+	/* Port delta applied to the complemented stored checksum. */
+	__wsum sum = ip_vs_check_diff2(oldport, newport,
+				       ~csum_unfold(uhdr->check));
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		sum = ip_vs_check_diff16(oldip->ip6, newip->ip6, sum);
+	else
+#endif
+		sum = ip_vs_check_diff4(oldip->ip, newip->ip, sum);
+
+	uhdr->check = csum_fold(sum);
+	if (uhdr->check == 0)
+		uhdr->check = CSUM_MANGLED_0;
+}
+
+/*
+ * Update uhdr->check for a CHECKSUM_PARTIAL skb after the address
+ * (oldip -> newip) and the UDP length (oldlen -> newlen, 16-bit network
+ * order) used in the pseudo-header changed.
+ *
+ * With CHECKSUM_PARTIAL the check field holds the folded, NOT
+ * complemented, pseudo-header sum that the device completes later.  The
+ * deltas are therefore added to the stored value as it is and the result
+ * is stored un-complemented again (~csum_fold), in contrast to the
+ * fast-update helper, which works on a finished (complemented) checksum.
+ */
+static inline void
+udp_partial_csum_update(int af, struct udphdr *uhdr,
+			const union nf_inet_addr *oldip,
+			const union nf_inet_addr *newip,
+			__be16 oldlen, __be16 newlen)
+{
+	__wsum sum = ip_vs_check_diff2(oldlen, newlen,
+				       csum_unfold(uhdr->check));
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		sum = ip_vs_check_diff16(oldip->ip6, newip->ip6, sum);
+	else
+#endif
+		sum = ip_vs_check_diff4(oldip->ip, newip->ip, sum);
+
+	uhdr->check = ~csum_fold(sum);
+}
+
+
+/*
+ * SNAT hook for UDP: rewrite the UDP source port to cp->vport and bring
+ * the UDP checksum up to date for the daddr -> vaddr address change.
+ * A datagram whose checksum field is zero and needs no helper gets a
+ * full checksum computed instead of an incremental update.
+ *
+ * Returns 1 on success; 0 when the skb cannot be made writable, the
+ * checksum check fails or the application helper rejects the packet.
+ */
+static int
+udp_snat_handler(struct sk_buff *skb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct udphdr *udph;
+	unsigned int udphoff;
+	int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		udphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		udphoff = ip_hdrlen(skb);
+	/* UDP length before an application helper may resize the packet */
+	oldlen = skb->len - udphoff;
+
+	/* csum_check requires unshared skb */
+	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+			return 0;
+
+		/*
+		 *	Call application helper if needed
+		 */
+		if (!ip_vs_app_pkt_out(cp, skb))
+			return 0;
+	}
+
+	udph = (void *)skb_network_header(skb) + udphoff;
+	udph->source = cp->vport;
+
+	/*
+	 *	Adjust UDP checksums
+	 */
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		/*
+		 * The helper takes 16-bit network-order lengths, so the
+		 * conversion must be htons(); htonl() would be truncated
+		 * when narrowed to __be16.
+		 */
+		udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+					htons(oldlen),
+					htons(skb->len - udphoff));
+	} else if (!cp->app && (udph->check != 0)) {
+		/* Only port and addr are changed, do fast csum update */
+		udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+				     cp->dport, cp->vport);
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		udph->check = 0;
+		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			udph->check = csum_ipv6_magic(&cp->vaddr.in6,
+						      &cp->caddr.in6,
+						      skb->len - udphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			udph->check = csum_tcpudp_magic(cp->vaddr.ip,
+							cp->caddr.ip,
+							skb->len - udphoff,
+							cp->protocol,
+							skb->csum);
+		/* a computed checksum of zero is stored as CSUM_MANGLED_0 */
+		if (udph->check == 0)
+			udph->check = CSUM_MANGLED_0;
+		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
+			  pp->name, udph->check,
+			  (char*)&(udph->check) - (char*)udph);
+	}
+	return 1;
+}
+
+
+/*
+ * DNAT hook for UDP: rewrite the UDP destination port to cp->dport and
+ * bring the UDP checksum up to date for the vaddr -> daddr address
+ * change.  A datagram whose checksum field is zero and needs no helper
+ * gets a full checksum computed instead of an incremental update.
+ *
+ * Returns 1 on success; 0 when the skb cannot be made writable, the
+ * checksum check fails or the application helper rejects the packet.
+ */
+static int
+udp_dnat_handler(struct sk_buff *skb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct udphdr *udph;
+	unsigned int udphoff;
+	int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		udphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		udphoff = ip_hdrlen(skb);
+	/* UDP length before an application helper may resize the packet */
+	oldlen = skb->len - udphoff;
+
+	/* csum_check requires unshared skb */
+	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+			return 0;
+
+		/*
+		 *	Attempt ip_vs_app call.
+		 *	It will fix ip_vs_conn
+		 */
+		if (!ip_vs_app_pkt_in(cp, skb))
+			return 0;
+	}
+
+	udph = (void *)skb_network_header(skb) + udphoff;
+	udph->dest = cp->dport;
+
+	/*
+	 *	Adjust UDP checksums
+	 */
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		/*
+		 * Old address is vaddr, new address is daddr (same order
+		 * as the fast update below).  The helper takes 16-bit
+		 * network-order lengths, hence htons() and not htonl().
+		 */
+		udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
+					htons(oldlen),
+					htons(skb->len - udphoff));
+	} else if (!cp->app && (udph->check != 0)) {
+		/* Only port and addr are changed, do fast csum update */
+		udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
+				     cp->vport, cp->dport);
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		udph->check = 0;
+		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			udph->check = csum_ipv6_magic(&cp->caddr.in6,
+						      &cp->daddr.in6,
+						      skb->len - udphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			udph->check = csum_tcpudp_magic(cp->caddr.ip,
+							cp->daddr.ip,
+							skb->len - udphoff,
+							cp->protocol,
+							skb->csum);
+		/* a computed checksum of zero is stored as CSUM_MANGLED_0 */
+		if (udph->check == 0)
+			udph->check = CSUM_MANGLED_0;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+	return 1;
+}
+
+
+/*
+ * Verify the UDP checksum of skb.  Returns 1 when the packet is acceptable
+ * (checksum field is zero, the sum verifies, or skb->ip_summed needs no
+ * check) and 0 when the UDP header cannot be read or the sum is wrong.
+ */
+static int
+udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+	struct udphdr _udph, *uh;
+	unsigned int udphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		udphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		udphoff = ip_hdrlen(skb);
+
+	uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
+	if (uh == NULL)
+		return 0;
+	if (uh->check == 0)		/* nothing to verify */
+		return 1;
+	switch (skb->ip_summed) {
+	case CHECKSUM_NONE:
+		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
+		/* fall through: verify the sum that was just computed */
+	case CHECKSUM_COMPLETE:
+#ifdef CONFIG_IP_VS_IPV6
+		if (af == AF_INET6) {
+			if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+					    &ipv6_hdr(skb)->daddr,
+					    skb->len - udphoff,
+					    ipv6_hdr(skb)->nexthdr,
+					    skb->csum)) {
+				IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+						 "Failed checksum for");
+				return 0;
+			}
+		} else
+#endif
+		if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+				      skb->len - udphoff,
+				      ip_hdr(skb)->protocol, skb->csum)) {
+			IP_VS_DBG_RL_PKT(0, pp, skb, 0, "Failed checksum for");
+			return 0;
+		}
+		break;
+	default:
+		/* No need to checksum. */
+		break;
+	}
+	return 1;
+}
+
+/*
+ * Note: the caller guarantees that only one of register_app,
+ * unregister_app or app_conn_bind is called each time.
+ */
+
+#define UDP_APP_TAB_BITS 4 /* log2 of the bucket count: 16 buckets */
+#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
+#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
+
+static struct list_head udp_apps[UDP_APP_TAB_SIZE]; /* registered apps, hashed by port */
+static DEFINE_SPINLOCK(udp_app_lock); /* held around every udp_apps list access below */
+
+/* Bucket index for a port: fold the upper bits onto the lower, then mask. */
+static inline __u16 udp_app_hashkey(__be16 port)
+{
+	__u16 raw = (__force u16)port;
+	return ((raw >> UDP_APP_TAB_BITS) ^ raw) & UDP_APP_TAB_MASK;
+}
+
+/*
+ * Add the application incarnation inc to the UDP port hash.
+ * Returns 0 on success or -EEXIST if its port is already registered.
+ */
+static int udp_register_app(struct ip_vs_app *inc)
+{
+	__be16 port = inc->port;
+	__u16 bucket = udp_app_hashkey(port);
+	struct ip_vs_app *cur;
+	int err = -EEXIST;
+
+	spin_lock_bh(&udp_app_lock);
+	list_for_each_entry(cur, &udp_apps[bucket], p_list) {
+		if (cur->port == port)
+			goto unlock;	/* duplicate port */
+	}
+	list_add(&inc->p_list, &udp_apps[bucket]);
+	atomic_inc(&ip_vs_protocol_udp.appcnt);
+	err = 0;
+
+unlock:
+	spin_unlock_bh(&udp_app_lock);
+	return err;
+}
+
+
+/* Take inc off the UDP app hash and decrement the protocol's app count. */
+static void udp_unregister_app(struct ip_vs_app *inc)
+{
+	spin_lock_bh(&udp_app_lock);
+	atomic_dec(&ip_vs_protocol_udp.appcnt);
+	list_del(&inc->p_list);
+	spin_unlock_bh(&udp_app_lock);
+}
+
+
+/*
+ * Bind cp to the application incarnation registered on its virtual port.
+ * Only NAT (masquerading) connections are bound.  Returns 0 when nothing is
+ * bound or the incarnation has no init_conn() hook, else that hook's result.
+ */
+static int udp_app_conn_bind(struct ip_vs_conn *cp)
+{
+	struct ip_vs_app *inc, *found = NULL;
+	int hash;
+
+	/* Default binding: bind app only for NAT */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+		return 0;
+
+	/* Look the incarnation up and try to get it while holding the lock */
+	hash = udp_app_hashkey(cp->vport);
+	spin_lock(&udp_app_lock);
+	list_for_each_entry(inc, &udp_apps[hash], p_list) {
+		if (inc->port != cp->vport)
+			continue;
+		if (likely(ip_vs_app_inc_get(inc)))
+			found = inc;
+		break;
+	}
+	spin_unlock(&udp_app_lock);
+	if (!found)
+		return 0;
+
+	IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
+		      "%s:%u to app %s on port %u\n",
+		      __func__,
+		      IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+		      found->name, ntohs(found->port));
+
+	cp->app = found;
+	if (!found->init_conn)
+		return 0;
+	return found->init_conn(found, cp);
+}
+
+
+static int udp_timeouts[IP_VS_UDP_S_LAST+1] = { /* per-state timeouts, in jiffies */
+ [IP_VS_UDP_S_NORMAL] = 5*60*HZ, /* 5 minutes */
+ [IP_VS_UDP_S_LAST] = 2*HZ, /* 2 seconds */
+};
+
+static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = { /* indexed by state */
+ [IP_VS_UDP_S_NORMAL] = "UDP",
+ [IP_VS_UDP_S_LAST] = "BUG!",
+};
+
+
+/* Hand a per-state timeout change for UDP to ip_vs_set_state_timeout(). */
+static int udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
+{
+	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
+				       udp_state_name_table, sname, to);
+}
+
+static const char * udp_state_name(int state) /* printable name of a UDP state */
+{
+ if (state >= IP_VS_UDP_S_LAST) /* NOTE(review): a negative state is not rejected */
+ return "ERR!";
+ return udp_state_name_table[state] ? udp_state_name_table[state] : "?"; /* "?" for an unnamed slot */
+}
+
+/* Ignores direction and skb: sets cp->timeout to the NORMAL one, returns 1. */
+static int
+udp_state_transition(struct ip_vs_conn *cp, int direction,
+		     const struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+	cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
+	return 1;
+}
+
+static void udp_init(struct ip_vs_protocol *pp)
+{
+ IP_VS_INIT_HASH_TABLE(udp_apps); /* initialise every udp_apps bucket */
+ pp->timeout_table = udp_timeouts; /* read back by udp_state_transition() */
+}
+
+static void udp_exit(struct ip_vs_protocol *pp) /* nothing to release */
+{
+}
+
+
+struct ip_vs_protocol ip_vs_protocol_udp = { /* UDP protocol descriptor */
+ .name = "UDP",
+ .protocol = IPPROTO_UDP,
+ .num_states = IP_VS_UDP_S_LAST,
+ .dont_defrag = 0,
+ .init = udp_init,
+ .exit = udp_exit,
+ .conn_schedule = udp_conn_schedule,
+ .conn_in_get = udp_conn_in_get,
+ .conn_out_get = udp_conn_out_get,
+ .snat_handler = udp_snat_handler,
+ .dnat_handler = udp_dnat_handler,
+ .csum_check = udp_csum_check, /* called through pp->csum_check in the NAT handlers */
+ .state_transition = udp_state_transition,
+ .state_name = udp_state_name,
+ .register_app = udp_register_app,
+ .unregister_app = udp_unregister_app,
+ .app_conn_bind = udp_app_conn_bind,
+ .debug_packet = ip_vs_tcpudp_debug_packet,
+ .timeout_change = NULL, /* no hook for UDP */
+ .set_state_timeout = udp_set_state_timeout,
+};
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
new file mode 100644
index 0000000..a22195f
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -0,0 +1,112 @@
+/*
+ * IPVS: Round-Robin Scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Peter Kese <peter.kese@ijs.si>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Fixes/Changes:
+ * Wensong Zhang : changed the ip_vs_rr_schedule to return dest
+ * Julian Anastasov : fixed the NULL pointer access bug in debugging
+ * Wensong Zhang : changed some cosmetic things for debugging
+ * Wensong Zhang : changed for the d-linked destination list
+ * Wensong Zhang : added the ip_vs_rr_update_svc
+ * Wensong Zhang : added any dest with weight=0 is quiesced
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
+{
+ svc->sched_data = &svc->destinations; /* RR cursor starts at the list head */
+ return 0;
+}
+
+
+static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
+{
+ svc->sched_data = &svc->destinations; /* reset the RR cursor to the list head */
+ return 0;
+}
+
+
+/*
+ * Round-Robin Scheduling
+ *
+ * Walk the destination list once, starting after the entry remembered in
+ * svc->sched_data, and return the first destination that is not overloaded
+ * and has a positive weight; that entry becomes the new starting point.
+ * Returns NULL if no destination qualifies.
+ */
+static struct ip_vs_dest *
+ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct list_head *start, *pos;
+	struct ip_vs_dest *dest;
+
+	IP_VS_DBG(6, "ip_vs_rr_schedule(): Scheduling...\n");
+
+	write_lock(&svc->sched_lock);
+	start = ((struct list_head *)svc->sched_data)->next;
+	pos = start;
+	do {
+		/* the list head is not a destination: step over it */
+		if (pos != &svc->destinations) {
+			dest = list_entry(pos, struct ip_vs_dest, n_list);
+			if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+			    atomic_read(&dest->weight) > 0)
+				goto hit;
+		}
+		pos = pos->next;
+	} while (pos != start);
+	write_unlock(&svc->sched_lock);
+	return NULL;
+
+hit:
+	svc->sched_data = pos;	/* the next call resumes after this entry */
+	write_unlock(&svc->sched_lock);
+	IP_VS_DBG_BUF(6, "RR: server %s:%u "
+		      "activeconns %d refcnt %d weight %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+		      atomic_read(&dest->activeconns),
+		      atomic_read(&dest->refcnt), atomic_read(&dest->weight));
+
+	return dest;
+}
+
+
+static struct ip_vs_scheduler ip_vs_rr_scheduler = {
+ .name = "rr", /* name matched when a scheduler is looked up */
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), /* empty until registered */
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 1,
+#endif
+ .init_service = ip_vs_rr_init_svc,
+ .update_service = ip_vs_rr_update_svc,
+ .schedule = ip_vs_rr_schedule,
+};
+
+static int __init ip_vs_rr_init(void) /* module load: register the "rr" scheduler */
+{
+ return register_ip_vs_scheduler(&ip_vs_rr_scheduler);
+}
+
+static void __exit ip_vs_rr_cleanup(void) /* module unload: unregister it again */
+{
+ unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);
+}
+
+module_init(ip_vs_rr_init);
+module_exit(ip_vs_rr_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
new file mode 100644
index 0000000..a46ad9e
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -0,0 +1,251 @@
+/*
+ * IPVS An implementation of the IP virtual server support for the
+ * LINUX operating system. IPVS is now implemented as a module
+ * over the Netfilter framework. IPVS can be used to build a
+ * high-performance and highly available server based on a
+ * cluster of servers.
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Peter Kese <peter.kese@ijs.si>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <asm/string.h>
+#include <linux/kmod.h>
+#include <linux/sysctl.h>
+
+#include <net/ip_vs.h>
+
+/*
+ * IPVS scheduler list (entries linked through ip_vs_scheduler.n_list)
+ */
+static LIST_HEAD(ip_vs_schedulers);
+
+/* lock protecting ip_vs_schedulers (the scheduler list, not the service table) */
+static DEFINE_RWLOCK(__ip_vs_sched_lock);
+
+
+/*
+ * Bind a service with a scheduler: store the scheduler in svc->scheduler
+ * and run its init_service() hook when it has one.  Returns 0 on success,
+ * -EINVAL for a NULL argument, or the error returned by the hook.
+ */
+int ip_vs_bind_scheduler(struct ip_vs_service *svc,
+			 struct ip_vs_scheduler *scheduler)
+{
+	int err = 0;
+
+	if (!svc) {
+		IP_VS_ERR("ip_vs_bind_scheduler(): svc arg NULL\n");
+		return -EINVAL;
+	}
+	if (!scheduler) {
+		IP_VS_ERR("ip_vs_bind_scheduler(): scheduler arg NULL\n");
+		return -EINVAL;
+	}
+
+	svc->scheduler = scheduler;
+
+	if (scheduler->init_service)
+		err = scheduler->init_service(svc);
+	if (err)
+		IP_VS_ERR("ip_vs_bind_scheduler(): init error\n");
+
+	/* NOTE: svc->scheduler is left set even when the hook failed */
+	return err;
+}
+
+
+/*
+ * Unbind a service with its scheduler: run the optional done_service() hook
+ * and clear svc->scheduler.  Returns 0 on success, -EINVAL if svc is NULL,
+ * not bound, or the hook reports an error (the binding is then kept).
+ */
+int ip_vs_unbind_scheduler(struct ip_vs_service *svc)
+{
+	struct ip_vs_scheduler *sched;
+
+	if (!svc) {
+		IP_VS_ERR("ip_vs_unbind_scheduler(): svc arg NULL\n");
+		return -EINVAL;
+	}
+
+	sched = svc->scheduler;
+	if (!sched) {
+		IP_VS_ERR("ip_vs_unbind_scheduler(): svc isn't bound\n");
+		return -EINVAL;
+	}
+
+	if (sched->done_service && sched->done_service(svc) != 0) {
+		IP_VS_ERR("ip_vs_unbind_scheduler(): done error\n");
+		return -EINVAL;
+	}
+
+	svc->scheduler = NULL;
+	return 0;
+}
+
+
+/*
+ * Get scheduler in the scheduler list by name.  A scheduler that is found
+ * is returned with its module (if any) still pinned by try_module_get();
+ * ip_vs_scheduler_put() drops that reference.  Returns NULL when no
+ * registered scheduler carries the name.
+ */
+static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
+{
+	struct ip_vs_scheduler *sched, *found = NULL;
+
+	IP_VS_DBG(2, "ip_vs_sched_getbyname(): sched_name \"%s\"\n",
+		  sched_name);
+
+	read_lock_bh(&__ip_vs_sched_lock);
+
+	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
+		/*
+		 * Pin the module before comparing names; if that fails the
+		 * scheduler is just being deleted, so skip it.
+		 */
+		if (sched->module && !try_module_get(sched->module))
+			continue;
+
+		if (strcmp(sched_name, sched->name) == 0) {
+			found = sched;	/* HIT: keep the module pinned */
+			break;
+		}
+		if (sched->module)
+			module_put(sched->module);
+	}
+
+	read_unlock_bh(&__ip_vs_sched_lock);
+	return found;
+}
+
+
+/*
+ * Look a scheduler up by name; on a miss request module "ip_vs_<name>" once
+ */
+struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
+{
+ struct ip_vs_scheduler *sched;
+
+ /*
+ * Search the already registered schedulers for sched_name
+ */
+ sched = ip_vs_sched_getbyname(sched_name);
+
+ /*
+ * If scheduler not found, load the module and search again
+ */
+ if (sched == NULL) {
+ request_module("ip_vs_%s", sched_name);
+ sched = ip_vs_sched_getbyname(sched_name);
+ }
+
+ return sched; /* still NULL if the second lookup found nothing */
+}
+
+void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) /* undo the module get of a lookup */
+{
+ if (scheduler->module) /* schedulers without a module hold no reference */
+ module_put(scheduler->module);
+}
+
+
+/*
+ * Register a scheduler in the scheduler list; returns 0 or -EINVAL
+ */
+int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
+{
+ struct ip_vs_scheduler *sched;
+
+ if (!scheduler) {
+ IP_VS_ERR("register_ip_vs_scheduler(): NULL arg\n");
+ return -EINVAL;
+ }
+
+ if (!scheduler->name) {
+ IP_VS_ERR("register_ip_vs_scheduler(): NULL scheduler_name\n");
+ return -EINVAL;
+ }
+
+ /* increase the module use count; every failure path below drops it again */
+ ip_vs_use_count_inc();
+
+ write_lock_bh(&__ip_vs_sched_lock);
+
+ if (!list_empty(&scheduler->n_list)) { /* n_list already linked into some list */
+ write_unlock_bh(&__ip_vs_sched_lock);
+ ip_vs_use_count_dec();
+ IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
+ "already linked\n", scheduler->name);
+ return -EINVAL;
+ }
+
+ /*
+ * Make sure that the scheduler with this name doesn't exist
+ * in the scheduler list (names are compared with strcmp).
+ */
+ list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
+ if (strcmp(scheduler->name, sched->name) == 0) {
+ write_unlock_bh(&__ip_vs_sched_lock);
+ ip_vs_use_count_dec();
+ IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
+ "already existed in the system\n",
+ scheduler->name);
+ return -EINVAL;
+ }
+ }
+ /*
+ * Add it at the front of the doubly-linked scheduler list
+ */
+ list_add(&scheduler->n_list, &ip_vs_schedulers);
+ write_unlock_bh(&__ip_vs_sched_lock);
+
+ IP_VS_INFO("[%s] scheduler registered.\n", scheduler->name);
+
+ return 0;
+}
+
+
+/*
+ * Unregister a scheduler from the scheduler list; returns 0 or -EINVAL
+ */
+int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
+{
+ if (!scheduler) {
+ IP_VS_ERR( "unregister_ip_vs_scheduler(): NULL arg\n");
+ return -EINVAL;
+ }
+
+ write_lock_bh(&__ip_vs_sched_lock);
+ if (list_empty(&scheduler->n_list)) { /* n_list points at itself: not linked */
+ write_unlock_bh(&__ip_vs_sched_lock);
+ IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler "
+ "is not in the list. failed\n", scheduler->name);
+ return -EINVAL;
+ }
+
+ /*
+ * Remove it from the doubly-linked scheduler list
+ */
+ list_del(&scheduler->n_list);
+ write_unlock_bh(&__ip_vs_sched_lock);
+
+ /* decrease the module use count taken by register_ip_vs_scheduler() */
+ ip_vs_use_count_dec();
+
+ IP_VS_INFO("[%s] scheduler unregistered.\n", scheduler->name);
+
+ return 0;
+}
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
new file mode 100644
index 0000000..7d2f22f
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -0,0 +1,140 @@
+/*
+ * IPVS: Shortest Expected Delay scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The SED algorithm attempts to minimize each job's expected delay until
+ * completion. The expected delay that the job will experience is
+ * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
+ * jobs on the ith server and Ui is the fixed service rate (weight) of
+ * the ith server. The SED algorithm adopts a greedy policy that each does
+ * what is in its own best interest, i.e. to join the queue which would
+ * minimize its expected delay of completion.
+ *
+ * See the following paper for more information:
+ * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
+ * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
+ * pages 986-994, 1988.
+ *
+ * Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me.
+ *
+ * The difference between SED and WLC is that SED includes the incoming
+ * job in the cost function (the increment of 1). SED may outperform
+ * WLC, while scheduling big jobs under larger heterogeneous systems
+ * (the server weight varies a lot).
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static inline unsigned int
+ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
+{
+ /*
+ * Only the active connection count enters the cost; the +1 is the
+ * incoming job (see the SED description at the top of this file).
+ */
+ return atomic_read(&dest->activeconns) + 1;
+}
+
+
+/*
+ * Shortest Expected Delay scheduling: pick the destination with the smallest
+ * (activeconns + 1) / weight among those that are not overloaded.
+ * Returns NULL when no destination with a positive weight is available.
+ */
+static struct ip_vs_dest *
+ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest, *least = NULL;
+	unsigned int loh = 0, doh;
+
+	IP_VS_DBG(6, "ip_vs_sed_schedule(): Scheduling...\n");
+
+	/*
+	 * We calculate the load of each dest server as follows:
+	 *	(server expected overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *	h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connections.
+	 */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		if (least == NULL) {
+			/* still looking for the first usable candidate */
+			if (atomic_read(&dest->weight) > 0) {
+				least = dest;
+				loh = ip_vs_sed_dest_overhead(least);
+			}
+			continue;
+		}
+
+		/* cross-multiplied load comparison, see above */
+		doh = ip_vs_sed_dest_overhead(dest);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+	if (least == NULL)
+		return NULL;
+
+	IP_VS_DBG_BUF(6, "SED: server %s:%u "
+		      "activeconns %d refcnt %d weight %d overhead %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+		      atomic_read(&least->activeconns),
+		      atomic_read(&least->refcnt),
+		      atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_sed_scheduler =
+{
+ .name = "sed", /* name matched when a scheduler is looked up */
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), /* empty until registered */
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 1,
+#endif
+ .schedule = ip_vs_sed_schedule, /* no per-service init/done/update hooks */
+};
+
+
+static int __init ip_vs_sed_init(void) /* module load: register the "sed" scheduler */
+{
+ return register_ip_vs_scheduler(&ip_vs_sed_scheduler);
+}
+
+static void __exit ip_vs_sed_cleanup(void) /* module unload: unregister it again */
+{
+ unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
+}
+
+module_init(ip_vs_sed_init);
+module_exit(ip_vs_sed_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
new file mode 100644
index 0000000..1d96de2
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -0,0 +1,258 @@
+/*
+ * IPVS: Source Hashing scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@gnuchina.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The sh algorithm is to select server by the hash key of source IP
+ * address. The pseudo code is as follows:
+ *
+ * n <- servernode[src_ip];
+ * if (n is dead) OR
+ * (n is overloaded) or (n.weight <= 0) then
+ * return NULL;
+ *
+ * return n;
+ *
+ * Notes that servernode is a 256-bucket hash table that maps the hash
+ * index derived from packet source IP address to the current server
+ * array. If the sh scheduler is used in cache cluster, it is good to
+ * combine it with cache_bypass feature. When the statically assigned
+ * server is dead or overloaded, the load balancer can bypass the cache
+ * server and send requests to the original server directly.
+ *
+ */
+
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * IPVS SH bucket: one slot of the per-service table kept in svc->sched_data
+ */
+struct ip_vs_sh_bucket {
+ struct ip_vs_dest *dest; /* real server (cache); NULL if the service has none */
+};
+
+/*
+ * for IPVS SH entry hash table
+ */
+#ifndef CONFIG_IP_VS_SH_TAB_BITS
+#define CONFIG_IP_VS_SH_TAB_BITS 8 /* fallback when not configured: 256 buckets */
+#endif
+#define IP_VS_SH_TAB_BITS CONFIG_IP_VS_SH_TAB_BITS
+#define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS)
+#define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1)
+
+
+/*
+ * Returns the bucket index for addr: multiplicative hash masked to the table
+ */
+static inline unsigned ip_vs_sh_hashkey(__be32 addr)
+{
+ return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK; /* only the low table bits survive */
+}
+
+
+/*
+ * Get the ip_vs_dest stored in the bucket addr hashes to (may be NULL).
+ */
+static inline struct ip_vs_dest *
+ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __be32 addr)
+{
+ return (tbl[ip_vs_sh_hashkey(addr)]).dest;
+}
+
+
+/*
+ * Assign all the hash buckets of the specified table with the service:
+ * destinations are handed out to consecutive buckets in list order, wrapping
+ * round as often as needed, and each assignment increments dest->refcnt.
+ * With no destinations every bucket is set to NULL.  Always returns 0.
+ */
+static int
+ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
+{
+	struct list_head *pos = &svc->destinations;
+	struct ip_vs_dest *dest;
+	int i;
+
+	for (i = 0; i < IP_VS_SH_TAB_SIZE; i++) {
+		if (list_empty(pos)) {
+			tbl[i].dest = NULL;
+			continue;
+		}
+		/* the list head carries no destination: step over it */
+		if (pos == &svc->destinations)
+			pos = pos->next;
+
+		dest = list_entry(pos, struct ip_vs_dest, n_list);
+		atomic_inc(&dest->refcnt);
+		tbl[i].dest = dest;
+
+		pos = pos->next;
+	}
+	return 0;
+}
+
+
+/*
+ * Flush all the hash buckets of the specified table: decrement the refcnt
+ * of every destination still referenced and clear the bucket.
+ */
+static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
+{
+	int i;
+
+	for (i = 0; i < IP_VS_SH_TAB_SIZE; i++) {
+		struct ip_vs_dest *dest = tbl[i].dest;
+
+		if (!dest)
+			continue;
+		atomic_dec(&dest->refcnt);
+		tbl[i].dest = NULL;
+	}
+}
+
+
+/*
+ * init_service hook: allocate the SH table for this service, store it in
+ * svc->sched_data and fill it from svc->destinations.  Returns 0 or -ENOMEM.
+ */
+static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
+{
+	const size_t tbl_bytes =
+		sizeof(struct ip_vs_sh_bucket) * IP_VS_SH_TAB_SIZE;
+	struct ip_vs_sh_bucket *tbl = kmalloc(tbl_bytes, GFP_ATOMIC);
+
+	if (!tbl) {
+		IP_VS_ERR("ip_vs_sh_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
+		  "current service\n", tbl_bytes);
+
+	/* assign the hash buckets with the updated service */
+	ip_vs_sh_assign(tbl, svc);
+	return 0;
+}
+
+static int ip_vs_sh_done_svc(struct ip_vs_service *svc)
+{
+ struct ip_vs_sh_bucket *tbl = svc->sched_data;
+
+ /* drop the destination references held by the buckets first */
+ ip_vs_sh_flush(tbl);
+
+ /* release the table itself; svc->sched_data is not reset here */
+ kfree(svc->sched_data);
+ IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
+ sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
+
+ return 0;
+}
+
+
+static int ip_vs_sh_update_svc(struct ip_vs_service *svc)
+{
+ struct ip_vs_sh_bucket *tbl = svc->sched_data;
+
+ /* drop the references taken for the previous assignment */
+ ip_vs_sh_flush(tbl);
+
+ /* refill the same table from the current svc->destinations */
+ ip_vs_sh_assign(tbl, svc);
+
+ return 0;
+}
+
+
+/*
+ * If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
+ * consider that the server is overloaded here (non-zero return).
+ */
+static inline int is_overloaded(struct ip_vs_dest *dest)
+{
+ return dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
+
+/*
+ * Source Hashing scheduling: look up the bucket for the packet's IPv4 source
+ * address and return its destination, or NULL if the bucket is empty or the
+ * destination is unavailable, has weight <= 0 or is overloaded.
+ */
+static struct ip_vs_dest *
+ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct iphdr *iph = ip_hdr(skb);
+	struct ip_vs_sh_bucket *tbl = svc->sched_data;
+	struct ip_vs_dest *dest;
+
+	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
+
+	dest = ip_vs_sh_get(tbl, iph->saddr);
+	if (!dest)
+		return NULL;
+	if (!(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
+	    atomic_read(&dest->weight) <= 0 ||
+	    is_overloaded(dest))
+		return NULL;
+
+	IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->saddr), NIPQUAD(dest->addr.ip),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ * IPVS SH Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_sh_scheduler =
+{
+ .name = "sh", /* name matched when a scheduler is looked up */
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 0, /* ip_vs_sh_schedule() hashes the IPv4 source address */
+#endif
+ .init_service = ip_vs_sh_init_svc,
+ .done_service = ip_vs_sh_done_svc,
+ .update_service = ip_vs_sh_update_svc,
+ .schedule = ip_vs_sh_schedule,
+};
+
+
+static int __init ip_vs_sh_init(void) /* module load: register the "sh" scheduler */
+{
+ return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
+}
+
+
+static void __exit ip_vs_sh_cleanup(void) /* module unload: unregister it again */
+{
+ unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
+}
+
+
+module_init(ip_vs_sh_init);
+module_exit(ip_vs_sh_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
new file mode 100644
index 0000000..de5e7e1
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -0,0 +1,942 @@
+/*
+ * IPVS An implementation of the IP virtual server support for the
+ * LINUX operating system. IPVS is now implemented as a module
+ * over the NetFilter framework. IPVS can be used to build a
+ * high-performance and highly available server based on a
+ * cluster of servers.
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * ip_vs_sync: sync connection info from master load balancer to backups
+ * through multicast
+ *
+ * Changes:
+ * Alexandre Cassen : Added master & backup support at a time.
+ * Alexandre Cassen : Added SyncID support for incoming sync
+ * messages filtering.
+ * Justin Ossevoort : Fix endian problem on sync message size.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/inetdevice.h>
+#include <linux/net.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/igmp.h> /* for ip_mc_join_group */
+#include <linux/udp.h>
+#include <linux/err.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/kernel.h>
+
+#include <net/ip.h>
+#include <net/sock.h>
+
+#include <net/ip_vs.h>
+
+#define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81, host byte order */
+#define IP_VS_SYNC_PORT 8848 /* multicast port, host byte order */
+
+
+/*
+ * IPVS sync connection entry (one per connection inside a sync message)
+ */
+struct ip_vs_sync_conn {
+ __u8 reserved; /* carries no connection data */
+
+ /* Protocol, addresses and port numbers */
+ __u8 protocol; /* Which protocol (TCP/UDP) */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __be32 caddr; /* client address */
+ __be32 vaddr; /* virtual address */
+ __be32 daddr; /* destination address */
+
+ /* Flags and state transition */
+ __be16 flags; /* status flags, sent without IP_VS_CONN_F_HASHED */
+ __be16 state; /* state info */
+
+ /* The sequence options start here */
+};
+
+struct ip_vs_sync_conn_options { /* follows an entry whose flags have IP_VS_CONN_F_SEQ_MASK bits */
+ struct ip_vs_seq in_seq; /* incoming seq. struct */
+ struct ip_vs_seq out_seq; /* outgoing seq. struct */
+};
+
+struct ip_vs_sync_thread_data { /* NOTE(review): presumably per sync thread - users are outside this section */
+ struct socket *sock;
+ char *buf;
+};
+
+#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) /* entry without sequence options */
+#define FULL_CONN_SIZE \
+(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) /* entry plus options */
+
+
+/*
+ The master multicasts messages to the backup load balancers in the
+ following format.
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Count Conns | SyncID | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | IPVS Sync Connection (1) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | . |
+ | . |
+ | . |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | IPVS Sync Connection (n) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+#define SYNC_MESG_HEADER_LEN 4 /* nr_conns + syncid + size, in bytes */
+#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
+
+struct ip_vs_sync_mesg {
+ __u8 nr_conns; /* number of connection entries that follow */
+ __u8 syncid;
+ __u16 size; /* total bytes; host order while being filled, ntohs()'d on receive */
+
+ /* ip_vs_sync_conn entries start here */
+};
+
+/* the maximum length of sync (sending/receiving) message, in bytes */
+static int sync_send_mesg_maxlen; /* size of every buffer made by ip_vs_sync_buff_create() */
+static int sync_recv_mesg_maxlen;
+
+struct ip_vs_sync_buff {
+ struct list_head list; /* link in ip_vs_sync_queue */
+ unsigned long firstuse; /* jiffies when the buffer was created */
+
+ /* pointers for the message data */
+ struct ip_vs_sync_mesg *mesg; /* start of the message (header first) */
+ unsigned char *head; /* where the next connection entry is written */
+ unsigned char *end; /* mesg + sync_send_mesg_maxlen */
+};
+
+
+/* the sync_buff list head and the lock */
+static LIST_HEAD(ip_vs_sync_queue); /* filled buffers, oldest at the front */
+static DEFINE_SPINLOCK(ip_vs_sync_lock);
+
+/* current sync_buff for accepting new conn entries */
+static struct ip_vs_sync_buff *curr_sb = NULL;
+static DEFINE_SPINLOCK(curr_sb_lock); /* guards curr_sb */
+
+/* ipvs sync daemon state */
+volatile int ip_vs_sync_state = IP_VS_STATE_NONE; /* bit mask; MASTER bit is tested in sb_queue_tail() */
+volatile int ip_vs_master_syncid = 0; /* stamped into every outgoing message */
+volatile int ip_vs_backup_syncid = 0; /* 0 accepts any syncid on receive */
+
+/* multicast interface name */
+char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+
+/* sync daemon tasks */
+static struct task_struct *sync_master_thread;
+static struct task_struct *sync_backup_thread;
+
+/* multicast addr (group and port converted to network byte order) */
+static struct sockaddr_in mcast_addr = {
+ .sin_family = AF_INET,
+ .sin_port = __constant_htons(IP_VS_SYNC_PORT),
+ .sin_addr.s_addr = __constant_htonl(IP_VS_SYNC_GROUP),
+};
+
+
+/*
+ * Take the sync buffer at the head of ip_vs_sync_queue off the queue and
+ * return it; returns NULL when the queue is empty.
+ */
+static inline struct ip_vs_sync_buff *sb_dequeue(void)
+{
+	struct ip_vs_sync_buff *sb = NULL;
+
+	spin_lock_bh(&ip_vs_sync_lock);
+	if (!list_empty(&ip_vs_sync_queue)) {
+		sb = list_entry(ip_vs_sync_queue.next,
+				struct ip_vs_sync_buff, list);
+		list_del(&sb->list);
+	}
+	spin_unlock_bh(&ip_vs_sync_lock);
+	return sb;
+}
+
+/* Allocate a sync buffer holding only an empty header; NULL if kmalloc fails. */
+static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
+{
+	struct ip_vs_sync_buff *sb = kmalloc(sizeof(*sb), GFP_ATOMIC);
+	if (!sb)
+		return NULL;
+	sb->mesg = kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC);
+	if (!sb->mesg) {
+		kfree(sb);
+		return NULL;
+	}
+	sb->mesg->nr_conns = 0;
+	sb->mesg->syncid = ip_vs_master_syncid;
+	sb->mesg->size = SYNC_MESG_HEADER_LEN;
+	sb->head = (unsigned char *)sb->mesg + SYNC_MESG_HEADER_LEN;
+	sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+	sb->firstuse = jiffies;
+	return sb;
+}
+
+static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) /* free message, then buffer */
+{
+ kfree(sb->mesg);
+ kfree(sb);
+}
+
+static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) /* queue sb for sending, or drop it */
+{
+ spin_lock(&ip_vs_sync_lock); /* NOTE(review): plain spin_lock, sb_dequeue() uses the _bh variant */
+ if (ip_vs_sync_state & IP_VS_STATE_MASTER)
+ list_add_tail(&sb->list, &ip_vs_sync_queue);
+ else
+ ip_vs_sync_buff_release(sb); /* MASTER bit not set: free the buffer */
+ spin_unlock(&ip_vs_sync_lock);
+}
+
+/*
+ * Detach and return the current sync buffer if it is at least "time" jiffies
+ * old (or unconditionally when time is zero); otherwise return NULL.
+ */
+static inline struct ip_vs_sync_buff *
+get_curr_sync_buff(unsigned long time)
+{
+	struct ip_vs_sync_buff *sb = NULL;
+
+	spin_lock_bh(&curr_sb_lock);
+	/* time_before() here handed out buffers *younger* than "time" */
+	if (curr_sb && (time == 0 ||
+			time_after_eq(jiffies - curr_sb->firstuse, time))) {
+		sb = curr_sb;
+		curr_sb = NULL;
+	}
+	spin_unlock_bh(&curr_sb_lock);
+	return sb;
+}
+
+
+/*
+ * Add an ip_vs_conn information into the current sync_buff.
+ * Called by ip_vs_in.  A buffer without room for another full entry is
+ * queued; the controlling connection (cp->control) is synced recursively.
+ */
+void ip_vs_sync_conn(struct ip_vs_conn *cp)
+{
+	struct ip_vs_sync_mesg *m;
+	struct ip_vs_sync_conn *s;
+	int len;
+
+	spin_lock(&curr_sb_lock);
+	if (!curr_sb && !(curr_sb = ip_vs_sync_buff_create())) {
+		spin_unlock(&curr_sb_lock);
+		IP_VS_ERR("ip_vs_sync_buff_create failed.\n");
+		return;
+	}
+
+	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
+		SIMPLE_CONN_SIZE;
+	m = curr_sb->mesg;
+	s = (struct ip_vs_sync_conn *)curr_sb->head;
+
+	/* copy members */
+	s->reserved = 0;	/* buffer is kmalloc'ed: never send stale bytes */
+	s->protocol = cp->protocol;
+	s->cport = cp->cport;
+	s->vport = cp->vport;
+	s->dport = cp->dport;
+	s->caddr = cp->caddr.ip;
+	s->vaddr = cp->vaddr.ip;
+	s->daddr = cp->daddr.ip;
+	s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
+	s->state = htons(cp->state);
+	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+		struct ip_vs_sync_conn_options *opt =
+			(struct ip_vs_sync_conn_options *)&s[1];
+		memcpy(opt, &cp->in_seq, sizeof(*opt));
+	}
+
+	m->nr_conns++;
+	m->size += len;
+	curr_sb->head += len;
+
+	/* check if there is a space for next one */
+	if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
+		sb_queue_tail(curr_sb);
+		curr_sb = NULL;
+	}
+	spin_unlock(&curr_sb_lock);
+
+	/* synchronize its controller if it has */
+	if (cp->control)
+		ip_vs_sync_conn(cp->control);
+}
+
+
+/*
+ * Process received multicast message and create the corresponding
+ * ip_vs_conn entries.
+ *
+ * buffer/buflen describe one received datagram: an ip_vs_sync_mesg
+ * header followed by m->nr_conns entries, each SIMPLE_CONN_SIZE or
+ * FULL_CONN_SIZE bytes long depending on the entry's flags.
+ *
+ * The whole message is dropped if it is shorter than the header, if
+ * its size field differs from buflen, or if ip_vs_backup_syncid is
+ * non-zero and does not match m->syncid. An entry with an unsupported
+ * protocol or an out-of-range state is skipped. A truncated entry or
+ * a failing ip_vs_conn_new() stops processing of the remaining entries.
+ *
+ * Note that m->size is converted to host byte order in place, i.e. the
+ * buffer is written to even though it is passed as const.
+ */
+static void ip_vs_process_message(const char *buffer, const size_t buflen)
+{
+ struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
+ struct ip_vs_sync_conn *s;
+ struct ip_vs_sync_conn_options *opt;
+ struct ip_vs_conn *cp;
+ struct ip_vs_protocol *pp;
+ struct ip_vs_dest *dest;
+ char *p; /* read cursor inside buffer */
+ int i;
+
+ if (buflen < sizeof(struct ip_vs_sync_mesg)) {
+ IP_VS_ERR_RL("sync message header too short\n");
+ return;
+ }
+
+ /* Convert size back to host byte order */
+ m->size = ntohs(m->size);
+
+ if (buflen != m->size) {
+ IP_VS_ERR_RL("bogus sync message size\n");
+ return;
+ }
+
+ /* SyncID sanity check */
+ if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
+ IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
+ m->syncid);
+ return;
+ }
+
+ p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
+ for (i=0; i<m->nr_conns; i++) {
+ unsigned flags, state;
+
+ if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
+ IP_VS_ERR_RL("bogus conn in sync message\n");
+ return;
+ }
+ s = (struct ip_vs_sync_conn *) p;
+ flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; /* SYNC is always set on received entries */
+ flags &= ~IP_VS_CONN_F_HASHED; /* HASHED is cleared whatever was received */
+ if (flags & IP_VS_CONN_F_SEQ_MASK) {
+ opt = (struct ip_vs_sync_conn_options *)&s[1];
+ p += FULL_CONN_SIZE;
+ if (p > buffer+buflen) {
+ IP_VS_ERR_RL("bogus conn options in sync message\n");
+ return;
+ }
+ } else {
+ opt = NULL;
+ p += SIMPLE_CONN_SIZE;
+ }
+
+ state = ntohs(s->state);
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+ pp = ip_vs_proto_get(s->protocol);
+ if (!pp) {
+ IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
+ s->protocol);
+ continue;
+ }
+ if (state >= pp->num_states) {
+ IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
+ pp->name, state);
+ continue;
+ }
+ } else {
+ /* protocol in templates is not used for state/timeout */
+ pp = NULL;
+ if (state > 0) {
+ IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
+ state);
+ state = 0;
+ }
+ }
+
+ if (!(flags & IP_VS_CONN_F_TEMPLATE))
+ cp = ip_vs_conn_in_get(AF_INET, s->protocol,
+ (union nf_inet_addr *)&s->caddr,
+ s->cport,
+ (union nf_inet_addr *)&s->vaddr,
+ s->vport);
+ else
+ cp = ip_vs_ct_in_get(AF_INET, s->protocol,
+ (union nf_inet_addr *)&s->caddr,
+ s->cport,
+ (union nf_inet_addr *)&s->vaddr,
+ s->vport);
+ if (!cp) {
+ /*
+ * Find the appropriate destination for the connection.
+ * If it is not found the connection will remain unbound
+ * but still handled.
+ */
+ dest = ip_vs_find_dest(AF_INET,
+ (union nf_inet_addr *)&s->daddr,
+ s->dport,
+ (union nf_inet_addr *)&s->vaddr,
+ s->vport,
+ s->protocol);
+ /* Set the appropriate activity flag */
+ if (s->protocol == IPPROTO_TCP) {
+ if (state != IP_VS_TCP_S_ESTABLISHED)
+ flags |= IP_VS_CONN_F_INACTIVE;
+ else
+ flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
+ cp = ip_vs_conn_new(AF_INET, s->protocol,
+ (union nf_inet_addr *)&s->caddr,
+ s->cport,
+ (union nf_inet_addr *)&s->vaddr,
+ s->vport,
+ (union nf_inet_addr *)&s->daddr,
+ s->dport,
+ flags, dest);
+ if (dest)
+ atomic_dec(&dest->refcnt); /* presumably balances a reference taken by ip_vs_find_dest() -- confirm */
+ if (!cp) {
+ IP_VS_ERR("ip_vs_conn_new failed\n");
+ return; /* the remaining entries of the message are dropped too */
+ }
+ } else if (!cp->dest) {
+ dest = ip_vs_try_bind_dest(cp);
+ if (dest)
+ atomic_dec(&dest->refcnt);
+ } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
+ (cp->state != state)) {
+ /* update active/inactive flag for the connection */
+ dest = cp->dest;
+ if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (state != IP_VS_TCP_S_ESTABLISHED)) {
+ atomic_dec(&dest->activeconns);
+ atomic_inc(&dest->inactconns);
+ cp->flags |= IP_VS_CONN_F_INACTIVE;
+ } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (state == IP_VS_TCP_S_ESTABLISHED)) {
+ atomic_inc(&dest->activeconns);
+ atomic_dec(&dest->inactconns);
+ cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
+ }
+
+ if (opt)
+ memcpy(&cp->in_seq, opt, sizeof(*opt));
+ atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
+ cp->state = state;
+ cp->old_state = cp->state;
+ /*
+ * We can not recover the right timeout for templates
+ * in all cases, we can not find the right fwmark
+ * virtual service. If needed, we can do it for
+ * non-fwmark persistent services.
+ */
+ if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table) /* pp is non-NULL here: non-template entries without a protocol were skipped above */
+ cp->timeout = pp->timeout_table[state];
+ else
+ cp->timeout = (3*60*HZ); /* fixed three-minute fallback */
+ ip_vs_conn_put(cp);
+ }
+}
+
+
+/*
+ * Switch loopback of outgoing multicasts on a sending socket on
+ * (loop != 0) or off (loop == 0).
+ *
+ * Writes inet->mc_loop under the socket lock; this stands in for
+ *	setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop));
+ */
+static void set_mcast_loop(struct sock *sk, u_char loop)
+{
+	struct inet_sock *isk;
+
+	lock_sock(sk);
+	isk = inet_sk(sk);
+	isk->mc_loop = !!loop;
+	release_sock(sk);
+}
+
+/*
+ * Set the TTL used for outgoing multicasts on a sending socket.
+ *
+ * Writes inet->mc_ttl under the socket lock; this stands in for
+ *	setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl));
+ */
+static void set_mcast_ttl(struct sock *sk, u_char ttl)
+{
+	struct inet_sock *isk;
+
+	lock_sock(sk);
+	isk = inet_sk(sk);
+	isk->mc_ttl = ttl;
+	release_sock(sk);
+}
+
+/*
+ * Specify the default interface for outgoing multicasts.
+ *
+ * Looks ifname up in init_net and stores its ifindex in inet->mc_index
+ * under the socket lock (inet->mc_addr is left untouched).
+ *
+ * Returns 0 on success, -ENODEV if no device has that name, or -EINVAL
+ * if the socket is already bound to a different device.
+ */
+static int set_mcast_if(struct sock *sk, char *ifname)
+{
+	struct inet_sock *isk = inet_sk(sk);
+	struct net_device *ndev;
+
+	ndev = __dev_get_by_name(&init_net, ifname);
+	if (!ndev)
+		return -ENODEV;
+
+	/* a socket bound to a device may only send through that device */
+	if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != ndev->ifindex)
+		return -EINVAL;
+
+	lock_sock(sk);
+	isk->mc_index = ndev->ifindex;
+	release_sock(sk);
+
+	return 0;
+}
+
+
+/*
+ * Derive the maximum sync message length from the MTU of the
+ * interface configured for the given daemon state.
+ *
+ * MASTER: sync_send_mesg_maxlen becomes the header plus as many
+ * SIMPLE_CONN_SIZE entries as fit, capped at MAX_CONNS_PER_SYNCBUFF.
+ * (The extra 20 bytes subtracted from the MTU are not explained by the
+ * code -- presumably headroom for IP options; confirm.)
+ * BACKUP: sync_recv_mesg_maxlen becomes the MTU minus IP and UDP headers.
+ * Any other state changes nothing.
+ *
+ * Returns 0, or -ENODEV if the configured interface does not exist.
+ */
+static int set_sync_mesg_maxlen(int sync_state)
+{
+	struct net_device *ndev;
+	int max_conns;
+
+	if (sync_state == IP_VS_STATE_MASTER) {
+		ndev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn);
+		if (!ndev)
+			return -ENODEV;
+
+		max_conns = (ndev->mtu - sizeof(struct iphdr) -
+			     sizeof(struct udphdr) -
+			     SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
+		sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
+			SIMPLE_CONN_SIZE * min(max_conns, MAX_CONNS_PER_SYNCBUFF);
+		IP_VS_DBG(7, "setting the maximum length of sync sending "
+			  "message %d.\n", sync_send_mesg_maxlen);
+		return 0;
+	}
+
+	if (sync_state == IP_VS_STATE_BACKUP) {
+		ndev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn);
+		if (!ndev)
+			return -ENODEV;
+
+		sync_recv_mesg_maxlen = ndev->mtu -
+			sizeof(struct iphdr) - sizeof(struct udphdr);
+		IP_VS_DBG(7, "setting the maximum length of sync receiving "
+			  "message %d.\n", sync_recv_mesg_maxlen);
+	}
+
+	return 0;
+}
+
+
+/*
+ * Join a multicast group.
+ * The group is specified by a class D multicast address (224.0.0.0/4)
+ * in the in_addr structure passed in as a parameter; ifname names the
+ * interface on which to join.
+ *
+ * Returns -ENODEV if the interface does not exist, -EINVAL if the
+ * socket is bound to a different device, otherwise the result of
+ * ip_mc_join_group().
+ */
+static int
+join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
+{
+	struct net_device *ndev;
+	struct ip_mreqn req;
+	int err;
+
+	ndev = __dev_get_by_name(&init_net, ifname);
+	if (!ndev)
+		return -ENODEV;
+	if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != ndev->ifindex)
+		return -EINVAL;
+
+	/* build the membership request: group address plus interface index */
+	memset(&req, 0, sizeof(req));
+	req.imr_multiaddr = *addr;
+	req.imr_ifindex = ndev->ifindex;
+
+	lock_sock(sk);
+	err = ip_mc_join_group(sk, &req);
+	release_sock(sk);
+
+	return err;
+}
+
+
+/*
+ * Bind sock to the IPv4 address of the named multicast interface.
+ *
+ * The address is taken from inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE).
+ * If that yields 0 an error is logged, but the bind is still attempted
+ * with address 0. The port is always 0.
+ *
+ * Returns -ENODEV if the interface does not exist, otherwise the
+ * result of the socket's bind operation.
+ */
+static int bind_mcastif_addr(struct socket *sock, char *ifname)
+{
+	struct sockaddr_in sin;
+	struct net_device *ndev;
+	__be32 ifaddr;
+
+	ndev = __dev_get_by_name(&init_net, ifname);
+	if (!ndev)
+		return -ENODEV;
+
+	ifaddr = inet_select_addr(ndev, 0, RT_SCOPE_UNIVERSE);
+	if (!ifaddr)
+		IP_VS_ERR("You probably need to specify IP address on "
+			  "multicast interface.\n");
+
+	IP_VS_DBG(7, "binding socket with (%s) %u.%u.%u.%u\n",
+		  ifname, NIPQUAD(ifaddr));
+
+	/* bind to the interface address just selected, any port */
+	sin.sin_family = AF_INET;
+	sin.sin_port = 0;
+	sin.sin_addr.s_addr = ifaddr;
+
+	return sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
+}
+
+/*
+ * Set up the sending multicast socket over UDP.
+ *
+ * The socket sends through ip_vs_master_mcast_ifn with multicast
+ * loopback off and TTL 1, is bound to that interface's address and
+ * connected to mcast_addr.
+ *
+ * Returns the socket, or an ERR_PTR() carrying the failing step's
+ * error; on failure the socket is released.
+ */
+static struct socket * make_send_sock(void)
+{
+	struct socket *sock;
+	int err;
+
+	/* create the UDP socket first */
+	err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	if (err < 0) {
+		IP_VS_ERR("Error during creation of socket; terminating\n");
+		return ERR_PTR(err);
+	}
+
+	err = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+	if (err < 0) {
+		IP_VS_ERR("Error setting outbound mcast interface\n");
+		goto fail;
+	}
+
+	set_mcast_loop(sock->sk, 0);
+	set_mcast_ttl(sock->sk, 1);
+
+	err = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+	if (err < 0) {
+		IP_VS_ERR("Error binding address of the mcast interface\n");
+		goto fail;
+	}
+
+	err = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
+				 sizeof(struct sockaddr), 0);
+	if (err < 0) {
+		IP_VS_ERR("Error connecting to the multicast addr\n");
+		goto fail;
+	}
+
+	return sock;
+
+fail:
+	sock_release(sock);
+	return ERR_PTR(err);
+}
+
+
+/*
+ * Set up the receiving multicast socket over UDP.
+ *
+ * The socket has sk_reuse set, is bound to mcast_addr and joins that
+ * group on ip_vs_backup_mcast_ifn.
+ *
+ * Returns the socket, or an ERR_PTR() carrying the failing step's
+ * error; on failure the socket is released.
+ */
+static struct socket * make_receive_sock(void)
+{
+	struct socket *sock;
+	int err;
+
+	/* create the UDP socket first */
+	err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	if (err < 0) {
+		IP_VS_ERR("Error during creation of socket; terminating\n");
+		return ERR_PTR(err);
+	}
+
+	/* it is equivalent to the REUSEADDR option in user-space */
+	sock->sk->sk_reuse = 1;
+
+	err = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
+			      sizeof(struct sockaddr));
+	if (err < 0) {
+		IP_VS_ERR("Error binding to the multicast addr\n");
+		goto fail;
+	}
+
+	/* become a member of the sync multicast group */
+	err = join_mcast_group(sock->sk,
+			       (struct in_addr *) &mcast_addr.sin_addr,
+			       ip_vs_backup_mcast_ifn);
+	if (err < 0) {
+		IP_VS_ERR("Error joining to the multicast group\n");
+		goto fail;
+	}
+
+	return sock;
+
+fail:
+	sock_release(sock);
+	return ERR_PTR(err);
+}
+
+
+/*
+ * Send length bytes from buffer on sock without blocking
+ * (MSG_DONTWAIT | MSG_NOSIGNAL).
+ * Returns whatever kernel_sendmsg() returns.
+ */
+static int
+ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
+{
+	struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
+	struct kvec vec = {
+		.iov_base = (void *)buffer,
+		.iov_len = length,
+	};
+	int sent;
+
+	EnterFunction(7);
+	sent = kernel_sendmsg(sock, &msg, &vec, 1, (size_t)(length));
+	LeaveFunction(7);
+
+	return sent;
+}
+
+/*
+ * Send one sync message on sock.
+ *
+ * msg->size is rewritten to network byte order in place before the
+ * send, so the message header is no longer in host order afterwards.
+ * A short or failed send is only logged.
+ */
+static void
+ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
+{
+	int host_len = msg->size;
+	int sent;
+
+	/* Put size in network byte order */
+	msg->size = htons(msg->size);
+
+	sent = ip_vs_send_async(sock, (char *)msg, host_len);
+	if (sent != host_len)
+		IP_VS_ERR("ip_vs_send_async error\n");
+}
+
+/*
+ * Receive one datagram from sock into buffer (at most buflen bytes).
+ *
+ * Returns the number of bytes received, or -1 if kernel_recvmsg()
+ * reported an error. The specific error code is not passed on; the
+ * caller, sync_thread_backup(), only tests for a result <= 0.
+ */
+static int
+ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
+{
+	struct msghdr msg = {NULL,};
+	struct kvec iov;
+	int len;
+
+	EnterFunction(7);
+
+	/* Receive a packet */
+	iov.iov_base = buffer;
+	iov.iov_len = (size_t)buflen;
+
+	len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0);
+	if (len < 0)
+		len = -1;
+
+	/*
+	 * Single exit so that LeaveFunction() pairs with EnterFunction()
+	 * on the error path as well.
+	 */
+	LeaveFunction(7);
+	return len;
+}
+
+/*
+ * Thread function of the MASTER sync daemon.
+ *
+ * data is the ip_vs_sync_thread_data prepared by start_sync_thread();
+ * this thread releases tinfo->sock and frees tinfo before it returns.
+ *
+ * Until kthread_should_stop() it sends every queued sync buffer, then
+ * the buffer returned by get_curr_sync_buff(2 * HZ) if there is one,
+ * and sleeps for up to HZ jiffies. On shutdown, buffers that are still
+ * queued or in progress are released without being sent.
+ */
+static int sync_thread_master(void *data)
+{
+ struct ip_vs_sync_thread_data *tinfo = data;
+ struct ip_vs_sync_buff *sb;
+
+ IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
+ "syncid = %d\n",
+ ip_vs_master_mcast_ifn, ip_vs_master_syncid);
+
+ while (!kthread_should_stop()) {
+ while ((sb = sb_dequeue())) { /* drain all completed buffers */
+ ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
+ ip_vs_sync_buff_release(sb);
+ }
+
+ /* check if entries stay in curr_sb for 2 seconds */
+ sb = get_curr_sync_buff(2 * HZ);
+ if (sb) {
+ ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
+ ip_vs_sync_buff_release(sb);
+ }
+
+ schedule_timeout_interruptible(HZ);
+ }
+
+ /* clean up the sync_buff queue */
+ while ((sb=sb_dequeue())) {
+ ip_vs_sync_buff_release(sb);
+ }
+
+ /* clean up the current sync_buff */
+ if ((sb = get_curr_sync_buff(0))) {
+ ip_vs_sync_buff_release(sb);
+ }
+
+ /* release the sending multicast socket */
+ sock_release(tinfo->sock);
+ kfree(tinfo);
+
+ return 0;
+}
+
+/*
+ * Thread function of the BACKUP sync daemon.
+ *
+ * data is the ip_vs_sync_thread_data prepared by start_sync_thread();
+ * this thread releases tinfo->sock and frees tinfo->buf and tinfo
+ * before it returns.
+ *
+ * Sleeps until the socket's receive queue is non-empty or the thread
+ * is asked to stop, then reads datagrams of up to sync_recv_mesg_maxlen
+ * bytes into tinfo->buf and feeds each one to ip_vs_process_message()
+ * with bottom halves disabled.
+ */
+static int sync_thread_backup(void *data)
+{
+ struct ip_vs_sync_thread_data *tinfo = data;
+ int len;
+
+ IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
+ "syncid = %d\n",
+ ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
+
+ while (!kthread_should_stop()) {
+ wait_event_interruptible(*tinfo->sock->sk->sk_sleep,
+ !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
+ || kthread_should_stop());
+
+ /* do we have data now? */
+ while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
+ len = ip_vs_receive(tinfo->sock, tinfo->buf,
+ sync_recv_mesg_maxlen);
+ if (len <= 0) {
+ IP_VS_ERR("receiving message error\n");
+ break; /* leave the inner loop; the outer loop waits again */
+ }
+
+ /* disable bottom half, because it accesses the data
+ shared by softirq while getting/creating conns */
+ local_bh_disable();
+ ip_vs_process_message(tinfo->buf, len);
+ local_bh_enable();
+ }
+ }
+
+ /* release the receiving multicast socket */
+ sock_release(tinfo->sock);
+ kfree(tinfo->buf);
+ kfree(tinfo);
+
+ return 0;
+}
+
+
+/*
+ * Start the sync daemon thread for the given state.
+ *
+ * state is IP_VS_STATE_MASTER or IP_VS_STATE_BACKUP, mcast_ifn names
+ * the multicast interface and syncid is recorded as the daemon's sync
+ * id. The function creates the matching socket, sizes the sync message
+ * from the interface MTU (allocating the receive buffer for the
+ * backup) and launches the kthread. Once running, the thread function
+ * releases the socket and frees tinfo (and buf) when it exits.
+ *
+ * Returns 0 on success, -EEXIST if that daemon is already running,
+ * -EINVAL for any other state, -ENOMEM on allocation failure, or the
+ * error reported by socket setup, set_sync_mesg_maxlen() or
+ * kthread_run().
+ */
+int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+{
+	struct ip_vs_sync_thread_data *tinfo;
+	struct task_struct **realtask, *task;
+	struct socket *sock;
+	char *name, *buf = NULL;
+	int (*threadfn)(void *data);
+	int result = -ENOMEM;
+
+	IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
+	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
+		  sizeof(struct ip_vs_sync_conn));
+
+	if (state == IP_VS_STATE_MASTER) {
+		if (sync_master_thread)
+			return -EEXIST;
+
+		strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
+			sizeof(ip_vs_master_mcast_ifn));
+		ip_vs_master_syncid = syncid;
+		realtask = &sync_master_thread;
+		name = "ipvs_syncmaster";
+		threadfn = sync_thread_master;
+		sock = make_send_sock();
+	} else if (state == IP_VS_STATE_BACKUP) {
+		if (sync_backup_thread)
+			return -EEXIST;
+
+		strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
+			sizeof(ip_vs_backup_mcast_ifn));
+		ip_vs_backup_syncid = syncid;
+		realtask = &sync_backup_thread;
+		name = "ipvs_syncbackup";
+		threadfn = sync_thread_backup;
+		sock = make_receive_sock();
+	} else {
+		return -EINVAL;
+	}
+
+	if (IS_ERR(sock)) {
+		result = PTR_ERR(sock);
+		goto out;
+	}
+
+	/*
+	 * set_sync_mesg_maxlen() looks the interface up again and can fail.
+	 * Without this check the daemon would start with a stale or unset
+	 * message length, which is then used to size the sync buffers.
+	 */
+	result = set_sync_mesg_maxlen(state);
+	if (result < 0)
+		goto outsocket;
+
+	/* from here to kthread_run() only allocations can fail */
+	result = -ENOMEM;
+
+	if (state == IP_VS_STATE_BACKUP) {
+		buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+		if (!buf)
+			goto outsocket;
+	}
+
+	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+	if (!tinfo)
+		goto outbuf;
+
+	tinfo->sock = sock;
+	tinfo->buf = buf;
+
+	task = kthread_run(threadfn, tinfo, name);
+	if (IS_ERR(task)) {
+		result = PTR_ERR(task);
+		goto outtinfo;
+	}
+
+	/* mark as active */
+	*realtask = task;
+	ip_vs_sync_state |= state;
+
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	return 0;
+
+outtinfo:
+	kfree(tinfo);
+outbuf:
+	kfree(buf);		/* buf is NULL for the master; kfree(NULL) is a no-op */
+outsocket:
+	sock_release(sock);
+out:
+	return result;
+}
+
+/*
+ * Stop the sync daemon thread for the given state.
+ *
+ * Clears the state bit in ip_vs_sync_state, stops the kthread with
+ * kthread_stop(), resets the thread pointer and drops the module use
+ * count. For the master the state bit is cleared under
+ * ip_vs_sync_lock; for the backup it is cleared without that lock.
+ *
+ * Returns 0 on success, -ESRCH if no such thread is running, or
+ * -EINVAL if state is neither MASTER nor BACKUP.
+ */
+int stop_sync_thread(int state)
+{
+ IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
+
+ if (state == IP_VS_STATE_MASTER) {
+ if (!sync_master_thread)
+ return -ESRCH;
+
+ IP_VS_INFO("stopping master sync thread %d ...\n",
+ task_pid_nr(sync_master_thread));
+
+ /*
+ * The lock synchronizes with sb_queue_tail(), so that we don't
+ * add sync buffers to the queue, when we are already in
+ * progress of stopping the master sync daemon.
+ */
+
+ spin_lock_bh(&ip_vs_sync_lock);
+ ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
+ spin_unlock_bh(&ip_vs_sync_lock);
+ kthread_stop(sync_master_thread);
+ sync_master_thread = NULL;
+ } else if (state == IP_VS_STATE_BACKUP) {
+ if (!sync_backup_thread)
+ return -ESRCH;
+
+ IP_VS_INFO("stopping backup sync thread %d ...\n",
+ task_pid_nr(sync_backup_thread));
+
+ ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
+ kthread_stop(sync_backup_thread);
+ sync_backup_thread = NULL;
+ } else {
+ return -EINVAL;
+ }
+
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
+
+ return 0;
+}
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
new file mode 100644
index 0000000..8c596e7
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -0,0 +1,128 @@
+/*
+ * IPVS: Weighted Least-Connection Scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Peter Kese <peter.kese@ijs.si>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * Wensong Zhang : changed the ip_vs_wlc_schedule to return dest
+ * Wensong Zhang : changed to use the inactconns in scheduling
+ * Wensong Zhang : changed some cosmetic things for debugging
+ * Wensong Zhang : changed for the d-linked destination list
+ * Wensong Zhang : added the ip_vs_wlc_update_svc
+ * Wensong Zhang : added any dest with weight=0 is quiesced
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * Estimate the processing overhead of a destination.
+ *
+ * An active connection is assumed to cost 256 times as much as an
+ * inactive one on average (the factor is a rough guess and may be
+ * revised), giving:
+ *	dest->activeconns*256 + dest->inactconns
+ */
+static inline unsigned int
+ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest)
+{
+	int active = atomic_read(&dest->activeconns);
+	int inactive = atomic_read(&dest->inactconns);
+
+	return (active << 8) + inactive;
+}
+
+
+/*
+ * Weighted Least Connection scheduling
+ *
+ * Returns the destination of svc with the smallest overhead/weight
+ * ratio among those that are not flagged IP_VS_DEST_F_OVERLOAD, or
+ * NULL if no destination is both not overloaded and of positive
+ * weight. skb is not used.
+ */
+static struct ip_vs_dest *
+ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+ struct ip_vs_dest *dest, *least;
+ unsigned int loh, doh; /* overhead of the current best / of the candidate */
+
+ IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
+
+ /*
+ * We calculate the load of each dest server as follows:
+ * (dest overhead) / dest->weight
+ *
+ * Remember -- no floats in kernel mode!!!
+ * The comparison of h1*w2 > h2*w1 is equivalent to that of
+ * h1/w1 > h2/w2
+ * if every weight is larger than zero.
+ *
+ * The server with weight=0 is quiesced and will not receive any
+ * new connections.
+ */
+
+ /* first stage: find the first usable dest to seed least/loh */
+ list_for_each_entry(dest, &svc->destinations, n_list) {
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+ atomic_read(&dest->weight) > 0) {
+ least = dest;
+ loh = ip_vs_wlc_dest_overhead(least);
+ goto nextstage;
+ }
+ }
+ return NULL;
+
+ /*
+ * Find the destination with the least load, continuing the walk
+ * right after the seed entry. A dest with weight 0 can never win
+ * here because the left-hand side of the comparison becomes 0.
+ */
+ nextstage:
+ list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+ continue;
+ doh = ip_vs_wlc_dest_overhead(dest);
+ if (loh * atomic_read(&dest->weight) >
+ doh * atomic_read(&least->weight)) {
+ least = dest;
+ loh = doh;
+ }
+ }
+
+ IP_VS_DBG_BUF(6, "WLC: server %s:%u "
+ "activeconns %d refcnt %d weight %d overhead %d\n",
+ IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ atomic_read(&least->activeconns),
+ atomic_read(&least->refcnt),
+ atomic_read(&least->weight), loh);
+
+ return least;
+}
+
+/*
+ * Scheduler descriptor registered under the name "wlc". Only the
+ * schedule hook is set; no init/done/update service callbacks are
+ * provided. supports_ipv6 is set when CONFIG_IP_VS_IPV6 is enabled.
+ */
+static struct ip_vs_scheduler ip_vs_wlc_scheduler =
+{
+ .name = "wlc",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 1,
+#endif
+ .schedule = ip_vs_wlc_schedule,
+};
+
+/*
+ * Module entry and exit points: loading the module registers the
+ * "wlc" scheduler, unloading it unregisters the scheduler again.
+ */
+static int __init ip_vs_wlc_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_wlc_scheduler); /* result is passed straight back to the module loader */
+}
+
+static void __exit ip_vs_wlc_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler);
+}
+
+module_init(ip_vs_wlc_init);
+module_exit(ip_vs_wlc_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
new file mode 100644
index 0000000..7ea92fe
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -0,0 +1,237 @@
+/*
+ * IPVS: Weighted Round-Robin Scheduling module
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * Wensong Zhang : changed the ip_vs_wrr_schedule to return dest
+ * Wensong Zhang : changed some cosmetic things for debugging
+ * Wensong Zhang : changed for the d-linked destination list
+ * Wensong Zhang : added the ip_vs_wrr_update_svc
+ * Julian Anastasov : fixed the bug of returning destination
+ * with weight 0 when all weights are zero
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/net.h>
+
+#include <net/ip_vs.h>
+
+/*
+ * Per-service scheduling state for weighted round-robin, stored in
+ * svc->sched_data: the current position in the destination list and
+ * the weight threshold a destination must reach to be selected.
+ */
+struct ip_vs_wrr_mark {
+ struct list_head *cl; /* current list position (the list head itself means "start of a pass") */
+ int cw; /* current weight threshold: a dest is picked only if its weight >= cw */
+ int mw; /* maximum weight among the destinations; cw is reset to this */
+ int di; /* decreasing interval: gcd of the weights, subtracted from cw on each pass */
+};
+
+
+/*
+ * Greatest common divisor of two server weights (Euclid's algorithm).
+ * b must be non-zero.
+ */
+static int gcd(int a, int b)
+{
+	int rem;
+
+	for (rem = a % b; rem != 0; rem = a % b) {
+		a = b;
+		b = rem;
+	}
+
+	return b;
+}
+
+/*
+ * Greatest common divisor of all positive destination weights of svc.
+ * Destinations with weight <= 0 are ignored; returns 1 if there is no
+ * destination with a positive weight.
+ */
+static int ip_vs_wrr_gcd_weight(struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *dest;
+	int acc = 0;
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		int w = atomic_read(&dest->weight);
+
+		if (w <= 0)
+			continue;
+
+		/* the first positive weight seeds the running gcd */
+		acc = (acc > 0) ? gcd(w, acc) : w;
+	}
+
+	if (!acc)
+		return 1;
+	return acc;
+}
+
+
+/*
+ * Largest weight among the destinations of svc; 0 if the list is
+ * empty or no destination has a positive weight.
+ */
+static int ip_vs_wrr_max_weight(struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *dest;
+	int best = 0;
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (atomic_read(&dest->weight) <= best)
+			continue;
+		best = atomic_read(&dest->weight);
+	}
+
+	return best;
+}
+
+
+/*
+ * Allocate and initialise the WRR state for a service and attach it
+ * to svc->sched_data. The walk starts at the list head with a current
+ * weight of 0.
+ * Returns 0, or -ENOMEM if the allocation fails.
+ */
+static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_wrr_mark *state;
+
+	state = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC);
+	if (!state) {
+		IP_VS_ERR("ip_vs_wrr_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+
+	state->cl = &svc->destinations;
+	state->cw = 0;
+	state->mw = ip_vs_wrr_max_weight(svc);
+	state->di = ip_vs_wrr_gcd_weight(svc);
+
+	svc->sched_data = state;
+	return 0;
+}
+
+
+/*
+ * Free the WRR state attached to svc->sched_data. Always returns 0.
+ */
+static int ip_vs_wrr_done_svc(struct ip_vs_service *svc)
+{
+	void *state = svc->sched_data;
+
+	kfree(state);
+	return 0;
+}
+
+
+/*
+ * Refresh the WRR state of a service: restart the walk at the list
+ * head, recompute the maximum weight and the gcd, and reset the
+ * current weight to 0 if it now exceeds the new maximum.
+ * Always returns 0.
+ */
+static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_wrr_mark *state = svc->sched_data;
+	int max_w = ip_vs_wrr_max_weight(svc);
+	int step = ip_vs_wrr_gcd_weight(svc);
+
+	state->cl = &svc->destinations;
+	state->mw = max_w;
+	state->di = step;
+	if (state->cw > max_w)
+		state->cw = 0;
+
+	return 0;
+}
+
+
+/*
+ * Weighted Round-Robin Scheduling
+ *
+ * Walks svc->destinations circularly, continuing from mark->cl, and
+ * returns the first destination that is not overloaded and whose
+ * weight is at least the current threshold mark->cw. Each time the
+ * walk passes the list head, cw is lowered by mark->di; once it drops
+ * to zero or below it is reset to mark->mw.
+ *
+ * Returns NULL if the list is empty, if mark->mw is 0, or if the walk
+ * returns to its starting position with cw == di without finding a
+ * destination. The state in svc->sched_data is updated under
+ * svc->sched_lock. skb is not used.
+ */
+static struct ip_vs_dest *
+ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+ struct ip_vs_dest *dest;
+ struct ip_vs_wrr_mark *mark = svc->sched_data;
+ struct list_head *p; /* position at entry, used to detect a full cycle */
+
+ IP_VS_DBG(6, "ip_vs_wrr_schedule(): Scheduling...\n");
+
+ /*
+ * This loop will always terminate, because mark->cw in (0, max_weight]
+ * and at least one server has its weight equal to max_weight.
+ */
+ write_lock(&svc->sched_lock);
+ p = mark->cl;
+ while (1) {
+ if (mark->cl == &svc->destinations) {
+ /* it is at the head of the destination list */
+
+ if (mark->cl == mark->cl->next) {
+ /* no dest entry */
+ dest = NULL;
+ goto out;
+ }
+
+ mark->cl = svc->destinations.next;
+ mark->cw -= mark->di; /* new pass: lower the threshold by one step */
+ if (mark->cw <= 0) {
+ mark->cw = mark->mw; /* wrapped around: start again from the maximum */
+ /*
+ * Still zero, which means no available servers.
+ */
+ if (mark->cw == 0) {
+ mark->cl = &svc->destinations;
+ IP_VS_ERR_RL("ip_vs_wrr_schedule(): "
+ "no available servers\n");
+ dest = NULL;
+ goto out;
+ }
+ }
+ } else
+ mark->cl = mark->cl->next;
+
+ if (mark->cl != &svc->destinations) {
+ /* not at the head of the list */
+ dest = list_entry(mark->cl, struct ip_vs_dest, n_list);
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+ atomic_read(&dest->weight) >= mark->cw) {
+ /* got it */
+ break;
+ }
+ }
+
+ if (mark->cl == p && mark->cw == mark->di) {
+ /* back to the start, and no dest is found.
+ It is only possible when all dests are OVERLOADED */
+ dest = NULL;
+ goto out;
+ }
+ }
+
+ IP_VS_DBG_BUF(6, "WRR: server %s:%u "
+ "activeconns %d refcnt %d weight %d\n",
+ IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+ atomic_read(&dest->activeconns),
+ atomic_read(&dest->refcnt),
+ atomic_read(&dest->weight));
+
+ out:
+ write_unlock(&svc->sched_lock);
+ return dest;
+}
+
+/*
+ * Scheduler descriptor registered under the name "wrr", followed by
+ * the module entry and exit points that register and unregister it.
+ * Unlike a scheduler with only a schedule hook, this one also sets the
+ * init/done/update service callbacks, which manage the per-service
+ * state kept in svc->sched_data.
+ */
+static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
+ .name = "wrr",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 1,
+#endif
+ .init_service = ip_vs_wrr_init_svc,
+ .done_service = ip_vs_wrr_done_svc,
+ .update_service = ip_vs_wrr_update_svc,
+ .schedule = ip_vs_wrr_schedule,
+};
+
+static int __init ip_vs_wrr_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ; /* result is passed straight back to the module loader */
+}
+
+static void __exit ip_vs_wrr_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler);
+}
+
+module_init(ip_vs_wrr_init);
+module_exit(ip_vs_wrr_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
new file mode 100644
index 0000000..e90d52f
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -0,0 +1,1005 @@
+/*
+ * ip_vs_xmit.c: various packet transmitters for IPVS
+ *
+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
+ * Julian Anastasov <ja@ssi.bg>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/tcp.h> /* for tcphdr */
+#include <net/ip.h>
+#include <net/tcp.h> /* for csum_tcpudp_magic */
+#include <net/udp.h>
+#include <net/icmp.h> /* for icmp_send */
+#include <net/route.h> /* for ip_route_output */
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <linux/icmpv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * Destination cache to speed up outgoing route lookup.
+ *
+ * Install dst (and the rtos it was looked up with) as the cached
+ * route of dest, then release the entry that was cached before.
+ */
+static inline void
+__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
+{
+	struct dst_entry *prev = dest->dst_cache;
+
+	dest->dst_cache = dst;
+	dest->dst_rtos = rtos;
+
+	dst_release(prev);
+}
+
+/*
+ * Return the cached route of dest with an extra reference held, or
+ * NULL if nothing is cached or the cached entry is no longer valid.
+ *
+ * The entry is revalidated through dst->ops->check() only when it is
+ * marked obsolete or, for AF_INET, when rtos differs from the value it
+ * was cached with. An entry that fails revalidation is dropped from
+ * the cache and released.
+ */
+static inline struct dst_entry *
+__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
+{
+	struct dst_entry *cached = dest->dst_cache;
+	int suspect;
+
+	if (cached == NULL)
+		return NULL;
+
+	suspect = cached->obsolete ||
+		  (dest->af == AF_INET && rtos != dest->dst_rtos);
+	if (!suspect || cached->ops->check(cached, cookie) != NULL) {
+		dst_hold(cached);
+		return cached;
+	}
+
+	dest->dst_cache = NULL;
+	dst_release(cached);
+	return NULL;
+}
+/*
+ * Get the IPv4 route towards the real server of cp for the given
+ * rtos, or NULL if the route lookup fails.
+ *
+ * With cp->dest set, the route cached in the dest is reused when
+ * __ip_vs_dst_check() accepts it; otherwise a fresh lookup on
+ * dest->addr.ip is made and a clone of the result is stored in the
+ * cache. All of this runs under dest->dst_lock. Without a dest, a
+ * plain lookup on cp->daddr.ip is made and nothing is cached.
+ */
+static struct rtable *
+__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
+{
+ struct rtable *rt; /* Route to the other host */
+ struct ip_vs_dest *dest = cp->dest;
+
+ if (dest) {
+ spin_lock(&dest->dst_lock);
+ if (!(rt = (struct rtable *)
+ __ip_vs_dst_check(dest, rtos, 0))) {
+ struct flowi fl = {
+ .oif = 0,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = dest->addr.ip,
+ .saddr = 0,
+ .tos = rtos, } },
+ };
+
+ if (ip_route_output_key(&init_net, &rt, &fl)) {
+ spin_unlock(&dest->dst_lock);
+ IP_VS_DBG_RL("ip_route_output error, "
+ "dest: %u.%u.%u.%u\n",
+ NIPQUAD(dest->addr.ip));
+ return NULL;
+ }
+ __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst)); /* the cache gets its own clone; rt itself goes to the caller */
+ IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n",
+ NIPQUAD(dest->addr.ip),
+ atomic_read(&rt->u.dst.__refcnt), rtos);
+ }
+ spin_unlock(&dest->dst_lock);
+ } else {
+ struct flowi fl = {
+ .oif = 0,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = cp->daddr.ip,
+ .saddr = 0,
+ .tos = rtos, } },
+ };
+
+ if (ip_route_output_key(&init_net, &rt, &fl)) {
+ IP_VS_DBG_RL("ip_route_output error, dest: "
+ "%u.%u.%u.%u\n", NIPQUAD(cp->daddr.ip));
+ return NULL;
+ }
+ }
+
+ return rt;
+}
+/*
+ * IPv6 counterpart of the outgoing route lookup: get the route towards
+ * the real server of cp, or NULL if ip6_route_output() returns NULL.
+ *
+ * With cp->dest set, the route cached in the dest is reused when
+ * __ip_vs_dst_check() accepts it (rtos and cookie are passed as 0);
+ * otherwise a fresh lookup on dest->addr.in6 is made and a clone of
+ * the result is cached, all under dest->dst_lock. Without a dest, a
+ * plain lookup on cp->daddr.in6 is made and nothing is cached.
+ */
+#ifdef CONFIG_IP_VS_IPV6
+static struct rt6_info *
+__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
+{
+ struct rt6_info *rt; /* Route to the other host */
+ struct ip_vs_dest *dest = cp->dest;
+
+ if (dest) {
+ spin_lock(&dest->dst_lock);
+ rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
+ if (!rt) {
+ struct flowi fl = {
+ .oif = 0,
+ .nl_u = {
+ .ip6_u = {
+ .daddr = dest->addr.in6,
+ .saddr = {
+ .s6_addr32 =
+ { 0, 0, 0, 0 },
+ },
+ },
+ },
+ };
+
+ rt = (struct rt6_info *)ip6_route_output(&init_net,
+ NULL, &fl);
+ if (!rt) {
+ spin_unlock(&dest->dst_lock);
+ IP_VS_DBG_RL("ip6_route_output error, "
+ "dest: " NIP6_FMT "\n",
+ NIP6(dest->addr.in6));
+ return NULL;
+ }
+ __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst)); /* the cache gets its own clone; rt itself goes to the caller */
+ IP_VS_DBG(10, "new dst " NIP6_FMT ", refcnt=%d\n",
+ NIP6(dest->addr.in6),
+ atomic_read(&rt->u.dst.__refcnt));
+ }
+ spin_unlock(&dest->dst_lock);
+ } else {
+ struct flowi fl = {
+ .oif = 0,
+ .nl_u = {
+ .ip6_u = {
+ .daddr = cp->daddr.in6,
+ .saddr = {
+ .s6_addr32 = { 0, 0, 0, 0 },
+ },
+ },
+ },
+ };
+
+ rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+ if (!rt) {
+ IP_VS_DBG_RL("ip6_route_output error, dest: "
+ NIP6_FMT "\n", NIP6(cp->daddr.in6));
+ return NULL;
+ }
+ }
+
+ return rt;
+}
+#endif
+
+
+/*
+ * Drop the cached route of a destination; called before a dest is
+ * removed. The cache pointer is cleared first, then the old entry is
+ * released.
+ */
+void
+ip_vs_dst_reset(struct ip_vs_dest *dest)
+{
+	struct dst_entry *prev = dest->dst_cache;
+
+	dest->dst_cache = NULL;
+	dst_release(prev);
+}
+/*
+ * Send skb out through the NF_INET_LOCAL_OUT hook of protocol family
+ * pf, with dst_output as the continuation and the output device taken
+ * from rt. Before that, skb->ipvs_property is set to 1 and
+ * skb_forward_csum() is called on the skb.
+ * Note that skb is evaluated more than once.
+ */
+#define IP_VS_XMIT(pf, skb, rt) \
+do { \
+ (skb)->ipvs_property = 1; \
+ skb_forward_csum(skb); \
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
+ (rt)->u.dst.dev, dst_output); \
+} while (0)
+
+
+/*
+ * NULL transmitter (do nothing except return NF_ACCEPT)
+ *
+ * None of the arguments is used; the signature matches the other
+ * transmitters in this file.
+ */
+int
+ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp)
+{
+ /* we do not touch skb and do not need pskb ptr */
+ return NF_ACCEPT;
+}
+
+
+/*
+ * Bypass transmitter
+ * Let packets bypass the destination when the destination is not
+ * available, it may be only used in transparent cache cluster.
+ *
+ * The packet is routed to its own iph->daddr; cp and pp are not
+ * consulted. If it exceeds the route MTU and has DF set, an ICMP
+ * "fragmentation needed" is sent and the packet is dropped. Every
+ * path returns NF_STOLEN: the skb is either handed to IP_VS_XMIT()
+ * or disposed of here.
+ */
+int
+ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp)
+{
+ struct rtable *rt; /* Route to the other host */
+ struct iphdr *iph = ip_hdr(skb);
+ u8 tos = iph->tos;
+ int mtu;
+ struct flowi fl = {
+ .oif = 0,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = iph->daddr,
+ .saddr = 0,
+ .tos = RT_TOS(tos), } },
+ };
+
+ EnterFunction(10);
+
+ if (ip_route_output_key(&init_net, &rt, &fl)) {
+ IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
+ "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
+ goto tx_error_icmp;
+ }
+
+ /* MTU checking */
+ mtu = dst_mtu(&rt->u.dst);
+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+ ip_rt_put(rt);
+ icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+ IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
+ goto tx_error;
+ }
+
+ /*
+ * Call ip_send_check because we are not sure it is called
+ * after ip_defrag. Is copy-on-write needed?
+ */
+ if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
+ ip_rt_put(rt);
+ return NF_STOLEN; /* no kfree_skb() here: presumably skb_share_check() already disposed of the skb -- confirm */
+ }
+ ip_send_check(ip_hdr(skb));
+
+ /* drop old route */
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->local_df = 1;
+
+ IP_VS_XMIT(PF_INET, skb, rt);
+
+ LeaveFunction(10);
+ return NF_STOLEN;
+
+ tx_error_icmp:
+ dst_link_failure(skb);
+ tx_error:
+ kfree_skb(skb);
+ LeaveFunction(10);
+ return NF_STOLEN;
+}
+/*
+ * IPv6 bypass transmitter: route the packet to its own iph->daddr
+ * and send it out; cp and pp are not consulted.
+ *
+ * If the packet exceeds the route MTU, an ICMPv6 "packet too big" is
+ * sent and the packet is dropped. Every path returns NF_STOLEN: the
+ * skb is either handed to IP_VS_XMIT() or disposed of here.
+ */
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp)
+{
+ struct rt6_info *rt; /* Route to the other host */
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ int mtu;
+ struct flowi fl = {
+ .oif = 0,
+ .nl_u = {
+ .ip6_u = {
+ .daddr = iph->daddr,
+ .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
+ };
+
+ EnterFunction(10);
+
+ rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+ if (!rt) {
+ IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, "
+ "dest: " NIP6_FMT "\n", NIP6(iph->daddr));
+ goto tx_error_icmp;
+ }
+
+ /* MTU checking */
+ mtu = dst_mtu(&rt->u.dst);
+ if (skb->len > mtu) {
+ dst_release(&rt->u.dst);
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n");
+ goto tx_error;
+ }
+
+ /*
+ * Get a private skb if this one is shared. No header checksum
+ * call is made in this function (the IPv4 bypass transmitter
+ * calls ip_send_check() at this point).
+ */
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(skb == NULL)) {
+ dst_release(&rt->u.dst);
+ return NF_STOLEN; /* no kfree_skb() here: presumably skb_share_check() already disposed of the skb -- confirm */
+ }
+
+ /* drop old route */
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->local_df = 1;
+
+ IP_VS_XMIT(PF_INET6, skb, rt);
+
+ LeaveFunction(10);
+ return NF_STOLEN;
+
+ tx_error_icmp:
+ dst_link_failure(skb);
+ tx_error:
+ kfree_skb(skb);
+ LeaveFunction(10);
+ return NF_STOLEN;
+}
+#endif
+
+/*
+ * NAT transmitter (only for outside-to-inside nat forwarding)
+ * Not used for related ICMP
+ *
+ * Obtains the route for cp via __ip_vs_get_out_rt(), lets the protocol's
+ * dnat_handler (if set) mangle the packet, rewrites the IP destination
+ * to cp->daddr.ip, recomputes the IP header checksum and transmits with
+ * IP_VS_XMIT. Every path returns NF_STOLEN; on the error paths the skb
+ * is freed here.
+ */
+int
+ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp)
+{
+ struct rtable *rt; /* Route to the other host */
+ int mtu;
+ struct iphdr *iph = ip_hdr(skb);
+
+ EnterFunction(10);
+
+ /* check if it is a connection of no-client-port:
+  * the first 16 bits following the IP header are read and handed
+  * to ip_vs_conn_fill_cport() */
+ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+ __be16 _pt, *p;
+ p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
+ if (p == NULL)
+ goto tx_error;
+ ip_vs_conn_fill_cport(cp, *p);
+ IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
+ }
+
+ if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
+ goto tx_error_icmp;
+
+ /* MTU checking: only packets with DF set are rejected here */
+ mtu = dst_mtu(&rt->u.dst);
+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+ ip_rt_put(rt);
+ icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+ IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
+ goto tx_error;
+ }
+
+ /* copy-on-write the packet before mangling it;
+  * rt is still owned by this function, hence tx_error_put */
+ if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ goto tx_error_put;
+
+ if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+ goto tx_error_put;
+
+ /* drop old route; from here on rt is reachable through skb->dst,
+  * so later failures use tx_error (no separate ip_rt_put) */
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
+
+ /* mangle the packet */
+ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
+ goto tx_error;
+ ip_hdr(skb)->daddr = cp->daddr.ip;
+ ip_send_check(ip_hdr(skb));
+
+ IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
+
+ /* FIXME: when application helper enlarges the packet and the length
+ is larger than the MTU of outgoing device, there will be still
+ MTU problem. */
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->local_df = 1;
+
+ IP_VS_XMIT(PF_INET, skb, rt);
+
+ LeaveFunction(10);
+ return NF_STOLEN;
+
+ tx_error_icmp:
+ dst_link_failure(skb);
+ tx_error:
+ LeaveFunction(10);
+ kfree_skb(skb);
+ return NF_STOLEN;
+ tx_error_put:
+ ip_rt_put(rt);
+ goto tx_error;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * IPv6 NAT transmitter.
+ *
+ * Obtains the route for cp via __ip_vs_get_out_rt_v6(), lets the
+ * protocol's dnat_handler (if set) mangle the packet, rewrites the IPv6
+ * destination to cp->daddr.in6 and transmits with IP_VS_XMIT.
+ * Every path returns NF_STOLEN; on the error paths the skb is freed here.
+ */
+int
+ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp)
+{
+ struct rt6_info *rt; /* Route to the other host */
+ int mtu;
+
+ EnterFunction(10);
+
+ /* check if it is a connection of no-client-port:
+  * the port is read at a fixed offset of sizeof(struct ipv6hdr).
+  * NOTE(review): this presumably assumes no extension headers
+  * precede the transport header - confirm */
+ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+ __be16 _pt, *p;
+ p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
+ sizeof(_pt), &_pt);
+ if (p == NULL)
+ goto tx_error;
+ ip_vs_conn_fill_cport(cp, *p);
+ IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
+ }
+
+ rt = __ip_vs_get_out_rt_v6(cp);
+ if (!rt)
+ goto tx_error_icmp;
+
+ /* MTU checking: any packet longer than the MTU is rejected */
+ mtu = dst_mtu(&rt->u.dst);
+ if (skb->len > mtu) {
+ dst_release(&rt->u.dst);
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+ "ip_vs_nat_xmit_v6(): frag needed for");
+ goto tx_error;
+ }
+
+ /* copy-on-write the packet before mangling it;
+  * rt is still owned by this function, hence tx_error_put */
+ if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+ goto tx_error_put;
+
+ if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+ goto tx_error_put;
+
+ /* drop old route; from here on rt is reachable through skb->dst,
+  * so later failures use tx_error (no separate dst_release) */
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
+
+ /* mangle the packet */
+ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
+ goto tx_error;
+ ipv6_hdr(skb)->daddr = cp->daddr.in6;
+
+ IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
+
+ /* FIXME: when application helper enlarges the packet and the length
+ is larger than the MTU of outgoing device, there will be still
+ MTU problem. */
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->local_df = 1;
+
+ IP_VS_XMIT(PF_INET6, skb, rt);
+
+ LeaveFunction(10);
+ return NF_STOLEN;
+
+tx_error_icmp:
+ dst_link_failure(skb);
+tx_error:
+ LeaveFunction(10);
+ kfree_skb(skb);
+ return NF_STOLEN;
+tx_error_put:
+ dst_release(&rt->u.dst);
+ goto tx_error;
+}
+#endif
+
+
+/*
+ *   IP Tunneling transmitter
+ *
+ *   This function encapsulates the packet in a new IP packet, its
+ *   destination will be set to cp->daddr. Most code of this function
+ *   is taken from ipip.c.
+ *
+ *   It is used in VS/TUN cluster. The load balancer selects a real
+ *   server from a cluster based on a scheduling algorithm,
+ *   encapsulates the request packet and forwards it to the selected
+ *   server. For example, all real servers are configured with
+ *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
+ *   the encapsulated packet, it will decapsulate the packet, process
+ *   the request and return the response packets directly to the client
+ *   without passing the load balancer. This can greatly increase the
+ *   scalability of virtual server.
+ *
+ *   Used for ANY protocol
+ *
+ *   Always returns NF_STOLEN; on the error paths the skb is freed here.
+ */
+int
+ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		  struct ip_vs_protocol *pp)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct net_device *tdev;		/* Device to other host */
+	struct iphdr  *old_iph = ip_hdr(skb);
+	u8     tos = old_iph->tos;
+	/*
+	 * Copy only the DF bit of the inner header. The outer header
+	 * describes a new, unfragmented datagram, so the inner MF flag
+	 * and fragment offset must not leak into it.
+	 */
+	__be16 df = old_iph->frag_off & htons(IP_DF);
+	sk_buff_data_t old_transport_header = skb->transport_header;
+	struct iphdr  *iph;			/* Our new IP header */
+	unsigned int max_headroom;		/* The extra header space needed */
+	int    mtu;
+
+	EnterFunction(10);
+
+	if (skb->protocol != htons(ETH_P_IP)) {
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
+			     "ETH_P_IP: %d, skb protocol: %d\n",
+			     htons(ETH_P_IP), skb->protocol);
+		goto tx_error;
+	}
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
+		goto tx_error_icmp;
+
+	tdev = rt->u.dst.dev;
+
+	/* Room left for the inner packet once the outer header is added */
+	mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+	if (mtu < 68) {
+		ip_rt_put(rt);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
+		goto tx_error;
+	}
+	if (skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+	if (df && mtu < ntohs(old_iph->tot_len)) {
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		ip_rt_put(rt);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Okay, now see if we can stuff it in the buffer as-is.
+	 */
+	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
+
+	if (skb_headroom(skb) < max_headroom
+	    || skb_cloned(skb) || skb_shared(skb)) {
+		struct sk_buff *new_skb =
+			skb_realloc_headroom(skb, max_headroom);
+		if (!new_skb) {
+			ip_rt_put(rt);
+			kfree_skb(skb);
+			IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n");
+			return NF_STOLEN;
+		}
+		kfree_skb(skb);
+		skb = new_skb;
+		/* the header now lives in the new buffer */
+		old_iph = ip_hdr(skb);
+	}
+
+	skb->transport_header = old_transport_header;
+
+	/* fix old IP header checksum */
+	ip_send_check(old_iph);
+
+	skb_push(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/*
+	 *	Push down and install the IPIP header.
+	 */
+	iph			=	ip_hdr(skb);
+	iph->version		=	4;
+	iph->ihl		=	sizeof(struct iphdr)>>2;
+	iph->frag_off		=	df;
+	iph->protocol		=	IPPROTO_IPIP;
+	iph->tos		=	tos;
+	iph->daddr		=	rt->rt_dst;
+	iph->saddr		=	rt->rt_src;
+	iph->ttl		=	old_iph->ttl;
+	ip_select_ident(iph, &rt->u.dst, NULL);
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	ip_local_out(skb);
+
+	LeaveFunction(10);
+
+	return NF_STOLEN;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * IPv6 tunneling transmitter.
+ *
+ * Prepends a new IPv6 header (nexthdr = IPPROTO_IPV6) whose destination
+ * is taken from the route returned by __ip_vs_get_out_rt_v6() and whose
+ * source is cp->vaddr.in6, then sends the result with ip6_local_out().
+ * Always returns NF_STOLEN; on the error paths the skb is freed here.
+ */
+int
+ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp)
+{
+ struct rt6_info *rt; /* Route to the other host */
+ struct net_device *tdev; /* Device to other host */
+ struct ipv6hdr *old_iph = ipv6_hdr(skb);
+ sk_buff_data_t old_transport_header = skb->transport_header;
+ struct ipv6hdr *iph; /* Our new IP header */
+ unsigned int max_headroom; /* The extra header space needed */
+ int mtu;
+
+ EnterFunction(10);
+
+ if (skb->protocol != htons(ETH_P_IPV6)) {
+ IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, "
+ "ETH_P_IPV6: %d, skb protocol: %d\n",
+ htons(ETH_P_IPV6), skb->protocol);
+ goto tx_error;
+ }
+
+ rt = __ip_vs_get_out_rt_v6(cp);
+ if (!rt)
+ goto tx_error_icmp;
+
+ tdev = rt->u.dst.dev;
+
+ /* room left for the inner packet once the outer header is added */
+ mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
+ /* TODO IPv6: do we need this check in IPv6? */
+ if (mtu < 1280) {
+ dst_release(&rt->u.dst);
+ IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n");
+ goto tx_error;
+ }
+ if (skb->dst)
+ skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+ /* inner packet length = payload_len + fixed IPv6 header */
+ if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ dst_release(&rt->u.dst);
+ IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n");
+ goto tx_error;
+ }
+
+ /*
+ * Okay, now see if we can stuff it in the buffer as-is.
+ */
+ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
+
+ if (skb_headroom(skb) < max_headroom
+ || skb_cloned(skb) || skb_shared(skb)) {
+ struct sk_buff *new_skb =
+ skb_realloc_headroom(skb, max_headroom);
+ if (!new_skb) {
+ dst_release(&rt->u.dst);
+ kfree_skb(skb);
+ IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n");
+ return NF_STOLEN;
+ }
+ kfree_skb(skb);
+ skb = new_skb;
+ old_iph = ipv6_hdr(skb); /* header now lives in the new buffer */
+ }
+
+ skb->transport_header = old_transport_header;
+
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ /* NOTE(review): IPCB() is the accessor used by the IPv4 variant of
+  * this function; confirm clearing it is intended for IPv6 */
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
+ /* drop old route */
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
+
+ /*
+ * Push down and install the outer IPv6 (IPv6-in-IPv6) header.
+ * payload_len of the outer header is the inner payload_len plus
+ * the size of the inner IPv6 header.
+ */
+ iph = ipv6_hdr(skb);
+ iph->version = 6;
+ iph->nexthdr = IPPROTO_IPV6;
+ iph->payload_len = old_iph->payload_len;
+ be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
+ iph->priority = old_iph->priority;
+ memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
+ iph->daddr = rt->rt6i_dst.addr;
+ iph->saddr = cp->vaddr.in6; /* rt->rt6i_src.addr; */
+ iph->hop_limit = old_iph->hop_limit;
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->local_df = 1;
+
+ ip6_local_out(skb);
+
+ LeaveFunction(10);
+
+ return NF_STOLEN;
+
+tx_error_icmp:
+ dst_link_failure(skb);
+tx_error:
+ kfree_skb(skb);
+ LeaveFunction(10);
+ return NF_STOLEN;
+}
+#endif
+
+
+/*
+ *      Direct Routing transmitter
+ *      Used for ANY protocol
+ *
+ *      Re-routes the unmodified packet over the route obtained from
+ *      __ip_vs_get_out_rt(). Always returns NF_STOLEN; on the error
+ *      paths the skb is freed here.
+ */
+int
+ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+	      struct ip_vs_protocol *pp)
+{
+	struct iphdr *hdr = ip_hdr(skb);
+	struct rtable *route;		/* Route to the other host */
+	int path_mtu;
+
+	EnterFunction(10);
+
+	route = __ip_vs_get_out_rt(cp, RT_TOS(hdr->tos));
+	if (route == NULL) {
+		dst_link_failure(skb);
+		goto drop;
+	}
+
+	/* Oversized packets are only rejected when DF is set */
+	path_mtu = dst_mtu(&route->u.dst);
+	if (skb->len > path_mtu && (hdr->frag_off & htons(IP_DF))) {
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,
+			  htonl(path_mtu));
+		ip_rt_put(route);
+		IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
+		goto drop;
+	}
+
+	/*
+	 * Get an unshared skb, then recompute the IP header checksum
+	 * because we are not sure it was done after ip_defrag.
+	 */
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(skb == NULL)) {
+		ip_rt_put(route);
+		return NF_STOLEN;
+	}
+	ip_send_check(ip_hdr(skb));
+
+	/* Swap the old route for the new one */
+	dst_release(skb->dst);
+	skb->dst = &route->u.dst;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET, skb, route);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+drop:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ *      IPv6 Direct Routing transmitter
+ *
+ *      Re-routes the unmodified packet over the route obtained from
+ *      __ip_vs_get_out_rt_v6(). Always returns NF_STOLEN; on the error
+ *      paths the skb is freed here.
+ */
+int
+ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		 struct ip_vs_protocol *pp)
+{
+	struct rt6_info *route;		/* Route to the other host */
+	int path_mtu;
+
+	EnterFunction(10);
+
+	route = __ip_vs_get_out_rt_v6(cp);
+	if (route == NULL) {
+		dst_link_failure(skb);
+		goto drop;
+	}
+
+	/* The packet must fit the path MTU, otherwise report "too big" */
+	path_mtu = dst_mtu(&route->u.dst);
+	if (skb->len > path_mtu) {
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, path_mtu, skb->dev);
+		dst_release(&route->u.dst);
+		IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n");
+		goto drop;
+	}
+
+	/*
+	 * Make sure we hold an unshared skb before changing its route.
+	 * If that fails only the route reference is dropped here.
+	 */
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(skb == NULL)) {
+		dst_release(&route->u.dst);
+		return NF_STOLEN;
+	}
+
+	/* Swap the old route for the new one */
+	dst_release(skb->dst);
+	skb->dst = &route->u.dst;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET6, skb, route);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+drop:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+#endif
+
+
+/*
+ * ICMP packet transmitter
+ * called by the ip_vs_in_icmp
+ *
+ * offset is the number of leading bytes made writable before the
+ * packet is mangled by ip_vs_nat_icmp().
+ *
+ * Returns the result of cp->packet_xmit() (or NF_ACCEPT when no
+ * packet_xmit is set) for non-MASQ connections, NF_STOLEN otherwise.
+ */
+int
+ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, int offset)
+{
+ struct rtable *rt; /* Route to the other host */
+ int mtu;
+ int rc;
+
+ EnterFunction(10);
+
+ /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
+ forwarded directly here, because there is no need to
+ translate address/port back */
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
+ if (cp->packet_xmit)
+ rc = cp->packet_xmit(skb, cp, pp);
+ else
+ rc = NF_ACCEPT;
+ /* do not touch skb anymore */
+ atomic_inc(&cp->in_pkts);
+ goto out;
+ }
+
+ /*
+ * mangle and send the packet here (only for VS/NAT)
+ */
+
+ if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
+ goto tx_error_icmp;
+
+ /* MTU checking: only packets with DF set are rejected here.
+  * Note the log text below names ip_vs_in_icmp(), the caller. */
+ mtu = dst_mtu(&rt->u.dst);
+ if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
+ ip_rt_put(rt);
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+ IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
+ goto tx_error;
+ }
+
+ /* copy-on-write the packet before mangling it;
+  * rt is still owned by this function, hence tx_error_put */
+ if (!skb_make_writable(skb, offset))
+ goto tx_error_put;
+
+ if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+ goto tx_error_put;
+
+ /* drop the old route and attach the new one to the skb */
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
+
+ ip_vs_nat_icmp(skb, pp, cp, 0);
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->local_df = 1;
+
+ IP_VS_XMIT(PF_INET, skb, rt);
+
+ rc = NF_STOLEN;
+ goto out;
+
+ tx_error_icmp:
+ dst_link_failure(skb);
+ tx_error:
+ dev_kfree_skb(skb);
+ rc = NF_STOLEN;
+ out:
+ LeaveFunction(10);
+ return rc;
+ tx_error_put:
+ ip_rt_put(rt);
+ goto tx_error;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * ICMPv6 packet transmitter.
+ *
+ * offset is the number of leading bytes made writable before the
+ * packet is mangled by ip_vs_nat_icmp_v6().
+ *
+ * Returns the result of cp->packet_xmit() (or NF_ACCEPT when no
+ * packet_xmit is set) for non-MASQ connections, NF_STOLEN otherwise.
+ */
+int
+ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, int offset)
+{
+ struct rt6_info *rt; /* Route to the other host */
+ int mtu;
+ int rc;
+
+ EnterFunction(10);
+
+ /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
+ forwarded directly here, because there is no need to
+ translate address/port back */
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
+ if (cp->packet_xmit)
+ rc = cp->packet_xmit(skb, cp, pp);
+ else
+ rc = NF_ACCEPT;
+ /* do not touch skb anymore */
+ atomic_inc(&cp->in_pkts);
+ goto out;
+ }
+
+ /*
+ * mangle and send the packet here (only for VS/NAT)
+ */
+
+ rt = __ip_vs_get_out_rt_v6(cp);
+ if (!rt)
+ goto tx_error_icmp;
+
+ /* MTU checking: any packet longer than the MTU is rejected.
+  * Note the log text below names ip_vs_in_icmp(), not this function. */
+ mtu = dst_mtu(&rt->u.dst);
+ if (skb->len > mtu) {
+ dst_release(&rt->u.dst);
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
+ goto tx_error;
+ }
+
+ /* copy-on-write the packet before mangling it;
+  * rt is still owned by this function, hence tx_error_put */
+ if (!skb_make_writable(skb, offset))
+ goto tx_error_put;
+
+ if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+ goto tx_error_put;
+
+ /* drop the old route and attach the new one to the skb */
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
+
+ ip_vs_nat_icmp_v6(skb, pp, cp, 0);
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->local_df = 1;
+
+ IP_VS_XMIT(PF_INET6, skb, rt);
+
+ rc = NF_STOLEN;
+ goto out;
+
+tx_error_icmp:
+ dst_link_failure(skb);
+tx_error:
+ dev_kfree_skb(skb);
+ rc = NF_STOLEN;
+out:
+ LeaveFunction(10);
+ return rc;
+tx_error_put:
+ dst_release(&rt->u.dst);
+ goto tx_error;
+}
+#endif
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
new file mode 100644
index 0000000..b92df5c
--- /dev/null
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -0,0 +1,148 @@
+/* Accounting handling for netfilter. */
+
+/*
+ * (C) 2008 Krzysztof Piotr Oledzki <ole@ans.pl>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/netfilter.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+
+/* Build-time default for accounting: on only with CONFIG_NF_CT_ACCT */
+#ifdef CONFIG_NF_CT_ACCT
+#define NF_CT_ACCT_DEFAULT 1
+#else
+#define NF_CT_ACCT_DEFAULT 0
+#endif
+
+/* Value of the "acct" module parameter; copied into each namespace's
+ * ct.sysctl_acct by nf_conntrack_acct_init() */
+static int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT;
+
+module_param_named(acct, nf_ct_acct, bool, 0644);
+MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Template for the "nf_conntrack_acct" sysctl entry. It is duplicated
+ * per namespace by nf_conntrack_acct_init_sysctl(), which repoints
+ * .data at that namespace's ct.sysctl_acct.
+ */
+static struct ctl_table acct_sysctl_table[] = {
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nf_conntrack_acct",
+ .data = &init_net.ct.sysctl_acct,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {} /* terminating empty entry */
+};
+#endif /* CONFIG_SYSCTL */
+
+/*
+ * Print the packet and byte counters of one direction of a conntrack
+ * entry to a seq_file.
+ *
+ * @s:   seq_file to print into
+ * @ct:  conntrack entry whose accounting extension is looked up
+ * @dir: index into the per-direction counter array
+ *
+ * Returns 0 when the entry carries no accounting extension, otherwise
+ * the value returned by seq_printf().
+ */
+unsigned int
+seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
+{
+	struct nf_conn_counter *acct;
+
+	acct = nf_conn_acct_find(ct);
+	if (!acct)
+		return 0;
+
+	return seq_printf(s, "packets=%llu bytes=%llu ",
+			  (unsigned long long)acct[dir].packets,
+			  (unsigned long long)acct[dir].bytes);
+}
+EXPORT_SYMBOL_GPL(seq_print_acct);
+
+/* Conntrack extension descriptor: reserves room for one nf_conn_counter
+ * per direction (IP_CT_DIR_MAX entries) under id NF_CT_EXT_ACCT */
+static struct nf_ct_ext_type acct_extend __read_mostly = {
+ .len = sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]),
+ .align = __alignof__(struct nf_conn_counter[IP_CT_DIR_MAX]),
+ .id = NF_CT_EXT_ACCT,
+};
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Register a private copy of acct_sysctl_table for @net, with its data
+ * pointer aimed at that namespace's ct.sysctl_acct.
+ * Returns 0 on success and -ENOMEM on any failure.
+ */
+static int nf_conntrack_acct_init_sysctl(struct net *net)
+{
+	struct ctl_table *tbl;
+
+	tbl = kmemdup(acct_sysctl_table, sizeof(acct_sysctl_table),
+		      GFP_KERNEL);
+	if (tbl == NULL)
+		return -ENOMEM;
+
+	tbl[0].data = &net->ct.sysctl_acct;
+
+	net->ct.acct_sysctl_header =
+		register_net_sysctl_table(net, nf_net_netfilter_sysctl_path,
+					  tbl);
+	if (net->ct.acct_sysctl_header != NULL)
+		return 0;
+
+	/* Registration failed: report it and release the copy */
+	printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n");
+	kfree(tbl);
+	return -ENOMEM;
+}
+
+/*
+ * Unregister the per-namespace sysctl table and free the copy that
+ * nf_conntrack_acct_init_sysctl() allocated.
+ */
+static void nf_conntrack_acct_fini_sysctl(struct net *net)
+{
+	/* Fetch the table pointer before the header goes away */
+	struct ctl_table *tbl = net->ct.acct_sysctl_header->ctl_table_arg;
+
+	unregister_net_sysctl_table(net->ct.acct_sysctl_header);
+	kfree(tbl);
+}
+#else
+/* !CONFIG_SYSCTL stub: nothing to register, always succeeds */
+static int nf_conntrack_acct_init_sysctl(struct net *net)
+{
+ return 0;
+}
+
+/* !CONFIG_SYSCTL stub: nothing to unregister */
+static void nf_conntrack_acct_fini_sysctl(struct net *net)
+{
+}
+#endif
+
+/*
+ * Set up conntrack accounting for @net.
+ *
+ * Seeds the namespace's sysctl_acct from the "acct" module parameter.
+ * For init_net only, it additionally registers the accounting conntrack
+ * extension (and prints the CONFIG_NF_CT_ACCT deprecation notice).
+ * Finally registers the per-namespace sysctl.
+ *
+ * Returns 0 on success or a negative error code; on sysctl failure the
+ * extension registered for init_net is unregistered again.
+ */
+int nf_conntrack_acct_init(struct net *net)
+{
+	int ret;
+
+	net->ct.sysctl_acct = nf_ct_acct;
+
+	if (net_eq(net, &init_net)) {
+#ifdef CONFIG_NF_CT_ACCT
+	printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Please use\n");
+		printk(KERN_WARNING "nf_conntrack.acct=1 kernel parameter, acct=1 nf_conntrack module option or\n");
+		printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
+#endif
+
+		ret = nf_ct_extend_register(&acct_extend);
+		if (ret < 0) {
+			printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
+			goto out_extend_register;
+		}
+	}
+
+	ret = nf_conntrack_acct_init_sysctl(net);
+	if (ret < 0)
+		goto out_sysctl;
+
+	return 0;
+
+out_sysctl:
+	/* the extension is global, so only init_net owns it */
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&acct_extend);
+out_extend_register:
+	return ret;
+}
+
+/*
+ * Tear down conntrack accounting for @net: the sysctl first, then (for
+ * init_net only) the conntrack extension.
+ */
+void nf_conntrack_acct_fini(struct net *net)
+{
+	nf_conntrack_acct_fini_sysctl(net);
+
+	if (!net_eq(net, &init_net))
+		return;
+
+	nf_ct_extend_unregister(&acct_extend);
+}
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
new file mode 100644
index 0000000..38aedee
--- /dev/null
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -0,0 +1,235 @@
+/* Amanda extension for IP connection tracking
+ *
+ * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
+ * based on HW's ip_conntrack_irc.c as well as other modules
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/textsearch.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_amanda.h>
+
+/* Timeout passed to nf_ct_refresh() for the master connection; it is
+ * multiplied by HZ at the call site, i.e. expressed in seconds */
+static unsigned int master_timeout __read_mostly = 300;
+/* Name of the textsearch algorithm handed to textsearch_prepare() */
+static char *ts_algo = "kmp";
+
+MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
+MODULE_DESCRIPTION("Amanda connection tracking module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_conntrack_amanda");
+
+module_param(master_timeout, uint, 0600);
+MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
+module_param(ts_algo, charp, 0400);
+MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)");
+
+/* Optional hook, read with rcu_dereference() in amanda_help(); when it
+ * is set and the connection has a NAT status bit, it is called instead
+ * of nf_ct_expect_related() */
+unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct nf_conntrack_expect *exp)
+ __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_amanda_hook);
+
+/* Indices into search[]. amanda_help() iterates SEARCH_DATA through
+ * SEARCH_INDEX, so those three must stay contiguous and in this order */
+enum amanda_strings {
+ SEARCH_CONNECT,
+ SEARCH_NEWLINE,
+ SEARCH_DATA,
+ SEARCH_MESG,
+ SEARCH_INDEX,
+};
+
+/*
+ * Patterns looked for in Amanda packets, indexed by enum amanda_strings.
+ * .len is the pattern length without the terminating NUL; .ts is filled
+ * in by nf_conntrack_amanda_init() via textsearch_prepare().
+ */
+static struct {
+ const char *string;
+ size_t len;
+ struct ts_config *ts;
+} search[] __read_mostly = {
+ [SEARCH_CONNECT] = {
+ .string = "CONNECT ",
+ .len = 8,
+ },
+ [SEARCH_NEWLINE] = {
+ .string = "\n",
+ .len = 1,
+ },
+ [SEARCH_DATA] = {
+ .string = "DATA ",
+ .len = 5,
+ },
+ [SEARCH_MESG] = {
+ .string = "MESG ",
+ .len = 5,
+ },
+ [SEARCH_INDEX] = {
+ .string = "INDEX ",
+ .len = 6,
+ },
+};
+
+/*
+ * Conntrack helper callback for Amanda.
+ *
+ * For packets that are not in the original direction it refreshes the
+ * master connection timeout, locates a "CONNECT " ... "\n" span in the
+ * UDP payload and, for each of "DATA ", "MESG " and "INDEX " found in
+ * that span, parses the decimal port that follows and sets up a TCP
+ * expectation for it (through the NAT hook when that is set and the
+ * connection has a NAT status bit).
+ *
+ * Returns NF_ACCEPT by default, NF_DROP when an expectation cannot be
+ * allocated or registered, or whatever the NAT hook returned.
+ */
+static int amanda_help(struct sk_buff *skb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ struct ts_state ts;
+ struct nf_conntrack_expect *exp;
+ struct nf_conntrack_tuple *tuple;
+ unsigned int dataoff, start, stop, off, i;
+ char pbuf[sizeof("65535")], *tmp;
+ u_int16_t len;
+ __be16 port;
+ int ret = NF_ACCEPT;
+ typeof(nf_nat_amanda_hook) nf_nat_amanda;
+
+ /* Only look at packets from the Amanda server */
+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
+ return NF_ACCEPT;
+
+ /* increase the UDP timeout of the master connection as replies from
+ * Amanda clients to the server can be quite delayed */
+ nf_ct_refresh(ct, skb, master_timeout * HZ);
+
+ /* No data? */
+ dataoff = protoff + sizeof(struct udphdr);
+ if (dataoff >= skb->len) {
+ if (net_ratelimit())
+ printk("amanda_help: skblen = %u\n", skb->len);
+ return NF_ACCEPT;
+ }
+
+ /* skb_find_text() results are treated as relative to the "from"
+  * offset passed in, hence the "+= dataoff" / "+= start" fix-ups */
+ memset(&ts, 0, sizeof(ts));
+ start = skb_find_text(skb, dataoff, skb->len,
+ search[SEARCH_CONNECT].ts, &ts);
+ if (start == UINT_MAX)
+ goto out;
+ start += dataoff + search[SEARCH_CONNECT].len;
+
+ memset(&ts, 0, sizeof(ts));
+ stop = skb_find_text(skb, start, skb->len,
+ search[SEARCH_NEWLINE].ts, &ts);
+ if (stop == UINT_MAX)
+ goto out;
+ stop += start;
+
+ /* [start, stop) now spans the rest of the CONNECT line */
+ for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
+ memset(&ts, 0, sizeof(ts));
+ off = skb_find_text(skb, start, stop, search[i].ts, &ts);
+ if (off == UINT_MAX)
+ continue;
+ off += start + search[i].len;
+
+ /* copy at most 5 digits, bounded by the end of the line */
+ len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off);
+ if (skb_copy_bits(skb, off, pbuf, len))
+ break;
+ pbuf[len] = '\0';
+
+ /* len is reused: it becomes the number of digits consumed */
+ port = htons(simple_strtoul(pbuf, &tmp, 10));
+ len = tmp - pbuf;
+ if (port == 0 || len > 5)
+ break;
+
+ exp = nf_ct_expect_alloc(ct);
+ if (exp == NULL) {
+ ret = NF_DROP;
+ goto out;
+ }
+ /* expect TCP between the original tuple's addresses; source
+  * port is left as NULL, destination port is the parsed one */
+ tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
+ nf_ct_l3num(ct),
+ &tuple->src.u3, &tuple->dst.u3,
+ IPPROTO_TCP, NULL, &port);
+
+ /* NOTE(review): after a failure here the loop keeps going, and
+  * a later NAT hook call may overwrite ret - confirm intended */
+ nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook);
+ if (nf_nat_amanda && ct->status & IPS_NAT_MASK)
+ ret = nf_nat_amanda(skb, ctinfo, off - dataoff,
+ len, exp);
+ else if (nf_ct_expect_related(exp) != 0)
+ ret = NF_DROP;
+ nf_ct_expect_put(exp);
+ }
+
+out:
+ return ret;
+}
+
+/* Expectation policy shared by both helper entries below */
+static const struct nf_conntrack_expect_policy amanda_exp_policy = {
+ .max_expected = 3,
+ .timeout = 180,
+};
+
+/* Two helper registrations that differ only in l3num (AF_INET and
+ * AF_INET6); both match UDP with source-tuple port 10080 */
+static struct nf_conntrack_helper amanda_helper[2] __read_mostly = {
+ {
+ .name = "amanda",
+ .me = THIS_MODULE,
+ .help = amanda_help,
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = __constant_htons(10080),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .expect_policy = &amanda_exp_policy,
+ },
+ {
+ .name = "amanda",
+ .me = THIS_MODULE,
+ .help = amanda_help,
+ .tuple.src.l3num = AF_INET6,
+ .tuple.src.u.udp.port = __constant_htons(10080),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .expect_policy = &amanda_exp_policy,
+ },
+};
+
+/* Module exit: unregister both helpers, then destroy every prepared
+ * textsearch configuration */
+static void __exit nf_conntrack_amanda_fini(void)
+{
+	int n = 0;
+
+	nf_conntrack_helper_unregister(&amanda_helper[0]);
+	nf_conntrack_helper_unregister(&amanda_helper[1]);
+
+	while (n < ARRAY_SIZE(search)) {
+		textsearch_destroy(search[n].ts);
+		n++;
+	}
+}
+
+/*
+ * Module init: prepare one textsearch configuration per search[] entry,
+ * then register the IPv4 and IPv6 helpers. On failure everything set up
+ * so far is undone and the error code is returned.
+ */
+static int __init nf_conntrack_amanda_init(void)
+{
+	int err;
+	int n;
+
+	for (n = 0; n < ARRAY_SIZE(search); n++) {
+		search[n].ts = textsearch_prepare(ts_algo, search[n].string,
+						  search[n].len,
+						  GFP_KERNEL, TS_AUTOLOAD);
+		if (!IS_ERR(search[n].ts))
+			continue;
+
+		err = PTR_ERR(search[n].ts);
+		goto undo_search;
+	}
+
+	err = nf_conntrack_helper_register(&amanda_helper[0]);
+	if (err < 0)
+		goto undo_search;
+
+	err = nf_conntrack_helper_register(&amanda_helper[1]);
+	if (err >= 0)
+		return 0;
+
+	/* second helper failed: take the first one back out */
+	nf_conntrack_helper_unregister(&amanda_helper[0]);
+
+undo_search:
+	/* n is the index of the first entry that was NOT prepared */
+	for (n--; n >= 0; n--)
+		textsearch_destroy(search[n].ts);
+
+	return err;
+}
+
+module_init(nf_conntrack_amanda_init);
+module_exit(nf_conntrack_amanda_fini);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
new file mode 100644
index 0000000..233fdd2
--- /dev/null
+++ b/net/netfilter/nf_conntrack_core.c
@@ -0,0 +1,1280 @@
+/* Connection state tracking for netfilter. This is separated from,
+ but required by, the NAT layer; it can also be used by an iptables
+ extension. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/moduleparam.h>
+#include <linux/notifier.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/socket.h>
+#include <linux/mm.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_nat.h>
+
+#define NF_CONNTRACK_VERSION "0.5.0" /* printed once by nf_conntrack_init_init_net() */
+
+unsigned int
+(*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
+ enum nf_nat_manip_type manip,
+ struct nlattr *attr) __read_mostly;
+EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
+
+DEFINE_SPINLOCK(nf_conntrack_lock); /* taken (with BHs off) around every hash/unconfirmed list modification in this file */
+EXPORT_SYMBOL_GPL(nf_conntrack_lock);
+
+unsigned int nf_conntrack_htable_size __read_mostly; /* number of hash buckets; settable through the "hashsize" module parameter */
+EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
+
+int nf_conntrack_max __read_mostly; /* per-namespace entry limit checked in nf_conntrack_alloc(); 0 disables the check */
+EXPORT_SYMBOL_GPL(nf_conntrack_max);
+
+struct nf_conn nf_conntrack_untracked __read_mostly; /* fake conntrack: never deleted, not in any hash */
+EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
+
+static struct kmem_cache *nf_conntrack_cachep __read_mostly; /* slab cache for struct nf_conn */
+
+static int nf_conntrack_hash_rnd_initted; /* set once nf_conntrack_hash_rnd has been seeded in nf_conntrack_alloc() */
+static unsigned int nf_conntrack_hash_rnd; /* random seed mixed into every tuple hash */
+
+/*
+ * Hash a tuple into the range [0, size) using the seed rnd.
+ *
+ * The direction must be ignored, so only the words up to (but not
+ * including) the destination ports are fed to jhash2(); the destination
+ * port/id and the protocol number are folded into the seed instead.
+ */
+static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
+				  unsigned int size, unsigned int rnd)
+{
+	const unsigned int words =
+		(sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
+	const u32 seed = rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
+				tuple->dst.protonum);
+	const u_int32_t mixed = jhash2((u32 *)tuple, words, seed);
+
+	/* Scale the 32-bit hash into [0, size) without a modulo. */
+	return ((u64)mixed * size) >> 32;
+}
+
+/* Bucket index of a tuple for the current table size and hash seed. */
+static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
+{
+	const unsigned int buckets = nf_conntrack_htable_size;
+	const unsigned int seed = nf_conntrack_hash_rnd;
+
+	return __hash_conntrack(tuple, buckets, seed);
+}
+
+/*
+ * Build the conntrack tuple for a packet.  The tuple is zeroed, the
+ * layer-3 handler fills in its part using nhoff and the layer-4 handler
+ * fills in its part using dataoff.  Returns false if the layer-3 handler
+ * rejects the packet, otherwise the layer-4 handler's result.
+ */
+bool
+nf_ct_get_tuple(const struct sk_buff *skb,
+		unsigned int nhoff,
+		unsigned int dataoff,
+		u_int16_t l3num,
+		u_int8_t protonum,
+		struct nf_conntrack_tuple *tuple,
+		const struct nf_conntrack_l3proto *l3proto,
+		const struct nf_conntrack_l4proto *l4proto)
+{
+	memset(tuple, 0, sizeof(*tuple));
+	tuple->src.l3num = l3num;
+
+	if (!l3proto->pkt_to_tuple(skb, nhoff, tuple))
+		return false;
+
+	tuple->dst.dir = IP_CT_DIR_ORIGINAL;
+	tuple->dst.protonum = protonum;
+
+	return l4proto->pkt_to_tuple(skb, dataoff, tuple);
+}
+EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
+
+/*
+ * Like nf_ct_get_tuple(), but looks up the layer-3 and layer-4 handlers
+ * itself (under rcu_read_lock) from l3num and the packet contents.
+ * Returns false when the layer-3 handler does not answer NF_ACCEPT or
+ * when the tuple cannot be extracted.
+ */
+bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
+		       u_int16_t l3num, struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_l3proto *l3;
+	struct nf_conntrack_l4proto *l4;
+	unsigned int protoff;
+	u_int8_t protonum;
+	bool ok = false;
+
+	rcu_read_lock();
+
+	l3 = __nf_ct_l3proto_find(l3num);
+	if (l3->get_l4proto(skb, nhoff, &protoff, &protonum) == NF_ACCEPT) {
+		l4 = __nf_ct_l4proto_find(l3num, protonum);
+		ok = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum,
+				     tuple, l3, l4);
+	}
+
+	rcu_read_unlock();
+	return ok;
+}
+EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
+
+/*
+ * Fill *inverse with the tuple for the opposite direction of *orig.
+ * The layer-3 and layer-4 handlers each invert their own part; the
+ * direction flag is flipped and the protocol number copied here.
+ * Returns false if the layer-3 handler fails, otherwise the layer-4
+ * handler's result.
+ */
+bool
+nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
+		   const struct nf_conntrack_tuple *orig,
+		   const struct nf_conntrack_l3proto *l3proto,
+		   const struct nf_conntrack_l4proto *l4proto)
+{
+	memset(inverse, 0, sizeof(*inverse));
+	inverse->src.l3num = orig->src.l3num;
+
+	if (!l3proto->invert_tuple(inverse, orig))
+		return false;
+
+	inverse->dst.dir = !orig->dst.dir;
+	inverse->dst.protonum = orig->dst.protonum;
+
+	return l4proto->invert_tuple(inverse, orig);
+}
+EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
+
+/*
+ * Unlink both directions of a conntrack from the hash table and drop
+ * all of its pending expectations.
+ */
+static void
+clean_from_lists(struct nf_conn *ct)
+{
+	struct nf_conntrack_tuple_hash *dirs = ct->tuplehash;
+
+	pr_debug("clean_from_lists(%p)\n", ct);
+
+	hlist_del_rcu(&dirs[IP_CT_DIR_ORIGINAL].hnode);
+	hlist_del_rcu(&dirs[IP_CT_DIR_REPLY].hnode);
+
+	nf_ct_remove_expectations(ct);
+}
+
+/*
+ * Final teardown of a conntrack (installed as nf_ct_destroy by
+ * nf_conntrack_init()).  Sends IPCT_DESTROY, marks the entry dying, lets
+ * the layer-4 tracker clean up, removes leftover expectations, unlinks an
+ * unconfirmed entry from the unconfirmed list, drops the reference on the
+ * master and finally frees the entry.
+ */
+static void
+destroy_conntrack(struct nf_conntrack *nfct)
+{
+	struct nf_conn *ct = (struct nf_conn *)nfct;
+	struct net *net = nf_ct_net(ct);
+	struct nf_conntrack_l4proto *proto;
+
+	pr_debug("destroy_conntrack(%p)\n", ct);
+	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
+	NF_CT_ASSERT(!timer_pending(&ct->timeout));
+
+	nf_conntrack_event(IPCT_DESTROY, ct);
+	set_bit(IPS_DYING_BIT, &ct->status);
+
+	/* To avoid weird locking issues, destroy_conntrack() MUST NOT be
+	 * called with a write lock on nf_conntrack_lock!!! -HW */
+	rcu_read_lock();
+	proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+	if (proto != NULL && proto->destroy != NULL)
+		proto->destroy(ct);
+	rcu_read_unlock();
+
+	spin_lock_bh(&nf_conntrack_lock);
+
+	/* clean_from_lists() normally removed the expectations already,
+	 * but TFTP can create one on the first packet, before the
+	 * connection is in the list, so clean up here as well. */
+	nf_ct_remove_expectations(ct);
+
+	/* The ORIGINAL-direction node doubles as the link into the
+	 * unconfirmed list. */
+	if (!nf_ct_is_confirmed(ct)) {
+		struct hlist_node *node =
+			&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode;
+
+		BUG_ON(hlist_unhashed(node));
+		hlist_del(node);
+	}
+
+	NF_CT_STAT_INC(net, delete);
+	spin_unlock_bh(&nf_conntrack_lock);
+
+	if (ct->master != NULL)
+		nf_ct_put(ct->master);
+
+	pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
+	nf_conntrack_free(ct);
+}
+
+/*
+ * Timer callback (also called directly after a successful del_timer()):
+ * run the helper's destroy hook if there is one, unlink the conntrack
+ * from the hash table and drop one reference.
+ * ul_conntrack is the struct nf_conn pointer cast to unsigned long.
+ */
+static void death_by_timeout(unsigned long ul_conntrack)
+{
+	struct nf_conn *ct = (void *)ul_conntrack;
+	struct net *net = nf_ct_net(ct);
+	struct nf_conn_help *ext = nfct_help(ct);
+
+	if (ext != NULL) {
+		struct nf_conntrack_helper *hlp;
+
+		rcu_read_lock();
+		hlp = rcu_dereference(ext->helper);
+		if (hlp != NULL && hlp->destroy != NULL)
+			hlp->destroy(ct);
+		rcu_read_unlock();
+	}
+
+	spin_lock_bh(&nf_conntrack_lock);
+	/* The statistic is bumped inside the lock so preemption is off on
+	 * the module removal path; otherwise spurious warnings appear. */
+	NF_CT_STAT_INC(net, delete_list);
+	clean_from_lists(ct);
+	spin_unlock_bh(&nf_conntrack_lock);
+
+	nf_ct_put(ct);
+}
+
+/*
+ * Look up a tuple in net's hash table.  Returns the matching tuple hash
+ * entry or NULL; no reference is taken.  The bucket is walked with the
+ * RCU list iterator and BHs are disabled for the whole walk, since they
+ * would have to be disabled at least once for the statistics anyway.
+ */
+struct nf_conntrack_tuple_hash *
+__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_tuple_hash *cur;
+	struct nf_conntrack_tuple_hash *match = NULL;
+	struct hlist_node *pos;
+	unsigned int bucket = hash_conntrack(tuple);
+
+	local_bh_disable();
+	hlist_for_each_entry_rcu(cur, pos, &net->ct.hash[bucket], hnode) {
+		if (nf_ct_tuple_equal(tuple, &cur->tuple)) {
+			NF_CT_STAT_INC(net, found);
+			match = cur;
+			break;
+		}
+		NF_CT_STAT_INC(net, searched);
+	}
+	local_bh_enable();
+
+	return match;
+}
+EXPORT_SYMBOL_GPL(__nf_conntrack_find);
+
+/* Find a connection corresponding to a tuple and take a reference on it.
+ * Returns NULL if there is no match or if the entry's use count had
+ * already dropped to zero. */
+struct nf_conntrack_tuple_hash *
+nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_tuple_hash *found;
+
+	rcu_read_lock();
+	found = __nf_conntrack_find(net, tuple);
+	if (found != NULL) {
+		struct nf_conn *owner = nf_ct_tuplehash_to_ctrack(found);
+
+		if (unlikely(!atomic_inc_not_zero(&owner->ct_general.use)))
+			found = NULL;
+	}
+	rcu_read_unlock();
+
+	return found;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
+
+/*
+ * Link both directions of ct into its namespace's hash table at the
+ * given, already computed, bucket indices.
+ */
+static void __nf_conntrack_hash_insert(struct nf_conn *ct,
+				       unsigned int hash,
+				       unsigned int repl_hash)
+{
+	struct hlist_head *table = nf_ct_net(ct)->ct.hash;
+
+	hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
+			   &table[hash]);
+	hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode,
+			   &table[repl_hash]);
+}
+
+/* Hash both tuples of ct and insert the entry into the hash table. */
+void nf_conntrack_hash_insert(struct nf_conn *ct)
+{
+	const struct nf_conntrack_tuple *orig =
+		&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	const struct nf_conntrack_tuple *reply =
+		&ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+	__nf_conntrack_hash_insert(ct, hash_conntrack(orig),
+				   hash_conntrack(reply));
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert);
+
+/* Confirm a connection given skb; places it in hash table.
+ *
+ * Under nf_conntrack_lock both buckets are searched for an entry with
+ * the same ORIGINAL or REPLY tuple.  If one exists the insert_failed
+ * statistic is bumped and NF_DROP is returned.  Otherwise the entry is
+ * moved from the unconfirmed list into the hash, its timer is started,
+ * an extra reference is taken, IPS_CONFIRMED_BIT is set and the
+ * HELPER/NATINFO/RELATED-or-NEW events are cached; NF_ACCEPT is returned.
+ * Packets that are not in the original direction return NF_ACCEPT
+ * without doing anything. */
+int
+__nf_conntrack_confirm(struct sk_buff *skb)
+{
+ unsigned int hash, repl_hash;
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conn *ct;
+ struct nf_conn_help *help;
+ struct hlist_node *n;
+ enum ip_conntrack_info ctinfo;
+ struct net *net;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ net = nf_ct_net(ct);
+
+ /* ipt_REJECT uses nf_conntrack_attach to attach related
+ ICMP/TCP RST packets in other direction. Actual packet
+ which created connection will be IP_CT_NEW or for an
+ expected connection, IP_CT_RELATED. */
+ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ return NF_ACCEPT;
+
+ hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+ /* We're not in hash table, and we refuse to set up related
+ connections for unconfirmed conns. But packet copies and
+ REJECT will give spurious warnings here. */
+ /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
+
+ /* No external references means no one else could have
+ confirmed us. */
+ NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+ pr_debug("Confirming conntrack %p\n", ct);
+
+ spin_lock_bh(&nf_conntrack_lock);
+
+ /* See if there's one in the list already, including reverse:
+ NAT could have grabbed it without realizing, since we're
+ not in the hash. If there is, we lost race. */
+ hlist_for_each_entry(h, n, &net->ct.hash[hash], hnode)
+ if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ &h->tuple))
+ goto out;
+ hlist_for_each_entry(h, n, &net->ct.hash[repl_hash], hnode)
+ if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ &h->tuple))
+ goto out;
+
+ /* Remove from unconfirmed list */
+ hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
+
+ __nf_conntrack_hash_insert(ct, hash, repl_hash);
+ /* Timer relative to confirmation time, not original
+ setting time, otherwise we'd get timer wrap in
+ weird delay cases. Until now timeout.expires held a
+ relative value (see __nf_ct_refresh_acct). */
+ ct->timeout.expires += jiffies;
+ add_timer(&ct->timeout);
+ atomic_inc(&ct->ct_general.use);
+ set_bit(IPS_CONFIRMED_BIT, &ct->status);
+ NF_CT_STAT_INC(net, insert);
+ spin_unlock_bh(&nf_conntrack_lock);
+ help = nfct_help(ct);
+ if (help && help->helper)
+ nf_conntrack_event_cache(IPCT_HELPER, ct);
+#ifdef CONFIG_NF_NAT_NEEDED
+ if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
+ test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
+ nf_conntrack_event_cache(IPCT_NATINFO, ct);
+#endif
+ nf_conntrack_event_cache(master_ct(ct) ?
+ IPCT_RELATED : IPCT_NEW, ct);
+ return NF_ACCEPT;
+
+out:
+ NF_CT_STAT_INC(net, insert_failed);
+ spin_unlock_bh(&nf_conntrack_lock);
+ return NF_DROP;
+}
+EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
+
+/* Returns true (1) if a connection corresponding to the tuple exists
+   (required for NAT).  Entries that belong to ignored_conntrack are
+   skipped; the namespace searched is that of ignored_conntrack. */
+int
+nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
+			 const struct nf_conn *ignored_conntrack)
+{
+	struct net *net = nf_ct_net(ignored_conntrack);
+	unsigned int bucket = hash_conntrack(tuple);
+	struct nf_conntrack_tuple_hash *cur;
+	struct hlist_node *pos;
+	int taken = 0;
+
+	/* Disable BHs the entire time since we need to disable them at
+	 * least once for the stats anyway.
+	 */
+	rcu_read_lock_bh();
+	hlist_for_each_entry_rcu(cur, pos, &net->ct.hash[bucket], hnode) {
+		if (nf_ct_tuplehash_to_ctrack(cur) != ignored_conntrack &&
+		    nf_ct_tuple_equal(tuple, &cur->tuple)) {
+			NF_CT_STAT_INC(net, found);
+			taken = 1;
+			break;
+		}
+		NF_CT_STAT_INC(net, searched);
+	}
+	rcu_read_unlock_bh();
+
+	return taken;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
+
+#define NF_CT_EVICTION_RANGE 8 /* early_drop() stops scanning once at least this many entries were examined */
+
+/* There's a small race here where we may free a just-assured
+ connection. Too bad: we're in trouble anyway.
+
+ Tries to evict one entry that is not ASSURED, starting at bucket
+ 'hash' and moving to following buckets (wrapping around) until a
+ victim is found or NF_CT_EVICTION_RANGE entries have been seen.
+ Returns 1 if an entry was killed, 0 otherwise. */
+static noinline int early_drop(struct net *net, unsigned int hash)
+{
+ /* Use oldest entry, which is roughly LRU: the last non-assured
+ entry of the chain is the one kept in 'ct'. */
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conn *ct = NULL, *tmp;
+ struct hlist_node *n;
+ unsigned int i, cnt = 0;
+ int dropped = 0;
+
+ rcu_read_lock();
+ for (i = 0; i < nf_conntrack_htable_size; i++) {
+ hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash],
+ hnode) {
+ tmp = nf_ct_tuplehash_to_ctrack(h);
+ if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
+ ct = tmp;
+ cnt++;
+ }
+
+ if (ct && unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) /* candidate already on its way out: forget it */
+ ct = NULL;
+ if (ct || cnt >= NF_CT_EVICTION_RANGE)
+ break;
+ hash = (hash + 1) % nf_conntrack_htable_size;
+ }
+ rcu_read_unlock();
+
+ if (!ct)
+ return dropped;
+
+ if (del_timer(&ct->timeout)) { /* only kill it if we were the ones to stop its timer */
+ death_by_timeout((unsigned long)ct);
+ dropped = 1;
+ NF_CT_STAT_INC_ATOMIC(net, early_drop);
+ }
+ nf_ct_put(ct); /* drop the reference taken in the scan above */
+ return dropped;
+}
+
+/*
+ * Allocate and minimally initialise a conntrack for the given original
+ * and reply tuples.  The namespace's entry count is charged here; when
+ * the count exceeds nf_conntrack_max an early drop is attempted first.
+ * Returns the new entry (use count 1, timer set up but not started) or
+ * ERR_PTR(-ENOMEM) when the table is full or the slab allocation fails.
+ */
+struct nf_conn *nf_conntrack_alloc(struct net *net,
+				   const struct nf_conntrack_tuple *orig,
+				   const struct nf_conntrack_tuple *repl,
+				   gfp_t gfp)
+{
+	struct nf_conn *ct;
+
+	/* Seed the hash function on first use. */
+	if (unlikely(!nf_conntrack_hash_rnd_initted)) {
+		get_random_bytes(&nf_conntrack_hash_rnd, 4);
+		nf_conntrack_hash_rnd_initted = 1;
+	}
+
+	/* Charge the entry before checking the limit: we don't want any
+	 * race condition at the early drop stage. */
+	atomic_inc(&net->ct.count);
+
+	if (nf_conntrack_max &&
+	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max) &&
+	    !early_drop(net, hash_conntrack(orig))) {
+		atomic_dec(&net->ct.count);
+		if (net_ratelimit())
+			printk(KERN_WARNING
+			       "nf_conntrack: table full, dropping"
+			       " packet.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ct = kmem_cache_zalloc(nf_conntrack_cachep, gfp);
+	if (!ct) {
+		pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
+		atomic_dec(&net->ct.count);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	atomic_set(&ct->ct_general.use, 1);
+	ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
+	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
+
+	/* The timer is only armed at confirmation time. */
+	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
+#ifdef CONFIG_NET_NS
+	ct->ct_net = net;
+#endif
+	INIT_RCU_HEAD(&ct->rcu);
+
+	return ct;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
+
+/*
+ * RCU callback queued by nf_conntrack_free(): release the extension
+ * area and the entry itself, then uncharge the namespace's entry count.
+ */
+static void nf_conntrack_free_rcu(struct rcu_head *head)
+{
+	struct nf_conn *dead = container_of(head, struct nf_conn, rcu);
+	/* Fetch the namespace before the entry is handed back to the slab. */
+	struct net *owner = nf_ct_net(dead);
+
+	nf_ct_ext_free(dead);
+	kmem_cache_free(nf_conntrack_cachep, dead);
+	atomic_dec(&owner->ct.count);
+}
+
+/*
+ * Release a conntrack: destroy its extensions right away and defer the
+ * actual freeing to nf_conntrack_free_rcu() via call_rcu().
+ */
+void nf_conntrack_free(struct nf_conn *ct)
+{
+	nf_ct_ext_destroy(ct);
+
+	call_rcu(&ct->rcu, nf_conntrack_free_rcu);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_free);
+
+/* Allocate a new conntrack: we return -ENOMEM if classification
+ failed due to stress. Otherwise it really is unclassifiable.
+
+ Returns the ORIGINAL-direction tuple hash of the new entry, NULL if
+ the tuple cannot be inverted or the layer-4 tracker refuses the
+ packet, or the ERR_PTR()/NULL value handed back by
+ nf_conntrack_alloc(). The new entry is put on the namespace's
+ unconfirmed list; if it matches a pending expectation it inherits
+ master, helper and (when configured) mark/secmark from it. */
+static struct nf_conntrack_tuple_hash *
+init_conntrack(struct net *net,
+ const struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_l3proto *l3proto,
+ struct nf_conntrack_l4proto *l4proto,
+ struct sk_buff *skb,
+ unsigned int dataoff)
+{
+ struct nf_conn *ct;
+ struct nf_conn_help *help;
+ struct nf_conntrack_tuple repl_tuple;
+ struct nf_conntrack_expect *exp;
+
+ if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
+ pr_debug("Can't invert tuple.\n");
+ return NULL;
+ }
+
+ ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC);
+ if (ct == NULL || IS_ERR(ct)) {
+ pr_debug("Can't allocate conntrack.\n");
+ return (struct nf_conntrack_tuple_hash *)ct;
+ }
+
+ if (!l4proto->new(ct, skb, dataoff)) {
+ nf_conntrack_free(ct);
+ pr_debug("init conntrack: can't track with proto module\n");
+ return NULL;
+ }
+
+ nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+
+ spin_lock_bh(&nf_conntrack_lock);
+ exp = nf_ct_find_expectation(net, tuple);
+ if (exp) {
+ pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
+ ct, exp);
+ /* Welcome, Mr. Bond. We've been expecting you... */
+ __set_bit(IPS_EXPECTED_BIT, &ct->status);
+ ct->master = exp->master;
+ if (exp->helper) {
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
+ if (help)
+ rcu_assign_pointer(help->helper, exp->helper);
+ }
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ ct->mark = exp->master->mark;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ ct->secmark = exp->master->secmark;
+#endif
+ nf_conntrack_get(&ct->master->ct_general);
+ NF_CT_STAT_INC(net, expect_new);
+ } else {
+ struct nf_conntrack_helper *helper;
+
+ helper = __nf_ct_helper_find(&repl_tuple);
+ if (helper) {
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
+ if (help)
+ rcu_assign_pointer(help->helper, helper);
+ }
+ NF_CT_STAT_INC(net, new);
+ }
+
+ /* Overload tuple linked list to put us in unconfirmed list. */
+ hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
+ &net->ct.unconfirmed);
+
+ spin_unlock_bh(&nf_conntrack_lock);
+
+ if (exp) { /* expectation callback runs without nf_conntrack_lock held */
+ if (exp->expectfn)
+ exp->expectfn(ct, exp);
+ nf_ct_expect_put(exp);
+ }
+
+ return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
+}
+
+/* On success, returns conntrack ptr, sets skb->nfct and ctinfo.
+ * Returns NULL when no tuple can be extracted or no conntrack can be
+ * created for the packet, and an ERR_PTR() value when init_conntrack()
+ * reports an error.  *set_reply is set to 1 for reply-direction packets
+ * and to 0 otherwise. */
+static inline struct nf_conn *
+resolve_normal_ct(struct net *net,
+		  struct sk_buff *skb,
+		  unsigned int dataoff,
+		  u_int16_t l3num,
+		  u_int8_t protonum,
+		  struct nf_conntrack_l3proto *l3proto,
+		  struct nf_conntrack_l4proto *l4proto,
+		  int *set_reply,
+		  enum ip_conntrack_info *ctinfo)
+{
+	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_tuple_hash *entry;
+	struct nf_conn *ct;
+
+	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
+			     dataoff, l3num, protonum, &tuple, l3proto,
+			     l4proto)) {
+		pr_debug("resolve_normal_ct: Can't get tuple\n");
+		return NULL;
+	}
+
+	/* Look for an existing entry; create one if there is none. */
+	entry = nf_conntrack_find_get(net, &tuple);
+	if (entry == NULL) {
+		entry = init_conntrack(net, &tuple, l3proto, l4proto,
+				       skb, dataoff);
+		if (entry == NULL)
+			return NULL;
+		if (IS_ERR(entry))
+			return (void *)entry;
+	}
+	ct = nf_ct_tuplehash_to_ctrack(entry);
+
+	/* It exists; we have (non-exclusive) reference. */
+	if (NF_CT_DIRECTION(entry) == IP_CT_DIR_REPLY) {
+		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
+		/* Ask the caller to set the reply bit if this packet is OK. */
+		*set_reply = 1;
+	} else {
+		*set_reply = 0;
+
+		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+			/* Once we've had two way comms, always ESTABLISHED. */
+			pr_debug("nf_conntrack_in: normal packet for %p\n", ct);
+			*ctinfo = IP_CT_ESTABLISHED;
+		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
+			pr_debug("nf_conntrack_in: related packet for %p\n",
+				 ct);
+			*ctinfo = IP_CT_RELATED;
+		} else {
+			pr_debug("nf_conntrack_in: new packet for %p\n", ct);
+			*ctinfo = IP_CT_NEW;
+		}
+	}
+
+	skb->nfct = &ct->ct_general;
+	skb->nfctinfo = *ctinfo;
+	return ct;
+}
+
+/*
+ * Main per-packet entry point: attach a conntrack to skb and let the
+ * layer-4 tracker process the packet.  Returns a netfilter verdict.
+ * Packets that already carry skb->nfct are accepted untouched.  When a
+ * protocol handler returns a value <= 0, its negation is used as the
+ * verdict.
+ */
+unsigned int
+nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
+		struct sk_buff *skb)
+{
+	struct nf_conntrack_l3proto *l3;
+	struct nf_conntrack_l4proto *l4;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	unsigned int dataoff;
+	u_int8_t protonum;
+	int want_reply_bit = 0;
+	int verdict;
+
+	/* Previously seen (loopback or untracked)? Ignore. */
+	if (skb->nfct) {
+		NF_CT_STAT_INC_ATOMIC(net, ignore);
+		return NF_ACCEPT;
+	}
+
+	/* rcu_read_lock()ed by nf_hook_slow */
+	l3 = __nf_ct_l3proto_find(pf);
+	verdict = l3->get_l4proto(skb, skb_network_offset(skb),
+				  &dataoff, &protonum);
+	if (verdict <= 0) {
+		pr_debug("not prepared to track yet or error occured\n");
+		NF_CT_STAT_INC_ATOMIC(net, error);
+		NF_CT_STAT_INC_ATOMIC(net, invalid);
+		return -verdict;
+	}
+
+	l4 = __nf_ct_l4proto_find(pf, protonum);
+
+	/* It may be a special packet, error, unclean...
+	 * The inverse of the return code tells the netfilter
+	 * core what to do with the packet. */
+	if (l4->error != NULL) {
+		verdict = l4->error(net, skb, dataoff, &ctinfo, pf, hooknum);
+		if (verdict <= 0) {
+			NF_CT_STAT_INC_ATOMIC(net, error);
+			NF_CT_STAT_INC_ATOMIC(net, invalid);
+			return -verdict;
+		}
+	}
+
+	ct = resolve_normal_ct(net, skb, dataoff, pf, protonum,
+			       l3, l4, &want_reply_bit, &ctinfo);
+	if (ct == NULL) {
+		/* Not valid part of a connection */
+		NF_CT_STAT_INC_ATOMIC(net, invalid);
+		return NF_ACCEPT;
+	}
+	if (IS_ERR(ct)) {
+		/* Too stressed to deal. */
+		NF_CT_STAT_INC_ATOMIC(net, drop);
+		return NF_DROP;
+	}
+
+	NF_CT_ASSERT(skb->nfct);
+
+	verdict = l4->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
+	if (verdict < 0) {
+		/* Invalid: inverse of the return code tells
+		 * the netfilter core what to do */
+		pr_debug("nf_conntrack_in: Can't track with proto module\n");
+		nf_conntrack_put(skb->nfct);
+		skb->nfct = NULL;
+		NF_CT_STAT_INC_ATOMIC(net, invalid);
+		return -verdict;
+	}
+
+	/* First reply seen on this connection: record it and cache a
+	 * status event. */
+	if (want_reply_bit &&
+	    !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
+		nf_conntrack_event_cache(IPCT_STATUS, ct);
+
+	return verdict;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_in);
+
+/*
+ * Invert a tuple, looking up the layer-3 and layer-4 handlers from the
+ * tuple itself under rcu_read_lock.  See nf_ct_invert_tuple().
+ */
+bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
+			  const struct nf_conntrack_tuple *orig)
+{
+	struct nf_conntrack_l3proto *l3;
+	struct nf_conntrack_l4proto *l4;
+	bool ok;
+
+	rcu_read_lock();
+	l3 = __nf_ct_l3proto_find(orig->src.l3num);
+	l4 = __nf_ct_l4proto_find(orig->src.l3num, orig->dst.protonum);
+	ok = nf_ct_invert_tuple(inverse, orig, l3, l4);
+	rcu_read_unlock();
+
+	return ok;
+}
+EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
+
+/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
+   implicitly racy: see __nf_conntrack_confirm.
+
+   The helper is left alone when the conntrack has a master or already
+   owns expectations.  Otherwise the helper matching the new reply tuple
+   is installed (clearing any old helper state), or the helper pointer
+   is cleared when no helper matches. */
+void nf_conntrack_alter_reply(struct nf_conn *ct,
+			      const struct nf_conntrack_tuple *newreply)
+{
+	struct nf_conn_help *help = nfct_help(ct);
+	struct nf_conntrack_helper *match;
+
+	/* Should be unconfirmed, so not in hash table yet */
+	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+
+	pr_debug("Altering reply tuple of %p to ", ct);
+	nf_ct_dump_tuple(newreply);
+
+	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
+	if (ct->master || (help && !hlist_empty(&help->expectations)))
+		return;
+
+	rcu_read_lock();
+	match = __nf_ct_helper_find(newreply);
+	if (match == NULL) {
+		if (help != NULL)
+			rcu_assign_pointer(help->helper, NULL);
+	} else {
+		if (help != NULL)
+			memset(&help->help, 0, sizeof(help->help));
+		else
+			help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
+
+		/* help is still NULL only if the extension could not be
+		 * added. */
+		if (help != NULL)
+			rcu_assign_pointer(help->helper, match);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
+
+/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1.
+ *
+ * Entries with IPS_FIXED_TIMEOUT_BIT set keep their timeout and are only
+ * accounted. For an unconfirmed entry extra_jiffies is stored as a
+ * relative value in timeout.expires. For a confirmed entry the running
+ * timer is restarted. An IPCT_REFRESH event is cached, after the lock
+ * has been dropped, whenever the timeout was changed. */
+void __nf_ct_refresh_acct(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct sk_buff *skb,
+ unsigned long extra_jiffies,
+ int do_acct)
+{
+ int event = 0;
+
+ NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
+ NF_CT_ASSERT(skb);
+
+ spin_lock_bh(&nf_conntrack_lock);
+
+ /* Only update if this is not a fixed timeout */
+ if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
+ goto acct;
+
+ /* If not in hash table, timer will not be active yet */
+ if (!nf_ct_is_confirmed(ct)) {
+ ct->timeout.expires = extra_jiffies;
+ event = IPCT_REFRESH;
+ } else {
+ unsigned long newtime = jiffies + extra_jiffies;
+
+ /* Only update the timeout if the new timeout is at least
+ HZ jiffies from the old timeout. Need del_timer for race
+ avoidance (may already be dying). */
+ if (newtime - ct->timeout.expires >= HZ
+ && del_timer(&ct->timeout)) {
+ ct->timeout.expires = newtime;
+ add_timer(&ct->timeout);
+ event = IPCT_REFRESH;
+ }
+ }
+
+acct:
+ if (do_acct) {
+ struct nf_conn_counter *acct;
+
+ acct = nf_conn_acct_find(ct);
+ if (acct) { /* counters are indexed by packet direction */
+ acct[CTINFO2DIR(ctinfo)].packets++;
+ acct[CTINFO2DIR(ctinfo)].bytes +=
+ skb->len - skb_network_offset(skb);
+ }
+ }
+
+ spin_unlock_bh(&nf_conntrack_lock);
+
+ /* must be unlocked when calling event cache */
+ if (event)
+ nf_conntrack_event_cache(event, ct);
+}
+EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
+
+/*
+ * Kill a conntrack immediately, optionally accounting the current
+ * packet first.  Returns true if this call stopped the timer and ran
+ * the timeout handler, false if the timer was not pending.
+ */
+bool __nf_ct_kill_acct(struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       const struct sk_buff *skb,
+		       int do_acct)
+{
+	if (do_acct) {
+		struct nf_conn_counter *counters;
+
+		spin_lock_bh(&nf_conntrack_lock);
+		counters = nf_conn_acct_find(ct);
+		if (counters != NULL) {
+			struct nf_conn_counter *dir =
+				&counters[CTINFO2DIR(ctinfo)];
+
+			dir->packets++;
+			dir->bytes += skb->len - skb_network_offset(skb);
+		}
+		spin_unlock_bh(&nf_conntrack_lock);
+	}
+
+	if (!del_timer(&ct->timeout))
+		return false;
+
+	ct->timeout.function((unsigned long)ct);
+	return true;
+}
+EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
+
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+#include <linux/mutex.h>
+
+/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
+ * in ip_conntrack_core, since we don't want the protocols to autoload
+ * or depend on ctnetlink.
+ *
+ * Adds the tuple's source and destination ports to skb as
+ * CTA_PROTO_SRC_PORT and CTA_PROTO_DST_PORT attributes. Returns 0, or
+ * -1 through the nla_put_failure label.
+ * NOTE(review): the label is presumably the jump target of NLA_PUT_BE16
+ * when the attribute does not fit -- confirm against the netlink macros. */
+int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple)
+{
+ NLA_PUT_BE16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port);
+ NLA_PUT_BE16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port);
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr);
+
+const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = { /* both port attributes are declared as 16-bit values */
+ [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 },
+ [CTA_PROTO_DST_PORT] = { .type = NLA_U16 },
+};
+EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy);
+
+/*
+ * Copy the source and destination port attributes from tb[] into the
+ * tuple.  Returns -EINVAL unless both attributes are present, else 0.
+ */
+int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
+			       struct nf_conntrack_tuple *t)
+{
+	struct nlattr *sport = tb[CTA_PROTO_SRC_PORT];
+	struct nlattr *dport = tb[CTA_PROTO_DST_PORT];
+
+	if (!(sport && dport))
+		return -EINVAL;
+
+	t->src.u.tcp.port = nla_get_be16(sport);
+	t->dst.u.tcp.port = nla_get_be16(dport);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
+#endif
+
+/* Used by ipt_REJECT and ip6t_REJECT (installed as ip_ct_attach).
+ * Attaches the conntrack of skb to nskb as a RELATED packet travelling
+ * in the opposite direction, taking an extra reference. */
+static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+	/* This ICMP is in reverse direction to the packet which caused it */
+	ctinfo = (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
+		? IP_CT_RELATED + IP_CT_IS_REPLY
+		: IP_CT_RELATED;
+
+	/* Attach to new skbuff, and increment count */
+	nskb->nfct = &ct->ct_general;
+	nskb->nfctinfo = ctinfo;
+	nf_conntrack_get(nskb->nfct);
+}
+
+/* Bring out ya dead!
+ *
+ * Resumes scanning the hash table at *bucket and returns, with an extra
+ * reference, the first conntrack for which iter() returns non-zero.
+ * *bucket is left at the bucket of the match so the next call continues
+ * from there. Once the table is exhausted, every entry on the
+ * unconfirmed list that iter() selects is marked IPS_DYING_BIT and NULL
+ * is returned. */
+static struct nf_conn *
+get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
+ void *data, unsigned int *bucket)
+{
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conn *ct;
+ struct hlist_node *n;
+
+ spin_lock_bh(&nf_conntrack_lock);
+ for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
+ hlist_for_each_entry(h, n, &net->ct.hash[*bucket], hnode) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (iter(ct, data))
+ goto found;
+ }
+ }
+ hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (iter(ct, data))
+ set_bit(IPS_DYING_BIT, &ct->status);
+ }
+ spin_unlock_bh(&nf_conntrack_lock);
+ return NULL;
+found:
+ atomic_inc(&ct->ct_general.use);
+ spin_unlock_bh(&nf_conntrack_lock);
+ return ct;
+}
+
+/*
+ * Kill every conntrack in net for which iter(ct, data) returns non-zero.
+ * Entries whose timer cannot be stopped here are left to their timer.
+ */
+void nf_ct_iterate_cleanup(struct net *net,
+			   int (*iter)(struct nf_conn *i, void *data),
+			   void *data)
+{
+	unsigned int bucket = 0;
+	struct nf_conn *victim;
+
+	for (;;) {
+		victim = get_next_corpse(net, iter, data, &bucket);
+		if (victim == NULL)
+			break;
+
+		/* Time to push up daisies... */
+		if (del_timer(&victim->timeout))
+			death_by_timeout((unsigned long)victim);
+		/* ... else the timer will get him soon. */
+
+		nf_ct_put(victim);
+	}
+}
+EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
+
+/* Iterator for nf_ct_iterate_cleanup() that selects every conntrack. */
+static int kill_all(struct nf_conn *i, void *data)
+{
+	return 1;
+}
+
+/*
+ * Free a table obtained from nf_ct_alloc_hashtable().  vmalloced and
+ * size must be the values reported by / passed to the allocation.
+ */
+void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int size)
+{
+	if (vmalloced) {
+		vfree(hash);
+		return;
+	}
+
+	free_pages((unsigned long)hash,
+		   get_order(sizeof(struct hlist_head) * size));
+}
+EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
+
+/* Kill every conntrack of the given namespace. */
+void nf_conntrack_flush(struct net *net)
+{
+	nf_ct_iterate_cleanup(net, kill_all, NULL);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_flush);
+
+/*
+ * Undo nf_conntrack_init_init_net(): shut down the helper and protocol
+ * subsystems and destroy the nf_conn slab cache, in reverse order of
+ * their setup.
+ */
+static void nf_conntrack_cleanup_init_net(void)
+{
+	nf_conntrack_helper_fini();
+	nf_conntrack_proto_fini();
+
+	kmem_cache_destroy(nf_conntrack_cachep);
+}
+
+/*
+ * Per-namespace teardown: flush the event cache, kill all conntracks
+ * (retrying until the namespace's entry count reaches zero) and release
+ * the hash table, accounting, expectation and statistics resources.
+ */
+static void nf_conntrack_cleanup_net(struct net *net)
+{
+	nf_ct_event_cache_flush(net);
+	nf_conntrack_ecache_fini(net);
+
+	/* Entries are freed from RCU callbacks, so keep flushing and
+	 * yielding until the count has really dropped to zero. */
+	for (;;) {
+		nf_conntrack_flush(net);
+		if (atomic_read(&net->ct.count) == 0)
+			break;
+		schedule();
+	}
+
+	/* nf_conntrack_untracked is one global object shared by all
+	 * namespaces.  Only wait for its references when init_net itself
+	 * goes away; tearing down another namespace must not block on
+	 * references that are held by packets of unrelated namespaces. */
+	if (net_eq(net, &init_net)) {
+		while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
+			schedule();
+	}
+
+	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
+			     nf_conntrack_htable_size);
+	nf_conntrack_acct_fini(net);
+	nf_conntrack_expect_fini(net);
+	free_percpu(net->ct.stat);
+}
+
+/* Mishearing the voices in his head, our hero wonders how he's
+   supposed to kill the mall.
+
+   Tears down conntrack for one namespace.  For init_net the global
+   ip_ct_attach and nf_ct_destroy hooks are also cleared and the global
+   state is released. */
+void nf_conntrack_cleanup(struct net *net)
+{
+	const bool is_init_net = net_eq(net, &init_net);
+
+	if (is_init_net)
+		rcu_assign_pointer(ip_ct_attach, NULL);
+
+	/* This makes sure all current packets have passed through
+	   netfilter framework.  Roll on, two-stage module
+	   delete... */
+	synchronize_net();
+
+	nf_conntrack_cleanup_net(net);
+
+	if (!is_init_net)
+		return;
+
+	rcu_assign_pointer(nf_ct_destroy, NULL);
+	nf_conntrack_cleanup_init_net();
+}
+
+/*
+ * Allocate and initialise a hash table of at least *sizep buckets.
+ * *sizep is rounded up to a whole number of pages worth of list heads
+ * and written back.  Page allocation is tried first; on failure vmalloc
+ * is used and *vmalloced is set to 1 (0 otherwise).  Returns NULL if
+ * both allocations fail.
+ */
+struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced)
+{
+	struct hlist_head *table;
+	unsigned int nbuckets, idx;
+
+	*vmalloced = 0;
+
+	nbuckets = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_head));
+	*sizep = nbuckets;
+
+	table = (void *)__get_free_pages(GFP_KERNEL|__GFP_NOWARN,
+					 get_order(sizeof(struct hlist_head)
+						   * nbuckets));
+	if (table == NULL) {
+		*vmalloced = 1;
+		printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
+		table = vmalloc(sizeof(struct hlist_head) * nbuckets);
+		if (table == NULL)
+			return NULL;
+	}
+
+	for (idx = 0; idx < nbuckets; idx++)
+		INIT_HLIST_HEAD(&table[idx]);
+
+	return table;
+}
+EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
+
+int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
+{
+ int i, bucket, vmalloced, old_vmalloced;
+ unsigned int hashsize, old_size;
+ int rnd;
+ struct hlist_head *hash, *old_hash;
+ struct nf_conntrack_tuple_hash *h;
+
+ /* Setter for the "hashsize" module parameter.
+ * On boot, we can set this without any fancy locking: while no
+ * table size has been chosen yet the value is simply stored.
+ * Afterwards a new table is allocated, every entry of init_net's
+ * table is rehashed into it and the old table is freed. Returns 0,
+ * -EINVAL for a zero size or -ENOMEM.
+ * NOTE(review): only init_net's table is replaced here although
+ * nf_conntrack_htable_size is shared by all namespaces -- confirm
+ * how other namespaces are expected to cope. */
+ if (!nf_conntrack_htable_size)
+ return param_set_uint(val, kp);
+
+ hashsize = simple_strtoul(val, NULL, 0);
+ if (!hashsize)
+ return -EINVAL;
+
+ hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced);
+ if (!hash)
+ return -ENOMEM;
+
+ /* We have to rehash for the new table anyway, so we can also
+ * use a new random seed */
+ get_random_bytes(&rnd, 4);
+
+ /* Lookups in the old hash might happen in parallel, which means we
+ * might get false negatives during connection lookup. New connections
+ * created because of a false negative won't make it into the hash
+ * though since that required taking the lock.
+ */
+ spin_lock_bh(&nf_conntrack_lock);
+ for (i = 0; i < nf_conntrack_htable_size; i++) {
+ while (!hlist_empty(&init_net.ct.hash[i])) {
+ h = hlist_entry(init_net.ct.hash[i].first,
+ struct nf_conntrack_tuple_hash, hnode);
+ hlist_del_rcu(&h->hnode);
+ bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
+ hlist_add_head(&h->hnode, &hash[bucket]);
+ }
+ }
+ old_size = nf_conntrack_htable_size;
+ old_vmalloced = init_net.ct.hash_vmalloc;
+ old_hash = init_net.ct.hash;
+
+ nf_conntrack_htable_size = hashsize;
+ init_net.ct.hash_vmalloc = vmalloced;
+ init_net.ct.hash = hash;
+ nf_conntrack_hash_rnd = rnd;
+ spin_unlock_bh(&nf_conntrack_lock);
+
+ nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
+
+module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
+ &nf_conntrack_htable_size, 0600);
+
+/*
+ * Global, one-time setup performed when conntrack is initialised for
+ * init_net: pick the table size and entry limit, create the nf_conn
+ * slab cache and bring up the protocol and helper subsystems.
+ * Returns 0 or a negative error code; on failure everything set up so
+ * far is torn down again.
+ */
+static int nf_conntrack_init_init_net(void)
+{
+	int max_factor = 8;
+	int ret;
+
+	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
+	 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
+	if (!nf_conntrack_htable_size) {
+		nf_conntrack_htable_size
+			= (((num_physpages << PAGE_SHIFT) / 16384)
+			   / sizeof(struct hlist_head));
+		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
+			nf_conntrack_htable_size = 16384;
+		if (nf_conntrack_htable_size < 32)
+			nf_conntrack_htable_size = 32;
+
+		/* Use a max. factor of four by default to get the same max as
+		 * with the old struct list_heads. When a table size is given
+		 * we use the old value of 8 to avoid reducing the max.
+		 * entries. */
+		max_factor = 4;
+	}
+	nf_conntrack_max = max_factor * nf_conntrack_htable_size;
+
+	/* Informational banner: give it an explicit log level instead of
+	 * relying on the default one. */
+	printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
+	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
+	       nf_conntrack_max);
+
+	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
+						sizeof(struct nf_conn),
+						0, 0, NULL);
+	if (!nf_conntrack_cachep) {
+		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
+		ret = -ENOMEM;
+		goto err_cache;
+	}
+
+	ret = nf_conntrack_proto_init();
+	if (ret < 0)
+		goto err_proto;
+
+	ret = nf_conntrack_helper_init();
+	if (ret < 0)
+		goto err_helper;
+
+	return 0;
+
+err_helper:
+	nf_conntrack_proto_fini();
+err_proto:
+	kmem_cache_destroy(nf_conntrack_cachep);
+err_cache:
+	return ret;
+}
+
+/*
+ * Per-namespace setup: entry counter, unconfirmed list, per-cpu
+ * statistics, event cache, hash table, expectation and accounting
+ * state.  Returns 0 or a negative error code; on failure everything
+ * set up so far is released again.
+ */
+static int nf_conntrack_init_net(struct net *net)
+{
+	int ret;
+
+	atomic_set(&net->ct.count, 0);
+	INIT_HLIST_HEAD(&net->ct.unconfirmed);
+	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
+	if (!net->ct.stat) {
+		ret = -ENOMEM;
+		goto err_stat;
+	}
+	ret = nf_conntrack_ecache_init(net);
+	if (ret < 0)
+		goto err_ecache;
+	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
+					     &net->ct.hash_vmalloc);
+	if (!net->ct.hash) {
+		ret = -ENOMEM;
+		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
+		goto err_hash;
+	}
+	ret = nf_conntrack_expect_init(net);
+	if (ret < 0)
+		goto err_expect;
+	ret = nf_conntrack_acct_init(net);
+	if (ret < 0)
+		goto err_acct;
+
+	/* Set up the fake conntrack: never deleted, not in any hash, and
+	 * made to look like a confirmed connection.  It is one global
+	 * object shared by all namespaces, so initialise it only once, for
+	 * init_net: resetting its use count whenever another namespace is
+	 * created would throw away references that are already held. */
+	if (net_eq(net, &init_net)) {
+#ifdef CONFIG_NET_NS
+		nf_conntrack_untracked.ct_net = &init_net;
+#endif
+		atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
+		set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
+	}
+
+	return 0;
+
+err_acct:
+	nf_conntrack_expect_fini(net);
+err_expect:
+	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
+			     nf_conntrack_htable_size);
+err_hash:
+	nf_conntrack_ecache_fini(net);
+err_ecache:
+	free_percpu(net->ct.stat);
+err_stat:
+	return ret;
+}
+
+/*
+ * Initialise conntrack for a namespace.  For init_net the global state
+ * is set up first and, once the namespace setup succeeded, the
+ * ip_ct_attach and nf_ct_destroy hooks are installed.  Returns 0 or a
+ * negative error code.
+ */
+int nf_conntrack_init(struct net *net)
+{
+	const bool is_init_net = net_eq(net, &init_net);
+	int err;
+
+	if (is_init_net) {
+		err = nf_conntrack_init_init_net();
+		if (err < 0)
+			return err;
+	}
+
+	err = nf_conntrack_init_net(net);
+	if (err < 0) {
+		/* Roll back the global setup done just above. */
+		if (is_init_net)
+			nf_conntrack_cleanup_init_net();
+		return err;
+	}
+
+	if (is_init_net) {
+		/* For use by REJECT target */
+		rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
+		rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
+	}
+	return 0;
+}
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
new file mode 100644
index 0000000..a5f5e2e
--- /dev/null
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -0,0 +1,128 @@
+/* Event cache for netfilter. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
+#include <linux/stddef.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+/* Notifier chain for conntrack events; listeners are added through
+ * nf_conntrack_register_notifier(). */
+ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
+EXPORT_SYMBOL_GPL(nf_conntrack_chain);
+
+/* Notifier chain for expectation events; listeners are added through
+ * nf_ct_expect_register_notifier(). */
+ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain);
+EXPORT_SYMBOL_GPL(nf_ct_expect_chain);
+
+/*
+ * Deliver the events cached in @ecache and clear the cache entry.
+ *
+ * The nf_conntrack_chain listeners are only called when the cached
+ * conntrack is confirmed, is not dying, and has a non-zero event mask.
+ * In every case the event mask is reset, the reference held on the cached
+ * conntrack is dropped and the entry's ct pointer is cleared.
+ *
+ * Must be called with softirqs disabled locally.
+ */
+static inline void
+__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
+{
+ if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
+ && ecache->events)
+ atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events,
+ ecache->ct);
+
+ ecache->events = 0;
+ nf_ct_put(ecache->ct);
+ ecache->ct = NULL;
+}
+
+/*
+ * Deliver all cached events for a particular conntrack.  This is called
+ * by code prior to async packet handling for freeing the skb.
+ *
+ * Only the current CPU's cache entry of the conntrack's namespace is
+ * examined, and it is flushed only if it currently belongs to @ct.
+ */
+void nf_ct_deliver_cached_events(const struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+ struct nf_conntrack_ecache *ecache;
+
+ /* Softirqs stay off while this CPU's cache entry is inspected/flushed. */
+ local_bh_disable();
+ ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
+ if (ecache->ct == ct)
+ __nf_ct_deliver_cached_events(ecache);
+ local_bh_enable();
+}
+EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
+
+/*
+ * Make @ct the owner of this CPU's event cache entry.
+ *
+ * If the entry still belongs to a different conntrack, that conntrack's
+ * pending events are delivered first.  The entry then points at @ct and
+ * holds a reference on it.  It is a bug to call this when the entry
+ * already belongs to @ct.
+ *
+ * NOTE(review): softirqs are not disabled here, although the delivery
+ * helper requires it; presumably callers guarantee it — confirm.
+ */
+void __nf_ct_event_cache_init(struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+ struct nf_conntrack_ecache *ecache;
+
+ /* take care of delivering potentially old events */
+ ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
+ BUG_ON(ecache->ct == ct);
+ if (ecache->ct)
+ __nf_ct_deliver_cached_events(ecache);
+ /* initialize for this conntrack/packet */
+ ecache->ct = ct;
+ nf_conntrack_get(&ct->ct_general);
+}
+EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
+
+/*
+ * Flush the event cache of @net: drop the conntrack reference held by
+ * every possible CPU's cache entry.  No events are delivered, and the
+ * entries' ct pointers are left as they are.
+ *
+ * Touches other CPUs' data and must not be called while packets are still
+ * passing through the code.
+ */
+void nf_ct_event_cache_flush(struct net *net)
+{
+ struct nf_conntrack_ecache *ecache;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ ecache = per_cpu_ptr(net->ct.ecache, cpu);
+ if (ecache->ct)
+ nf_ct_put(ecache->ct);
+ }
+}
+
+/*
+ * Allocate the per-cpu event cache of @net.
+ * Returns 0 on success, -ENOMEM if the allocation failed.
+ */
+int nf_conntrack_ecache_init(struct net *net)
+{
+	net->ct.ecache = alloc_percpu(struct nf_conntrack_ecache);
+
+	return net->ct.ecache ? 0 : -ENOMEM;
+}
+
+/* Release the per-cpu event cache allocated by nf_conntrack_ecache_init(). */
+void nf_conntrack_ecache_fini(struct net *net)
+{
+ free_percpu(net->ct.ecache);
+}
+
+/* Add @nb to the conntrack event chain; returns the result of
+ * atomic_notifier_chain_register(). */
+int nf_conntrack_register_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
+
+/* Remove @nb from the conntrack event chain; returns the result of
+ * atomic_notifier_chain_unregister(). */
+int nf_conntrack_unregister_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
+
+/* Add @nb to the expectation event chain; returns the result of
+ * atomic_notifier_chain_register(). */
+int nf_ct_expect_register_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&nf_ct_expect_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
+
+/* Remove @nb from the expectation event chain; returns the result of
+ * atomic_notifier_chain_unregister(). */
+int nf_ct_expect_unregister_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
new file mode 100644
index 0000000..37a703b
--- /dev/null
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -0,0 +1,612 @@
+/* Expectation handling for nf_conntrack. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/kernel.h>
+#include <linux/jhash.h>
+#include <net/net_namespace.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+/* Number of buckets in each namespace's expectation hash table. */
+unsigned int nf_ct_expect_hsize __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
+
+/* Random value mixed into the expectation hash; filled in on first use. */
+static unsigned int nf_ct_expect_hash_rnd __read_mostly;
+/* Upper bound on net->ct.expect_count enforced by nf_ct_expect_related(). */
+unsigned int nf_ct_expect_max __read_mostly;
+/* Non-zero once nf_ct_expect_hash_rnd has been seeded. */
+static int nf_ct_expect_hash_rnd_initted __read_mostly;
+
+/* Slab cache that struct nf_conntrack_expect objects are allocated from. */
+static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
+
+/* nf_conntrack_expect helper functions */
+/*
+ * Unlink @exp from the expectation hash and from its master's list.
+ *
+ * Decrements the namespace's expectation count and the master's per-class
+ * counter, drops one reference on @exp and bumps the expect_delete
+ * statistic.  The expectation's timer must no longer be pending.
+ *
+ * NOTE(review): the callers in this file that take a lock hold
+ * nf_conntrack_lock around this call; presumably that is required of all
+ * callers — confirm.
+ */
+void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
+{
+ struct nf_conn_help *master_help = nfct_help(exp->master);
+ struct net *net = nf_ct_exp_net(exp);
+
+ NF_CT_ASSERT(master_help);
+ NF_CT_ASSERT(!timer_pending(&exp->timeout));
+
+ /* RCU-safe removal: lockless readers may still be walking the hash. */
+ hlist_del_rcu(&exp->hnode);
+ net->ct.expect_count--;
+
+ hlist_del(&exp->lnode);
+ master_help->expecting[exp->class]--;
+ nf_ct_expect_put(exp);
+
+ NF_CT_STAT_INC(net, expect_delete);
+}
+EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
+
+/*
+ * Timer callback for an expectation that was never matched.
+ * @ul_expect is the struct nf_conntrack_expect pointer cast to unsigned
+ * long (see setup_timer() in nf_ct_expect_insert()).
+ *
+ * Unlinks the expectation under nf_conntrack_lock and then drops one more
+ * reference on it after the unlink.
+ */
+static void nf_ct_expectation_timed_out(unsigned long ul_expect)
+{
+ struct nf_conntrack_expect *exp = (void *)ul_expect;
+
+ spin_lock_bh(&nf_conntrack_lock);
+ nf_ct_unlink_expect(exp);
+ spin_unlock_bh(&nf_conntrack_lock);
+ nf_ct_expect_put(exp);
+}
+
+/*
+ * Map the destination part of @tuple to a bucket index in
+ * [0, nf_ct_expect_hsize).
+ *
+ * The hash covers the destination address words; the jhash2 initval
+ * combines protocol number xor l3 family, the destination port and the
+ * random seed.  The seed is generated lazily on the first call.
+ */
+static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
+{
+ unsigned int hash;
+
+ if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
+ get_random_bytes(&nf_ct_expect_hash_rnd, 4);
+ nf_ct_expect_hash_rnd_initted = 1;
+ }
+
+ hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
+ (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
+ (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
+ /* Scale the 32-bit hash into the table size without a modulo. */
+ return ((u64)hash * nf_ct_expect_hsize) >> 32;
+}
+
+/*
+ * Look up the first expectation in @net whose masked tuple matches @tuple.
+ *
+ * Returns the expectation without taking a reference, or NULL if none
+ * matches.  The bucket is walked with the RCU list iterator, so the caller
+ * is expected to be inside an RCU read-side section (as
+ * nf_ct_expect_find_get() is).
+ */
+struct nf_conntrack_expect *
+__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
+{
+ struct nf_conntrack_expect *i;
+ struct hlist_node *n;
+ unsigned int h;
+
+ /* Fast path: nothing is expected in this namespace at all. */
+ if (!net->ct.expect_count)
+ return NULL;
+
+ h = nf_ct_expect_dst_hash(tuple);
+ hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
+ if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
+ return i;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
+
+/*
+ * Find an expectation corresponding to @tuple and take a reference on it.
+ *
+ * Returns NULL if there is no match, or if the matching expectation's
+ * use count had already dropped to zero.
+ */
+struct nf_conntrack_expect *
+nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
+{
+ struct nf_conntrack_expect *i;
+
+ rcu_read_lock();
+ i = __nf_ct_expect_find(net, tuple);
+ /* Only hand out entries whose refcount has not already hit zero. */
+ if (i && !atomic_inc_not_zero(&i->use))
+ i = NULL;
+ rcu_read_unlock();
+
+ return i;
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
+
+/*
+ * If an active expectation for this connection is found, it is removed
+ * from the global table and returned.
+ *
+ * A permanent expectation stays in the table; an extra reference is taken
+ * on it instead.  Returns NULL when nothing matches, when the master
+ * conntrack is not confirmed yet, or when the expectation's timer could
+ * not be stopped.
+ *
+ * NOTE(review): the bucket is walked with the non-RCU iterator, so the
+ * caller presumably holds nf_conntrack_lock — confirm.
+ */
+struct nf_conntrack_expect *
+nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
+{
+ struct nf_conntrack_expect *i, *exp = NULL;
+ struct hlist_node *n;
+ unsigned int h;
+
+ if (!net->ct.expect_count)
+ return NULL;
+
+ h = nf_ct_expect_dst_hash(tuple);
+ hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
+ if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
+ nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
+ exp = i;
+ break;
+ }
+ }
+ if (!exp)
+ return NULL;
+
+ /* If master is not in hash table yet (ie. packet hasn't left
+ this machine yet), how can other end know about expected?
+ Hence these are not the droids you are looking for (if
+ master ct never got confirmed, we'd hold a reference to it
+ and weird things would happen to future packets). */
+ if (!nf_ct_is_confirmed(exp->master))
+ return NULL;
+
+ if (exp->flags & NF_CT_EXPECT_PERMANENT) {
+ atomic_inc(&exp->use);
+ return exp;
+ } else if (del_timer(&exp->timeout)) {
+ /* The stopped timer's reference is handed over to the caller. */
+ nf_ct_unlink_expect(exp);
+ return exp;
+ }
+
+ return NULL;
+}
+
+/*
+ * Delete all expectations whose master is @ct.
+ *
+ * Each expectation whose timer could still be stopped is unlinked and
+ * released; one whose timer is no longer pending is skipped.
+ */
+void nf_ct_remove_expectations(struct nf_conn *ct)
+{
+ struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_expect *exp;
+ struct hlist_node *n, *next;
+
+ /* Optimization: most connections never expect any others. */
+ if (!help)
+ return;
+
+ /* _safe iterator: entries are unlinked while the list is walked. */
+ hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
+ if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
+
+/*
+ * Would two expectations clash?
+ *
+ * The tuples of @a and @b are compared under the intersection of both
+ * masks: if the part covered by both masks compares equal, the two
+ * expectations overlap and non-zero is returned.
+ */
+static inline int expect_clash(const struct nf_conntrack_expect *a,
+			       const struct nf_conntrack_expect *b)
+{
+	struct nf_conntrack_tuple_mask common;
+	int word;
+
+	/* Bits set in both masks, address words first, then the port. */
+	for (word = 0; word < NF_CT_TUPLE_L3SIZE; word++)
+		common.src.u3.all[word] = a->mask.src.u3.all[word] &
+					  b->mask.src.u3.all[word];
+	common.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
+
+	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &common);
+}
+
+/*
+ * Are @a and @b the same expectation?  They match when master, class,
+ * tuple and mask are all equal.  Returns 1 on a match, 0 otherwise.
+ */
+static inline int expect_matches(const struct nf_conntrack_expect *a,
+				 const struct nf_conntrack_expect *b)
+{
+	if (a->master != b->master || a->class != b->class)
+		return 0;
+	if (!nf_ct_tuple_equal(&a->tuple, &b->tuple))
+		return 0;
+	return nf_ct_tuple_mask_equal(&a->mask, &b->mask) != 0;
+}
+
+/*
+ * Withdraw an expectation that was registered earlier.
+ *
+ * Generally a bad idea to call this: the expectation could have matched
+ * already.  Nothing happens unless the timer could still be stopped; in
+ * that case the expectation is unlinked and released under
+ * nf_conntrack_lock.
+ */
+void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
+{
+ spin_lock_bh(&nf_conntrack_lock);
+ if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+ }
+ spin_unlock_bh(&nf_conntrack_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
+
+/*
+ * Allocate a new expectation with master @me and a use count of 1.
+ *
+ * The allocation is GFP_ATOMIC; NULL is returned on failure.  Only the
+ * master, use count and RCU head are initialised here; the remaining
+ * fields are left for the caller (see nf_ct_expect_init()).
+ *
+ * We don't increase the master conntrack refcount for non-fulfilled
+ * conntracks. During the conntrack destruction, the expectations are
+ * always killed before the conntrack itself.
+ */
+struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
+{
+ struct nf_conntrack_expect *new;
+
+ new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
+ if (!new)
+ return NULL;
+
+ new->master = me;
+ atomic_set(&new->use, 1);
+ INIT_RCU_HEAD(&new->rcu);
+ return new;
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
+
+/*
+ * Fill in the tuple and mask of a freshly allocated expectation.
+ *
+ * @exp:    expectation to initialise
+ * @class:  expectation class
+ * @family: AF_INET selects 4-byte addresses, anything else 16-byte ones
+ * @saddr:  expected source address, or NULL to accept any source address
+ * @daddr:  expected destination address (dereferenced unconditionally)
+ * @proto:  layer 4 protocol number
+ * @src:    expected source port, or NULL to accept any source port
+ * @dst:    expected destination port (dereferenced unconditionally)
+ *
+ * Flags, expectfn and helper are reset.  Only the source side has a mask.
+ */
+void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
+		       u_int8_t family,
+		       const union nf_inet_addr *saddr,
+		       const union nf_inet_addr *daddr,
+		       u_int8_t proto, const __be16 *src, const __be16 *dst)
+{
+	/* Number of address bytes that are meaningful for this family. */
+	int len = (family == AF_INET) ? 4 : 16;
+
+	exp->flags = 0;
+	exp->class = class;
+	exp->expectfn = NULL;
+	exp->helper = NULL;
+	exp->tuple.src.l3num = family;
+	exp->tuple.dst.protonum = proto;
+
+	if (!saddr) {
+		/* Wildcard source address: all-zero tuple and mask. */
+		memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
+		memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
+	} else {
+		memcpy(&exp->tuple.src.u3, saddr, len);
+		/* address needs to be cleared for nf_ct_tuple_equal */
+		if (sizeof(exp->tuple.src.u3) > len)
+			memset((void *)&exp->tuple.src.u3 + len, 0x00,
+			       sizeof(exp->tuple.src.u3) - len);
+		/* Exact match on the used bytes, zero on the rest. */
+		memset(&exp->mask.src.u3, 0xFF, len);
+		if (sizeof(exp->mask.src.u3) > len)
+			memset((void *)&exp->mask.src.u3 + len, 0x00,
+			       sizeof(exp->mask.src.u3) - len);
+	}
+
+	if (!src) {
+		/* Wildcard source port. */
+		exp->tuple.src.u.all = 0;
+		exp->mask.src.u.all = 0;
+	} else {
+		exp->tuple.src.u.all = *src;
+		exp->mask.src.u.all = htons(0xFFFF);
+	}
+
+	memcpy(&exp->tuple.dst.u3, daddr, len);
+	/* address needs to be cleared for nf_ct_tuple_equal */
+	if (sizeof(exp->tuple.dst.u3) > len)
+		memset((void *)&exp->tuple.dst.u3 + len, 0x00,
+		       sizeof(exp->tuple.dst.u3) - len);
+
+	exp->tuple.dst.u.all = *dst;
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_init);
+
+/* RCU callback: return the expectation embedding @head to its slab cache. */
+static void nf_ct_expect_free_rcu(struct rcu_head *head)
+{
+ struct nf_conntrack_expect *exp;
+
+ exp = container_of(head, struct nf_conntrack_expect, rcu);
+ kmem_cache_free(nf_ct_expect_cachep, exp);
+}
+
+/*
+ * Drop one reference on @exp.  When the last reference goes away the
+ * object is freed through call_rcu(), i.e. only after a grace period.
+ */
+void nf_ct_expect_put(struct nf_conntrack_expect *exp)
+{
+ if (atomic_dec_and_test(&exp->use))
+ call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_put);
+
+/*
+ * Link @exp into its master's expectation list and into the namespace's
+ * expectation hash, and start its timeout timer.
+ *
+ * Two references are taken on @exp in total.  The timeout comes from the
+ * master helper's expect_policy entry for the expectation's class.
+ * Called from nf_ct_expect_related() with nf_conntrack_lock held.
+ */
+static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
+{
+ struct nf_conn_help *master_help = nfct_help(exp->master);
+ struct net *net = nf_ct_exp_net(exp);
+ const struct nf_conntrack_expect_policy *p;
+ unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
+
+ /* First of the two references taken here. */
+ atomic_inc(&exp->use);
+
+ hlist_add_head(&exp->lnode, &master_help->expectations);
+ master_help->expecting[exp->class]++;
+
+ hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
+ net->ct.expect_count++;
+
+ setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
+ (unsigned long)exp);
+ p = &master_help->helper->expect_policy[exp->class];
+ /* p->timeout is multiplied by HZ, i.e. it is given in seconds. */
+ exp->timeout.expires = jiffies + p->timeout * HZ;
+ add_timer(&exp->timeout);
+
+ /* Second reference, taken once the timer is armed. */
+ atomic_inc(&exp->use);
+ NF_CT_STAT_INC(net, expect_create);
+}
+
+/*
+ * Evict the oldest expectation of @master that has the same class as @new.
+ *
+ * New expectations are added at the head of the master's list, so the
+ * last entry of the matching class is the oldest one.  It is only removed
+ * if its timer could still be stopped.
+ *
+ * Race with expectations being used means we could have none to find; OK.
+ */
+static void evict_oldest_expect(struct nf_conn *master,
+ struct nf_conntrack_expect *new)
+{
+ struct nf_conn_help *master_help = nfct_help(master);
+ struct nf_conntrack_expect *exp, *last = NULL;
+ struct hlist_node *n;
+
+ hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
+ if (exp->class == new->class)
+ last = exp;
+ }
+
+ if (last && del_timer(&last->timeout)) {
+ nf_ct_unlink_expect(last);
+ nf_ct_expect_put(last);
+ }
+}
+
+/*
+ * Restart the timeout of expectation @i using its class policy.
+ *
+ * Returns 1 if the timer was re-armed, 0 if it was no longer pending
+ * (the expectation is then left alone).
+ */
+static inline int refresh_timer(struct nf_conntrack_expect *i)
+{
+ struct nf_conn_help *master_help = nfct_help(i->master);
+ const struct nf_conntrack_expect_policy *p;
+
+ if (!del_timer(&i->timeout))
+ return 0;
+
+ p = &master_help->helper->expect_policy[i->class];
+ i->timeout.expires = jiffies + p->timeout * HZ;
+ add_timer(&i->timeout);
+ return 1;
+}
+
+/*
+ * Register the expectation @expect for its master connection.
+ *
+ * Returns:
+ *   0           the expectation was inserted, or an identical one already
+ *               existed and its timer was refreshed
+ *   -ESHUTDOWN  the master no longer has a helper
+ *   -EBUSY      the expectation clashes with a different existing one
+ *   -EMFILE     the per-class limit of the helper policy or the global
+ *               limit nf_ct_expect_max would be exceeded
+ *
+ * The whole operation runs under nf_conntrack_lock.
+ */
+int nf_ct_expect_related(struct nf_conntrack_expect *expect)
+{
+ const struct nf_conntrack_expect_policy *p;
+ struct nf_conntrack_expect *i;
+ struct nf_conn *master = expect->master;
+ struct nf_conn_help *master_help = nfct_help(master);
+ struct net *net = nf_ct_exp_net(expect);
+ struct hlist_node *n;
+ unsigned int h;
+ int ret;
+
+ NF_CT_ASSERT(master_help);
+
+ spin_lock_bh(&nf_conntrack_lock);
+ if (!master_help->helper) {
+ ret = -ESHUTDOWN;
+ goto out;
+ }
+ h = nf_ct_expect_dst_hash(&expect->tuple);
+ hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
+ if (expect_matches(i, expect)) {
+ /* Refresh timer: if it's dying, ignore.. */
+ if (refresh_timer(i)) {
+ ret = 0;
+ goto out;
+ }
+ } else if (expect_clash(i, expect)) {
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+ /* Will be over limit? */
+ p = &master_help->helper->expect_policy[expect->class];
+ if (p->max_expected &&
+ master_help->expecting[expect->class] >= p->max_expected) {
+ /* Try to make room; the eviction may fail to remove anything. */
+ evict_oldest_expect(master, expect);
+ if (master_help->expecting[expect->class] >= p->max_expected) {
+ ret = -EMFILE;
+ goto out;
+ }
+ }
+
+ if (net->ct.expect_count >= nf_ct_expect_max) {
+ if (net_ratelimit())
+ printk(KERN_WARNING
+ "nf_conntrack: expectation table full\n");
+ ret = -EMFILE;
+ goto out;
+ }
+
+ nf_ct_expect_insert(expect);
+ nf_ct_expect_event(IPEXP_NEW, expect);
+ ret = 0;
+out:
+ spin_unlock_bh(&nf_conntrack_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_related);
+
+#ifdef CONFIG_PROC_FS
+/* Per-open iterator state of the nf_conntrack_expect seq_file. */
+struct ct_expect_iter_state {
+ struct seq_net_private p;
+ /* hash bucket the iteration is currently in */
+ unsigned int bucket;
+};
+
+/*
+ * Return the first node of the first non-empty expectation hash bucket of
+ * the seq_file's namespace, or NULL if every bucket is empty.  The bucket
+ * index is left in the iterator state.
+ */
+static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
+{
+ struct net *net = seq_file_net(seq);
+ struct ct_expect_iter_state *st = seq->private;
+ struct hlist_node *n;
+
+ for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
+ n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+ if (n)
+ return n;
+ }
+ return NULL;
+}
+
+/*
+ * Return the node following @head, moving on to the next non-empty bucket
+ * when the current chain ends.  Returns NULL after the last bucket.
+ */
+static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
+ struct hlist_node *head)
+{
+ struct net *net = seq_file_net(seq);
+ struct ct_expect_iter_state *st = seq->private;
+
+ head = rcu_dereference(head->next);
+ while (head == NULL) {
+ if (++st->bucket >= nf_ct_expect_hsize)
+ return NULL;
+ head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+ }
+ return head;
+}
+
+/*
+ * Return the node at position @pos of the iteration (0 being the first
+ * node), or NULL if the table holds fewer than @pos + 1 expectations.
+ */
+static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct hlist_node *node = ct_expect_get_first(seq);
+
+	/* Step forward until @pos steps are done or the table ends. */
+	while (node && pos) {
+		node = ct_expect_get_next(seq, node);
+		if (node)
+			pos--;
+	}
+
+	if (pos)
+		return NULL;
+	return node;
+}
+
+/*
+ * seq_file ->start: enter an RCU read-side section (left again in
+ * exp_seq_stop()) and position the iterator at *pos.
+ */
+static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ rcu_read_lock();
+ return ct_expect_get_idx(seq, *pos);
+}
+
+/* seq_file ->next: advance *pos and return the node after @v. */
+static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return ct_expect_get_next(seq, v);
+}
+
+/* seq_file ->stop: leave the RCU read-side section entered in
+ * exp_seq_start(). */
+static void exp_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
+{
+ rcu_read_unlock();
+}
+
+/*
+ * seq_file ->show: print one expectation as a single line.
+ *
+ * Output: remaining timeout in seconds ("-" if the timer has no function
+ * set, 0 if it is not pending), l3/l4 protocol numbers, the tuple, and
+ * the PERMANENT / INACTIVE flags separated by a comma.
+ */
+static int exp_seq_show(struct seq_file *s, void *v)
+{
+ struct nf_conntrack_expect *expect;
+ struct hlist_node *n = v;
+ char *delim = "";
+
+ /* @v is the hnode embedded in the expectation. */
+ expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
+
+ if (expect->timeout.function)
+ seq_printf(s, "%ld ", timer_pending(&expect->timeout)
+ ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
+ else
+ seq_printf(s, "- ");
+ seq_printf(s, "l3proto = %u proto=%u ",
+ expect->tuple.src.l3num,
+ expect->tuple.dst.protonum);
+ print_tuple(s, &expect->tuple,
+ __nf_ct_l3proto_find(expect->tuple.src.l3num),
+ __nf_ct_l4proto_find(expect->tuple.src.l3num,
+ expect->tuple.dst.protonum));
+
+ if (expect->flags & NF_CT_EXPECT_PERMANENT) {
+ seq_printf(s, "PERMANENT");
+ delim = ",";
+ }
+ if (expect->flags & NF_CT_EXPECT_INACTIVE)
+ seq_printf(s, "%sINACTIVE", delim);
+
+ return seq_putc(s, '\n');
+}
+
+/* Iterator callbacks of the nf_conntrack_expect proc file. */
+static const struct seq_operations exp_seq_ops = {
+ .start = exp_seq_start,
+ .next = exp_seq_next,
+ .stop = exp_seq_stop,
+ .show = exp_seq_show
+};
+
+/* ->open: set up a namespace-aware seq_file with a ct_expect_iter_state
+ * as private data. */
+static int exp_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &exp_seq_ops,
+ sizeof(struct ct_expect_iter_state));
+}
+
+/* File operations of the nf_conntrack_expect proc file (read-only seq_file). */
+static const struct file_operations exp_file_ops = {
+ .owner = THIS_MODULE,
+ .open = exp_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Create the per-namespace "nf_conntrack_expect" proc entry (mode 0440).
+ * Returns 0 on success, or when CONFIG_PROC_FS is off; -ENOMEM if the
+ * entry could not be created.
+ */
+static int exp_proc_init(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *proc;
+
+ proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
+ if (!proc)
+ return -ENOMEM;
+#endif /* CONFIG_PROC_FS */
+ return 0;
+}
+
+/* Remove the proc entry created by exp_proc_init(); a no-op without
+ * CONFIG_PROC_FS. */
+static void exp_proc_remove(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ proc_net_remove(net, "nf_conntrack_expect");
+#endif /* CONFIG_PROC_FS */
+}
+
+module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
+
+/*
+ * Set up expectation tracking for @net.
+ *
+ * For init_net this also derives the global defaults (hash size is
+ * nf_conntrack_htable_size / 256 with a minimum of 1 unless already set;
+ * the limit is four times the hash size) and creates the slab cache.
+ * Every namespace gets its own hash table and proc entry.
+ *
+ * Returns 0 on success or a negative errno; partial setup is undone.
+ */
+int nf_conntrack_expect_init(struct net *net)
+{
+ int err = -ENOMEM;
+
+ if (net_eq(net, &init_net)) {
+ if (!nf_ct_expect_hsize) {
+ nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
+ if (!nf_ct_expect_hsize)
+ nf_ct_expect_hsize = 1;
+ }
+ nf_ct_expect_max = nf_ct_expect_hsize * 4;
+ }
+
+ net->ct.expect_count = 0;
+ net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
+ &net->ct.expect_vmalloc);
+ if (net->ct.expect_hash == NULL)
+ goto err1;
+
+ /* The slab cache is global, so only init_net creates it. */
+ if (net_eq(net, &init_net)) {
+ nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
+ sizeof(struct nf_conntrack_expect),
+ 0, 0, NULL);
+ if (!nf_ct_expect_cachep)
+ goto err2;
+ }
+
+ err = exp_proc_init(net);
+ if (err < 0)
+ goto err3;
+
+ return 0;
+
+err3:
+ if (net_eq(net, &init_net))
+ kmem_cache_destroy(nf_ct_expect_cachep);
+err2:
+ nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
+ nf_ct_expect_hsize);
+err1:
+ return err;
+}
+
+/*
+ * Tear down what nf_conntrack_expect_init() set up for @net: the proc
+ * entry, the slab cache (init_net only) and the expectation hash table.
+ */
+void nf_conntrack_expect_fini(struct net *net)
+{
+ exp_proc_remove(net);
+ if (net_eq(net, &init_net))
+ kmem_cache_destroy(nf_ct_expect_cachep);
+ nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
+ nf_ct_expect_hsize);
+}
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
new file mode 100644
index 0000000..4b2c769
--- /dev/null
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -0,0 +1,191 @@
+/* Structure dynamic extension infrastructure
+ * Copyright (C) 2004 Rusty Russell IBM Corporation
+ * Copyright (C) 2007 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (C) 2007 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+
+static struct nf_ct_ext_type *nf_ct_ext_types[NF_CT_EXT_NUM];
+static DEFINE_MUTEX(nf_ct_ext_type_mutex);
+
+/*
+ * Call the destroy hook of every extension type that is present in @ct
+ * and still registered.  The extension memory itself is not freed here.
+ */
+void __nf_ct_ext_destroy(struct nf_conn *ct)
+{
+ unsigned int i;
+ struct nf_ct_ext_type *t;
+
+ for (i = 0; i < NF_CT_EXT_NUM; i++) {
+ if (!nf_ct_ext_exist(ct, i))
+ continue;
+
+ rcu_read_lock();
+ t = rcu_dereference(nf_ct_ext_types[i]);
+
+ /* Here the nf_ct_ext_type might have been unregistered.
+ * I.e., it is responsible for cleaning up its private
+ * area in all conntracks when it is unregistered.
+ */
+ if (t && t->destroy)
+ t->destroy(ct);
+ rcu_read_unlock();
+ }
+}
+EXPORT_SYMBOL(__nf_ct_ext_destroy);
+
+/*
+ * Allocate the initial extension area of a conntrack and reserve room in
+ * it for extension @id.
+ *
+ * @ext: receives the newly allocated (zeroed) area, or NULL on failure
+ * @id:  extension type to add; the type must be registered
+ * @gfp: allocation flags
+ *
+ * Returns a pointer to the private area of extension @id inside *ext, or
+ * NULL if the allocation failed.
+ */
+static void *
+nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp)
+{
+	unsigned int off, len;
+	size_t alloc_size;
+	struct nf_ct_ext_type *t;
+
+	rcu_read_lock();
+	t = rcu_dereference(nf_ct_ext_types[id]);
+	BUG_ON(t == NULL);
+	off = ALIGN(sizeof(struct nf_ct_ext), t->align);
+	len = off + t->len;
+	/*
+	 * Copy alloc_size while still inside the read-side section:
+	 * nf_ct_extend_unregister() only waits with synchronize_rcu(), so
+	 * @t must not be dereferenced after rcu_read_unlock().
+	 */
+	alloc_size = t->alloc_size;
+	rcu_read_unlock();
+
+	*ext = kzalloc(alloc_size, gfp);
+	if (!*ext)
+		return NULL;
+
+	INIT_RCU_HEAD(&(*ext)->rcu);
+	(*ext)->offset[id] = off;
+	(*ext)->len = len;
+
+	return (void *)(*ext) + off;
+}
+
+/* RCU callback: free the extension area that embeds @head. */
+static void __nf_ct_ext_free_rcu(struct rcu_head *head)
+{
+ struct nf_ct_ext *ext = container_of(head, struct nf_ct_ext, rcu);
+ kfree(ext);
+}
+
+/*
+ * Add extension @id to the unconfirmed conntrack @ct.
+ *
+ * If @ct has no extension area yet, one is created.  Otherwise the
+ * existing area is grown; if that moves it, each present extension's move
+ * hook is called and the old area is freed after an RCU grace period.
+ *
+ * Returns a pointer to the zeroed private area of the new extension, or
+ * NULL if the extension already exists or memory could not be allocated.
+ */
+void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
+{
+ struct nf_ct_ext *new;
+ int i, newlen, newoff;
+ struct nf_ct_ext_type *t;
+
+ /* Conntrack must not be confirmed to avoid races on reallocation. */
+ NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+
+ if (!ct->ext)
+ return nf_ct_ext_create(&ct->ext, id, gfp);
+
+ if (nf_ct_ext_exist(ct, id))
+ return NULL;
+
+ rcu_read_lock();
+ t = rcu_dereference(nf_ct_ext_types[id]);
+ BUG_ON(t == NULL);
+
+ /* The new extension is appended, aligned, after the current end. */
+ newoff = ALIGN(ct->ext->len, t->align);
+ newlen = newoff + t->len;
+ rcu_read_unlock();
+
+ new = __krealloc(ct->ext, newlen, gfp);
+ if (!new)
+ return NULL;
+
+ if (new != ct->ext) {
+ /* The area moved: let each present extension fix itself up. */
+ for (i = 0; i < NF_CT_EXT_NUM; i++) {
+ if (!nf_ct_ext_exist(ct, i))
+ continue;
+
+ rcu_read_lock();
+ t = rcu_dereference(nf_ct_ext_types[i]);
+ if (t && t->move)
+ t->move((void *)new + new->offset[i],
+ (void *)ct->ext + ct->ext->offset[i]);
+ rcu_read_unlock();
+ }
+ call_rcu(&ct->ext->rcu, __nf_ct_ext_free_rcu);
+ ct->ext = new;
+ }
+
+ new->offset[id] = newoff;
+ new->len = newlen;
+ memset((void *)new + newoff, 0, newlen - newoff);
+ return (void *)new + newoff;
+}
+EXPORT_SYMBOL(__nf_ct_ext_add);
+
+/*
+ * Recompute alloc_size after @type was registered or unregistered.
+ *
+ * alloc_size of a type is the room for its own header and data plus room
+ * for every other registered type flagged NF_CT_EXT_F_PREALLOC.  If @type
+ * itself is not PREALLOC, only the slot type->id is recomputed; otherwise
+ * all registered types are.
+ *
+ * Both visible callers hold nf_ct_ext_type_mutex.
+ */
+static void update_alloc_size(struct nf_ct_ext_type *type)
+{
+ int i, j;
+ struct nf_ct_ext_type *t1, *t2;
+ enum nf_ct_ext_id min = 0, max = NF_CT_EXT_NUM - 1;
+
+ /* unnecessary to update all types */
+ if ((type->flags & NF_CT_EXT_F_PREALLOC) == 0) {
+ min = type->id;
+ max = type->id;
+ }
+
+ /* This assumes that extended areas in conntrack for the types
+ whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */
+ for (i = min; i <= max; i++) {
+ t1 = nf_ct_ext_types[i];
+ if (!t1)
+ continue;
+
+ /* NOTE(review): sizeof(struct nf_ct_ext) is counted twice here
+ * (plain and aligned), unlike in nf_ct_extend_register() —
+ * looks like harmless over-allocation; confirm intent. */
+ t1->alloc_size = sizeof(struct nf_ct_ext)
+ + ALIGN(sizeof(struct nf_ct_ext), t1->align)
+ + t1->len;
+ for (j = 0; j < NF_CT_EXT_NUM; j++) {
+ t2 = nf_ct_ext_types[j];
+ if (t2 == NULL || t2 == t1 ||
+ (t2->flags & NF_CT_EXT_F_PREALLOC) == 0)
+ continue;
+
+ t1->alloc_size = ALIGN(t1->alloc_size, t2->align)
+ + t2->len;
+ }
+ }
+}
+
+/*
+ * Register the extension type @type in slot type->id.
+ *
+ * Returns 0 on success or -EBUSY if the slot is already taken.
+ * This MUST be called in process context.
+ */
+int nf_ct_extend_register(struct nf_ct_ext_type *type)
+{
+ int ret = 0;
+
+ mutex_lock(&nf_ct_ext_type_mutex);
+ if (nf_ct_ext_types[type->id]) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ /* This ensures that nf_ct_ext_create() can allocate enough area
+ before updating alloc_size */
+ type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align)
+ + type->len;
+ rcu_assign_pointer(nf_ct_ext_types[type->id], type);
+ update_alloc_size(type);
+out:
+ mutex_unlock(&nf_ct_ext_type_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_extend_register);
+
+/*
+ * Unregister the extension type @type: clear its slot, recompute the
+ * allocation sizes and wait for an RCU grace period so that no reader is
+ * still using @type when this returns.
+ *
+ * This MUST be called in process context.
+ */
+void nf_ct_extend_unregister(struct nf_ct_ext_type *type)
+{
+ mutex_lock(&nf_ct_ext_type_mutex);
+ rcu_assign_pointer(nf_ct_ext_types[type->id], NULL);
+ update_alloc_size(type);
+ mutex_unlock(&nf_ct_ext_type_mutex);
+ synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(nf_ct_extend_unregister);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
new file mode 100644
index 0000000..4f71071
--- /dev/null
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -0,0 +1,594 @@
+/* FTP extension for connection tracking. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/ctype.h>
+#include <linux/inet.h>
+#include <net/checksum.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_ftp.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+MODULE_DESCRIPTION("ftp connection tracking helper");
+MODULE_ALIAS("ip_conntrack_ftp");
+
+/* This is slow, but it's simple. --RR */
+static char *ftp_buffer;
+
+static DEFINE_SPINLOCK(nf_ftp_lock);
+
+#define MAX_PORTS 8
+static u_int16_t ports[MAX_PORTS];
+static unsigned int ports_c;
+module_param_array(ports, ushort, &ports_c, 0400);
+
+static int loose;
+module_param(loose, bool, 0600);
+
+/*
+ * Exported hook pointer taking the packet, the FTP command type, the
+ * offset/length of the matched address text and the new expectation.
+ * Presumably installed by the FTP NAT helper — confirm against the NAT
+ * module.
+ */
+unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ enum nf_ct_ftp_type type,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct nf_conntrack_expect *exp);
+EXPORT_SYMBOL_GPL(nf_nat_ftp_hook);
+
+static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char);
+static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char);
+static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *,
+ char);
+
+/*
+ * Patterns searched for in FTP control traffic, two per direction.
+ *
+ * pattern/plen: literal text to match and its length
+ * skip:         character to skip to after the pattern before parsing
+ * term:         terminator character handed to the parser
+ * ftptype:      command type reported for a match
+ * getnum:       parser for the address/port text that follows
+ */
+static struct ftp_search {
+ const char *pattern;
+ size_t plen;
+ char skip;
+ char term;
+ enum nf_ct_ftp_type ftptype;
+ int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char);
+} search[IP_CT_DIR_MAX][2] = {
+ /* Original direction: PORT and EPRT commands. */
+ [IP_CT_DIR_ORIGINAL] = {
+ {
+ .pattern = "PORT",
+ .plen = sizeof("PORT") - 1,
+ .skip = ' ',
+ .term = '\r',
+ .ftptype = NF_CT_FTP_PORT,
+ .getnum = try_rfc959,
+ },
+ {
+ .pattern = "EPRT",
+ .plen = sizeof("EPRT") - 1,
+ .skip = ' ',
+ .term = '\r',
+ .ftptype = NF_CT_FTP_EPRT,
+ .getnum = try_eprt,
+ },
+ },
+ /* Reply direction: 227 (PASV) and 229 (EPSV) responses. */
+ [IP_CT_DIR_REPLY] = {
+ {
+ .pattern = "227 ",
+ .plen = sizeof("227 ") - 1,
+ .skip = '(',
+ .term = ')',
+ .ftptype = NF_CT_FTP_PASV,
+ .getnum = try_rfc959,
+ },
+ {
+ .pattern = "229 ",
+ .plen = sizeof("229 ") - 1,
+ .skip = '(',
+ .term = ')',
+ .ftptype = NF_CT_FTP_EPSV,
+ .getnum = try_epsv_response,
+ },
+ },
+};
+
+/*
+ * Parse a textual IPv6 address from @src (at most @dlen bytes, capped at
+ * 0xffff) into @dst, stopping at @term.
+ * Returns the number of characters consumed, or 0 if parsing failed.
+ */
+static int
+get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term)
+{
+	const char *end;
+	size_t srclen = min_t(size_t, dlen, 0xffff);
+
+	if (in6_pton(src, srclen, (u8 *)dst, term, &end) <= 0)
+		return 0;
+
+	return (int)(end - src);
+}
+
+/*
+ * Parse @array_size decimal numbers separated by @sep and followed by
+ * @term from @data (at most @dlen bytes) into @array.
+ *
+ * Returns the number of characters before the terminator on success, or
+ * 0 on an unexpected character or when the data ends before the
+ * terminator.  @array is always zeroed first.
+ *
+ * NOTE(review): the individual numbers are not range-checked, so values
+ * above 255 are accepted and handed to the callers as is.
+ */
+static int try_number(const char *data, size_t dlen, u_int32_t array[],
+ int array_size, char sep, char term)
+{
+ u_int32_t i, len;
+
+ memset(array, 0, sizeof(array[0])*array_size);
+
+ /* Keep data pointing at next char. */
+ for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) {
+ if (*data >= '0' && *data <= '9') {
+ array[i] = array[i]*10 + *data - '0';
+ }
+ else if (*data == sep)
+ i++;
+ else {
+ /* Unexpected character; true if it's the
+ terminator and we're finished. */
+ if (*data == term && i == array_size - 1)
+ return len;
+
+ pr_debug("Char %u (got %u nums) `%u' unexpected\n",
+ len, i, *data);
+ return 0;
+ }
+ }
+ pr_debug("Failed to fill %u numbers separated by %c\n",
+ array_size, sep);
+ return 0;
+}
+
+/*
+ * Parse an RFC 959 style host/port list such as 192,168,1,1,5,6.
+ * The first four numbers become the IPv4 address and the last two the
+ * TCP port in @cmd.  Returns the length of the number text, or 0 if it
+ * could not be parsed (in which case @cmd is left untouched).
+ */
+static int try_rfc959(const char *data, size_t dlen,
+		      struct nf_conntrack_man *cmd, char term)
+{
+	u_int32_t octets[6];
+	int consumed = try_number(data, dlen, octets, 6, ',', term);
+
+	if (consumed != 0) {
+		cmd->u3.ip = htonl((octets[0] << 24) | (octets[1] << 16) |
+				   (octets[2] << 8) | octets[3]);
+		cmd->u.tcp.port = htons((octets[4] << 8) | octets[5]);
+	}
+	return consumed;
+}
+
+/*
+ * Grab a port: a decimal number starting at data[@start] and running up
+ * to the delimiter @delim.
+ *
+ * On success *@port receives the port in network byte order and the index
+ * just past the delimiter is returned.  Returns 0 if a character other
+ * than a digit or the delimiter is found, if the number is 0 or empty, or
+ * if the data ends before the delimiter.
+ */
+static int get_port(const char *data, int start, size_t dlen, char delim,
+		    __be16 *port)
+{
+	u_int16_t value = 0;
+	int pos;
+
+	for (pos = start; pos < dlen; pos++) {
+		const char c = data[pos];
+
+		if (c == delim) {
+			/* Finished: an empty or zero port is not accepted. */
+			if (value == 0)
+				return 0;
+			*port = htons(value);
+			pr_debug("get_port: return %d\n", value);
+			return pos + 1;
+		}
+		if (c < '0' || c > '9') {
+			/* Some other crap */
+			pr_debug("get_port: invalid char.\n");
+			return 0;
+		}
+		value = value*10 + c - '0';
+	}
+	return 0;
+}
+
+/*
+ * Parse an EPRT argument: |1|132.235.1.2|6275| or |2|3ffe::1|6275|
+ *
+ * The protocol digit must agree with cmd->l3num ('1' for PF_INET, '2' for
+ * PF_INET6).  On success the address and port are stored in @cmd and the
+ * value returned by get_port() (index just past the closing delimiter)
+ * is returned; 0 is returned on any parse error.  @term is not used.
+ */
+static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
+ char term)
+{
+ char delim;
+ int length;
+
+ /* First character is delimiter, then "1" for IPv4 or "2" for IPv6,
+ then delimiter again. */
+ if (dlen <= 3) {
+ pr_debug("EPRT: too short\n");
+ return 0;
+ }
+ delim = data[0];
+ /* The delimiter must be a printable, non-digit character. */
+ if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) {
+ pr_debug("try_eprt: invalid delimitter.\n");
+ return 0;
+ }
+
+ if ((cmd->l3num == PF_INET && data[1] != '1') ||
+ (cmd->l3num == PF_INET6 && data[1] != '2')) {
+ pr_debug("EPRT: invalid protocol number.\n");
+ return 0;
+ }
+
+ pr_debug("EPRT: Got %c%c%c\n", delim, data[1], delim);
+
+ if (data[1] == '1') {
+ u_int32_t array[4];
+
+ /* Now we have IP address. */
+ length = try_number(data + 3, dlen - 3, array, 4, '.', delim);
+ if (length != 0)
+ cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16)
+ | (array[2] << 8) | array[3]);
+ } else {
+ /* Now we have IPv6 address. */
+ length = get_ipv6_addr(data + 3, dlen - 3,
+ (struct in6_addr *)cmd->u3.ip6, delim);
+ }
+
+ if (length == 0)
+ return 0;
+ pr_debug("EPRT: Got IP address!\n");
+ /* Start offset includes initial "|1|", and trailing delimiter */
+ return get_port(data, 3 + length + 1, dlen, delim, &cmd->u.tcp.port);
+}
+
+/*
+ * Parse an EPSV response argument: |||6446|
+ *
+ * The text must start with three identical delimiters, which have to be
+ * printable non-digit characters.  Returns the value of get_port() (index
+ * just past the closing delimiter) with the port stored in @cmd, or 0 on
+ * a parse error.  @term is not used.
+ */
+static int try_epsv_response(const char *data, size_t dlen,
+			     struct nf_conntrack_man *cmd, char term)
+{
+	char delim;
+
+	/* Need the three delimiters plus at least one more character. */
+	if (dlen <= 3)
+		return 0;
+
+	delim = data[0];
+	if (isdigit(delim) || delim < 33 || delim > 126)
+		return 0;
+	if (data[1] != delim || data[2] != delim)
+		return 0;
+
+	return get_port(data, 3, dlen, delim, &cmd->u.tcp.port);
+}
+
+/*
+ * Match a command/response prefix and hand its argument to @getnum.
+ *
+ * @data, @dlen:    payload to examine
+ * @pattern, @plen: prefix compared case-insensitively against the start of
+ *                  the payload
+ * @skip:           the argument starts right after the first occurrence of
+ *                  this character following the prefix
+ * @term:           passed through to @getnum
+ * @numoff:         set to the offset of the argument within @data
+ * @numlen:         set to the value returned by @getnum
+ * @cmd:            passed through to @getnum
+ *
+ * Return 1 for match, 0 for accept, -1 for partial.
+ */
+static int find_pattern(const char *data, size_t dlen,
+ const char *pattern, size_t plen,
+ char skip, char term,
+ unsigned int *numoff,
+ unsigned int *numlen,
+ struct nf_conntrack_man *cmd,
+ int (*getnum)(const char *, size_t,
+ struct nf_conntrack_man *, char))
+{
+ size_t i;
+
+ pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen);
+ if (dlen == 0)
+ return 0;
+
+ if (dlen <= plen) {
+ /* Short packet: try for partial? */
+ if (strnicmp(data, pattern, dlen) == 0)
+ return -1;
+ else return 0;
+ }
+
+ if (strnicmp(data, pattern, plen) != 0) {
+#if 0
+ size_t i;
+
+ pr_debug("ftp: string mismatch\n");
+ for (i = 0; i < plen; i++) {
+ pr_debug("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
+ i, data[i], data[i],
+ pattern[i], pattern[i]);
+ }
+#endif
+ return 0;
+ }
+
+ pr_debug("Pattern matches!\n");
+ /* Now we've found the constant string, try to skip
+ to the 'skip' character */
+ /* dlen > plen here, so data[plen] is readable; running off the end
+ without finding 'skip' is reported as a partial match. */
+ for (i = plen; data[i] != skip; i++)
+ if (i == dlen - 1) return -1;
+
+ /* Skip over the last character */
+ i++;
+
+ pr_debug("Skipped up to `%c'!\n", skip);
+
+ *numoff = i;
+ *numlen = getnum(data + i, dlen - i, cmd, term);
+ /* Prefix matched but the argument did not parse: treat as partial. */
+ if (!*numlen)
+ return -1;
+
+ pr_debug("Match succeeded!\n");
+ return 1;
+}
+
+/*
+ * Return 1 if @seq is one of the sequence numbers remembered for @dir
+ * (i.e. we're just after a \n), 0 otherwise.
+ */
+static int find_nl_seq(u32 seq, const struct nf_ct_ftp_master *info, int dir)
+{
+	unsigned int idx = 0;
+
+	while (idx < info->seq_aft_nl_num[dir]) {
+		if (seq == info->seq_aft_nl[dir][idx])
+			return 1;
+		idx++;
+	}
+
+	return 0;
+}
+
+/*
+ * Remember @nl_seq as a sequence number that directly follows a newline.
+ *
+ * Up to NUM_SEQ_TO_REMEMBER values are kept per direction.  While there is
+ * room the value is appended; once the table is full the oldest entry is
+ * replaced, but only by a newer value.  We don't update if it's older than
+ * what we have.
+ */
+static void update_nl_seq(struct nf_conn *ct, u32 nl_seq,
+			  struct nf_ct_ftp_master *info, int dir,
+			  struct sk_buff *skb)
+{
+	unsigned int i, oldest;
+
+	/* If we find an exact match, we're done. */
+	for (i = 0; i < info->seq_aft_nl_num[dir]; i++)
+		if (info->seq_aft_nl[dir][i] == nl_seq)
+			return;
+
+	if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
+		info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
+		nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
+		return;
+	}
+
+	/*
+	 * Table is full, so every slot holds a valid value.  Only now is it
+	 * safe to compare slots; doing so on a partly filled table would
+	 * index past the entries that have been written.
+	 */
+	oldest = 0;
+	for (i = 1; i < NUM_SEQ_TO_REMEMBER; i++)
+		if (before(info->seq_aft_nl[dir][i],
+			   info->seq_aft_nl[dir][oldest]))
+			oldest = i;
+
+	if (after(nl_seq, info->seq_aft_nl[dir][oldest])) {
+		info->seq_aft_nl[dir][oldest] = nl_seq;
+		nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
+	}
+}
+
+/*
+ * Conntrack helper callback for the FTP control connection.
+ *
+ * Payload is only parsed when the packet's sequence number is one that
+ * find_nl_seq() knows to follow a newline.  The payload is then matched
+ * against the search[dir] patterns; on a match an expectation for the
+ * announced data connection is set up, through nf_nat_ftp_hook when that is
+ * set and the connection has NAT status bits, directly otherwise.
+ *
+ * Returns NF_ACCEPT or NF_DROP, or the value returned by the NAT hook.
+ */
+static int help(struct sk_buff *skb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ unsigned int dataoff, datalen;
+ const struct tcphdr *th;
+ struct tcphdr _tcph;
+ const char *fb_ptr;
+ int ret;
+ u32 seq;
+ int dir = CTINFO2DIR(ctinfo);
+ unsigned int matchlen, matchoff;
+ struct nf_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info;
+ struct nf_conntrack_expect *exp;
+ union nf_inet_addr *daddr;
+ struct nf_conntrack_man cmd = {};
+ unsigned int i;
+ int found = 0, ends_in_nl;
+ typeof(nf_nat_ftp_hook) nf_nat_ftp;
+
+ /* Until there's been traffic both ways, don't look in packets. */
+ if (ctinfo != IP_CT_ESTABLISHED
+ && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
+ pr_debug("ftp: Conntrackinfo = %u\n", ctinfo);
+ return NF_ACCEPT;
+ }
+
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return NF_ACCEPT;
+
+ dataoff = protoff + th->doff * 4;
+ /* No data? */
+ if (dataoff >= skb->len) {
+ pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff,
+ skb->len);
+ return NF_ACCEPT;
+ }
+ datalen = skb->len - dataoff;
+
+ /* ftp_buffer is a single shared buffer; nf_ftp_lock is held for as
+ long as fb_ptr may point into it. */
+ spin_lock_bh(&nf_ftp_lock);
+ fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer);
+ BUG_ON(fb_ptr == NULL);
+
+ ends_in_nl = (fb_ptr[datalen - 1] == '\n');
+ /* Sequence number of the byte following this packet's payload. */
+ seq = ntohl(th->seq) + datalen;
+
+ /* Look up to see if we're just after a \n. */
+ if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
+ /* Not at a known line start: don't parse the payload, only
+ record a trailing \n (done at out_update_nl). */
+ pr_debug("nf_conntrack_ftp: wrong seq pos %s(%u) or %s(%u)\n",
+ ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)",
+ ct_ftp_info->seq_aft_nl[dir][0],
+ ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)",
+ ct_ftp_info->seq_aft_nl[dir][1]);
+ ret = NF_ACCEPT;
+ goto out_update_nl;
+ }
+
+ /* Initialize IP/IPv6 addr to expected address (it's not mentioned
+ in EPSV responses) */
+ cmd.l3num = nf_ct_l3num(ct);
+ memcpy(cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all,
+ sizeof(cmd.u3.all));
+
+ /* Try each pattern for this direction; stop at the first one that
+ is not a plain mismatch.  'i' keeps indexing that pattern below. */
+ for (i = 0; i < ARRAY_SIZE(search[dir]); i++) {
+ found = find_pattern(fb_ptr, datalen,
+ search[dir][i].pattern,
+ search[dir][i].plen,
+ search[dir][i].skip,
+ search[dir][i].term,
+ &matchoff, &matchlen,
+ &cmd,
+ search[dir][i].getnum);
+ if (found) break;
+ }
+ if (found == -1) {
+ /* We don't usually drop packets. After all, this is
+ connection tracking, not packet filtering.
+ However, it is necessary for accurate tracking in
+ this case. */
+ if (net_ratelimit())
+ printk("conntrack_ftp: partial %s %u+%u\n",
+ search[dir][i].pattern,
+ ntohl(th->seq), datalen);
+ ret = NF_DROP;
+ goto out;
+ } else if (found == 0) { /* No match */
+ ret = NF_ACCEPT;
+ goto out_update_nl;
+ }
+
+ pr_debug("conntrack_ftp: match `%.*s' (%u bytes at %u)\n",
+ matchlen, fb_ptr + matchoff,
+ matchlen, ntohl(th->seq) + matchoff);
+
+ exp = nf_ct_expect_alloc(ct);
+ if (exp == NULL) {
+ ret = NF_DROP;
+ goto out;
+ }
+
+ /* We refer to the reverse direction ("!dir") tuples here,
+ * because we're expecting something in the other direction.
+ * Doesn't matter unless NAT is happening. */
+ daddr = &ct->tuplehash[!dir].tuple.dst.u3;
+
+ /* Update the ftp info */
+ /* memcmp() is non-zero when the announced address differs from the
+ sender's own address. */
+ if ((cmd.l3num == nf_ct_l3num(ct)) &&
+ memcmp(&cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all,
+ sizeof(cmd.u3.all))) {
+ /* Enrico Scholz's passive FTP to partially RNAT'd ftp
+ server: it really wants us to connect to a
+ different IP address. Simply don't record it for
+ NAT. */
+ if (cmd.l3num == PF_INET) {
+ pr_debug("conntrack_ftp: NOT RECORDING: " NIPQUAD_FMT
+ " != " NIPQUAD_FMT "\n",
+ NIPQUAD(cmd.u3.ip),
+ NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip));
+ } else {
+ pr_debug("conntrack_ftp: NOT RECORDING: " NIP6_FMT
+ " != " NIP6_FMT "\n",
+ NIP6(*((struct in6_addr *)cmd.u3.ip6)),
+ NIP6(*((struct in6_addr *)
+ ct->tuplehash[dir].tuple.src.u3.ip6)));
+ }
+
+ /* Thanks to Cristiano Lincoln Mattos
+ <lincoln@cesar.org.br> for reporting this potential
+ problem (DMZ machines opening holes to internal
+ networks, or the packet filter itself). */
+ if (!loose) {
+ ret = NF_ACCEPT;
+ goto out_put_expect;
+ }
+ daddr = &cmd.u3;
+ }
+
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, cmd.l3num,
+ &ct->tuplehash[!dir].tuple.src.u3, daddr,
+ IPPROTO_TCP, NULL, &cmd.u.tcp.port);
+
+ /* Now, NAT might want to mangle the packet, and register the
+ * (possibly changed) expectation itself. */
+ nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook);
+ if (nf_nat_ftp && ct->status & IPS_NAT_MASK)
+ ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype,
+ matchoff, matchlen, exp);
+ else {
+ /* Can't expect this? Best to drop packet now. */
+ if (nf_ct_expect_related(exp) != 0)
+ ret = NF_DROP;
+ else
+ ret = NF_ACCEPT;
+ }
+
+out_put_expect:
+ /* Drop the reference taken by nf_ct_expect_alloc(). */
+ nf_ct_expect_put(exp);
+
+out_update_nl:
+ /* Now if this ends in \n, update ftp info. Seq may have been
+ * adjusted by NAT code. */
+ if (ends_in_nl)
+ update_nl_seq(ct, seq, ct_ftp_info, dir, skb);
+ out:
+ spin_unlock_bh(&nf_ftp_lock);
+ return ret;
+}
+
+static struct nf_conntrack_helper ftp[MAX_PORTS][2] __read_mostly;
+static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")] __read_mostly;
+
+static const struct nf_conntrack_expect_policy ftp_exp_policy = {
+ .max_expected = 1,
+ .timeout = 5 * 60,
+};
+
+/*
+ * Unregister every helper slot whose .me is set and free the shared
+ * payload buffer.
+ *
+ * don't make this __exit, since it's called from __init !
+ */
+static void nf_conntrack_ftp_fini(void)
+{
+ int i, j;
+ for (i = 0; i < ports_c; i++) {
+ for (j = 0; j < 2; j++) {
+ /* .me == NULL marks a slot that init never filled in. */
+ if (ftp[i][j].me == NULL)
+ continue;
+
+ pr_debug("nf_ct_ftp: unregistering helper for pf: %d "
+ "port: %d\n",
+ ftp[i][j].tuple.src.l3num, ports[i]);
+ nf_conntrack_helper_unregister(&ftp[i][j]);
+ }
+ }
+
+ kfree(ftp_buffer);
+}
+
+/*
+ * Module init: allocate the shared payload buffer and register one helper
+ * per port in ports[] for both PF_INET and PF_INET6.  When ports_c is 0,
+ * FTP_PORT is used.  On a registration failure everything registered so
+ * far is torn down again.  Returns 0 or a negative error code.
+ */
+static int __init nf_conntrack_ftp_init(void)
+{
+	int i, j, ret;
+	char *tmpname;
+
+	ftp_buffer = kmalloc(65536, GFP_KERNEL);
+	if (!ftp_buffer)
+		return -ENOMEM;
+
+	if (ports_c == 0)
+		ports[ports_c++] = FTP_PORT;
+
+	/* FIXME should be configurable whether IPv4 and IPv6 FTP connections
+		 are tracked or not - YK */
+	for (i = 0; i < ports_c; i++) {
+		ftp[i][0].tuple.src.l3num = PF_INET;
+		ftp[i][1].tuple.src.l3num = PF_INET6;
+		for (j = 0; j < 2; j++) {
+			ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]);
+			ftp[i][j].tuple.dst.protonum = IPPROTO_TCP;
+			ftp[i][j].expect_policy = &ftp_exp_policy;
+			ftp[i][j].me = THIS_MODULE;
+			ftp[i][j].help = help;
+			tmpname = &ftp_names[i][j][0];
+			if (ports[i] == FTP_PORT)
+				snprintf(tmpname, sizeof(ftp_names[i][j]),
+					 "ftp");
+			else
+				snprintf(tmpname, sizeof(ftp_names[i][j]),
+					 "ftp-%d", ports[i]);
+			ftp[i][j].name = tmpname;
+
+			pr_debug("nf_ct_ftp: registering helper for pf: %d "
+				 "port: %d\n",
+				 ftp[i][j].tuple.src.l3num, ports[i]);
+			ret = nf_conntrack_helper_register(&ftp[i][j]);
+			if (ret) {
+				printk(KERN_ERR "nf_ct_ftp: failed to register "
+				       "helper for pf: %d port: %d\n",
+				       ftp[i][j].tuple.src.l3num, ports[i]);
+				/*
+				 * This slot never made it into the helper
+				 * list; clear .me so the cleanup below does
+				 * not try to unregister it.
+				 */
+				ftp[i][j].me = NULL;
+				nf_conntrack_ftp_fini();
+				return ret;
+			}
+		}
+	}
+
+	return 0;
+}
+
+module_init(nf_conntrack_ftp_init);
+module_exit(nf_conntrack_ftp_fini);
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
new file mode 100644
index 0000000..8678823
--- /dev/null
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -0,0 +1,888 @@
+/****************************************************************************
+ * nf_conntrack_h323_asn1.c - BER and PER decoding library for H.323
+ * conntrack/NAT module.
+ *
+ * Copyright (c) 2006 by Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ *
+ * This source code is licensed under General Public License version 2.
+ *
+ * See <linux/netfilter/nf_conntrack_h323_asn1.h> for details.
+ *
+ ****************************************************************************/
+
+#ifdef __KERNEL__
+#include <linux/kernel.h>
+#else
+#include <stdio.h>
+#endif
+#include <linux/netfilter/nf_conntrack_h323_asn1.h>
+
+/* Trace Flag */
+#ifndef H323_TRACE
+#define H323_TRACE 0
+#endif
+
+#if H323_TRACE
+#define TAB_SIZE 4
+#define IFTHEN(cond, act) if(cond){act;}
+#ifdef __KERNEL__
+#define PRINT printk
+#else
+#define PRINT printf
+#endif
+#define FNAME(name) name,
+#else
+#define IFTHEN(cond, act)
+#define PRINT(fmt, args...)
+#define FNAME(name)
+#endif
+
+/* ASN.1 Types */
+#define NUL 0
+#define BOOL 1
+#define OID 2
+#define INT 3
+#define ENUM 4
+#define BITSTR 5
+#define NUMSTR 6
+#define NUMDGT 6
+#define TBCDSTR 6
+#define OCTSTR 7
+#define PRTSTR 7
+#define IA5STR 7
+#define GENSTR 7
+#define BMPSTR 8
+#define SEQ 9
+#define SET 9
+#define SEQOF 10
+#define SETOF 10
+#define CHOICE 11
+
+/* Constraint Types */
+#define FIXD 0
+/* #define BITS 1-8 */
+#define BYTE 9
+#define WORD 10
+#define CONS 11
+#define SEMI 12
+#define UNCO 13
+
+/* ASN.1 Type Attributes */
+#define SKIP 0
+#define STOP 1
+#define DECODE 2
+#define EXT 4
+#define OPEN 8
+#define OPT 16
+
+
+/* ASN.1 Field Structure */
+typedef struct field_t {
+#if H323_TRACE
+ char *name;
+#endif
+ unsigned char type;
+ unsigned char sz;
+ unsigned char lb;
+ unsigned char ub;
+ unsigned short attr;
+ unsigned short offset;
+ const struct field_t *fields;
+} field_t;
+
+/* Bit Stream */
+typedef struct {
+ unsigned char *buf;
+ unsigned char *beg;
+ unsigned char *end;
+ unsigned char *cur;
+ unsigned int bit;
+} bitstr_t;
+
+/* Tool Functions */
+#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
+#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
+#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
+#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
+static unsigned int get_len(bitstr_t *bs);
+static unsigned int get_bit(bitstr_t *bs);
+static unsigned int get_bits(bitstr_t *bs, unsigned int b);
+static unsigned int get_bitmap(bitstr_t *bs, unsigned int b);
+static unsigned int get_uint(bitstr_t *bs, int b);
+
+/* Decoder Functions */
+static int decode_nul(bitstr_t *bs, const struct field_t *f, char *base, int level);
+static int decode_bool(bitstr_t *bs, const struct field_t *f, char *base, int level);
+static int decode_oid(bitstr_t *bs, const struct field_t *f, char *base, int level);
+static int decode_int(bitstr_t *bs, const struct field_t *f, char *base, int level);
+static int decode_enum(bitstr_t *bs, const struct field_t *f, char *base, int level);
+static int decode_bitstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
+static int decode_numstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
+static int decode_octstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
+static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
+static int decode_seq(bitstr_t *bs, const struct field_t *f, char *base, int level);
+static int decode_seqof(bitstr_t *bs, const struct field_t *f, char *base, int level);
+static int decode_choice(bitstr_t *bs, const struct field_t *f, char *base, int level);
+
+/* Decoder Functions Vector */
+typedef int (*decoder_t)(bitstr_t *, const struct field_t *, char *, int);
+static const decoder_t Decoders[] = {
+ decode_nul,
+ decode_bool,
+ decode_oid,
+ decode_int,
+ decode_enum,
+ decode_bitstr,
+ decode_numstr,
+ decode_octstr,
+ decode_bmpstr,
+ decode_seq,
+ decode_seqof,
+ decode_choice,
+};
+
+/****************************************************************************
+ * H.323 Types
+ ****************************************************************************/
+#include "nf_conntrack_h323_types.c"
+
+/****************************************************************************
+ * Functions
+ ****************************************************************************/
+/*
+ * Read a length field of one or two bytes and advance bs->cur past it.
+ * If bit 7 of the first byte is clear the byte is the length; otherwise
+ * the low 6 bits of the first byte and the whole second byte form a 14-bit
+ * length.  No bound check is done here: callers must make sure the bytes
+ * are available.
+ *
+ * Assume bs is aligned && v < 16384
+ */
+static unsigned int get_len(bitstr_t *bs)
+{
+ unsigned int v;
+
+ v = *bs->cur++;
+
+ if (v & 0x80) {
+ v &= 0x3f;
+ v <<= 8;
+ v += *bs->cur++;
+ }
+
+ return v;
+}
+
+/****************************************************************************/
+/*
+ * Read the bit at the current position and advance by one bit.
+ * The result is the masked byte value, i.e. zero or non-zero - it is not
+ * normalised to 0/1.
+ */
+static unsigned int get_bit(bitstr_t *bs)
+{
+ unsigned int b = (*bs->cur) & (0x80 >> bs->bit);
+
+ INC_BIT(bs);
+
+ return b;
+}
+
+/****************************************************************************/
+/*
+ * Read the next b bits, most significant bit first, and return them
+ * right-aligned.  Advances the bit position by b.  When the field crosses
+ * a byte boundary the following byte is read as well; no bound check is
+ * done here.
+ *
+ * Assume b <= 8
+ */
+static unsigned int get_bits(bitstr_t *bs, unsigned int b)
+{
+ unsigned int v, l;
+
+ /* Keep only the bits of the current byte not consumed yet. */
+ v = (*bs->cur) & (0xffU >> bs->bit);
+ l = b + bs->bit;
+
+ if (l < 8) {
+ /* Field ends inside the current byte. */
+ v >>= 8 - l;
+ bs->bit = l;
+ } else if (l == 8) {
+ /* Field ends exactly at the byte boundary. */
+ bs->cur++;
+ bs->bit = 0;
+ } else { /* l > 8 */
+
+ /* Field spills into the next byte. */
+ v <<= 8;
+ v += *(++bs->cur);
+ v >>= 16 - l;
+ bs->bit = l - 8;
+ }
+
+ return v;
+}
+
+/****************************************************************************/
+/*
+ * Read the next b bits and return them left-aligned in a 32-bit word
+ * (first bit read ends up in bit 31); the low 32 - b bits are cleared.
+ * Returns 0 without touching the stream when b is 0.  No bound check is
+ * done here.
+ *
+ * Assume b <= 32
+ */
+static unsigned int get_bitmap(bitstr_t *bs, unsigned int b)
+{
+ unsigned int v, l, shift, bytes;
+
+ if (!b)
+ return 0;
+
+ l = bs->bit + b;
+
+ if (l < 8) {
+ /* Field ends inside the current byte. */
+ v = (unsigned int)(*bs->cur) << (bs->bit + 24);
+ bs->bit = l;
+ } else if (l == 8) {
+ /* Field ends exactly at the byte boundary. */
+ v = (unsigned int)(*bs->cur++) << (bs->bit + 24);
+ bs->bit = 0;
+ } else {
+ /* Collect the whole bytes first, most significant first. */
+ for (bytes = l >> 3, shift = 24, v = 0; bytes;
+ bytes--, shift -= 8)
+ v |= (unsigned int)(*bs->cur++) << shift;
+
+ /* Then merge the trailing partial byte and drop the bits that
+ preceded the field in the first byte. */
+ if (l < 32) {
+ v |= (unsigned int)(*bs->cur) << shift;
+ v <<= bs->bit;
+ } else if (l > 32) {
+ v <<= bs->bit;
+ v |= (*bs->cur) >> (8 - bs->bit);
+ }
+
+ bs->bit = l & 0x7;
+ }
+
+ /* Clear everything below the b requested bits. */
+ v &= 0xffffffff << (32 - b);
+
+ return v;
+}
+
+/****************************************************************************
+ * Read a big-endian unsigned integer of b bytes (1..4) and advance bs->cur
+ * by b.  Any other b reads nothing and returns 0.  No bound check is done
+ * here.
+ *
+ * Assume bs is aligned and sizeof(unsigned int) == 4
+ ****************************************************************************/
+static unsigned int get_uint(bitstr_t *bs, int b)
+{
+ unsigned int v = 0;
+
+ /* Each case deliberately falls through to consume one more byte. */
+ switch (b) {
+ case 4:
+ v |= *bs->cur++;
+ v <<= 8;
+ /* fall through */
+ case 3:
+ v |= *bs->cur++;
+ v <<= 8;
+ /* fall through */
+ case 2:
+ v |= *bs->cur++;
+ v <<= 8;
+ /* fall through */
+ case 1:
+ v |= *bs->cur++;
+ break;
+ }
+ return v;
+}
+
+/****************************************************************************/
+/* NULL type: occupies no bits in the stream, so only the trace is printed. */
+static int decode_nul(bitstr_t *bs, const struct field_t *f,
+ char *base, int level)
+{
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ return H323_ERROR_NONE;
+}
+
+/****************************************************************************/
+/* BOOLEAN: skip one bit; the value itself is not stored. */
+static int decode_bool(bitstr_t *bs, const struct field_t *f,
+ char *base, int level)
+{
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ INC_BIT(bs);
+
+ /* Returns H323_ERROR_BOUND if the cursor moved past bs->end. */
+ CHECK_BOUND(bs, 0);
+ return H323_ERROR_NONE;
+}
+
+/****************************************************************************/
+/*
+ * OBJECT IDENTIFIER: byte-align, read a one-byte length and skip that many
+ * bytes.  The value itself is not stored.
+ */
+static int decode_oid(bitstr_t *bs, const struct field_t *f,
+ char *base, int level)
+{
+ int len;
+
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ BYTE_ALIGN(bs);
+ CHECK_BOUND(bs, 1);
+ len = *bs->cur++;
+ bs->cur += len;
+
+ /* Returns H323_ERROR_BOUND if the skip went past bs->end. */
+ CHECK_BOUND(bs, 0);
+ return H323_ERROR_NONE;
+}
+
+/****************************************************************************/
+/*
+ * INTEGER: skip the value according to the constraint type in f->sz.
+ * Only the CONS form can be stored: when decoding into @base and the field
+ * has the DECODE attribute, the value plus f->lb is written as an unsigned
+ * int at base + f->offset.
+ */
+static int decode_int(bitstr_t *bs, const struct field_t *f,
+ char *base, int level)
+{
+ unsigned int len;
+
+ PRINT("%*.s%s", level * TAB_SIZE, " ", f->name);
+
+ switch (f->sz) {
+ case BYTE: /* Range == 256 */
+ BYTE_ALIGN(bs);
+ bs->cur++;
+ break;
+ case WORD: /* 257 <= Range <= 64K */
+ BYTE_ALIGN(bs);
+ bs->cur += 2;
+ break;
+ case CONS: /* 64K < Range < 4G */
+ /* Two bits give the byte length minus one. */
+ len = get_bits(bs, 2) + 1;
+ BYTE_ALIGN(bs);
+ if (base && (f->attr & DECODE)) { /* timeToLive */
+ /* NOTE(review): get_uint() reads without a bound check
+ and already advances bs->cur by len, so together
+ with the skip below the cursor moves 2 * len on
+ this path - confirm this is intended. */
+ unsigned int v = get_uint(bs, len) + f->lb;
+ PRINT(" = %u", v);
+ *((unsigned int *)(base + f->offset)) = v;
+ }
+ bs->cur += len;
+ break;
+ case UNCO:
+ /* Length-prefixed value. */
+ BYTE_ALIGN(bs);
+ CHECK_BOUND(bs, 2);
+ len = get_len(bs);
+ bs->cur += len;
+ break;
+ default: /* 2 <= Range <= 255 */
+ /* f->sz is the field width in bits. */
+ INC_BITS(bs, f->sz);
+ break;
+ }
+
+ PRINT("\n");
+
+ /* Returns H323_ERROR_BOUND if any skip above went past bs->end. */
+ CHECK_BOUND(bs, 0);
+ return H323_ERROR_NONE;
+}
+
+/****************************************************************************/
+/*
+ * ENUMERATED: skip the value.  For an extensible type with the extension
+ * bit set 7 bits are skipped, otherwise f->sz bits.
+ */
+static int decode_enum(bitstr_t *bs, const struct field_t *f,
+ char *base, int level)
+{
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ /* get_bit() is only called, and so only consumes a bit, for EXT. */
+ if ((f->attr & EXT) && get_bit(bs)) {
+ INC_BITS(bs, 7);
+ } else {
+ INC_BITS(bs, f->sz);
+ }
+
+ CHECK_BOUND(bs, 0);
+ return H323_ERROR_NONE;
+}
+
+/****************************************************************************/
+/*
+ * BIT STRING: byte-align, work out the length in bits from the constraint
+ * type in f->sz and skip it.  The value itself is not stored.
+ */
+static int decode_bitstr(bitstr_t *bs, const struct field_t *f,
+ char *base, int level)
+{
+ unsigned int len;
+
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ BYTE_ALIGN(bs);
+ switch (f->sz) {
+ case FIXD: /* fixed length > 16 */
+ len = f->lb;
+ break;
+ case WORD: /* 2-byte length */
+ CHECK_BOUND(bs, 2);
+ len = (*bs->cur++) << 8;
+ len += (*bs->cur++) + f->lb;
+ break;
+ case SEMI:
+ CHECK_BOUND(bs, 2);
+ len = get_len(bs);
+ break;
+ default:
+ len = 0;
+ break;
+ }
+
+ /* len is in bits: whole bytes move cur, the remainder sets bit. */
+ bs->cur += len >> 3;
+ bs->bit = len & 7;
+
+ CHECK_BOUND(bs, 0);
+ return H323_ERROR_NONE;
+}
+
+/****************************************************************************/
+/*
+ * Numeric string types (NUMSTR/NUMDGT/TBCDSTR): read the character count
+ * from f->sz bits, add f->lb, byte-align and skip 4 bits per character.
+ * The value itself is not stored.
+ */
+static int decode_numstr(bitstr_t *bs, const struct field_t *f,
+ char *base, int level)
+{
+ unsigned int len;
+
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ /* 2 <= Range <= 255 */
+ len = get_bits(bs, f->sz) + f->lb;
+
+ BYTE_ALIGN(bs);
+ /* len << 2 == number of bits at 4 bits per character. */
+ INC_BITS(bs, (len << 2));
+
+ CHECK_BOUND(bs, 0);
+ return H323_ERROR_NONE;
+}
+
+/****************************************************************************/
+/*
+ * Octet string types: work out the length in bytes from the constraint
+ * type in f->sz and skip it.
+ *
+ * For a fixed-size string longer than 2 bytes that is being decoded, the
+ * offset of the string from the start of the buffer (bs->cur - bs->buf) is
+ * stored as an unsigned int at base + f->offset; the bytes themselves are
+ * not copied.
+ */
+static int decode_octstr(bitstr_t *bs, const struct field_t *f,
+ char *base, int level)
+{
+ unsigned int len;
+
+ PRINT("%*.s%s", level * TAB_SIZE, " ", f->name);
+
+ switch (f->sz) {
+ case FIXD: /* Range == 1 */
+ /* Only strings longer than 2 bytes are byte-aligned here. */
+ if (f->lb > 2) {
+ BYTE_ALIGN(bs);
+ if (base && (f->attr & DECODE)) {
+ /* The IP Address */
+ IFTHEN(f->lb == 4,
+ PRINT(" = %d.%d.%d.%d:%d",
+ bs->cur[0], bs->cur[1],
+ bs->cur[2], bs->cur[3],
+ bs->cur[4] * 256 + bs->cur[5]));
+ *((unsigned int *)(base + f->offset)) =
+ bs->cur - bs->buf;
+ }
+ }
+ len = f->lb;
+ break;
+ case BYTE: /* Range == 256 */
+ BYTE_ALIGN(bs);
+ CHECK_BOUND(bs, 1);
+ len = (*bs->cur++) + f->lb;
+ break;
+ case SEMI:
+ BYTE_ALIGN(bs);
+ CHECK_BOUND(bs, 2);
+ len = get_len(bs) + f->lb;
+ break;
+ default: /* 2 <= Range <= 255 */
+ len = get_bits(bs, f->sz) + f->lb;
+ BYTE_ALIGN(bs);
+ break;
+ }
+
+ bs->cur += len;
+
+ PRINT("\n");
+
+ /* Returns H323_ERROR_BOUND if the skip went past bs->end. */
+ CHECK_BOUND(bs, 0);
+ return H323_ERROR_NONE;
+}
+
+/****************************************************************************/
+/*
+ * BMPString: read the character count according to f->sz, add f->lb and
+ * skip two bytes per character.  The value itself is not stored.
+ */
+static int decode_bmpstr(bitstr_t *bs, const struct field_t *f,
+ char *base, int level)
+{
+ unsigned int len;
+
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ switch (f->sz) {
+ case BYTE: /* Range == 256 */
+ BYTE_ALIGN(bs);
+ CHECK_BOUND(bs, 1);
+ len = (*bs->cur++) + f->lb;
+ break;
+ default: /* 2 <= Range <= 255 */
+ len = get_bits(bs, f->sz) + f->lb;
+ BYTE_ALIGN(bs);
+ break;
+ }
+
+ /* len << 1 == number of bytes at 2 bytes per character. */
+ bs->cur += len << 1;
+
+ CHECK_BOUND(bs, 0);
+ return H323_ERROR_NONE;
+}
+
+/****************************************************************************/
+/*
+ * Decode a SEQUENCE / SET.
+ *
+ * Reads the extension bit (for EXT types) and the f->sz-bit presence bitmap
+ * of the optional fields, decodes the f->lb root components, then - if the
+ * extension bit was set - the extension bitmap and the extension
+ * components, each of which carries a length prefix.  Extension components
+ * at index >= f->ub are skipped using that length.
+ *
+ * When decoding into @base, the presence bitmap (root bits, later OR-ed
+ * with the extension bits) is stored as an unsigned int at the start of
+ * the target structure.
+ *
+ * Returns H323_ERROR_NONE, H323_ERROR_STOP when a field marked STOP is
+ * reached, H323_ERROR_BOUND from a bound check, or the error returned by a
+ * nested decoder.
+ */
+static int decode_seq(bitstr_t *bs, const struct field_t *f,
+ char *base, int level)
+{
+ unsigned int ext, bmp, i, opt, len = 0, bmp2, bmp2_len;
+ int err;
+ const struct field_t *son;
+ unsigned char *beg = NULL;
+
+ PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+ /* Decode? */
+ base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
+
+ /* Extensible? */
+ ext = (f->attr & EXT) ? get_bit(bs) : 0;
+
+ /* Get fields bitmap */
+ bmp = get_bitmap(bs, f->sz);
+ if (base)
+ *(unsigned int *)base = bmp;
+
+ /* Decode the root components */
+ for (i = opt = 0, son = f->fields; i < f->lb; i++, son++) {
+ if (son->attr & STOP) {
+ PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
+ son->name);
+ return H323_ERROR_STOP;
+ }
+
+ /* opt counts optional fields; bit 31 of bmp is the first one. */
+ if (son->attr & OPT) { /* Optional component */
+ if (!((0x80000000U >> (opt++)) & bmp)) /* Not exist */
+ continue;
+ }
+
+ /* Decode */
+ if (son->attr & OPEN) { /* Open field */
+ CHECK_BOUND(bs, 2);
+ len = get_len(bs);
+ CHECK_BOUND(bs, len);
+ if (!base || !(son->attr & DECODE)) {
+ PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
+ " ", son->name);
+ bs->cur += len;
+ continue;
+ }
+ beg = bs->cur;
+
+ /* Decode */
+ if ((err = (Decoders[son->type]) (bs, son, base,
+ level + 1)) <
+ H323_ERROR_NONE)
+ return err;
+
+ /* Resume right after the open field, whatever the
+ nested decoder consumed. */
+ bs->cur = beg + len;
+ bs->bit = 0;
+ } else if ((err = (Decoders[son->type]) (bs, son, base,
+ level + 1)) <
+ H323_ERROR_NONE)
+ return err;
+ }
+
+ /* No extension? */
+ if (!ext)
+ return H323_ERROR_NONE;
+
+ /* Get the extension bitmap */
+ /* NOTE(review): bmp2_len can be as large as 128, while get_bitmap()
+ documents "Assume b <= 32" and the presence test below shifts
+ 0x80000000 right by opt - confirm the behaviour for
+ bmp2_len > 32. */
+ bmp2_len = get_bits(bs, 7) + 1;
+ CHECK_BOUND(bs, (bmp2_len + 7) >> 3);
+ bmp2 = get_bitmap(bs, bmp2_len);
+ bmp |= bmp2 >> f->sz;
+ if (base)
+ *(unsigned int *)base = bmp;
+ BYTE_ALIGN(bs);
+
+ /* Decode the extension components */
+ /* i and son continue from where the root loop stopped. */
+ for (opt = 0; opt < bmp2_len; opt++, i++, son++) {
+ /* Check Range */
+ if (i >= f->ub) { /* Newer Version? */
+ CHECK_BOUND(bs, 2);
+ len = get_len(bs);
+ CHECK_BOUND(bs, len);
+ bs->cur += len;
+ continue;
+ }
+
+ if (son->attr & STOP) {
+ PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
+ son->name);
+ return H323_ERROR_STOP;
+ }
+
+ if (!((0x80000000 >> opt) & bmp2)) /* Not present */
+ continue;
+
+ CHECK_BOUND(bs, 2);
+ len = get_len(bs);
+ CHECK_BOUND(bs, len);
+ if (!base || !(son->attr & DECODE)) {
+ PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
+ son->name);
+ bs->cur += len;
+ continue;
+ }
+ beg = bs->cur;
+
+ if ((err = (Decoders[son->type]) (bs, son, base,
+ level + 1)) <
+ H323_ERROR_NONE)
+ return err;
+
+ /* Resume right after this extension component. */
+ bs->cur = beg + len;
+ bs->bit = 0;
+ }
+ return H323_ERROR_NONE;
+}
+
+/****************************************************************************/
+/*
+ * Decode a SEQUENCE OF / SET OF.
+ *
+ * The element count is read according to the constraint type in f->sz
+ * (one byte, a big-endian 16-bit word, a length field, or an f->sz-bit
+ * field) and then offset by f->lb.  When decoding into @base the stored
+ * count is clamped to f->ub and only that many elements are written; the
+ * remaining elements are still parsed so the stream position stays right.
+ *
+ * Returns H323_ERROR_NONE, H323_ERROR_BOUND from a bound check, or the
+ * error returned by a nested decoder.
+ */
+static int decode_seqof(bitstr_t *bs, const struct field_t *f,
+			char *base, int level)
+{
+	unsigned int count, effective_count = 0, i, len = 0;
+	int err;
+	const struct field_t *son;
+	unsigned char *beg = NULL;
+
+	PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+	/* Decode? */
+	base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
+
+	/* Decode item count */
+	switch (f->sz) {
+	case BYTE:
+		BYTE_ALIGN(bs);
+		CHECK_BOUND(bs, 1);
+		count = *bs->cur++;
+		break;
+	case WORD:
+		BYTE_ALIGN(bs);
+		CHECK_BOUND(bs, 2);
+		/* Big-endian: the high byte must be kept, not overwritten. */
+		count = *bs->cur++;
+		count <<= 8;
+		count += *bs->cur++;
+		break;
+	case SEMI:
+		BYTE_ALIGN(bs);
+		CHECK_BOUND(bs, 2);
+		count = get_len(bs);
+		break;
+	default:
+		count = get_bits(bs, f->sz);
+		break;
+	}
+	count += f->lb;
+
+	/* Write Count */
+	if (base) {
+		effective_count = count > f->ub ? f->ub : count;
+		*(unsigned int *)base = effective_count;
+		base += sizeof(unsigned int);
+	}
+
+	/* Decode nested field */
+	son = f->fields;
+	/*
+	 * The nested decoder adds son->offset itself; pre-subtract it so the
+	 * first element lands right after the count, and step by son->offset
+	 * per element below.
+	 */
+	if (base)
+		base -= son->offset;
+	for (i = 0; i < count; i++) {
+		if (son->attr & OPEN) {
+			BYTE_ALIGN(bs);
+			/* Make sure the length field itself is readable. */
+			CHECK_BOUND(bs, 2);
+			len = get_len(bs);
+			CHECK_BOUND(bs, len);
+			if (!base || !(son->attr & DECODE)) {
+				PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
+				      " ", son->name);
+				bs->cur += len;
+				continue;
+			}
+			beg = bs->cur;
+
+			/* Elements beyond effective_count are parsed only. */
+			err = (Decoders[son->type]) (bs, son,
+						     i < effective_count ?
+						     base : NULL,
+						     level + 1);
+			if (err < H323_ERROR_NONE)
+				return err;
+
+			/* Resume right after the open field. */
+			bs->cur = beg + len;
+			bs->bit = 0;
+		} else {
+			err = (Decoders[son->type]) (bs, son,
+						     i < effective_count ?
+						     base : NULL,
+						     level + 1);
+			if (err < H323_ERROR_NONE)
+				return err;
+		}
+
+		if (base)
+			base += son->offset;
+	}
+
+	return H323_ERROR_NONE;
+}
+
+
+/****************************************************************************/
+/*
+ * Decode a CHOICE.
+ *
+ * Reads the choice index - 7 bits offset by f->lb for an extension
+ * alternative, f->sz bits otherwise - stores it as an unsigned int at the
+ * start of the target when decoding into @base, and then decodes the
+ * selected alternative.  Alternatives with index >= f->ub are skipped
+ * using their length prefix.
+ *
+ * Returns H323_ERROR_NONE, H323_ERROR_RANGE for a root index >= f->lb,
+ * H323_ERROR_STOP when the alternative is marked STOP, H323_ERROR_BOUND
+ * from a bound check, or the error returned by the nested decoder.
+ */
+static int decode_choice(bitstr_t *bs, const struct field_t *f,
+			 char *base, int level)
+{
+	unsigned int type, ext, len = 0;
+	int err;
+	const struct field_t *son;
+	unsigned char *beg = NULL;
+
+	PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
+
+	/* Decode? */
+	base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
+
+	/* Decode the choice index number */
+	if ((f->attr & EXT) && get_bit(bs)) {
+		ext = 1;
+		type = get_bits(bs, 7) + f->lb;
+	} else {
+		ext = 0;
+		type = get_bits(bs, f->sz);
+		if (type >= f->lb)
+			return H323_ERROR_RANGE;
+	}
+
+	/* Write Type */
+	if (base)
+		*(unsigned int *)base = type;
+
+	/* Check Range */
+	if (type >= f->ub) {	/* Newer version? */
+		BYTE_ALIGN(bs);
+		/* Make sure the length field itself is readable. */
+		CHECK_BOUND(bs, 2);
+		len = get_len(bs);
+		CHECK_BOUND(bs, len);
+		bs->cur += len;
+		return H323_ERROR_NONE;
+	}
+
+	/* Transfer to son level */
+	son = &f->fields[type];
+	if (son->attr & STOP) {
+		PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", son->name);
+		return H323_ERROR_STOP;
+	}
+
+	if (ext || (son->attr & OPEN)) {
+		BYTE_ALIGN(bs);
+		/* Make sure the length field itself is readable. */
+		CHECK_BOUND(bs, 2);
+		len = get_len(bs);
+		CHECK_BOUND(bs, len);
+		if (!base || !(son->attr & DECODE)) {
+			PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
+			      son->name);
+			bs->cur += len;
+			return H323_ERROR_NONE;
+		}
+		beg = bs->cur;
+
+		err = (Decoders[son->type]) (bs, son, base, level + 1);
+		if (err < H323_ERROR_NONE)
+			return err;
+
+		/* Resume right after the length-prefixed alternative. */
+		bs->cur = beg + len;
+		bs->bit = 0;
+	} else {
+		err = (Decoders[son->type]) (bs, son, base, level + 1);
+		if (err < H323_ERROR_NONE)
+			return err;
+	}
+
+	return H323_ERROR_NONE;
+}
+
+/****************************************************************************/
+/*
+ * Decode the sz bytes at buf as a RasMessage CHOICE into *ras.
+ * Returns the result of decode_choice().
+ */
+int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras)
+{
+	static const struct field_t ras_message = {
+		FNAME("RasMessage") CHOICE, 5, 24, 32, DECODE | EXT,
+		0, _RasMessage
+	};
+	bitstr_t stream;
+
+	/* Bit stream covering the whole buffer, positioned at its start. */
+	stream.buf = buf;
+	stream.beg = buf;
+	stream.cur = buf;
+	stream.end = buf + sz;
+	stream.bit = 0;
+
+	return decode_choice(&stream, &ras_message, (char *) ras, 0);
+}
+
+/****************************************************************************/
+/*
+ * Decode the sz bytes starting at beg as an H323-UserInformation SEQUENCE
+ * into *uuie.  buf is kept as the stream's buffer start, while decoding
+ * begins at beg.  Returns the result of decode_seq().
+ */
+static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg,
+				      size_t sz, H323_UserInformation *uuie)
+{
+	static const struct field_t h323_userinformation = {
+		FNAME("H323-UserInformation") SEQ, 1, 2, 2, DECODE | EXT,
+		0, _H323_UserInformation
+	};
+	bitstr_t stream;
+
+	stream.buf = buf;
+	stream.beg = beg;
+	stream.cur = beg;
+	stream.end = beg + sz;
+	stream.bit = 0;
+
+	return decode_seq(&stream, &h323_userinformation, (char *) uuie, 0);
+}
+
+/****************************************************************************/
+/*
+ * Decode the sz bytes at buf as a MultimediaSystemControlMessage CHOICE
+ * into *mscm.  Returns the result of decode_choice().
+ */
+int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
+					 MultimediaSystemControlMessage *
+					 mscm)
+{
+	static const struct field_t multimediasystemcontrolmessage = {
+		FNAME("MultimediaSystemControlMessage") CHOICE, 2, 4, 4,
+		DECODE | EXT, 0, _MultimediaSystemControlMessage
+	};
+	bitstr_t stream;
+
+	/* Bit stream covering the whole buffer, positioned at its start. */
+	stream.buf = buf;
+	stream.beg = buf;
+	stream.cur = buf;
+	stream.end = buf + sz;
+	stream.bit = 0;
+
+	return decode_choice(&stream, &multimediasystemcontrolmessage,
+			     (char *) mscm, 0);
+}
+
+/****************************************************************************/
+/*
+ * Walk a Q.931 message and decode its User-User information element.
+ *
+ * Checks the protocol discriminator (0x08), skips the call reference,
+ * stores the message type in q931->MessageType, then steps through the
+ * information elements until the User-User IE (0x7e) is found and hands
+ * its contents to DecodeH323_UserInformation().
+ *
+ * sz always tracks the number of bytes remaining at p, so every read is
+ * preceded by a check against it.
+ *
+ * Returns the decoder's result, H323_ERROR_RANGE for an unknown protocol
+ * discriminator, or H323_ERROR_BOUND if the message is truncated or holds
+ * no usable User-User IE.
+ */
+int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931)
+{
+	unsigned char *p = buf;
+	int len;
+
+	if (!p || sz < 1)
+		return H323_ERROR_BOUND;
+
+	/* Protocol Discriminator */
+	if (*p != 0x08) {
+		PRINT("Unknown Protocol Discriminator\n");
+		return H323_ERROR_RANGE;
+	}
+	p++;
+	sz--;
+
+	/* CallReferenceValue */
+	if (sz < 1)
+		return H323_ERROR_BOUND;
+	len = *p++;
+	sz--;
+	if (sz < len)
+		return H323_ERROR_BOUND;
+	p += len;
+	sz -= len;
+
+	/* Message Type */
+	/* Two bytes needed: the type itself and the byte inspected below. */
+	if (sz < 2)
+		return H323_ERROR_BOUND;
+	q931->MessageType = *p++;
+	sz--;
+	PRINT("MessageType = %02X\n", q931->MessageType);
+	if (*p & 0x80) {
+		p++;
+		sz--;
+	}
+
+	/* Decode Information Elements */
+	while (sz > 0) {
+		if (*p == 0x7e) {	/* UserUserIE */
+			if (sz < 3)
+				break;
+			p++;
+			len = *p++ << 8;
+			len |= *p++;
+			sz -= 3;
+			/*
+			 * The first content byte is skipped below, so an
+			 * empty IE would make len wrap to a huge size.
+			 */
+			if (len < 1 || sz < len)
+				break;
+			p++;
+			len--;
+			return DecodeH323_UserInformation(buf, p, len,
+							  &q931->UUIE);
+		}
+		/* Any other IE: identifier byte, length byte, contents. */
+		p++;
+		sz--;
+		if (sz < 1)
+			break;
+		len = *p++;
+		sz--;
+		if (sz < len)
+			break;
+		p += len;
+		sz -= len;
+	}
+
+	PRINT("Q.931 UUIE not found\n");
+
+	return H323_ERROR_BOUND;
+}
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
new file mode 100644
index 0000000..c1504f7
--- /dev/null
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -0,0 +1,1833 @@
+/*
+ * H.323 connection tracking helper
+ *
+ * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ *
+ * This source code is licensed under General Public License version 2.
+ *
+ * Based on the 'brute force' H.323 connection tracking module by
+ * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * For more information, please see http://nath323.sourceforge.net/
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/ctype.h>
+#include <linux/inet.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/skbuff.h>
+#include <net/route.h>
+#include <net/ip6_route.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_h323.h>
+
+/* Parameters */
+/*
+ * Fallback TTL applied when an RRQ does not carry one (default 300;
+ * presumably seconds - confirm against the RRQ handling code).
+ * All three parameters are registered with mode 0600.
+ */
+static unsigned int default_rrq_ttl __read_mostly = 300;
+module_param(default_rrq_ttl, uint, 0600);
+MODULE_PARM_DESC(default_rrq_ttl, "use this TTL if it's missing in RRQ");
+
+/* Enabled by default; see the parameter description below. */
+static int gkrouted_only __read_mostly = 1;
+module_param(gkrouted_only, int, 0600);
+MODULE_PARM_DESC(gkrouted_only, "only accept calls from gatekeeper");
+
+/*
+ * Enabled by default; consulted by expect_callforwarding() before it calls
+ * callforward_do_filter().
+ * NOTE(review): the variable is an int but is registered with the "bool"
+ * parameter type - confirm this matches the module_param() conventions of
+ * this kernel version.
+ */
+static int callforward_filter __read_mostly = 1;
+module_param(callforward_filter, bool, 0600);
+MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
+ "if both endpoints are on different sides "
+ "(determined by routing information)");
+
+/*
+ * Hooks for NAT
+ *
+ * Function pointers with external linkage; nothing in this part of the file
+ * assigns them (presumably the H.323 NAT module installs them - confirm).
+ * The expect_*() helpers and process_setup() below read them with
+ * rcu_dereference() and fall back to conntrack-only handling when a hook is
+ * NULL.
+ */
+int (*set_h245_addr_hook) (struct sk_buff *skb,
+ unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr,
+ union nf_inet_addr *addr, __be16 port)
+ __read_mostly;
+int (*set_h225_addr_hook) (struct sk_buff *skb,
+ unsigned char **data, int dataoff,
+ TransportAddress *taddr,
+ union nf_inet_addr *addr, __be16 port)
+ __read_mostly;
+int (*set_sig_addr_hook) (struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data,
+ TransportAddress *taddr, int count) __read_mostly;
+int (*set_ras_addr_hook) (struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data,
+ TransportAddress *taddr, int count) __read_mostly;
+/* Called by expect_rtp_rtcp() with both expectations already initialised */
+int (*nat_rtp_rtcp_hook) (struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr,
+ __be16 port, __be16 rtp_port,
+ struct nf_conntrack_expect *rtp_exp,
+ struct nf_conntrack_expect *rtcp_exp) __read_mostly;
+/* Called by expect_t120() */
+int (*nat_t120_hook) (struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr, __be16 port,
+ struct nf_conntrack_expect *exp) __read_mostly;
+/* Called by expect_h245() */
+int (*nat_h245_hook) (struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ TransportAddress *taddr, __be16 port,
+ struct nf_conntrack_expect *exp) __read_mostly;
+/* Called by expect_callforwarding() */
+int (*nat_callforwarding_hook) (struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ TransportAddress *taddr, __be16 port,
+ struct nf_conntrack_expect *exp) __read_mostly;
+int (*nat_q931_hook) (struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, TransportAddress *taddr, int idx,
+ __be16 port, struct nf_conntrack_expect *exp)
+ __read_mostly;
+
+/*
+ * Held (with bottom halves disabled) by h245_help() and q931_help() around
+ * their whole decode loop, which uses h323_buffer and a function-static
+ * decode structure.
+ */
+static DEFINE_SPINLOCK(nf_h323_lock);
+/*
+ * Scratch buffer passed to skb_header_pointer() in get_tpkt_data().
+ * It is not allocated in this part of the file.
+ */
+static char *h323_buffer;
+
+/* Forward declarations; expect_h245() and expect_callforwarding() attach
+ * these helpers to the expectations they create. */
+static struct nf_conntrack_helper nf_conntrack_helper_h245;
+static struct nf_conntrack_helper nf_conntrack_helper_q931[];
+static struct nf_conntrack_helper nf_conntrack_helper_ras[];
+
+/****************************************************************************/
+/*
+ * Locate the next TPKT-framed payload in the TCP data of @skb.
+ *
+ * Intended to be called in a loop: on the first call *data must be NULL; on
+ * later calls *data, *datalen and *dataoff must still hold the values stored
+ * by the previous successful call, and the search resumes right after that
+ * payload.
+ *
+ * On success returns 1 and stores the payload pointer in *data, its length
+ * in *datalen and its offset from the start of the TCP data in *dataoff.
+ * Returns 0 when there is no (further) complete TPKT.
+ *
+ * info->tpkt_len[dir] remembers, per direction, the payload length announced
+ * by a TPKT header that arrived alone in a segment, so that the following
+ * header-less segment can be accepted as that payload.  It is reset to 0 on
+ * every exit path except the one that records such a header and the early
+ * "not a TPKT" return.
+ */
+static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ unsigned char **data, int *datalen, int *dataoff)
+{
+ struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+ int dir = CTINFO2DIR(ctinfo);
+ const struct tcphdr *th;
+ struct tcphdr _tcph;
+ int tcpdatalen;
+ int tcpdataoff;
+ unsigned char *tpkt;
+ int tpktlen;
+ int tpktoff;
+
+ /* Get TCP header */
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return 0;
+
+ /* Get TCP data offset */
+ tcpdataoff = protoff + th->doff * 4;
+
+ /* Get TCP data length */
+ tcpdatalen = skb->len - tcpdataoff;
+ if (tcpdatalen <= 0) /* No TCP data */
+ goto clear_out;
+
+ if (*data == NULL) { /* first TPKT */
+ /* Get first TPKT pointer */
+ /* h323_buffer is the copy target handed to skb_header_pointer() */
+ tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen,
+ h323_buffer);
+ BUG_ON(tpkt == NULL);
+
+ /* Validate TPKT identifier */
+ if (tcpdatalen < 4 || tpkt[0] != 0x03 || tpkt[1] != 0) {
+ /* Netmeeting sends TPKT header and data separately */
+ if (info->tpkt_len[dir] > 0) {
+ pr_debug("nf_ct_h323: previous packet "
+ "indicated separate TPKT data of %hu "
+ "bytes\n", info->tpkt_len[dir]);
+ if (info->tpkt_len[dir] <= tcpdatalen) {
+ /* Yes, there was a TPKT header
+ * received */
+ /* Whole segment start is the payload: no header to skip */
+ *data = tpkt;
+ *datalen = info->tpkt_len[dir];
+ *dataoff = 0;
+ goto out;
+ }
+
+ /* Fragmented TPKT */
+ pr_debug("nf_ct_h323: fragmented TPKT\n");
+ goto clear_out;
+ }
+
+ /* It is not even a TPKT */
+ return 0;
+ }
+ tpktoff = 0;
+ } else { /* Next TPKT */
+ /* Resume immediately after the payload returned last time */
+ tpktoff = *dataoff + *datalen;
+ tcpdatalen -= tpktoff;
+ if (tcpdatalen <= 4) /* No more TPKT */
+ goto clear_out;
+ tpkt = *data + *datalen;
+
+ /* Validate TPKT identifier */
+ if (tpkt[0] != 0x03 || tpkt[1] != 0)
+ goto clear_out;
+ }
+
+ /* Validate TPKT length */
+ /* 16-bit big-endian length that includes the 4-byte TPKT header */
+ tpktlen = tpkt[2] * 256 + tpkt[3];
+ if (tpktlen < 4)
+ goto clear_out;
+ if (tpktlen > tcpdatalen) {
+ if (tcpdatalen == 4) { /* Separate TPKT header */
+ /* Netmeeting sends TPKT header and data separately */
+ pr_debug("nf_ct_h323: separate TPKT header indicates "
+ "there will be TPKT data of %hu bytes\n",
+ tpktlen - 4);
+ /* Remember the announced payload size for the next segment */
+ info->tpkt_len[dir] = tpktlen - 4;
+ return 0;
+ }
+
+ if (net_ratelimit())
+ printk("nf_ct_h323: incomplete TPKT (fragmented?)\n");
+ goto clear_out;
+ }
+
+ /* This is the encapsulated data */
+ *data = tpkt + 4;
+ *datalen = tpktlen - 4;
+ *dataoff = tpktoff + 4;
+
+ out:
+ /* Clear TPKT length */
+ info->tpkt_len[dir] = 0;
+ return 1;
+
+ clear_out:
+ /* Failure/end path: forget any pending separate-header state */
+ info->tpkt_len[dir] = 0;
+ return 0;
+}
+
+/****************************************************************************/
+/*
+ * Extract the address and port referenced by an H.245 transport address.
+ *
+ * @ct:    connection; its L3 protocol number must match the address family
+ * @data:  base of the buffer that the offset stored in @taddr refers to
+ * @taddr: decoded H.245 transport address
+ * @addr:  receives the address, with the unused tail of the union zeroed
+ * @port:  receives the two bytes that follow the address, copied unchanged
+ *
+ * Returns 1 on success; 0 when @taddr is not a unicast IPv4/IPv6 address or
+ * its family differs from that of @ct.
+ */
+static int get_h245_addr(struct nf_conn *ct, const unsigned char *data,
+			 H245_TransportAddress *taddr,
+			 union nf_inet_addr *addr, __be16 *port)
+{
+	const unsigned char *raw;
+	int alen;
+
+	/* Anything other than a unicast address is ignored */
+	if (taddr->choice != eH245_TransportAddress_unicastAddress)
+		return 0;
+
+	if (taddr->unicastAddress.choice == eUnicastAddress_iPAddress) {
+		if (nf_ct_l3num(ct) != AF_INET)
+			return 0;
+		raw = data + taddr->unicastAddress.iPAddress.network;
+		alen = 4;
+	} else if (taddr->unicastAddress.choice ==
+		   eUnicastAddress_iP6Address) {
+		if (nf_ct_l3num(ct) != AF_INET6)
+			return 0;
+		raw = data + taddr->unicastAddress.iP6Address.network;
+		alen = 16;
+	} else {
+		return 0;
+	}
+
+	/* Zero the whole union, then lay the address over its start */
+	memset(addr, 0, sizeof(*addr));
+	memcpy(addr, raw, alen);
+	/* The port sits directly behind the address bytes */
+	memcpy(port, raw + alen, sizeof(*port));
+
+	return 1;
+}
+
+/****************************************************************************/
+/*
+ * Create a pair of UDP expectations (RTP on the even port, RTCP on the
+ * following odd port) for the media address announced in @taddr.
+ *
+ * Returns 0 when nothing needs to be done (address unreadable, not equal to
+ * the sender's source address, or port 0) or when both expectations were
+ * registered; -1 on allocation or registration failure.  When the NAT hook
+ * is used, its return value is passed through.
+ */
+static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ int ret = 0;
+ __be16 port;
+ __be16 rtp_port, rtcp_port;
+ union nf_inet_addr addr;
+ struct nf_conntrack_expect *rtp_exp;
+ struct nf_conntrack_expect *rtcp_exp;
+ typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp;
+
+ /* Read RTP or RTCP address */
+ /* Only addresses equal to this direction's source address are tracked */
+ if (!get_h245_addr(ct, *data, taddr, &addr, &port) ||
+ memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) ||
+ port == 0)
+ return 0;
+
+ /* RTP port is even */
+ /* Clear the low bit for RTP; RTCP uses the next port up */
+ port &= htons(~1);
+ rtp_port = port;
+ rtcp_port = htons(ntohs(port) + 1);
+
+ /* Create expect for RTP */
+ if ((rtp_exp = nf_ct_expect_alloc(ct)) == NULL)
+ return -1;
+ nf_ct_expect_init(rtp_exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+ &ct->tuplehash[!dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ IPPROTO_UDP, NULL, &rtp_port);
+
+ /* Create expect for RTCP */
+ if ((rtcp_exp = nf_ct_expect_alloc(ct)) == NULL) {
+ /* Drop the RTP expectation allocated above before bailing out */
+ nf_ct_expect_put(rtp_exp);
+ return -1;
+ }
+ nf_ct_expect_init(rtcp_exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+ &ct->tuplehash[!dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ IPPROTO_UDP, NULL, &rtcp_port);
+
+ /*
+ * The NAT hook is used only when this direction's source differs from
+ * the other direction's destination, a hook is installed and the
+ * connection has a NAT status bit set.
+ */
+ if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
+ (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) &&
+ ct->status & IPS_NAT_MASK) {
+ /* NAT needed */
+ ret = nat_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
+ taddr, port, rtp_port, rtp_exp, rtcp_exp);
+ } else { /* Conntrack only */
+ if (nf_ct_expect_related(rtp_exp) == 0) {
+ if (nf_ct_expect_related(rtcp_exp) == 0) {
+ pr_debug("nf_ct_h323: expect RTP ");
+ nf_ct_dump_tuple(&rtp_exp->tuple);
+ pr_debug("nf_ct_h323: expect RTCP ");
+ nf_ct_dump_tuple(&rtcp_exp->tuple);
+ } else {
+ /* RTCP failed: withdraw the RTP expectation again */
+ nf_ct_unexpect_related(rtp_exp);
+ ret = -1;
+ }
+ } else
+ ret = -1;
+ }
+
+ /* Drop the references obtained from nf_ct_expect_alloc() */
+ nf_ct_expect_put(rtp_exp);
+ nf_ct_expect_put(rtcp_exp);
+
+ return ret;
+}
+
+/****************************************************************************/
+/*
+ * Create a TCP expectation for the T.120 address announced in @taddr.
+ *
+ * Returns 0 when the address is not tracked (unreadable, not the sender's
+ * source address, or port 0) or the expectation was registered, -1 on
+ * allocation/registration failure, or the NAT hook's result when it is used.
+ */
+static int expect_t120(struct sk_buff *skb,
+		       struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, int dataoff,
+		       H245_TransportAddress *taddr)
+{
+	int dir = CTINFO2DIR(ctinfo);
+	typeof(nat_t120_hook) nat_t120 = NULL;
+	struct nf_conntrack_expect *exp;
+	union nf_inet_addr addr;
+	__be16 port;
+	int ret;
+
+	/* Read T.120 address; it must be the sender's own source address */
+	if (!get_h245_addr(ct, *data, taddr, &addr, &port))
+		return 0;
+	if (memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)))
+		return 0;
+	if (port == 0)
+		return 0;
+
+	/* Create expect for T.120 connections */
+	exp = nf_ct_expect_alloc(ct);
+	if (exp == NULL)
+		return -1;
+	nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+			  &ct->tuplehash[!dir].tuple.src.u3,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  IPPROTO_TCP, NULL, &port);
+	exp->flags = NF_CT_EXPECT_PERMANENT;	/* Accept multiple channels */
+
+	/* The hook is only looked up when the two tuple addresses differ */
+	if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
+		   &ct->tuplehash[!dir].tuple.dst.u3,
+		   sizeof(ct->tuplehash[dir].tuple.src.u3)))
+		nat_t120 = rcu_dereference(nat_t120_hook);
+
+	if (nat_t120 && (ct->status & IPS_NAT_MASK)) {
+		/* NAT needed */
+		ret = nat_t120(skb, ct, ctinfo, data, dataoff, taddr,
+			       port, exp);
+	} else if (nf_ct_expect_related(exp) == 0) {
+		/* Conntrack only */
+		pr_debug("nf_ct_h323: expect T.120 ");
+		nf_ct_dump_tuple(&exp->tuple);
+		ret = 0;
+	} else {
+		ret = -1;
+	}
+
+	/* Drop the reference taken by nf_ct_expect_alloc() */
+	nf_ct_expect_put(exp);
+
+	return ret;
+}
+
+/****************************************************************************/
+/*
+ * Set up RTP/RTCP expectations for the media and media-control addresses
+ * that are present in @channel.
+ *
+ * Returns 0 on success, -1 as soon as expect_rtp_rtcp() reports an error.
+ */
+static int process_h245_channel(struct sk_buff *skb,
+				struct nf_conn *ct,
+				enum ip_conntrack_info ctinfo,
+				unsigned char **data, int dataoff,
+				H2250LogicalChannelParameters *channel)
+{
+	/* RTP */
+	if ((channel->options & eH2250LogicalChannelParameters_mediaChannel) &&
+	    expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
+			    &channel->mediaChannel) < 0)
+		return -1;
+
+	/* RTCP */
+	if ((channel->options &
+	     eH2250LogicalChannelParameters_mediaControlChannel) &&
+	    expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
+			    &channel->mediaControlChannel) < 0)
+		return -1;
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Handle an H.245 OpenLogicalChannel: create RTP/RTCP expectations for the
+ * forward and (if present) reverse H.225.0 logical channel parameters, and a
+ * T.120 expectation when a separate LAN stack address is announced.
+ *
+ * Returns 0 on success, -1 if any of the expectation helpers fails.
+ */
+static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ OpenLogicalChannel *olc)
+{
+ int ret;
+
+ pr_debug("nf_ct_h323: OpenLogicalChannel\n");
+
+ /* Forward channel parameters */
+ if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
+ eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
+ {
+ ret = process_h245_channel(skb, ct, ctinfo, data, dataoff,
+ &olc->
+ forwardLogicalChannelParameters.
+ multiplexParameters.
+ h2250LogicalChannelParameters);
+ if (ret < 0)
+ return -1;
+ }
+
+ /* Reverse channel parameters: both option bits must be set before the
+ * choice is examined */
+ if ((olc->options &
+ eOpenLogicalChannel_reverseLogicalChannelParameters) &&
+ (olc->reverseLogicalChannelParameters.options &
+ eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters)
+ && (olc->reverseLogicalChannelParameters.multiplexParameters.
+ choice ==
+ eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
+ {
+ ret =
+ process_h245_channel(skb, ct, ctinfo, data, dataoff,
+ &olc->
+ reverseLogicalChannelParameters.
+ multiplexParameters.
+ h2250LogicalChannelParameters);
+ if (ret < 0)
+ return -1;
+ }
+
+ /* T.120 data channel carried over a separate LAN stack */
+ if ((olc->options & eOpenLogicalChannel_separateStack) &&
+ olc->forwardLogicalChannelParameters.dataType.choice ==
+ eDataType_data &&
+ olc->forwardLogicalChannelParameters.dataType.data.application.
+ choice == eDataApplicationCapability_application_t120 &&
+ olc->forwardLogicalChannelParameters.dataType.data.application.
+ t120.choice == eDataProtocolCapability_separateLANStack &&
+ olc->separateStack.networkAddress.choice ==
+ eNetworkAccessParameters_networkAddress_localAreaAddress) {
+ ret = expect_t120(skb, ct, ctinfo, data, dataoff,
+ &olc->separateStack.networkAddress.
+ localAreaAddress);
+ if (ret < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+/****************************************************************************/
+/*
+ * Handle an H.245 OpenLogicalChannelAck: create RTP/RTCP expectations for
+ * the reverse channel parameters and for the media/media-control addresses
+ * in the forward multiplex ack parameters, plus a T.120 expectation when a
+ * separate stack with a local area address is present.
+ *
+ * Returns 0 on success, -1 if any of the expectation helpers fails.
+ */
+static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ OpenLogicalChannelAck *olca)
+{
+ H2250LogicalChannelAckParameters *ack;
+ int ret;
+
+ pr_debug("nf_ct_h323: OpenLogicalChannelAck\n");
+
+ /* Reverse channel parameters */
+ if ((olca->options &
+ eOpenLogicalChannelAck_reverseLogicalChannelParameters) &&
+ (olca->reverseLogicalChannelParameters.options &
+ eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters)
+ && (olca->reverseLogicalChannelParameters.multiplexParameters.
+ choice ==
+ eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
+ {
+ ret = process_h245_channel(skb, ct, ctinfo, data, dataoff,
+ &olca->
+ reverseLogicalChannelParameters.
+ multiplexParameters.
+ h2250LogicalChannelParameters);
+ if (ret < 0)
+ return -1;
+ }
+
+ /* Forward multiplex ack parameters: handled field by field here */
+ if ((olca->options &
+ eOpenLogicalChannelAck_forwardMultiplexAckParameters) &&
+ (olca->forwardMultiplexAckParameters.choice ==
+ eOpenLogicalChannelAck_forwardMultiplexAckParameters_h2250LogicalChannelAckParameters))
+ {
+ ack = &olca->forwardMultiplexAckParameters.
+ h2250LogicalChannelAckParameters;
+ if (ack->options &
+ eH2250LogicalChannelAckParameters_mediaChannel) {
+ /* RTP */
+ ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
+ &ack->mediaChannel);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (ack->options &
+ eH2250LogicalChannelAckParameters_mediaControlChannel) {
+ /* RTCP */
+ ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
+ &ack->mediaControlChannel);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ /* T.120 over a separate stack */
+ if ((olca->options & eOpenLogicalChannelAck_separateStack) &&
+ olca->separateStack.networkAddress.choice ==
+ eNetworkAccessParameters_networkAddress_localAreaAddress) {
+ ret = expect_t120(skb, ct, ctinfo, data, dataoff,
+ &olca->separateStack.networkAddress.
+ localAreaAddress);
+ if (ret < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+/****************************************************************************/
+/*
+ * Dispatch a decoded H.245 message.  Only OpenLogicalChannel requests and
+ * OpenLogicalChannelAck responses are acted upon; everything else is merely
+ * logged at debug level.
+ *
+ * Returns the handler's result for the two handled messages, 0 otherwise.
+ */
+static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
+			enum ip_conntrack_info ctinfo,
+			unsigned char **data, int dataoff,
+			MultimediaSystemControlMessage *mscm)
+{
+	if (mscm->choice == eMultimediaSystemControlMessage_request) {
+		if (mscm->request.choice ==
+		    eRequestMessage_openLogicalChannel)
+			return process_olc(skb, ct, ctinfo, data, dataoff,
+					   &mscm->request.openLogicalChannel);
+
+		pr_debug("nf_ct_h323: H.245 Request %d\n",
+			 mscm->request.choice);
+	} else if (mscm->choice == eMultimediaSystemControlMessage_response) {
+		if (mscm->response.choice ==
+		    eResponseMessage_openLogicalChannelAck)
+			return process_olca(skb, ct, ctinfo, data, dataoff,
+					    &mscm->response.
+					    openLogicalChannelAck);
+
+		pr_debug("nf_ct_h323: H.245 Response %d\n",
+			 mscm->response.choice);
+	} else {
+		pr_debug("nf_ct_h323: H.245 signal %d\n", mscm->choice);
+	}
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Conntrack helper callback for H.245 connections.
+ *
+ * Walks every TPKT in the segment, decodes it as an H.245 message and lets
+ * process_h245() act on it.  Decode errors end the walk but the packet is
+ * still accepted; a processing failure drops the packet.
+ *
+ * Returns NF_ACCEPT or NF_DROP.
+ */
+static int h245_help(struct sk_buff *skb, unsigned int protoff,
+		     struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+	/* Shared decode target; every use is inside nf_h323_lock */
+	static MultimediaSystemControlMessage mscm;
+	unsigned char *data = NULL;
+	int verdict = NF_ACCEPT;
+	int datalen;
+	int dataoff;
+	int ret;
+
+	/* Until there's been traffic both ways, don't look in packets. */
+	if (!(ctinfo == IP_CT_ESTABLISHED ||
+	      ctinfo == IP_CT_ESTABLISHED + IP_CT_IS_REPLY))
+		return NF_ACCEPT;
+
+	pr_debug("nf_ct_h245: skblen = %u\n", skb->len);
+
+	spin_lock_bh(&nf_h323_lock);
+
+	/* Process each TPKT */
+	while (get_tpkt_data(skb, protoff, ct, ctinfo,
+			     &data, &datalen, &dataoff)) {
+		pr_debug("nf_ct_h245: TPKT len=%d ", datalen);
+		nf_ct_dump_tuple(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
+
+		/* Decode H.245 signal */
+		ret = DecodeMultimediaSystemControlMessage(data, datalen,
+							   &mscm);
+		if (ret < 0) {
+			pr_debug("nf_ct_h245: decoding error: %s\n",
+				 ret == H323_ERROR_BOUND ?
+				 "out of bound" : "out of range");
+			/* We don't drop when decoding error */
+			break;
+		}
+
+		/* Process H.245 signal */
+		if (process_h245(skb, ct, ctinfo, &data, dataoff, &mscm) < 0) {
+			verdict = NF_DROP;
+			break;
+		}
+	}
+
+	spin_unlock_bh(&nf_h323_lock);
+
+	if (verdict == NF_DROP && net_ratelimit())
+		printk("nf_ct_h245: packet dropped\n");
+
+	return verdict;
+}
+
+/****************************************************************************/
+/*
+ * Expectation limits for an H.245 connection: four expectations per RTP
+ * channel plus two for T.120.  The timeout unit is not visible here
+ * (presumably seconds - confirm against nf_conntrack_expect_policy).
+ */
+static const struct nf_conntrack_expect_policy h245_exp_policy = {
+ .max_expected = H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */,
+ .timeout = 240,
+};
+
+/*
+ * Helper attached by expect_h245() to expected H.245 connections.
+ * NOTE(review): the tuple names IPPROTO_UDP although expect_h245() creates
+ * the expectation for IPPROTO_TCP and h245_help() parses a TCP header;
+ * with l3num AF_UNSPEC the tuple may never be matched directly - confirm
+ * this is intentional.
+ */
+static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = {
+ .name = "H.245",
+ .me = THIS_MODULE,
+ .tuple.src.l3num = AF_UNSPEC,
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .help = h245_help,
+ .expect_policy = &h245_exp_policy,
+};
+
+/****************************************************************************/
+/*
+ * Extract the address and port referenced by an H.225 transport address.
+ *
+ * @ct:    connection; its L3 protocol number must match the address family
+ * @data:  base of the buffer that the offset stored in @taddr refers to
+ * @taddr: decoded H.225 transport address
+ * @addr:  receives the address, with the unused tail of the union zeroed
+ * @port:  receives the two bytes that follow the address, copied unchanged
+ *
+ * Returns 1 on success; 0 when @taddr is neither an IPv4 nor an IPv6
+ * address or its family differs from that of @ct.
+ */
+int get_h225_addr(struct nf_conn *ct, unsigned char *data,
+		  TransportAddress *taddr,
+		  union nf_inet_addr *addr, __be16 *port)
+{
+	const unsigned char *raw;
+	int alen;
+
+	if (taddr->choice == eTransportAddress_ipAddress) {
+		if (nf_ct_l3num(ct) != AF_INET)
+			return 0;
+		raw = data + taddr->ipAddress.ip;
+		alen = 4;
+	} else if (taddr->choice == eTransportAddress_ip6Address) {
+		if (nf_ct_l3num(ct) != AF_INET6)
+			return 0;
+		raw = data + taddr->ip6Address.ip;
+		alen = 16;
+	} else {
+		return 0;
+	}
+
+	/* Zero the whole union, then lay the address over its start */
+	memset(addr, 0, sizeof(*addr));
+	memcpy(addr, raw, alen);
+	/* The port sits directly behind the address bytes */
+	memcpy(port, raw + alen, sizeof(*port));
+
+	return 1;
+}
+
+/****************************************************************************/
+/*
+ * Create a TCP expectation, served by the H.245 helper, for the h245Address
+ * announced in @taddr.
+ *
+ * Returns 0 when the address is not tracked (unreadable, not the sender's
+ * source address, or port 0) or the expectation was registered, -1 on
+ * allocation/registration failure, or the NAT hook's result when it is used.
+ */
+static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, int dataoff,
+		       TransportAddress *taddr)
+{
+	int dir = CTINFO2DIR(ctinfo);
+	typeof(nat_h245_hook) nat_h245 = NULL;
+	struct nf_conntrack_expect *exp;
+	union nf_inet_addr addr;
+	__be16 port;
+	int ret;
+
+	/* Read h245Address; it must be the sender's own source address */
+	if (!get_h225_addr(ct, *data, taddr, &addr, &port))
+		return 0;
+	if (memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)))
+		return 0;
+	if (port == 0)
+		return 0;
+
+	/* Create expect for h245 connection */
+	exp = nf_ct_expect_alloc(ct);
+	if (exp == NULL)
+		return -1;
+	nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+			  &ct->tuplehash[!dir].tuple.src.u3,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  IPPROTO_TCP, NULL, &port);
+	exp->helper = &nf_conntrack_helper_h245;
+
+	/* The hook is only looked up when the two tuple addresses differ */
+	if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
+		   &ct->tuplehash[!dir].tuple.dst.u3,
+		   sizeof(ct->tuplehash[dir].tuple.src.u3)))
+		nat_h245 = rcu_dereference(nat_h245_hook);
+
+	if (nat_h245 && (ct->status & IPS_NAT_MASK)) {
+		/* NAT needed */
+		ret = nat_h245(skb, ct, ctinfo, data, dataoff, taddr,
+			       port, exp);
+	} else if (nf_ct_expect_related(exp) == 0) {
+		/* Conntrack only */
+		pr_debug("nf_ct_q931: expect H.245 ");
+		nf_ct_dump_tuple(&exp->tuple);
+		ret = 0;
+	} else {
+		ret = -1;
+	}
+
+	/* Drop the reference taken by nf_ct_expect_alloc() */
+	nf_ct_expect_put(exp);
+
+	return ret;
+}
+
+/*
+ * If the calling party is on the same side as the forward-to party,
+ * we don't need to track the second call.
+ *
+ * Looks up a route towards @src and towards @dst and returns 1 when both
+ * routes use the same gateway and the same output device.  Returns 0
+ * otherwise, including when no afinfo is registered for @family, when
+ * either route lookup fails, or when @family is not handled below.
+ */
+static int callforward_do_filter(const union nf_inet_addr *src,
+ const union nf_inet_addr *dst,
+ u_int8_t family)
+{
+ const struct nf_afinfo *afinfo;
+ struct flowi fl1, fl2;
+ int ret = 0;
+
+ /* rcu_read_lock()ed by nf_hook_slow() */
+ afinfo = nf_get_afinfo(family);
+ if (!afinfo)
+ return 0;
+
+ /* Only the destination field of each flow key is filled in below */
+ memset(&fl1, 0, sizeof(fl1));
+ memset(&fl2, 0, sizeof(fl2));
+
+ switch (family) {
+ case AF_INET: {
+ struct rtable *rt1, *rt2;
+
+ fl1.fl4_dst = src->ip;
+ fl2.fl4_dst = dst->ip;
+ /* afinfo->route() returning 0 means a route was stored in rtN */
+ if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) {
+ if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
+ if (rt1->rt_gateway == rt2->rt_gateway &&
+ rt1->u.dst.dev == rt2->u.dst.dev)
+ ret = 1;
+ dst_release(&rt2->u.dst);
+ }
+ dst_release(&rt1->u.dst);
+ }
+ break;
+ }
+#if defined(CONFIG_NF_CONNTRACK_IPV6) || \
+ defined(CONFIG_NF_CONNTRACK_IPV6_MODULE)
+ case AF_INET6: {
+ struct rt6_info *rt1, *rt2;
+
+ memcpy(&fl1.fl6_dst, src, sizeof(fl1.fl6_dst));
+ memcpy(&fl2.fl6_dst, dst, sizeof(fl2.fl6_dst));
+ /* Same scheme as the IPv4 case, comparing the IPv6 gateways */
+ if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) {
+ if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
+ if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
+ sizeof(rt1->rt6i_gateway)) &&
+ rt1->u.dst.dev == rt2->u.dst.dev)
+ ret = 1;
+ dst_release(&rt2->u.dst);
+ }
+ dst_release(&rt1->u.dst);
+ }
+ break;
+ }
+#endif
+ }
+ return ret;
+
+}
+
+/****************************************************************************/
+/*
+ * Create a TCP expectation, served by the Q.931 helper, for the second call
+ * leg towards the alternativeAddress announced in @taddr.
+ *
+ * Returns 0 when nothing is tracked (address unreadable, port 0, or the
+ * call-forwarding filter decided the call need not be tracked) or the
+ * expectation was registered, -1 on allocation/registration failure, or the
+ * NAT hook's result when it is used.
+ */
+static int expect_callforwarding(struct sk_buff *skb,
+				 struct nf_conn *ct,
+				 enum ip_conntrack_info ctinfo,
+				 unsigned char **data, int dataoff,
+				 TransportAddress *taddr)
+{
+	int dir = CTINFO2DIR(ctinfo);
+	typeof(nat_callforwarding_hook) nat_callforwarding = NULL;
+	struct nf_conntrack_expect *exp;
+	union nf_inet_addr addr;
+	__be16 port;
+	int ret;
+
+	/* Read alternativeAddress */
+	if (!get_h225_addr(ct, *data, taddr, &addr, &port))
+		return 0;
+	if (port == 0)
+		return 0;
+
+	/* If the calling party is on the same side of the forward-to party,
+	 * we don't need to track the second call */
+	if (callforward_filter &&
+	    callforward_do_filter(&addr, &ct->tuplehash[!dir].tuple.src.u3,
+				  nf_ct_l3num(ct))) {
+		pr_debug("nf_ct_q931: Call Forwarding not tracked\n");
+		return 0;
+	}
+
+	/* Create expect for the second call leg */
+	exp = nf_ct_expect_alloc(ct);
+	if (exp == NULL)
+		return -1;
+	nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+			  &ct->tuplehash[!dir].tuple.src.u3, &addr,
+			  IPPROTO_TCP, NULL, &port);
+	exp->helper = nf_conntrack_helper_q931;
+
+	/* The hook is only looked up when the two tuple addresses differ */
+	if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
+		   &ct->tuplehash[!dir].tuple.dst.u3,
+		   sizeof(ct->tuplehash[dir].tuple.src.u3)))
+		nat_callforwarding = rcu_dereference(nat_callforwarding_hook);
+
+	if (nat_callforwarding && (ct->status & IPS_NAT_MASK)) {
+		/* Need NAT */
+		ret = nat_callforwarding(skb, ct, ctinfo, data, dataoff,
+					 taddr, port, exp);
+	} else if (nf_ct_expect_related(exp) == 0) {
+		/* Conntrack only */
+		pr_debug("nf_ct_q931: expect Call Forwarding ");
+		nf_ct_dump_tuple(&exp->tuple);
+		ret = 0;
+	} else {
+		ret = -1;
+	}
+
+	/* Drop the reference taken by nf_ct_expect_alloc() */
+	nf_ct_expect_put(exp);
+
+	return ret;
+}
+
+/****************************************************************************/
+/*
+ * Handle a Q.931 Setup message:
+ *  - expect the H.245 connection if an h245Address is present;
+ *  - when a set_h225_addr hook is installed and the connection has a NAT
+ *    status bit set, rewrite destCallSignalAddress / sourceCallSignalAddress
+ *    if they differ from the reply-direction source / destination tuple
+ *    address;
+ *  - process every fastStart OpenLogicalChannel element.
+ *
+ * Returns 0 on success, -1 if any step fails.
+ */
+static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ Setup_UUIE *setup)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ int ret;
+ int i;
+ __be16 port;
+ union nf_inet_addr addr;
+ typeof(set_h225_addr_hook) set_h225_addr;
+
+ pr_debug("nf_ct_q931: Setup\n");
+
+ if (setup->options & eSetup_UUIE_h245Address) {
+ ret = expect_h245(skb, ct, ctinfo, data, dataoff,
+ &setup->h245Address);
+ if (ret < 0)
+ return -1;
+ }
+
+ /* Read the hook once; it is used for both address rewrites below */
+ set_h225_addr = rcu_dereference(set_h225_addr_hook);
+ if ((setup->options & eSetup_UUIE_destCallSignalAddress) &&
+ (set_h225_addr) && ct->status & IPS_NAT_MASK &&
+ get_h225_addr(ct, *data, &setup->destCallSignalAddress,
+ &addr, &port) &&
+ memcmp(&addr, &ct->tuplehash[!dir].tuple.src.u3, sizeof(addr))) {
+ /* The debug output formats the address union as an IPv6 address */
+ pr_debug("nf_ct_q931: set destCallSignalAddress "
+ NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n",
+ NIP6(*(struct in6_addr *)&addr), ntohs(port),
+ NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3),
+ ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
+ ret = set_h225_addr(skb, data, dataoff,
+ &setup->destCallSignalAddress,
+ &ct->tuplehash[!dir].tuple.src.u3,
+ ct->tuplehash[!dir].tuple.src.u.tcp.port);
+ if (ret < 0)
+ return -1;
+ }
+
+ if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) &&
+ (set_h225_addr) && ct->status & IPS_NAT_MASK &&
+ get_h225_addr(ct, *data, &setup->sourceCallSignalAddress,
+ &addr, &port) &&
+ memcmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(addr))) {
+ pr_debug("nf_ct_q931: set sourceCallSignalAddress "
+ NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n",
+ NIP6(*(struct in6_addr *)&addr), ntohs(port),
+ NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3),
+ ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
+ ret = set_h225_addr(skb, data, dataoff,
+ &setup->sourceCallSignalAddress,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ ct->tuplehash[!dir].tuple.dst.u.tcp.port);
+ if (ret < 0)
+ return -1;
+ }
+
+ /* Each fastStart item is handled like a standalone OpenLogicalChannel */
+ if (setup->options & eSetup_UUIE_fastStart) {
+ for (i = 0; i < setup->fastStart.count; i++) {
+ ret = process_olc(skb, ct, ctinfo, data, dataoff,
+ &setup->fastStart.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/****************************************************************************/
+/*
+ * Handle a Q.931 CallProceeding message: expect the H.245 connection if an
+ * h245Address is present and process every fastStart element.
+ *
+ * Returns 0 on success, -1 if any step fails.
+ */
+static int process_callproceeding(struct sk_buff *skb,
+				  struct nf_conn *ct,
+				  enum ip_conntrack_info ctinfo,
+				  unsigned char **data, int dataoff,
+				  CallProceeding_UUIE *callproc)
+{
+	int idx;
+
+	pr_debug("nf_ct_q931: CallProceeding\n");
+
+	if ((callproc->options & eCallProceeding_UUIE_h245Address) &&
+	    expect_h245(skb, ct, ctinfo, data, dataoff,
+			&callproc->h245Address) < 0)
+		return -1;
+
+	if (!(callproc->options & eCallProceeding_UUIE_fastStart))
+		return 0;
+
+	/* Each fastStart item is an OpenLogicalChannel */
+	for (idx = 0; idx < callproc->fastStart.count; idx++) {
+		if (process_olc(skb, ct, ctinfo, data, dataoff,
+				&callproc->fastStart.item[idx]) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Handle a Q.931 Connect message: expect the H.245 connection if an
+ * h245Address is present and process every fastStart element.
+ *
+ * Returns 0 on success, -1 if any step fails.
+ */
+static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
+			   enum ip_conntrack_info ctinfo,
+			   unsigned char **data, int dataoff,
+			   Connect_UUIE *connect)
+{
+	int idx;
+
+	pr_debug("nf_ct_q931: Connect\n");
+
+	if ((connect->options & eConnect_UUIE_h245Address) &&
+	    expect_h245(skb, ct, ctinfo, data, dataoff,
+			&connect->h245Address) < 0)
+		return -1;
+
+	if (!(connect->options & eConnect_UUIE_fastStart))
+		return 0;
+
+	/* Each fastStart item is an OpenLogicalChannel */
+	for (idx = 0; idx < connect->fastStart.count; idx++) {
+		if (process_olc(skb, ct, ctinfo, data, dataoff,
+				&connect->fastStart.item[idx]) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Handle a Q.931 Alerting message: expect the H.245 connection if an
+ * h245Address is present and process every fastStart element.
+ *
+ * Returns 0 on success, -1 if any step fails.
+ */
+static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
+			    enum ip_conntrack_info ctinfo,
+			    unsigned char **data, int dataoff,
+			    Alerting_UUIE *alert)
+{
+	int idx;
+
+	pr_debug("nf_ct_q931: Alerting\n");
+
+	if ((alert->options & eAlerting_UUIE_h245Address) &&
+	    expect_h245(skb, ct, ctinfo, data, dataoff,
+			&alert->h245Address) < 0)
+		return -1;
+
+	if (!(alert->options & eAlerting_UUIE_fastStart))
+		return 0;
+
+	/* Each fastStart item is an OpenLogicalChannel */
+	for (idx = 0; idx < alert->fastStart.count; idx++) {
+		if (process_olc(skb, ct, ctinfo, data, dataoff,
+				&alert->fastStart.item[idx]) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Handle a Q.931 Facility message.
+ *
+ * A callForwarded facility is handled exclusively through
+ * expect_callforwarding() (or ignored when it carries no
+ * alternativeAddress).  Any other facility is treated like the other
+ * signalling messages: expect H.245 if announced, then process fastStart.
+ *
+ * Returns expect_callforwarding()'s result for a forwarded call, otherwise
+ * 0 on success and -1 if any step fails.
+ */
+static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
+			    enum ip_conntrack_info ctinfo,
+			    unsigned char **data, int dataoff,
+			    Facility_UUIE *facility)
+{
+	int idx;
+
+	pr_debug("nf_ct_q931: Facility\n");
+
+	if (facility->reason.choice == eFacilityReason_callForwarded) {
+		if (!(facility->options & eFacility_UUIE_alternativeAddress))
+			return 0;
+
+		return expect_callforwarding(skb, ct, ctinfo, data,
+					     dataoff,
+					     &facility->alternativeAddress);
+	}
+
+	if ((facility->options & eFacility_UUIE_h245Address) &&
+	    expect_h245(skb, ct, ctinfo, data, dataoff,
+			&facility->h245Address) < 0)
+		return -1;
+
+	if (!(facility->options & eFacility_UUIE_fastStart))
+		return 0;
+
+	/* Each fastStart item is an OpenLogicalChannel */
+	for (idx = 0; idx < facility->fastStart.count; idx++) {
+		if (process_olc(skb, ct, ctinfo, data, dataoff,
+				&facility->fastStart.item[idx]) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Handle a Q.931 Progress message: expect the H.245 connection if an
+ * h245Address is present and process every fastStart element.
+ *
+ * Returns 0 on success, -1 if any step fails.
+ */
+static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
+			    enum ip_conntrack_info ctinfo,
+			    unsigned char **data, int dataoff,
+			    Progress_UUIE *progress)
+{
+	int idx;
+
+	pr_debug("nf_ct_q931: Progress\n");
+
+	if ((progress->options & eProgress_UUIE_h245Address) &&
+	    expect_h245(skb, ct, ctinfo, data, dataoff,
+			&progress->h245Address) < 0)
+		return -1;
+
+	if (!(progress->options & eProgress_UUIE_fastStart))
+		return 0;
+
+	/* Each fastStart item is an OpenLogicalChannel */
+	for (idx = 0; idx < progress->fastStart.count; idx++) {
+		if (process_olc(skb, ct, ctinfo, data, dataoff,
+				&progress->fastStart.item[idx]) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Dispatch a decoded Q.931 message to the handler for its H.323 message
+ * body, then process any H.245 messages tunnelled in h245Control.
+ *
+ * Returns 0 on success, -1 as soon as a handler reports an error.
+ */
+static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
+			enum ip_conntrack_info ctinfo,
+			unsigned char **data, int dataoff, Q931 *q931)
+{
+	H323_UU_PDU *pdu = &q931->UUIE.h323_uu_pdu;
+	int idx;
+
+	switch (pdu->h323_message_body.choice) {
+	case eH323_UU_PDU_h323_message_body_setup:
+		if (process_setup(skb, ct, ctinfo, data, dataoff,
+				  &pdu->h323_message_body.setup) < 0)
+			return -1;
+		break;
+	case eH323_UU_PDU_h323_message_body_callProceeding:
+		if (process_callproceeding(skb, ct, ctinfo, data, dataoff,
+					   &pdu->h323_message_body.
+					   callProceeding) < 0)
+			return -1;
+		break;
+	case eH323_UU_PDU_h323_message_body_connect:
+		if (process_connect(skb, ct, ctinfo, data, dataoff,
+				    &pdu->h323_message_body.connect) < 0)
+			return -1;
+		break;
+	case eH323_UU_PDU_h323_message_body_alerting:
+		if (process_alerting(skb, ct, ctinfo, data, dataoff,
+				     &pdu->h323_message_body.alerting) < 0)
+			return -1;
+		break;
+	case eH323_UU_PDU_h323_message_body_facility:
+		if (process_facility(skb, ct, ctinfo, data, dataoff,
+				     &pdu->h323_message_body.facility) < 0)
+			return -1;
+		break;
+	case eH323_UU_PDU_h323_message_body_progress:
+		if (process_progress(skb, ct, ctinfo, data, dataoff,
+				     &pdu->h323_message_body.progress) < 0)
+			return -1;
+		break;
+	default:
+		/* Unhandled message bodies are only logged */
+		pr_debug("nf_ct_q931: Q.931 signal %d\n",
+			 pdu->h323_message_body.choice);
+		break;
+	}
+
+	if (!(pdu->options & eH323_UU_PDU_h245Control))
+		return 0;
+
+	/* H.245 messages carried inside the Q.931 message */
+	for (idx = 0; idx < pdu->h245Control.count; idx++) {
+		if (process_h245(skb, ct, ctinfo, data, dataoff,
+				 &pdu->h245Control.item[idx]) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Conntrack helper callback for Q.931 call signalling connections.
+ *
+ * Walks every TPKT in the segment, decodes it as a Q.931 message and lets
+ * process_q931() act on it.  Decode errors end the walk but the packet is
+ * still accepted; a processing failure drops the packet.
+ *
+ * Returns NF_ACCEPT or NF_DROP.
+ */
+static int q931_help(struct sk_buff *skb, unsigned int protoff,
+		     struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+	/* Shared decode target; every use is inside nf_h323_lock */
+	static Q931 q931;
+	unsigned char *data = NULL;
+	int verdict = NF_ACCEPT;
+	int datalen;
+	int dataoff;
+	int ret;
+
+	/* Until there's been traffic both ways, don't look in packets. */
+	if (!(ctinfo == IP_CT_ESTABLISHED ||
+	      ctinfo == IP_CT_ESTABLISHED + IP_CT_IS_REPLY))
+		return NF_ACCEPT;
+
+	pr_debug("nf_ct_q931: skblen = %u\n", skb->len);
+
+	spin_lock_bh(&nf_h323_lock);
+
+	/* Process each TPKT */
+	while (get_tpkt_data(skb, protoff, ct, ctinfo,
+			     &data, &datalen, &dataoff)) {
+		pr_debug("nf_ct_q931: TPKT len=%d ", datalen);
+		nf_ct_dump_tuple(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
+
+		/* Decode Q.931 signal */
+		ret = DecodeQ931(data, datalen, &q931);
+		if (ret < 0) {
+			pr_debug("nf_ct_q931: decoding error: %s\n",
+				 ret == H323_ERROR_BOUND ?
+				 "out of bound" : "out of range");
+			/* We don't drop when decoding error */
+			break;
+		}
+
+		/* Process Q.931 signal */
+		if (process_q931(skb, ct, ctinfo, &data, dataoff, &q931) < 0) {
+			verdict = NF_DROP;
+			break;
+		}
+	}
+
+	spin_unlock_bh(&nf_h323_lock);
+
+	if (verdict == NF_DROP && net_ratelimit())
+		printk("nf_ct_q931: packet dropped\n");
+
+	return verdict;
+}
+
+/****************************************************************************/
+/* Expectation policy shared by both Q.931 helper entries. */
+static const struct nf_conntrack_expect_policy q931_exp_policy = {
+	.timeout	= 240,
+	/* T.120 and H.245 */
+	.max_expected	= H323_RTP_CHANNEL_MAX * 4 + 4,
+};
+
+/*
+ * Q.931 helper descriptors: the same TCP/Q931_PORT helper registered once
+ * for AF_INET ([0]) and once for AF_INET6 ([1]).
+ */
+static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
+	{	/* IPv4 */
+		.name			= "Q.931",
+		.me			= THIS_MODULE,
+		.help			= q931_help,
+		.expect_policy		= &q931_exp_policy,
+		.tuple.src.l3num	= AF_INET,
+		.tuple.src.u.tcp.port	= __constant_htons(Q931_PORT),
+		.tuple.dst.protonum	= IPPROTO_TCP,
+	},
+	{	/* IPv6 */
+		.name			= "Q.931",
+		.me			= THIS_MODULE,
+		.help			= q931_help,
+		.expect_policy		= &q931_exp_policy,
+		.tuple.src.l3num	= AF_INET6,
+		.tuple.src.u.tcp.port	= __constant_htons(Q931_PORT),
+		.tuple.dst.protonum	= IPPROTO_TCP,
+	},
+};
+
+/****************************************************************************/
+/*
+ * Locate the payload that follows the UDP header at @protoff.
+ *
+ * On success *datalen receives the number of bytes between the end of the
+ * UDP header and the end of the skb, and the result of
+ * skb_header_pointer() for that range (with h323_buffer as its buffer
+ * argument) is returned.  Returns NULL when the UDP header cannot be
+ * obtained or when nothing follows it.
+ */
+static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
+				   int *datalen)
+{
+	struct udphdr hdr_copy;
+	int payload_off = protoff + sizeof(hdr_copy);
+
+	/* The header itself is not inspected, it only has to be present */
+	if (skb_header_pointer(skb, protoff, sizeof(hdr_copy),
+			       &hdr_copy) == NULL)
+		return NULL;
+
+	if (payload_off >= skb->len)
+		return NULL;
+
+	*datalen = skb->len - payload_off;
+	return skb_header_pointer(skb, payload_off, *datalen, h323_buffer);
+}
+
+/****************************************************************************/
+/*
+ * Look up the TCP expectation for destination @addr:@port that belongs
+ * to @ct.
+ *
+ * The lookup key is built with an all-zero source address and port, the
+ * address family of @ct, and the given destination.  The family must be
+ * set explicitly: the expectations created in this file are initialised
+ * with nf_ct_l3num(ct) as their family, so a key whose family field is
+ * left as stack garbage cannot be relied upon to match them.  The whole
+ * tuple is zeroed first so that no uninitialised field reaches
+ * __nf_ct_expect_find().
+ *
+ * Returns the expectation when one is found and its master is @ct,
+ * otherwise NULL.  No reference is taken here; the caller in this file
+ * holds nf_conntrack_lock around the call and the use of the result.
+ */
+static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
+					       union nf_inet_addr *addr,
+					       __be16 port)
+{
+	struct net *net = nf_ct_net(ct);
+	struct nf_conntrack_expect *exp;
+	struct nf_conntrack_tuple tuple;
+
+	/* Source address/port stay zero, i.e. "any" */
+	memset(&tuple, 0, sizeof(tuple));
+	tuple.src.l3num = nf_ct_l3num(ct);
+	memcpy(&tuple.dst.u3, addr, sizeof(tuple.dst.u3));
+	tuple.dst.u.tcp.port = port;
+	tuple.dst.protonum = IPPROTO_TCP;
+
+	exp = __nf_ct_expect_find(net, &tuple);
+	if (exp && exp->master == ct)
+		return exp;
+	return NULL;
+}
+
+/****************************************************************************/
+/*
+ * Re-arm the timer of @exp to fire @timeout seconds from now.
+ *
+ * Returns 1 when the timer was re-armed.  Returns 0, leaving the timer
+ * untouched, when @exp is NULL or del_timer() returns zero for it.
+ */
+static int set_expect_timeout(struct nf_conntrack_expect *exp,
+			      unsigned timeout)
+{
+	if (!exp)
+		return 0;
+
+	/* Only re-arm a timer that del_timer() actually removed */
+	if (!del_timer(&exp->timeout))
+		return 0;
+
+	exp->timeout.expires = jiffies + timeout * HZ;
+	add_timer(&exp->timeout);
+
+	return 1;
+}
+
+/****************************************************************************/
+/*
+ * Create a permanent Q.931 (TCP) expectation for the first entry of
+ * @taddr whose address equals the source address of the current direction
+ * and whose port is non-zero.
+ *
+ * With a NAT hook installed and NAT active on @ct the expectation is
+ * handed to the hook; otherwise it is registered directly and the port is
+ * remembered in info->sig_port[dir].
+ *
+ * Returns 0 when no matching address exists or on success, -1 when the
+ * expectation cannot be allocated or registered, or the hook's result.
+ */
+static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data,
+		       TransportAddress *taddr, int count)
+{
+	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+	int dir = CTINFO2DIR(ctinfo);
+	struct nf_conntrack_expect *exp;
+	typeof(nat_q931_hook) nat_q931;
+	union nf_inet_addr addr;
+	union nf_inet_addr *saddr;
+	__be16 port;
+	int ret = 0;
+	int i;
+
+	/* Look for the first related address */
+	for (i = 0; i < count; i++) {
+		if (!get_h225_addr(ct, *data, &taddr[i], &addr, &port))
+			continue;
+		if (memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3,
+			   sizeof(addr)) != 0)
+			continue;
+		if (port == 0)
+			continue;
+		break;
+	}
+
+	if (i >= count)		/* Not found */
+		return 0;
+
+	/* Create expect for Q.931 */
+	exp = nf_ct_expect_alloc(ct);
+	if (exp == NULL)
+		return -1;
+
+	/* only accept calls from GK? */
+	saddr = gkrouted_only ? &ct->tuplehash[!dir].tuple.src.u3 : NULL;
+	nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+			  saddr, &ct->tuplehash[!dir].tuple.dst.u3,
+			  IPPROTO_TCP, NULL, &port);
+	exp->helper = nf_conntrack_helper_q931;
+	exp->flags = NF_CT_EXPECT_PERMANENT;	/* Accept multiple calls */
+
+	nat_q931 = rcu_dereference(nat_q931_hook);
+	if (nat_q931 && (ct->status & IPS_NAT_MASK)) {
+		/* Need NAT */
+		ret = nat_q931(skb, ct, ctinfo, data, taddr, i, port, exp);
+	} else if (nf_ct_expect_related(exp) != 0) {
+		/* Conntrack only, registration failed */
+		ret = -1;
+	} else {
+		/* Conntrack only */
+		pr_debug("nf_ct_ras: expect Q.931 ");
+		nf_ct_dump_tuple(&exp->tuple);
+
+		/* Save port for looking up expect in processing RCF */
+		info->sig_port[dir] = port;
+	}
+
+	nf_ct_expect_put(exp);
+
+	return ret;
+}
+
+/****************************************************************************/
+/*
+ * GatekeeperRequest: when the set_ras_addr hook is installed and NAT is
+ * active on @ct, let the hook handle the single rasAddress and return its
+ * result.  Returns 0 otherwise.
+ */
+static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, GatekeeperRequest *grq)
+{
+	typeof(set_ras_addr_hook) set_ras_addr;
+
+	pr_debug("nf_ct_ras: GRQ\n");
+
+	set_ras_addr = rcu_dereference(set_ras_addr_hook);
+	if (!set_ras_addr || !(ct->status & IPS_NAT_MASK))
+		return 0;	/* not NATed */
+
+	return set_ras_addr(skb, ct, ctinfo, data, &grq->rasAddress, 1);
+}
+
+/****************************************************************************/
+/*
+ * GatekeeperConfirm: expect a RAS (UDP) connection to the rasAddress
+ * announced in the message.
+ *
+ * Nothing is done when the address cannot be extracted, when it equals the
+ * source address and port of the current direction, or when @ct itself was
+ * created from an expectation.
+ *
+ * Returns 0 on success or when nothing was done, -1 when the expectation
+ * cannot be allocated or registered.
+ */
+static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, GatekeeperConfirm *gcf)
+{
+	int dir = CTINFO2DIR(ctinfo);
+	struct nf_conntrack_expect *exp;
+	union nf_inet_addr addr;
+	__be16 port;
+	int ret = 0;
+
+	pr_debug("nf_ct_ras: GCF\n");
+
+	if (!get_h225_addr(ct, *data, &gcf->rasAddress, &addr, &port))
+		return 0;
+
+	/* Registration port is the same as discovery port */
+	if (memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3,
+		   sizeof(addr)) == 0 &&
+	    port == ct->tuplehash[dir].tuple.src.u.udp.port)
+		return 0;
+
+	/* Avoid RAS expectation loops. A GCF is never expected. */
+	if (test_bit(IPS_EXPECTED_BIT, &ct->status))
+		return 0;
+
+	/* Need new expect */
+	exp = nf_ct_expect_alloc(ct);
+	if (exp == NULL)
+		return -1;
+	nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+			  &ct->tuplehash[!dir].tuple.src.u3, &addr,
+			  IPPROTO_UDP, NULL, &port);
+	exp->helper = nf_conntrack_helper_ras;
+
+	if (nf_ct_expect_related(exp) != 0) {
+		ret = -1;
+	} else {
+		pr_debug("nf_ct_ras: expect RAS ");
+		nf_ct_dump_tuple(&exp->tuple);
+	}
+
+	nf_ct_expect_put(exp);
+
+	return ret;
+}
+
+/****************************************************************************/
+/*
+ * RegistrationRequest: set up the Q.931 expectation for the announced
+ * call signal addresses, let the set_ras_addr hook handle the RAS
+ * addresses when NAT is active, and record the registration lifetime
+ * (timeToLive if present, default_rrq_ttl otherwise) in info->timeout.
+ *
+ * Returns 0 on success, -1 when expect_q931() or the hook fails.
+ */
+static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, RegistrationRequest *rrq)
+{
+	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+	typeof(set_ras_addr_hook) set_ras_addr;
+
+	pr_debug("nf_ct_ras: RRQ\n");
+
+	if (expect_q931(skb, ct, ctinfo, data,
+			rrq->callSignalAddress.item,
+			rrq->callSignalAddress.count) < 0)
+		return -1;
+
+	set_ras_addr = rcu_dereference(set_ras_addr_hook);
+	if (set_ras_addr && (ct->status & IPS_NAT_MASK) &&
+	    set_ras_addr(skb, ct, ctinfo, data,
+			 rrq->rasAddress.item,
+			 rrq->rasAddress.count) < 0)
+		return -1;
+
+	if (!(rrq->options & eRegistrationRequest_timeToLive)) {
+		/* No lifetime in the request: fall back to the default */
+		info->timeout = default_rrq_ttl;
+		return 0;
+	}
+
+	pr_debug("nf_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive);
+	info->timeout = rrq->timeToLive;
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * RegistrationConfirm: let the set_sig_addr hook handle the call signal
+ * addresses when NAT is active, take over timeToLive when present, and,
+ * for a positive lifetime, refresh the RAS connection and re-arm the
+ * timer of the matching Q.931 expectation with that lifetime.
+ *
+ * The expectation is looked up and modified with nf_conntrack_lock held.
+ *
+ * Returns 0 on success, -1 when the hook fails.
+ */
+static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, RegistrationConfirm *rcf)
+{
+	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+	int dir = CTINFO2DIR(ctinfo);
+	struct nf_conntrack_expect *exp;
+	typeof(set_sig_addr_hook) set_sig_addr;
+
+	pr_debug("nf_ct_ras: RCF\n");
+
+	set_sig_addr = rcu_dereference(set_sig_addr_hook);
+	if (set_sig_addr && (ct->status & IPS_NAT_MASK) &&
+	    set_sig_addr(skb, ct, ctinfo, data,
+			 rcf->callSignalAddress.item,
+			 rcf->callSignalAddress.count) < 0)
+		return -1;
+
+	if (rcf->options & eRegistrationConfirm_timeToLive) {
+		pr_debug("nf_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive);
+		info->timeout = rcf->timeToLive;
+	}
+
+	/* Without a lifetime there is nothing to refresh */
+	if (info->timeout <= 0)
+		return 0;
+
+	pr_debug("nf_ct_ras: set RAS connection timeout to "
+		 "%u seconds\n", info->timeout);
+	nf_ct_refresh(ct, skb, info->timeout * HZ);
+
+	/* Set expect timeout */
+	spin_lock_bh(&nf_conntrack_lock);
+	exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3,
+			  info->sig_port[!dir]);
+	if (exp) {
+		pr_debug("nf_ct_ras: set Q.931 expect "
+			 "timeout to %u seconds for",
+			 info->timeout);
+		nf_ct_dump_tuple(&exp->tuple);
+		set_expect_timeout(exp, info->timeout);
+	}
+	spin_unlock_bh(&nf_conntrack_lock);
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * UnregistrationRequest: let the set_sig_addr hook handle the call signal
+ * addresses when NAT is active, then remove all expectations of @ct,
+ * forget both saved signalling ports and refresh the connection with a
+ * 30 second timeout.
+ *
+ * Returns 0 on success, -1 when the hook fails.
+ */
+static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, UnregistrationRequest *urq)
+{
+	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+	int dir = CTINFO2DIR(ctinfo);
+	typeof(set_sig_addr_hook) set_sig_addr;
+
+	pr_debug("nf_ct_ras: URQ\n");
+
+	set_sig_addr = rcu_dereference(set_sig_addr_hook);
+	if (set_sig_addr && (ct->status & IPS_NAT_MASK) &&
+	    set_sig_addr(skb, ct, ctinfo, data,
+			 urq->callSignalAddress.item,
+			 urq->callSignalAddress.count) < 0)
+		return -1;
+
+	/* Clear old expect */
+	nf_ct_remove_expectations(ct);
+	info->sig_port[dir] = 0;
+	info->sig_port[!dir] = 0;
+
+	/* Give it 30 seconds for UCF or URJ */
+	nf_ct_refresh(ct, skb, 30 * HZ);
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * AdmissionRequest: rewrite a call signal address through the
+ * set_h225_addr hook when NAT is active.
+ *
+ * Answering ARQ: destCallSignalAddress is present, equals the source
+ * address of the current direction and carries the saved signalling port;
+ * it is rewritten to the other direction's destination address and saved
+ * port.
+ *
+ * Calling ARQ: srcCallSignalAddress is present and equals the source
+ * address of the current direction; it is rewritten to the other
+ * direction's destination address, keeping its port.
+ *
+ * Returns the hook's result when a rewrite is attempted, 0 otherwise.
+ */
+static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, AdmissionRequest *arq)
+{
+	const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+	int dir = CTINFO2DIR(ctinfo);
+	typeof(set_h225_addr_hook) set_h225_addr;
+	union nf_inet_addr addr;
+	__be16 port;
+
+	pr_debug("nf_ct_ras: ARQ\n");
+
+	set_h225_addr = rcu_dereference(set_h225_addr_hook);
+
+	if (arq->options & eAdmissionRequest_destCallSignalAddress) {
+		if (get_h225_addr(ct, *data, &arq->destCallSignalAddress,
+				  &addr, &port) &&
+		    memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3,
+			   sizeof(addr)) == 0 &&
+		    port == info->sig_port[dir] &&
+		    set_h225_addr && (ct->status & IPS_NAT_MASK)) {
+			/* Answering ARQ */
+			return set_h225_addr(skb, data, 0,
+					     &arq->destCallSignalAddress,
+					     &ct->tuplehash[!dir].tuple.dst.u3,
+					     info->sig_port[!dir]);
+		}
+	}
+
+	if (arq->options & eAdmissionRequest_srcCallSignalAddress) {
+		if (get_h225_addr(ct, *data, &arq->srcCallSignalAddress,
+				  &addr, &port) &&
+		    memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3,
+			   sizeof(addr)) == 0 &&
+		    set_h225_addr && (ct->status & IPS_NAT_MASK)) {
+			/* Calling ARQ */
+			return set_h225_addr(skb, data, 0,
+					     &arq->srcCallSignalAddress,
+					     &ct->tuplehash[!dir].tuple.dst.u3,
+					     port);
+		}
+	}
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * AdmissionConfirm.
+ *
+ * When destCallSignalAddress equals the destination address of the
+ * current direction (answering ACF), the address is handed to the
+ * set_sig_addr hook if NAT is active.  Otherwise a permanent Q.931 (TCP)
+ * expectation towards that address is registered.
+ *
+ * Returns 0 when the address cannot be extracted or on success, the
+ * hook's result for an answering ACF, and -1 when the expectation cannot
+ * be allocated or registered.
+ */
+static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, AdmissionConfirm *acf)
+{
+	int dir = CTINFO2DIR(ctinfo);
+	struct nf_conntrack_expect *exp;
+	typeof(set_sig_addr_hook) set_sig_addr;
+	union nf_inet_addr addr;
+	__be16 port;
+	int ret = 0;
+
+	pr_debug("nf_ct_ras: ACF\n");
+
+	if (!get_h225_addr(ct, *data, &acf->destCallSignalAddress,
+			   &addr, &port))
+		return 0;
+
+	if (memcmp(&addr, &ct->tuplehash[dir].tuple.dst.u3,
+		   sizeof(addr)) == 0) {
+		/* Answering ACF */
+		set_sig_addr = rcu_dereference(set_sig_addr_hook);
+		if (!set_sig_addr || !(ct->status & IPS_NAT_MASK))
+			return 0;
+		return set_sig_addr(skb, ct, ctinfo, data,
+				    &acf->destCallSignalAddress, 1);
+	}
+
+	/* Need new expect */
+	exp = nf_ct_expect_alloc(ct);
+	if (exp == NULL)
+		return -1;
+	nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+			  &ct->tuplehash[!dir].tuple.src.u3, &addr,
+			  IPPROTO_TCP, NULL, &port);
+	exp->flags = NF_CT_EXPECT_PERMANENT;
+	exp->helper = nf_conntrack_helper_q931;
+
+	if (nf_ct_expect_related(exp) != 0) {
+		ret = -1;
+	} else {
+		pr_debug("nf_ct_ras: expect Q.931 ");
+		nf_ct_dump_tuple(&exp->tuple);
+	}
+
+	nf_ct_expect_put(exp);
+
+	return ret;
+}
+
+/****************************************************************************/
+/*
+ * LocationRequest: when the set_ras_addr hook is installed and NAT is
+ * active on @ct, let the hook handle the single replyAddress and return
+ * its result.  Returns 0 otherwise.
+ */
+static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, LocationRequest *lrq)
+{
+	typeof(set_ras_addr_hook) set_ras_addr;
+
+	pr_debug("nf_ct_ras: LRQ\n");
+
+	set_ras_addr = rcu_dereference(set_ras_addr_hook);
+	if (!set_ras_addr || !(ct->status & IPS_NAT_MASK))
+		return 0;
+
+	return set_ras_addr(skb, ct, ctinfo, data, &lrq->replyAddress, 1);
+}
+
+/****************************************************************************/
+/*
+ * LocationConfirm: register a permanent Q.931 (TCP) expectation towards
+ * the callSignalAddress carried in the message.  The rasAddress of the
+ * message is not looked at.
+ *
+ * Returns 0 when the address cannot be extracted or on success, -1 when
+ * the expectation cannot be allocated or registered.
+ */
+static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, LocationConfirm *lcf)
+{
+	int dir = CTINFO2DIR(ctinfo);
+	struct nf_conntrack_expect *exp;
+	union nf_inet_addr addr;
+	__be16 port;
+	int ret = 0;
+
+	pr_debug("nf_ct_ras: LCF\n");
+
+	if (!get_h225_addr(ct, *data, &lcf->callSignalAddress,
+			   &addr, &port))
+		return 0;
+
+	/* Need new expect for call signal */
+	exp = nf_ct_expect_alloc(ct);
+	if (exp == NULL)
+		return -1;
+	nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+			  &ct->tuplehash[!dir].tuple.src.u3, &addr,
+			  IPPROTO_TCP, NULL, &port);
+	exp->flags = NF_CT_EXPECT_PERMANENT;
+	exp->helper = nf_conntrack_helper_q931;
+
+	if (nf_ct_expect_related(exp) != 0) {
+		ret = -1;
+	} else {
+		pr_debug("nf_ct_ras: expect Q.931 ");
+		nf_ct_dump_tuple(&exp->tuple);
+	}
+
+	nf_ct_expect_put(exp);
+
+	/* Ignore rasAddress */
+
+	return ret;
+}
+
+/****************************************************************************/
+/*
+ * InfoRequestResponse: when NAT is active, hand the rasAddress to the
+ * set_ras_addr hook and then the call signal addresses to the
+ * set_sig_addr hook.
+ *
+ * Returns 0 on success, -1 as soon as one of the hooks fails.
+ */
+static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, InfoRequestResponse *irr)
+{
+	typeof(set_ras_addr_hook) set_ras_addr;
+	typeof(set_sig_addr_hook) set_sig_addr;
+
+	pr_debug("nf_ct_ras: IRR\n");
+
+	set_ras_addr = rcu_dereference(set_ras_addr_hook);
+	if (set_ras_addr && (ct->status & IPS_NAT_MASK) &&
+	    set_ras_addr(skb, ct, ctinfo, data, &irr->rasAddress, 1) < 0)
+		return -1;
+
+	set_sig_addr = rcu_dereference(set_sig_addr_hook);
+	if (set_sig_addr && (ct->status & IPS_NAT_MASK) &&
+	    set_sig_addr(skb, ct, ctinfo, data,
+			 irr->callSignalAddress.item,
+			 irr->callSignalAddress.count) < 0)
+		return -1;
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Dispatch a decoded RAS message to the handler for its message type and
+ * return that handler's result.  Message types without a handler are
+ * logged and yield 0.
+ */
+static int process_ras(struct sk_buff *skb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, RasMessage *ras)
+{
+	int ret = 0;
+
+	switch (ras->choice) {
+	case eRasMessage_gatekeeperRequest:
+		ret = process_grq(skb, ct, ctinfo, data,
+				  &ras->gatekeeperRequest);
+		break;
+	case eRasMessage_gatekeeperConfirm:
+		ret = process_gcf(skb, ct, ctinfo, data,
+				  &ras->gatekeeperConfirm);
+		break;
+	case eRasMessage_registrationRequest:
+		ret = process_rrq(skb, ct, ctinfo, data,
+				  &ras->registrationRequest);
+		break;
+	case eRasMessage_registrationConfirm:
+		ret = process_rcf(skb, ct, ctinfo, data,
+				  &ras->registrationConfirm);
+		break;
+	case eRasMessage_unregistrationRequest:
+		ret = process_urq(skb, ct, ctinfo, data,
+				  &ras->unregistrationRequest);
+		break;
+	case eRasMessage_admissionRequest:
+		ret = process_arq(skb, ct, ctinfo, data,
+				  &ras->admissionRequest);
+		break;
+	case eRasMessage_admissionConfirm:
+		ret = process_acf(skb, ct, ctinfo, data,
+				  &ras->admissionConfirm);
+		break;
+	case eRasMessage_locationRequest:
+		ret = process_lrq(skb, ct, ctinfo, data,
+				  &ras->locationRequest);
+		break;
+	case eRasMessage_locationConfirm:
+		ret = process_lcf(skb, ct, ctinfo, data,
+				  &ras->locationConfirm);
+		break;
+	case eRasMessage_infoRequestResponse:
+		ret = process_irr(skb, ct, ctinfo, data,
+				  &ras->infoRequestResponse);
+		break;
+	default:
+		pr_debug("nf_ct_ras: RAS message %d\n", ras->choice);
+		break;
+	}
+
+	return ret;
+}
+
+/****************************************************************************/
+/*
+ * Conntrack helper callback for RAS (UDP) traffic.
+ *
+ * The UDP payload is decoded into the function-static ras object and
+ * passed to process_ras(); both happen between spin_lock_bh() and
+ * spin_unlock_bh() on nf_h323_lock.
+ *
+ * Returns NF_DROP when process_ras() fails.  A packet without payload or
+ * one that fails to decode is accepted.
+ */
+static int ras_help(struct sk_buff *skb, unsigned int protoff,
+		    struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+	static RasMessage ras;
+	unsigned char *data;
+	int verdict = NF_ACCEPT;
+	int datalen = 0;
+	int ret;
+
+	pr_debug("nf_ct_ras: skblen = %u\n", skb->len);
+
+	spin_lock_bh(&nf_h323_lock);
+
+	/* Get UDP data */
+	data = get_udp_data(skb, protoff, &datalen);
+	if (data != NULL) {
+		pr_debug("nf_ct_ras: RAS message len=%d ", datalen);
+		nf_ct_dump_tuple(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
+
+		/* Decode RAS message */
+		ret = DecodeRasMessage(data, datalen, &ras);
+		if (ret < 0) {
+			pr_debug("nf_ct_ras: decoding error: %s\n",
+				 ret == H323_ERROR_BOUND ?
+				 "out of bound" : "out of range");
+		} else if (process_ras(skb, ct, ctinfo, &data, &ras) < 0) {
+			/* Process RAS message failed */
+			verdict = NF_DROP;
+		}
+	}
+
+	spin_unlock_bh(&nf_h323_lock);
+
+	if (verdict == NF_DROP && net_ratelimit())
+		printk("nf_ct_ras: packet dropped\n");
+
+	return verdict;
+}
+
+/****************************************************************************/
+/* Expectation policy shared by both RAS helper entries. */
+static const struct nf_conntrack_expect_policy ras_exp_policy = {
+	.timeout	= 240,
+	.max_expected	= 32,
+};
+
+/*
+ * RAS helper descriptors: the same UDP/RAS_PORT helper registered once
+ * for AF_INET ([0]) and once for AF_INET6 ([1]).
+ */
+static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = {
+	{	/* IPv4 */
+		.name			= "RAS",
+		.me			= THIS_MODULE,
+		.help			= ras_help,
+		.expect_policy		= &ras_exp_policy,
+		.tuple.src.l3num	= AF_INET,
+		.tuple.src.u.udp.port	= __constant_htons(RAS_PORT),
+		.tuple.dst.protonum	= IPPROTO_UDP,
+	},
+	{	/* IPv6 */
+		.name			= "RAS",
+		.me			= THIS_MODULE,
+		.help			= ras_help,
+		.expect_policy		= &ras_exp_policy,
+		.tuple.src.l3num	= AF_INET6,
+		.tuple.src.u.udp.port	= __constant_htons(RAS_PORT),
+		.tuple.dst.protonum	= IPPROTO_UDP,
+	},
+};
+
+/****************************************************************************/
+/*
+ * Module exit: unregister the helpers in the reverse order of their
+ * registration in nf_conntrack_h323_init(), then free h323_buffer.
+ */
+static void __exit nf_conntrack_h323_fini(void)
+{
+	int i;
+
+	/* Each array has two entries; take them down last-first */
+	for (i = 1; i >= 0; i--)
+		nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[i]);
+	for (i = 1; i >= 0; i--)
+		nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[i]);
+	nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
+
+	kfree(h323_buffer);
+	pr_debug("nf_ct_h323: fini\n");
+}
+
+/****************************************************************************/
+/*
+ * Module init: allocate the 64 KiB h323_buffer and register the H.245
+ * helper, both Q.931 helpers and both RAS helpers, in that order.
+ *
+ * Returns 0 on success, -ENOMEM when the buffer cannot be allocated, or
+ * the error of the first failing registration.  On failure everything
+ * set up so far is undone in reverse order.
+ */
+static int __init nf_conntrack_h323_init(void)
+{
+	int ret;
+
+	h323_buffer = kmalloc(65536, GFP_KERNEL);
+	if (!h323_buffer)
+		return -ENOMEM;
+
+	ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245);
+	if (ret < 0)
+		goto free_buffer;
+
+	ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]);
+	if (ret < 0)
+		goto unreg_h245;
+
+	ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]);
+	if (ret < 0)
+		goto unreg_q931_v4;
+
+	ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]);
+	if (ret < 0)
+		goto unreg_q931_v6;
+
+	ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]);
+	if (ret < 0)
+		goto unreg_ras_v4;
+
+	pr_debug("nf_ct_h323: init success\n");
+	return 0;
+
+unreg_ras_v4:
+	nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]);
+unreg_q931_v6:
+	nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]);
+unreg_q931_v4:
+	nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]);
+unreg_h245:
+	nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
+free_buffer:
+	kfree(h323_buffer);
+	return ret;
+}
+
+/****************************************************************************/
+/* Module entry and exit points */
+module_init(nf_conntrack_h323_init);
+module_exit(nf_conntrack_h323_fini);
+
+/*
+ * GPL-only exports: the address helper plus the hook pointers that this
+ * file reads through rcu_dereference() before calling them.
+ */
+EXPORT_SYMBOL_GPL(get_h225_addr);
+EXPORT_SYMBOL_GPL(set_h245_addr_hook);
+EXPORT_SYMBOL_GPL(set_h225_addr_hook);
+EXPORT_SYMBOL_GPL(set_sig_addr_hook);
+EXPORT_SYMBOL_GPL(set_ras_addr_hook);
+EXPORT_SYMBOL_GPL(nat_rtp_rtcp_hook);
+EXPORT_SYMBOL_GPL(nat_t120_hook);
+EXPORT_SYMBOL_GPL(nat_h245_hook);
+EXPORT_SYMBOL_GPL(nat_callforwarding_hook);
+EXPORT_SYMBOL_GPL(nat_q931_hook);
+
+/* Module metadata */
+MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
+MODULE_DESCRIPTION("H.323 connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_conntrack_h323");
diff --git a/net/netfilter/nf_conntrack_h323_types.c b/net/netfilter/nf_conntrack_h323_types.c
new file mode 100644
index 0000000..d880f35
--- /dev/null
+++ b/net/netfilter/nf_conntrack_h323_types.c
@@ -0,0 +1,1922 @@
+/* Generated by Jing Min Zhao's ASN.1 parser, May 16 2007
+ *
+ * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ *
+ * This source code is licensed under General Public License version 2.
+ */
+
+/*
+ * Field tables for TransportAddress and the types it is built from.
+ *
+ * NOTE(review): the positional columns of each entry follow struct
+ * field_t, which is declared outside this chunk - confirm their meaning
+ * there before changing any value.  What is visible here: entries flagged
+ * DECODE carry an offsetof() into the corresponding decoded C structure,
+ * entries flagged SKIP carry offset 0, and the last column is either NULL
+ * or the table describing the nested type.
+ */
+
+/* ipAddress: only the 4-byte "ip" is DECODEd; "port" is flagged SKIP */
+static const struct field_t _TransportAddress_ipAddress[] = { /* SEQUENCE */
+ {FNAME("ip") OCTSTR, FIXD, 4, 0, DECODE,
+ offsetof(TransportAddress_ipAddress, ip), NULL},
+ {FNAME("port") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _TransportAddress_ipSourceRoute_route[] = { /* SEQUENCE OF */
+ {FNAME("item") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _TransportAddress_ipSourceRoute_routing[] = { /* CHOICE */
+ {FNAME("strict") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("loose") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/* ipSourceRoute: every field is flagged SKIP */
+static const struct field_t _TransportAddress_ipSourceRoute[] = { /* SEQUENCE */
+ {FNAME("ip") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
+ {FNAME("port") INT, WORD, 0, 0, SKIP, 0, NULL},
+ {FNAME("route") SEQOF, SEMI, 0, 0, SKIP, 0,
+ _TransportAddress_ipSourceRoute_route},
+ {FNAME("routing") CHOICE, 1, 2, 2, SKIP | EXT, 0,
+ _TransportAddress_ipSourceRoute_routing},
+};
+
+static const struct field_t _TransportAddress_ipxAddress[] = { /* SEQUENCE */
+ {FNAME("node") OCTSTR, FIXD, 6, 0, SKIP, 0, NULL},
+ {FNAME("netnum") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
+ {FNAME("port") OCTSTR, FIXD, 2, 0, SKIP, 0, NULL},
+};
+
+/* ip6Address: only the 16-byte "ip" is DECODEd; "port" is flagged SKIP */
+static const struct field_t _TransportAddress_ip6Address[] = { /* SEQUENCE */
+ {FNAME("ip") OCTSTR, FIXD, 16, 0, DECODE,
+ offsetof(TransportAddress_ip6Address, ip), NULL},
+ {FNAME("port") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H221NonStandard[] = { /* SEQUENCE */
+ {FNAME("t35CountryCode") INT, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("t35Extension") INT, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("manufacturerCode") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _NonStandardIdentifier[] = { /* CHOICE */
+ {FNAME("object") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("h221NonStandard") SEQ, 0, 3, 3, SKIP | EXT, 0,
+ _H221NonStandard},
+};
+
+static const struct field_t _NonStandardParameter[] = { /* SEQUENCE */
+ {FNAME("nonStandardIdentifier") CHOICE, 1, 2, 2, SKIP | EXT, 0,
+ _NonStandardIdentifier},
+ {FNAME("data") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+/* Of the seven alternatives only ipAddress and ip6Address are DECODEd */
+static const struct field_t _TransportAddress[] = { /* CHOICE */
+ {FNAME("ipAddress") SEQ, 0, 2, 2, DECODE,
+ offsetof(TransportAddress, ipAddress), _TransportAddress_ipAddress},
+ {FNAME("ipSourceRoute") SEQ, 0, 4, 4, SKIP | EXT, 0,
+ _TransportAddress_ipSourceRoute},
+ {FNAME("ipxAddress") SEQ, 0, 3, 3, SKIP, 0,
+ _TransportAddress_ipxAddress},
+ {FNAME("ip6Address") SEQ, 0, 2, 2, DECODE | EXT,
+ offsetof(TransportAddress, ip6Address),
+ _TransportAddress_ip6Address},
+ {FNAME("netBios") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
+ {FNAME("nsap") OCTSTR, 5, 1, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardAddress") SEQ, 0, 2, 2, SKIP, 0,
+ _NonStandardParameter},
+};
+
+/*
+ * AliasAddress: every alternative is flagged SKIP, and the three nested
+ * CHOICE alternatives have no sub-table (NULL).
+ */
+static const struct field_t _AliasAddress[] = { /* CHOICE */
+ {FNAME("dialedDigits") NUMDGT, 7, 1, 0, SKIP, 0, NULL},
+ {FNAME("h323-ID") BMPSTR, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("url-ID") IA5STR, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("transportID") CHOICE, 3, 7, 7, SKIP | EXT, 0, NULL},
+ {FNAME("email-ID") IA5STR, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("partyNumber") CHOICE, 3, 5, 5, SKIP | EXT, 0, NULL},
+ {FNAME("mobileUIM") CHOICE, 1, 2, 2, SKIP | EXT, 0, NULL},
+};
+
+/* Setup-UUIE sourceAddress: a list of AliasAddress items, all skipped */
+static const struct field_t _Setup_UUIE_sourceAddress[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+/*
+ * Field tables for EndpointType and the types nested inside it.  Every
+ * entry in this group is flagged SKIP with offset 0, i.e. none of them
+ * records an offsetof() into a decoded structure.
+ */
+static const struct field_t _VendorIdentifier[] = { /* SEQUENCE */
+ {FNAME("vendor") SEQ, 0, 3, 3, SKIP | EXT, 0, _H221NonStandard},
+ {FNAME("productId") OCTSTR, BYTE, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("versionId") OCTSTR, BYTE, 1, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _GatekeeperInfo[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+};
+
+/*
+ * The eight *Caps tables below (_H310Caps ... _T120OnlyCaps) have
+ * identical contents; each is referenced by its own alternative in
+ * _SupportedProtocols.
+ */
+static const struct field_t _H310Caps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H320Caps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H321Caps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H322Caps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H323Caps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H324Caps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _VoiceCaps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _T120OnlyCaps[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("dataRatesSupported") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+/* The last two alternatives have no sub-table (NULL) */
+static const struct field_t _SupportedProtocols[] = { /* CHOICE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP, 0,
+ _NonStandardParameter},
+ {FNAME("h310") SEQ, 1, 1, 3, SKIP | EXT, 0, _H310Caps},
+ {FNAME("h320") SEQ, 1, 1, 3, SKIP | EXT, 0, _H320Caps},
+ {FNAME("h321") SEQ, 1, 1, 3, SKIP | EXT, 0, _H321Caps},
+ {FNAME("h322") SEQ, 1, 1, 3, SKIP | EXT, 0, _H322Caps},
+ {FNAME("h323") SEQ, 1, 1, 3, SKIP | EXT, 0, _H323Caps},
+ {FNAME("h324") SEQ, 1, 1, 3, SKIP | EXT, 0, _H324Caps},
+ {FNAME("voice") SEQ, 1, 1, 3, SKIP | EXT, 0, _VoiceCaps},
+ {FNAME("t120-only") SEQ, 1, 1, 3, SKIP | EXT, 0, _T120OnlyCaps},
+ {FNAME("nonStandardProtocol") SEQ, 2, 3, 3, SKIP | EXT, 0, NULL},
+ {FNAME("t38FaxAnnexbOnly") SEQ, 2, 5, 5, SKIP | EXT, 0, NULL},
+};
+
+static const struct field_t _GatewayInfo_protocol[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 4, 9, 11, SKIP | EXT, 0, _SupportedProtocols},
+};
+
+static const struct field_t _GatewayInfo[] = { /* SEQUENCE */
+ {FNAME("protocol") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _GatewayInfo_protocol},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+};
+
+static const struct field_t _McuInfo[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("protocol") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _TerminalInfo[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+};
+
+/* EndpointType ties the tables above together; all fields are skipped */
+static const struct field_t _EndpointType[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("vendor") SEQ, 2, 3, 3, SKIP | EXT | OPT, 0,
+ _VendorIdentifier},
+ {FNAME("gatekeeper") SEQ, 1, 1, 1, SKIP | EXT | OPT, 0,
+ _GatekeeperInfo},
+ {FNAME("gateway") SEQ, 2, 2, 2, SKIP | EXT | OPT, 0, _GatewayInfo},
+ {FNAME("mcu") SEQ, 1, 1, 2, SKIP | EXT | OPT, 0, _McuInfo},
+ {FNAME("terminal") SEQ, 1, 1, 1, SKIP | EXT | OPT, 0, _TerminalInfo},
+ {FNAME("mc") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("undefinedNode") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("set") BITSTR, FIXD, 32, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedTunnelledProtocols") SEQOF, SEMI, 0, 0, SKIP | OPT,
+ 0, NULL},
+};
+
+/*
+ * Setup-UUIE helper tables and the Q-series/CallType types.  Every entry
+ * in this group is flagged SKIP with offset 0.
+ */
+
+/* destinationAddress and destExtraCallInfo are both AliasAddress lists */
+static const struct field_t _Setup_UUIE_destinationAddress[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+static const struct field_t _Setup_UUIE_destExtraCallInfo[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+static const struct field_t _Setup_UUIE_destExtraCRV[] = { /* SEQUENCE OF */
+ {FNAME("item") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _Setup_UUIE_conferenceGoal[] = { /* CHOICE */
+ {FNAME("create") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("join") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("invite") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("capability-negotiation") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("callIndependentSupplementaryService") NUL, FIXD, 0, 0, SKIP,
+ 0, NULL},
+};
+
+static const struct field_t _Q954Details[] = { /* SEQUENCE */
+ {FNAME("conferenceCalling") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("threePartyService") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/* Seven BOOL flags followed by the nested Q954Details sequence */
+static const struct field_t _QseriesOptions[] = { /* SEQUENCE */
+ {FNAME("q932Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("q951Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("q952Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("q953Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("q955Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("q956Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("q957Full") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("q954Info") SEQ, 0, 2, 2, SKIP | EXT, 0, _Q954Details},
+};
+
+static const struct field_t _CallType[] = { /* CHOICE */
+ {FNAME("pointToPoint") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("oneToN") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("nToOne") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("nToN") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H245_NonStandardIdentifier_h221NonStandard[] = { /* SEQUENCE */
+ {FNAME("t35CountryCode") INT, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("t35Extension") INT, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("manufacturerCode") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H245_NonStandardIdentifier[] = { /* CHOICE */
+ {FNAME("object") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("h221NonStandard") SEQ, 0, 3, 3, SKIP, 0,
+ _H245_NonStandardIdentifier_h221NonStandard},
+};
+
+static const struct field_t _H245_NonStandardParameter[] = { /* SEQUENCE */
+ {FNAME("nonStandardIdentifier") CHOICE, 1, 2, 2, SKIP, 0,
+ _H245_NonStandardIdentifier},
+ {FNAME("data") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the H261VideoCapability SEQUENCE: six entries, all SKIP.
+ * qcifMPI and cifMPI are additionally flagged OPT.
+ */
+static const struct field_t _H261VideoCapability[] = { /* SEQUENCE */
+ {FNAME("qcifMPI") INT, 2, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cifMPI") INT, 2, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("temporalSpatialTradeOffCapability") BOOL, FIXD, 0, 0, SKIP, 0,
+ NULL},
+ {FNAME("maxBitRate") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("stillImageTransmission") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("videoBadMBsCap") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the H262VideoCapability SEQUENCE: eleven
+ * profileAndLevel-* BOOL members, six INT members flagged SKIP | OPT, and a
+ * trailing videoBadMBsCap BOOL (18 entries in total).
+ */
+static const struct field_t _H262VideoCapability[] = { /* SEQUENCE */
+ {FNAME("profileAndLevel-SPatML") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-MPatLL") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-MPatML") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-MPatH-14") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-MPatHL") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-SNRatLL") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-SNRatML") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-SpatialatH-14") BOOL, FIXD, 0, 0, SKIP, 0,
+ NULL},
+ {FNAME("profileAndLevel-HPatML") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-HPatH-14") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("profileAndLevel-HPatHL") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("videoBitRate") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("vbvBufferSize") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("samplesPerLine") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("linesPerFrame") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("framesPerSecond") INT, 4, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("luminanceSampleRate") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("videoBadMBsCap") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the H263VideoCapability SEQUENCE: 21 entries, all flagged
+ * SKIP. The two trailing SEQ entries (enhancementLayerInfo, h263Options)
+ * have a NULL sub-table.
+ */
+static const struct field_t _H263VideoCapability[] = { /* SEQUENCE */
+ {FNAME("sqcifMPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("qcifMPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cifMPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cif4MPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cif16MPI") INT, 5, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("maxBitRate") INT, CONS, 1, 0, SKIP, 0, NULL},
+ {FNAME("unrestrictedVector") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("arithmeticCoding") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("advancedPrediction") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("pbFrames") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("temporalSpatialTradeOffCapability") BOOL, FIXD, 0, 0, SKIP, 0,
+ NULL},
+ {FNAME("hrd-B") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("bppMaxKb") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("slowSqcifMPI") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("slowQcifMPI") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("slowCifMPI") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("slowCif4MPI") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("slowCif16MPI") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("errorCompensation") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("enhancementLayerInfo") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("h263Options") SEQ, 5, 29, 31, SKIP | EXT | OPT, 0, NULL},
+};
+
+/*
+ * Field table for the IS11172VideoCapability SEQUENCE: eight entries, all
+ * SKIP; the six INT members are additionally flagged OPT.
+ */
+static const struct field_t _IS11172VideoCapability[] = { /* SEQUENCE */
+ {FNAME("constrainedBitstream") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("videoBitRate") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("vbvBufferSize") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("samplesPerLine") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("linesPerFrame") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("pictureRate") INT, 4, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("luminanceSampleRate") INT, CONS, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("videoBadMBsCap") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the VideoCapability CHOICE: six SEQ alternatives, all
+ * SKIP. Each points at its own field table except genericVideoCapability,
+ * which has a NULL sub-table.
+ */
+static const struct field_t _VideoCapability[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
+ _H245_NonStandardParameter},
+ {FNAME("h261VideoCapability") SEQ, 2, 5, 6, SKIP | EXT, 0,
+ _H261VideoCapability},
+ {FNAME("h262VideoCapability") SEQ, 6, 17, 18, SKIP | EXT, 0,
+ _H262VideoCapability},
+ {FNAME("h263VideoCapability") SEQ, 7, 13, 21, SKIP | EXT, 0,
+ _H263VideoCapability},
+ {FNAME("is11172VideoCapability") SEQ, 6, 7, 8, SKIP | EXT, 0,
+ _IS11172VideoCapability},
+ {FNAME("genericVideoCapability") SEQ, 5, 6, 6, SKIP | EXT, 0, NULL},
+};
+
+/*
+ * Field table for the g7231 SEQUENCE of AudioCapability: one BYTE-sized INT
+ * and one BOOL, both SKIP.
+ */
+static const struct field_t _AudioCapability_g7231[] = { /* SEQUENCE */
+ {FNAME("maxAl-sduAudioFrames") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("silenceSuppression") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the IS11172AudioCapability SEQUENCE: eight BOOL members
+ * followed by the WORD-sized bitRate INT, all SKIP.
+ */
+static const struct field_t _IS11172AudioCapability[] = { /* SEQUENCE */
+ {FNAME("audioLayer1") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioLayer2") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioLayer3") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling32k") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling44k1") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling48k") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("singleChannel") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("twoChannels") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("bitRate") INT, WORD, 1, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the IS13818AudioCapability SEQUENCE: twenty BOOL members
+ * followed by the WORD-sized bitRate INT (21 entries), all SKIP.
+ */
+static const struct field_t _IS13818AudioCapability[] = { /* SEQUENCE */
+ {FNAME("audioLayer1") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioLayer2") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioLayer3") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling16k") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling22k05") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling24k") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling32k") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling44k1") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("audioSampling48k") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("singleChannel") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("twoChannels") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("threeChannels2-1") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("threeChannels3-0") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fourChannels2-0-2-0") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fourChannels2-2") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fourChannels3-1") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fiveChannels3-0-2-0") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fiveChannels3-2") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("lowFrequencyEnhancement") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("multilingual") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("bitRate") INT, WORD, 1, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the AudioCapability CHOICE: 22 alternatives, all SKIP.
+ * Only nonStandard, g7231, is11172AudioCapability and
+ * is13818AudioCapability reference a sub-table; the remaining SEQ
+ * alternatives carry NULL.
+ */
+static const struct field_t _AudioCapability[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
+ _H245_NonStandardParameter},
+ {FNAME("g711Alaw64k") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g711Alaw56k") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g711Ulaw64k") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g711Ulaw56k") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g722-64k") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g722-56k") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g722-48k") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g7231") SEQ, 0, 2, 2, SKIP, 0, _AudioCapability_g7231},
+ {FNAME("g728") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g729") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g729AnnexA") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("is11172AudioCapability") SEQ, 0, 9, 9, SKIP | EXT, 0,
+ _IS11172AudioCapability},
+ {FNAME("is13818AudioCapability") SEQ, 0, 21, 21, SKIP | EXT, 0,
+ _IS13818AudioCapability},
+ {FNAME("g729wAnnexB") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g729AnnexAwAnnexB") INT, BYTE, 1, 0, SKIP, 0, NULL},
+ {FNAME("g7231AnnexCCapability") SEQ, 1, 3, 3, SKIP | EXT, 0, NULL},
+ {FNAME("gsmFullRate") SEQ, 0, 3, 3, SKIP | EXT, 0, NULL},
+ {FNAME("gsmHalfRate") SEQ, 0, 3, 3, SKIP | EXT, 0, NULL},
+ {FNAME("gsmEnhancedFullRate") SEQ, 0, 3, 3, SKIP | EXT, 0, NULL},
+ {FNAME("genericAudioCapability") SEQ, 5, 6, 6, SKIP | EXT, 0, NULL},
+ {FNAME("g729Extensions") SEQ, 1, 8, 8, SKIP | EXT, 0, NULL},
+};
+
+/*
+ * Field table for the DataProtocolCapability CHOICE: 14 alternatives, all
+ * SKIP. All are NUL-typed except nonStandard (SEQ, sub-table
+ * _H245_NonStandardParameter) and v76wCompression (CHOICE, NULL sub-table).
+ * The DataApplicationCapability tables in this file share this table.
+ */
+static const struct field_t _DataProtocolCapability[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
+ _H245_NonStandardParameter},
+ {FNAME("v14buffered") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("v42lapm") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("hdlcFrameTunnelling") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("h310SeparateVCStack") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("h310SingleVCStack") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("transparent") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("segmentationAndReassembly") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("hdlcFrameTunnelingwSAR") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("v120") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("separateLANStack") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("v76wCompression") CHOICE, 2, 3, 3, SKIP | EXT, 0, NULL},
+ {FNAME("tcp") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("udp") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the t84Restricted SEQUENCE of T84Profile: nineteen BOOL
+ * members, all SKIP.
+ */
+static const struct field_t _T84Profile_t84Restricted[] = { /* SEQUENCE */
+ {FNAME("qcif") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("cif") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("ccir601Seq") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("ccir601Prog") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("hdtvSeq") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("hdtvProg") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("g3FacsMH200x100") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("g3FacsMH200x200") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("g4FacsMMR200x100") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("g4FacsMMR200x200") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("jbig200x200Seq") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("jbig200x200Prog") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("jbig300x300Seq") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("jbig300x300Prog") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("digPhotoLow") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("digPhotoMedSeq") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("digPhotoMedProg") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("digPhotoHighSeq") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("digPhotoHighProg") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the T84Profile CHOICE: the NUL-typed t84Unrestricted
+ * alternative and the t84Restricted SEQ alternative, whose sub-table is
+ * _T84Profile_t84Restricted.
+ */
+static const struct field_t _T84Profile[] = { /* CHOICE */
+ {FNAME("t84Unrestricted") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("t84Restricted") SEQ, 0, 19, 19, SKIP | EXT, 0,
+ _T84Profile_t84Restricted},
+};
+
+/*
+ * Field table for the t84 SEQUENCE of DataApplicationCapability.application:
+ * t84Protocol (sub-table _DataProtocolCapability) and t84Profile (sub-table
+ * _T84Profile), both SKIP.
+ */
+static const struct field_t _DataApplicationCapability_application_t84[] = { /* SEQUENCE */
+ {FNAME("t84Protocol") CHOICE, 3, 7, 14, SKIP | EXT, 0,
+ _DataProtocolCapability},
+ {FNAME("t84Profile") CHOICE, 1, 2, 2, SKIP, 0, _T84Profile},
+};
+
+/*
+ * Field table for the nlpid SEQUENCE of
+ * DataApplicationCapability.application: nlpidProtocol (sub-table
+ * _DataProtocolCapability) and a SEMI octet string, both SKIP.
+ */
+static const struct field_t _DataApplicationCapability_application_nlpid[] = { /* SEQUENCE */
+ {FNAME("nlpidProtocol") CHOICE, 3, 7, 14, SKIP | EXT, 0,
+ _DataProtocolCapability},
+ {FNAME("nlpidData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the application CHOICE of DataApplicationCapability:
+ * 14 alternatives. t120 is the only entry flagged DECODE and the only one
+ * with an offsetof() value (DataApplicationCapability_application, t120);
+ * every other alternative is SKIP. t30fax, t140, t38fax and
+ * genericDataCapability carry a NULL sub-table.
+ */
+static const struct field_t _DataApplicationCapability_application[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
+ _H245_NonStandardParameter},
+ {FNAME("t120") CHOICE, 3, 7, 14, DECODE | EXT,
+ offsetof(DataApplicationCapability_application, t120),
+ _DataProtocolCapability},
+ {FNAME("dsm-cc") CHOICE, 3, 7, 14, SKIP | EXT, 0,
+ _DataProtocolCapability},
+ {FNAME("userData") CHOICE, 3, 7, 14, SKIP | EXT, 0,
+ _DataProtocolCapability},
+ {FNAME("t84") SEQ, 0, 2, 2, SKIP, 0,
+ _DataApplicationCapability_application_t84},
+ {FNAME("t434") CHOICE, 3, 7, 14, SKIP | EXT, 0,
+ _DataProtocolCapability},
+ {FNAME("h224") CHOICE, 3, 7, 14, SKIP | EXT, 0,
+ _DataProtocolCapability},
+ {FNAME("nlpid") SEQ, 0, 2, 2, SKIP, 0,
+ _DataApplicationCapability_application_nlpid},
+ {FNAME("dsvdControl") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("h222DataPartitioning") CHOICE, 3, 7, 14, SKIP | EXT, 0,
+ _DataProtocolCapability},
+ {FNAME("t30fax") CHOICE, 3, 7, 14, SKIP | EXT, 0, NULL},
+ {FNAME("t140") CHOICE, 3, 7, 14, SKIP | EXT, 0, NULL},
+ {FNAME("t38fax") SEQ, 0, 2, 2, SKIP, 0, NULL},
+ {FNAME("genericDataCapability") SEQ, 5, 6, 6, SKIP | EXT, 0, NULL},
+};
+
+/*
+ * Field table for the DataApplicationCapability SEQUENCE: the application
+ * CHOICE is flagged DECODE with offsetof(DataApplicationCapability,
+ * application); maxBitRate is SKIP.
+ */
+static const struct field_t _DataApplicationCapability[] = { /* SEQUENCE */
+ {FNAME("application") CHOICE, 4, 10, 14, DECODE | EXT,
+ offsetof(DataApplicationCapability, application),
+ _DataApplicationCapability_application},
+ {FNAME("maxBitRate") INT, CONS, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the EncryptionMode CHOICE: nonStandard (sub-table
+ * _H245_NonStandardParameter) and the NUL-typed h233Encryption, both SKIP.
+ */
+static const struct field_t _EncryptionMode[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
+ _H245_NonStandardParameter},
+ {FNAME("h233Encryption") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the DataType CHOICE: nine alternatives. "data" is the
+ * only entry flagged DECODE (offsetof(DataType, data), sub-table
+ * _DataApplicationCapability); the rest are SKIP. h235Control, h235Media
+ * and multiplexedStream carry a NULL sub-table.
+ */
+static const struct field_t _DataType[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
+ _H245_NonStandardParameter},
+ {FNAME("nullData") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("videoData") CHOICE, 3, 5, 6, SKIP | EXT, 0, _VideoCapability},
+ {FNAME("audioData") CHOICE, 4, 14, 22, SKIP | EXT, 0,
+ _AudioCapability},
+ {FNAME("data") SEQ, 0, 2, 2, DECODE | EXT, offsetof(DataType, data),
+ _DataApplicationCapability},
+ {FNAME("encryptionData") CHOICE, 1, 2, 2, SKIP | EXT, 0,
+ _EncryptionMode},
+ {FNAME("h235Control") SEQ, 0, 2, 2, SKIP, 0, NULL},
+ {FNAME("h235Media") SEQ, 0, 2, 2, SKIP | EXT, 0, NULL},
+ {FNAME("multiplexedStream") SEQ, 0, 2, 2, SKIP | EXT, 0, NULL},
+};
+
+/*
+ * Field table for the H222LogicalChannelParameters SEQUENCE: five entries,
+ * all SKIP; the last three are additionally flagged OPT.
+ */
+static const struct field_t _H222LogicalChannelParameters[] = { /* SEQUENCE */
+ {FNAME("resourceID") INT, WORD, 0, 0, SKIP, 0, NULL},
+ {FNAME("subChannelID") INT, WORD, 0, 0, SKIP, 0, NULL},
+ {FNAME("pcr-pid") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("programDescriptors") OCTSTR, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("streamDescriptors") OCTSTR, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+};
+
+/*
+ * Field table for the al3 SEQUENCE of the H.223 adaptationLayerType: two
+ * INT members, both SKIP.
+ */
+static const struct field_t _H223LogicalChannelParameters_adaptationLayerType_al3[] = { /* SEQUENCE */
+ {FNAME("controlFieldOctets") INT, 2, 0, 0, SKIP, 0, NULL},
+ {FNAME("sendBufferSize") INT, CONS, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the adaptationLayerType CHOICE of
+ * H223LogicalChannelParameters: nine alternatives, all SKIP. nonStandard
+ * and al3 reference a sub-table; al1M, al2M and al3M carry NULL.
+ */
+static const struct field_t _H223LogicalChannelParameters_adaptationLayerType[] = { /* CHOICE */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0,
+ _H245_NonStandardParameter},
+ {FNAME("al1Framed") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("al1NotFramed") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("al2WithoutSequenceNumbers") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("al2WithSequenceNumbers") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("al3") SEQ, 0, 2, 2, SKIP, 0,
+ _H223LogicalChannelParameters_adaptationLayerType_al3},
+ {FNAME("al1M") SEQ, 0, 7, 8, SKIP | EXT, 0, NULL},
+ {FNAME("al2M") SEQ, 0, 2, 2, SKIP | EXT, 0, NULL},
+ {FNAME("al3M") SEQ, 0, 5, 6, SKIP | EXT, 0, NULL},
+};
+
+/*
+ * Field table for the H223LogicalChannelParameters SEQUENCE: the
+ * adaptationLayerType CHOICE and the segmentableFlag BOOL, both SKIP.
+ */
+static const struct field_t _H223LogicalChannelParameters[] = { /* SEQUENCE */
+ {FNAME("adaptationLayerType") CHOICE, 3, 6, 9, SKIP | EXT, 0,
+ _H223LogicalChannelParameters_adaptationLayerType},
+ {FNAME("segmentableFlag") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the CRCLength CHOICE: three NUL-typed alternatives, all
+ * SKIP.
+ */
+static const struct field_t _CRCLength[] = { /* CHOICE */
+ {FNAME("crc8bit") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("crc16bit") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("crc32bit") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the V76HDLCParameters SEQUENCE: crcLength (sub-table
+ * _CRCLength), a WORD-sized INT and a BOOL, all SKIP.
+ */
+static const struct field_t _V76HDLCParameters[] = { /* SEQUENCE */
+ {FNAME("crcLength") CHOICE, 2, 3, 3, SKIP | EXT, 0, _CRCLength},
+ {FNAME("n401") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("loopbackTestProcedure") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the suspendResume CHOICE of V76LogicalChannelParameters:
+ * three NUL-typed alternatives, all SKIP.
+ */
+static const struct field_t _V76LogicalChannelParameters_suspendResume[] = { /* CHOICE */
+ {FNAME("noSuspendResume") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("suspendResumewAddress") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("suspendResumewoAddress") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the recovery CHOICE inside the V.76 eRM mode: three
+ * NUL-typed alternatives, all SKIP.
+ */
+static const struct field_t _V76LogicalChannelParameters_mode_eRM_recovery[] = { /* CHOICE */
+ {FNAME("rej") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("sREJ") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("mSREJ") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the eRM SEQUENCE of the V.76 mode: the windowSize INT and
+ * the recovery CHOICE (sub-table
+ * _V76LogicalChannelParameters_mode_eRM_recovery), both SKIP.
+ */
+static const struct field_t _V76LogicalChannelParameters_mode_eRM[] = { /* SEQUENCE */
+ {FNAME("windowSize") INT, 7, 1, 0, SKIP, 0, NULL},
+ {FNAME("recovery") CHOICE, 2, 3, 3, SKIP | EXT, 0,
+ _V76LogicalChannelParameters_mode_eRM_recovery},
+};
+
+/*
+ * Field table for the mode CHOICE of V76LogicalChannelParameters: the eRM
+ * SEQ alternative (sub-table _V76LogicalChannelParameters_mode_eRM) and the
+ * NUL-typed uNERM alternative, both SKIP.
+ */
+static const struct field_t _V76LogicalChannelParameters_mode[] = { /* CHOICE */
+ {FNAME("eRM") SEQ, 0, 2, 2, SKIP | EXT, 0,
+ _V76LogicalChannelParameters_mode_eRM},
+ {FNAME("uNERM") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/* Field table for the V75Parameters SEQUENCE: a single SKIP BOOL member. */
+static const struct field_t _V75Parameters[] = { /* SEQUENCE */
+ {FNAME("audioHeaderPresent") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the V76LogicalChannelParameters SEQUENCE: five entries,
+ * all SKIP. Each entry other than the uIH BOOL points at its own sub-table.
+ */
+static const struct field_t _V76LogicalChannelParameters[] = { /* SEQUENCE */
+ {FNAME("hdlcParameters") SEQ, 0, 3, 3, SKIP | EXT, 0,
+ _V76HDLCParameters},
+ {FNAME("suspendResume") CHOICE, 2, 3, 3, SKIP | EXT, 0,
+ _V76LogicalChannelParameters_suspendResume},
+ {FNAME("uIH") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("mode") CHOICE, 1, 2, 2, SKIP | EXT, 0,
+ _V76LogicalChannelParameters_mode},
+ {FNAME("v75Parameters") SEQ, 0, 1, 1, SKIP | EXT, 0, _V75Parameters},
+};
+
+/*
+ * Element description for the nonStandard SEQUENCE OF in
+ * H2250LogicalChannelParameters: one SKIP "item" entry whose sub-table is
+ * _H245_NonStandardParameter.
+ */
+static const struct field_t _H2250LogicalChannelParameters_nonStandard[] = { /* SEQUENCE OF */
+ {FNAME("item") SEQ, 0, 2, 2, SKIP, 0, _H245_NonStandardParameter},
+};
+
+/*
+ * Field table for the iPAddress SEQUENCE of UnicastAddress. "network" is a
+ * FIXD octet string flagged DECODE with
+ * offsetof(UnicastAddress_iPAddress, network); tsapIdentifier is SKIP.
+ * NOTE(review): the 4 on the network entry is presumably the IPv4 address
+ * length in octets - confirm against the struct field_t definition.
+ */
+static const struct field_t _UnicastAddress_iPAddress[] = { /* SEQUENCE */
+ {FNAME("network") OCTSTR, FIXD, 4, 0, DECODE,
+ offsetof(UnicastAddress_iPAddress, network), NULL},
+ {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the iPXAddress SEQUENCE of UnicastAddress: three FIXD
+ * octet strings, all SKIP.
+ */
+static const struct field_t _UnicastAddress_iPXAddress[] = { /* SEQUENCE */
+ {FNAME("node") OCTSTR, FIXD, 6, 0, SKIP, 0, NULL},
+ {FNAME("netnum") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
+ {FNAME("tsapIdentifier") OCTSTR, FIXD, 2, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the iP6Address SEQUENCE of UnicastAddress. "network" is a
+ * FIXD octet string flagged DECODE with
+ * offsetof(UnicastAddress_iP6Address, network); tsapIdentifier is SKIP.
+ * NOTE(review): the 16 on the network entry is presumably the IPv6 address
+ * length in octets - confirm against the struct field_t definition.
+ */
+static const struct field_t _UnicastAddress_iP6Address[] = { /* SEQUENCE */
+ {FNAME("network") OCTSTR, FIXD, 16, 0, DECODE,
+ offsetof(UnicastAddress_iP6Address, network), NULL},
+ {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the routing CHOICE of iPSourceRouteAddress: two NUL-typed
+ * alternatives, both SKIP.
+ */
+static const struct field_t _UnicastAddress_iPSourceRouteAddress_routing[] = { /* CHOICE */
+ {FNAME("strict") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("loose") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Element description for the route SEQUENCE OF in iPSourceRouteAddress:
+ * one SKIP "item" entry, a FIXD octet string.
+ */
+static const struct field_t _UnicastAddress_iPSourceRouteAddress_route[] = { /* SEQUENCE OF */
+ {FNAME("item") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the iPSourceRouteAddress SEQUENCE of UnicastAddress: four
+ * entries, all SKIP. routing and route reference their own sub-tables.
+ */
+static const struct field_t _UnicastAddress_iPSourceRouteAddress[] = { /* SEQUENCE */
+ {FNAME("routing") CHOICE, 1, 2, 2, SKIP, 0,
+ _UnicastAddress_iPSourceRouteAddress_routing},
+ {FNAME("network") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
+ {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL},
+ {FNAME("route") SEQOF, SEMI, 0, 0, SKIP, 0,
+ _UnicastAddress_iPSourceRouteAddress_route},
+};
+
+/*
+ * Field table for the UnicastAddress CHOICE: seven alternatives. iPAddress
+ * and iP6Address are flagged DECODE, with offsetof() values into
+ * UnicastAddress; all other alternatives are SKIP. nonStandardAddress
+ * carries a NULL sub-table.
+ */
+static const struct field_t _UnicastAddress[] = { /* CHOICE */
+ {FNAME("iPAddress") SEQ, 0, 2, 2, DECODE | EXT,
+ offsetof(UnicastAddress, iPAddress), _UnicastAddress_iPAddress},
+ {FNAME("iPXAddress") SEQ, 0, 3, 3, SKIP | EXT, 0,
+ _UnicastAddress_iPXAddress},
+ {FNAME("iP6Address") SEQ, 0, 2, 2, DECODE | EXT,
+ offsetof(UnicastAddress, iP6Address), _UnicastAddress_iP6Address},
+ {FNAME("netBios") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
+ {FNAME("iPSourceRouteAddress") SEQ, 0, 4, 4, SKIP | EXT, 0,
+ _UnicastAddress_iPSourceRouteAddress},
+ {FNAME("nsap") OCTSTR, 5, 1, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardAddress") SEQ, 0, 2, 2, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the iPAddress SEQUENCE of MulticastAddress. Unlike the
+ * UnicastAddress counterpart in this file, both entries are SKIP.
+ */
+static const struct field_t _MulticastAddress_iPAddress[] = { /* SEQUENCE */
+ {FNAME("network") OCTSTR, FIXD, 4, 0, SKIP, 0, NULL},
+ {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the iP6Address SEQUENCE of MulticastAddress. Unlike the
+ * UnicastAddress counterpart in this file, both entries are SKIP.
+ */
+static const struct field_t _MulticastAddress_iP6Address[] = { /* SEQUENCE */
+ {FNAME("network") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
+ {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the MulticastAddress CHOICE: four alternatives, all SKIP.
+ * nonStandardAddress carries a NULL sub-table.
+ */
+static const struct field_t _MulticastAddress[] = { /* CHOICE */
+ {FNAME("iPAddress") SEQ, 0, 2, 2, SKIP | EXT, 0,
+ _MulticastAddress_iPAddress},
+ {FNAME("iP6Address") SEQ, 0, 2, 2, SKIP | EXT, 0,
+ _MulticastAddress_iP6Address},
+ {FNAME("nsap") OCTSTR, 5, 1, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardAddress") SEQ, 0, 2, 2, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the H.245 TransportAddress CHOICE: unicastAddress is
+ * flagged DECODE with offsetof(H245_TransportAddress, unicastAddress);
+ * multicastAddress is SKIP.
+ */
+static const struct field_t _H245_TransportAddress[] = { /* CHOICE */
+ {FNAME("unicastAddress") CHOICE, 3, 5, 7, DECODE | EXT,
+ offsetof(H245_TransportAddress, unicastAddress), _UnicastAddress},
+ {FNAME("multicastAddress") CHOICE, 1, 2, 4, SKIP | EXT, 0,
+ _MulticastAddress},
+};
+
+/*
+ * Field table for the H2250LogicalChannelParameters SEQUENCE: 14 entries.
+ * mediaChannel and mediaControlChannel are flagged DECODE and share the
+ * _H245_TransportAddress sub-table. The seven entries from
+ * mediaControlGuaranteedDelivery through redundancyEncoding are flagged
+ * STOP.
+ * NOTE(review): the final "source" entry is SKIP although it follows the
+ * STOP entries - confirm this is intentional.
+ */
+static const struct field_t _H2250LogicalChannelParameters[] = { /* SEQUENCE */
+ {FNAME("nonStandard") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _H2250LogicalChannelParameters_nonStandard},
+ {FNAME("sessionID") INT, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("associatedSessionID") INT, 8, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("mediaChannel") CHOICE, 1, 2, 2, DECODE | EXT | OPT,
+ offsetof(H2250LogicalChannelParameters, mediaChannel),
+ _H245_TransportAddress},
+ {FNAME("mediaGuaranteedDelivery") BOOL, FIXD, 0, 0, SKIP | OPT, 0,
+ NULL},
+ {FNAME("mediaControlChannel") CHOICE, 1, 2, 2, DECODE | EXT | OPT,
+ offsetof(H2250LogicalChannelParameters, mediaControlChannel),
+ _H245_TransportAddress},
+ {FNAME("mediaControlGuaranteedDelivery") BOOL, FIXD, 0, 0, STOP | OPT,
+ 0, NULL},
+ {FNAME("silenceSuppression") BOOL, FIXD, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("destination") SEQ, 0, 2, 2, STOP | EXT | OPT, 0, NULL},
+ {FNAME("dynamicRTPPayloadType") INT, 5, 96, 0, STOP | OPT, 0, NULL},
+ {FNAME("mediaPacketization") CHOICE, 0, 1, 2, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("transportCapability") SEQ, 3, 3, 3, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("redundancyEncoding") SEQ, 1, 2, 2, STOP | EXT | OPT, 0, NULL},
+ {FNAME("source") SEQ, 0, 2, 2, SKIP | EXT | OPT, 0, NULL},
+};
+
+/*
+ * Field table for the multiplexParameters CHOICE of the forward logical
+ * channel parameters: five alternatives. Only
+ * h2250LogicalChannelParameters is flagged DECODE; the others are SKIP.
+ */
+static const struct field_t _OpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters[] = { /* CHOICE */
+ {FNAME("h222LogicalChannelParameters") SEQ, 3, 5, 5, SKIP | EXT, 0,
+ _H222LogicalChannelParameters},
+ {FNAME("h223LogicalChannelParameters") SEQ, 0, 2, 2, SKIP | EXT, 0,
+ _H223LogicalChannelParameters},
+ {FNAME("v76LogicalChannelParameters") SEQ, 0, 5, 5, SKIP | EXT, 0,
+ _V76LogicalChannelParameters},
+ {FNAME("h2250LogicalChannelParameters") SEQ, 10, 11, 14, DECODE | EXT,
+ offsetof
+ (OpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters,
+ h2250LogicalChannelParameters), _H2250LogicalChannelParameters},
+ {FNAME("none") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the forwardLogicalChannelParameters SEQUENCE of
+ * OpenLogicalChannel: five entries. dataType and multiplexParameters are
+ * flagged DECODE; the three INT entries are SKIP | OPT.
+ */
+static const struct field_t _OpenLogicalChannel_forwardLogicalChannelParameters[] = { /* SEQUENCE */
+ {FNAME("portNumber") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("dataType") CHOICE, 3, 6, 9, DECODE | EXT,
+ offsetof(OpenLogicalChannel_forwardLogicalChannelParameters,
+ dataType), _DataType},
+ {FNAME("multiplexParameters") CHOICE, 2, 3, 5, DECODE | EXT,
+ offsetof(OpenLogicalChannel_forwardLogicalChannelParameters,
+ multiplexParameters),
+ _OpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters},
+ {FNAME("forwardLogicalChannelDependency") INT, WORD, 1, 0, SKIP | OPT,
+ 0, NULL},
+ {FNAME("replacementFor") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+};
+
+/*
+ * Field table for the multiplexParameters CHOICE of the reverse logical
+ * channel parameters: three alternatives. Only
+ * h2250LogicalChannelParameters is flagged DECODE; the others are SKIP.
+ */
+static const struct field_t _OpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters[] = { /* CHOICE */
+ {FNAME("h223LogicalChannelParameters") SEQ, 0, 2, 2, SKIP | EXT, 0,
+ _H223LogicalChannelParameters},
+ {FNAME("v76LogicalChannelParameters") SEQ, 0, 5, 5, SKIP | EXT, 0,
+ _V76LogicalChannelParameters},
+ {FNAME("h2250LogicalChannelParameters") SEQ, 10, 11, 14, DECODE | EXT,
+ offsetof
+ (OpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters,
+ h2250LogicalChannelParameters), _H2250LogicalChannelParameters},
+};
+
+/*
+ * Field table for the reverseLogicalChannelParameters SEQUENCE of
+ * OpenLogicalChannel: four entries. Here dataType is SKIP (the forward
+ * table in this file marks it DECODE); multiplexParameters is the only
+ * DECODE entry and is also OPT.
+ */
+static const struct field_t _OpenLogicalChannel_reverseLogicalChannelParameters[] = { /* SEQUENCE */
+ {FNAME("dataType") CHOICE, 3, 6, 9, SKIP | EXT, 0, _DataType},
+ {FNAME("multiplexParameters") CHOICE, 1, 2, 3, DECODE | EXT | OPT,
+ offsetof(OpenLogicalChannel_reverseLogicalChannelParameters,
+ multiplexParameters),
+ _OpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters},
+ {FNAME("reverseLogicalChannelDependency") INT, WORD, 1, 0, SKIP | OPT,
+ 0, NULL},
+ {FNAME("replacementFor") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+};
+
+/*
+ * Field table for the distribution CHOICE of NetworkAccessParameters: two
+ * NUL-typed alternatives, both SKIP.
+ */
+static const struct field_t _NetworkAccessParameters_distribution[] = { /* CHOICE */
+ {FNAME("unicast") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("multicast") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the address CHOICE of Q2931Address: a NUMSTR alternative
+ * and an OCTSTR alternative, both SKIP.
+ */
+static const struct field_t _Q2931Address_address[] = { /* CHOICE */
+ {FNAME("internationalNumber") NUMSTR, 4, 1, 0, SKIP, 0, NULL},
+ {FNAME("nsapAddress") OCTSTR, 5, 1, 0, SKIP, 0, NULL},
+};
+
+/*
+ * Field table for the Q2931Address SEQUENCE: the address CHOICE (sub-table
+ * _Q2931Address_address) and the SKIP | OPT subaddress octet string.
+ */
+static const struct field_t _Q2931Address[] = { /* SEQUENCE */
+ {FNAME("address") CHOICE, 1, 2, 2, SKIP | EXT, 0,
+ _Q2931Address_address},
+ {FNAME("subaddress") OCTSTR, 5, 1, 0, SKIP | OPT, 0, NULL},
+};
+
+/*
+ * Field table for the networkAddress CHOICE of NetworkAccessParameters:
+ * three alternatives. Only localAreaAddress is flagged DECODE (sub-table
+ * _H245_TransportAddress); q2931Address and e164Address are SKIP.
+ */
+static const struct field_t _NetworkAccessParameters_networkAddress[] = { /* CHOICE */
+ {FNAME("q2931Address") SEQ, 1, 2, 2, SKIP | EXT, 0, _Q2931Address},
+ {FNAME("e164Address") NUMDGT, 7, 1, 0, SKIP, 0, NULL},
+ {FNAME("localAreaAddress") CHOICE, 1, 2, 2, DECODE | EXT,
+ offsetof(NetworkAccessParameters_networkAddress, localAreaAddress),
+ _H245_TransportAddress},
+};
+
+/*
+ * Field table for the NetworkAccessParameters SEQUENCE: five entries.
+ * networkAddress is the only DECODE entry; the others are SKIP.
+ * t120SetupProcedure carries a NULL sub-table.
+ */
+static const struct field_t _NetworkAccessParameters[] = { /* SEQUENCE */
+ {FNAME("distribution") CHOICE, 1, 2, 2, SKIP | EXT | OPT, 0,
+ _NetworkAccessParameters_distribution},
+ {FNAME("networkAddress") CHOICE, 2, 3, 3, DECODE | EXT,
+ offsetof(NetworkAccessParameters, networkAddress),
+ _NetworkAccessParameters_networkAddress},
+ {FNAME("associateConference") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("externalReference") OCTSTR, 8, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("t120SetupProcedure") CHOICE, 2, 3, 3, SKIP | EXT | OPT, 0,
+ NULL},
+};
+
+/*
+ * Field table for the OpenLogicalChannel SEQUENCE: five entries. The
+ * forward and reverse parameter sequences and separateStack are flagged
+ * DECODE, each with an offsetof() value into OpenLogicalChannel.
+ * forwardLogicalChannelNumber is SKIP and the trailing encryptionSync entry
+ * is flagged STOP with a NULL sub-table.
+ */
+static const struct field_t _OpenLogicalChannel[] = { /* SEQUENCE */
+ {FNAME("forwardLogicalChannelNumber") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("forwardLogicalChannelParameters") SEQ, 1, 3, 5, DECODE | EXT,
+ offsetof(OpenLogicalChannel, forwardLogicalChannelParameters),
+ _OpenLogicalChannel_forwardLogicalChannelParameters},
+ {FNAME("reverseLogicalChannelParameters") SEQ, 1, 2, 4,
+ DECODE | EXT | OPT, offsetof(OpenLogicalChannel,
+ reverseLogicalChannelParameters),
+ _OpenLogicalChannel_reverseLogicalChannelParameters},
+ {FNAME("separateStack") SEQ, 2, 4, 5, DECODE | EXT | OPT,
+ offsetof(OpenLogicalChannel, separateStack),
+ _NetworkAccessParameters},
+ {FNAME("encryptionSync") SEQ, 2, 4, 4, STOP | EXT | OPT, 0, NULL},
+};
+
+/*
+ * Element description for the fastStart SEQUENCE OF in Setup_UUIE: one
+ * "item" entry flagged DECODE | OPEN | EXT with sub-table
+ * _OpenLogicalChannel. Where other DECODE entries in this file use
+ * offsetof(), this one carries sizeof(OpenLogicalChannel).
+ * NOTE(review): presumably the per-element stride - confirm in the decoder.
+ */
+static const struct field_t _Setup_UUIE_fastStart[] = { /* SEQUENCE OF */
+ {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
+ sizeof(OpenLogicalChannel), _OpenLogicalChannel}
+ ,
+};
+
+/*
+ * Field table for the Setup_UUIE SEQUENCE. DECODE entries, each with an
+ * offsetof() value into Setup_UUIE: h245Address, destCallSignalAddress and
+ * sourceCallSignalAddress (all via _TransportAddress) and fastStart (via
+ * _Setup_UUIE_fastStart). endpointIdentifier is the only entry flagged
+ * STOP; everything else is SKIP.
+ * NOTE(review): entries after endpointIdentifier are SKIP again - confirm
+ * how the decoder treats entries that follow a STOP.
+ */
+static const struct field_t _Setup_UUIE[] = { /* SEQUENCE */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Setup_UUIE, h245Address), _TransportAddress},
+ {FNAME("sourceAddress") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _Setup_UUIE_sourceAddress},
+ {FNAME("sourceInfo") SEQ, 6, 8, 10, SKIP | EXT, 0, _EndpointType},
+ {FNAME("destinationAddress") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _Setup_UUIE_destinationAddress},
+ {FNAME("destCallSignalAddress") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Setup_UUIE, destCallSignalAddress), _TransportAddress},
+ {FNAME("destExtraCallInfo") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _Setup_UUIE_destExtraCallInfo},
+ {FNAME("destExtraCRV") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _Setup_UUIE_destExtraCRV},
+ {FNAME("activeMC") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("conferenceID") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
+ {FNAME("conferenceGoal") CHOICE, 2, 3, 5, SKIP | EXT, 0,
+ _Setup_UUIE_conferenceGoal},
+ {FNAME("callServices") SEQ, 0, 8, 8, SKIP | EXT | OPT, 0,
+ _QseriesOptions},
+ {FNAME("callType") CHOICE, 2, 4, 4, SKIP | EXT, 0, _CallType},
+ {FNAME("sourceCallSignalAddress") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Setup_UUIE, sourceCallSignalAddress), _TransportAddress},
+ {FNAME("remoteExtensionAddress") CHOICE, 1, 2, 7, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL},
+ {FNAME("h245SecurityCapability") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("fastStart") SEQOF, SEMI, 0, 30, DECODE | OPT,
+ offsetof(Setup_UUIE, fastStart), _Setup_UUIE_fastStart},
+ {FNAME("mediaWaitForConnect") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("canOverlapSend") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("connectionParameters") SEQ, 0, 3, 3, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("language") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("presentationIndicator") CHOICE, 2, 3, 3, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("screeningIndicator") ENUM, 2, 0, 0, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("symmetricOperationRequired") NUL, FIXD, 0, 0, SKIP | OPT, 0,
+ NULL},
+ {FNAME("capacity") SEQ, 2, 2, 2, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("circuitInfo") SEQ, 3, 3, 3, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("desiredProtocols") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("neededFeatures") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("desiredFeatures") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("supportedFeatures") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("parallelH245Control") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("additionalSourceAddresses") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ NULL},
+};
+
+/*
+ * Element description for the fastStart SEQUENCE OF in CallProceeding_UUIE:
+ * one "item" entry flagged DECODE | OPEN | EXT with sub-table
+ * _OpenLogicalChannel. Where other DECODE entries in this file use
+ * offsetof(), this one carries sizeof(OpenLogicalChannel).
+ */
+static const struct field_t _CallProceeding_UUIE_fastStart[] = { /* SEQUENCE OF */
+ {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
+ sizeof(OpenLogicalChannel), _OpenLogicalChannel}
+ ,
+};
+
+/*
+ * Field table for the CallProceeding_UUIE SEQUENCE: twelve entries.
+ * h245Address (via _TransportAddress) and fastStart (via
+ * _CallProceeding_UUIE_fastStart) are flagged DECODE with offsetof() values
+ * into CallProceeding_UUIE; everything else is SKIP.
+ */
+static const struct field_t _CallProceeding_UUIE[] = { /* SEQUENCE */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT, 0,
+ _EndpointType},
+ {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(CallProceeding_UUIE, h245Address), _TransportAddress},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL},
+ {FNAME("h245SecurityMode") CHOICE, 2, 4, 4, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("fastStart") SEQOF, SEMI, 0, 30, DECODE | OPT,
+ offsetof(CallProceeding_UUIE, fastStart),
+ _CallProceeding_UUIE_fastStart},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fastConnectRefused") NUL, FIXD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL},
+};
+
+/*
+ * Element description for the fastStart SEQUENCE OF in Connect_UUIE: one
+ * "item" entry flagged DECODE | OPEN | EXT with sub-table
+ * _OpenLogicalChannel. Where other DECODE entries in this file use
+ * offsetof(), this one carries sizeof(OpenLogicalChannel).
+ */
+static const struct field_t _Connect_UUIE_fastStart[] = { /* SEQUENCE OF */
+ {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
+ sizeof(OpenLogicalChannel), _OpenLogicalChannel}
+ ,
+};
+
+/*
+ * Field table for the Connect_UUIE SEQUENCE: 19 entries. h245Address (via
+ * _TransportAddress) and fastStart (via _Connect_UUIE_fastStart) are
+ * flagged DECODE with offsetof() values into Connect_UUIE; everything else
+ * is SKIP.
+ */
+static const struct field_t _Connect_UUIE[] = { /* SEQUENCE */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Connect_UUIE, h245Address), _TransportAddress},
+ {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT, 0,
+ _EndpointType},
+ {FNAME("conferenceID") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL},
+ {FNAME("h245SecurityMode") CHOICE, 2, 4, 4, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("fastStart") SEQOF, SEMI, 0, 30, DECODE | OPT,
+ offsetof(Connect_UUIE, fastStart), _Connect_UUIE_fastStart},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("language") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("connectedAddress") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("presentationIndicator") CHOICE, 2, 3, 3, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("screeningIndicator") ENUM, 2, 0, 0, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("fastConnectRefused") NUL, FIXD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("capacity") SEQ, 2, 2, 2, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _Alerting_UUIE_fastStart[] = { /* SEQUENCE OF
+ * Element template used by the "fastStart" entry of _Alerting_UUIE.
+ * The single "item" is flagged DECODE | OPEN | EXT, carries
+ * sizeof(OpenLogicalChannel) and is described by _OpenLogicalChannel
+ * (defined outside this chunk).
+ */
+ {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
+ sizeof(OpenLogicalChannel), _OpenLogicalChannel}
+ ,
+};
+
+static const struct field_t _Alerting_UUIE[] = { /* SEQUENCE
+ * Field table for Alerting-UUIE (17 entries), referenced by the
+ * "alerting" alternative of _H323_UU_PDU_h323_message_body.
+ * Only "h245Address" and "fastStart" are DECODE and carry an offsetof()
+ * into Alerting_UUIE; every other entry is SKIP with offset 0.
+ */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT, 0,
+ _EndpointType},
+ {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Alerting_UUIE, h245Address), _TransportAddress},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL},
+ {FNAME("h245SecurityMode") CHOICE, 2, 4, 4, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("fastStart") SEQOF, SEMI, 0, 30, DECODE | OPT,
+ offsetof(Alerting_UUIE, fastStart), _Alerting_UUIE_fastStart},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("alertingAddress") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("presentationIndicator") CHOICE, 2, 3, 3, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("screeningIndicator") ENUM, 2, 0, 0, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("fastConnectRefused") NUL, FIXD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("capacity") SEQ, 2, 2, 2, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _Information_UUIE[] = { /* SEQUENCE
+ * Field table for Information-UUIE (7 entries), referenced by the
+ * "information" alternative of _H323_UU_PDU_h323_message_body.
+ * Every entry is SKIP with offset 0 and no sub-table.
+ */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("fastStart") SEQOF, SEMI, 0, 30, SKIP | OPT, 0, NULL},
+ {FNAME("fastConnectRefused") NUL, FIXD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("circuitInfo") SEQ, 3, 3, 3, SKIP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _ReleaseCompleteReason[] = { /* CHOICE
+ * Alternatives of ReleaseCompleteReason (22 entries), referenced by the
+ * "reason" entry of _ReleaseComplete_UUIE. All alternatives are SKIP;
+ * all are NUL except "nonStandardReason" (SEQ) and
+ * "replaceWithConferenceInvite" (OCTSTR).
+ */
+ {FNAME("noBandwidth") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("gatekeeperResources") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("unreachableDestination") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("destinationRejection") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("invalidRevision") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("noPermission") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("unreachableGatekeeper") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("gatewayResources") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("badFormatAddress") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("adaptiveBusy") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("inConf") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("undefinedReason") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("facilityCallDeflection") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("securityDenied") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("calledPartyNotRegistered") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("callerNotRegistered") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("newConnectionNeeded") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardReason") SEQ, 0, 2, 2, SKIP, 0, NULL},
+ {FNAME("replaceWithConferenceInvite") OCTSTR, FIXD, 16, 0, SKIP, 0,
+ NULL},
+ {FNAME("genericDataReason") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("neededFeatureNotSupported") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("tunnelledSignallingRejected") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _ReleaseComplete_UUIE[] = { /* SEQUENCE
+ * Field table for ReleaseComplete-UUIE (11 entries), referenced by the
+ * "releaseComplete" alternative of _H323_UU_PDU_h323_message_body.
+ * Every entry is SKIP; only "reason" links a sub-table
+ * (_ReleaseCompleteReason).
+ */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("reason") CHOICE, 4, 12, 22, SKIP | EXT | OPT, 0,
+ _ReleaseCompleteReason},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("busyAddress") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("presentationIndicator") CHOICE, 2, 3, 3, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("screeningIndicator") ENUM, 2, 0, 0, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("capacity") SEQ, 2, 2, 2, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _Facility_UUIE_alternativeAliasAddress[] = { /* SEQUENCE OF
+ * Element template used by the "alternativeAliasAddress" entry of
+ * _Facility_UUIE: one SKIP | EXT "item" described by _AliasAddress
+ * (defined outside this chunk).
+ */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+static const struct field_t _FacilityReason[] = { /* CHOICE
+ * Alternatives of FacilityReason (11 entries), referenced by the
+ * "reason" entry of _Facility_UUIE. Every alternative is a NUL entry
+ * flagged SKIP with no sub-table.
+ */
+ {FNAME("routeCallToGatekeeper") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("callForwarded") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("routeCallToMC") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("undefinedReason") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("conferenceListChoice") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("startH245") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("noH245") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("newTokens") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("featureSetUpdate") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("forwardedElements") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("transportedInformation") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _Facility_UUIE_fastStart[] = { /* SEQUENCE OF
+ * Element template used by the "fastStart" entry of _Facility_UUIE.
+ * The single "item" is flagged DECODE | OPEN | EXT, carries
+ * sizeof(OpenLogicalChannel) and is described by _OpenLogicalChannel
+ * (defined outside this chunk).
+ */
+ {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
+ sizeof(OpenLogicalChannel), _OpenLogicalChannel}
+ ,
+};
+
+static const struct field_t _Facility_UUIE[] = { /* SEQUENCE
+ * Field table for Facility-UUIE (21 entries), referenced by the
+ * "facility" alternative of _H323_UU_PDU_h323_message_body.
+ * DECODE entries, each with an offsetof() into Facility_UUIE:
+ * "alternativeAddress", "reason", "h245Address" and "fastStart".
+ * All remaining entries are SKIP with offset 0.
+ */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("alternativeAddress") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Facility_UUIE, alternativeAddress), _TransportAddress},
+ {FNAME("alternativeAliasAddress") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _Facility_UUIE_alternativeAliasAddress},
+ {FNAME("conferenceID") OCTSTR, FIXD, 16, 0, SKIP | OPT, 0, NULL},
+ {FNAME("reason") CHOICE, 2, 4, 11, DECODE | EXT,
+ offsetof(Facility_UUIE, reason), _FacilityReason},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL},
+ {FNAME("destExtraCallInfo") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("remoteExtensionAddress") CHOICE, 1, 2, 7, SKIP | EXT | OPT, 0,
+ NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("conferences") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Facility_UUIE, h245Address), _TransportAddress},
+ {FNAME("fastStart") SEQOF, SEMI, 0, 30, DECODE | OPT,
+ offsetof(Facility_UUIE, fastStart), _Facility_UUIE_fastStart},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fastConnectRefused") NUL, FIXD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("circuitInfo") SEQ, 3, 3, 3, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("h245SecurityMode") CHOICE, 2, 4, 4, SKIP | EXT | OPT, 0,
+ NULL},
+};
+
+static const struct field_t _CallIdentifier[] = { /* SEQUENCE
+ * One SKIP entry, "guid" (OCTSTR, FIXD, 16). Referenced by the
+ * "callIdentifier" entry of _Progress_UUIE.
+ */
+ {FNAME("guid") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _SecurityServiceMode[] = { /* CHOICE
+ * Three SKIP alternatives; only "nonStandard" links a sub-table
+ * (_NonStandardParameter, defined outside this chunk). Shared by the
+ * three CHOICE entries of _SecurityCapabilities.
+ */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0, _NonStandardParameter},
+ {FNAME("none") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("default") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _SecurityCapabilities[] = { /* SEQUENCE
+ * Four SKIP entries; the three CHOICE entries all point at
+ * _SecurityServiceMode. Referenced by the "tls" and "ipsec"
+ * alternatives of _H245Security.
+ * NOTE(review): the "authenticaton" spelling is a string literal and is
+ * deliberately left untouched here - confirm against the ASN.1 source
+ * before changing it.
+ */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("encryption") CHOICE, 2, 3, 3, SKIP | EXT, 0,
+ _SecurityServiceMode},
+ {FNAME("authenticaton") CHOICE, 2, 3, 3, SKIP | EXT, 0,
+ _SecurityServiceMode},
+ {FNAME("integrity") CHOICE, 2, 3, 3, SKIP | EXT, 0,
+ _SecurityServiceMode},
+};
+
+static const struct field_t _H245Security[] = { /* CHOICE
+ * Four SKIP alternatives, referenced by the "h245SecurityMode" entry of
+ * _Progress_UUIE. "tls" and "ipsec" share _SecurityCapabilities.
+ */
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP, 0, _NonStandardParameter},
+ {FNAME("noSecurity") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("tls") SEQ, 1, 4, 4, SKIP | EXT, 0, _SecurityCapabilities},
+ {FNAME("ipsec") SEQ, 1, 4, 4, SKIP | EXT, 0, _SecurityCapabilities},
+};
+
+static const struct field_t _DHset[] = { /* SEQUENCE
+ * Three SKIP BITSTR entries. Referenced by the "dhkey" entry of
+ * _ClearToken.
+ */
+ {FNAME("halfkey") BITSTR, WORD, 0, 0, SKIP, 0, NULL},
+ {FNAME("modSize") BITSTR, WORD, 0, 0, SKIP, 0, NULL},
+ {FNAME("generator") BITSTR, WORD, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _TypedCertificate[] = { /* SEQUENCE
+ * Two SKIP entries (an OID and an OCTSTR). Referenced by the
+ * "certificate" entry of _ClearToken.
+ */
+ {FNAME("type") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("certificate") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _H235_NonStandardParameter[] = { /* SEQUENCE
+ * Two SKIP entries (an OID and an OCTSTR). Referenced by the
+ * "nonStandard" entry of _ClearToken.
+ */
+ {FNAME("nonStandardIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("data") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _ClearToken[] = { /* SEQUENCE
+ * Field table for ClearToken (11 entries, all SKIP). Referenced by the
+ * "item" of _Progress_UUIE_tokens and by the "hashedVals" entry of
+ * _CryptoToken_cryptoHashedToken. Sub-tables are linked for "dhkey"
+ * (_DHset), "certificate" (_TypedCertificate) and "nonStandard"
+ * (_H235_NonStandardParameter).
+ */
+ {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("timeStamp") INT, CONS, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("password") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("dhkey") SEQ, 0, 3, 3, SKIP | EXT | OPT, 0, _DHset},
+ {FNAME("challenge") OCTSTR, 7, 8, 0, SKIP | OPT, 0, NULL},
+ {FNAME("random") INT, UNCO, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("certificate") SEQ, 0, 2, 2, SKIP | EXT | OPT, 0,
+ _TypedCertificate},
+ {FNAME("generalID") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("nonStandard") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _H235_NonStandardParameter},
+ {FNAME("eckasdhkey") CHOICE, 1, 2, 2, SKIP | EXT | OPT, 0, NULL},
+ {FNAME("sendersID") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _Progress_UUIE_tokens[] = { /* SEQUENCE OF
+ * Element template used by the "tokens" entry of _Progress_UUIE: one
+ * SKIP | EXT "item" described by _ClearToken.
+ */
+ {FNAME("item") SEQ, 8, 9, 11, SKIP | EXT, 0, _ClearToken},
+};
+
+static const struct field_t _Params[] = { /* SEQUENCE
+ * Three SKIP | OPT entries. This table is the sub-table of every
+ * "paramS" entry in the crypto token tables of this file section.
+ */
+ {FNAME("ranInt") INT, UNCO, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("iv8") OCTSTR, FIXD, 8, 0, SKIP | OPT, 0, NULL},
+ {FNAME("iv16") OCTSTR, FIXD, 16, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _CryptoH323Token_cryptoEPPwdHash_token[] = { /* SEQUENCE
+ * Three SKIP entries; "paramS" links _Params. Referenced by the "token"
+ * entry of _CryptoH323Token_cryptoEPPwdHash.
+ */
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("hash") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoH323Token_cryptoEPPwdHash[] = { /* SEQUENCE
+ * Three SKIP entries. Referenced by the "cryptoEPPwdHash" alternative of
+ * _CryptoH323Token. "alias" links _AliasAddress (defined outside this
+ * chunk); "token" links _CryptoH323Token_cryptoEPPwdHash_token.
+ */
+ {FNAME("alias") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+ {FNAME("timeStamp") INT, CONS, 1, 0, SKIP, 0, NULL},
+ {FNAME("token") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoH323Token_cryptoEPPwdHash_token},
+};
+
+static const struct field_t _CryptoH323Token_cryptoGKPwdHash_token[] = { /* SEQUENCE
+ * Three SKIP entries; "paramS" links _Params. Referenced by the "token"
+ * entry of _CryptoH323Token_cryptoGKPwdHash.
+ */
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("hash") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoH323Token_cryptoGKPwdHash[] = { /* SEQUENCE
+ * Three SKIP entries. Referenced by the "cryptoGKPwdHash" alternative of
+ * _CryptoH323Token. Only "token" links a sub-table
+ * (_CryptoH323Token_cryptoGKPwdHash_token).
+ */
+ {FNAME("gatekeeperId") BMPSTR, 7, 1, 0, SKIP, 0, NULL},
+ {FNAME("timeStamp") INT, CONS, 1, 0, SKIP, 0, NULL},
+ {FNAME("token") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoH323Token_cryptoGKPwdHash_token},
+};
+
+static const struct field_t _CryptoH323Token_cryptoEPPwdEncr[] = { /* SEQUENCE
+ * Three SKIP entries; "paramS" links _Params. Referenced by the
+ * "cryptoEPPwdEncr" alternative of _CryptoH323Token.
+ */
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoH323Token_cryptoGKPwdEncr[] = { /* SEQUENCE
+ * Three SKIP entries; "paramS" links _Params. Referenced by the
+ * "cryptoGKPwdEncr" alternative of _CryptoH323Token.
+ */
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoH323Token_cryptoEPCert[] = { /* SEQUENCE
+ * Four SKIP entries. Referenced by the "cryptoEPCert" alternative of
+ * _CryptoH323Token. "toBeSigned" is SKIP | OPEN | EXT with no sub-table;
+ * "paramS" links _Params.
+ */
+ {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL},
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoH323Token_cryptoGKCert[] = { /* SEQUENCE
+ * Four SKIP entries. Referenced by the "cryptoGKCert" alternative of
+ * _CryptoH323Token. "toBeSigned" is SKIP | OPEN | EXT with no sub-table;
+ * "paramS" links _Params.
+ */
+ {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL},
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoH323Token_cryptoFastStart[] = { /* SEQUENCE
+ * Four SKIP entries. Referenced by the "cryptoFastStart" alternative of
+ * _CryptoH323Token. "toBeSigned" is SKIP | OPEN | EXT with no sub-table;
+ * "paramS" links _Params.
+ */
+ {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL},
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoToken_cryptoEncryptedToken_token[] = { /* SEQUENCE
+ * Three SKIP entries; "paramS" links _Params. Referenced by the "token"
+ * entry of _CryptoToken_cryptoEncryptedToken.
+ */
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoToken_cryptoEncryptedToken[] = { /* SEQUENCE
+ * Two SKIP entries. Referenced by the "cryptoEncryptedToken" alternative
+ * of _CryptoToken; "token" links
+ * _CryptoToken_cryptoEncryptedToken_token.
+ */
+ {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("token") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoToken_cryptoEncryptedToken_token},
+};
+
+static const struct field_t _CryptoToken_cryptoSignedToken_token[] = { /* SEQUENCE
+ * Four SKIP entries. Referenced by the "token" entry of
+ * _CryptoToken_cryptoSignedToken. "toBeSigned" is SKIP | OPEN | EXT with
+ * no sub-table; "paramS" links _Params.
+ */
+ {FNAME("toBeSigned") SEQ, 8, 9, 11, SKIP | OPEN | EXT, 0, NULL},
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("signature") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoToken_cryptoSignedToken[] = { /* SEQUENCE
+ * Two SKIP entries. Referenced by the "cryptoSignedToken" alternative of
+ * _CryptoToken; "token" links _CryptoToken_cryptoSignedToken_token.
+ */
+ {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("token") SEQ, 0, 4, 4, SKIP, 0,
+ _CryptoToken_cryptoSignedToken_token},
+};
+
+static const struct field_t _CryptoToken_cryptoHashedToken_token[] = { /* SEQUENCE
+ * Three SKIP entries; "paramS" links _Params. Referenced by the "token"
+ * entry of _CryptoToken_cryptoHashedToken.
+ */
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("hash") BITSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoToken_cryptoHashedToken[] = { /* SEQUENCE
+ * Three SKIP entries. Referenced by the "cryptoHashedToken" alternative
+ * of _CryptoToken. "hashedVals" links _ClearToken; "token" links
+ * _CryptoToken_cryptoHashedToken_token.
+ */
+ {FNAME("tokenOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("hashedVals") SEQ, 8, 9, 11, SKIP | EXT, 0, _ClearToken},
+ {FNAME("token") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoToken_cryptoHashedToken_token},
+};
+
+static const struct field_t _CryptoToken_cryptoPwdEncr[] = { /* SEQUENCE
+ * Three SKIP entries; "paramS" links _Params. Referenced by the
+ * "cryptoPwdEncr" alternative of _CryptoToken.
+ */
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("paramS") SEQ, 2, 2, 3, SKIP | EXT, 0, _Params},
+ {FNAME("encryptedData") OCTSTR, SEMI, 0, 0, SKIP, 0, NULL},
+};
+
+static const struct field_t _CryptoToken[] = { /* CHOICE
+ * Four SKIP alternatives, each linking its own _CryptoToken_* table.
+ * Referenced by the "nestedcryptoToken" alternative of _CryptoH323Token.
+ */
+ {FNAME("cryptoEncryptedToken") SEQ, 0, 2, 2, SKIP, 0,
+ _CryptoToken_cryptoEncryptedToken},
+ {FNAME("cryptoSignedToken") SEQ, 0, 2, 2, SKIP, 0,
+ _CryptoToken_cryptoSignedToken},
+ {FNAME("cryptoHashedToken") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoToken_cryptoHashedToken},
+ {FNAME("cryptoPwdEncr") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoToken_cryptoPwdEncr},
+};
+
+static const struct field_t _CryptoH323Token[] = { /* CHOICE
+ * Eight SKIP alternatives. The first seven link the matching
+ * _CryptoH323Token_* table; "nestedcryptoToken" links _CryptoToken.
+ * Referenced by the "item" of _Progress_UUIE_cryptoTokens.
+ */
+ {FNAME("cryptoEPPwdHash") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoH323Token_cryptoEPPwdHash},
+ {FNAME("cryptoGKPwdHash") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoH323Token_cryptoGKPwdHash},
+ {FNAME("cryptoEPPwdEncr") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoH323Token_cryptoEPPwdEncr},
+ {FNAME("cryptoGKPwdEncr") SEQ, 0, 3, 3, SKIP, 0,
+ _CryptoH323Token_cryptoGKPwdEncr},
+ {FNAME("cryptoEPCert") SEQ, 0, 4, 4, SKIP, 0,
+ _CryptoH323Token_cryptoEPCert},
+ {FNAME("cryptoGKCert") SEQ, 0, 4, 4, SKIP, 0,
+ _CryptoH323Token_cryptoGKCert},
+ {FNAME("cryptoFastStart") SEQ, 0, 4, 4, SKIP, 0,
+ _CryptoH323Token_cryptoFastStart},
+ {FNAME("nestedcryptoToken") CHOICE, 2, 4, 4, SKIP | EXT, 0,
+ _CryptoToken},
+};
+
+static const struct field_t _Progress_UUIE_cryptoTokens[] = { /* SEQUENCE OF
+ * Element template used by the "cryptoTokens" entry of _Progress_UUIE:
+ * one SKIP | EXT "item" described by _CryptoH323Token.
+ */
+ {FNAME("item") CHOICE, 3, 8, 8, SKIP | EXT, 0, _CryptoH323Token},
+};
+
+static const struct field_t _Progress_UUIE_fastStart[] = { /* SEQUENCE OF
+ * Element template used by the "fastStart" entry of _Progress_UUIE.
+ * The single "item" is flagged DECODE | OPEN | EXT, carries
+ * sizeof(OpenLogicalChannel) and is described by _OpenLogicalChannel
+ * (defined outside this chunk).
+ */
+ {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT,
+ sizeof(OpenLogicalChannel), _OpenLogicalChannel}
+ ,
+};
+
+static const struct field_t _Progress_UUIE[] = { /* SEQUENCE
+ * Field table for Progress-UUIE (11 entries), referenced by the
+ * "progress" alternative of _H323_UU_PDU_h323_message_body.
+ * "h245Address" and "fastStart" are DECODE with an offsetof() into
+ * Progress_UUIE; the rest are SKIP. Unlike the Connect/Alerting/Facility
+ * tables in this section, the SKIP entries "callIdentifier",
+ * "h245SecurityMode", "tokens" and "cryptoTokens" link real sub-tables
+ * instead of NULL.
+ */
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("destinationInfo") SEQ, 6, 8, 10, SKIP | EXT, 0,
+ _EndpointType},
+ {FNAME("h245Address") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(Progress_UUIE, h245Address), _TransportAddress},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0,
+ _CallIdentifier},
+ {FNAME("h245SecurityMode") CHOICE, 2, 4, 4, SKIP | EXT | OPT, 0,
+ _H245Security},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _Progress_UUIE_tokens},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _Progress_UUIE_cryptoTokens},
+ {FNAME("fastStart") SEQOF, SEMI, 0, 30, DECODE | OPT,
+ offsetof(Progress_UUIE, fastStart), _Progress_UUIE_fastStart},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("fastConnectRefused") NUL, FIXD, 0, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _H323_UU_PDU_h323_message_body[] = { /* CHOICE
+ * Alternatives of the "h323-message-body" entry of _H323_UU_PDU
+ * (13 entries). DECODE alternatives, each with an offsetof() into
+ * H323_UU_PDU_h323_message_body: "setup", "callProceeding", "connect",
+ * "alerting", "facility" and "progress". "information" and
+ * "releaseComplete" are SKIP but still link their tables; the remaining
+ * alternatives are SKIP with no sub-table.
+ */
+ {FNAME("setup") SEQ, 7, 13, 39, DECODE | EXT,
+ offsetof(H323_UU_PDU_h323_message_body, setup), _Setup_UUIE},
+ {FNAME("callProceeding") SEQ, 1, 3, 12, DECODE | EXT,
+ offsetof(H323_UU_PDU_h323_message_body, callProceeding),
+ _CallProceeding_UUIE},
+ {FNAME("connect") SEQ, 1, 4, 19, DECODE | EXT,
+ offsetof(H323_UU_PDU_h323_message_body, connect), _Connect_UUIE},
+ {FNAME("alerting") SEQ, 1, 3, 17, DECODE | EXT,
+ offsetof(H323_UU_PDU_h323_message_body, alerting), _Alerting_UUIE},
+ {FNAME("information") SEQ, 0, 1, 7, SKIP | EXT, 0, _Information_UUIE},
+ {FNAME("releaseComplete") SEQ, 1, 2, 11, SKIP | EXT, 0,
+ _ReleaseComplete_UUIE},
+ {FNAME("facility") SEQ, 3, 5, 21, DECODE | EXT,
+ offsetof(H323_UU_PDU_h323_message_body, facility), _Facility_UUIE},
+ {FNAME("progress") SEQ, 5, 8, 11, DECODE | EXT,
+ offsetof(H323_UU_PDU_h323_message_body, progress), _Progress_UUIE},
+ {FNAME("empty") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("status") SEQ, 2, 4, 4, SKIP | EXT, 0, NULL},
+ {FNAME("statusInquiry") SEQ, 2, 4, 4, SKIP | EXT, 0, NULL},
+ {FNAME("setupAcknowledge") SEQ, 2, 4, 4, SKIP | EXT, 0, NULL},
+ {FNAME("notify") SEQ, 2, 4, 4, SKIP | EXT, 0, NULL},
+};
+
+static const struct field_t _RequestMessage[] = { /* CHOICE
+ * Alternatives of RequestMessage (15 entries), referenced by the
+ * "request" alternative of _MultimediaSystemControlMessage.
+ * Only "openLogicalChannel" is DECODE (offsetof() into RequestMessage,
+ * sub-table _OpenLogicalChannel); every other alternative is STOP | EXT
+ * with no sub-table.
+ */
+ {FNAME("nonStandard") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("masterSlaveDetermination") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("terminalCapabilitySet") SEQ, 3, 5, 5, STOP | EXT, 0, NULL},
+ {FNAME("openLogicalChannel") SEQ, 1, 3, 5, DECODE | EXT,
+ offsetof(RequestMessage, openLogicalChannel), _OpenLogicalChannel},
+ {FNAME("closeLogicalChannel") SEQ, 0, 2, 3, STOP | EXT, 0, NULL},
+ {FNAME("requestChannelClose") SEQ, 0, 1, 3, STOP | EXT, 0, NULL},
+ {FNAME("multiplexEntrySend") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("requestMultiplexEntry") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("requestMode") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("roundTripDelayRequest") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("maintenanceLoopRequest") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("communicationModeRequest") SEQ, 0, 0, 0, STOP | EXT, 0, NULL},
+ {FNAME("conferenceRequest") CHOICE, 3, 8, 16, STOP | EXT, 0, NULL},
+ {FNAME("multilinkRequest") CHOICE, 3, 5, 5, STOP | EXT, 0, NULL},
+ {FNAME("logicalChannelRateRequest") SEQ, 0, 3, 3, STOP | EXT, 0,
+ NULL},
+};
+
+static const struct field_t _OpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters[] = { /* CHOICE
+ * Two alternatives, referenced by the "multiplexParameters" entry of
+ * _OpenLogicalChannelAck_reverseLogicalChannelParameters.
+ * "h222LogicalChannelParameters" is SKIP; "h2250LogicalChannelParameters"
+ * is DECODE with an offsetof() into the matching struct. Both sub-tables
+ * are defined outside this chunk.
+ */
+ {FNAME("h222LogicalChannelParameters") SEQ, 3, 5, 5, SKIP | EXT, 0,
+ _H222LogicalChannelParameters},
+ {FNAME("h2250LogicalChannelParameters") SEQ, 10, 11, 14, DECODE | EXT,
+ offsetof
+ (OpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters,
+ h2250LogicalChannelParameters), _H2250LogicalChannelParameters},
+};
+
+static const struct field_t _OpenLogicalChannelAck_reverseLogicalChannelParameters[] = { /* SEQUENCE
+ * Four entries, referenced by the "reverseLogicalChannelParameters"
+ * entry of _OpenLogicalChannelAck. Only "multiplexParameters" is DECODE
+ * (offsetof() into the matching struct); the three INT entries are SKIP.
+ */
+ {FNAME("reverseLogicalChannelNumber") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("portNumber") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("multiplexParameters") CHOICE, 0, 1, 2, DECODE | EXT | OPT,
+ offsetof(OpenLogicalChannelAck_reverseLogicalChannelParameters,
+ multiplexParameters),
+ _OpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters},
+ {FNAME("replacementFor") INT, WORD, 1, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _H2250LogicalChannelAckParameters_nonStandard[] = { /* SEQUENCE OF
+ * Element template used by the "nonStandard" entry of
+ * _H2250LogicalChannelAckParameters: one SKIP "item" described by
+ * _H245_NonStandardParameter (defined outside this chunk).
+ */
+ {FNAME("item") SEQ, 0, 2, 2, SKIP, 0, _H245_NonStandardParameter},
+};
+
+static const struct field_t _H2250LogicalChannelAckParameters[] = { /* SEQUENCE
+ * Seven entries, referenced by the single alternative of
+ * _OpenLogicalChannelAck_forwardMultiplexAckParameters.
+ * "mediaChannel" and "mediaControlChannel" are DECODE, each with an
+ * offsetof() into H2250LogicalChannelAckParameters and sub-table
+ * _H245_TransportAddress (defined outside this chunk); the rest are SKIP.
+ */
+ {FNAME("nonStandard") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _H2250LogicalChannelAckParameters_nonStandard},
+ {FNAME("sessionID") INT, 8, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("mediaChannel") CHOICE, 1, 2, 2, DECODE | EXT | OPT,
+ offsetof(H2250LogicalChannelAckParameters, mediaChannel),
+ _H245_TransportAddress},
+ {FNAME("mediaControlChannel") CHOICE, 1, 2, 2, DECODE | EXT | OPT,
+ offsetof(H2250LogicalChannelAckParameters, mediaControlChannel),
+ _H245_TransportAddress},
+ {FNAME("dynamicRTPPayloadType") INT, 5, 96, 0, SKIP | OPT, 0, NULL},
+ {FNAME("flowControlToZero") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("portNumber") INT, WORD, 0, 0, SKIP | OPT, 0, NULL},
+};
+
+static const struct field_t _OpenLogicalChannelAck_forwardMultiplexAckParameters[] = { /* CHOICE
+ * Single DECODE alternative, "h2250LogicalChannelAckParameters", with an
+ * offsetof() into the matching struct and sub-table
+ * _H2250LogicalChannelAckParameters. Referenced by the
+ * "forwardMultiplexAckParameters" entry of _OpenLogicalChannelAck.
+ */
+ {FNAME("h2250LogicalChannelAckParameters") SEQ, 5, 5, 7, DECODE | EXT,
+ offsetof(OpenLogicalChannelAck_forwardMultiplexAckParameters,
+ h2250LogicalChannelAckParameters),
+ _H2250LogicalChannelAckParameters},
+};
+
+static const struct field_t _OpenLogicalChannelAck[] = { /* SEQUENCE
+ * Five entries, referenced by the "openLogicalChannelAck" alternative of
+ * _ResponseMessage. DECODE entries, each with an offsetof() into
+ * OpenLogicalChannelAck: "reverseLogicalChannelParameters",
+ * "separateStack" and "forwardMultiplexAckParameters". The leading INT
+ * is SKIP and the trailing "encryptionSync" is STOP.
+ */
+ {FNAME("forwardLogicalChannelNumber") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("reverseLogicalChannelParameters") SEQ, 2, 3, 4,
+ DECODE | EXT | OPT, offsetof(OpenLogicalChannelAck,
+ reverseLogicalChannelParameters),
+ _OpenLogicalChannelAck_reverseLogicalChannelParameters},
+ {FNAME("separateStack") SEQ, 2, 4, 5, DECODE | EXT | OPT,
+ offsetof(OpenLogicalChannelAck, separateStack),
+ _NetworkAccessParameters},
+ {FNAME("forwardMultiplexAckParameters") CHOICE, 0, 1, 1,
+ DECODE | EXT | OPT, offsetof(OpenLogicalChannelAck,
+ forwardMultiplexAckParameters),
+ _OpenLogicalChannelAck_forwardMultiplexAckParameters},
+ {FNAME("encryptionSync") SEQ, 2, 4, 4, STOP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _ResponseMessage[] = { /* CHOICE
+ * Alternatives of ResponseMessage (24 entries), referenced by the
+ * "response" alternative of _MultimediaSystemControlMessage.
+ * Only "openLogicalChannelAck" is DECODE (offsetof() into
+ * ResponseMessage, sub-table _OpenLogicalChannelAck); every other
+ * alternative is STOP | EXT with no sub-table.
+ */
+ {FNAME("nonStandard") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("masterSlaveDeterminationAck") SEQ, 0, 1, 1, STOP | EXT, 0,
+ NULL},
+ {FNAME("masterSlaveDeterminationReject") SEQ, 0, 1, 1, STOP | EXT, 0,
+ NULL},
+ {FNAME("terminalCapabilitySetAck") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("terminalCapabilitySetReject") SEQ, 0, 2, 2, STOP | EXT, 0,
+ NULL},
+ {FNAME("openLogicalChannelAck") SEQ, 1, 2, 5, DECODE | EXT,
+ offsetof(ResponseMessage, openLogicalChannelAck),
+ _OpenLogicalChannelAck},
+ {FNAME("openLogicalChannelReject") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("closeLogicalChannelAck") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("requestChannelCloseAck") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("requestChannelCloseReject") SEQ, 0, 2, 2, STOP | EXT, 0,
+ NULL},
+ {FNAME("multiplexEntrySendAck") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("multiplexEntrySendReject") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("requestMultiplexEntryAck") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("requestMultiplexEntryReject") SEQ, 0, 2, 2, STOP | EXT, 0,
+ NULL},
+ {FNAME("requestModeAck") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("requestModeReject") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("roundTripDelayResponse") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("maintenanceLoopAck") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("maintenanceLoopReject") SEQ, 0, 2, 2, STOP | EXT, 0, NULL},
+ {FNAME("communicationModeResponse") CHOICE, 0, 1, 1, STOP | EXT, 0,
+ NULL},
+ {FNAME("conferenceResponse") CHOICE, 3, 8, 16, STOP | EXT, 0, NULL},
+ {FNAME("multilinkResponse") CHOICE, 3, 5, 5, STOP | EXT, 0, NULL},
+ {FNAME("logicalChannelRateAcknowledge") SEQ, 0, 3, 3, STOP | EXT, 0,
+ NULL},
+ {FNAME("logicalChannelRateReject") SEQ, 1, 4, 4, STOP | EXT, 0, NULL},
+};
+
+static const struct field_t _MultimediaSystemControlMessage[] = { /* CHOICE
+ * Four alternatives, referenced by the "item" of
+ * _H323_UU_PDU_h245Control. "request" and "response" are DECODE with an
+ * offsetof() into MultimediaSystemControlMessage; "command" and
+ * "indication" are STOP with no sub-table.
+ */
+ {FNAME("request") CHOICE, 4, 11, 15, DECODE | EXT,
+ offsetof(MultimediaSystemControlMessage, request), _RequestMessage},
+ {FNAME("response") CHOICE, 5, 19, 24, DECODE | EXT,
+ offsetof(MultimediaSystemControlMessage, response),
+ _ResponseMessage},
+ {FNAME("command") CHOICE, 3, 7, 12, STOP | EXT, 0, NULL},
+ {FNAME("indication") CHOICE, 4, 14, 23, STOP | EXT, 0, NULL},
+};
+
+static const struct field_t _H323_UU_PDU_h245Control[] = { /* SEQUENCE OF
+ * Element template used by the "h245Control" entry of _H323_UU_PDU.
+ * The single "item" is flagged DECODE | OPEN | EXT, carries
+ * sizeof(MultimediaSystemControlMessage) and is described by
+ * _MultimediaSystemControlMessage.
+ */
+ {FNAME("item") CHOICE, 2, 4, 4, DECODE | OPEN | EXT,
+ sizeof(MultimediaSystemControlMessage),
+ _MultimediaSystemControlMessage}
+ ,
+};
+
+static const struct field_t _H323_UU_PDU[] = { /* SEQUENCE
+ * Field table for H323-UU-PDU (11 entries), referenced by the
+ * "h323-uu-pdu" entry of _H323_UserInformation.
+ * "h323-message-body" and "h245Control" are DECODE with an offsetof()
+ * into H323_UU_PDU. The entries between them are SKIP, and every entry
+ * after "h245Control" is STOP.
+ */
+ {FNAME("h323-message-body") CHOICE, 3, 7, 13, DECODE | EXT,
+ offsetof(H323_UU_PDU, h323_message_body),
+ _H323_UU_PDU_h323_message_body},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("h4501SupplementaryService") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ NULL},
+ {FNAME("h245Tunneling") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("h245Control") SEQOF, SEMI, 0, 4, DECODE | OPT,
+ offsetof(H323_UU_PDU, h245Control), _H323_UU_PDU_h245Control},
+ {FNAME("nonStandardControl") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("callLinkage") SEQ, 2, 2, 2, STOP | EXT | OPT, 0, NULL},
+ {FNAME("tunnelledSignallingMessage") SEQ, 2, 4, 4, STOP | EXT | OPT,
+ 0, NULL},
+ {FNAME("provisionalRespToH245Tunneling") NUL, FIXD, 0, 0, STOP | OPT,
+ 0, NULL},
+ {FNAME("stimulusControl") SEQ, 3, 3, 3, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+static const struct field_t _H323_UserInformation[] = { /* SEQUENCE
+ * Two entries: "h323-uu-pdu" is DECODE (offsetof() into
+ * H323_UserInformation, sub-table _H323_UU_PDU) and "user-data" is STOP.
+ * No table in this chunk references this one; presumably it is a
+ * top-level entry point for the decoder - confirm against the caller.
+ */
+ {FNAME("h323-uu-pdu") SEQ, 1, 2, 11, DECODE | EXT,
+ offsetof(H323_UserInformation, h323_uu_pdu), _H323_UU_PDU},
+ {FNAME("user-data") SEQ, 0, 2, 2, STOP | EXT | OPT, 0, NULL},
+};
+
+static const struct field_t _GatekeeperRequest[] = { /* SEQUENCE
+ * Field table for GatekeeperRequest (18 entries). The user of this table
+ * is not visible in this chunk.
+ * Only "rasAddress" is DECODE (offsetof() into GatekeeperRequest,
+ * sub-table _TransportAddress). The three entries before it are SKIP and
+ * every entry after it is STOP.
+ */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("rasAddress") CHOICE, 3, 7, 7, DECODE | EXT,
+ offsetof(GatekeeperRequest, rasAddress), _TransportAddress},
+ {FNAME("endpointType") SEQ, 6, 8, 10, STOP | EXT, 0, NULL},
+ {FNAME("gatekeeperIdentifier") BMPSTR, 7, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("callServices") SEQ, 0, 8, 8, STOP | EXT | OPT, 0, NULL},
+ {FNAME("endpointAlias") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("alternateEndpoints") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("authenticationCapability") SEQOF, SEMI, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("algorithmOIDs") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrity") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("supportsAltGK") NUL, FIXD, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+static const struct field_t _GatekeeperConfirm[] = { /* SEQUENCE
+ * Field table for GatekeeperConfirm (14 entries). The user of this table
+ * is not visible in this chunk.
+ * Only "rasAddress" is DECODE (offsetof() into GatekeeperConfirm,
+ * sub-table _TransportAddress). The four entries before it are SKIP and
+ * every entry after it is STOP.
+ */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("gatekeeperIdentifier") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("rasAddress") CHOICE, 3, 7, 7, DECODE | EXT,
+ offsetof(GatekeeperConfirm, rasAddress), _TransportAddress},
+ {FNAME("alternateGatekeeper") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("authenticationMode") CHOICE, 3, 7, 8, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("algorithmOID") OID, BYTE, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrity") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+static const struct field_t _RegistrationRequest_callSignalAddress[] = { /* SEQUENCE OF
+ * Element template used by the "callSignalAddress" entry of
+ * _RegistrationRequest. The single "item" is DECODE | EXT (no OPEN),
+ * carries sizeof(TransportAddress) and is described by
+ * _TransportAddress (defined outside this chunk).
+ */
+ {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT,
+ sizeof(TransportAddress), _TransportAddress}
+ ,
+};
+
+static const struct field_t _RegistrationRequest_rasAddress[] = { /* SEQUENCE OF
+ * Element template used by the "rasAddress" entry of
+ * _RegistrationRequest. The single "item" is DECODE | EXT (no OPEN),
+ * carries sizeof(TransportAddress) and is described by
+ * _TransportAddress (defined outside this chunk).
+ */
+ {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT,
+ sizeof(TransportAddress), _TransportAddress}
+ ,
+};
+
+static const struct field_t _RegistrationRequest_terminalAlias[] = { /* SEQUENCE OF
+ * Element template used by the "terminalAlias" entry of
+ * _RegistrationRequest: one SKIP | EXT "item" described by _AliasAddress
+ * (defined outside this chunk).
+ */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+static const struct field_t _RegistrationRequest[] = { /* SEQUENCE
+ * Field table for RegistrationRequest (31 entries). The user of this
+ * table is not visible in this chunk.
+ * DECODE entries, each with an offsetof() into RegistrationRequest:
+ * "callSignalAddress", "rasAddress" and "timeToLive" (the latter has no
+ * sub-table). Other entries up to "timeToLive" are SKIP; every entry
+ * after "timeToLive" is STOP.
+ */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("discoveryComplete") BOOL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("callSignalAddress") SEQOF, SEMI, 0, 10, DECODE,
+ offsetof(RegistrationRequest, callSignalAddress),
+ _RegistrationRequest_callSignalAddress},
+ {FNAME("rasAddress") SEQOF, SEMI, 0, 10, DECODE,
+ offsetof(RegistrationRequest, rasAddress),
+ _RegistrationRequest_rasAddress},
+ {FNAME("terminalType") SEQ, 6, 8, 10, SKIP | EXT, 0, _EndpointType},
+ {FNAME("terminalAlias") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _RegistrationRequest_terminalAlias},
+ {FNAME("gatekeeperIdentifier") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("endpointVendor") SEQ, 2, 3, 3, SKIP | EXT, 0,
+ _VendorIdentifier},
+ {FNAME("alternateEndpoints") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("timeToLive") INT, CONS, 1, 0, DECODE | OPT,
+ offsetof(RegistrationRequest, timeToLive), NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("keepAlive") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("willSupplyUUIEs") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("alternateTransportAddresses") SEQ, 1, 1, 1, STOP | EXT | OPT,
+ 0, NULL},
+ {FNAME("additiveRegistration") NUL, FIXD, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("terminalAliasPattern") SEQOF, SEMI, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("supportsAltGK") NUL, FIXD, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("usageReportingCapability") SEQ, 3, 4, 4, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("supportedH248Packages") SEQOF, SEMI, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("callCreditCapability") SEQ, 2, 2, 2, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("capacityReportingCapability") SEQ, 0, 1, 1, STOP | EXT | OPT,
+ 0, NULL},
+ {FNAME("capacity") SEQ, 2, 2, 2, STOP | EXT | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+/* Item descriptor for the callSignalAddress SEQUENCE OF referenced from
+ * _RegistrationConfirm: every element is a TransportAddress CHOICE flagged
+ * DECODE.
+ * NOTE(review): the slot that other tables fill with offsetof() carries
+ * sizeof(TransportAddress) here; presumably the decoder uses it as the
+ * per-element stride -- confirm against the decoder. */
+static const struct field_t _RegistrationConfirm_callSignalAddress[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT,
+ sizeof(TransportAddress), _TransportAddress}
+ ,
+};
+
+/* Item descriptor for the terminalAlias SEQUENCE OF referenced from
+ * _RegistrationConfirm: each element is an AliasAddress CHOICE flagged SKIP,
+ * with a zero offset. */
+static const struct field_t _RegistrationConfirm_terminalAlias[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+/* Field table for the RegistrationConfirm SEQUENCE.
+ * Only callSignalAddress and timeToLive are flagged DECODE and carry an
+ * offsetof() into the RegistrationConfirm structure; the other leading
+ * fields are SKIP, and every field from "tokens" onward is STOP.
+ * NOTE(review): entries are positional; presumably their order must follow
+ * the ASN.1 definition and the counts in the parent _RasMessage entry --
+ * confirm before editing. */
+static const struct field_t _RegistrationConfirm[] = { /* SEQUENCE */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("callSignalAddress") SEQOF, SEMI, 0, 10, DECODE,
+ offsetof(RegistrationConfirm, callSignalAddress),
+ _RegistrationConfirm_callSignalAddress},
+ {FNAME("terminalAlias") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _RegistrationConfirm_terminalAlias},
+ {FNAME("gatekeeperIdentifier") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, SKIP, 0, NULL},
+ {FNAME("alternateGatekeeper") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL},
+ {FNAME("timeToLive") INT, CONS, 1, 0, DECODE | OPT,
+ offsetof(RegistrationConfirm, timeToLive), NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("willRespondToIRR") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("preGrantedARQ") SEQ, 0, 4, 8, STOP | EXT | OPT, 0, NULL},
+ {FNAME("maintainConnection") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("supportsAdditiveRegistration") NUL, FIXD, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("terminalAliasPattern") SEQOF, SEMI, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("usageSpec") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("featureServerAlias") CHOICE, 1, 2, 7, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("capacityReportingSpec") SEQ, 0, 1, 1, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+/* Item descriptor for the callSignalAddress SEQUENCE OF referenced from
+ * _UnregistrationRequest: every element is a TransportAddress CHOICE flagged
+ * DECODE.
+ * NOTE(review): sizeof(TransportAddress) sits where other tables place an
+ * offsetof(); presumably it is the per-element stride -- confirm. */
+static const struct field_t _UnregistrationRequest_callSignalAddress[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT,
+ sizeof(TransportAddress), _TransportAddress}
+ ,
+};
+
+/* Field table for the UnregistrationRequest SEQUENCE.
+ * requestSeqNum is SKIP, callSignalAddress is the only DECODE field (stored
+ * at offsetof(UnregistrationRequest, callSignalAddress)), and every field
+ * from "endpointAlias" onward is STOP. */
+static const struct field_t _UnregistrationRequest[] = { /* SEQUENCE */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("callSignalAddress") SEQOF, SEMI, 0, 10, DECODE,
+ offsetof(UnregistrationRequest, callSignalAddress),
+ _UnregistrationRequest_callSignalAddress},
+ {FNAME("endpointAlias") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("alternateEndpoints") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("gatekeeperIdentifier") BMPSTR, 7, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("reason") CHOICE, 2, 4, 5, STOP | EXT | OPT, 0, NULL},
+ {FNAME("endpointAliasPattern") SEQOF, SEMI, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("supportedPrefixes") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("alternateGatekeeper") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+/* Alternatives of the CallModel CHOICE: two NUL-typed entries, both SKIP.
+ * Referenced by the callModel fields of _AdmissionRequest and
+ * _AdmissionConfirm. */
+static const struct field_t _CallModel[] = { /* CHOICE */
+ {FNAME("direct") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+ {FNAME("gatekeeperRouted") NUL, FIXD, 0, 0, SKIP, 0, NULL},
+};
+
+/* Item descriptor for the destinationInfo SEQUENCE OF referenced from
+ * _AdmissionRequest: each element is an AliasAddress CHOICE flagged SKIP. */
+static const struct field_t _AdmissionRequest_destinationInfo[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+/* Item descriptor for the destExtraCallInfo SEQUENCE OF referenced from
+ * _AdmissionRequest: each element is an AliasAddress CHOICE flagged SKIP. */
+static const struct field_t _AdmissionRequest_destExtraCallInfo[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+/* Item descriptor for the srcInfo SEQUENCE OF referenced from
+ * _AdmissionRequest: each element is an AliasAddress CHOICE flagged SKIP. */
+static const struct field_t _AdmissionRequest_srcInfo[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+/* Field table for the AdmissionRequest SEQUENCE.
+ * The two optional TransportAddress CHOICEs destCallSignalAddress and
+ * srcCallSignalAddress are the only DECODE fields and are stored at their
+ * offsetof() within AdmissionRequest; the other leading fields are SKIP and
+ * every field from "bandWidth" onward is STOP. */
+static const struct field_t _AdmissionRequest[] = { /* SEQUENCE */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("callType") CHOICE, 2, 4, 4, SKIP | EXT, 0, _CallType},
+ {FNAME("callModel") CHOICE, 1, 2, 2, SKIP | EXT | OPT, 0, _CallModel},
+ {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, SKIP, 0, NULL},
+ {FNAME("destinationInfo") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _AdmissionRequest_destinationInfo},
+ {FNAME("destCallSignalAddress") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(AdmissionRequest, destCallSignalAddress),
+ _TransportAddress},
+ {FNAME("destExtraCallInfo") SEQOF, SEMI, 0, 0, SKIP | OPT, 0,
+ _AdmissionRequest_destExtraCallInfo},
+ {FNAME("srcInfo") SEQOF, SEMI, 0, 0, SKIP, 0,
+ _AdmissionRequest_srcInfo},
+ {FNAME("srcCallSignalAddress") CHOICE, 3, 7, 7, DECODE | EXT | OPT,
+ offsetof(AdmissionRequest, srcCallSignalAddress), _TransportAddress},
+ {FNAME("bandWidth") INT, CONS, 0, 0, STOP, 0, NULL},
+ {FNAME("callReferenceValue") INT, WORD, 0, 0, STOP, 0, NULL},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("callServices") SEQ, 0, 8, 8, STOP | EXT | OPT, 0, NULL},
+ {FNAME("conferenceID") OCTSTR, FIXD, 16, 0, STOP, 0, NULL},
+ {FNAME("activeMC") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("answerCall") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("canMapAlias") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("callIdentifier") SEQ, 0, 1, 1, STOP | EXT, 0, NULL},
+ {FNAME("srcAlternatives") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("destAlternatives") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("gatekeeperIdentifier") BMPSTR, 7, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("transportQOS") CHOICE, 2, 3, 3, STOP | EXT | OPT, 0, NULL},
+ {FNAME("willSupplyUUIEs") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("callLinkage") SEQ, 2, 2, 2, STOP | EXT | OPT, 0, NULL},
+ {FNAME("gatewayDataRate") SEQ, 2, 3, 3, STOP | EXT | OPT, 0, NULL},
+ {FNAME("capacity") SEQ, 2, 2, 2, STOP | EXT | OPT, 0, NULL},
+ {FNAME("circuitInfo") SEQ, 3, 3, 3, STOP | EXT | OPT, 0, NULL},
+ {FNAME("desiredProtocols") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("desiredTunnelledProtocol") SEQ, 1, 2, 2, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+/* Field table for the AdmissionConfirm SEQUENCE.
+ * destCallSignalAddress is the only DECODE field (stored at
+ * offsetof(AdmissionConfirm, destCallSignalAddress)); the three fields
+ * before it are SKIP and every field from "irrFrequency" onward is STOP. */
+static const struct field_t _AdmissionConfirm[] = { /* SEQUENCE */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("bandWidth") INT, CONS, 0, 0, SKIP, 0, NULL},
+ {FNAME("callModel") CHOICE, 1, 2, 2, SKIP | EXT, 0, _CallModel},
+ {FNAME("destCallSignalAddress") CHOICE, 3, 7, 7, DECODE | EXT,
+ offsetof(AdmissionConfirm, destCallSignalAddress),
+ _TransportAddress},
+ {FNAME("irrFrequency") INT, WORD, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("destinationInfo") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("destExtraCallInfo") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("destinationType") SEQ, 6, 8, 10, STOP | EXT | OPT, 0, NULL},
+ {FNAME("remoteExtensionAddress") SEQOF, SEMI, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("alternateEndpoints") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("transportQOS") CHOICE, 2, 3, 3, STOP | EXT | OPT, 0, NULL},
+ {FNAME("willRespondToIRR") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("uuiesRequested") SEQ, 0, 9, 13, STOP | EXT, 0, NULL},
+ {FNAME("language") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("alternateTransportAddresses") SEQ, 1, 1, 1, STOP | EXT | OPT,
+ 0, NULL},
+ {FNAME("useSpecifiedTransport") CHOICE, 1, 2, 2, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("circuitInfo") SEQ, 3, 3, 3, STOP | EXT | OPT, 0, NULL},
+ {FNAME("usageSpec") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("supportedProtocols") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+/* Item descriptor for the destinationInfo SEQUENCE OF referenced from
+ * _LocationRequest: each element is an AliasAddress CHOICE flagged SKIP. */
+static const struct field_t _LocationRequest_destinationInfo[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 1, 2, 7, SKIP | EXT, 0, _AliasAddress},
+};
+
+/* Field table for the LocationRequest SEQUENCE.
+ * replyAddress is the only DECODE field (stored at
+ * offsetof(LocationRequest, replyAddress)); the fields before it are SKIP
+ * and every field from "sourceInfo" onward is STOP. */
+static const struct field_t _LocationRequest[] = { /* SEQUENCE */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, SKIP | OPT, 0, NULL},
+ {FNAME("destinationInfo") SEQOF, SEMI, 0, 0, SKIP, 0,
+ _LocationRequest_destinationInfo},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("replyAddress") CHOICE, 3, 7, 7, DECODE | EXT,
+ offsetof(LocationRequest, replyAddress), _TransportAddress},
+ {FNAME("sourceInfo") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("canMapAlias") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("gatekeeperIdentifier") BMPSTR, 7, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("desiredProtocols") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("desiredTunnelledProtocol") SEQ, 1, 2, 2, STOP | EXT | OPT, 0,
+ NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("hopCount") INT, 8, 1, 0, STOP | OPT, 0, NULL},
+ {FNAME("circuitInfo") SEQ, 3, 3, 3, STOP | EXT | OPT, 0, NULL},
+};
+
+/* Field table for the LocationConfirm SEQUENCE.
+ * callSignalAddress and rasAddress are DECODE fields stored at their
+ * offsetof() within LocationConfirm; requestSeqNum is SKIP and every field
+ * from "nonStandardData" onward is STOP. */
+static const struct field_t _LocationConfirm[] = { /* SEQUENCE */
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("callSignalAddress") CHOICE, 3, 7, 7, DECODE | EXT,
+ offsetof(LocationConfirm, callSignalAddress), _TransportAddress},
+ {FNAME("rasAddress") CHOICE, 3, 7, 7, DECODE | EXT,
+ offsetof(LocationConfirm, rasAddress), _TransportAddress},
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("destinationInfo") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("destExtraCallInfo") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("destinationType") SEQ, 6, 8, 10, STOP | EXT | OPT, 0, NULL},
+ {FNAME("remoteExtensionAddress") SEQOF, SEMI, 0, 0, STOP | OPT, 0,
+ NULL},
+ {FNAME("alternateEndpoints") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("alternateTransportAddresses") SEQ, 1, 1, 1, STOP | EXT | OPT,
+ 0, NULL},
+ {FNAME("supportedProtocols") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("multipleCalls") BOOL, FIXD, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("featureSet") SEQ, 3, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("circuitInfo") SEQ, 3, 3, 3, STOP | EXT | OPT, 0, NULL},
+ {FNAME("serviceControl") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+/* Item descriptor for the callSignalAddress SEQUENCE OF referenced from
+ * _InfoRequestResponse: every element is a TransportAddress CHOICE flagged
+ * DECODE.
+ * NOTE(review): sizeof(TransportAddress) sits where other tables place an
+ * offsetof(); presumably it is the per-element stride -- confirm. */
+static const struct field_t _InfoRequestResponse_callSignalAddress[] = { /* SEQUENCE OF */
+ {FNAME("item") CHOICE, 3, 7, 7, DECODE | EXT,
+ sizeof(TransportAddress), _TransportAddress}
+ ,
+};
+
+/* Field table for the InfoRequestResponse SEQUENCE.
+ * rasAddress and callSignalAddress are DECODE fields stored at their
+ * offsetof() within InfoRequestResponse; the fields before them are SKIP
+ * and every field from "endpointAlias" onward is STOP. */
+static const struct field_t _InfoRequestResponse[] = { /* SEQUENCE */
+ {FNAME("nonStandardData") SEQ, 0, 2, 2, SKIP | OPT, 0,
+ _NonStandardParameter},
+ {FNAME("requestSeqNum") INT, WORD, 1, 0, SKIP, 0, NULL},
+ {FNAME("endpointType") SEQ, 6, 8, 10, SKIP | EXT, 0, _EndpointType},
+ {FNAME("endpointIdentifier") BMPSTR, 7, 1, 0, SKIP, 0, NULL},
+ {FNAME("rasAddress") CHOICE, 3, 7, 7, DECODE | EXT,
+ offsetof(InfoRequestResponse, rasAddress), _TransportAddress},
+ {FNAME("callSignalAddress") SEQOF, SEMI, 0, 10, DECODE,
+ offsetof(InfoRequestResponse, callSignalAddress),
+ _InfoRequestResponse_callSignalAddress},
+ {FNAME("endpointAlias") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("perCallInfo") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("tokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+ {FNAME("integrityCheckValue") SEQ, 0, 2, 2, STOP | OPT, 0, NULL},
+ {FNAME("needResponse") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("capacity") SEQ, 2, 2, 2, STOP | EXT | OPT, 0, NULL},
+ {FNAME("irrStatus") CHOICE, 2, 4, 4, STOP | EXT | OPT, 0, NULL},
+ {FNAME("unsolicited") BOOL, FIXD, 0, 0, STOP, 0, NULL},
+ {FNAME("genericData") SEQOF, SEMI, 0, 0, STOP | OPT, 0, NULL},
+};
+
+/* Alternatives of the top-level RasMessage CHOICE.
+ * Alternatives that have a field table in this file are flagged DECODE and
+ * are stored at their offsetof() within RasMessage; all remaining
+ * alternatives are STOP with a NULL sub-table.
+ * NOTE(review): entries are positional; presumably the array index must
+ * match the CHOICE index on the wire -- confirm before reordering. */
+static const struct field_t _RasMessage[] = { /* CHOICE */
+ {FNAME("gatekeeperRequest") SEQ, 4, 8, 18, DECODE | EXT,
+ offsetof(RasMessage, gatekeeperRequest), _GatekeeperRequest},
+ {FNAME("gatekeeperConfirm") SEQ, 2, 5, 14, DECODE | EXT,
+ offsetof(RasMessage, gatekeeperConfirm), _GatekeeperConfirm},
+ {FNAME("gatekeeperReject") SEQ, 2, 5, 11, STOP | EXT, 0, NULL},
+ {FNAME("registrationRequest") SEQ, 3, 10, 31, DECODE | EXT,
+ offsetof(RasMessage, registrationRequest), _RegistrationRequest},
+ {FNAME("registrationConfirm") SEQ, 3, 7, 24, DECODE | EXT,
+ offsetof(RasMessage, registrationConfirm), _RegistrationConfirm},
+ {FNAME("registrationReject") SEQ, 2, 5, 11, STOP | EXT, 0, NULL},
+ {FNAME("unregistrationRequest") SEQ, 3, 5, 15, DECODE | EXT,
+ offsetof(RasMessage, unregistrationRequest), _UnregistrationRequest},
+ {FNAME("unregistrationConfirm") SEQ, 1, 2, 6, STOP | EXT, 0, NULL},
+ {FNAME("unregistrationReject") SEQ, 1, 3, 8, STOP | EXT, 0, NULL},
+ {FNAME("admissionRequest") SEQ, 7, 16, 34, DECODE | EXT,
+ offsetof(RasMessage, admissionRequest), _AdmissionRequest},
+ {FNAME("admissionConfirm") SEQ, 2, 6, 27, DECODE | EXT,
+ offsetof(RasMessage, admissionConfirm), _AdmissionConfirm},
+ {FNAME("admissionReject") SEQ, 1, 3, 11, STOP | EXT, 0, NULL},
+ {FNAME("bandwidthRequest") SEQ, 2, 7, 18, STOP | EXT, 0, NULL},
+ {FNAME("bandwidthConfirm") SEQ, 1, 3, 8, STOP | EXT, 0, NULL},
+ {FNAME("bandwidthReject") SEQ, 1, 4, 9, STOP | EXT, 0, NULL},
+ {FNAME("disengageRequest") SEQ, 1, 6, 19, STOP | EXT, 0, NULL},
+ {FNAME("disengageConfirm") SEQ, 1, 2, 9, STOP | EXT, 0, NULL},
+ {FNAME("disengageReject") SEQ, 1, 3, 8, STOP | EXT, 0, NULL},
+ {FNAME("locationRequest") SEQ, 2, 5, 17, DECODE | EXT,
+ offsetof(RasMessage, locationRequest), _LocationRequest},
+ {FNAME("locationConfirm") SEQ, 1, 4, 19, DECODE | EXT,
+ offsetof(RasMessage, locationConfirm), _LocationConfirm},
+ {FNAME("locationReject") SEQ, 1, 3, 10, STOP | EXT, 0, NULL},
+ {FNAME("infoRequest") SEQ, 2, 4, 15, STOP | EXT, 0, NULL},
+ {FNAME("infoRequestResponse") SEQ, 3, 8, 16, DECODE | EXT,
+ offsetof(RasMessage, infoRequestResponse), _InfoRequestResponse},
+ {FNAME("nonStandardMessage") SEQ, 0, 2, 7, STOP | EXT, 0, NULL},
+ {FNAME("unknownMessageResponse") SEQ, 0, 1, 5, STOP | EXT, 0, NULL},
+ {FNAME("requestInProgress") SEQ, 4, 6, 6, STOP | EXT, 0, NULL},
+ {FNAME("resourcesAvailableIndicate") SEQ, 4, 9, 11, STOP | EXT, 0,
+ NULL},
+ {FNAME("resourcesAvailableConfirm") SEQ, 4, 6, 7, STOP | EXT, 0,
+ NULL},
+ {FNAME("infoRequestAck") SEQ, 4, 5, 5, STOP | EXT, 0, NULL},
+ {FNAME("infoRequestNak") SEQ, 5, 7, 7, STOP | EXT, 0, NULL},
+ {FNAME("serviceControlIndication") SEQ, 8, 10, 10, STOP | EXT, 0,
+ NULL},
+ {FNAME("serviceControlResponse") SEQ, 7, 8, 8, STOP | EXT, 0, NULL},
+};
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
new file mode 100644
index 0000000..c39b6a9
--- /dev/null
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -0,0 +1,213 @@
+/* Helper handling for netfilter. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+
+/* Taken around every insertion into and removal from the helper hash. */
+static DEFINE_MUTEX(nf_ct_helper_mutex);
+/* Hash table of registered helpers; allocated in nf_conntrack_helper_init(). */
+static struct hlist_head *nf_ct_helper_hash __read_mostly;
+/* Number of buckets in nf_ct_helper_hash. */
+static unsigned int nf_ct_helper_hsize __read_mostly;
+/* Number of currently registered helpers. */
+static unsigned int nf_ct_helper_count __read_mostly;
+/* Flag filled in by nf_ct_alloc_hashtable() and handed back to
+ * nf_ct_free_hashtable(). */
+static int nf_ct_helper_vmalloc;
+
+
+/* Map a tuple to a bucket of the helper hash.  The key mixes the L3 protocol
+ * number, the L4 protocol number and the source port/id; it is simplistic but
+ * collision free for the default registrations of the in-kernel helpers. */
+static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple)
+{
+	unsigned int proto_key = (tuple->src.l3num << 8) | tuple->dst.protonum;
+	unsigned int port_key = (__force __u16)tuple->src.u.all;
+
+	return (proto_key ^ port_key) % nf_ct_helper_hsize;
+}
+
+/* Look up the helper registered for @tuple.
+ *
+ * Only the bucket selected by helper_hash() is searched, and entries are
+ * compared with a mask that covers the full source port/id.  Returns the
+ * matching helper, or NULL when none is registered or none matches.
+ * NOTE(review): the bucket is walked with the _rcu list iterator, so the
+ * caller presumably has to be inside an RCU read-side section -- confirm. */
+struct nf_conntrack_helper *
+__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_tuple_mask port_mask = { .src.u.all = htons(0xFFFF) };
+	struct nf_conntrack_helper *cur;
+	struct hlist_head *bucket;
+	struct hlist_node *pos;
+
+	if (nf_ct_helper_count == 0)
+		return NULL;
+
+	bucket = &nf_ct_helper_hash[helper_hash(tuple)];
+	hlist_for_each_entry_rcu(cur, pos, bucket, hnode) {
+		if (!nf_ct_tuple_src_mask_cmp(tuple, &cur->tuple, &port_mask))
+			continue;
+		return cur;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(__nf_ct_helper_find);
+
+/* Find a registered helper by its name.
+ *
+ * Every bucket of the helper hash is scanned in order and the first helper
+ * whose name compares equal to @name is returned; NULL if there is none. */
+struct nf_conntrack_helper *
+__nf_conntrack_helper_find_byname(const char *name)
+{
+	struct nf_conntrack_helper *cur;
+	struct hlist_node *pos;
+	unsigned int bucket = 0;
+
+	while (bucket < nf_ct_helper_hsize) {
+		hlist_for_each_entry_rcu(cur, pos, &nf_ct_helper_hash[bucket],
+					 hnode) {
+			if (strcmp(cur->name, name) == 0)
+				return cur;
+		}
+		bucket++;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find_byname);
+
+/* Add the helper extension area to conntrack @ct, allocating with @gfp.
+ *
+ * On success the expectation list of the new area is initialised empty and
+ * the area is returned.  If nf_ct_ext_add() fails, NULL is returned and a
+ * debug message is emitted. */
+struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
+{
+	struct nf_conn_help *help;
+
+	help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp);
+	if (help)
+		INIT_HLIST_HEAD(&help->expectations);
+	else
+		/* terminate the line so the next log message does not get
+		 * appended to this one */
+		pr_debug("failed to add helper extension area\n");
+	return help;
+}
+EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
+
+/* If the conntrack owning tuple hash entry @i is currently served by helper
+ * @me, raise an IPCT_HELPER event for it and clear its helper pointer.
+ * Connections without a helper area, or with another helper, are left
+ * untouched.  Always returns 0. */
+static inline int unhelp(struct nf_conntrack_tuple_hash *i,
+			 const struct nf_conntrack_helper *me)
+{
+	struct nf_conn *conn = nf_ct_tuplehash_to_ctrack(i);
+	struct nf_conn_help *ext = nfct_help(conn);
+
+	if (!ext || ext->helper != me)
+		return 0;
+
+	nf_conntrack_event(IPCT_HELPER, conn);
+	rcu_assign_pointer(ext->helper, NULL);
+	return 0;
+}
+
+/* Register helper @me.
+ *
+ * The helper must provide an expectation policy and its expect_class_max
+ * must be below NF_CT_MAX_EXPECT_CLASSES; violating either is a BUG.  The
+ * helper is linked into the bucket chosen by helper_hash() on its tuple,
+ * under nf_ct_helper_mutex.  Always returns 0. */
+int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
+{
+	struct hlist_head *bucket;
+
+	BUG_ON(!me->expect_policy);
+	BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
+
+	bucket = &nf_ct_helper_hash[helper_hash(&me->tuple)];
+
+	mutex_lock(&nf_ct_helper_mutex);
+	hlist_add_head_rcu(&me->hnode, bucket);
+	nf_ct_helper_count++;
+	mutex_unlock(&nf_ct_helper_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
+
+/* Detach helper @me from all conntrack state of namespace @net: first drop
+ * the pending expectations tied to it, then clear the helper pointer of
+ * every unconfirmed and hashed connection that still references it.
+ * The only caller in this file invokes it with nf_conntrack_lock held. */
+static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
+ struct net *net)
+{
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conntrack_expect *exp;
+ const struct hlist_node *n, *next;
+ unsigned int i;
+
+ /* Get rid of expectations */
+ for (i = 0; i < nf_ct_expect_hsize; i++) {
+ hlist_for_each_entry_safe(exp, n, next,
+ &net->ct.expect_hash[i], hnode) {
+ /* NOTE(review): help is dereferenced unchecked below; this
+ * assumes the master of every expectation carries a helper
+ * extension -- confirm. */
+ struct nf_conn_help *help = nfct_help(exp->master);
+ /* An expectation is affected when either its master's helper
+ * or its own helper is @me.  It is only unlinked when
+ * del_timer() reports success (presumably so an already
+ * running timeout handler is not raced -- confirm). */
+ if ((help->helper == me || exp->helper == me) &&
+ del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+ }
+ }
+ }
+
+ /* Get rid of expecteds, set helpers to NULL. */
+ hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode)
+ unhelp(h, me);
+ for (i = 0; i < nf_conntrack_htable_size; i++) {
+ hlist_for_each_entry(h, n, &net->ct.hash[i], hnode)
+ unhelp(h, me);
+ }
+}
+
+/* Unregister helper @me: unlink it from the helper hash, wait for an RCU
+ * grace period, then strip it from the conntrack state of every network
+ * namespace. */
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
+ struct net *net;
+
+ mutex_lock(&nf_ct_helper_mutex);
+ hlist_del_rcu(&me->hnode);
+ nf_ct_helper_count--;
+ mutex_unlock(&nf_ct_helper_mutex);
+
+ /* Make sure nothing is still using the helper unless it's a
+ * connection in the hash.
+ */
+ synchronize_rcu();
+
+ /* NOTE(review): rtnl_lock presumably keeps the namespace list stable
+ * for for_each_net() -- confirm. */
+ rtnl_lock();
+ spin_lock_bh(&nf_conntrack_lock);
+ for_each_net(net)
+ __nf_conntrack_helper_unregister(me, net);
+ spin_unlock_bh(&nf_conntrack_lock);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
+
+/* Conntrack extension descriptor for the helper area: size and alignment of
+ * struct nf_conn_help under the NF_CT_EXT_HELPER id.  Registered in
+ * nf_conntrack_helper_init() and unregistered in nf_conntrack_helper_fini(). */
+static struct nf_ct_ext_type helper_extend __read_mostly = {
+ .len = sizeof(struct nf_conn_help),
+ .align = __alignof__(struct nf_conn_help),
+ .id = NF_CT_EXT_HELPER,
+};
+
+/* Set up the helper subsystem: allocate the helper hash table and register
+ * the helper conntrack extension.
+ *
+ * Returns 0 on success, -ENOMEM if the table cannot be allocated, or the
+ * negative error from nf_ct_extend_register(), in which case the table is
+ * released again. */
+int nf_conntrack_helper_init(void)
+{
+	int ret;
+
+	/* a request for one bucket gets rounded up to use one page */
+	nf_ct_helper_hsize = 1;
+	nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize,
+						  &nf_ct_helper_vmalloc);
+	if (nf_ct_helper_hash == NULL)
+		return -ENOMEM;
+
+	ret = nf_ct_extend_register(&helper_extend);
+	if (ret >= 0)
+		return 0;
+
+	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
+			     nf_ct_helper_hsize);
+	return ret;
+}
+
+/* Tear down what nf_conntrack_helper_init() set up: unregister the helper
+ * extension, then free the helper hash table. */
+void nf_conntrack_helper_fini(void)
+{
+ nf_ct_extend_unregister(&helper_extend);
+ nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
+ nf_ct_helper_hsize);
+}
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
new file mode 100644
index 0000000..20633fd
--- /dev/null
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -0,0 +1,290 @@
+/* IRC extension for IP connection tracking, Version 1.21
+ * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
+ * based on RR's ip_conntrack_ftp.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_irc.h>
+
+/* Upper bound on the number of IRC server ports (and thus helpers). */
+#define MAX_PORTS 8
+/* Ports given via the "ports" module parameter, and how many were given. */
+static unsigned short ports[MAX_PORTS];
+static unsigned int ports_c;
+/* Copied into irc_exp_policy.max_expected at module init. */
+static unsigned int max_dcc_channels = 8;
+/* Copied into irc_exp_policy.timeout at module init. */
+static unsigned int dcc_timeout __read_mostly = 300;
+/* This is slow, but it's simple. --RR */
+/* Single 64 KiB scratch buffer handed to skb_header_pointer() in help();
+ * every use is serialised by irc_buffer_lock. */
+static char *irc_buffer;
+static DEFINE_SPINLOCK(irc_buffer_lock);
+
+/* Optional NAT hook.  help() fetches it with rcu_dereference() and calls it,
+ * instead of registering the expectation itself, when the hook is set and
+ * the connection has any IPS_NAT_MASK status bit. */
+unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct nf_conntrack_expect *exp) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_irc_hook);
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_conntrack_irc");
+
+module_param_array(ports, ushort, &ports_c, 0400);
+MODULE_PARM_DESC(ports, "port numbers of IRC servers");
+module_param(max_dcc_channels, uint, 0400);
+MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per "
+ "IRC session");
+module_param(dcc_timeout, uint, 0400);
+MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
+
+/* DCC sub-commands recognised by help(); each keyword includes its trailing
+ * blank and is compared with memcmp() over its full length. */
+static const char *const dccprotos[] = {
+ "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT "
+};
+
+/* Length of the shortest dccprotos entry (e.g. "SEND "). */
+#define MINMATCHLEN 5
+
+/* tries to get the ip_addr and port out of a dcc command
+ * return value: -1 on failure, 0 on success
+ *	data		pointer to first byte of DCC command data
+ *	data_end	pointer just past the last byte of dcc command data
+ *	ip		returns parsed ip of dcc command
+ *	port		returns parsed port of dcc command
+ *	ad_beg_p	returns pointer to first byte of addr data
+ *	ad_end_p	returns pointer just past the last byte of addr data
+ */
+static int parse_dcc(char *data, const char *data_end, u_int32_t *ip,
+		     u_int16_t *port, char **ad_beg_p, char **ad_end_p)
+{
+	char *tmp;
+
+	/* skip the file name / argument preceding the address;
+	 * at least 12: "AAAAAAAA P\1\n" */
+	while (*data++ != ' ')
+		if (data > data_end - 12)
+			return -1;
+
+	/* Make sure we have a newline character within the packet boundaries
+	 * because simple_strtoul parses until the first invalid character.
+	 * data_end itself lies outside the packet data (the caller passes the
+	 * end of the payload), so it must never be dereferenced. */
+	for (tmp = data; tmp < data_end; tmp++)
+		if (*tmp == '\n')
+			break;
+	if (tmp >= data_end)
+		return -1;
+
+	*ad_beg_p = data;
+	*ip = simple_strtoul(data, &data, 10);
+
+	/* skip blanks between ip and port */
+	while (*data == ' ') {
+		if (data >= data_end)
+			return -1;
+		data++;
+	}
+
+	*port = simple_strtoul(data, &data, 10);
+	*ad_end_p = data;
+
+	return 0;
+}
+
+/* Conntrack helper callback for IRC control connections.
+ *
+ * Scans the TCP payload of packets travelling in the original direction of
+ * an established connection for "\1DCC <proto> ..." commands.  For the first
+ * command that parses and whose advertised address belongs to the sender,
+ * an expectation for a TCP connection to the announced port is created and
+ * either handed to nf_nat_irc_hook (hook set and NAT status bits present on
+ * the connection) or registered directly with nf_ct_expect_related().
+ *
+ *	skb	packet to inspect
+ *	protoff	offset of the TCP header within skb
+ *	ct	conntrack entry of the IRC control connection
+ *	ctinfo	conntrack state/direction of this packet
+ *
+ * Returns NF_ACCEPT, NF_DROP if the expectation cannot be allocated or
+ * registered, or the verdict of the NAT hook when that is used.
+ */
+static int help(struct sk_buff *skb, unsigned int protoff,
+		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+	unsigned int dataoff;
+	const struct iphdr *iph;
+	const struct tcphdr *th;
+	struct tcphdr _tcph;
+	const char *data_limit;
+	char *data, *ib_ptr;
+	int dir = CTINFO2DIR(ctinfo);
+	struct nf_conntrack_expect *exp;
+	struct nf_conntrack_tuple *tuple;
+	u_int32_t dcc_ip;
+	u_int16_t dcc_port;
+	__be16 port;
+	int i, ret = NF_ACCEPT;
+	char *addr_beg_p, *addr_end_p;
+	typeof(nf_nat_irc_hook) nf_nat_irc;
+
+	/* If packet is coming from IRC server */
+	if (dir == IP_CT_DIR_REPLY)
+		return NF_ACCEPT;
+
+	/* Until there's been traffic both ways, don't look in packets. */
+	if (ctinfo != IP_CT_ESTABLISHED &&
+	    ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY)
+		return NF_ACCEPT;
+
+	/* Not a full tcp header? */
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+	if (th == NULL)
+		return NF_ACCEPT;
+
+	/* No data? */
+	dataoff = protoff + th->doff*4;
+	if (dataoff >= skb->len)
+		return NF_ACCEPT;
+
+	/* irc_buffer is shared by all packets, hence the lock */
+	spin_lock_bh(&irc_buffer_lock);
+	ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff,
+				    irc_buffer);
+	BUG_ON(ib_ptr == NULL);
+
+	data = ib_ptr;
+	/* one past the last payload byte */
+	data_limit = ib_ptr + skb->len - dataoff;
+
+	/* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
+	 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
+	while (data < data_limit - (19 + MINMATCHLEN)) {
+		if (memcmp(data, "\1DCC ", 5)) {
+			data++;
+			continue;
+		}
+		data += 5;
+		/* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
+
+		iph = ip_hdr(skb);
+		pr_debug("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u\n",
+			 NIPQUAD(iph->saddr), ntohs(th->source),
+			 NIPQUAD(iph->daddr), ntohs(th->dest));
+
+		for (i = 0; i < ARRAY_SIZE(dccprotos); i++) {
+			if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) {
+				/* no match */
+				continue;
+			}
+			data += strlen(dccprotos[i]);
+			pr_debug("DCC %s detected\n", dccprotos[i]);
+
+			/* we have at least
+			 * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid
+			 * data left (== 14/13 bytes) */
+			if (parse_dcc(data, data_limit, &dcc_ip,
+				      &dcc_port, &addr_beg_p, &addr_end_p)) {
+				pr_debug("unable to parse dcc command\n");
+				continue;
+			}
+			pr_debug("DCC bound ip/port: %u.%u.%u.%u:%u\n",
+				 HIPQUAD(dcc_ip), dcc_port);
+
+			/* dcc_ip can be the internal OR external (NAT'ed) IP.
+			 * The internal one is the source of this packet; the
+			 * external one is the address replies are sent to,
+			 * i.e. the destination of the reverse tuple.  The
+			 * destination of this direction is the IRC server
+			 * and must not be accepted. */
+			tuple = &ct->tuplehash[dir].tuple;
+			if (tuple->src.u3.ip != htonl(dcc_ip) &&
+			    ct->tuplehash[!dir].tuple.dst.u3.ip !=
+			    htonl(dcc_ip)) {
+				if (net_ratelimit())
+					printk(KERN_WARNING
+						"Forged DCC command from "
+						"%u.%u.%u.%u: %u.%u.%u.%u:%u\n",
+						NIPQUAD(tuple->src.u3.ip),
+						HIPQUAD(dcc_ip), dcc_port);
+				continue;
+			}
+
+			exp = nf_ct_expect_alloc(ct);
+			if (exp == NULL) {
+				ret = NF_DROP;
+				goto out;
+			}
+			/* expect any source connecting to the reply-direction
+			 * destination address on the announced port */
+			tuple = &ct->tuplehash[!dir].tuple;
+			port = htons(dcc_port);
+			nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
+					  tuple->src.l3num,
+					  NULL, &tuple->dst.u3,
+					  IPPROTO_TCP, NULL, &port);
+
+			nf_nat_irc = rcu_dereference(nf_nat_irc_hook);
+			if (nf_nat_irc && ct->status & IPS_NAT_MASK)
+				ret = nf_nat_irc(skb, ctinfo,
+						 addr_beg_p - ib_ptr,
+						 addr_end_p - addr_beg_p,
+						 exp);
+			else if (nf_ct_expect_related(exp) != 0)
+				ret = NF_DROP;
+			nf_ct_expect_put(exp);
+			goto out;
+		}
+	}
+ out:
+	spin_unlock_bh(&irc_buffer_lock);
+	return ret;
+}
+
+/* One helper per configured port, the storage for their names, and the
+ * expectation policy shared by all of them; filled in by
+ * nf_conntrack_irc_init(). */
+static struct nf_conntrack_helper irc[MAX_PORTS] __read_mostly;
+static char irc_names[MAX_PORTS][sizeof("irc-65535")] __read_mostly;
+static struct nf_conntrack_expect_policy irc_exp_policy;
+
+/* Forward declaration: the init function calls it on its error path. */
+static void nf_conntrack_irc_fini(void);
+
+/* Module init: validate the parameters, allocate the shared scan buffer and
+ * register one helper per configured port (IRC_PORT when none was given).
+ *
+ * Returns 0 on success, -EINVAL when max_dcc_channels is zero, -ENOMEM when
+ * the buffer cannot be allocated, or the error reported by
+ * nf_conntrack_helper_register(); in the latter case everything set up so
+ * far is torn down again. */
+static int __init nf_conntrack_irc_init(void)
+{
+	int i, ret;
+	char *tmpname;
+
+	if (max_dcc_channels < 1) {
+		printk("nf_ct_irc: max_dcc_channels must not be zero\n");
+		return -EINVAL;
+	}
+
+	irc_exp_policy.max_expected = max_dcc_channels;
+	irc_exp_policy.timeout = dcc_timeout;
+
+	irc_buffer = kmalloc(65536, GFP_KERNEL);
+	if (!irc_buffer)
+		return -ENOMEM;
+
+	/* If no port given, default to standard irc port */
+	if (ports_c == 0)
+		ports[ports_c++] = IRC_PORT;
+
+	for (i = 0; i < ports_c; i++) {
+		irc[i].tuple.src.l3num = AF_INET;
+		irc[i].tuple.src.u.tcp.port = htons(ports[i]);
+		irc[i].tuple.dst.protonum = IPPROTO_TCP;
+		irc[i].expect_policy = &irc_exp_policy;
+		irc[i].me = THIS_MODULE;
+		irc[i].help = help;
+
+		tmpname = &irc_names[i][0];
+		if (ports[i] == IRC_PORT)
+			sprintf(tmpname, "irc");
+		else
+			sprintf(tmpname, "irc-%u", i);
+		irc[i].name = tmpname;
+
+		ret = nf_conntrack_helper_register(&irc[i]);
+		if (ret) {
+			printk("nf_ct_irc: failed to register helper "
+			       "for pf: %u port: %u\n",
+			       irc[i].tuple.src.l3num, ports[i]);
+			/* Only irc[0..i-1] were registered.  Shrink ports_c
+			 * so the shared teardown does not unregister helpers
+			 * that never made it into the helper hash. */
+			ports_c = i;
+			nf_conntrack_irc_fini();
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/* Module teardown: unregister the first ports_c helpers and release the
+ * scan buffer.
+ * Deliberately _NOT_ marked __exit, because the init function also calls it
+ * on its error path. */
+static void nf_conntrack_irc_fini(void)
+{
+	unsigned int idx = 0;
+
+	while (idx < ports_c) {
+		nf_conntrack_helper_unregister(&irc[idx]);
+		idx++;
+	}
+	kfree(irc_buffer);
+}
+
+module_init(nf_conntrack_irc_init);
+module_exit(nf_conntrack_irc_fini);
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
new file mode 100644
index 0000000..e7eb807
--- /dev/null
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -0,0 +1,74 @@
+/*
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ * Based largely upon the original ip_conntrack code which
+ * had the following copyright information:
+ *
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ */
+
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+
+/* Generic L3 tuple extraction: nothing is read from the packet, both address
+ * unions of @tuple are simply zeroed.  Always reports success. */
+static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
+				 struct nf_conntrack_tuple *tuple)
+{
+	memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
+	memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
+	return true;
+}
+
+/* Generic L3 tuple inversion: @orig is not consulted, both address unions of
+ * @tuple are zeroed.  Always reports success. */
+static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
+				 const struct nf_conntrack_tuple *orig)
+{
+	memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
+	memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
+	return true;
+}
+
+/* The generic L3 protocol has nothing to print: writes nothing to @s and
+ * returns 0. */
+static int generic_print_tuple(struct seq_file *s,
+ const struct nf_conntrack_tuple *tuple)
+{
+ return 0;
+}
+
+/* Always returns -NF_ACCEPT and leaves @dataoff and @protonum untouched.
+ * NOTE(review): per the comment below this keeps such packets from being
+ * tracked; how the negative verdict is interpreted is up to the caller --
+ * confirm. */
+static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+ unsigned int *dataoff, u_int8_t *protonum)
+{
+ /* Never track !!! */
+ return -NF_ACCEPT;
+}
+
+
+/* L3 protocol descriptor for PF_UNSPEC, named "unknown", wired to the
+ * generic_* callbacks above.
+ * NOTE(review): presumably used as the fallback for address families that
+ * have no dedicated L3 tracker -- confirm at the lookup site. */
+struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
+ .l3proto = PF_UNSPEC,
+ .name = "unknown",
+ .pkt_to_tuple = generic_pkt_to_tuple,
+ .invert_tuple = generic_invert_tuple,
+ .print_tuple = generic_print_tuple,
+ .get_l4proto = generic_get_l4proto,
+};
+EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
new file mode 100644
index 0000000..08404e6
--- /dev/null
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -0,0 +1,126 @@
+/*
+ * NetBIOS name service broadcast connection tracking helper
+ *
+ * (c) 2005 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+/*
+ * This helper tracks locally originating NetBIOS name service
+ * requests by issuing permanent expectations (valid until
+ * timing out) matching all reply connections from the
+ * destination network. The only NetBIOS specific thing is
+ * actually the port number.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/if_addr.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <net/route.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+#define NMBD_PORT 137
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_conntrack_netbios_ns");
+
+/*
+ * Timeout in seconds (default 3). Used both as the expectation policy
+ * timeout (copied at module init) and to refresh the master connection.
+ */
+static unsigned int timeout __read_mostly = 3;
+module_param(timeout, uint, 0400);
+MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
+
+/*
+ * Conntrack helper callback for NetBIOS name service requests.
+ *
+ * For a locally generated packet in the original direction that is routed
+ * as a broadcast, look up the netmask of the primary interface address whose
+ * broadcast address equals the packet's destination, then register a
+ * permanent expectation that matches replies from any source address inside
+ * that network with source port NMBD_PORT. Finally refresh the master
+ * connection's timeout.
+ *
+ * Always returns NF_ACCEPT; every early exit only means that no expectation
+ * is set up for this packet.
+ */
+static int help(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+ struct nf_conntrack_expect *exp;
+ struct iphdr *iph = ip_hdr(skb);
+ struct rtable *rt = skb->rtable;
+ struct in_device *in_dev;
+ __be32 mask = 0;
+
+ /* we're only interested in locally generated packets */
+ if (skb->sk == NULL)
+ goto out;
+ if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
+ goto out;
+ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ goto out;
+
+ /* find the netmask belonging to the broadcast address we send to */
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(rt->u.dst.dev);
+ if (in_dev != NULL) {
+ for_primary_ifa(in_dev) {
+ if (ifa->ifa_broadcast == iph->daddr) {
+ mask = ifa->ifa_mask;
+ break;
+ }
+ } endfor_ifa(in_dev);
+ }
+ rcu_read_unlock();
+
+ /* mask stays 0 when no matching interface address was found */
+ if (mask == 0)
+ goto out;
+
+ exp = nf_ct_expect_alloc(ct);
+ if (exp == NULL)
+ goto out;
+
+ /* start from the reply tuple, but pin the source port to NMBD_PORT */
+ exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+ exp->tuple.src.u.udp.port = htons(NMBD_PORT);
+
+ /* source address is matched under the netmask, the port exactly */
+ exp->mask.src.u3.ip = mask;
+ exp->mask.src.u.udp.port = htons(0xFFFF);
+
+ exp->expectfn = NULL;
+ exp->flags = NF_CT_EXPECT_PERMANENT;
+ exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
+ exp->helper = NULL;
+
+ /* the result of registering the expectation is deliberately ignored */
+ nf_ct_expect_related(exp);
+ nf_ct_expect_put(exp);
+
+ nf_ct_refresh(ct, skb, timeout * HZ);
+out:
+ return NF_ACCEPT;
+}
+
+/* At most one expectation per master; .timeout is filled in at init. */
+static struct nf_conntrack_expect_policy exp_policy = {
+ .max_expected = 1,
+};
+
+/*
+ * Helper description: attaches to AF_INET/UDP connections whose tuple has
+ * source port NMBD_PORT and dispatches them to help().
+ */
+static struct nf_conntrack_helper helper __read_mostly = {
+ .name = "netbios-ns",
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = __constant_htons(NMBD_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .me = THIS_MODULE,
+ .help = help,
+ .expect_policy = &exp_policy,
+};
+
+/*
+ * Module init: copy the "timeout" module parameter into the expectation
+ * policy, then register the helper. Returns the registration result.
+ */
+static int __init nf_conntrack_netbios_ns_init(void)
+{
+ exp_policy.timeout = timeout;
+ return nf_conntrack_helper_register(&helper);
+}
+
+/* Module exit: unregister the helper registered at init. */
+static void __exit nf_conntrack_netbios_ns_fini(void)
+{
+ nf_conntrack_helper_unregister(&helper);
+}
+
+module_init(nf_conntrack_netbios_ns_init);
+module_exit(nf_conntrack_netbios_ns_fini);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
new file mode 100644
index 0000000..8e9a303
--- /dev/null
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -0,0 +1,1838 @@
+/* Connection tracking via netlink socket. Allows for user space
+ * protocol helpers and general trouble making from userspace.
+ *
+ * (C) 2001 by Jay Schulist <jschlst@samba.org>
+ * (C) 2002-2006 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2003 by Patrick Mchardy <kaber@trash.net>
+ * (C) 2005-2008 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * Initial connection tracking via netlink development funded and
+ * generally made possible by Network Robots, Inc. (www.networkrobots.com)
+ *
+ * Further development of this code funded by Astaro AG (http://www.astaro.com)
+ *
+ * This software may be used and distributed according to the terms
+ * of the GNU General Public License, incorporated herein by reference.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/rculist.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netlink.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+
+#include <linux/netfilter.h>
+#include <net/netlink.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#ifdef CONFIG_NF_NAT_NEEDED
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_protocol.h>
+#endif
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+MODULE_LICENSE("GPL");
+
+static char __initdata version[] = "0.93";
+
+/*
+ * Emit the nested CTA_TUPLE_PROTO attribute for @tuple: the protocol number
+ * followed by whatever the L4 tracker's tuple_to_nlattr callback appends.
+ *
+ * Returns the callback's result (0 if the tracker has no callback), or -1
+ * when the nest or the protocol number does not fit into @skb.
+ */
+static inline int
+ctnetlink_dump_tuples_proto(struct sk_buff *skb,
+			    const struct nf_conntrack_tuple *tuple,
+			    struct nf_conntrack_l4proto *l4proto)
+{
+	struct nlattr *nest;
+	int err = 0;
+
+	nest = nla_nest_start(skb, CTA_TUPLE_PROTO | NLA_F_NESTED);
+	if (nest == NULL)
+		return -1;
+
+	NLA_PUT_U8(skb, CTA_PROTO_NUM, tuple->dst.protonum);
+
+	if (likely(l4proto->tuple_to_nlattr != NULL))
+		err = l4proto->tuple_to_nlattr(skb, tuple);
+
+	nla_nest_end(skb, nest);
+	return err;
+
+nla_put_failure:
+	return -1;
+}
+
+/*
+ * Emit the nested CTA_TUPLE_IP attribute for @tuple; the contents come from
+ * the L3 tracker's tuple_to_nlattr callback, if it has one.
+ *
+ * Returns the callback's result (0 without a callback), or -1 when the nest
+ * cannot be opened in @skb.
+ */
+static inline int
+ctnetlink_dump_tuples_ip(struct sk_buff *skb,
+			 const struct nf_conntrack_tuple *tuple,
+			 struct nf_conntrack_l3proto *l3proto)
+{
+	struct nlattr *nest;
+	int err = 0;
+
+	nest = nla_nest_start(skb, CTA_TUPLE_IP | NLA_F_NESTED);
+	if (nest == NULL)
+		return -1;
+
+	if (likely(l3proto->tuple_to_nlattr != NULL))
+		err = l3proto->tuple_to_nlattr(skb, tuple);
+
+	nla_nest_end(skb, nest);
+	return err;
+}
+
+/*
+ * Dump the L3 part and then the L4 part of @tuple into @skb. The matching
+ * protocol trackers are looked up (and released again) around each step.
+ * The L4 part is skipped when the L3 part fails; the first negative result
+ * is returned, otherwise the result of the L4 dump.
+ */
+static int
+ctnetlink_dump_tuples(struct sk_buff *skb,
+		      const struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_l3proto *l3;
+	struct nf_conntrack_l4proto *l4;
+	int err;
+
+	l3 = nf_ct_l3proto_find_get(tuple->src.l3num);
+	err = ctnetlink_dump_tuples_ip(skb, tuple, l3);
+	nf_ct_l3proto_put(l3);
+
+	if (likely(err >= 0)) {
+		l4 = nf_ct_l4proto_find_get(tuple->src.l3num,
+					    tuple->dst.protonum);
+		err = ctnetlink_dump_tuples_proto(skb, tuple, l4);
+		nf_ct_l4proto_put(l4);
+	}
+
+	return err;
+}
+
+/*
+ * Emit CTA_STATUS carrying ct->status in network byte order.
+ * Returns 0, or -1 via the nla_put_failure label used by NLA_PUT_BE32.
+ */
+static inline int
+ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ NLA_PUT_BE32(skb, CTA_STATUS, htonl(ct->status));
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+/*
+ * Emit CTA_TIMEOUT: the number of whole seconds left until the conntrack
+ * timer expires, clamped to 0 once the expiry time lies in the past.
+ *
+ * Returns 0 on success or -1 if the attribute does not fit into @skb.
+ */
+static inline int
+ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	/*
+	 * Both operands are unsigned long. Convert the (wrapping) difference
+	 * to a signed value *before* dividing: otherwise an already expired
+	 * timer yields a huge positive number and the clamp below can never
+	 * trigger.
+	 */
+	long timeout = (long)(ct->timeout.expires - jiffies) / HZ;
+
+	if (timeout < 0)
+		timeout = 0;
+
+	NLA_PUT_BE32(skb, CTA_TIMEOUT, htonl(timeout));
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+/*
+ * Emit the nested CTA_PROTOINFO attribute using the L4 tracker's to_nlattr
+ * callback. Trackers without that callback produce no attribute at all.
+ *
+ * Returns 0 when nothing had to be dumped, the callback's result otherwise,
+ * or -1 if the nest cannot be opened. The tracker reference taken here is
+ * dropped on every path.
+ */
+static inline int
+ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	struct nf_conntrack_l4proto *l4proto;
+	struct nlattr *nest;
+	int ret = 0;
+
+	l4proto = nf_ct_l4proto_find_get(nf_ct_l3num(ct), nf_ct_protonum(ct));
+
+	if (l4proto->to_nlattr != NULL) {
+		nest = nla_nest_start(skb, CTA_PROTOINFO | NLA_F_NESTED);
+		if (nest == NULL) {
+			ret = -1;
+		} else {
+			ret = l4proto->to_nlattr(skb, nest, ct);
+			nla_nest_end(skb, nest);
+		}
+	}
+
+	nf_ct_l4proto_put(l4proto);
+	return ret;
+}
+
+/*
+ * Emit the nested CTA_HELP attribute (helper name plus optional
+ * helper-specific data) for a conntrack that has a helper assigned.
+ * Conntracks without helper extension or without helper dump nothing.
+ *
+ * Returns 0 on success (including "nothing to dump") and -1 if @skb runs
+ * out of room. The helper pointer is only used under rcu_read_lock().
+ */
+static inline int
+ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	const struct nf_conn_help *help = nfct_help(ct);
+	struct nf_conntrack_helper *helper;
+	struct nlattr *nest;
+	int ret = -1;
+
+	if (help == NULL)
+		return 0;
+
+	rcu_read_lock();
+	helper = rcu_dereference(help->helper);
+	if (helper == NULL)
+		goto done;
+
+	nest = nla_nest_start(skb, CTA_HELP | NLA_F_NESTED);
+	if (nest == NULL)
+		goto nla_put_failure;
+	NLA_PUT_STRING(skb, CTA_HELP_NAME, helper->name);
+
+	/* the result of the helper's own dump is not propagated */
+	if (helper->to_nlattr != NULL)
+		helper->to_nlattr(skb, ct);
+
+	nla_nest_end(skb, nest);
+done:
+	ret = 0;
+nla_put_failure:
+	rcu_read_unlock();
+	return ret;
+}
+
+/*
+ * Emit the packet and byte counters of direction @dir as a nested
+ * CTA_COUNTERS_ORIG or CTA_COUNTERS_REPLY attribute. Conntracks without
+ * accounting data dump nothing.
+ *
+ * Returns 0 on success and -1 if @skb runs out of room.
+ */
+static int
+ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
+			enum ip_conntrack_dir dir)
+{
+	const struct nf_conn_counter *acct;
+	enum ctattr_type type;
+	struct nlattr *nest;
+
+	if (dir)
+		type = CTA_COUNTERS_REPLY;
+	else
+		type = CTA_COUNTERS_ORIG;
+
+	acct = nf_conn_acct_find(ct);
+	if (acct == NULL)
+		return 0;
+
+	nest = nla_nest_start(skb, type | NLA_F_NESTED);
+	if (nest == NULL)
+		return -1;
+
+	NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS,
+		     cpu_to_be64(acct[dir].packets));
+	NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES,
+		     cpu_to_be64(acct[dir].bytes));
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+/*
+ * Emit CTA_MARK carrying ct->mark in network byte order.
+ * Returns 0, or -1 via the nla_put_failure label used by NLA_PUT_BE32.
+ */
+static inline int
+ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ NLA_PUT_BE32(skb, CTA_MARK, htonl(ct->mark));
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+#else
+#define ctnetlink_dump_mark(a, b) (0)
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+/*
+ * Emit CTA_SECMARK carrying ct->secmark in network byte order.
+ * Returns 0, or -1 via the nla_put_failure label used by NLA_PUT_BE32.
+ */
+static inline int
+ctnetlink_dump_secmark(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ NLA_PUT_BE32(skb, CTA_SECMARK, htonl(ct->secmark));
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+#else
+#define ctnetlink_dump_secmark(a, b) (0)
+#endif
+
+#define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
+
+/*
+ * For a conntrack that was created from an expectation (IPS_EXPECTED set),
+ * emit the original-direction tuple of its master as a nested
+ * CTA_TUPLE_MASTER attribute. Other conntracks dump nothing.
+ *
+ * Returns 0 on success and -1 if the attribute cannot be built.
+ */
+static inline int
+ctnetlink_dump_master(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	struct nlattr *nest;
+
+	if (ct->status & IPS_EXPECTED) {
+		nest = nla_nest_start(skb, CTA_TUPLE_MASTER | NLA_F_NESTED);
+		if (nest == NULL)
+			return -1;
+		if (ctnetlink_dump_tuples(skb, master_tuple(ct)) < 0)
+			return -1;
+		nla_nest_end(skb, nest);
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_NF_NAT_NEEDED
+/*
+ * Emit one NAT sequence-adjustment record (@natseq) as a nested attribute
+ * of the given @type: correction position, offset before and offset after,
+ * each as a 32-bit big-endian value.
+ *
+ * Returns 0 on success and -1 if @skb runs out of room.
+ */
+static int
+dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type)
+{
+	struct nlattr *nest = nla_nest_start(skb, type | NLA_F_NESTED);
+
+	if (nest == NULL)
+		return -1;
+
+	NLA_PUT_BE32(skb, CTA_NAT_SEQ_CORRECTION_POS,
+		     htonl(natseq->correction_pos));
+	NLA_PUT_BE32(skb, CTA_NAT_SEQ_OFFSET_BEFORE,
+		     htonl(natseq->offset_before));
+	NLA_PUT_BE32(skb, CTA_NAT_SEQ_OFFSET_AFTER,
+		     htonl(natseq->offset_after));
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+/*
+ * Dump the NAT sequence adjustments of both directions, original first.
+ * Nothing is emitted unless IPS_SEQ_ADJUST is set and the conntrack has a
+ * NAT extension.
+ *
+ * Returns 0 on success and -1 as soon as one direction fails to dump.
+ */
+static inline int
+ctnetlink_dump_nat_seq_adj(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	struct nf_conn_nat *nat = nfct_nat(ct);
+
+	if (nat == NULL || !(ct->status & IPS_SEQ_ADJUST))
+		return 0;
+
+	if (dump_nat_seq_adj(skb, &nat->seq[IP_CT_DIR_ORIGINAL],
+			     CTA_NAT_SEQ_ADJ_ORIG) == -1 ||
+	    dump_nat_seq_adj(skb, &nat->seq[IP_CT_DIR_REPLY],
+			     CTA_NAT_SEQ_ADJ_REPLY) == -1)
+		return -1;
+
+	return 0;
+}
+#else
+#define ctnetlink_dump_nat_seq_adj(a, b) (0)
+#endif
+
+/*
+ * Emit CTA_ID. The id is derived from the address of @ct itself, cast to
+ * unsigned long and passed through htonl().
+ * NOTE(review): on 64-bit builds this presumably keeps only the low 32 bits
+ * of the pointer -- confirm against the htonl() definition in use.
+ * Returns 0, or -1 via the nla_put_failure label used by NLA_PUT_BE32.
+ */
+static inline int
+ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ NLA_PUT_BE32(skb, CTA_ID, htonl((unsigned long)ct));
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+/*
+ * Emit CTA_USE carrying the current value of the conntrack's reference
+ * counter (ct_general.use) in network byte order.
+ * Returns 0, or -1 via the nla_put_failure label used by NLA_PUT_BE32.
+ */
+static inline int
+ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ NLA_PUT_BE32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use)));
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
+
+/*
+ * Append one complete ctnetlink message describing @ct to @skb.
+ *
+ * @pid/@seq: netlink pid and sequence number placed in the header
+ * @event:    message type; the CTNETLINK subsystem id is OR-ed in here
+ * @nowait:   together with a non-zero @pid selects NLM_F_MULTI
+ *
+ * The message contains both tuples followed by status, timeout, counters,
+ * protocol info, helper info, mark, secmark, id, use count, master tuple
+ * and NAT sequence adjustments (each helper decides whether it has anything
+ * to emit).
+ *
+ * Returns skb->len on success. On any failure the skb is trimmed back to
+ * where this message started and -1 is returned.
+ */
+static int
+ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+ int event, int nowait,
+ const struct nf_conn *ct)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ struct nlattr *nest_parms;
+ unsigned char *b = skb_tail_pointer(skb);
+
+ event |= NFNL_SUBSYS_CTNETLINK << 8;
+ /* NOTE(review): NLMSG_PUT presumably jumps to nlmsg_failure -- confirm */
+ nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
+ nfmsg = NLMSG_DATA(nlh);
+
+ nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
+ nfmsg->nfgen_family = nf_ct_l3num(ct);
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
+ if (!nest_parms)
+ goto nla_put_failure;
+ if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest_parms);
+
+ nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
+ if (!nest_parms)
+ goto nla_put_failure;
+ if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest_parms);
+
+ if (ctnetlink_dump_status(skb, ct) < 0 ||
+ ctnetlink_dump_timeout(skb, ct) < 0 ||
+ ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
+ ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+ ctnetlink_dump_protoinfo(skb, ct) < 0 ||
+ ctnetlink_dump_helpinfo(skb, ct) < 0 ||
+ ctnetlink_dump_mark(skb, ct) < 0 ||
+ ctnetlink_dump_secmark(skb, ct) < 0 ||
+ ctnetlink_dump_id(skb, ct) < 0 ||
+ ctnetlink_dump_use(skb, ct) < 0 ||
+ ctnetlink_dump_master(skb, ct) < 0 ||
+ ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
+ goto nla_put_failure;
+
+ /* message length = everything appended since b was recorded */
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+ /* undo the partially written message */
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+/*
+ * Notifier callback: translate conntrack events into a ctnetlink message
+ * and send it to the matching nfnetlink multicast group.
+ *
+ * @events is a bit mask of IPCT_* flags and @ptr is the affected conntrack.
+ * The event class is chosen in this priority order: DESTROY, then
+ * NEW/RELATED, then STATUS/PROTOINFO; any other combination is ignored.
+ * Nothing is built when the chosen group has no listeners.
+ *
+ * Always returns NOTIFY_DONE; allocation or message-building failures just
+ * drop the notification.
+ */
+static int ctnetlink_conntrack_event(struct notifier_block *this,
+ unsigned long events, void *ptr)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ struct nlattr *nest_parms;
+ struct nf_conn *ct = (struct nf_conn *)ptr;
+ struct sk_buff *skb;
+ unsigned int type;
+ sk_buff_data_t b;
+ unsigned int flags = 0, group;
+
+ /* ignore our fake conntrack entry */
+ if (ct == &nf_conntrack_untracked)
+ return NOTIFY_DONE;
+
+ if (events & IPCT_DESTROY) {
+ type = IPCTNL_MSG_CT_DELETE;
+ group = NFNLGRP_CONNTRACK_DESTROY;
+ } else if (events & (IPCT_NEW | IPCT_RELATED)) {
+ type = IPCTNL_MSG_CT_NEW;
+ flags = NLM_F_CREATE|NLM_F_EXCL;
+ group = NFNLGRP_CONNTRACK_NEW;
+ } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
+ /* updates reuse the NEW message type, but without create flags */
+ type = IPCTNL_MSG_CT_NEW;
+ group = NFNLGRP_CONNTRACK_UPDATE;
+ } else
+ return NOTIFY_DONE;
+
+ if (!nfnetlink_has_listeners(group))
+ return NOTIFY_DONE;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (!skb)
+ return NOTIFY_DONE;
+
+ /* remember where the message starts to compute its length later */
+ b = skb->tail;
+
+ type |= NFNL_SUBSYS_CTNETLINK << 8;
+ nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
+ nfmsg = NLMSG_DATA(nlh);
+
+ nlh->nlmsg_flags = flags;
+ nfmsg->nfgen_family = nf_ct_l3num(ct);
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
+ if (!nest_parms)
+ goto nla_put_failure;
+ if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest_parms);
+
+ nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
+ if (!nest_parms)
+ goto nla_put_failure;
+ if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest_parms);
+
+ if (ctnetlink_dump_id(skb, ct) < 0)
+ goto nla_put_failure;
+
+ if (ctnetlink_dump_status(skb, ct) < 0)
+ goto nla_put_failure;
+
+ if (events & IPCT_DESTROY) {
+ /* a destroyed entry only reports its final counters */
+ if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
+ ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
+ goto nla_put_failure;
+ } else {
+ if (ctnetlink_dump_timeout(skb, ct) < 0)
+ goto nla_put_failure;
+
+ if (events & IPCT_PROTOINFO
+ && ctnetlink_dump_protoinfo(skb, ct) < 0)
+ goto nla_put_failure;
+
+ if ((events & IPCT_HELPER || nfct_help(ct))
+ && ctnetlink_dump_helpinfo(skb, ct) < 0)
+ goto nla_put_failure;
+
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ if ((events & IPCT_SECMARK || ct->secmark)
+ && ctnetlink_dump_secmark(skb, ct) < 0)
+ goto nla_put_failure;
+#endif
+
+ if (events & IPCT_RELATED &&
+ ctnetlink_dump_master(skb, ct) < 0)
+ goto nla_put_failure;
+
+ if (events & IPCT_NATSEQADJ &&
+ ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
+ goto nla_put_failure;
+ }
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ /* the mark is reported for every event class, including DESTROY */
+ if ((events & IPCT_MARK || ct->mark)
+ && ctnetlink_dump_mark(skb, ct) < 0)
+ goto nla_put_failure;
+#endif
+
+ nlh->nlmsg_len = skb->tail - b;
+ nfnetlink_send(skb, 0, group, 0);
+ return NOTIFY_DONE;
+
+nlmsg_failure:
+nla_put_failure:
+ kfree_skb(skb);
+ return NOTIFY_DONE;
+}
+#endif /* CONFIG_NF_CONNTRACK_EVENTS */
+
+/*
+ * Dump completion callback: drop the reference on the conntrack that an
+ * interrupted table dump left behind in cb->args[1], if any. Returns 0.
+ */
+static int ctnetlink_done(struct netlink_callback *cb)
+{
+	struct nf_conn *pending = (struct nf_conn *)cb->args[1];
+
+	if (pending != NULL)
+		nf_ct_put(pending);
+
+	return 0;
+}
+
+/*
+ * Netlink dump callback walking the conntrack hash of init_net.
+ *
+ * Dump state lives in @cb: args[0] is the hash bucket currently being
+ * walked, args[1] is the conntrack that did not fit into the previous skb
+ * (a reference is held on it) or 0. Each conntrack is reported once, via
+ * its original-direction hash entry, optionally filtered by the L3 family
+ * given in the request header.
+ *
+ * For IPCTNL_MSG_CT_GET_CTRZERO requests the accounting counters of every
+ * successfully dumped entry are zeroed afterwards.
+ *
+ * Returns skb->len.
+ */
+static int
+ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nf_conn *ct, *last;
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_node *n;
+ struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
+ u_int8_t l3proto = nfmsg->nfgen_family;
+
+ rcu_read_lock();
+ last = (struct nf_conn *)cb->args[1];
+ for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
+restart:
+ hlist_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]],
+ hnode) {
+ if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+ continue;
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ /* Dump entries of a given L3 protocol number.
+ * If it is not specified, ie. l3proto == 0,
+ * then dump everything. */
+ if (l3proto && nf_ct_l3num(ct) != l3proto)
+ continue;
+ /* resuming: skip entries until the one we stopped at */
+ if (cb->args[1]) {
+ if (ct != last)
+ continue;
+ cb->args[1] = 0;
+ }
+ if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ IPCTNL_MSG_CT_NEW,
+ 1, ct) < 0) {
+ /* entry did not fit: pin it as resume point, unless
+ * its refcount already dropped to zero */
+ if (!atomic_inc_not_zero(&ct->ct_general.use))
+ continue;
+ cb->args[1] = (unsigned long)ct;
+ goto out;
+ }
+
+ if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
+ IPCTNL_MSG_CT_GET_CTRZERO) {
+ struct nf_conn_counter *acct;
+
+ acct = nf_conn_acct_find(ct);
+ if (acct)
+ memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]));
+ }
+ }
+ /* resume point was not found in this bucket any more:
+ * forget it and walk the same bucket again from its head */
+ if (cb->args[1]) {
+ cb->args[1] = 0;
+ goto restart;
+ }
+ }
+out:
+ rcu_read_unlock();
+ /* drop the reference taken when the previous pass stopped at 'last' */
+ if (last)
+ nf_ct_put(last);
+
+ return skb->len;
+}
+
+/*
+ * Parse the nested CTA_TUPLE_IP attribute @attr into @tuple using the L3
+ * tracker selected by tuple->src.l3num (which the caller must have set).
+ * The attribute is validated against the tracker's policy before its
+ * nlattr_to_tuple callback runs.
+ *
+ * Returns 0 if the tracker has no parser, otherwise the validation error or
+ * the callback's result.
+ */
+static inline int
+ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_l3proto *l3proto;
+	struct nlattr *tb[CTA_IP_MAX+1];
+	int err = 0;
+
+	nla_parse_nested(tb, CTA_IP_MAX, attr, NULL);
+
+	l3proto = nf_ct_l3proto_find_get(tuple->src.l3num);
+	if (unlikely(l3proto->nlattr_to_tuple == NULL))
+		goto out;
+
+	err = nla_validate_nested(attr, CTA_IP_MAX, l3proto->nla_policy);
+	if (err != 0)
+		goto out;
+
+	err = l3proto->nlattr_to_tuple(tb, tuple);
+out:
+	nf_ct_l3proto_put(l3proto);
+	return err;
+}
+
+/* Generic policy for CTA_TUPLE_PROTO: only the protocol number is typed. */
+static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = {
+ [CTA_PROTO_NUM] = { .type = NLA_U8 },
+};
+
+/*
+ * Parse the nested CTA_TUPLE_PROTO attribute @attr into @tuple. The
+ * mandatory protocol number is stored first; it selects the L4 tracker
+ * whose policy and nlattr_to_tuple callback handle the remaining fields.
+ *
+ * Returns a parse error, -EINVAL if CTA_PROTO_NUM is missing, 0 if the
+ * tracker has no parser, otherwise the validation error or the callback's
+ * result.
+ */
+static inline int
+ctnetlink_parse_tuple_proto(struct nlattr *attr,
+			    struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_l4proto *l4proto;
+	struct nlattr *tb[CTA_PROTO_MAX+1];
+	int err;
+
+	err = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[CTA_PROTO_NUM] == NULL)
+		return -EINVAL;
+	tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]);
+
+	l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum);
+	if (unlikely(l4proto->nlattr_to_tuple == NULL))
+		goto out;
+
+	err = nla_validate_nested(attr, CTA_PROTO_MAX, l4proto->nla_policy);
+	if (err != 0)
+		goto out;
+
+	err = l4proto->nlattr_to_tuple(tb, tuple);
+out:
+	nf_ct_l4proto_put(l4proto);
+	return err;
+}
+
+/*
+ * Build @tuple from the nested tuple attribute cda[@type].
+ *
+ * @tuple is cleared first, so on failure it is left zeroed or only
+ * partially filled. Both the IP and the PROTO sub-attribute are required.
+ * The direction is IP_CT_DIR_REPLY for CTA_TUPLE_REPLY and
+ * IP_CT_DIR_ORIGINAL for every other @type.
+ *
+ * Returns 0 on success, -EINVAL for a missing sub-attribute, or the error
+ * of the sub-parser that failed.
+ */
+static int
+ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple,
+		      enum ctattr_tuple type, u_int8_t l3num)
+{
+	struct nlattr *tb[CTA_TUPLE_MAX+1];
+	int ret;
+
+	memset(tuple, 0, sizeof(*tuple));
+
+	nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], NULL);
+
+	if (tb[CTA_TUPLE_IP] == NULL)
+		return -EINVAL;
+
+	/* the IP parser picks its L3 tracker from this field */
+	tuple->src.l3num = l3num;
+
+	ret = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP], tuple);
+	if (ret < 0)
+		return ret;
+
+	if (tb[CTA_TUPLE_PROTO] == NULL)
+		return -EINVAL;
+
+	ret = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO], tuple);
+	if (ret < 0)
+		return ret;
+
+	/* orig and expect tuples get DIR_ORIGINAL */
+	tuple->dst.dir = (type == CTA_TUPLE_REPLY) ? IP_CT_DIR_REPLY
+						   : IP_CT_DIR_ORIGINAL;
+
+	return 0;
+}
+
+/*
+ * The helper name is consumed as a C string (strcmp(), lookup by name), so
+ * it must be NUL-terminated inside the attribute payload.
+ */
+static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = {
+	[CTA_HELP_NAME]		= { .type = NLA_NUL_STRING },
+};
+
+/*
+ * Extract the helper name from the nested CTA_HELP attribute @attr.
+ *
+ * On success *@helper_name points into the attribute payload (no copy is
+ * made) and 0 is returned. Returns the parse error if the attribute is
+ * malformed, e.g. the name is not NUL-terminated, and -EINVAL if no name
+ * is present.
+ */
+static inline int
+ctnetlink_parse_help(struct nlattr *attr, char **helper_name)
+{
+	struct nlattr *tb[CTA_HELP_MAX+1];
+	int err;
+
+	err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[CTA_HELP_NAME])
+		return -EINVAL;
+
+	*helper_name = nla_data(tb[CTA_HELP_NAME]);
+
+	return 0;
+}
+
+/*
+ * Policy for top-level conntrack attributes: the scalar ones listed here
+ * are 32-bit values; attributes not listed carry no type constraint.
+ */
+static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
+ [CTA_STATUS] = { .type = NLA_U32 },
+ [CTA_TIMEOUT] = { .type = NLA_U32 },
+ [CTA_MARK] = { .type = NLA_U32 },
+ [CTA_USE] = { .type = NLA_U32 },
+ [CTA_ID] = { .type = NLA_U32 },
+};
+
+/*
+ * Handle a conntrack delete request.
+ *
+ * Without any tuple attribute the whole table of init_net is flushed.
+ * Otherwise the entry matching the original tuple (preferred) or the reply
+ * tuple is looked up and killed; if CTA_ID is supplied as well it must
+ * match the id derived from the entry's address.
+ *
+ * Returns 0 on success, a tuple parse error, or -ENOENT when no entry (or
+ * no entry with that id) exists.
+ */
+static int
+ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
+			struct nlmsghdr *nlh, struct nlattr *cda[])
+{
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conntrack_tuple tuple;
+	struct nf_conn *ct;
+	int ret;
+
+	if (!cda[CTA_TUPLE_ORIG] && !cda[CTA_TUPLE_REPLY]) {
+		/* Flush the whole table */
+		nf_conntrack_flush(&init_net);
+		return 0;
+	}
+
+	ret = ctnetlink_parse_tuple(cda, &tuple,
+				    cda[CTA_TUPLE_ORIG] ? CTA_TUPLE_ORIG
+							: CTA_TUPLE_REPLY,
+				    u3);
+	if (ret < 0)
+		return ret;
+
+	h = nf_conntrack_find_get(&init_net, &tuple);
+	if (h == NULL)
+		return -ENOENT;
+
+	ct = nf_ct_tuplehash_to_ctrack(h);
+
+	ret = 0;
+	if (cda[CTA_ID] &&
+	    ntohl(nla_get_be32(cda[CTA_ID])) != (u32)(unsigned long)ct)
+		ret = -ENOENT;
+	else
+		nf_ct_kill(ct);
+
+	nf_ct_put(ct);
+	return ret;
+}
+
+/*
+ * Handle a conntrack get request.
+ *
+ * With NLM_F_DUMP a table dump is started. Otherwise the entry matching the
+ * original tuple (preferred) or the reply tuple is looked up, described in
+ * a freshly allocated skb and unicast back to the requesting pid.
+ *
+ * Returns 0 on success, -EINVAL without a tuple, a tuple parse error,
+ * -ENOENT if no entry matches, -ENOMEM if no skb can be allocated, the
+ * (non-positive) result of ctnetlink_fill_info() if the message cannot be
+ * built, or the error reported by netlink_unicast().
+ */
+static int
+ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct nlattr *cda[])
+{
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conntrack_tuple tuple;
+ struct nf_conn *ct;
+ struct sk_buff *skb2 = NULL;
+ struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ u_int8_t u3 = nfmsg->nfgen_family;
+ int err = 0;
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP)
+ return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
+ ctnetlink_done);
+
+ if (cda[CTA_TUPLE_ORIG])
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
+ else if (cda[CTA_TUPLE_REPLY])
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
+ else
+ return -EINVAL;
+
+ if (err < 0)
+ return err;
+
+ h = nf_conntrack_find_get(&init_net, &tuple);
+ if (!h)
+ return -ENOENT;
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
+ err = -ENOMEM;
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb2) {
+ nf_ct_put(ct);
+ return -ENOMEM;
+ }
+
+ err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
+ IPCTNL_MSG_CT_NEW, 1, ct);
+ /* the entry is no longer needed once the message has been built */
+ nf_ct_put(ct);
+ if (err <= 0)
+ goto free;
+
+ err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+ /* skb2 is not freed here after a failed unicast */
+ if (err < 0)
+ goto out;
+
+ return 0;
+
+free:
+ kfree_skb(skb2);
+out:
+ return err;
+}
+
+#ifdef CONFIG_NF_NAT_NEEDED
+/*
+ * Apply the NAT setup described by @attr to @ct through the hook installed
+ * in nfnetlink_parse_nat_setup_hook.
+ *
+ * If no hook is installed and module loading is available, the RCU read
+ * section, nf_conntrack_lock and the nfnl lock are released (in that
+ * order), "nf-nat-ipv4" is requested, and the locks are re-acquired in
+ * reverse order. NOTE(review): this presumes the caller holds all three on
+ * entry -- confirm against the callers.
+ *
+ * Returns the hook's result, -EAGAIN if the hook only became available
+ * after loading the module (the hook is not called in that case), or
+ * -EOPNOTSUPP if there is no hook.
+ */
+static int
+ctnetlink_parse_nat_setup(struct nf_conn *ct,
+ enum nf_nat_manip_type manip,
+ struct nlattr *attr)
+{
+ typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup;
+
+ parse_nat_setup = rcu_dereference(nfnetlink_parse_nat_setup_hook);
+ if (!parse_nat_setup) {
+#ifdef CONFIG_MODULES
+ /* drop every lock before request_module() */
+ rcu_read_unlock();
+ spin_unlock_bh(&nf_conntrack_lock);
+ nfnl_unlock();
+ if (request_module("nf-nat-ipv4") < 0) {
+ nfnl_lock();
+ spin_lock_bh(&nf_conntrack_lock);
+ rcu_read_lock();
+ return -EOPNOTSUPP;
+ }
+ nfnl_lock();
+ spin_lock_bh(&nf_conntrack_lock);
+ rcu_read_lock();
+ /* hook appeared while unlocked: report -EAGAIN, do not call it */
+ if (nfnetlink_parse_nat_setup_hook)
+ return -EAGAIN;
+#endif
+ return -EOPNOTSUPP;
+ }
+
+ return parse_nat_setup(ct, manip, attr);
+}
+#endif
+
+/*
+ * Merge the status bits requested in CTA_STATUS into ct->status.
+ *
+ * EXPECTED, CONFIRMED and DYING may not differ from the current value at
+ * all; SEEN_REPLY and ASSURED may be set but never cleared. Violations are
+ * rejected with -EBUSY. NAT related bits in the request are ignored.
+ *
+ * Returns 0 on success.
+ */
+static int
+ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
+{
+	unsigned int status = ntohl(nla_get_be32(cda[CTA_STATUS]));
+	unsigned long changed = ct->status ^ status;
+
+	/* unchangeable */
+	if (changed & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
+		return -EBUSY;
+
+	/* SEEN_REPLY bit can only be set */
+	if ((changed & IPS_SEEN_REPLY) && !(status & IPS_SEEN_REPLY))
+		return -EBUSY;
+
+	/* ASSURED bit can only be set */
+	if ((changed & IPS_ASSURED) && !(status & IPS_ASSURED))
+		return -EBUSY;
+
+	/* Be careful here, modifying NAT bits can screw up things,
+	 * so don't let users modify them directly if they don't pass
+	 * nf_nat_range. */
+	status &= ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
+	ct->status |= status;
+
+	return 0;
+}
+
+/*
+ * Apply the NAT setups requested through CTA_NAT_DST and CTA_NAT_SRC to
+ * @ct, destination manipulation first. Processing stops at the first
+ * failure.
+ *
+ * Returns 0 on success, the first negative result of the NAT setup parser,
+ * or -EOPNOTSUPP when built without CONFIG_NF_NAT_NEEDED.
+ */
+static int
+ctnetlink_change_nat(struct nf_conn *ct, struct nlattr *cda[])
+{
+#ifdef CONFIG_NF_NAT_NEEDED
+	int err = 0;
+
+	if (cda[CTA_NAT_DST])
+		err = ctnetlink_parse_nat_setup(ct, IP_NAT_MANIP_DST,
+						cda[CTA_NAT_DST]);
+
+	if (err >= 0 && cda[CTA_NAT_SRC])
+		err = ctnetlink_parse_nat_setup(ct, IP_NAT_MANIP_SRC,
+						cda[CTA_NAT_SRC]);
+
+	return err < 0 ? err : 0;
+#else
+	return -EOPNOTSUPP;
+#endif
+}
+
+/*
+ * Assign, keep or remove the helper of @ct according to CTA_HELP.
+ *
+ * An empty helper name detaches the current helper (dropping the
+ * expectations of @ct first). A non-empty name is looked up; it can only be
+ * attached when @ct has no helper yet, or is already using that very
+ * helper.
+ *
+ * Returns 0 on success, -EBUSY for conntracks that have a master or already
+ * use a different helper, the error of ctnetlink_parse_help(), -EOPNOTSUPP
+ * for an unknown helper name, or -ENOMEM if the helper extension cannot be
+ * added.
+ */
+static inline int
+ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[])
+{
+ struct nf_conntrack_helper *helper;
+ struct nf_conn_help *help = nfct_help(ct);
+ char *helpname;
+ int err;
+
+ /* don't change helper of sibling connections */
+ if (ct->master)
+ return -EBUSY;
+
+ err = ctnetlink_parse_help(cda[CTA_HELP], &helpname);
+ if (err < 0)
+ return err;
+
+ /* empty name: request to remove the helper */
+ if (!strcmp(helpname, "")) {
+ if (help && help->helper) {
+ /* we had a helper before ... */
+ nf_ct_remove_expectations(ct);
+ rcu_assign_pointer(help->helper, NULL);
+ }
+
+ return 0;
+ }
+
+ helper = __nf_conntrack_helper_find_byname(helpname);
+ if (helper == NULL)
+ return -EOPNOTSUPP;
+
+ if (help) {
+ /* same helper already attached: nothing to do */
+ if (help->helper == helper)
+ return 0;
+ if (help->helper)
+ return -EBUSY;
+ /* need to zero data of old helper */
+ memset(&help->help, 0, sizeof(help->help));
+ } else {
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
+ if (help == NULL)
+ return -ENOMEM;
+ }
+
+ rcu_assign_pointer(help->helper, helper);
+
+ return 0;
+}
+
+/*
+ * Re-arm the conntrack timer so that it fires CTA_TIMEOUT seconds from now.
+ *
+ * Returns 0 on success, or -ETIME if the timer was not pending any more
+ * (del_timer() failed), in which case nothing is changed.
+ */
+static inline int
+ctnetlink_change_timeout(struct nf_conn *ct, struct nlattr *cda[])
+{
+	u_int32_t secs = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
+
+	if (del_timer(&ct->timeout)) {
+		ct->timeout.expires = jiffies + secs * HZ;
+		add_timer(&ct->timeout);
+		return 0;
+	}
+
+	return -ETIME;
+}
+
+/*
+ * Hand the nested CTA_PROTOINFO attribute to the from_nlattr callback of
+ * the L4 tracker of @ct.
+ *
+ * Returns the callback's result, or 0 if the tracker has no such callback.
+ */
+static inline int
+ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *cda[])
+{
+	struct nlattr *tb[CTA_PROTOINFO_MAX+1];
+	struct nf_conntrack_l4proto *l4proto;
+	int ret;
+
+	nla_parse_nested(tb, CTA_PROTOINFO_MAX, cda[CTA_PROTOINFO], NULL);
+
+	l4proto = nf_ct_l4proto_find_get(nf_ct_l3num(ct), nf_ct_protonum(ct));
+	ret = l4proto->from_nlattr ? l4proto->from_nlattr(tb, ct) : 0;
+	nf_ct_l4proto_put(l4proto);
+
+	return ret;
+}
+
+#ifdef CONFIG_NF_NAT_NEEDED
+/* Every sequence-adjustment field is read as a 32-bit value. */
+static const struct nla_policy nat_seq_adj_nla_policy[CTA_NAT_SEQ_MAX+1] = {
+	[CTA_NAT_SEQ_CORRECTION_POS]	= { .type = NLA_U32 },
+	[CTA_NAT_SEQ_OFFSET_BEFORE]	= { .type = NLA_U32 },
+	[CTA_NAT_SEQ_OFFSET_AFTER]	= { .type = NLA_U32 },
+};
+
+/*
+ * Load one NAT sequence-adjustment record from the nested attribute @attr
+ * into @natseq.
+ *
+ * All three fields (correction position, offset before, offset after) are
+ * mandatory. They are checked up front so that @natseq is either updated
+ * completely or left untouched; it is never half-written on error.
+ *
+ * Returns 0 on success, the parse error for malformed (e.g. too short)
+ * fields, or -EINVAL if a field is missing.
+ */
+static inline int
+change_nat_seq_adj(struct nf_nat_seq *natseq, struct nlattr *attr)
+{
+	struct nlattr *cda[CTA_NAT_SEQ_MAX+1];
+	int err;
+
+	err = nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr,
+			       nat_seq_adj_nla_policy);
+	if (err < 0)
+		return err;
+
+	if (!cda[CTA_NAT_SEQ_CORRECTION_POS] ||
+	    !cda[CTA_NAT_SEQ_OFFSET_BEFORE] ||
+	    !cda[CTA_NAT_SEQ_OFFSET_AFTER])
+		return -EINVAL;
+
+	natseq->correction_pos =
+		ntohl(nla_get_be32(cda[CTA_NAT_SEQ_CORRECTION_POS]));
+	natseq->offset_before =
+		ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_BEFORE]));
+	natseq->offset_after =
+		ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_AFTER]));
+
+	return 0;
+}
+
+/*
+ * Update the NAT sequence adjustments of @ct from CTA_NAT_SEQ_ADJ_ORIG
+ * and/or CTA_NAT_SEQ_ADJ_REPLY (handled in that order). Each direction
+ * that is updated successfully also sets IPS_SEQ_ADJUST on the conntrack.
+ *
+ * Returns 0 on success or when @ct has no NAT extension, otherwise the
+ * error of the first direction that fails to parse.
+ */
+static int
+ctnetlink_change_nat_seq_adj(struct nf_conn *ct, struct nlattr *cda[])
+{
+	static const struct {
+		int			attr;
+		enum ip_conntrack_dir	dir;
+	} adj[] = {
+		{ CTA_NAT_SEQ_ADJ_ORIG,  IP_CT_DIR_ORIGINAL },
+		{ CTA_NAT_SEQ_ADJ_REPLY, IP_CT_DIR_REPLY },
+	};
+	struct nf_conn_nat *nat = nfct_nat(ct);
+	unsigned int i;
+	int err;
+
+	if (nat == NULL)
+		return 0;
+
+	for (i = 0; i < sizeof(adj) / sizeof(adj[0]); i++) {
+		if (!cda[adj[i].attr])
+			continue;
+
+		err = change_nat_seq_adj(&nat->seq[adj[i].dir],
+					 cda[adj[i].attr]);
+		if (err < 0)
+			return err;
+
+		ct->status |= IPS_SEQ_ADJUST;
+	}
+
+	return 0;
+}
+#endif
+
+/*
+ * Apply the changes requested in @cda to the existing conntrack @ct.
+ *
+ * Attributes are processed in a fixed order: helper, timeout, status,
+ * protocol info, mark, NAT sequence adjustments. Processing stops at the
+ * first failure and changes already applied by earlier steps are not
+ * rolled back.
+ *
+ * Returns 0 on success or the error of the step that failed.
+ */
+static int
+ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[])
+{
+ int err;
+
+ if (cda[CTA_HELP]) {
+ err = ctnetlink_change_helper(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
+ if (cda[CTA_TIMEOUT]) {
+ err = ctnetlink_change_timeout(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
+ if (cda[CTA_STATUS]) {
+ err = ctnetlink_change_status(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
+ if (cda[CTA_PROTOINFO]) {
+ err = ctnetlink_change_protoinfo(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+ if (cda[CTA_MARK])
+ ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+#endif
+
+#ifdef CONFIG_NF_NAT_NEEDED
+ if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) {
+ err = ctnetlink_change_nat_seq_adj(ct, cda);
+ if (err < 0)
+ return err;
+ }
+#endif
+
+ return 0;
+}
+
+/*
+ * Create a new, already confirmed conntrack from a netlink request and
+ * insert it into the hash table.
+ *
+ * @otuple/@rtuple: original and reply tuple of the new entry; both are
+ *                  passed to nf_conntrack_alloc() and must be initialised
+ * @master_ct:      optional master; if given, the new entry is marked as
+ *                  expected and linked to it
+ *
+ * CTA_TIMEOUT is mandatory. A helper matching @rtuple is attached
+ * automatically; CTA_STATUS, NAT setup, CTA_PROTOINFO and CTA_MARK are
+ * applied when present.
+ *
+ * Returns 0 on success. On failure the half-built conntrack is freed and
+ * -ENOMEM, -EINVAL (no timeout) or the error of the failing step is
+ * returned.
+ */
+static int
+ctnetlink_create_conntrack(struct nlattr *cda[],
+ struct nf_conntrack_tuple *otuple,
+ struct nf_conntrack_tuple *rtuple,
+ struct nf_conn *master_ct)
+{
+ struct nf_conn *ct;
+ int err = -EINVAL;
+ struct nf_conn_help *help;
+ struct nf_conntrack_helper *helper;
+
+ ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC);
+ if (ct == NULL || IS_ERR(ct))
+ return -ENOMEM;
+
+ /* err is still -EINVAL here */
+ if (!cda[CTA_TIMEOUT])
+ goto err;
+ ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
+
+ /* convert the relative timeout in seconds to an absolute expiry */
+ ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
+ ct->status |= IPS_CONFIRMED;
+
+ rcu_read_lock();
+ helper = __nf_ct_helper_find(rtuple);
+ if (helper) {
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
+ if (help == NULL) {
+ rcu_read_unlock();
+ err = -ENOMEM;
+ goto err;
+ }
+ /* not in hash table yet so not strictly necessary */
+ rcu_assign_pointer(help->helper, helper);
+ }
+
+ if (cda[CTA_STATUS]) {
+ err = ctnetlink_change_status(ct, cda);
+ if (err < 0) {
+ rcu_read_unlock();
+ goto err;
+ }
+ }
+
+ if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
+ err = ctnetlink_change_nat(ct, cda);
+ if (err < 0) {
+ rcu_read_unlock();
+ goto err;
+ }
+ }
+
+ if (cda[CTA_PROTOINFO]) {
+ err = ctnetlink_change_protoinfo(ct, cda);
+ if (err < 0) {
+ rcu_read_unlock();
+ goto err;
+ }
+ }
+
+ /* the result of adding the accounting extension is not checked */
+ nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+ if (cda[CTA_MARK])
+ ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+#endif
+
+ /* setup master conntrack: this is a confirmed expectation */
+ if (master_ct) {
+ __set_bit(IPS_EXPECTED_BIT, &ct->status);
+ ct->master = master_ct;
+ }
+
+ add_timer(&ct->timeout);
+ nf_conntrack_hash_insert(ct);
+ rcu_read_unlock();
+
+ return 0;
+
+err:
+ nf_conntrack_free(ct);
+ return err;
+}
+
+/*
+ * Handle a conntrack "new" request: create a conntrack or update an
+ * existing one.
+ *
+ * The entry is looked up by its original tuple if given, otherwise by its
+ * reply tuple.
+ *
+ * - No entry found: without NLM_F_CREATE the request fails with -ENOENT.
+ *   With NLM_F_CREATE a new entry is created, which requires BOTH tuples
+ *   (-EINVAL otherwise) because both are handed to
+ *   ctnetlink_create_conntrack(). An optional CTA_TUPLE_MASTER names an
+ *   existing entry to link the new one to; the reference taken on the
+ *   master is dropped again if creation fails.
+ * - Entry found: with NLM_F_EXCL the request fails with -EEXIST. Otherwise
+ *   the entry is updated in place; NAT setup and master linkage cannot be
+ *   changed on an existing entry (-EOPNOTSUPP).
+ *
+ * Lookup and modification run under nf_conntrack_lock.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int
+ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
+			struct nlmsghdr *nlh, struct nlattr *cda[])
+{
+	struct nf_conntrack_tuple otuple, rtuple;
+	struct nf_conntrack_tuple_hash *h = NULL;
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	int err = 0;
+
+	if (cda[CTA_TUPLE_ORIG]) {
+		err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3);
+		if (err < 0)
+			return err;
+	}
+
+	if (cda[CTA_TUPLE_REPLY]) {
+		err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, u3);
+		if (err < 0)
+			return err;
+	}
+
+	spin_lock_bh(&nf_conntrack_lock);
+	if (cda[CTA_TUPLE_ORIG])
+		h = __nf_conntrack_find(&init_net, &otuple);
+	else if (cda[CTA_TUPLE_REPLY])
+		h = __nf_conntrack_find(&init_net, &rtuple);
+
+	if (h == NULL) {
+		struct nf_conntrack_tuple master;
+		struct nf_conntrack_tuple_hash *master_h = NULL;
+		struct nf_conn *master_ct = NULL;
+
+		if (cda[CTA_TUPLE_MASTER]) {
+			err = ctnetlink_parse_tuple(cda,
+						    &master,
+						    CTA_TUPLE_MASTER,
+						    u3);
+			if (err < 0)
+				goto out_unlock;
+
+			master_h = __nf_conntrack_find(&init_net, &master);
+			if (master_h == NULL) {
+				err = -ENOENT;
+				goto out_unlock;
+			}
+			master_ct = nf_ct_tuplehash_to_ctrack(master_h);
+			atomic_inc(&master_ct->ct_general.use);
+		}
+
+		err = -ENOENT;
+		if (nlh->nlmsg_flags & NLM_F_CREATE) {
+			/*
+			 * A tuple that was not supplied has never been
+			 * parsed, i.e. it is uninitialised stack memory.
+			 * Refuse to create an entry from it.
+			 */
+			if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY])
+				err = -EINVAL;
+			else
+				err = ctnetlink_create_conntrack(cda,
+								 &otuple,
+								 &rtuple,
+								 master_ct);
+		}
+		spin_unlock_bh(&nf_conntrack_lock);
+
+		/* on success the new conntrack keeps the master reference */
+		if (err < 0 && master_ct)
+			nf_ct_put(master_ct);
+
+		return err;
+	}
+	/* implicit 'else' */
+
+	/* We manipulate the conntrack inside the global conntrack table lock,
+	 * so there's no need to increase the refcount */
+	err = -EEXIST;
+	if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
+		/* we only allow nat config for new conntracks */
+		if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
+			err = -EOPNOTSUPP;
+			goto out_unlock;
+		}
+		/* can't link an existing conntrack to a master */
+		if (cda[CTA_TUPLE_MASTER]) {
+			err = -EOPNOTSUPP;
+			goto out_unlock;
+		}
+		err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h),
+						 cda);
+	}
+
+out_unlock:
+	spin_unlock_bh(&nf_conntrack_lock);
+	return err;
+}
+
+/***********************************************************************
+ * EXPECT
+ ***********************************************************************/
+
+/*
+ * Emit @tuple into @skb wrapped in a nested attribute of type @type.
+ * Returns 0 on success, -1 if the nest cannot be opened or the tuple
+ * cannot be dumped.
+ */
+static inline int
+ctnetlink_exp_dump_tuple(struct sk_buff *skb,
+			 const struct nf_conntrack_tuple *tuple,
+			 enum ctattr_expect type)
+{
+	struct nlattr *nest = nla_nest_start(skb, type | NLA_F_NESTED);
+
+	if (nest == NULL)
+		return -1;
+
+	if (ctnetlink_dump_tuples(skb, tuple) < 0)
+		return -1;
+
+	nla_nest_end(skb, nest);
+	return 0;
+}
+
+/*
+ * Emit the expectation mask as a nested CTA_EXPECT_MASK attribute.
+ *
+ * The mask only carries source address and source protocol data, so it is
+ * first widened into a full tuple whose remaining bytes are all 0xFF; the
+ * l3/l4 protocol handlers of @tuple then dump that widened tuple.
+ * Returns 0 on success, -1 on any failure.
+ */
+static inline int
+ctnetlink_exp_dump_mask(struct sk_buff *skb,
+			const struct nf_conntrack_tuple *tuple,
+			const struct nf_conntrack_tuple_mask *mask)
+{
+	struct nf_conntrack_tuple full_mask;
+	struct nf_conntrack_l3proto *l3;
+	struct nf_conntrack_l4proto *l4;
+	struct nlattr *nest;
+	int rc;
+
+	/* everything the mask does not describe matches exactly */
+	memset(&full_mask, 0xFF, sizeof(full_mask));
+	memcpy(&full_mask.src.u3, &mask->src.u3, sizeof(full_mask.src.u3));
+	full_mask.src.u.all = mask->src.u.all;
+
+	nest = nla_nest_start(skb, CTA_EXPECT_MASK | NLA_F_NESTED);
+	if (nest == NULL)
+		return -1;
+
+	l3 = nf_ct_l3proto_find_get(tuple->src.l3num);
+	rc = ctnetlink_dump_tuples_ip(skb, &full_mask, l3);
+	nf_ct_l3proto_put(l3);
+	if (unlikely(rc < 0))
+		return -1;
+
+	l4 = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum);
+	rc = ctnetlink_dump_tuples_proto(skb, &full_mask, l4);
+	nf_ct_l4proto_put(l4);
+	if (unlikely(rc < 0))
+		return -1;
+
+	nla_nest_end(skb, nest);
+	return 0;
+}
+
+/*
+ * Dump the attributes describing @exp into @skb: the expected tuple, its
+ * mask, the master conntrack's original-direction tuple, the remaining
+ * timeout in seconds and an id derived from the expectation's address.
+ *
+ * Returns 0 on success, -1 if any attribute could not be added.
+ */
+static int
+ctnetlink_exp_dump_expect(struct sk_buff *skb,
+			  const struct nf_conntrack_expect *exp)
+{
+	struct nf_conn *master = exp->master;
+	/*
+	 * The operands are cast to long before subtracting so that a timer
+	 * which has already expired yields a negative difference.  A plain
+	 * unsigned subtraction would wrap to a huge positive value and the
+	 * clamp below could never trigger.
+	 */
+	long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ;
+
+	if (timeout < 0)
+		timeout = 0;
+
+	if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
+		goto nla_put_failure;
+	if (ctnetlink_exp_dump_mask(skb, &exp->tuple, &exp->mask) < 0)
+		goto nla_put_failure;
+	if (ctnetlink_exp_dump_tuple(skb,
+				     &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				     CTA_EXPECT_MASTER) < 0)
+		goto nla_put_failure;
+
+	NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout));
+	NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp));
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+/*
+ * Build one expectation message for @exp at the tail of @skb.
+ *
+ * @event is tagged with the NFNL_SUBSYS_CTNETLINK_EXP subsystem id, the
+ * nfgenmsg header takes its family from the expectation's l3num, and
+ * NLM_F_MULTI is set only when both @nowait and @pid are non-zero.
+ *
+ * Returns skb->len on success.  On failure the skb is trimmed back to the
+ * tail position recorded on entry and -1 is returned.
+ */
+static int
+ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+ int event,
+ int nowait,
+ const struct nf_conntrack_expect *exp)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ /* remember where this message starts so it can be undone or measured */
+ unsigned char *b = skb_tail_pointer(skb);
+
+ event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
+ nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
+ nfmsg = NLMSG_DATA(nlh);
+
+ nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
+ nfmsg->nfgen_family = exp->tuple.src.l3num;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (ctnetlink_exp_dump_expect(skb, exp) < 0)
+ goto nla_put_failure;
+
+ /* final length covers the header plus every attribute appended above */
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ return skb->len;
+
+/* NOTE(review): nothing in this function jumps to nlmsg_failure explicitly;
+ * presumably NLMSG_PUT() does when the skb has no room - confirm against
+ * the macro definition. */
+nlmsg_failure:
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+/*
+ * Notifier callback for expectation events.
+ *
+ * Only IPEXP_NEW is handled; any other event returns immediately.  When
+ * the NFNLGRP_CONNTRACK_EXP_NEW group has listeners, a message describing
+ * the expectation passed in @ptr is built in a freshly allocated skb and
+ * sent to that group.  Always returns NOTIFY_DONE, including when the
+ * allocation or message construction fails.
+ */
+static int ctnetlink_expect_event(struct notifier_block *this,
+ unsigned long events, void *ptr)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr;
+ struct sk_buff *skb;
+ unsigned int type;
+ sk_buff_data_t b;
+ int flags = 0;
+
+ if (events & IPEXP_NEW) {
+ type = IPCTNL_MSG_EXP_NEW;
+ flags = NLM_F_CREATE|NLM_F_EXCL;
+ } else
+ return NOTIFY_DONE;
+
+ /* don't build a message nobody will receive */
+ if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
+ return NOTIFY_DONE;
+
+ /* GFP_ATOMIC: the allocation must not sleep here */
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (!skb)
+ return NOTIFY_DONE;
+
+ /* start of the message, used below to compute nlmsg_len */
+ b = skb->tail;
+
+ type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
+ nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
+ nfmsg = NLMSG_DATA(nlh);
+
+ nlh->nlmsg_flags = flags;
+ nfmsg->nfgen_family = exp->tuple.src.l3num;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (ctnetlink_exp_dump_expect(skb, exp) < 0)
+ goto nla_put_failure;
+
+ nlh->nlmsg_len = skb->tail - b;
+ nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
+ return NOTIFY_DONE;
+
+/* NOTE(review): nlmsg_failure has no explicit goto here; presumably it is
+ * the target of NLMSG_PUT() - confirm against the macro definition. */
+nlmsg_failure:
+nla_put_failure:
+ /* the skb was never handed to nfnetlink_send(), so free it here */
+ kfree_skb(skb);
+ return NOTIFY_DONE;
+}
+#endif
+/*
+ * Dump completion callback: release the expectation reference that an
+ * interrupted dump may have left parked in cb->args[1].  Always returns 0.
+ */
+static int ctnetlink_exp_done(struct netlink_callback *cb)
+{
+	struct nf_conntrack_expect *parked =
+		(struct nf_conntrack_expect *)cb->args[1];
+
+	if (parked != NULL)
+		nf_ct_expect_put(parked);
+
+	return 0;
+}
+
+/*
+ * Netlink dump callback: walk init_net's expectation hash table and append
+ * one message per expectation to @skb.
+ *
+ * Resume state lives in @cb:
+ *   cb->args[0]  index of the hash bucket currently being walked
+ *   cb->args[1]  expectation (with a reference held) that did not fit into
+ *                the previous skb and must be dumped first, or 0
+ *
+ * If the request's nfgen_family is non-zero, only expectations with that
+ * l3num are dumped.  Returns skb->len.
+ */
+static int
+ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = &init_net;
+ struct nf_conntrack_expect *exp, *last;
+ struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
+ struct hlist_node *n;
+ u_int8_t l3proto = nfmsg->nfgen_family;
+
+ rcu_read_lock();
+ /* entry parked by the previous call; its reference is dropped at 'out' */
+ last = (struct nf_conntrack_expect *)cb->args[1];
+ for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
+restart:
+ hlist_for_each_entry(exp, n, &net->ct.expect_hash[cb->args[0]],
+ hnode) {
+ if (l3proto && exp->tuple.src.l3num != l3proto)
+ continue;
+ /* resuming: skip entries until the parked one is reached */
+ if (cb->args[1]) {
+ if (exp != last)
+ continue;
+ cb->args[1] = 0;
+ }
+ if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ IPCTNL_MSG_EXP_NEW,
+ 1, exp) < 0) {
+ /* could not add this entry: park it, unless its
+ * refcount already dropped to zero */
+ if (!atomic_inc_not_zero(&exp->use))
+ continue;
+ cb->args[1] = (unsigned long)exp;
+ goto out;
+ }
+ }
+ /* parked entry was not found in this bucket any more: clear the
+ * marker and walk the bucket again from its head */
+ if (cb->args[1]) {
+ cb->args[1] = 0;
+ goto restart;
+ }
+ }
+out:
+ rcu_read_unlock();
+ if (last)
+ nf_ct_expect_put(last);
+
+ return skb->len;
+}
+
+/*
+ * Attribute validation policy for the expectation messages.
+ *
+ * CTA_EXPECT_HELP_NAME is consumed as a C string by the delete handler
+ * (helper lookup by name), so it must be validated as NUL-terminated
+ * before any handler sees it.
+ */
+static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
+	[CTA_EXPECT_TIMEOUT]	= { .type = NLA_U32 },
+	[CTA_EXPECT_ID]		= { .type = NLA_U32 },
+	[CTA_EXPECT_HELP_NAME]	= { .type = NLA_NUL_STRING },
+};
+
+/*
+ * IPCTNL_MSG_EXP_GET handler.
+ *
+ * With NLM_F_DUMP a table dump is started.  Otherwise the tuple in
+ * CTA_EXPECT_MASTER is parsed and used to look up one expectation, which
+ * is sent back to the requester by unicast.  If CTA_EXPECT_ID is given it
+ * must match the id derived from the expectation's address.
+ *
+ * Returns the result of netlink_dump_start()/netlink_unicast() on those
+ * paths, -EINVAL without CTA_EXPECT_MASTER, -ENOENT when nothing matches,
+ * -ENOMEM when the reply skb cannot be allocated.
+ */
+static int
+ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct nlattr *cda[])
+{
+ struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_expect *exp;
+ struct sk_buff *skb2;
+ struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ u_int8_t u3 = nfmsg->nfgen_family;
+ int err = 0;
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ return netlink_dump_start(ctnl, skb, nlh,
+ ctnetlink_exp_dump_table,
+ ctnetlink_exp_done);
+ }
+
+ if (cda[CTA_EXPECT_MASTER])
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
+ else
+ return -EINVAL;
+
+ if (err < 0)
+ return err;
+
+ /* takes a reference that every exit path below must drop */
+ exp = nf_ct_expect_find_get(&init_net, &tuple);
+ if (!exp)
+ return -ENOENT;
+
+ if (cda[CTA_EXPECT_ID]) {
+ __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
+ if (ntohl(id) != (u32)(unsigned long)exp) {
+ nf_ct_expect_put(exp);
+ return -ENOENT;
+ }
+ }
+
+ err = -ENOMEM;
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb2)
+ goto out;
+
+ err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
+ nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
+ 1, exp);
+ /* NOTE(review): ctnetlink_exp_fill_info() reports failure as -1, which
+ * is then returned to the caller as-is rather than as a specific errno */
+ if (err <= 0)
+ goto free;
+
+ nf_ct_expect_put(exp);
+
+ return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+
+free:
+ kfree_skb(skb2);
+out:
+ nf_ct_expect_put(exp);
+ return err;
+}
+
+/*
+ * IPCTNL_MSG_EXP_DELETE handler.  Three modes, chosen by the attributes
+ * present:
+ *   CTA_EXPECT_TUPLE      delete the single expectation matching the tuple
+ *                         (and CTA_EXPECT_ID, if given)
+ *   CTA_EXPECT_HELP_NAME  delete every expectation whose master conntrack
+ *                         uses the named helper
+ *   neither               flush all expectations
+ *
+ * Returns 0 on success, -ENOENT if the single expectation is not found,
+ * -EOPNOTSUPP if the named helper is unknown, or a tuple parse error.
+ */
+static int
+ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct nlattr *cda[])
+{
+ struct nf_conntrack_expect *exp;
+ struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_helper *h;
+ struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ struct hlist_node *n, *next;
+ u_int8_t u3 = nfmsg->nfgen_family;
+ unsigned int i;
+ int err;
+
+ if (cda[CTA_EXPECT_TUPLE]) {
+ /* delete a single expect by tuple */
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+ if (err < 0)
+ return err;
+
+ /* bump usage count to 2 */
+ exp = nf_ct_expect_find_get(&init_net, &tuple);
+ if (!exp)
+ return -ENOENT;
+
+ if (cda[CTA_EXPECT_ID]) {
+ __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
+ if (ntohl(id) != (u32)(unsigned long)exp) {
+ nf_ct_expect_put(exp);
+ return -ENOENT;
+ }
+ }
+
+ /* after list removal, usage count == 1 */
+ nf_ct_unexpect_related(exp);
+ /* have to put what we 'get' above.
+ * after this line usage count == 0 */
+ nf_ct_expect_put(exp);
+ } else if (cda[CTA_EXPECT_HELP_NAME]) {
+ /* NOTE(review): this function itself does not check that the
+ * attribute payload is NUL-terminated; that has to be guaranteed
+ * by attribute validation before the handler runs */
+ char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+ struct nf_conn_help *m_help;
+
+ /* delete all expectations for this helper */
+ spin_lock_bh(&nf_conntrack_lock);
+ h = __nf_conntrack_helper_find_byname(name);
+ if (!h) {
+ spin_unlock_bh(&nf_conntrack_lock);
+ return -EOPNOTSUPP;
+ }
+ for (i = 0; i < nf_ct_expect_hsize; i++) {
+ hlist_for_each_entry_safe(exp, n, next,
+ &init_net.ct.expect_hash[i],
+ hnode) {
+ m_help = nfct_help(exp->master);
+ /* only unlink if del_timer() succeeded on this
+ * expectation's timeout timer */
+ if (m_help->helper == h
+ && del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+ }
+ }
+ }
+ spin_unlock_bh(&nf_conntrack_lock);
+ } else {
+ /* This basically means we have to flush everything*/
+ spin_lock_bh(&nf_conntrack_lock);
+ for (i = 0; i < nf_ct_expect_hsize; i++) {
+ hlist_for_each_entry_safe(exp, n, next,
+ &init_net.ct.expect_hash[i],
+ hnode) {
+ if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+ }
+ }
+ }
+ spin_unlock_bh(&nf_conntrack_lock);
+ }
+
+ return 0;
+}
+/*
+ * Update an existing expectation from netlink attributes.
+ * Not implemented: always returns -EOPNOTSUPP and touches neither argument.
+ */
+static int
+ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nlattr *cda[])
+{
+ return -EOPNOTSUPP;
+}
+
+/*
+ * Create a new expectation from CTA_EXPECT_TUPLE, CTA_EXPECT_MASK and
+ * CTA_EXPECT_MASTER (the caller must have verified all three exist).
+ *
+ * The master conntrack is looked up by CTA_EXPECT_MASTER and must have a
+ * helper assigned.  Returns 0 on success, -ENOENT if the master is not
+ * found, -EINVAL if it has no helper, -ENOMEM if the expectation cannot
+ * be allocated, or the error from tuple parsing / nf_ct_expect_related().
+ */
+static int
+ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3)
+{
+ struct nf_conntrack_tuple tuple, mask, master_tuple;
+ struct nf_conntrack_tuple_hash *h = NULL;
+ struct nf_conntrack_expect *exp;
+ struct nf_conn *ct;
+ struct nf_conn_help *help;
+ int err = 0;
+
+ /* caller guarantees that those three CTA_EXPECT_* exist */
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+ if (err < 0)
+ return err;
+ err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
+ if (err < 0)
+ return err;
+ err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
+ if (err < 0)
+ return err;
+
+ /* Look for master conntrack of this expectation */
+ h = nf_conntrack_find_get(&init_net, &master_tuple);
+ if (!h)
+ return -ENOENT;
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ help = nfct_help(ct);
+
+ if (!help || !help->helper) {
+ /* such conntrack hasn't got any helper, abort */
+ err = -EINVAL;
+ goto out;
+ }
+
+ exp = nf_ct_expect_alloc(ct);
+ if (!exp) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ exp->expectfn = NULL;
+ exp->flags = 0;
+ exp->master = ct;
+ exp->helper = NULL;
+ memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
+ /* only the source address and source protocol part of the parsed
+ * mask tuple are copied into the expectation's mask */
+ memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3));
+ exp->mask.src.u.all = mask.src.u.all;
+
+ err = nf_ct_expect_related(exp);
+ /* drop the reference obtained from nf_ct_expect_alloc() */
+ nf_ct_expect_put(exp);
+
+out:
+ /* drop the master reference taken by nf_conntrack_find_get() */
+ nf_ct_put(nf_ct_tuplehash_to_ctrack(h));
+ return err;
+}
+
+/*
+ * IPCTNL_MSG_EXP_NEW handler: create an expectation, or update the one
+ * already matching CTA_EXPECT_TUPLE.
+ *
+ * Requires CTA_EXPECT_TUPLE, CTA_EXPECT_MASK and CTA_EXPECT_MASTER.
+ * Returns -EINVAL if one is missing, -ENOENT if nothing matches and
+ * NLM_F_CREATE is not set, -EEXIST if a match exists and NLM_F_EXCL is
+ * set, otherwise the result of the create/change helper.
+ */
+static int
+ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
+		     struct nlmsghdr *nlh, struct nlattr *cda[])
+{
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int8_t family = nfmsg->nfgen_family;
+	struct nf_conntrack_tuple wanted;
+	struct nf_conntrack_expect *found;
+	int rc = 0;
+
+	if (!(cda[CTA_EXPECT_TUPLE] &&
+	      cda[CTA_EXPECT_MASK] &&
+	      cda[CTA_EXPECT_MASTER]))
+		return -EINVAL;
+
+	rc = ctnetlink_parse_tuple(cda, &wanted, CTA_EXPECT_TUPLE, family);
+	if (rc < 0)
+		return rc;
+
+	spin_lock_bh(&nf_conntrack_lock);
+	found = __nf_ct_expect_find(&init_net, &wanted);
+	if (found) {
+		/* update path: runs with the conntrack lock still held */
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			rc = -EEXIST;
+		else
+			rc = ctnetlink_change_expect(found, cda);
+		spin_unlock_bh(&nf_conntrack_lock);
+		return rc;
+	}
+	spin_unlock_bh(&nf_conntrack_lock);
+
+	/* creation path: runs after the lock has been released */
+	if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+		return -ENOENT;
+
+	return ctnetlink_create_expect(cda, family);
+}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+/* notifier block whose callback is ctnetlink_conntrack_event() */
+static struct notifier_block ctnl_notifier = {
+ .notifier_call = ctnetlink_conntrack_event,
+};
+
+/* notifier block whose callback is ctnetlink_expect_event() */
+static struct notifier_block ctnl_notifier_exp = {
+ .notifier_call = ctnetlink_expect_event,
+};
+#endif
+
+/*
+ * Message dispatch table for the conntrack subsystem, indexed by
+ * IPCTNL_MSG_CT_* type.  Every entry uses ct_nla_policy and accepts up to
+ * CTA_MAX attributes.  GET and GET_CTRZERO share one handler.
+ */
+static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
+ [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack,
+ .attr_count = CTA_MAX,
+ .policy = ct_nla_policy },
+ [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack,
+ .attr_count = CTA_MAX,
+ .policy = ct_nla_policy },
+ [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack,
+ .attr_count = CTA_MAX,
+ .policy = ct_nla_policy },
+ [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack,
+ .attr_count = CTA_MAX,
+ .policy = ct_nla_policy },
+};
+
+/*
+ * Message dispatch table for the expectation subsystem, indexed by
+ * IPCTNL_MSG_EXP_* type.  Every entry uses exp_nla_policy and accepts up
+ * to CTA_EXPECT_MAX attributes.
+ */
+static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
+ [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect,
+ .attr_count = CTA_EXPECT_MAX,
+ .policy = exp_nla_policy },
+ [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect,
+ .attr_count = CTA_EXPECT_MAX,
+ .policy = exp_nla_policy },
+ [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect,
+ .attr_count = CTA_EXPECT_MAX,
+ .policy = exp_nla_policy },
+};
+
+/* nfnetlink subsystem descriptor for conntrack messages (table: ctnl_cb) */
+static const struct nfnetlink_subsystem ctnl_subsys = {
+ .name = "conntrack",
+ .subsys_id = NFNL_SUBSYS_CTNETLINK,
+ .cb_count = IPCTNL_MSG_MAX,
+ .cb = ctnl_cb,
+};
+
+/* nfnetlink subsystem descriptor for expectation messages (table: ctnl_exp_cb) */
+static const struct nfnetlink_subsystem ctnl_exp_subsys = {
+ .name = "conntrack_expect",
+ .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP,
+ .cb_count = IPCTNL_MSG_EXP_MAX,
+ .cb = ctnl_exp_cb,
+};
+
+MODULE_ALIAS("ip_conntrack_netlink");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
+
+/*
+ * Module init: register the conntrack and expectation subsystems with
+ * nfnetlink and, when CONFIG_NF_CONNTRACK_EVENTS is set, the two event
+ * notifiers.  On any failure everything registered so far is unregistered
+ * in reverse order and the failing call's error is returned.
+ */
+static int __init ctnetlink_init(void)
+{
+ int ret;
+
+ printk("ctnetlink v%s: registering with nfnetlink.\n", version);
+ ret = nfnetlink_subsys_register(&ctnl_subsys);
+ if (ret < 0) {
+ printk("ctnetlink_init: cannot register with nfnetlink.\n");
+ goto err_out;
+ }
+
+ ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
+ if (ret < 0) {
+ printk("ctnetlink_init: cannot register exp with nfnetlink.\n");
+ goto err_unreg_subsys;
+ }
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ ret = nf_conntrack_register_notifier(&ctnl_notifier);
+ if (ret < 0) {
+ printk("ctnetlink_init: cannot register notifier.\n");
+ goto err_unreg_exp_subsys;
+ }
+
+ ret = nf_ct_expect_register_notifier(&ctnl_notifier_exp);
+ if (ret < 0) {
+ printk("ctnetlink_init: cannot expect register notifier.\n");
+ goto err_unreg_notifier;
+ }
+#endif
+
+ return 0;
+
+ /* unwind labels: each falls through to undo the earlier registrations;
+ * the first two only exist when the notifier code above is compiled in */
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+err_unreg_notifier:
+ nf_conntrack_unregister_notifier(&ctnl_notifier);
+err_unreg_exp_subsys:
+ nfnetlink_subsys_unregister(&ctnl_exp_subsys);
+#endif
+err_unreg_subsys:
+ nfnetlink_subsys_unregister(&ctnl_subsys);
+err_out:
+ return ret;
+}
+
+/*
+ * Module exit: undo ctnetlink_init() in reverse order - event notifiers
+ * first (when compiled in), then the expectation and conntrack subsystems.
+ */
+static void __exit ctnetlink_exit(void)
+{
+ printk("ctnetlink: unregistering from nfnetlink.\n");
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ nf_ct_expect_unregister_notifier(&ctnl_notifier_exp);
+ nf_conntrack_unregister_notifier(&ctnl_notifier);
+#endif
+
+ nfnetlink_subsys_unregister(&ctnl_exp_subsys);
+ nfnetlink_subsys_unregister(&ctnl_subsys);
+ return;
+}
+
+module_init(ctnetlink_init);
+module_exit(ctnetlink_exit);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
new file mode 100644
index 0000000..1bc3001
--- /dev/null
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -0,0 +1,629 @@
+/*
+ * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
+ * PPTP is a protocol for creating virtual private networks.
+ * It is a specification defined by Microsoft and some vendors
+ * working with Microsoft. PPTP is built on top of a modified
+ * version of the Internet Generic Routing Encapsulation Protocol.
+ * GRE is defined in RFC 1701 and RFC 1702. Documentation of
+ * PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * Limitations:
+ * - We blindly assume that control connections are always
+ * established in PNS->PAC direction. This is a violation
+ * of RFC 2637
+ * - We can only support one single call within each session
+ * TODO:
+ * - testing of incoming PPTP calls
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_proto_gre.h>
+#include <linux/netfilter/nf_conntrack_pptp.h>
+
+#define NF_CT_PPTP_VERSION "3.1"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
+MODULE_ALIAS("ip_conntrack_pptp");
+
+static DEFINE_SPINLOCK(nf_pptp_lock);
+
+/*
+ * Exported hook pointers.  This file only reads them (via
+ * rcu_dereference()) and calls them when they are non-NULL and the
+ * relevant conntrack has NAT status bits set.
+ * NOTE(review): presumably assigned by the PPTP NAT helper module - the
+ * writer is not visible in this file.
+ */
+
+/* called from pptp_outbound_pkt() for control messages in the original direction */
+int
+(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound);
+
+/* called from pptp_inbound_pkt() for control messages in the reply direction */
+int
+(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_inbound);
+
+/* called from exp_gre() with both GRE expectations before they are registered */
+void
+(*nf_nat_pptp_hook_exp_gre)(struct nf_conntrack_expect *expect_orig,
+ struct nf_conntrack_expect *expect_reply)
+ __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_exp_gre);
+
+/* called from pptp_expectfn() when the master conntrack has NAT status bits set */
+void
+(*nf_nat_pptp_hook_expectfn)(struct nf_conn *ct,
+ struct nf_conntrack_expect *exp) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn);
+
+#if defined(DEBUG) || defined(CONFIG_DYNAMIC_PRINTK_DEBUG)
+/*
+ * PptpControlMessageType names, used only in debug output.
+ * The table has 16 entries (indices 0..15); index 0 is the placeholder
+ * for unknown types.  Callers must range-check a message type before
+ * using it as an index.
+ */
+const char *const pptp_msg_name[] = {
+ "UNKNOWN_MESSAGE",
+ "START_SESSION_REQUEST",
+ "START_SESSION_REPLY",
+ "STOP_SESSION_REQUEST",
+ "STOP_SESSION_REPLY",
+ "ECHO_REQUEST",
+ "ECHO_REPLY",
+ "OUT_CALL_REQUEST",
+ "OUT_CALL_REPLY",
+ "IN_CALL_REQUEST",
+ "IN_CALL_REPLY",
+ "IN_CALL_CONNECT",
+ "CALL_CLEAR_REQUEST",
+ "CALL_DISCONNECT_NOTIFY",
+ "WAN_ERROR_NOTIFY",
+ "SET_LINK_INFO"
+};
+EXPORT_SYMBOL(pptp_msg_name);
+#endif
+
+/*
+ * Postfix unit macros: written after a number they expand to a
+ * multiplication, so "10 MINS" becomes "10 * 60 *HZ" (a duration in
+ * jiffies).  They are not parenthesised, so always wrap the whole
+ * expression in parentheses as the two timeouts below do.
+ */
+#define SECS *HZ
+#define MINS * 60 SECS
+#define HOURS * 60 MINS
+
+/* values pptp_expectfn() stores in ct->proto.gre.timeout / .stream_timeout */
+#define PPTP_GRE_TIMEOUT (10 MINS)
+#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS)
+
+/*
+ * expectfn installed on both GRE expectations by exp_gre().
+ *
+ * Raises the GRE timeouts of @ct, then either hands over to the NAT hook
+ * (if one is registered and the master conntrack has NAT status bits set)
+ * or removes the expectation for the inverse of @exp's tuple.
+ */
+static void pptp_expectfn(struct nf_conn *ct,
+ struct nf_conntrack_expect *exp)
+{
+ struct net *net = nf_ct_net(ct);
+ typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn;
+ pr_debug("increasing timeouts\n");
+
+ /* increase timeout of GRE data channel conntrack entry */
+ ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
+ ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;
+
+ /* Can you see how rusty this code is, compared with the pre-2.6.11
+ * one? That's what happened to my shiny newnat of 2002 ;( -HW */
+
+ /* the hook pointer is read under RCU and called inside the same
+ * read-side section */
+ rcu_read_lock();
+ nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn);
+ if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK)
+ nf_nat_pptp_expectfn(ct, exp);
+ else {
+ struct nf_conntrack_tuple inv_t;
+ struct nf_conntrack_expect *exp_other;
+
+ /* obviously this tuple inversion only works until you do NAT */
+ nf_ct_invert_tuplepr(&inv_t, &exp->tuple);
+ pr_debug("trying to unexpect other dir: ");
+ nf_ct_dump_tuple(&inv_t);
+
+ exp_other = nf_ct_expect_find_get(net, &inv_t);
+ if (exp_other) {
+ /* delete other expectation. */
+ pr_debug("found\n");
+ nf_ct_unexpect_related(exp_other);
+ nf_ct_expect_put(exp_other);
+ } else {
+ pr_debug("not found\n");
+ }
+ }
+ rcu_read_unlock();
+}
+
+/*
+ * Get rid of whatever tracks tuple @t: an established conntrack is timed
+ * out immediately; failing that, a pending expectation is removed.
+ * Returns 1 if either was found, 0 if neither exists.
+ */
+static int destroy_sibling_or_exp(struct net *net,
+				  const struct nf_conntrack_tuple *t)
+{
+	const struct nf_conntrack_tuple_hash *hash;
+	struct nf_conntrack_expect *pending;
+	struct nf_conn *peer;
+
+	pr_debug("trying to timeout ct or exp for tuple ");
+	nf_ct_dump_tuple(t);
+
+	hash = nf_conntrack_find_get(net, t);
+	if (hash != NULL) {
+		peer = nf_ct_tuplehash_to_ctrack(hash);
+		pr_debug("setting timeout of conntrack %p to 0\n", peer);
+		peer->proto.gre.timeout = 0;
+		peer->proto.gre.stream_timeout = 0;
+		/* run the timeout handler right away if the timer was pending */
+		if (del_timer(&peer->timeout))
+			peer->timeout.function((unsigned long)peer);
+		nf_ct_put(peer);
+		return 1;
+	}
+
+	pending = nf_ct_expect_find_get(net, t);
+	if (pending == NULL)
+		return 0;
+
+	pr_debug("unexpect_related of expect %p\n", pending);
+	nf_ct_unexpect_related(pending);
+	nf_ct_expect_put(pending);
+	return 1;
+}
+
+/*
+ * timeout GRE data connections
+ *
+ * Registered as the helper's destroy callback and also called on
+ * CALL_DISCONNECT_NOTIFY.  Destroys the GRE keymap of @ct, then builds the
+ * GRE tuple for each direction from the control connection's addresses and
+ * the recorded call ids and tears down the conntrack or expectation that
+ * matches it.
+ */
+static void pptp_destroy_siblings(struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+ const struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_tuple t;
+
+ nf_ct_gre_keymap_destroy(ct);
+
+ /* try original (pns->pac) tuple */
+ memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
+ t.dst.protonum = IPPROTO_GRE;
+ t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id;
+ t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id;
+ if (!destroy_sibling_or_exp(net, &t))
+ pr_debug("failed to timeout original pns->pac ct/exp\n");
+
+ /* try reply (pac->pns) tuple */
+ memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
+ t.dst.protonum = IPPROTO_GRE;
+ t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id;
+ t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id;
+ if (!destroy_sibling_or_exp(net, &t))
+ pr_debug("failed to timeout reply pac->pns ct/exp\n");
+}
+
+/*
+ * expect GRE connections (PNS->PAC and PAC->PNS direction)
+ *
+ * Allocates and registers one GRE expectation per direction for the call
+ * identified by @callid / @peer_callid, then adds the matching GRE keymap
+ * entries.  Returns 0 on success and 1 on any failure; on failure every
+ * expectation registered so far is removed again.  The two references
+ * obtained from nf_ct_expect_alloc() are dropped on all paths that got
+ * that far.
+ */
+static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid)
+{
+ struct nf_conntrack_expect *exp_orig, *exp_reply;
+ enum ip_conntrack_dir dir;
+ int ret = 1;
+ typeof(nf_nat_pptp_hook_exp_gre) nf_nat_pptp_exp_gre;
+
+ exp_orig = nf_ct_expect_alloc(ct);
+ if (exp_orig == NULL)
+ goto out;
+
+ exp_reply = nf_ct_expect_alloc(ct);
+ if (exp_reply == NULL)
+ goto out_put_orig;
+
+ /* original direction, PNS->PAC */
+ dir = IP_CT_DIR_ORIGINAL;
+ nf_ct_expect_init(exp_orig, NF_CT_EXPECT_CLASS_DEFAULT,
+ nf_ct_l3num(ct),
+ &ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[dir].tuple.dst.u3,
+ IPPROTO_GRE, &peer_callid, &callid);
+ exp_orig->expectfn = pptp_expectfn;
+
+ /* reply direction, PAC->PNS */
+ dir = IP_CT_DIR_REPLY;
+ nf_ct_expect_init(exp_reply, NF_CT_EXPECT_CLASS_DEFAULT,
+ nf_ct_l3num(ct),
+ &ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[dir].tuple.dst.u3,
+ IPPROTO_GRE, &callid, &peer_callid);
+ exp_reply->expectfn = pptp_expectfn;
+
+ /* let the NAT hook adjust both expectations before they are registered */
+ nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre);
+ if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK)
+ nf_nat_pptp_exp_gre(exp_orig, exp_reply);
+ if (nf_ct_expect_related(exp_orig) != 0)
+ goto out_put_both;
+ if (nf_ct_expect_related(exp_reply) != 0)
+ goto out_unexpect_orig;
+
+ /* Add GRE keymap entries */
+ if (nf_ct_gre_keymap_add(ct, IP_CT_DIR_ORIGINAL, &exp_orig->tuple) != 0)
+ goto out_unexpect_both;
+ if (nf_ct_gre_keymap_add(ct, IP_CT_DIR_REPLY, &exp_reply->tuple) != 0) {
+ nf_ct_gre_keymap_destroy(ct);
+ goto out_unexpect_both;
+ }
+ ret = 0;
+
+ /* the success path falls through here as well, with ret == 0 */
+out_put_both:
+ nf_ct_expect_put(exp_reply);
+out_put_orig:
+ nf_ct_expect_put(exp_orig);
+out:
+ return ret;
+
+ /* failure after registration: unregister, then rejoin the put path */
+out_unexpect_both:
+ nf_ct_unexpect_related(exp_reply);
+out_unexpect_orig:
+ nf_ct_unexpect_related(exp_orig);
+ goto out_put_both;
+}
+
+/*
+ * Process one PPTP control message travelling in the reply direction
+ * (treated as server -> client, PAC -> PNS).
+ *
+ * Advances the session/call state kept in the conntrack's helper data,
+ * sets up GRE expectations once a call is established and tears them down
+ * on disconnect.  Messages that do not fit the current state, or whose
+ * type is unknown, are logged and otherwise ignored.
+ *
+ * Returns the NAT inbound hook's verdict when the hook is registered and
+ * the conntrack has NAT status bits set, NF_ACCEPT in every other case
+ * (including invalid messages).
+ */
+static inline int
+pptp_inbound_pkt(struct sk_buff *skb,
+		 struct PptpControlHeader *ctlh,
+		 union pptp_ctrl_union *pptpReq,
+		 unsigned int reqlen,
+		 struct nf_conn *ct,
+		 enum ip_conntrack_info ctinfo)
+{
+	struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info;
+	u_int16_t msg;
+	__be16 cid = 0, pcid = 0;
+	typeof(nf_nat_pptp_hook_inbound) nf_nat_pptp_inbound;
+
+	msg = ntohs(ctlh->messageType);
+	/*
+	 * msg comes straight from the packet and has not been validated
+	 * yet, so bound it before indexing the name table (same expression
+	 * as the 'invalid' path below).
+	 */
+	pr_debug("inbound control message %s\n",
+		 msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0]);
+
+	/* inside the switch msg equals a known case value, so direct
+	 * indexing of pptp_msg_name is safe there */
+	switch (msg) {
+	case PPTP_START_SESSION_REPLY:
+		/* server confirms new control session */
+		if (info->sstate < PPTP_SESSION_REQUESTED)
+			goto invalid;
+		if (pptpReq->srep.resultCode == PPTP_START_OK)
+			info->sstate = PPTP_SESSION_CONFIRMED;
+		else
+			info->sstate = PPTP_SESSION_ERROR;
+		break;
+
+	case PPTP_STOP_SESSION_REPLY:
+		/* server confirms end of control session */
+		if (info->sstate > PPTP_SESSION_STOPREQ)
+			goto invalid;
+		if (pptpReq->strep.resultCode == PPTP_STOP_OK)
+			info->sstate = PPTP_SESSION_NONE;
+		else
+			info->sstate = PPTP_SESSION_ERROR;
+		break;
+
+	case PPTP_OUT_CALL_REPLY:
+		/* server accepted call, we now expect GRE frames */
+		if (info->sstate != PPTP_SESSION_CONFIRMED)
+			goto invalid;
+		if (info->cstate != PPTP_CALL_OUT_REQ &&
+		    info->cstate != PPTP_CALL_OUT_CONF)
+			goto invalid;
+
+		cid = pptpReq->ocack.callID;
+		pcid = pptpReq->ocack.peersCallID;
+		/* the reply must refer to the call id the client announced */
+		if (info->pns_call_id != pcid)
+			goto invalid;
+		pr_debug("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
+			 ntohs(cid), ntohs(pcid));
+
+		if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
+			info->cstate = PPTP_CALL_OUT_CONF;
+			info->pac_call_id = cid;
+			exp_gre(ct, cid, pcid);
+		} else
+			info->cstate = PPTP_CALL_NONE;
+		break;
+
+	case PPTP_IN_CALL_REQUEST:
+		/* server tells us about incoming call request */
+		if (info->sstate != PPTP_SESSION_CONFIRMED)
+			goto invalid;
+
+		cid = pptpReq->icreq.callID;
+		pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+		info->cstate = PPTP_CALL_IN_REQ;
+		info->pac_call_id = cid;
+		break;
+
+	case PPTP_IN_CALL_CONNECT:
+		/* server tells us about incoming call established */
+		if (info->sstate != PPTP_SESSION_CONFIRMED)
+			goto invalid;
+		if (info->cstate != PPTP_CALL_IN_REP &&
+		    info->cstate != PPTP_CALL_IN_CONF)
+			goto invalid;
+
+		pcid = pptpReq->iccon.peersCallID;
+		cid = info->pac_call_id;
+
+		if (info->pns_call_id != pcid)
+			goto invalid;
+
+		pr_debug("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
+		info->cstate = PPTP_CALL_IN_CONF;
+
+		/* we expect a GRE connection from PAC to PNS */
+		exp_gre(ct, cid, pcid);
+		break;
+
+	case PPTP_CALL_DISCONNECT_NOTIFY:
+		/* server confirms disconnect */
+		cid = pptpReq->disc.callID;
+		pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+		info->cstate = PPTP_CALL_NONE;
+
+		/* untrack this call id, unexpect GRE packets */
+		pptp_destroy_siblings(ct);
+		break;
+
+	case PPTP_WAN_ERROR_NOTIFY:
+	case PPTP_ECHO_REQUEST:
+	case PPTP_ECHO_REPLY:
+		/* I don't have to explain these ;) */
+		break;
+
+	default:
+		goto invalid;
+	}
+
+	nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound);
+	if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK)
+		return nf_nat_pptp_inbound(skb, ct, ctinfo, ctlh, pptpReq);
+	return NF_ACCEPT;
+
+invalid:
+	pr_debug("invalid %s: type=%d cid=%u pcid=%u "
+		 "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+		 msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+		 msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
+		 ntohs(info->pns_call_id), ntohs(info->pac_call_id));
+	return NF_ACCEPT;
+}
+
+/*
+ * Process one PPTP control message travelling in the original direction
+ * (treated as client -> server, PNS -> PAC).
+ *
+ * Advances the session/call state kept in the conntrack's helper data and
+ * records the call ids the client announces.  Messages that do not fit
+ * the current state, or whose type is unknown, are logged and otherwise
+ * ignored.
+ *
+ * Returns the NAT outbound hook's verdict when the hook is registered and
+ * the conntrack has NAT status bits set, NF_ACCEPT in every other case
+ * (including invalid messages).
+ */
+static inline int
+pptp_outbound_pkt(struct sk_buff *skb,
+		  struct PptpControlHeader *ctlh,
+		  union pptp_ctrl_union *pptpReq,
+		  unsigned int reqlen,
+		  struct nf_conn *ct,
+		  enum ip_conntrack_info ctinfo)
+{
+	struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info;
+	u_int16_t msg;
+	__be16 cid = 0, pcid = 0;
+	typeof(nf_nat_pptp_hook_outbound) nf_nat_pptp_outbound;
+
+	msg = ntohs(ctlh->messageType);
+	/*
+	 * msg comes straight from the packet and has not been validated
+	 * yet, so bound it before indexing the name table (same expression
+	 * as the 'invalid' path below).
+	 */
+	pr_debug("outbound control message %s\n",
+		 msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0]);
+
+	/* inside the switch msg equals a known case value, so direct
+	 * indexing of pptp_msg_name is safe there */
+	switch (msg) {
+	case PPTP_START_SESSION_REQUEST:
+		/* client requests for new control session */
+		if (info->sstate != PPTP_SESSION_NONE)
+			goto invalid;
+		info->sstate = PPTP_SESSION_REQUESTED;
+		break;
+
+	case PPTP_STOP_SESSION_REQUEST:
+		/* client requests end of control session */
+		info->sstate = PPTP_SESSION_STOPREQ;
+		break;
+
+	case PPTP_OUT_CALL_REQUEST:
+		/* client initiating connection to server */
+		if (info->sstate != PPTP_SESSION_CONFIRMED)
+			goto invalid;
+		info->cstate = PPTP_CALL_OUT_REQ;
+		/* track PNS call id */
+		cid = pptpReq->ocreq.callID;
+		pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+		info->pns_call_id = cid;
+		break;
+
+	case PPTP_IN_CALL_REPLY:
+		/* client answers incoming call */
+		if (info->cstate != PPTP_CALL_IN_REQ &&
+		    info->cstate != PPTP_CALL_IN_REP)
+			goto invalid;
+
+		cid = pptpReq->icack.callID;
+		pcid = pptpReq->icack.peersCallID;
+		/* the reply must refer to the call id the server announced */
+		if (info->pac_call_id != pcid)
+			goto invalid;
+		pr_debug("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
+			 ntohs(cid), ntohs(pcid));
+
+		if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
+			/* part two of the three-way handshake */
+			info->cstate = PPTP_CALL_IN_REP;
+			info->pns_call_id = cid;
+		} else
+			info->cstate = PPTP_CALL_NONE;
+		break;
+
+	case PPTP_CALL_CLEAR_REQUEST:
+		/* client requests hangup of call */
+		if (info->sstate != PPTP_SESSION_CONFIRMED)
+			goto invalid;
+		/* FUTURE: iterate over all calls and check if
+		 * call ID is valid.  We don't do this without newnat,
+		 * because we only know about last call */
+		info->cstate = PPTP_CALL_CLEAR_REQ;
+		break;
+
+	case PPTP_SET_LINK_INFO:
+	case PPTP_ECHO_REQUEST:
+	case PPTP_ECHO_REPLY:
+		/* I don't have to explain these ;) */
+		break;
+
+	default:
+		goto invalid;
+	}
+
+	nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound);
+	if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK)
+		return nf_nat_pptp_outbound(skb, ct, ctinfo, ctlh, pptpReq);
+	return NF_ACCEPT;
+
+invalid:
+	pr_debug("invalid %s: type=%d cid=%u pcid=%u "
+		 "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+		 msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+		 msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
+		 ntohs(info->pns_call_id), ntohs(info->pac_call_id));
+	return NF_ACCEPT;
+}
+
+/*
+ * Minimum payload size per control message type, indexed by message type.
+ * conntrack_pptp_help() ignores a message whose remaining length is below
+ * the entry for its type.  Types without an initializer here (the echo
+ * messages) have size 0, so they never fail that check.
+ */
+static const unsigned int pptp_msg_size[] = {
+ [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest),
+ [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply),
+ [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest),
+ [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply),
+ [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest),
+ [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply),
+ [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest),
+ [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply),
+ [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected),
+ [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest),
+ [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
+ [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify),
+ [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo),
+};
+
+/*
+ * track caller id inside control connection, call expect_related
+ *
+ * Helper entry point for packets on the PPTP control connection.  Walks
+ * TCP header -> PPTP packet header -> control header -> message body,
+ * giving up with NF_ACCEPT whenever a piece is missing, the packet is not
+ * a control packet, or a known message type is shorter than its minimum
+ * size.  A well-formed message is handed to pptp_outbound_pkt() or
+ * pptp_inbound_pkt() according to the packet's direction, under
+ * nf_pptp_lock, and that function's verdict is returned.
+ */
+static int
+conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+
+{
+ int dir = CTINFO2DIR(ctinfo);
+ const struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info;
+ const struct tcphdr *tcph;
+ struct tcphdr _tcph;
+ const struct pptp_pkt_hdr *pptph;
+ struct pptp_pkt_hdr _pptph;
+ struct PptpControlHeader _ctlh, *ctlh;
+ union pptp_ctrl_union _pptpReq, *pptpReq;
+ unsigned int tcplen = skb->len - protoff;
+ unsigned int datalen, reqlen, nexthdr_off;
+ int oldsstate, oldcstate;
+ int ret;
+ u_int16_t msg;
+
+ /* don't do any tracking before tcp handshake complete */
+ if (ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY)
+ return NF_ACCEPT;
+
+ nexthdr_off = protoff;
+ tcph = skb_header_pointer(skb, nexthdr_off, sizeof(_tcph), &_tcph);
+ BUG_ON(!tcph);
+ /* skip the TCP header including options (doff counts 32-bit words) */
+ nexthdr_off += tcph->doff * 4;
+ datalen = tcplen - tcph->doff * 4;
+
+ pptph = skb_header_pointer(skb, nexthdr_off, sizeof(_pptph), &_pptph);
+ if (!pptph) {
+ pr_debug("no full PPTP header, can't track\n");
+ return NF_ACCEPT;
+ }
+ nexthdr_off += sizeof(_pptph);
+ datalen -= sizeof(_pptph);
+
+ /* if it's not a control message we can't do anything with it */
+ if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
+ ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
+ pr_debug("not a control packet\n");
+ return NF_ACCEPT;
+ }
+
+ ctlh = skb_header_pointer(skb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+ if (!ctlh)
+ return NF_ACCEPT;
+ nexthdr_off += sizeof(_ctlh);
+ datalen -= sizeof(_ctlh);
+
+ reqlen = datalen;
+ msg = ntohs(ctlh->messageType);
+ /* known message type but truncated body: leave it alone */
+ if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
+ return NF_ACCEPT;
+ /* never copy more than the local union can hold */
+ if (reqlen > sizeof(*pptpReq))
+ reqlen = sizeof(*pptpReq);
+
+ pptpReq = skb_header_pointer(skb, nexthdr_off, reqlen, &_pptpReq);
+ if (!pptpReq)
+ return NF_ACCEPT;
+
+ /* snapshot for the debug message below; taken before the lock */
+ oldsstate = info->sstate;
+ oldcstate = info->cstate;
+
+ spin_lock_bh(&nf_pptp_lock);
+
+ /* FIXME: We just blindly assume that the control connection is always
+ * established from PNS->PAC. However, RFC makes no guarantee */
+ if (dir == IP_CT_DIR_ORIGINAL)
+ /* client -> server (PNS -> PAC) */
+ ret = pptp_outbound_pkt(skb, ctlh, pptpReq, reqlen, ct,
+ ctinfo);
+ else
+ /* server -> client (PAC -> PNS) */
+ ret = pptp_inbound_pkt(skb, ctlh, pptpReq, reqlen, ct,
+ ctinfo);
+ pr_debug("sstate: %d->%d, cstate: %d->%d\n",
+ oldsstate, info->sstate, oldcstate, info->cstate);
+ spin_unlock_bh(&nf_pptp_lock);
+
+ return ret;
+}
+
+/*
+ * Expectation policy for the PPTP helper: at most two expectations
+ * (exp_gre() registers one per direction).
+ * NOTE(review): .timeout is presumably in seconds (5 minutes) - confirm
+ * against struct nf_conntrack_expect_policy.
+ */
+static const struct nf_conntrack_expect_policy pptp_exp_policy = {
+ .max_expected = 2,
+ .timeout = 5 * 60,
+};
+
+/* control protocol helper */
+/* matches AF_INET TCP connections on PPTP_CONTROL_PORT; .help runs per
+ * packet, .destroy tears down the GRE siblings of a control connection */
+static struct nf_conntrack_helper pptp __read_mostly = {
+ .name = "pptp",
+ .me = THIS_MODULE,
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.tcp.port = __constant_htons(PPTP_CONTROL_PORT),
+ .tuple.dst.protonum = IPPROTO_TCP,
+ .help = conntrack_pptp_help,
+ .destroy = pptp_destroy_siblings,
+ .expect_policy = &pptp_exp_policy,
+};
+
+/* per-namespace exit hook: flush the GRE keymap of @net */
+static void nf_conntrack_pptp_net_exit(struct net *net)
+{
+ nf_ct_gre_keymap_flush(net);
+}
+
+/* only an exit hook is provided; no .init is set */
+static struct pernet_operations nf_conntrack_pptp_net_ops = {
+ .exit = nf_conntrack_pptp_net_exit,
+};
+
+/*
+ * Module init: register the PPTP helper, then the per-namespace
+ * operations.  If the second step fails the helper is unregistered again.
+ * Returns the failing step's error, or the result of the last step.
+ */
+static int __init nf_conntrack_pptp_init(void)
+{
+	int err = nf_conntrack_helper_register(&pptp);
+
+	if (err < 0)
+		return err;
+
+	err = register_pernet_subsys(&nf_conntrack_pptp_net_ops);
+	if (err >= 0)
+		return err;
+
+	/* roll back the helper registration */
+	nf_conntrack_helper_unregister(&pptp);
+	return err;
+}
+
+/* Module exit: unregister the helper, then the per-namespace operations. */
+static void __exit nf_conntrack_pptp_fini(void)
+{
+ nf_conntrack_helper_unregister(&pptp);
+ unregister_pernet_subsys(&nf_conntrack_pptp_net_ops);
+}
+
+module_init(nf_conntrack_pptp_init);
+module_exit(nf_conntrack_pptp_fini);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
new file mode 100644
index 0000000..592d733
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -0,0 +1,370 @@
+/* L3/L4 protocol support for nf_conntrack. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
+#include <linux/stddef.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/moduleparam.h>
+#include <linux/notifier.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly;
+struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_l3protos);
+
+static DEFINE_MUTEX(nf_ct_proto_mutex);
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Register @table under @path unless *@header is already set, then bump
+ * the optional user counter.
+ *
+ * Returns 0 on success or -ENOMEM when the sysctl registration fails.
+ */
+static int
+nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_path *path,
+		      struct ctl_table *table, unsigned int *users)
+{
+	struct ctl_table_header *hdr = *header;
+
+	if (!hdr) {
+		hdr = register_sysctl_paths(path, table);
+		if (!hdr)
+			return -ENOMEM;
+		*header = hdr;
+	}
+
+	if (users)
+		++*users;
+
+	return 0;
+}
+
+/*
+ * Drop one user of the table behind *@header.  When a user counter is
+ * supplied, the table is only unregistered once that counter hits zero;
+ * without a counter it is unregistered unconditionally.
+ */
+static void
+nf_ct_unregister_sysctl(struct ctl_table_header **header,
+			struct ctl_table *table, unsigned int *users)
+{
+	if (users) {
+		*users -= 1;
+		if (*users > 0)
+			return;
+	}
+
+	unregister_sysctl_table(*header);
+	*header = NULL;
+}
+#endif
+
+/*
+ * Look up the L4 tracker for (@l3proto, @l4proto).  Falls back to the
+ * generic tracker when the family is out of range or has no table yet.
+ */
+struct nf_conntrack_l4proto *
+__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
+{
+	if (unlikely(l3proto >= AF_MAX))
+		return &nf_conntrack_l4proto_generic;
+	if (unlikely(nf_ct_protos[l3proto] == NULL))
+		return &nf_conntrack_l4proto_generic;
+
+	return rcu_dereference(nf_ct_protos[l3proto][l4proto]);
+}
+EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
+
+/*
+ * Find the L4 tracker and take a reference on its module.  Never returns
+ * NULL: when the module reference cannot be taken, the generic tracker
+ * is handed out instead.
+ */
+struct nf_conntrack_l4proto *
+nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto)
+{
+	struct nf_conntrack_l4proto *found, *p;
+
+	rcu_read_lock();
+	found = __nf_ct_l4proto_find(l3proto, l4proto);
+	if (try_module_get(found->me))
+		p = found;
+	else
+		p = &nf_conntrack_l4proto_generic;
+	rcu_read_unlock();
+
+	return p;
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
+
+/* Release the module reference held on L4 tracker @p. */
+void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p)
+{
+	struct module *owner = p->me;
+
+	module_put(owner);
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
+
+/*
+ * Find the L3 tracker for @l3proto and take a reference on its module;
+ * the generic L3 tracker is returned when that reference cannot be taken.
+ */
+struct nf_conntrack_l3proto *
+nf_ct_l3proto_find_get(u_int16_t l3proto)
+{
+	struct nf_conntrack_l3proto *found, *p;
+
+	rcu_read_lock();
+	found = __nf_ct_l3proto_find(l3proto);
+	if (try_module_get(found->me))
+		p = found;
+	else
+		p = &nf_conntrack_l3proto_generic;
+	rcu_read_unlock();
+
+	return p;
+}
+EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
+
+/* Release the module reference held on L3 tracker @p. */
+void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
+{
+	struct module *owner = p->me;
+
+	module_put(owner);
+}
+EXPORT_SYMBOL_GPL(nf_ct_l3proto_put);
+
+/*
+ * Pin the L3 tracker module for @l3proto, asking for the module
+ * "nf_conntrack-<l3proto>" to be loaded while only the generic tracker
+ * is found.  The lookup is repeated after every successful module request.
+ *
+ * Returns 0 once a non-generic tracker is pinned, -EPROTOTYPE when the
+ * module request fails.
+ */
+int
+nf_ct_l3proto_try_module_get(unsigned short l3proto)
+{
+	struct nf_conntrack_l3proto *p;
+
+	for (;;) {
+		p = nf_ct_l3proto_find_get(l3proto);
+		if (p != &nf_conntrack_l3proto_generic)
+			return 0;
+
+		if (request_module("nf_conntrack-%d", l3proto) != 0)
+			return -EPROTOTYPE;
+	}
+}
+EXPORT_SYMBOL_GPL(nf_ct_l3proto_try_module_get);
+
+/*
+ * Drop the module reference of the L3 tracker registered for @l3proto.
+ * No rcu_read_lock() is taken: the caller is expected to still hold the
+ * reference being dropped.
+ */
+void nf_ct_l3proto_module_put(unsigned short l3proto)
+{
+	struct nf_conntrack_l3proto *tracker = __nf_ct_l3proto_find(l3proto);
+
+	module_put(tracker->me);
+}
+EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
+
+/* Iterator callback: non-zero when conntrack @i uses the L3 tracker @data. */
+static int kill_l3proto(struct nf_conn *i, void *data)
+{
+	struct nf_conntrack_l3proto *l3proto = data;
+
+	return nf_ct_l3num(i) == l3proto->l3proto;
+}
+
+/*
+ * Iterator callback: non-zero when conntrack @i matches both the L4
+ * protocol number and the L3 family of the tracker passed in @data.
+ */
+static int kill_l4proto(struct nf_conn *i, void *data)
+{
+	struct nf_conntrack_l4proto *l4proto = data;
+
+	if (nf_ct_protonum(i) != l4proto->l4proto)
+		return 0;
+	return nf_ct_l3num(i) == l4proto->l3proto;
+}
+
+/*
+ * Register the sysctl table of an L3 tracker, if it has one.
+ * Always 0 when CONFIG_SYSCTL is off or no table is provided.
+ */
+static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto)
+{
+#ifdef CONFIG_SYSCTL
+	if (l3proto->ctl_table == NULL)
+		return 0;
+
+	return nf_ct_register_sysctl(&l3proto->ctl_table_header,
+				     l3proto->ctl_table_path,
+				     l3proto->ctl_table, NULL);
+#else
+	return 0;
+#endif
+}
+
+/* Unregister the sysctl table of an L3 tracker if one was registered. */
+static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto)
+{
+#ifdef CONFIG_SYSCTL
+	if (l3proto->ctl_table_header == NULL)
+		return;
+
+	nf_ct_unregister_sysctl(&l3proto->ctl_table_header,
+				l3proto->ctl_table, NULL);
+#endif
+}
+
+/*
+ * Install @proto as the L3 tracker for its address family.
+ *
+ * Returns 0 on success, -EBUSY when the family is out of range or the
+ * slot no longer holds the generic tracker, or the error from sysctl
+ * registration.
+ */
+int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
+{
+	int ret;
+
+	if (proto->l3proto >= AF_MAX)
+		return -EBUSY;
+
+	mutex_lock(&nf_ct_proto_mutex);
+	if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
+		ret = -EBUSY;
+	} else {
+		ret = nf_ct_l3proto_register_sysctl(proto);
+		if (ret >= 0)
+			rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
+					   proto);
+	}
+	mutex_unlock(&nf_ct_proto_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
+
+/*
+ * Remove @proto: put the generic tracker back into its slot, drop its
+ * sysctl table, wait for an RCU grace period and finally clean up the
+ * conntrack entries of that family in every network namespace.
+ */
+void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
+{
+	struct net *net;
+
+	BUG_ON(proto->l3proto >= AF_MAX);
+
+	mutex_lock(&nf_ct_proto_mutex);
+	BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
+	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
+			   &nf_conntrack_l3proto_generic);
+	nf_ct_l3proto_unregister_sysctl(proto);
+	mutex_unlock(&nf_ct_proto_mutex);
+
+	synchronize_rcu();
+
+	/* Remove all conntrack entries for this protocol */
+	rtnl_lock();
+	for_each_net(net) {
+		nf_ct_iterate_cleanup(net, kill_l3proto, proto);
+	}
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
+
+/*
+ * Register the sysctl tables of an L4 tracker: the main table first and,
+ * with CONFIG_NF_CONNTRACK_PROC_COMPAT, the compat table afterwards.  A
+ * failure of the compat registration undoes the main one.
+ *
+ * Returns 0 on success or the error of the failing registration.
+ */
+static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto)
+{
+	int err = 0;
+
+#ifdef CONFIG_SYSCTL
+	if (l4proto->ctl_table != NULL) {
+		err = nf_ct_register_sysctl(l4proto->ctl_table_header,
+					    nf_net_netfilter_sysctl_path,
+					    l4proto->ctl_table,
+					    l4proto->ctl_table_users);
+		if (err < 0)
+			return err;
+	}
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+	if (l4proto->ctl_compat_table != NULL) {
+		err = nf_ct_register_sysctl(&l4proto->ctl_compat_table_header,
+					    nf_net_ipv4_netfilter_sysctl_path,
+					    l4proto->ctl_compat_table, NULL);
+		if (err != 0)
+			nf_ct_unregister_sysctl(l4proto->ctl_table_header,
+						l4proto->ctl_table,
+						l4proto->ctl_table_users);
+	}
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+#endif /* CONFIG_SYSCTL */
+	return err;
+}
+
+/*
+ * Undo nf_ct_l4proto_register_sysctl(): release the main table when a
+ * header is present, then the compat table when one was registered.
+ */
+static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto)
+{
+#ifdef CONFIG_SYSCTL
+	struct ctl_table_header **main_hdr = l4proto->ctl_table_header;
+
+	if (main_hdr != NULL && *main_hdr != NULL)
+		nf_ct_unregister_sysctl(main_hdr, l4proto->ctl_table,
+					l4proto->ctl_table_users);
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+	if (l4proto->ctl_compat_table_header != NULL)
+		nf_ct_unregister_sysctl(&l4proto->ctl_compat_table_header,
+					l4proto->ctl_compat_table, NULL);
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+#endif /* CONFIG_SYSCTL */
+}
+
+/*
+ * Install @l4proto as the tracker for its (L3 family, L4 protocol) pair.
+ * The per-family table is allocated on first use and pre-filled with the
+ * generic tracker.
+ *
+ * Returns 0 on success, -EBUSY when the family is out of range or the
+ * slot is already taken, -ENOMEM when the per-family table cannot be
+ * allocated, or the error from sysctl registration.
+ *
+ * FIXME: Allow NULL functions and sub in pointers to generic for
+ * them. --RR
+ */
+int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
+{
+	int ret = 0;
+
+	if (l4proto->l3proto >= PF_MAX)
+		return -EBUSY;
+
+	mutex_lock(&nf_ct_proto_mutex);
+	if (!nf_ct_protos[l4proto->l3proto]) {
+		/* l3proto may be loaded later. */
+		struct nf_conntrack_l4proto **proto_array;
+		int i;
+
+		proto_array = kmalloc(MAX_NF_CT_PROTO *
+				      sizeof(struct nf_conntrack_l4proto *),
+				      GFP_KERNEL);
+		if (proto_array == NULL) {
+			ret = -ENOMEM;
+			goto out_unlock;
+		}
+
+		for (i = 0; i < MAX_NF_CT_PROTO; i++)
+			proto_array[i] = &nf_conntrack_l4proto_generic;
+
+		/*
+		 * __nf_ct_l4proto_find() reads this table without taking
+		 * nf_ct_proto_mutex, so the initialised contents must be
+		 * visible before the table pointer is; publish it with a
+		 * write barrier instead of a plain store.
+		 */
+		rcu_assign_pointer(nf_ct_protos[l4proto->l3proto],
+				   proto_array);
+	} else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
+					&nf_conntrack_l4proto_generic) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	ret = nf_ct_l4proto_register_sysctl(l4proto);
+	if (ret < 0)
+		goto out_unlock;
+
+	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+			   l4proto);
+
+out_unlock:
+	mutex_unlock(&nf_ct_proto_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
+
+/*
+ * Remove @l4proto: put the generic tracker back into its slot, drop its
+ * sysctl tables, wait for an RCU grace period and then clean up the
+ * matching conntrack entries in every network namespace.
+ */
+void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
+{
+	struct net *net;
+
+	BUG_ON(l4proto->l3proto >= PF_MAX);
+
+	mutex_lock(&nf_ct_proto_mutex);
+	BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
+	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+			   &nf_conntrack_l4proto_generic);
+	nf_ct_l4proto_unregister_sysctl(l4proto);
+	mutex_unlock(&nf_ct_proto_mutex);
+
+	synchronize_rcu();
+
+	/* Remove all conntrack entries for this protocol */
+	rtnl_lock();
+	for_each_net(net) {
+		nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
+	}
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
+
+/*
+ * Set up protocol support: register the generic L4 tracker's sysctls and
+ * point every L3 slot at the generic L3 tracker.
+ * Returns 0 or the sysctl registration error.
+ */
+int nf_conntrack_proto_init(void)
+{
+	unsigned int family = 0;
+	int err = nf_ct_l4proto_register_sysctl(&nf_conntrack_l4proto_generic);
+
+	if (err < 0)
+		return err;
+
+	while (family < AF_MAX) {
+		rcu_assign_pointer(nf_ct_l3protos[family],
+				   &nf_conntrack_l3proto_generic);
+		family++;
+	}
+	return 0;
+}
+
+/*
+ * Tear down protocol support: drop the generic L4 tracker's sysctls and
+ * free every per-family L4 table (kfree() is given unused slots as well).
+ */
+void nf_conntrack_proto_fini(void)
+{
+	unsigned int family = 0;
+
+	nf_ct_l4proto_unregister_sysctl(&nf_conntrack_l4proto_generic);
+
+	/* free l3proto protocol tables */
+	while (family < PF_MAX) {
+		kfree(nf_ct_protos[family]);
+		family++;
+	}
+}
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
new file mode 100644
index 0000000..8fcf176
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -0,0 +1,816 @@
+/*
+ * DCCP connection tracking protocol helper
+ *
+ * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sysctl.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/dccp.h>
+
+#include <linux/netfilter/nfnetlink_conntrack.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_log.h>
+
+static DEFINE_RWLOCK(dccp_lock);
+
+static int nf_ct_dccp_loose __read_mostly = 1;
+
+/* Timeouts are based on values from RFC4340:
+ *
+ * - REQUEST:
+ *
+ * 8.1.2. Client Request
+ *
+ * A client MAY give up on its DCCP-Requests after some time
+ * (3 minutes, for example).
+ *
+ * - RESPOND:
+ *
+ * 8.1.3. Server Response
+ *
+ * It MAY also leave the RESPOND state for CLOSED after a timeout of
+ * not less than 4MSL (8 minutes);
+ *
+ * - PARTOPEN:
+ *
+ * 8.1.5. Handshake Completion
+ *
+ * If the client remains in PARTOPEN for more than 4MSL (8 minutes),
+ * it SHOULD reset the connection with Reset Code 2, "Aborted".
+ *
+ * - OPEN:
+ *
+ * The DCCP timestamp overflows after 11.9 hours. If the connection
+ * stays idle this long the sequence number won't be recognized
+ * as valid anymore.
+ *
+ * - CLOSEREQ/CLOSING:
+ *
+ * 8.3. Termination
+ *
+ * The retransmission timer should initially be set to go off in two
+ * round-trip times and should back off to not less than once every
+ * 64 seconds ...
+ *
+ * - TIMEWAIT:
+ *
+ * 4.3. States
+ *
+ * A server or client socket remains in this state for 2MSL (4 minutes)
+ * after the connection has been torn down, ...
+ */
+
+#define DCCP_MSL (2 * 60 * HZ)
+
+/* Per-state conntrack timeouts, expressed in jiffies (multiples of HZ). */
+static unsigned int dccp_timeout[CT_DCCP_MAX + 1] __read_mostly = {
+	[CT_DCCP_REQUEST]	= 2 * DCCP_MSL,
+	[CT_DCCP_RESPOND]	= 4 * DCCP_MSL,
+	[CT_DCCP_PARTOPEN]	= 4 * DCCP_MSL,
+	[CT_DCCP_OPEN]		= 12 * 3600 * HZ,
+	[CT_DCCP_CLOSEREQ]	= 64 * HZ,
+	[CT_DCCP_CLOSING]	= 64 * HZ,
+	[CT_DCCP_TIMEWAIT]	= 2 * DCCP_MSL,
+};
+
+/* Printable name of each conntrack DCCP state, indexed by state value. */
+static const char * const dccp_state_names[] = {
+	[CT_DCCP_NONE]		= "NONE",
+	[CT_DCCP_REQUEST]	= "REQUEST",
+	[CT_DCCP_RESPOND]	= "RESPOND",
+	[CT_DCCP_PARTOPEN]	= "PARTOPEN",
+	[CT_DCCP_OPEN]		= "OPEN",
+	[CT_DCCP_CLOSEREQ]	= "CLOSEREQ",
+	[CT_DCCP_CLOSING]	= "CLOSING",
+	[CT_DCCP_TIMEWAIT]	= "TIMEWAIT",
+	[CT_DCCP_IGNORE]	= "IGNORE",
+	[CT_DCCP_INVALID]	= "INVALID",
+};
+
+#define sNO CT_DCCP_NONE
+#define sRQ CT_DCCP_REQUEST
+#define sRS CT_DCCP_RESPOND
+#define sPO CT_DCCP_PARTOPEN
+#define sOP CT_DCCP_OPEN
+#define sCR CT_DCCP_CLOSEREQ
+#define sCG CT_DCCP_CLOSING
+#define sTW CT_DCCP_TIMEWAIT
+#define sIG CT_DCCP_IGNORE
+#define sIV CT_DCCP_INVALID
+
+/*
+ * DCCP state transition table
+ *
+ * The assumption is the same as for TCP tracking:
+ *
+ * We are the man in the middle. All the packets go through us but might
+ * get lost in transit to the destination. It is assumed that the destination
+ * can't receive segments we haven't seen.
+ *
+ * The following states exist:
+ *
+ * NONE: Initial state, expecting Request
+ * REQUEST: Request seen, waiting for Response from server
+ * RESPOND: Response from server seen, waiting for Ack from client
+ * PARTOPEN: Ack after Response seen, waiting for packet other than Response,
+ * Reset or Sync from server
+ * OPEN: Packet other than Response, Reset or Sync seen
+ * CLOSEREQ: CloseReq from server seen, expecting Close from client
+ * CLOSING: Close seen, expecting Reset
+ * TIMEWAIT: Reset seen
+ * IGNORE: Not determinable whether packet is valid
+ *
+ * Some states exist only on one side of the connection: REQUEST, RESPOND,
+ * PARTOPEN, CLOSEREQ. For the other side these states are equivalent to
+ * the one it was in before.
+ *
+ * Packets are marked as ignored (sIG) if we don't know if they're valid
+ * (for example a reincarnation of a connection we didn't notice is dead
+ * already) and the server may send back a connection closing Reset or a
+ * Response. They're also used for Sync/SyncAck packets, which we don't
+ * care about.
+ */
+static const u_int8_t
+dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = {
+ [CT_DCCP_ROLE_CLIENT] = {
+ [DCCP_PKT_REQUEST] = {
+ /*
+ * sNO -> sRQ Regular Request
+ * sRQ -> sRQ Retransmitted Request or reincarnation
+ * sRS -> sRS Retransmitted Request (apparently Response
+ * got lost after we saw it) or reincarnation
+ * sPO -> sIG Ignore, conntrack might be out of sync
+ * sOP -> sIG Ignore, conntrack might be out of sync
+ * sCR -> sIG Ignore, conntrack might be out of sync
+ * sCG -> sIG Ignore, conntrack might be out of sync
+ * sTW -> sRQ Reincarnation
+ *
+ * sNO, sRQ, sRS, sPO. sOP, sCR, sCG, sTW, */
+ sRQ, sRQ, sRS, sIG, sIG, sIG, sIG, sRQ,
+ },
+ [DCCP_PKT_RESPONSE] = {
+ /*
+ * sNO -> sIV Invalid
+ * sRQ -> sIG Ignore, might be response to ignored Request
+ * sRS -> sIG Ignore, might be response to ignored Request
+ * sPO -> sIG Ignore, might be response to ignored Request
+ * sOP -> sIG Ignore, might be response to ignored Request
+ * sCR -> sIG Ignore, might be response to ignored Request
+ * sCG -> sIG Ignore, might be response to ignored Request
+ * sTW -> sIV Invalid, reincarnation in reverse direction
+ * goes through sRQ
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIV,
+ },
+ [DCCP_PKT_ACK] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.)
+ * sPO -> sPO Retransmitted Ack for Response, remain in PARTOPEN
+ * sOP -> sOP Regular ACK, remain in OPEN
+ * sCR -> sCR Ack in CLOSEREQ MAY be processed (8.3.)
+ * sCG -> sCG Ack in CLOSING MAY be processed (8.3.)
+ * sTW -> sIV
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV
+ },
+ [DCCP_PKT_DATA] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sIV No connection
+ * sPO -> sIV MUST use DataAck in PARTOPEN state (8.1.5.)
+ * sOP -> sOP Regular Data packet
+ * sCR -> sCR Data in CLOSEREQ MAY be processed (8.3.)
+ * sCG -> sCG Data in CLOSING MAY be processed (8.3.)
+ * sTW -> sIV
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sIV, sOP, sCR, sCG, sIV,
+ },
+ [DCCP_PKT_DATAACK] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.)
+ * sPO -> sPO Remain in PARTOPEN state
+ * sOP -> sOP Regular DataAck packet in OPEN state
+ * sCR -> sCR DataAck in CLOSEREQ MAY be processed (8.3.)
+ * sCG -> sCG DataAck in CLOSING MAY be processed (8.3.)
+ * sTW -> sIV
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV
+ },
+ [DCCP_PKT_CLOSEREQ] = {
+ /*
+ * CLOSEREQ may only be sent by the server.
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV
+ },
+ [DCCP_PKT_CLOSE] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sIV No connection
+ * sPO -> sCG Client-initiated close
+ * sOP -> sCG Client-initiated close
+ * sCR -> sCG Close in response to CloseReq (8.3.)
+ * sCG -> sCG Retransmit
+ * sTW -> sIV Late retransmit, already in TIME_WAIT
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sCG, sCG, sCG, sIV, sIV
+ },
+ [DCCP_PKT_RESET] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sTW Sync received or timeout, SHOULD send Reset (8.1.1.)
+ * sRS -> sTW Response received without Request
+ * sPO -> sTW Timeout, SHOULD send Reset (8.1.5.)
+ * sOP -> sTW Connection reset
+ * sCR -> sTW Connection reset
+ * sCG -> sTW Connection reset
+ * sTW -> sIG Ignore (don't refresh timer)
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sTW, sTW, sTW, sTW, sTW, sTW, sIG
+ },
+ [DCCP_PKT_SYNC] = {
+ /*
+ * We currently ignore Sync packets
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ },
+ [DCCP_PKT_SYNCACK] = {
+ /*
+ * We currently ignore SyncAck packets
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ },
+ },
+ [CT_DCCP_ROLE_SERVER] = {
+ [DCCP_PKT_REQUEST] = {
+ /*
+ * sNO -> sIV Invalid
+ * sRQ -> sIG Ignore, conntrack might be out of sync
+ * sRS -> sIG Ignore, conntrack might be out of sync
+ * sPO -> sIG Ignore, conntrack might be out of sync
+ * sOP -> sIG Ignore, conntrack might be out of sync
+ * sCR -> sIG Ignore, conntrack might be out of sync
+ * sCG -> sIG Ignore, conntrack might be out of sync
+ * sTW -> sRQ Reincarnation, must reverse roles
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sRQ
+ },
+ [DCCP_PKT_RESPONSE] = {
+ /*
+ * sNO -> sIV Response without Request
+ * sRQ -> sRS Response to clients Request
+ * sRS -> sRS Retransmitted Response (8.1.3. SHOULD NOT)
+ * sPO -> sIG Response to an ignored Request or late retransmit
+ * sOP -> sIG Ignore, might be response to ignored Request
+ * sCR -> sIG Ignore, might be response to ignored Request
+ * sCG -> sIG Ignore, might be response to ignored Request
+ * sTW -> sIV Invalid, Request from client in sTW moves to sRQ
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sRS, sRS, sIG, sIG, sIG, sIG, sIV
+ },
+ [DCCP_PKT_ACK] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sIV No connection
+ * sPO -> sOP Enter OPEN state (8.1.5.)
+ * sOP -> sOP Regular Ack in OPEN state
+ * sCR -> sIV Waiting for Close from client
+ * sCG -> sCG Ack in CLOSING MAY be processed (8.3.)
+ * sTW -> sIV
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV
+ },
+ [DCCP_PKT_DATA] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sIV No connection
+ * sPO -> sOP Enter OPEN state (8.1.5.)
+ * sOP -> sOP Regular Data packet in OPEN state
+ * sCR -> sIV Waiting for Close from client
+ * sCG -> sCG Data in CLOSING MAY be processed (8.3.)
+ * sTW -> sIV
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV
+ },
+ [DCCP_PKT_DATAACK] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sIV No connection
+ * sPO -> sOP Enter OPEN state (8.1.5.)
+ * sOP -> sOP Regular DataAck in OPEN state
+ * sCR -> sIV Waiting for Close from client
+ * sCG -> sCG Data in CLOSING MAY be processed (8.3.)
+ * sTW -> sIV
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV
+ },
+ [DCCP_PKT_CLOSEREQ] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sIV No connection
+ * sPO -> sOP -> sCR Move directly to CLOSEREQ (8.1.5.)
+ * sOP -> sCR CloseReq in OPEN state
+ * sCR -> sCR Retransmit
+ * sCG -> sCR Simultaneous close, client sends another Close
+ * sTW -> sIV Already closed
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sCR, sCR, sCR, sCR, sIV
+ },
+ [DCCP_PKT_CLOSE] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sIV No connection
+ * sRS -> sIV No connection
+ * sPO -> sOP -> sCG Move directly to CLOSING
+ * sOP -> sCG Move to CLOSING
+ * sCR -> sIV Close after CloseReq is invalid
+ * sCG -> sCG Retransmit
+ * sTW -> sIV Already closed
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIV, sIV, sIV, sCG, sCG, sIV, sCG, sIV
+ },
+ [DCCP_PKT_RESET] = {
+ /*
+ * sNO -> sIV No connection
+ * sRQ -> sTW Reset in response to Request
+ * sRS -> sTW Timeout, SHOULD send Reset (8.1.3.)
+ * sPO -> sTW Timeout, SHOULD send Reset (8.1.3.)
+ * sOP -> sTW
+ * sCR -> sTW
+ * sCG -> sTW
+ * sTW -> sIG Ignore (don't refresh timer)
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW, sTW */
+ sIV, sTW, sTW, sTW, sTW, sTW, sTW, sTW, sIG
+ },
+ [DCCP_PKT_SYNC] = {
+ /*
+ * We currently ignore Sync packets
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ },
+ [DCCP_PKT_SYNCACK] = {
+ /*
+ * We currently ignore SyncAck packets
+ *
+ * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
+ sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ },
+ },
+};
+
+/*
+ * Fill the port fields of @tuple from the DCCP header found at @dataoff.
+ * Returns false when the header cannot be read from @skb.
+ */
+static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+			      struct nf_conntrack_tuple *tuple)
+{
+	struct dccp_hdr hdr_buf;
+	const struct dccp_hdr *dh;
+
+	dh = skb_header_pointer(skb, dataoff, sizeof(hdr_buf), &hdr_buf);
+	if (!dh)
+		return false;
+
+	tuple->src.u.dccp.port = dh->dccph_sport;
+	tuple->dst.u.dccp.port = dh->dccph_dport;
+	return true;
+}
+
+/* Build the reply-direction ports in @inv by swapping those of @tuple. */
+static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv,
+			      const struct nf_conntrack_tuple *tuple)
+{
+	inv->dst.u.dccp.port = tuple->src.u.dccp.port;
+	inv->src.u.dccp.port = tuple->dst.u.dccp.port;
+	return true;
+}
+
+/*
+ * Decide whether the first packet seen for a flow may create a DCCP
+ * conntrack entry, and initialise the per-connection DCCP state.
+ *
+ * The packet type is run through the client row of the state table,
+ * starting from CT_DCCP_NONE:
+ *  - a result of CT_DCCP_REQUEST is always accepted,
+ *  - CT_DCCP_INVALID is always rejected,
+ *  - anything else is accepted only while nf_ct_dccp_loose is non-zero.
+ *
+ * Returns true when the connection is accepted; false otherwise, after
+ * optionally logging the packet.
+ */
+static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
+		     unsigned int dataoff)
+{
+	struct net *net = nf_ct_net(ct);
+	struct dccp_hdr _dh, *dh;
+	const char *msg;
+	u_int8_t state;
+
+	/*
+	 * The last argument is the bounce buffer used when the header is
+	 * not available linearly.  It must be _dh: handing in &dh would
+	 * make the copy land on the pointer variable and the stack
+	 * behind it.
+	 */
+	dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
+	BUG_ON(dh == NULL);
+
+	state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
+	switch (state) {
+	default:
+		if (nf_ct_dccp_loose == 0) {
+			msg = "nf_ct_dccp: not picking up existing connection ";
+			goto out_invalid;
+		}
+		/* fall through */
+	case CT_DCCP_REQUEST:
+		break;
+	case CT_DCCP_INVALID:
+		msg = "nf_ct_dccp: invalid state transition ";
+		goto out_invalid;
+	}
+
+	ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
+	ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
+	ct->proto.dccp.state = CT_DCCP_NONE;
+	return true;
+
+out_invalid:
+	if (LOG_INVALID(net, IPPROTO_DCCP))
+		nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg);
+	return false;
+}
+
+/*
+ * Return the acknowledgement number of a DCCP packet, assembled from the
+ * high and low parts stored right after the basic header.
+ */
+static u64 dccp_ack_seq(const struct dccp_hdr *dh)
+{
+	const struct dccp_hdr_ack_bits *ack =
+		(void *)dh + __dccp_basic_hdr_len(dh);
+	u64 high = ntohs(ack->dccph_ack_nr_high);
+	u64 low = ntohl(ack->dccph_ack_nr_low);
+
+	return (high << 32) + low;
+}
+
+/*
+ * Per-packet state tracking for an existing DCCP conntrack entry.
+ *
+ * Looks up the next state from dccp_state_table using the role of the
+ * sending direction, the packet type and the current state, then updates
+ * roles, handshake sequence number, ASSURED bit and timeout accordingly.
+ *
+ * Returns NF_ACCEPT for accepted or ignored packets and -NF_ACCEPT for an
+ * invalid state transition.
+ *
+ * NOTE(review): dccp_ack_seq() reads past the basic header and
+ * dccp_hdr_seq() presumably does too, while only sizeof(struct dccp_hdr)
+ * bytes are fetched here - confirm this is safe for non-linear skbs.
+ */
+static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
+		       unsigned int dataoff, enum ip_conntrack_info ctinfo,
+		       u_int8_t pf, unsigned int hooknum)
+{
+	struct net *net = nf_ct_net(ct);
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct dccp_hdr _dh, *dh;
+	u_int8_t type, old_state, new_state;
+	enum ct_dccp_roles role;
+
+	/* _dh is the bounce buffer; passing &dh here would let a header
+	 * copy overwrite the pointer variable and the stack behind it. */
+	dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
+	BUG_ON(dh == NULL);
+	type = dh->dccph_type;
+
+	if (type == DCCP_PKT_RESET &&
+	    !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+		/* Tear down connection immediately if only reply is a RESET */
+		nf_ct_kill_acct(ct, ctinfo, skb);
+		return NF_ACCEPT;
+	}
+
+	write_lock_bh(&dccp_lock);
+
+	role = ct->proto.dccp.role[dir];
+	old_state = ct->proto.dccp.state;
+	new_state = dccp_state_table[role][type][old_state];
+
+	switch (new_state) {
+	case CT_DCCP_REQUEST:
+		if (old_state == CT_DCCP_TIMEWAIT &&
+		    role == CT_DCCP_ROLE_SERVER) {
+			/* Reincarnation in the reverse direction: reopen and
+			 * reverse client/server roles. */
+			ct->proto.dccp.role[dir] = CT_DCCP_ROLE_CLIENT;
+			ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_SERVER;
+		}
+		break;
+	case CT_DCCP_RESPOND:
+		/* remember the Response's sequence number for the Ack check */
+		if (old_state == CT_DCCP_REQUEST)
+			ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh);
+		break;
+	case CT_DCCP_PARTOPEN:
+		/* the Ack must acknowledge the recorded Response */
+		if (old_state == CT_DCCP_RESPOND &&
+		    type == DCCP_PKT_ACK &&
+		    dccp_ack_seq(dh) == ct->proto.dccp.handshake_seq)
+			set_bit(IPS_ASSURED_BIT, &ct->status);
+		break;
+	case CT_DCCP_IGNORE:
+		/*
+		 * Connection tracking might be out of sync, so we ignore
+		 * packets that might establish a new connection and resync
+		 * if the server responds with a valid Response.
+		 */
+		if (ct->proto.dccp.last_dir == !dir &&
+		    ct->proto.dccp.last_pkt == DCCP_PKT_REQUEST &&
+		    type == DCCP_PKT_RESPONSE) {
+			ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_CLIENT;
+			ct->proto.dccp.role[dir] = CT_DCCP_ROLE_SERVER;
+			ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh);
+			new_state = CT_DCCP_RESPOND;
+			break;
+		}
+		ct->proto.dccp.last_dir = dir;
+		ct->proto.dccp.last_pkt = type;
+
+		write_unlock_bh(&dccp_lock);
+		if (LOG_INVALID(net, IPPROTO_DCCP))
+			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_dccp: invalid packet ignored ");
+		return NF_ACCEPT;
+	case CT_DCCP_INVALID:
+		write_unlock_bh(&dccp_lock);
+		if (LOG_INVALID(net, IPPROTO_DCCP))
+			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_dccp: invalid state transition ");
+		return -NF_ACCEPT;
+	}
+
+	ct->proto.dccp.last_dir = dir;
+	ct->proto.dccp.last_pkt = type;
+	ct->proto.dccp.state = new_state;
+	write_unlock_bh(&dccp_lock);
+	nf_ct_refresh_acct(ct, ctinfo, skb, dccp_timeout[new_state]);
+
+	return NF_ACCEPT;
+}
+
+/*
+ * Sanity-check a DCCP packet before it is tracked: header presence, data
+ * offset, checksum coverage, checksum (only in PRE_ROUTING and when the
+ * checksum sysctl is enabled) and packet type.
+ *
+ * Returns NF_ACCEPT when the packet passes every check, -NF_ACCEPT
+ * otherwise (after optionally logging the reason).
+ */
+static int dccp_error(struct net *net, struct sk_buff *skb,
+		      unsigned int dataoff, enum ip_conntrack_info *ctinfo,
+		      u_int8_t pf, unsigned int hooknum)
+{
+	struct dccp_hdr _dh, *dh;
+	unsigned int dccp_len = skb->len - dataoff;
+	unsigned int cscov;
+	const char *msg;
+
+	/* _dh is the bounce buffer; passing &dh here would let a header
+	 * copy overwrite the pointer variable and the stack behind it. */
+	dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
+	if (dh == NULL) {
+		msg = "nf_ct_dccp: short packet ";
+		goto out_invalid;
+	}
+
+	/* dccph_doff counts 32-bit words */
+	if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) ||
+	    dh->dccph_doff * 4 > dccp_len) {
+		msg = "nf_ct_dccp: truncated/malformed packet ";
+		goto out_invalid;
+	}
+
+	/* a zero coverage field means the whole packet is checksummed */
+	cscov = dccp_len;
+	if (dh->dccph_cscov) {
+		cscov = (dh->dccph_cscov - 1) * 4;
+		if (cscov > dccp_len) {
+			msg = "nf_ct_dccp: bad checksum coverage ";
+			goto out_invalid;
+		}
+	}
+
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	    nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP,
+				pf)) {
+		msg = "nf_ct_dccp: bad checksum ";
+		goto out_invalid;
+	}
+
+	if (dh->dccph_type >= DCCP_PKT_INVALID) {
+		msg = "nf_ct_dccp: reserved packet type ";
+		goto out_invalid;
+	}
+
+	return NF_ACCEPT;
+
+out_invalid:
+	if (LOG_INVALID(net, IPPROTO_DCCP))
+		nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg);
+	return -NF_ACCEPT;
+}
+
+/* Print the source and destination ports of @tuple in host byte order. */
+static int dccp_print_tuple(struct seq_file *s,
+			    const struct nf_conntrack_tuple *tuple)
+{
+	u_int16_t sport = ntohs(tuple->src.u.dccp.port);
+	u_int16_t dport = ntohs(tuple->dst.u.dccp.port);
+
+	return seq_printf(s, "sport=%hu dport=%hu ", sport, dport);
+}
+
+/* Print the name of the connection's current DCCP state. */
+static int dccp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
+{
+	const char *state_name = dccp_state_names[ct->proto.dccp.state];
+
+	return seq_printf(s, "%s ", state_name);
+}
+
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+/*
+ * Dump the DCCP state of @ct into @skb as a nested CTA_PROTOINFO_DCCP
+ * attribute, under the read side of dccp_lock.
+ * Returns 0 on success, -1 when the attribute could not be added.
+ */
+static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
+			  const struct nf_conn *ct)
+{
+	struct nlattr *nest;
+	int ret = -1;
+
+	read_lock_bh(&dccp_lock);
+	nest = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED);
+	if (nest == NULL)
+		goto nla_put_failure;
+	/* NLA_PUT_U8 jumps to nla_put_failure when the put fails */
+	NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state);
+	nla_nest_end(skb, nest);
+	ret = 0;
+
+nla_put_failure:
+	read_unlock_bh(&dccp_lock);
+	return ret;
+}
+
+/* Netlink validation policy: the DCCP state attribute is a single u8. */
+static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = {
+	[CTA_PROTOINFO_DCCP_STATE]	= { .type = NLA_U8 },
+};
+
+/*
+ * Set the DCCP state of @ct from a netlink CTA_PROTOINFO_DCCP attribute.
+ *
+ * Returns 0 when the attribute is absent or was applied, the parse error
+ * from nla_parse_nested(), or -EINVAL when the state attribute is missing
+ * or not below CT_DCCP_IGNORE.
+ */
+static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
+{
+	struct nlattr *tb[CTA_PROTOINFO_DCCP_MAX + 1];
+	struct nlattr *attr = cda[CTA_PROTOINFO_DCCP];
+	u_int8_t state;
+	int err;
+
+	if (attr == NULL)
+		return 0;
+
+	err = nla_parse_nested(tb, CTA_PROTOINFO_DCCP_MAX, attr,
+			       dccp_nla_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[CTA_PROTOINFO_DCCP_STATE] == NULL)
+		return -EINVAL;
+
+	state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]);
+	if (state >= CT_DCCP_IGNORE)
+		return -EINVAL;
+
+	write_lock_bh(&dccp_lock);
+	ct->proto.dccp.state = state;
+	write_unlock_bh(&dccp_lock);
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_SYSCTL
+static unsigned int dccp_sysctl_table_users;
+static struct ctl_table_header *dccp_sysctl_header;
+/*
+ * Sysctl knobs: one jiffies-based timeout per tracked DCCP state plus the
+ * "loose" pickup switch.  The entry with ctl_name 0 ends the table.
+ */
+static ctl_table dccp_sysctl_table[] = {
+	{
+		.procname	= "nf_conntrack_dccp_timeout_request",
+		.ctl_name	= CTL_UNNUMBERED,
+		.data		= &dccp_timeout[CT_DCCP_REQUEST],
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "nf_conntrack_dccp_timeout_respond",
+		.ctl_name	= CTL_UNNUMBERED,
+		.data		= &dccp_timeout[CT_DCCP_RESPOND],
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "nf_conntrack_dccp_timeout_partopen",
+		.ctl_name	= CTL_UNNUMBERED,
+		.data		= &dccp_timeout[CT_DCCP_PARTOPEN],
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "nf_conntrack_dccp_timeout_open",
+		.ctl_name	= CTL_UNNUMBERED,
+		.data		= &dccp_timeout[CT_DCCP_OPEN],
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "nf_conntrack_dccp_timeout_closereq",
+		.ctl_name	= CTL_UNNUMBERED,
+		.data		= &dccp_timeout[CT_DCCP_CLOSEREQ],
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "nf_conntrack_dccp_timeout_closing",
+		.ctl_name	= CTL_UNNUMBERED,
+		.data		= &dccp_timeout[CT_DCCP_CLOSING],
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "nf_conntrack_dccp_timeout_timewait",
+		.ctl_name	= CTL_UNNUMBERED,
+		.data		= &dccp_timeout[CT_DCCP_TIMEWAIT],
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "nf_conntrack_dccp_loose",
+		.ctl_name	= CTL_UNNUMBERED,
+		.data		= &nf_ct_dccp_loose,
+		.maxlen		= sizeof(nf_ct_dccp_loose),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.ctl_name	= 0,
+	}
+};
+#endif /* CONFIG_SYSCTL */
+
+/* DCCP tracker for IPv4; shares its callbacks and sysctls with dccp_proto6. */
+static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
+	.name			= "dccp",
+	.l3proto		= AF_INET,
+	.l4proto		= IPPROTO_DCCP,
+	.pkt_to_tuple		= dccp_pkt_to_tuple,
+	.invert_tuple		= dccp_invert_tuple,
+	.error			= dccp_error,
+	.new			= dccp_new,
+	.packet			= dccp_packet,
+	.print_tuple		= dccp_print_tuple,
+	.print_conntrack	= dccp_print_conntrack,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.to_nlattr		= dccp_to_nlattr,
+	.from_nlattr		= nlattr_to_dccp,
+	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
+	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
+	.nla_policy		= nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_SYSCTL
+	.ctl_table		= dccp_sysctl_table,
+	.ctl_table_header	= &dccp_sysctl_header,
+	.ctl_table_users	= &dccp_sysctl_table_users,
+#endif
+};
+
+/* DCCP tracker for IPv6; shares its callbacks and sysctls with dccp_proto4. */
+static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
+	.name			= "dccp",
+	.l3proto		= AF_INET6,
+	.l4proto		= IPPROTO_DCCP,
+	.pkt_to_tuple		= dccp_pkt_to_tuple,
+	.invert_tuple		= dccp_invert_tuple,
+	.error			= dccp_error,
+	.new			= dccp_new,
+	.packet			= dccp_packet,
+	.print_tuple		= dccp_print_tuple,
+	.print_conntrack	= dccp_print_conntrack,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.to_nlattr		= dccp_to_nlattr,
+	.from_nlattr		= nlattr_to_dccp,
+	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
+	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
+	.nla_policy		= nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_SYSCTL
+	.ctl_table		= dccp_sysctl_table,
+	.ctl_table_header	= &dccp_sysctl_header,
+	.ctl_table_users	= &dccp_sysctl_table_users,
+#endif
+};
+
+/*
+ * Module entry point: register the IPv4 tracker, then the IPv6 one.  The
+ * IPv4 registration is undone when the IPv6 one fails.
+ * Returns 0 on success or the failing registration's error.
+ */
+static int __init nf_conntrack_proto_dccp_init(void)
+{
+	int err = nf_conntrack_l4proto_register(&dccp_proto4);
+
+	if (err < 0)
+		return err;
+
+	err = nf_conntrack_l4proto_register(&dccp_proto6);
+	if (err < 0) {
+		nf_conntrack_l4proto_unregister(&dccp_proto4);
+		return err;
+	}
+
+	return 0;
+}
+
+/* Module exit: unregister both trackers in reverse registration order. */
+static void __exit nf_conntrack_proto_dccp_fini(void)
+{
+	nf_conntrack_l4proto_unregister(&dccp_proto6);
+	nf_conntrack_l4proto_unregister(&dccp_proto4);
+}
+
+module_init(nf_conntrack_proto_dccp_init);
+module_exit(nf_conntrack_proto_dccp_fini);
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("DCCP connection tracking protocol helper");
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
new file mode 100644
index 0000000..dbe680a
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -0,0 +1,109 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+
+/* Timeout, in jiffies, that packet() passes to nf_ct_refresh_acct() for every
+ * packet; defaults to 600*HZ and is exported through the sysctl tables below. */
+static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
+
+/*
+ * Fill in the layer-4 part of @tuple for a protocol without a dedicated
+ * tracker. Nothing is read from @skb or @dataoff: both per-protocol unions
+ * are simply zeroed. Always returns true.
+ */
+static bool generic_pkt_to_tuple(const struct sk_buff *skb,
+				 unsigned int dataoff,
+				 struct nf_conntrack_tuple *tuple)
+{
+	tuple->dst.u.all = 0;
+	tuple->src.u.all = 0;
+	return true;
+}
+
+/*
+ * Build the layer-4 part of the reply tuple. @orig is not consulted; the
+ * per-protocol unions of @tuple are zeroed on both sides. Always returns
+ * true.
+ */
+static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
+				 const struct nf_conntrack_tuple *orig)
+{
+	tuple->dst.u.all = 0;
+	tuple->src.u.all = 0;
+	return true;
+}
+
+/* Print out the per-protocol part of the tuple. The generic tracker has no
+ * per-protocol data, so nothing is written to @s and 0 is returned. */
+static int generic_print_tuple(struct seq_file *s,
+ const struct nf_conntrack_tuple *tuple)
+{
+ return 0;
+}
+
+/* Returns verdict for packet. This implementation never reports a packet as
+ * invalid: it refreshes the conntrack timer with nf_ct_generic_timeout and
+ * always returns NF_ACCEPT. @dataoff, @pf and @hooknum are unused. */
+static int packet(struct nf_conn *ct,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ u_int8_t pf,
+ unsigned int hooknum)
+{
+ nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_generic_timeout);
+ return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. No per-connection
+ * state is initialised; every new connection is accepted (returns true). */
+static bool new(struct nf_conn *ct, const struct sk_buff *skb,
+ unsigned int dataoff)
+{
+ return true;
+}
+
+#ifdef CONFIG_SYSCTL
+/* Storage for the registered sysctl header; referenced by
+ * nf_conntrack_l4proto_generic.ctl_table_header. */
+static struct ctl_table_header *generic_sysctl_header;
+/* Exposes nf_ct_generic_timeout as "nf_conntrack_generic_timeout" through
+ * proc_dointvec_jiffies; the zeroed entry terminates the table. */
+static struct ctl_table generic_sysctl_table[] = {
+ {
+ .procname = "nf_conntrack_generic_timeout",
+ .data = &nf_ct_generic_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+/* Same variable under the legacy "ip_conntrack_generic_timeout" name. */
+static struct ctl_table generic_compat_sysctl_table[] = {
+ {
+ .procname = "ip_conntrack_generic_timeout",
+ .data = &nf_ct_generic_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+#endif /* CONFIG_SYSCTL */
+
+/* Descriptor of the generic layer-4 tracker (PF_UNSPEC, protocol 0, shown as
+ * "unknown"). It is not static, so it is visible to other translation units.
+ * No netlink or destroy hooks are provided. */
+struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
+{
+ .l3proto = PF_UNSPEC,
+ .l4proto = 0,
+ .name = "unknown",
+ .pkt_to_tuple = generic_pkt_to_tuple,
+ .invert_tuple = generic_invert_tuple,
+ .print_tuple = generic_print_tuple,
+ .packet = packet,
+ .new = new,
+#ifdef CONFIG_SYSCTL
+ .ctl_table_header = &generic_sysctl_header,
+ .ctl_table = generic_sysctl_table,
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+ .ctl_compat_table = generic_compat_sysctl_table,
+#endif
+#endif
+};
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
new file mode 100644
index 0000000..4ab62ad
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -0,0 +1,353 @@
+/*
+ * ip_conntrack_proto_gre.c - Version 3.0
+ *
+ * Connection tracking protocol helper module for GRE.
+ *
+ * GRE is a generic encapsulation protocol, which is generally not very
+ * well suited for NAT, as it has no protocol-specific part such as port
+ * numbers.
+ *
+ * It has an optional key field, which may help us distinguish two
+ * connections between the same two hosts.
+ *
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
+ *
+ * PPTP is built on top of a modified version of GRE, and has a mandatory
+ * field called "CallID", which serves us for the same purpose as the key
+ * field in plain GRE.
+ *
+ * Documentation about PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+#include <linux/seq_file.h>
+#include <linux/in.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/dst.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <linux/netfilter/nf_conntrack_proto_gre.h>
+#include <linux/netfilter/nf_conntrack_pptp.h>
+
+/* Initial per-connection timeouts, in jiffies, installed by gre_new():
+ * GRE_TIMEOUT until a reply has been seen, GRE_STREAM_TIMEOUT afterwards
+ * (see gre_packet()). */
+#define GRE_TIMEOUT (30 * HZ)
+#define GRE_STREAM_TIMEOUT (180 * HZ)
+
+/* Index of this module's slot in the per-network-namespace generic array;
+ * filled in by register_pernet_gen_subsys() in nf_ct_proto_gre_init(). */
+static int proto_gre_net_id;
+/* Per-namespace GRE state: the list of keymap entries and the rwlock that
+ * guards it. */
+struct netns_proto_gre {
+ rwlock_t keymap_lock;
+ struct list_head keymap_list;
+};
+
+/*
+ * Drop every keymap entry that belongs to @net: each entry is unlinked from
+ * the namespace's keymap list and freed while the list's write lock is held.
+ */
+void nf_ct_gre_keymap_flush(struct net *net)
+{
+	struct netns_proto_gre *gre_ns = net_generic(net, proto_gre_net_id);
+	struct nf_ct_gre_keymap *entry;
+	struct nf_ct_gre_keymap *next;
+
+	write_lock_bh(&gre_ns->keymap_lock);
+
+	/* _safe variant: the current entry is freed inside the loop */
+	list_for_each_entry_safe(entry, next, &gre_ns->keymap_list, list) {
+		list_del(&entry->list);
+		kfree(entry);
+	}
+
+	write_unlock_bh(&gre_ns->keymap_lock);
+}
+EXPORT_SYMBOL(nf_ct_gre_keymap_flush);
+
+/*
+ * Return 1 if keymap entry @km matches tuple @t, 0 otherwise. The l3
+ * protocol number, both l3 addresses, the l4 protocol number and the
+ * destination key are compared; the source key is deliberately not part of
+ * the comparison (it is what gre_keymap_lookup() is looking for).
+ */
+static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
+				const struct nf_conntrack_tuple *t)
+{
+	const struct nf_conntrack_tuple *mapped = &km->tuple;
+
+	if (mapped->src.l3num != t->src.l3num)
+		return 0;
+	if (memcmp(&mapped->src.u3, &t->src.u3, sizeof(t->src.u3)))
+		return 0;
+	if (memcmp(&mapped->dst.u3, &t->dst.u3, sizeof(t->dst.u3)))
+		return 0;
+	if (mapped->dst.protonum != t->dst.protonum)
+		return 0;
+
+	return mapped->dst.u.all == t->dst.u.all;
+}
+
+/*
+ * Look up the source key for tuple @t in the keymap list of @net.
+ * Returns the source key of the first matching entry, or 0 when no entry
+ * matches. The list is walked under the read lock.
+ */
+static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t)
+{
+	struct netns_proto_gre *gre_ns = net_generic(net, proto_gre_net_id);
+	struct nf_ct_gre_keymap *entry;
+	__be16 src_key = 0;
+
+	read_lock_bh(&gre_ns->keymap_lock);
+	list_for_each_entry(entry, &gre_ns->keymap_list, list) {
+		if (!gre_key_cmpfn(entry, t))
+			continue;
+
+		src_key = entry->tuple.src.u.gre.key;
+		break;
+	}
+	read_unlock_bh(&gre_ns->keymap_lock);
+
+	pr_debug("lookup src key 0x%x for ", src_key);
+	nf_ct_dump_tuple(t);
+
+	return src_key;
+}
+
+/* add a single keymap entry, associate with specified master ct
+ *
+ * @ct:  conntrack whose helper area holds the keymap pointers
+ * @dir: direction slot (keymap[dir]) the new entry is stored in
+ * @t:   tuple copied into the new entry
+ *
+ * Returns 0 on success or when the slot already holds an entry that is on
+ * the list and matches @t (treated as a retransmission), -EEXIST when the
+ * slot is occupied by something else, -ENOMEM when allocation fails.
+ */
+int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
+ struct nf_conntrack_tuple *t)
+{
+ struct net *net = nf_ct_net(ct);
+ struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
+ struct nf_conn_help *help = nfct_help(ct);
+ struct nf_ct_gre_keymap **kmp, *km;
+
+ /* NOTE(review): help is dereferenced without a NULL check - presumably
+ * callers only pass conntracks that carry a helper; confirm. */
+ kmp = &help->help.ct_pptp_info.keymap[dir];
+ if (*kmp) {
+ /* check whether it's a retransmission */
+ read_lock_bh(&net_gre->keymap_lock);
+ list_for_each_entry(km, &net_gre->keymap_list, list) {
+ if (gre_key_cmpfn(km, t) && km == *kmp) {
+ read_unlock_bh(&net_gre->keymap_lock);
+ return 0;
+ }
+ }
+ read_unlock_bh(&net_gre->keymap_lock);
+ pr_debug("trying to override keymap_%s for ct %p\n",
+ dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct);
+ return -EEXIST;
+ }
+
+ /* GFP_ATOMIC: the allocation must not sleep */
+ km = kmalloc(sizeof(*km), GFP_ATOMIC);
+ if (!km)
+ return -ENOMEM;
+ memcpy(&km->tuple, t, sizeof(*t));
+ /* the slot is published before the entry is linked into the list */
+ *kmp = km;
+
+ pr_debug("adding new entry %p: ", km);
+ nf_ct_dump_tuple(&km->tuple);
+
+ write_lock_bh(&net_gre->keymap_lock);
+ list_add_tail(&km->list, &net_gre->keymap_list);
+ write_unlock_bh(&net_gre->keymap_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add);
+
+/*
+ * Destroy the keymap entries associated with the specified master ct.
+ * For each direction whose slot is populated, the entry is unlinked from the
+ * namespace list, freed, and the slot reset to NULL - all under the write
+ * lock.
+ */
+void nf_ct_gre_keymap_destroy(struct nf_conn *ct)
+{
+	struct net *net = nf_ct_net(ct);
+	struct netns_proto_gre *gre_ns = net_generic(net, proto_gre_net_id);
+	struct nf_conn_help *help = nfct_help(ct);
+	struct nf_ct_gre_keymap **slot;
+	enum ip_conntrack_dir dir;
+
+	pr_debug("entering for ct %p\n", ct);
+
+	write_lock_bh(&gre_ns->keymap_lock);
+	for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) {
+		slot = &help->help.ct_pptp_info.keymap[dir];
+		if (!*slot)
+			continue;
+
+		pr_debug("removing %p from list\n", *slot);
+		list_del(&(*slot)->list);
+		kfree(*slot);
+		*slot = NULL;
+	}
+	write_unlock_bh(&gre_ns->keymap_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
+
+/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
+
+/*
+ * Invert the GRE part of a tuple: the reply's source key is the original's
+ * destination key and vice versa. Always returns true.
+ */
+static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
+			     const struct nf_conntrack_tuple *orig)
+{
+	tuple->src.u.gre.key = orig->dst.u.gre.key;
+	tuple->dst.u.gre.key = orig->src.u.gre.key;
+
+	return true;
+}
+
+/* gre hdr info to tuple
+ *
+ * Fills the GRE keys of @tuple from the header found at @dataoff in @skb.
+ * Returns false only for a version-PPTP header whose protocol field is not
+ * GRE_PROTOCOL_PPTP; every other path returns true.
+ */
+static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+ struct nf_conntrack_tuple *tuple)
+{
+ /* namespace comes from the skb's device, or from its dst when no
+ * device is attached */
+ struct net *net = dev_net(skb->dev ? skb->dev : skb->dst->dev);
+ const struct gre_hdr_pptp *pgrehdr;
+ struct gre_hdr_pptp _pgrehdr;
+ __be16 srckey;
+ const struct gre_hdr *grehdr;
+ struct gre_hdr _grehdr;
+
+ /* first only delinearize old RFC1701 GRE header */
+ grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
+ if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
+ /* try to behave like "nf_conntrack_proto_generic" */
+ tuple->src.u.all = 0;
+ tuple->dst.u.all = 0;
+ return true;
+ }
+
+ /* PPTP header is variable length, only need up to the call_id field */
+ pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
+ /* NOTE(review): this path returns true without writing the tuple keys -
+ * presumably the caller pre-zeroes the tuple; confirm. */
+ if (!pgrehdr)
+ return true;
+
+ if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
+ pr_debug("GRE_VERSION_PPTP but unknown proto\n");
+ return false;
+ }
+
+ /* destination key is the packet's call_id; the source key is whatever
+ * the keymap has recorded for this tuple (0 if nothing matches) */
+ tuple->dst.u.gre.key = pgrehdr->call_id;
+ srckey = gre_keymap_lookup(net, tuple);
+ tuple->src.u.gre.key = srckey;
+
+ return true;
+}
+
+/*
+ * Print the GRE part of a tuple to @s as "srckey=0x.. dstkey=0x.. ", with
+ * both keys converted to host byte order. Returns seq_printf()'s result.
+ */
+static int gre_print_tuple(struct seq_file *s,
+			   const struct nf_conntrack_tuple *tuple)
+{
+	unsigned int src_key = ntohs(tuple->src.u.gre.key);
+	unsigned int dst_key = ntohs(tuple->dst.u.gre.key);
+
+	return seq_printf(s, "srckey=0x%x dstkey=0x%x ", src_key, dst_key);
+}
+
+/* print private data for conntrack: the two per-connection GRE timeouts,
+ * divided by HZ (i.e. shown in seconds). Returns seq_printf()'s result. */
+static int gre_print_conntrack(struct seq_file *s,
+ const struct nf_conn *ct)
+{
+ return seq_printf(s, "timeout=%u, stream_timeout=%u ",
+ (ct->proto.gre.timeout / HZ),
+ (ct->proto.gre.stream_timeout / HZ));
+}
+
+/*
+ * Per-packet handler. Always returns NF_ACCEPT and refreshes the conntrack
+ * timer: with the short timeout until a reply has been seen, with the
+ * stream timeout afterwards. In the latter case the connection is also
+ * marked assured and a status event is cached.
+ */
+static int gre_packet(struct nf_conn *ct,
+		      const struct sk_buff *skb,
+		      unsigned int dataoff,
+		      enum ip_conntrack_info ctinfo,
+		      u_int8_t pf,
+		      unsigned int hooknum)
+{
+	if (!(ct->status & IPS_SEEN_REPLY)) {
+		/* one-way traffic so far: keep the short timeout */
+		nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.timeout);
+		return NF_ACCEPT;
+	}
+
+	/* Traffic seen both ways: this is a GRE connection, extend timeout. */
+	nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.stream_timeout);
+
+	/* Also, more likely to be important, and not a probe. */
+	set_bit(IPS_ASSURED_BIT, &ct->status);
+	nf_conntrack_event_cache(IPCT_STATUS, ct);
+
+	return NF_ACCEPT;
+}
+
+/*
+ * Called when a new connection for this protocol is found. Seeds the
+ * per-connection timeouts with the compile-time defaults and accepts the
+ * connection (always returns true).
+ */
+static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb,
+		    unsigned int dataoff)
+{
+	pr_debug(": ");
+	nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+
+	/* Sane starting values; ideally a conntrack helper (e.g. pptp)
+	 * raises them later. */
+	ct->proto.gre.timeout = GRE_TIMEOUT;
+	ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT;
+
+	return true;
+}
+
+/*
+ * Called when a conntrack entry has already been removed from the hashes
+ * and is about to be deleted from memory. If the entry has a master, the
+ * master's keymap entries are destroyed; otherwise only a debug message is
+ * emitted.
+ */
+static void gre_destroy(struct nf_conn *ct)
+{
+	struct nf_conn *master = ct->master;
+
+	pr_debug(" entering\n");
+
+	if (master)
+		nf_ct_gre_keymap_destroy(master);
+	else
+		pr_debug("no master !?!\n");
+}
+
+/* protocol helper struct: GRE tracker descriptor for AF_INET, owned by this
+ * module. Only an IPv4 descriptor is defined in this file and no sysctl
+ * table is attached; netlink tuple handling uses the nf_ct_port_* helpers. */
+static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
+ .l3proto = AF_INET,
+ .l4proto = IPPROTO_GRE,
+ .name = "gre",
+ .pkt_to_tuple = gre_pkt_to_tuple,
+ .invert_tuple = gre_invert_tuple,
+ .print_tuple = gre_print_tuple,
+ .print_conntrack = gre_print_conntrack,
+ .packet = gre_packet,
+ .new = gre_new,
+ .destroy = gre_destroy,
+ .me = THIS_MODULE,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nla_policy = nf_ct_port_nla_policy,
+#endif
+};
+
+/*
+ * Per-namespace constructor: allocate the GRE state for @net, initialise
+ * its lock and empty keymap list, and publish it in the namespace's generic
+ * slot. Returns 0 on success, -ENOMEM if the allocation fails, or the
+ * negative error from net_assign_generic() (the state is freed then).
+ */
+static int proto_gre_net_init(struct net *net)
+{
+	struct netns_proto_gre *gre_ns;
+	int err;
+
+	gre_ns = kmalloc(sizeof(*gre_ns), GFP_KERNEL);
+	if (gre_ns == NULL)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&gre_ns->keymap_list);
+	rwlock_init(&gre_ns->keymap_lock);
+
+	err = net_assign_generic(net, proto_gre_net_id, gre_ns);
+	if (err < 0)
+		kfree(gre_ns);
+
+	return err;
+}
+
+/* Per-namespace destructor: free every remaining keymap entry of @net, then
+ * the per-namespace state itself. */
+static void proto_gre_net_exit(struct net *net)
+{
+ struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
+
+ nf_ct_gre_keymap_flush(net);
+ kfree(net_gre);
+}
+
+/* Namespace lifecycle hooks registered from nf_ct_proto_gre_init(). */
+static struct pernet_operations proto_gre_net_ops = {
+ .init = proto_gre_net_init,
+ .exit = proto_gre_net_exit,
+};
+
+/*
+ * Module load: register the GRE tracker, then the per-namespace operations.
+ * Returns 0 on success or the negative error of the step that failed; a
+ * failure of the second step unregisters the tracker again.
+ */
+static int __init nf_ct_proto_gre_init(void)
+{
+	int err = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4);
+
+	if (err < 0)
+		return err;
+
+	err = register_pernet_gen_subsys(&proto_gre_net_id, &proto_gre_net_ops);
+	if (err < 0)
+		nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4);
+
+	return err;
+}
+
+/*
+ * Module unload: unregister the GRE tracker, then the per-namespace
+ * operations. Marked __exit, like the exit handlers of the DCCP and SCTP
+ * trackers, because it is only ever referenced through module_exit().
+ */
+static void __exit nf_ct_proto_gre_fini(void)
+{
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4);
+	unregister_pernet_gen_subsys(proto_gre_net_id, &proto_gre_net_ops);
+}
+
+/* Module entry/exit hooks and licence for the GRE tracker. */
+module_init(nf_ct_proto_gre_init);
+module_exit(nf_ct_proto_gre_fini);
+
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
new file mode 100644
index 0000000..ae8c260
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -0,0 +1,746 @@
+/*
+ * Connection tracking protocol helper module for SCTP.
+ *
+ * SCTP is defined in RFC 2960. References to various sections in this code
+ * are to this RFC.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/sctp.h>
+#include <linux/string.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+
+/* Protects ct->proto.sctp. A single file-wide lock: taken for writing by
+ * sctp_packet() and nlattr_to_sctp(), for reading by sctp_print_conntrack()
+ * and sctp_to_nlattr(). */
+static DEFINE_RWLOCK(sctp_lock);
+
+/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
+ closely. They're more complex. --RR
+
+ And so for me for SCTP :D -Kiran */
+
+/* Printable state names, indexed by the conntrack state value. There are
+ * eight entries and none for SCTP_CONNTRACK_MAX, so the index must be a
+ * valid state. */
+static const char *const sctp_conntrack_names[] = {
+ "NONE",
+ "CLOSED",
+ "COOKIE_WAIT",
+ "COOKIE_ECHOED",
+ "ESTABLISHED",
+ "SHUTDOWN_SENT",
+ "SHUTDOWN_RECD",
+ "SHUTDOWN_ACK_SENT",
+};
+
+/* Postfix unit multipliers: "3 SECS" expands to "3 * HZ", "5 DAYS" to
+ * "5 * 24 * 60 * 60 * HZ". They are only meaningful directly after a
+ * number. */
+#define SECS * HZ
+#define MINS * 60 SECS
+#define HOURS * 60 MINS
+#define DAYS * 24 HOURS
+
+/* Per-state conntrack timeouts in jiffies, indexed by state; writable at
+ * run time through the sysctl tables below. "300 SECS / 1000" expands to
+ * 300 * HZ / 1000, i.e. 300 milliseconds worth of jiffies. No entry is given
+ * for SCTP_CONNTRACK_NONE, which therefore defaults to 0. */
+static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = {
+ [SCTP_CONNTRACK_CLOSED] = 10 SECS,
+ [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
+ [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
+ [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS,
+ [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
+ [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
+ [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
+};
+
+/* Two-letter state abbreviations used to keep the transition table below
+ * readable. sIV ("invalid") aliases SCTP_CONNTRACK_MAX, which is not a real
+ * state; sctp_packet() and sctp_new() reject a transition to it. */
+#define sNO SCTP_CONNTRACK_NONE
+#define sCL SCTP_CONNTRACK_CLOSED
+#define sCW SCTP_CONNTRACK_COOKIE_WAIT
+#define sCE SCTP_CONNTRACK_COOKIE_ECHOED
+#define sES SCTP_CONNTRACK_ESTABLISHED
+#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
+#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
+#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
+#define sIV SCTP_CONNTRACK_MAX
+
+/*
+ These are the descriptions of the states:
+
+NOTE: These state names are tantalizingly similar to the states of an
+SCTP endpoint. But the interpretation of the states is a little different,
+considering that these are the states of the connection and not of an end
+point. Please note the subtleties. -Kiran
+
+NONE - Nothing so far.
+COOKIE WAIT - We have seen an INIT chunk in the original direction, or also
+ an INIT_ACK chunk in the reply direction.
+COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction.
+ESTABLISHED - We have seen a COOKIE_ACK in the reply direction.
+SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction.
+SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply direction.
+SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
+ to that of the SHUTDOWN chunk.
+CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
+ the SHUTDOWN chunk. Connection is closed.
+*/
+
+/* TODO
+ - I have assumed that the first INIT is in the original direction.
+ This messes things when an INIT comes in the reply direction in CLOSED
+ state.
+ - Check the error type in the reply dir before transitioning from
+cookie echoed to closed.
+ - Sec 5.2.4 of RFC 2960
+ - Multi Homing support.
+*/
+
+/* SCTP conntrack state transitions
+ *
+ * Indexed as sctp_conntracks[direction][chunk row][current state] and
+ * yielding the next state. The nine chunk rows are in the order of the
+ * index that sctp_new_state() assigns (init = 0 ... shutdown_comp = 8); the
+ * columns follow the state order given in the header comment of each half.
+ * An sIV entry marks the combination as invalid.
+ */
+static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
+ {
+/* ORIGINAL */
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
+/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
+/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
+/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
+/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
+/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/
+/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
+/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */
+/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+ },
+ {
+/* REPLY */
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
+/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
+/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
+/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
+/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
+/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
+/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */
+/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
+/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+ }
+};
+
+/*
+ * Extract the SCTP source and destination ports found at @dataoff in @skb
+ * into @tuple. Returns false when the first 8 header bytes cannot be
+ * obtained from the packet, true otherwise.
+ */
+static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+			      struct nf_conntrack_tuple *tuple)
+{
+	struct sctphdr hdr_buf;
+	const struct sctphdr *hdr;
+
+	/* Only the first 8 bytes of the header are actually needed. */
+	hdr = skb_header_pointer(skb, dataoff, 8, &hdr_buf);
+	if (!hdr)
+		return false;
+
+	tuple->dst.u.sctp.port = hdr->dest;
+	tuple->src.u.sctp.port = hdr->source;
+
+	return true;
+}
+
+/*
+ * Build the SCTP part of the reply tuple by swapping the source and
+ * destination ports of @orig. Always returns true.
+ */
+static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
+			      const struct nf_conntrack_tuple *orig)
+{
+	tuple->dst.u.sctp.port = orig->src.u.sctp.port;
+	tuple->src.u.sctp.port = orig->dst.u.sctp.port;
+
+	return true;
+}
+
+/*
+ * Print the per-protocol part of the tuple to @s as "sport=N dport=N ",
+ * with both ports converted to host byte order. Returns seq_printf()'s
+ * result.
+ */
+static int sctp_print_tuple(struct seq_file *s,
+			    const struct nf_conntrack_tuple *tuple)
+{
+	unsigned short sport = ntohs(tuple->src.u.sctp.port);
+	unsigned short dport = ntohs(tuple->dst.u.sctp.port);
+
+	return seq_printf(s, "sport=%hu dport=%hu ", sport, dport);
+}
+
+/* Print out the private part of the conntrack: the name of the current SCTP
+ * state. The state is sampled under the read lock and then used, without a
+ * range check, as an index into sctp_conntrack_names[]. */
+static int sctp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
+{
+ enum sctp_conntrack state;
+
+ read_lock_bh(&sctp_lock);
+ state = ct->proto.sctp.state;
+ read_unlock_bh(&sctp_lock);
+
+ return seq_printf(s, "%s ", sctp_conntrack_names[state]);
+}
+
+/* Iterate over the chunk headers of an SCTP packet. @offset starts right
+ * after the common header at @dataoff and advances by each chunk's length
+ * rounded up to a multiple of 4; @count is the zero-based chunk number. The
+ * loop ends when @offset reaches skb->len or a chunk header cannot be
+ * obtained. A chunk with length 0 would not advance @offset, so callers must
+ * reject it (do_basic_checks() does). */
+#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
+for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0; \
+ (offset) < (skb)->len && \
+ ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \
+ (offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
+
+/* Some validity checks to make sure the chunks are fine
+ *
+ * Walks every chunk of the packet. Returns 0 when the packet passes and
+ * non-zero when it fails, i.e. when a rule below is violated or when no
+ * chunk could be read at all. If @map is non-NULL, the bit numbered by each
+ * accepted chunk's type is set in it. @ct is not used.
+ */
+static int do_basic_checks(struct nf_conn *ct,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ unsigned long *map)
+{
+ u_int32_t offset, count;
+ sctp_chunkhdr_t _sch, *sch;
+ int flag;
+
+ /* becomes 1 once an INIT, INIT_ACK or SHUTDOWN_COMPLETE chunk has been
+ * seen and stays set for all following chunks */
+ flag = 0;
+
+ for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
+ pr_debug("Chunk Num: %d Type: %d\n", count, sch->type);
+
+ if (sch->type == SCTP_CID_INIT ||
+ sch->type == SCTP_CID_INIT_ACK ||
+ sch->type == SCTP_CID_SHUTDOWN_COMPLETE)
+ flag = 1;
+
+ /*
+ * Cookie Ack/Echo chunks not the first OR
+ * Init / Init Ack / Shutdown compl chunks not the only chunks
+ * OR zero-length.
+ */
+ if (((sch->type == SCTP_CID_COOKIE_ACK ||
+ sch->type == SCTP_CID_COOKIE_ECHO ||
+ flag) &&
+ count != 0) || !sch->length) {
+ pr_debug("Basic checks failed\n");
+ return 1;
+ }
+
+ if (map)
+ set_bit(sch->type, map);
+ }
+
+ pr_debug("Basic checks passed\n");
+ /* a packet without a single readable chunk also counts as a failure */
+ return count == 0;
+}
+
+/*
+ * Compute the next conntrack state.
+ *
+ * @dir:        direction the chunk travelled in
+ * @cur_state:  current state of the connection
+ * @chunk_type: SCTP chunk type that was seen
+ *
+ * Chunk types without a row in sctp_conntracks[] leave the state unchanged.
+ * For the others the table entry is returned; it may be SCTP_CONNTRACK_MAX
+ * (sIV), which callers treat as "invalid".
+ */
+static int sctp_new_state(enum ip_conntrack_dir dir,
+			  enum sctp_conntrack cur_state,
+			  int chunk_type)
+{
+	int i;
+	int new_state;
+
+	pr_debug("Chunk type: %d\n", chunk_type);
+
+	/* map the chunk type onto its row in sctp_conntracks[] */
+	switch (chunk_type) {
+	case SCTP_CID_INIT:
+		pr_debug("SCTP_CID_INIT\n");
+		i = 0;
+		break;
+	case SCTP_CID_INIT_ACK:
+		pr_debug("SCTP_CID_INIT_ACK\n");
+		i = 1;
+		break;
+	case SCTP_CID_ABORT:
+		pr_debug("SCTP_CID_ABORT\n");
+		i = 2;
+		break;
+	case SCTP_CID_SHUTDOWN:
+		pr_debug("SCTP_CID_SHUTDOWN\n");
+		i = 3;
+		break;
+	case SCTP_CID_SHUTDOWN_ACK:
+		pr_debug("SCTP_CID_SHUTDOWN_ACK\n");
+		i = 4;
+		break;
+	case SCTP_CID_ERROR:
+		pr_debug("SCTP_CID_ERROR\n");
+		i = 5;
+		break;
+	case SCTP_CID_COOKIE_ECHO:
+		pr_debug("SCTP_CID_COOKIE_ECHO\n");
+		i = 6;
+		break;
+	case SCTP_CID_COOKIE_ACK:
+		pr_debug("SCTP_CID_COOKIE_ACK\n");
+		i = 7;
+		break;
+	case SCTP_CID_SHUTDOWN_COMPLETE:
+		pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n");
+		i = 8;
+		break;
+	default:
+		/* Other chunks like DATA, SACK, HEARTBEAT and
+		   its ACK do not cause a change in state */
+		pr_debug("Unknown chunk type, Will stay in %s\n",
+			 sctp_conntrack_names[cur_state]);
+		return cur_state;
+	}
+
+	new_state = sctp_conntracks[dir][i][cur_state];
+
+	/*
+	 * The table may yield sIV (== SCTP_CONNTRACK_MAX), for which
+	 * sctp_conntrack_names[] has no entry; do not index past its end.
+	 */
+	pr_debug("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
+		 dir, sctp_conntrack_names[cur_state], chunk_type,
+		 new_state < SCTP_CONNTRACK_MAX ?
+			sctp_conntrack_names[new_state] : "INVALID");
+
+	return new_state;
+}
+
+/* Returns verdict for packet, or -NF_ACCEPT for invalid.
+ *
+ * Validates the packet's chunks and verification tag, runs every chunk
+ * through the state table under sctp_lock, then refreshes the conntrack
+ * timer with the timeout of the resulting state. @pf and @hooknum are
+ * unused.
+ */
+static int sctp_packet(struct nf_conn *ct,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ u_int8_t pf,
+ unsigned int hooknum)
+{
+ enum sctp_conntrack new_state, old_state;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ const struct sctphdr *sh;
+ struct sctphdr _sctph;
+ const struct sctp_chunkhdr *sch;
+ struct sctp_chunkhdr _sch;
+ u_int32_t offset, count;
+ /* bitmap of the chunk types present, filled by do_basic_checks().
+ * NOTE(review): 256 / sizeof(unsigned long) words is more than the 256
+ * bits needed - harmless, but larger than necessary. */
+ unsigned long map[256 / sizeof(unsigned long)] = { 0 };
+
+ sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
+ if (sh == NULL)
+ goto out;
+
+ if (do_basic_checks(ct, skb, dataoff, map) != 0)
+ goto out;
+
+ /* Check the verification tag (Sec 8.5) */
+ /* packets carrying one of the chunk types below are exempt here and are
+ * checked per chunk inside the loop instead */
+ if (!test_bit(SCTP_CID_INIT, map) &&
+ !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
+ !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
+ !test_bit(SCTP_CID_ABORT, map) &&
+ !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) {
+ pr_debug("Verification tag check failed\n");
+ goto out;
+ }
+
+ /* do_basic_checks() succeeded, so the loop below runs at least once and
+ * overwrites these placeholders */
+ old_state = new_state = SCTP_CONNTRACK_MAX;
+ write_lock_bh(&sctp_lock);
+ for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
+ /* Special cases of Verification tag check (Sec 8.5.1) */
+ if (sch->type == SCTP_CID_INIT) {
+ /* Sec 8.5.1 (A) */
+ if (sh->vtag != 0)
+ goto out_unlock;
+ } else if (sch->type == SCTP_CID_ABORT) {
+ /* Sec 8.5.1 (B) */
+ if (sh->vtag != ct->proto.sctp.vtag[dir] &&
+ sh->vtag != ct->proto.sctp.vtag[!dir])
+ goto out_unlock;
+ } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
+ /* Sec 8.5.1 (C) */
+ if (sh->vtag != ct->proto.sctp.vtag[dir] &&
+ sh->vtag != ct->proto.sctp.vtag[!dir] &&
+ sch->flags & SCTP_CHUNK_FLAG_T)
+ goto out_unlock;
+ } else if (sch->type == SCTP_CID_COOKIE_ECHO) {
+ /* Sec 8.5.1 (D) */
+ if (sh->vtag != ct->proto.sctp.vtag[dir])
+ goto out_unlock;
+ }
+
+ old_state = ct->proto.sctp.state;
+ new_state = sctp_new_state(dir, old_state, sch->type);
+
+ /* Invalid */
+ if (new_state == SCTP_CONNTRACK_MAX) {
+ pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u "
+ "conntrack=%u\n",
+ dir, sch->type, old_state);
+ goto out_unlock;
+ }
+
+ /* If it is an INIT or an INIT ACK note down the vtag */
+ if (sch->type == SCTP_CID_INIT ||
+ sch->type == SCTP_CID_INIT_ACK) {
+ sctp_inithdr_t _inithdr, *ih;
+
+ ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
+ sizeof(_inithdr), &_inithdr);
+ if (ih == NULL)
+ goto out_unlock;
+ pr_debug("Setting vtag %x for dir %d\n",
+ ih->init_tag, !dir);
+ /* the init tag is stored for the opposite direction */
+ ct->proto.sctp.vtag[!dir] = ih->init_tag;
+ }
+
+ ct->proto.sctp.state = new_state;
+ if (old_state != new_state)
+ nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+ }
+ write_unlock_bh(&sctp_lock);
+
+ nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]);
+
+ /* old_state/new_state describe the transition made by the last chunk */
+ if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
+ dir == IP_CT_DIR_REPLY &&
+ new_state == SCTP_CONNTRACK_ESTABLISHED) {
+ pr_debug("Setting assured bit\n");
+ set_bit(IPS_ASSURED_BIT, &ct->status);
+ nf_conntrack_event_cache(IPCT_STATUS, ct);
+ }
+
+ return NF_ACCEPT;
+
+out_unlock:
+ write_unlock_bh(&sctp_lock);
+out:
+ return -NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found.
+ *
+ * Returns true when the first packet may create a conntrack entry, false
+ * when it must not. On success ct->proto.sctp.state and the reply-direction
+ * verification tag have been initialised from the packet.
+ */
+static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
+ unsigned int dataoff)
+{
+ enum sctp_conntrack new_state;
+ const struct sctphdr *sh;
+ struct sctphdr _sctph;
+ const struct sctp_chunkhdr *sch;
+ struct sctp_chunkhdr _sch;
+ u_int32_t offset, count;
+ /* bitmap of the chunk types present, filled by do_basic_checks() */
+ unsigned long map[256 / sizeof(unsigned long)] = { 0 };
+
+ sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
+ if (sh == NULL)
+ return false;
+
+ if (do_basic_checks(ct, skb, dataoff, map) != 0)
+ return false;
+
+ /* If an OOTB packet has any of these chunks discard (Sec 8.4) */
+ if (test_bit(SCTP_CID_ABORT, map) ||
+ test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) ||
+ test_bit(SCTP_CID_COOKIE_ACK, map))
+ return false;
+
+ new_state = SCTP_CONNTRACK_MAX;
+ for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
+ /* Don't need lock here: this conntrack not in circulation yet */
+ /* every chunk is evaluated against the NONE state in the
+ * original direction */
+ new_state = sctp_new_state(IP_CT_DIR_ORIGINAL,
+ SCTP_CONNTRACK_NONE, sch->type);
+
+ /* Invalid: delete conntrack */
+ if (new_state == SCTP_CONNTRACK_NONE ||
+ new_state == SCTP_CONNTRACK_MAX) {
+ pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
+ return false;
+ }
+
+ /* Copy the vtag into the state info */
+ if (sch->type == SCTP_CID_INIT) {
+ if (sh->vtag == 0) {
+ sctp_inithdr_t _inithdr, *ih;
+
+ ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
+ sizeof(_inithdr), &_inithdr);
+ if (ih == NULL)
+ return false;
+
+ pr_debug("Setting vtag %x for new conn\n",
+ ih->init_tag);
+
+ ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
+ ih->init_tag;
+ } else {
+ /* Sec 8.5.1 (A) */
+ return false;
+ }
+ }
+ /* If it is a shutdown ack OOTB packet, we expect a return
+ shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
+ else {
+ pr_debug("Setting vtag %x for new conn OOTB\n",
+ sh->vtag);
+ ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
+ }
+
+ ct->proto.sctp.state = new_state;
+ }
+
+ return true;
+}
+
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+/* Dump the SCTP private state of @ct (state and both verification tags) into
+ * @skb as a nested CTA_PROTOINFO_SCTP attribute. The state is read under the
+ * read lock. Returns 0 on success and -1 on failure; the code jumps to the
+ * nla_put_failure label when the nest cannot be started, and the NLA_PUT_*
+ * macros presumably branch there as well when the skb is full. @nla is
+ * unused. */
+static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
+ const struct nf_conn *ct)
+{
+ struct nlattr *nest_parms;
+
+ read_lock_bh(&sctp_lock);
+ nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ NLA_PUT_U8(skb, CTA_PROTOINFO_SCTP_STATE, ct->proto.sctp.state);
+
+ NLA_PUT_BE32(skb,
+ CTA_PROTOINFO_SCTP_VTAG_ORIGINAL,
+ ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]);
+
+ NLA_PUT_BE32(skb,
+ CTA_PROTOINFO_SCTP_VTAG_REPLY,
+ ct->proto.sctp.vtag[IP_CT_DIR_REPLY]);
+
+ read_unlock_bh(&sctp_lock);
+
+ nla_nest_end(skb, nest_parms);
+
+ return 0;
+
+nla_put_failure:
+ read_unlock_bh(&sctp_lock);
+ return -1;
+}
+
+/* Attribute types expected inside CTA_PROTOINFO_SCTP; passed to
+ * nla_parse_nested() by nlattr_to_sctp(). */
+static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = {
+ [CTA_PROTOINFO_SCTP_STATE] = { .type = NLA_U8 },
+ [CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] = { .type = NLA_U32 },
+ [CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NLA_U32 },
+};
+
+/*
+ * Load the SCTP private state of @ct from the netlink attributes in @cda.
+ *
+ * Returns 0 when CTA_PROTOINFO_SCTP is absent (nothing to do) or when the
+ * state was updated, the error from nla_parse_nested() when parsing fails,
+ * and -EINVAL when a required nested attribute is missing or the supplied
+ * state is not a valid conntrack state.
+ */
+static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
+{
+	struct nlattr *attr = cda[CTA_PROTOINFO_SCTP];
+	struct nlattr *tb[CTA_PROTOINFO_SCTP_MAX+1];
+	u8 state;
+	int err;
+
+	/* updates may not contain the internal protocol info, skip parsing */
+	if (!attr)
+		return 0;
+
+	err = nla_parse_nested(tb,
+			       CTA_PROTOINFO_SCTP_MAX,
+			       attr,
+			       sctp_nla_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[CTA_PROTOINFO_SCTP_STATE] ||
+	    !tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] ||
+	    !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY])
+		return -EINVAL;
+
+	/*
+	 * The state is later used as an index into sctp_conntrack_names[],
+	 * sctp_timeouts[] and sctp_conntracks[]; refuse values that would
+	 * index past the end of those arrays.
+	 */
+	state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]);
+	if (state >= SCTP_CONNTRACK_MAX)
+		return -EINVAL;
+
+	write_lock_bh(&sctp_lock);
+	ct->proto.sctp.state = state;
+	ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] =
+		nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]);
+	ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
+		nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]);
+	write_unlock_bh(&sctp_lock);
+
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_SYSCTL
+/* User counter and header storage referenced by both the IPv4 and the IPv6
+ * SCTP descriptors below. */
+static unsigned int sctp_sysctl_table_users;
+static struct ctl_table_header *sctp_sysctl_header;
+/* One "nf_conntrack_sctp_timeout_<state>" entry per slot of sctp_timeouts[]
+ * that has a default, each handled by proc_dointvec_jiffies. The zeroed
+ * entry terminates the table. */
+static struct ctl_table sctp_sysctl_table[] = {
+ {
+ .procname = "nf_conntrack_sctp_timeout_closed",
+ .data = &sctp_timeouts[SCTP_CONNTRACK_CLOSED],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_sctp_timeout_cookie_wait",
+ .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_WAIT],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_sctp_timeout_cookie_echoed",
+ .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_ECHOED],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_sctp_timeout_established",
+ .data = &sctp_timeouts[SCTP_CONNTRACK_ESTABLISHED],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_sctp_timeout_shutdown_sent",
+ .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_sctp_timeout_shutdown_recd",
+ .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent",
+ .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+/* The same seven variables under the legacy "ip_conntrack_sctp_timeout_*"
+ * names; only the IPv4 descriptor references this table. */
+static struct ctl_table sctp_compat_sysctl_table[] = {
+ {
+ .procname = "ip_conntrack_sctp_timeout_closed",
+ .data = &sctp_timeouts[SCTP_CONNTRACK_CLOSED],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "ip_conntrack_sctp_timeout_cookie_wait",
+ .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_WAIT],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "ip_conntrack_sctp_timeout_cookie_echoed",
+ .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_ECHOED],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "ip_conntrack_sctp_timeout_established",
+ .data = &sctp_timeouts[SCTP_CONNTRACK_ESTABLISHED],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "ip_conntrack_sctp_timeout_shutdown_sent",
+ .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "ip_conntrack_sctp_timeout_shutdown_recd",
+ .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent",
+ .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+#endif
+
+/* SCTP tracker descriptor for PF_INET, owned by this module. Shares all
+ * handlers and the sysctl table with the IPv6 descriptor and additionally
+ * carries the legacy compat sysctl table. */
+static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
+ .l3proto = PF_INET,
+ .l4proto = IPPROTO_SCTP,
+ .name = "sctp",
+ .pkt_to_tuple = sctp_pkt_to_tuple,
+ .invert_tuple = sctp_invert_tuple,
+ .print_tuple = sctp_print_tuple,
+ .print_conntrack = sctp_print_conntrack,
+ .packet = sctp_packet,
+ .new = sctp_new,
+ .me = THIS_MODULE,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .to_nlattr = sctp_to_nlattr,
+ .from_nlattr = nlattr_to_sctp,
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nla_policy = nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_SYSCTL
+ .ctl_table_users = &sctp_sysctl_table_users,
+ .ctl_table_header = &sctp_sysctl_header,
+ .ctl_table = sctp_sysctl_table,
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+ .ctl_compat_table = sctp_compat_sysctl_table,
+#endif
+#endif
+};
+
+/* SCTP tracker descriptor for PF_INET6, owned by this module. Identical to
+ * the IPv4 descriptor except that no compat sysctl table is attached. */
+static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
+ .l3proto = PF_INET6,
+ .l4proto = IPPROTO_SCTP,
+ .name = "sctp",
+ .pkt_to_tuple = sctp_pkt_to_tuple,
+ .invert_tuple = sctp_invert_tuple,
+ .print_tuple = sctp_print_tuple,
+ .print_conntrack = sctp_print_conntrack,
+ .packet = sctp_packet,
+ .new = sctp_new,
+ .me = THIS_MODULE,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .to_nlattr = sctp_to_nlattr,
+ .from_nlattr = nlattr_to_sctp,
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nla_policy = nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_SYSCTL
+ .ctl_table_users = &sctp_sysctl_table_users,
+ .ctl_table_header = &sctp_sysctl_header,
+ .ctl_table = sctp_sysctl_table,
+#endif
+};
+
+/*
+ * Module load: register the SCTP tracker for IPv4, then for IPv6.
+ * Returns 0 on success or the error of the failing registration; if the
+ * IPv6 registration fails, the IPv4 one is rolled back. Failures are logged
+ * at KERN_ERR so they carry an explicit log level.
+ */
+static int __init nf_conntrack_proto_sctp_init(void)
+{
+	int ret;
+
+	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp4);
+	if (ret) {
+		printk(KERN_ERR
+		       "nf_conntrack_l4proto_sctp4: protocol register failed\n");
+		goto out;
+	}
+	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp6);
+	if (ret) {
+		printk(KERN_ERR
+		       "nf_conntrack_l4proto_sctp6: protocol register failed\n");
+		goto cleanup_sctp4;
+	}
+
+	return ret;
+
+ cleanup_sctp4:
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
+ out:
+	return ret;
+}
+
+/* Module unload: unregister both SCTP trackers, IPv6 first and then IPv4
+ * (the reverse of the order used by nf_conntrack_proto_sctp_init()). */
+static void __exit nf_conntrack_proto_sctp_fini(void)
+{
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp6);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
+}
+
+/* Module entry/exit hooks and metadata for the SCTP tracker; the alias keeps
+ * the older "ip_conntrack_proto_sctp" module name resolvable. */
+module_init(nf_conntrack_proto_sctp_init);
+module_exit(nf_conntrack_proto_sctp_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Kiran Kumar Immidi");
+MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
+MODULE_ALIAS("ip_conntrack_proto_sctp");
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
new file mode 100644
index 0000000..f947ec4
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -0,0 +1,1440 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <net/ip6_checksum.h>
+
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_log.h>
+
+/* Protects ct->proto.tcp.
+ * Taken for writing by tcp_packet() and nf_conntrack_tcp_update(), and for
+ * reading where the state is only printed or dumped. */
+static DEFINE_RWLOCK(tcp_lock);
+
+/* "Be conservative in what you do,
+ be liberal in what you accept from others."
+ If it's non-zero, we mark only out of window RST segments as INVALID.
+ Consulted by tcp_in_window() when a segment fails the window checks. */
+static int nf_ct_tcp_be_liberal __read_mostly = 0;
+
+/* If it is set to zero, we disable picking up already established
+ connections.
+ Consulted by tcp_new() for a first packet that is not a plain SYN. */
+static int nf_ct_tcp_loose __read_mostly = 1;
+
+/* Max number of the retransmitted packets without receiving an (acceptable)
+ ACK from the destination. If this number is reached, a shorter timer
+ will be started.
+ tcp_packet() compares it with the retransmission counter kept in
+ ct->proto.tcp.retrans. */
+static int nf_ct_tcp_max_retrans __read_mostly = 3;
+
+ /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
+ closely. They're more complex. --RR */
+/* Printable names of the TCP conntrack states; tcp_print_conntrack()
+ * indexes this array with ct->proto.tcp.state. */
+static const char *const tcp_conntrack_names[] = {
+ "NONE",
+ "SYN_SENT",
+ "SYN_RECV",
+ "ESTABLISHED",
+ "FIN_WAIT",
+ "CLOSE_WAIT",
+ "LAST_ACK",
+ "TIME_WAIT",
+ "CLOSE",
+ "LISTEN"
+};
+/* Postfix time-unit macros: written after a number they expand to a
+ * multiplication, so that e.g. "5 MINS" becomes "5 * 60 * HZ".  Each unit
+ * is defined in terms of the previous, smaller one. */
+#define SECS * HZ
+#define MINS * 60 SECS
+#define HOURS * 60 MINS
+#define DAYS * 24 HOURS
+
+/* RFC1122 says the R2 limit should be at least 100 seconds.
+ Linux uses 15 packets as limit, which corresponds
+ to ~13-30min depending on RTO.
+
+ tcp_packet() uses these two values instead of the per-state timeout when
+ the per-state timeout is larger and, respectively, the retransmission
+ limit was reached or some data is still unacknowledged. */
+static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
+static unsigned int nf_ct_tcp_timeout_unacknowledged __read_mostly = 5 MINS;
+
+static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = { /*
+ * Per-state timeout, indexed by the conntrack state.  States without an
+ * initializer here (NONE, LISTEN) are implicitly zero. */
+ [TCP_CONNTRACK_SYN_SENT] = 2 MINS,
+ [TCP_CONNTRACK_SYN_RECV] = 60 SECS,
+ [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
+ [TCP_CONNTRACK_FIN_WAIT] = 2 MINS,
+ [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS,
+ [TCP_CONNTRACK_LAST_ACK] = 30 SECS,
+ [TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
+ [TCP_CONNTRACK_CLOSE] = 10 SECS,
+};
+/* Two-letter aliases for the conntrack states, used to keep the rows of the
+ * tcp_conntracks table below readable.  sIV (TCP_CONNTRACK_MAX) marks an
+ * invalid packet and sIG (TCP_CONNTRACK_IGNORE) an ignored one. */
+#define sNO TCP_CONNTRACK_NONE
+#define sSS TCP_CONNTRACK_SYN_SENT
+#define sSR TCP_CONNTRACK_SYN_RECV
+#define sES TCP_CONNTRACK_ESTABLISHED
+#define sFW TCP_CONNTRACK_FIN_WAIT
+#define sCW TCP_CONNTRACK_CLOSE_WAIT
+#define sLA TCP_CONNTRACK_LAST_ACK
+#define sTW TCP_CONNTRACK_TIME_WAIT
+#define sCL TCP_CONNTRACK_CLOSE
+#define sLI TCP_CONNTRACK_LISTEN
+#define sIV TCP_CONNTRACK_MAX
+#define sIG TCP_CONNTRACK_IGNORE
+
+/* What TCP flags are set from RST/SYN/FIN/ACK.
+ * get_conntrack_index() maps a TCP header to one of these values; the value
+ * selects the row of the tcp_conntracks table, so the order of the
+ * enumerators must match the order of the rows. */
+enum tcp_bit_set {
+ TCP_SYN_SET,
+ TCP_SYNACK_SET,
+ TCP_FIN_SET,
+ TCP_ACK_SET,
+ TCP_RST_SET,
+ TCP_NONE_SET,
+};
+
+/*
+ * The TCP state transition table needs a few words...
+ *
+ * We are the man in the middle. All the packets go through us
+ * but might get lost in transit to the destination.
+ * It is assumed that the destinations can't receive segments
+ * we haven't seen.
+ *
+ * The checked segment is in window, but our windows are *not*
+ * equivalent with the ones of the sender/receiver. We always
+ * try to guess the state of the current sender.
+ *
+ * The meaning of the states are:
+ *
+ * NONE: initial state
+ * SYN_SENT: SYN-only packet seen
+ * SYN_RECV: SYN-ACK packet seen
+ * ESTABLISHED: ACK packet seen
+ * FIN_WAIT: FIN packet seen
+ * CLOSE_WAIT: ACK seen (after FIN)
+ * LAST_ACK: FIN seen (after FIN)
+ * TIME_WAIT: last ACK seen
+ * CLOSE: closed connection (RST)
+ *
+ * LISTEN state is not used.
+ *
+ * Packets marked as IGNORED (sIG):
+ * if they may be either invalid or valid
+ * and the receiver may send back a connection
+ * closing RST or a SYN/ACK.
+ *
+ * Packets marked as INVALID (sIV):
+ * if they are invalid
+ * or we do not support the request (simultaneous open)
+ */
+static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { /*
+ * Indexed as [direction][enum tcp_bit_set value][current state]; the entry
+ * is the new state, or sIV / sIG for invalid / ignored packets. */
+ {
+/* ORIGINAL */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
+/*
+ * sNO -> sSS Initialize a new connection
+ * sSS -> sSS Retransmitted SYN
+ * sSR -> sIG Late retransmitted SYN?
+ * sES -> sIG Error: SYNs in window outside the SYN_SENT state
+ * are errors. Receiver will reply with RST
+ * and close the connection.
+ * Or we are not in sync and hold a dead connection.
+ * sFW -> sIG
+ * sCW -> sIG
+ * sLA -> sIG
+ * sTW -> sSS Reopened connection (RFC 1122).
+ * sCL -> sSS
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/*
+ * A SYN/ACK from the client is always invalid:
+ * - either it tries to set up a simultaneous open, which is
+ * not supported;
+ * - or the firewall has just been inserted between the two hosts
+ * during the session set-up. The SYN will be retransmitted
+ * by the true client (or it'll time out).
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
+/*
+ * sNO -> sIV Too late and no reason to do anything...
+ * sSS -> sIV Client might not send FIN in this state:
+ * we enforce waiting for a SYN/ACK reply first.
+ * sSR -> sFW Close started.
+ * sES -> sFW
+ * sFW -> sLA FIN seen in both directions, waiting for
+ * the last ACK.
+ * Might be a retransmitted FIN as well...
+ * sCW -> sLA
+ * sLA -> sLA Retransmitted FIN. Remain in the same state.
+ * sTW -> sTW
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*
+ * sNO -> sES Assumed.
+ * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
+ * sSR -> sES Established state is reached.
+ * sES -> sES :-)
+ * sFW -> sCW Normal close request answered by ACK.
+ * sCW -> sCW
+ * sLA -> sTW Last ACK detected.
+ * sTW -> sTW Retransmitted last ACK. Remain in the same state.
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
+ },
+ {
+/* REPLY */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/*
+ * sNO -> sIV Never reached.
+ * sSS -> sIV Simultaneous open, not supported
+ * sSR -> sIV Simultaneous open, not supported.
+ * sES -> sIV Server may not initiate a connection.
+ * sFW -> sIV
+ * sCW -> sIV
+ * sLA -> sIV
+ * sTW -> sIV Reopened connection, but server may not do it.
+ * sCL -> sIV
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
+/*
+ * sSS -> sSR Standard open.
+ * sSR -> sSR Retransmitted SYN/ACK.
+ * sES -> sIG Late retransmitted SYN/ACK?
+ * sFW -> sIG Might be SYN/ACK answering ignored SYN
+ * sCW -> sIG
+ * sLA -> sIG
+ * sTW -> sIG
+ * sCL -> sIG
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
+/*
+ * sSS -> sIV Server might not send FIN in this state.
+ * sSR -> sFW Close started.
+ * sES -> sFW
+ * sFW -> sLA FIN seen in both directions.
+ * sCW -> sLA
+ * sLA -> sLA Retransmitted FIN.
+ * sTW -> sTW
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*
+ * sSS -> sIG Might be a half-open connection.
+ * sSR -> sSR Might answer late resent SYN.
+ * sES -> sES :-)
+ * sFW -> sCW Normal close request answered by ACK.
+ * sCW -> sCW
+ * sLA -> sTW Last ACK detected.
+ * sTW -> sTW Retransmitted last ACK.
+ * sCL -> sCL
+ */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
+ }
+};
+
+/*
+ * Fill in the port part of @tuple from the TCP header located at @dataoff
+ * in @skb.  Returns false when the start of the header cannot be read,
+ * true otherwise.
+ */
+static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+			     struct nf_conntrack_tuple *tuple)
+{
+	struct tcphdr hdr_buf;
+	const struct tcphdr *tcph;
+
+	/* Only the leading 8 bytes of the header are fetched. */
+	tcph = skb_header_pointer(skb, dataoff, 8, &hdr_buf);
+	if (!tcph)
+		return false;
+
+	tuple->dst.u.tcp.port = tcph->dest;
+	tuple->src.u.tcp.port = tcph->source;
+	return true;
+}
+/* Build the reverse-direction ports in @tuple: its source port becomes the
+ * destination port of @orig and vice versa.  Always returns true. */
+static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
+{
+ tuple->src.u.tcp.port = orig->dst.u.tcp.port;
+ tuple->dst.u.tcp.port = orig->src.u.tcp.port;
+ return true;
+}
+
+/*
+ * Print the TCP part of @tuple (source and destination port, converted to
+ * host byte order) to the seq_file @s.  Returns what seq_printf() returns.
+ */
+static int tcp_print_tuple(struct seq_file *s,
+			   const struct nf_conntrack_tuple *tuple)
+{
+	const u16 sport = ntohs(tuple->src.u.tcp.port);
+	const u16 dport = ntohs(tuple->dst.u.tcp.port);
+
+	return seq_printf(s, "sport=%hu dport=%hu ", sport, dport);
+}
+
+/*
+ * Print the name of the current TCP state of @ct to the seq_file @s.
+ * The state is sampled under tcp_lock; the printing itself happens after
+ * the lock has been dropped.
+ */
+static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
+{
+	const char *state_name;
+
+	read_lock_bh(&tcp_lock);
+	state_name = tcp_conntrack_names[ct->proto.tcp.state];
+	read_unlock_bh(&tcp_lock);
+
+	return seq_printf(s, "%s ", state_name);
+}
+
+/*
+ * Classify a TCP header by its flags into an enum tcp_bit_set value.
+ * Precedence: RST first, then SYN (with or without ACK), then FIN, then a
+ * bare ACK; anything else is TCP_NONE_SET.
+ */
+static unsigned int get_conntrack_index(const struct tcphdr *tcph)
+{
+	if (tcph->rst)
+		return TCP_RST_SET;
+	if (tcph->syn)
+		return tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET;
+	if (tcph->fin)
+		return TCP_FIN_SET;
+	if (tcph->ack)
+		return TCP_ACK_SET;
+	return TCP_NONE_SET;
+}
+
+/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
+ in IP Filter' by Guido van Rooij.
+
+ http://www.nluug.nl/events/sane2000/papers.html
+ http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
+
+ The boundaries and the conditions are changed according to RFC793:
+ the packet must intersect the window (i.e. segments may be
+ after the right or before the left edge) and thus receivers may ACK
+ segments after the right edge of the window.
+
+ td_maxend = max(sack + max(win,1)) seen in reply packets
+ td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
+ td_maxwin += seq + len - sender.td_maxend
+ if seq + len > sender.td_maxend
+ td_end = max(seq + len) seen in sent packets
+
+ I. Upper bound for valid data: seq <= sender.td_maxend
+ II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
+ III. Upper bound for valid (s)ack: sack <= receiver.td_end
+ IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW
+
+ where sack is the highest right edge of sack block found in the packet
+ or ack in the case of packet without SACK option.
+
+ The upper bound limit for a valid (s)ack is not ignored -
+ we don't have to deal with fragments.
+*/
+
+/*
+ * Sequence number just past the end of a segment: @seq plus the payload
+ * length (@len minus @dataoff minus the TCP header length taken from the
+ * doff field), plus one for each of SYN and FIN when set.
+ * The result wraps around modulo 2^32 like any sequence number.
+ */
+static inline __u32 segment_seq_plus_len(__u32 seq,
+					 size_t len,
+					 unsigned int dataoff,
+					 const struct tcphdr *tcph)
+{
+	/* XXX Should I use payload length field in IP/IPv6 header ?
+	 * - YK */
+	__u32 end = seq;
+
+	end += len - dataoff - tcph->doff*4;
+	if (tcph->syn)
+		end++;
+	if (tcph->fin)
+		end++;
+
+	return end;
+}
+
+/* Fixme: what about big packets?
+ * MAXACKWINDOW(sender) is the larger of sender->td_maxwin and
+ * MAXACKWINCONST; tcp_in_window() uses it for the lower bound of a valid
+ * (s)ack.  Note that the argument is evaluated more than once. */
+#define MAXACKWINCONST 66000
+#define MAXACKWINDOW(sender) \
+ ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
+ : MAXACKWINCONST)
+
+/*
+ * Simplified tcp_parse_options routine from tcp_input.c
+ *
+ * Scans the TCP options of the segment whose TCP header starts at @dataoff
+ * and records in @state:
+ *  - IP_CT_TCP_FLAG_SACK_PERM in state->flags if a well-formed
+ *    SACK-permitted option is present,
+ *  - IP_CT_TCP_FLAG_WINDOW_SCALE in state->flags and the announced shift
+ *    count in state->td_scale (clamped to 14) if a well-formed window scale
+ *    option is present.
+ *
+ * A header without any option leaves @state untouched; otherwise
+ * state->flags and state->td_scale are reset before parsing.
+ *
+ * Parsing stops at the first malformed or truncated option.  Such an option
+ * must terminate the whole scan: merely leaving the switch would keep
+ * advancing the read pointer without consuming any of the remaining
+ * length, and could walk past the end of the option area.
+ */
+static void tcp_options(const struct sk_buff *skb,
+			unsigned int dataoff,
+			const struct tcphdr *tcph,
+			struct ip_ct_tcp_state *state)
+{
+	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
+	const unsigned char *ptr;
+	int length = (tcph->doff*4) - sizeof(struct tcphdr);
+
+	if (!length)
+		return;
+
+	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
+				 length, buff);
+	BUG_ON(ptr == NULL);
+
+	state->flags = 0;
+	state->td_scale = 0;
+
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		switch (opcode) {
+		case TCPOPT_EOL:
+			return;
+		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
+			length--;
+			continue;
+		default:
+			if (length < 2)	/* no room left for the size byte */
+				return;
+			opsize = *ptr++;
+			if (opsize < 2) /* "silly options" */
+				return;
+			if (opsize > length)
+				return;	/* don't parse partial options */
+
+			if (opcode == TCPOPT_SACK_PERM
+			    && opsize == TCPOLEN_SACK_PERM)
+				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
+			else if (opcode == TCPOPT_WINDOW
+				 && opsize == TCPOLEN_WINDOW) {
+				state->td_scale = *(u_int8_t *)ptr;
+
+				if (state->td_scale > 14) {
+					/* See RFC1323 */
+					state->td_scale = 14;
+				}
+				state->flags |=
+					IP_CT_TCP_FLAG_WINDOW_SCALE;
+			}
+			/* Skip the payload of the option just handled. */
+			ptr += opsize - 2;
+			length -= opsize;
+		}
+	}
+}
+
+/*
+ * Find the highest right edge among the SACK blocks of a segment.
+ *
+ * @skb, @dataoff, @tcph: the packet, the offset of its TCP header and the
+ *                        header itself
+ * @sack: in/out; on entry the caller's current value (the ack number in
+ *        tcp_in_window()), on return raised to the right edge of any SACK
+ *        block that lies after it.
+ *
+ * Only the first well-formed SACK option is examined.  Parsing stops at the
+ * first malformed or truncated option; such an option must end the scan,
+ * because leaving only the switch would keep advancing the read pointer
+ * without consuming any of the remaining length.
+ */
+static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
+		     const struct tcphdr *tcph, __u32 *sack)
+{
+	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
+	const unsigned char *ptr;
+	int length = (tcph->doff*4) - sizeof(struct tcphdr);
+	__u32 tmp;
+
+	if (!length)
+		return;
+
+	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
+				 length, buff);
+	BUG_ON(ptr == NULL);
+
+	/* Fast path for timestamp-only option.
+	 * TCPOLEN_TSTAMP_ALIGNED is already a byte count, like length;
+	 * scaling it by 4 gives a size the option area can never have,
+	 * which would make this shortcut dead code. */
+	if (length == TCPOLEN_TSTAMP_ALIGNED
+	    && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
+				       | (TCPOPT_NOP << 16)
+				       | (TCPOPT_TIMESTAMP << 8)
+				       | TCPOLEN_TIMESTAMP))
+		return;
+
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize, i;
+
+		switch (opcode) {
+		case TCPOPT_EOL:
+			return;
+		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
+			length--;
+			continue;
+		default:
+			if (length < 2)	/* no room left for the size byte */
+				return;
+			opsize = *ptr++;
+			if (opsize < 2) /* "silly options" */
+				return;
+			if (opsize > length)
+				return;	/* don't parse partial options */
+
+			if (opcode == TCPOPT_SACK
+			    && opsize >= (TCPOLEN_SACK_BASE
+					  + TCPOLEN_SACK_PERBLOCK)
+			    && !((opsize - TCPOLEN_SACK_BASE)
+				 % TCPOLEN_SACK_PERBLOCK)) {
+				for (i = 0;
+				     i < (opsize - TCPOLEN_SACK_BASE);
+				     i += TCPOLEN_SACK_PERBLOCK) {
+					/* Second 32-bit word of the block:
+					 * its right edge. */
+					tmp = ntohl(*((__be32 *)(ptr+i)+1));
+
+					if (after(tmp, *sack))
+						*sack = tmp;
+				}
+				return;
+			}
+			/* Skip the payload of the option just handled. */
+			ptr += opsize - 2;
+			length -= opsize;
+		}
+	}
+}
+
+/*
+ * Check whether a segment lies inside the tracked windows of its connection
+ * and, if it does, update the per-direction window state.
+ *
+ * @ct:      conntrack entry the packet belongs to
+ * @state:   TCP tracking state; tcp_packet() passes &ct->proto.tcp while
+ *           holding tcp_lock for writing
+ * @dir:     direction of the packet; state->seen[dir] is the sender side,
+ *           state->seen[!dir] the receiver side
+ * @index:   flag class of the packet (enum tcp_bit_set)
+ * @skb:     the packet
+ * @dataoff: offset of the TCP header in @skb
+ * @tcph:    the TCP header
+ * @pf:      protocol family, used only for logging
+ *
+ * Returns true when the segment passes the four bound checks I-IV listed
+ * in the comment above segment_seq_plus_len(), or when liberal mode is
+ * enabled (per-sender IP_CT_TCP_FLAG_BE_LIBERAL or nf_ct_tcp_be_liberal).
+ * Returns false otherwise, after logging which bound was violated if
+ * logging of invalid packets is enabled.
+ */
+static bool tcp_in_window(const struct nf_conn *ct,
+			  struct ip_ct_tcp *state,
+			  enum ip_conntrack_dir dir,
+			  unsigned int index,
+			  const struct sk_buff *skb,
+			  unsigned int dataoff,
+			  const struct tcphdr *tcph,
+			  u_int8_t pf)
+{
+	struct net *net = nf_ct_net(ct);
+	struct ip_ct_tcp_state *sender = &state->seen[dir];
+	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
+	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
+	__u32 seq, ack, sack, end, win, swin;
+	bool res;
+
+	/*
+	 * Get the required data from the packet.
+	 */
+	seq = ntohl(tcph->seq);
+	ack = sack = ntohl(tcph->ack_seq);
+	win = ntohs(tcph->window);
+	end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
+
+	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
+		tcp_sack(skb, dataoff, tcph, &sack);
+
+	pr_debug("tcp_in_window: START\n");
+	pr_debug("tcp_in_window: ");
+	nf_ct_dump_tuple(tuple);
+	pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n",
+		 seq, ack, sack, win, end);
+	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		 sender->td_end, sender->td_maxend, sender->td_maxwin,
+		 sender->td_scale,
+		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+		 receiver->td_scale);
+
+	if (sender->td_end == 0) {
+		/*
+		 * Initialize sender data.
+		 */
+		if (tcph->syn && tcph->ack) {
+			/*
+			 * Outgoing SYN-ACK in reply to a SYN.
+			 */
+			sender->td_end =
+			sender->td_maxend = end;
+			sender->td_maxwin = (win == 0 ? 1 : win);
+
+			tcp_options(skb, dataoff, tcph, sender);
+			/*
+			 * RFC 1323:
+			 * Both sides must send the Window Scale option
+			 * to enable window scaling in either direction.
+			 */
+			if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
+			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
+				sender->td_scale =
+				receiver->td_scale = 0;
+		} else {
+			/*
+			 * We are in the middle of a connection,
+			 * its history is lost for us.
+			 * Let's try to use the data from the packet.
+			 */
+			sender->td_end = end;
+			sender->td_maxwin = (win == 0 ? 1 : win);
+			sender->td_maxend = end + sender->td_maxwin;
+		}
+	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
+		     && dir == IP_CT_DIR_ORIGINAL)
+		   || (state->state == TCP_CONNTRACK_SYN_RECV
+		     && dir == IP_CT_DIR_REPLY))
+		   && after(end, sender->td_end)) {
+		/*
+		 * RFC 793: "if a TCP is reinitialized ... then it need
+		 * not wait at all; it must only be sure to use sequence
+		 * numbers larger than those recently used."
+		 */
+		sender->td_end =
+		sender->td_maxend = end;
+		sender->td_maxwin = (win == 0 ? 1 : win);
+
+		tcp_options(skb, dataoff, tcph, sender);
+	}
+
+	if (!(tcph->ack)) {
+		/*
+		 * If there is no ACK, just pretend it was set and OK.
+		 */
+		ack = sack = receiver->td_end;
+	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
+		    (TCP_FLAG_ACK|TCP_FLAG_RST))
+		   && (ack == 0)) {
+		/*
+		 * Broken TCP stacks, that set ACK in RST packets as well
+		 * with zero ack value.
+		 */
+		ack = sack = receiver->td_end;
+	}
+
+	if (seq == end
+	    && (!tcph->rst
+		|| (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
+		/*
+		 * Packets contains no data: we assume it is valid
+		 * and check the ack value only.
+		 * However RST segments are always validated by their
+		 * SEQ number, except when seq == 0 (reset sent answering
+		 * SYN.
+		 */
+		seq = end = sender->td_end;
+
+	pr_debug("tcp_in_window: ");
+	nf_ct_dump_tuple(tuple);
+	pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n",
+		 seq, ack, sack, win, end);
+	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		 sender->td_end, sender->td_maxend, sender->td_maxwin,
+		 sender->td_scale,
+		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+		 receiver->td_scale);
+
+	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
+		 before(seq, sender->td_maxend + 1),
+		 after(end, sender->td_end - receiver->td_maxwin - 1),
+		 before(sack, receiver->td_end + 1),
+		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
+
+	if (before(seq, sender->td_maxend + 1) &&
+	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
+	    before(sack, receiver->td_end + 1) &&
+	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
+		/*
+		 * Take into account window scaling (RFC 1323).
+		 */
+		if (!tcph->syn)
+			win <<= sender->td_scale;
+
+		/*
+		 * Update sender data.
+		 */
+		swin = win + (sack - ack);
+		if (sender->td_maxwin < swin)
+			sender->td_maxwin = swin;
+		if (after(end, sender->td_end)) {
+			sender->td_end = end;
+			sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+		}
+		/*
+		 * Update receiver data.
+		 */
+		if (after(end, sender->td_maxend))
+			receiver->td_maxwin += end - sender->td_maxend;
+		if (after(sack + win, receiver->td_maxend - 1)) {
+			receiver->td_maxend = sack + win;
+			if (win == 0)
+				receiver->td_maxend++;
+		}
+		if (ack == receiver->td_end)
+			receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+
+		/*
+		 * Check retransmissions.
+		 */
+		if (index == TCP_ACK_SET) {
+			if (state->last_dir == dir
+			    && state->last_seq == seq
+			    && state->last_ack == ack
+			    && state->last_end == end
+			    && state->last_win == win)
+				state->retrans++;
+			else {
+				state->last_dir = dir;
+				state->last_seq = seq;
+				state->last_ack = ack;
+				state->last_end = end;
+				state->last_win = win;
+				state->retrans = 0;
+			}
+		}
+		res = true;
+	} else {
+		res = false;
+		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
+		    nf_ct_tcp_be_liberal)
+			res = true;
+		/*
+		 * Name the first violated bound.  The four tests below must
+		 * be exactly the four tests of the window check above,
+		 * otherwise the reported reason (or "BUG") can be wrong.
+		 */
+		if (!res && LOG_INVALID(net, IPPROTO_TCP))
+			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			"nf_ct_tcp: %s ",
+			before(seq, sender->td_maxend + 1) ?
+			after(end, sender->td_end - receiver->td_maxwin - 1) ?
+			before(sack, receiver->td_end + 1) ?
+			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
+			: "ACK is under the lower bound (possible overly delayed ACK)"
+			: "ACK is over the upper bound (ACKed data not seen yet)"
+			: "SEQ is under the lower bound (already ACKed data retransmitted)"
+			: "SEQ is over the upper bound (over the window of the receiver)");
+	}
+
+	pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
+		 "receiver end=%u maxend=%u maxwin=%u\n",
+		 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
+		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
+
+	return res;
+}
+
+#ifdef CONFIG_NF_NAT_NEEDED
+/*
+ * Called after NAT has successfully mangled a packet: recompute the end of
+ * the segment and, under tcp_lock, advance the td_end of the sending
+ * direction if the new end lies after it, and record the new end as
+ * last_end.
+ *
+ * The caller must have linearized the skb at the TCP header, because the
+ * header is accessed directly through skb->data.
+ */
+void nf_conntrack_tcp_update(const struct sk_buff *skb,
+			     unsigned int dataoff,
+			     struct nf_conn *ct,
+			     int dir)
+{
+	const struct tcphdr *tcph = (const void *)skb->data + dataoff;
+	struct ip_ct_tcp *tcp = &ct->proto.tcp;
+	const struct ip_ct_tcp_state *sender = &tcp->seen[dir];
+	const struct ip_ct_tcp_state *receiver = &tcp->seen[!dir];
+	const __u32 end = segment_seq_plus_len(ntohl(tcph->seq), skb->len,
+					       dataoff, tcph);
+
+	write_lock_bh(&tcp_lock);
+	/*
+	 * We have to worry for the ack in the reply packet only...
+	 */
+	if (after(end, tcp->seen[dir].td_end))
+		tcp->seen[dir].td_end = end;
+	tcp->last_end = end;
+	write_unlock_bh(&tcp_lock);
+
+	pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
+		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		 sender->td_end, sender->td_maxend, sender->td_maxwin,
+		 sender->td_scale,
+		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+		 receiver->td_scale);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
+#endif
+/* Bit values of the TCP flag byte, which tcp_error() reads as byte 13 of
+ * the TCP header. */
+#define TH_FIN 0x01
+#define TH_SYN 0x02
+#define TH_RST 0x04
+#define TH_PUSH 0x08
+#define TH_ACK 0x10
+#define TH_URG 0x20
+#define TH_ECE 0x40
+#define TH_CWR 0x80
+
+/* table of valid flag combinations - PUSH, ECE and CWR are always valid
+ *
+ * Indexed by the flag byte after PUSH, ECE and CWR have been masked out
+ * (see tcp_error()); combinations without an entry are zero and are
+ * rejected there. */
+static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
+{
+ [TH_SYN] = 1,
+ [TH_SYN|TH_URG] = 1,
+ [TH_SYN|TH_ACK] = 1,
+ [TH_RST] = 1,
+ [TH_RST|TH_ACK] = 1,
+ [TH_FIN|TH_ACK] = 1,
+ [TH_FIN|TH_ACK|TH_URG] = 1,
+ [TH_ACK] = 1,
+ [TH_ACK|TH_URG] = 1,
+};
+
+/*
+ * Protect conntrack against broken packets. Code taken from ipt_unclean.c.
+ *
+ * Sanity-checks a TCP packet before it is tracked.  The checks run in this
+ * order: the fixed header is readable, the header length (doff) is sane and
+ * fits in the packet, the checksum is valid (only when checksumming is
+ * enabled for @net and the packet is seen at NF_INET_PRE_ROUTING), and the
+ * flag combination is listed in tcp_valid_flags[].
+ *
+ * Returns NF_ACCEPT when all checks pass, -NF_ACCEPT at the first failing
+ * check; the reason is logged when logging of invalid packets is enabled.
+ * @ctinfo is not used.
+ */
+static int tcp_error(struct net *net,
+		     struct sk_buff *skb,
+		     unsigned int dataoff,
+		     enum ip_conntrack_info *ctinfo,
+		     u_int8_t pf,
+		     unsigned int hooknum)
+{
+	struct tcphdr hdr_buf;
+	const struct tcphdr *th;
+	const char *reason;
+	unsigned int tcplen = skb->len - dataoff;
+	u_int8_t tcpflags;
+
+	/* Smaller that minimal TCP header? */
+	th = skb_header_pointer(skb, dataoff, sizeof(hdr_buf), &hdr_buf);
+	if (th == NULL) {
+		reason = "nf_ct_tcp: short packet ";
+		goto invalid;
+	}
+
+	/* Not whole TCP header or malformed packet */
+	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
+		reason = "nf_ct_tcp: truncated/malformed packet ";
+		goto invalid;
+	}
+
+	/* Checksum invalid? Ignore.
+	 * We skip checking packets on the outgoing path
+	 * because the checksum is assumed to be correct.
+	 */
+	/* FIXME: Source route IP option packets --RR */
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
+		reason = "nf_ct_tcp: bad TCP checksum ";
+		goto invalid;
+	}
+
+	/* Check TCP flags. */
+	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
+	if (!tcp_valid_flags[tcpflags]) {
+		reason = "nf_ct_tcp: invalid TCP flag combination ";
+		goto invalid;
+	}
+
+	return NF_ACCEPT;
+
+invalid:
+	/* Common reject path: optionally log the reason, then refuse. */
+	if (LOG_INVALID(net, IPPROTO_TCP))
+		nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "%s", reason);
+	return -NF_ACCEPT;
+}
+
+/* Returns verdict for packet, or -1 for invalid.
+ *
+ * Looks up the next state in tcp_conntracks[] from the direction of the
+ * packet, its flag class (get_conntrack_index()) and the current state,
+ * all under tcp_lock.  Possible results:
+ *  NF_ACCEPT  - the packet is accepted; for in-window packets the state
+ *               and the timeout of the conntrack are updated, ignored
+ *               packets are only remembered (last_index, last_dir, ...),
+ *  -NF_ACCEPT - invalid transition or out-of-window segment,
+ *  -NF_REPEAT - a SYN reopens a closed/aborted connection and the old
+ *               entry could be killed,
+ *  -NF_DROP   - that old entry could not be killed, or an out-of-sync
+ *               session was killed on a SYN/ACK.
+ * Every return path drops tcp_lock first.
+ */
+static int tcp_packet(struct nf_conn *ct,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ u_int8_t pf,
+ unsigned int hooknum)
+{
+ struct net *net = nf_ct_net(ct);
+ struct nf_conntrack_tuple *tuple;
+ enum tcp_conntrack new_state, old_state;
+ enum ip_conntrack_dir dir;
+ const struct tcphdr *th;
+ struct tcphdr _tcph;
+ unsigned long timeout;
+ unsigned int index;
+
+ th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
+ BUG_ON(th == NULL);
+
+ write_lock_bh(&tcp_lock);
+ old_state = ct->proto.tcp.state;
+ dir = CTINFO2DIR(ctinfo);
+ index = get_conntrack_index(th);
+ new_state = tcp_conntracks[dir][index][old_state];
+ tuple = &ct->tuplehash[dir].tuple;
+
+ switch (new_state) {
+ case TCP_CONNTRACK_SYN_SENT:
+ if (old_state < TCP_CONNTRACK_TIME_WAIT)
+ break;
+ /* RFC 1122: "When a connection is closed actively,
+ * it MUST linger in TIME-WAIT state for a time 2xMSL
+ * (Maximum Segment Lifetime). However, it MAY accept
+ * a new SYN from the remote TCP to reopen the connection
+ * directly from TIME-WAIT state, if..."
+ * We ignore the conditions because we are in the
+ * TIME-WAIT state anyway.
+ *
+ * Handle aborted connections: we and the server
+ * think there is an existing connection but the client
+ * aborts it and starts a new one.
+ */
+ if (((ct->proto.tcp.seen[dir].flags
+ | ct->proto.tcp.seen[!dir].flags)
+ & IP_CT_TCP_FLAG_CLOSE_INIT)
+ || (ct->proto.tcp.last_dir == dir
+ && ct->proto.tcp.last_index == TCP_RST_SET)) {
+ /* Attempt to reopen a closed/aborted connection.
+ * Delete this connection and look up again. */
+ write_unlock_bh(&tcp_lock);
+
+ /* Only repeat if we can actually remove the timer.
+ * Destruction may already be in progress in process
+ * context and we must give it a chance to terminate.
+ */
+ if (nf_ct_kill(ct))
+ return -NF_REPEAT;
+ return -NF_DROP;
+ }
+ /* Fall through */
+ case TCP_CONNTRACK_IGNORE:
+ /* Ignored packets:
+ *
+ * Our connection entry may be out of sync, so ignore
+ * packets which may signal the real connection between
+ * the client and the server.
+ *
+ * a) SYN in ORIGINAL
+ * b) SYN/ACK in REPLY
+ * c) ACK in reply direction after initial SYN in original.
+ *
+ * If the ignored packet is invalid, the receiver will send
+ * a RST we'll catch below.
+ */
+ if (index == TCP_SYNACK_SET
+ && ct->proto.tcp.last_index == TCP_SYN_SET
+ && ct->proto.tcp.last_dir != dir
+ && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
+ /* b) This SYN/ACK acknowledges a SYN that we earlier
+ * ignored as invalid. This means that the client and
+ * the server are both in sync, while the firewall is
+ * not. We kill this session and block the SYN/ACK so
+ * that the client cannot but retransmit its SYN and
+ * thus initiate a clean new session.
+ */
+ write_unlock_bh(&tcp_lock);
+ if (LOG_INVALID(net, IPPROTO_TCP))
+ nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ "nf_ct_tcp: killing out of sync session ");
+ nf_ct_kill(ct);
+ return -NF_DROP;
+ }
+ ct->proto.tcp.last_index = index;
+ ct->proto.tcp.last_dir = dir;
+ ct->proto.tcp.last_seq = ntohl(th->seq);
+ ct->proto.tcp.last_end =
+ segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
+
+ write_unlock_bh(&tcp_lock);
+ if (LOG_INVALID(net, IPPROTO_TCP))
+ nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ "nf_ct_tcp: invalid packet ignored ");
+ return NF_ACCEPT;
+ case TCP_CONNTRACK_MAX:
+ /* Invalid packet */
+ pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
+ dir, get_conntrack_index(th), old_state);
+ write_unlock_bh(&tcp_lock);
+ if (LOG_INVALID(net, IPPROTO_TCP))
+ nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ "nf_ct_tcp: invalid state ");
+ return -NF_ACCEPT;
+ case TCP_CONNTRACK_CLOSE:
+ if (index == TCP_RST_SET
+ && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
+ && ct->proto.tcp.last_index == TCP_SYN_SET)
+ || (!test_bit(IPS_ASSURED_BIT, &ct->status)
+ && ct->proto.tcp.last_index == TCP_ACK_SET))
+ && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
+ /* RST sent to invalid SYN or ACK we had let through
+ * at a) and c) above:
+ *
+ * a) SYN was in window then
+ * c) we hold a half-open connection.
+ *
+ * Delete our connection entry.
+ * We skip window checking, because packet might ACK
+ * segments we ignored. */
+ goto in_window;
+ }
+ /* Just fall through */
+ default:
+ /* Keep compilers happy. */
+ break;
+ }
+
+ if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
+ skb, dataoff, th, pf)) {
+ write_unlock_bh(&tcp_lock);
+ return -NF_ACCEPT;
+ }
+ in_window:
+ /* From now on we have got in-window packets */
+ ct->proto.tcp.last_index = index;
+ ct->proto.tcp.last_dir = dir;
+
+ pr_debug("tcp_conntracks: ");
+ nf_ct_dump_tuple(tuple);
+ pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
+ (th->syn ? 1 : 0), (th->ack ? 1 : 0),
+ (th->fin ? 1 : 0), (th->rst ? 1 : 0),
+ old_state, new_state);
+
+ ct->proto.tcp.state = new_state;
+ if (old_state != new_state
+ && new_state == TCP_CONNTRACK_FIN_WAIT)
+ ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
+
+ if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans &&
+ tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans)
+ timeout = nf_ct_tcp_timeout_max_retrans;
+ else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
+ IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
+ tcp_timeouts[new_state] > nf_ct_tcp_timeout_unacknowledged)
+ timeout = nf_ct_tcp_timeout_unacknowledged;
+ else
+ timeout = tcp_timeouts[new_state];
+ write_unlock_bh(&tcp_lock);
+
+ nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
+ if (new_state != old_state)
+ nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+
+ if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ /* If only reply is a RST, we can consider ourselves not to
+ have an established connection: this is a fairly common
+ problem case, so we can delete the conntrack
+ immediately. --RR */
+ if (th->rst) {
+ nf_ct_kill_acct(ct, ctinfo, skb);
+ return NF_ACCEPT;
+ }
+ } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
+ && (old_state == TCP_CONNTRACK_SYN_RECV
+ || old_state == TCP_CONNTRACK_ESTABLISHED)
+ && new_state == TCP_CONNTRACK_ESTABLISHED) {
+ /* Set ASSURED if we see a valid ack in ESTABLISHED
+ after SYN_RECV or a valid answer for a picked up
+ connection. */
+ set_bit(IPS_ASSURED_BIT, &ct->status);
+ nf_conntrack_event_cache(IPCT_STATUS, ct);
+ }
+ nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
+
+ return NF_ACCEPT;
+}
+
+/*
+ * Called when a new connection for this protocol is found.
+ *
+ * Initializes the TCP tracking state of @ct from the first packet seen.
+ * A plain SYN starts regular tracking, with the options of the SYN parsed
+ * into the original direction.  Any other valid first packet is treated as
+ * part of an already running connection and is only picked up when
+ * nf_ct_tcp_loose is non-zero, in which case both directions are marked
+ * SACK-permitted and liberal.
+ *
+ * Returns true if the conntrack is to be kept, false if the packet is
+ * invalid as a first packet or picking up connections is disabled.
+ */
+static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
+		    unsigned int dataoff)
+{
+	struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
+	struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
+	enum tcp_conntrack new_state;
+	const struct tcphdr *th;
+	struct tcphdr hdr_buf;
+	bool is_syn;
+
+	th = skb_header_pointer(skb, dataoff, sizeof(hdr_buf), &hdr_buf);
+	BUG_ON(th == NULL);
+
+	/* Don't need lock here: this conntrack not in circulation yet */
+	new_state = tcp_conntracks[0][get_conntrack_index(th)]
+				  [TCP_CONNTRACK_NONE];
+
+	/* Invalid: delete conntrack */
+	if (new_state >= TCP_CONNTRACK_MAX) {
+		pr_debug("nf_ct_tcp: invalid new deleting.\n");
+		return false;
+	}
+
+	is_syn = (new_state == TCP_CONNTRACK_SYN_SENT);
+
+	/* Don't try to pick up connections. */
+	if (!is_syn && nf_ct_tcp_loose == 0)
+		return false;
+
+	/* Common to both cases: end of this segment and announced window
+	 * (a zero window is tracked as 1). */
+	sender->td_end = segment_seq_plus_len(ntohl(th->seq), skb->len,
+					      dataoff, th);
+	sender->td_maxwin = ntohs(th->window);
+	if (sender->td_maxwin == 0)
+		sender->td_maxwin = 1;
+
+	if (is_syn) {
+		/* SYN packet */
+		sender->td_maxend = sender->td_end;
+
+		tcp_options(skb, dataoff, th, sender);
+		receiver->flags = 0;
+	} else {
+		/*
+		 * We are in the middle of a connection,
+		 * its history is lost for us.
+		 * Let's try to use the data from the packet.
+		 */
+		sender->td_maxend = sender->td_end + sender->td_maxwin;
+		sender->td_scale = 0;
+
+		/* We assume SACK and liberal window checking to handle
+		 * window scaling */
+		sender->flags =
+		receiver->flags = IP_CT_TCP_FLAG_SACK_PERM |
+				  IP_CT_TCP_FLAG_BE_LIBERAL;
+	}
+
+	receiver->td_end = 0;
+	receiver->td_maxend = 0;
+	receiver->td_maxwin = 1;
+	receiver->td_scale = 0;
+
+	/* tcp_packet will set them */
+	ct->proto.tcp.state = TCP_CONNTRACK_NONE;
+	ct->proto.tcp.last_index = TCP_NONE_SET;
+
+	pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
+		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		 sender->td_end, sender->td_maxend, sender->td_maxwin,
+		 sender->td_scale,
+		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+		 receiver->td_scale);
+	return true;
+}
+
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
+ const struct nf_conn *ct)
+{ /* Dump the TCP state, window scales and per-direction flags of ct into skb as a nested CTA_PROTOINFO_TCP attribute; returns 0 on success, -1 on failure (nla is not used here) */
+ struct nlattr *nest_parms;
+ struct nf_ct_tcp_flags tmp = {}; /* only .flags is filled in below; the rest stays zero */
+
+ read_lock_bh(&tcp_lock); /* ct->proto.tcp is read under tcp_lock */
+ nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state); /* the NLA_PUT*() macros jump to nla_put_failure on error */
+
+ NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
+ ct->proto.tcp.seen[0].td_scale);
+
+ NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
+ ct->proto.tcp.seen[1].td_scale);
+
+ tmp.flags = ct->proto.tcp.seen[0].flags;
+ NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
+ sizeof(struct nf_ct_tcp_flags), &tmp);
+
+ tmp.flags = ct->proto.tcp.seen[1].flags;
+ NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
+ sizeof(struct nf_ct_tcp_flags), &tmp);
+ read_unlock_bh(&tcp_lock);
+
+ nla_nest_end(skb, nest_parms);
+
+ return 0;
+
+nla_put_failure:
+ read_unlock_bh(&tcp_lock); /* every jump to this label happens with tcp_lock still held */
+ return -1;
+}
+
+static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { /* validation policy handed to nla_parse_nested() in nlattr_to_tcp() */
+ [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
+ [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
+ [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
+ [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) }, /* flags/mask pair */
+ [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
+};
+
+/*
+ * Apply the CTA_PROTOINFO_TCP attributes found in @cda to @ct.
+ *
+ * Returns 0 on success (including when the attribute is absent), the
+ * nla_parse_nested() error if parsing fails, or -EINVAL if the requested
+ * state is not below TCP_CONNTRACK_MAX.
+ */
+static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
+{
+	/* flag attribute for each direction, indexed like seen[] */
+	static const int flag_attr[2] = {
+		CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
+		CTA_PROTOINFO_TCP_FLAGS_REPLY,
+	};
+	struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
+	struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
+	struct ip_ct_tcp_state *seen = ct->proto.tcp.seen;
+	int dir;
+	int err;
+
+	/* An update need not carry any private protocol info;
+	 * in that case there is nothing to parse */
+	if (!pattr)
+		return 0;
+
+	err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
+	if (err < 0)
+		return err;
+
+	/* Reject an out-of-range state before touching the conntrack */
+	if (tb[CTA_PROTOINFO_TCP_STATE] &&
+	    nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
+		return -EINVAL;
+
+	write_lock_bh(&tcp_lock);
+
+	if (tb[CTA_PROTOINFO_TCP_STATE])
+		ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
+
+	/* Only the bits selected by the mask are replaced */
+	for (dir = 0; dir < 2; dir++) {
+		struct nf_ct_tcp_flags *attr;
+
+		if (!tb[flag_attr[dir]])
+			continue;
+
+		attr = nla_data(tb[flag_attr[dir]]);
+		seen[dir].flags &= ~attr->mask;
+		seen[dir].flags |= attr->flags & attr->mask;
+	}
+
+	/* Scales are taken only if both are given and both directions
+	 * have window scaling flagged */
+	if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
+	    tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
+	    (seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) &&
+	    (seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) {
+		seen[0].td_scale =
+			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
+		seen[1].td_scale =
+			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
+	}
+
+	write_unlock_bh(&tcp_lock);
+
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_SYSCTL
+static unsigned int tcp_sysctl_table_users; /* referenced by both the tcp4 and tcp6 descriptors below */
+static struct ctl_table_header *tcp_sysctl_header; /* likewise shared by both descriptors */
+static struct ctl_table tcp_sysctl_table[] = { /* nf_conntrack_tcp_* sysctls: timeouts go through proc_dointvec_jiffies, the other knobs through proc_dointvec */
+ {
+ .procname = "nf_conntrack_tcp_timeout_syn_sent",
+ .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_tcp_timeout_syn_recv",
+ .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_tcp_timeout_established",
+ .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_tcp_timeout_fin_wait",
+ .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_tcp_timeout_close_wait",
+ .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_tcp_timeout_last_ack",
+ .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_tcp_timeout_time_wait",
+ .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_tcp_timeout_close",
+ .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_tcp_timeout_max_retrans",
+ .data = &nf_ct_tcp_timeout_max_retrans,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_tcp_timeout_unacknowledged",
+ .data = &nf_ct_tcp_timeout_unacknowledged,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE,
+ .procname = "nf_conntrack_tcp_loose",
+ .data = &nf_ct_tcp_loose, /* read by tcp_new(): 0 disables picking up connections mid-stream */
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL,
+ .procname = "nf_conntrack_tcp_be_liberal",
+ .data = &nf_ct_tcp_be_liberal,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS,
+ .procname = "nf_conntrack_tcp_max_retrans",
+ .data = &nf_ct_tcp_max_retrans,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = 0 /* all-zero entry terminates the table */
+ }
+};
+
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+static struct ctl_table tcp_compat_sysctl_table[] = { /* ip_conntrack_tcp_* names for the same variables as tcp_sysctl_table; there is no compat entry for nf_ct_tcp_timeout_unacknowledged */
+ {
+ .procname = "ip_conntrack_tcp_timeout_syn_sent",
+ .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "ip_conntrack_tcp_timeout_syn_recv",
+ .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "ip_conntrack_tcp_timeout_established",
+ .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "ip_conntrack_tcp_timeout_fin_wait",
+ .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "ip_conntrack_tcp_timeout_close_wait",
+ .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "ip_conntrack_tcp_timeout_last_ack",
+ .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "ip_conntrack_tcp_timeout_time_wait",
+ .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "ip_conntrack_tcp_timeout_close",
+ .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "ip_conntrack_tcp_timeout_max_retrans",
+ .data = &nf_ct_tcp_timeout_max_retrans,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
+ .procname = "ip_conntrack_tcp_loose",
+ .data = &nf_ct_tcp_loose,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
+ .procname = "ip_conntrack_tcp_be_liberal",
+ .data = &nf_ct_tcp_be_liberal,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
+ .procname = "ip_conntrack_tcp_max_retrans",
+ .data = &nf_ct_tcp_max_retrans,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = 0 /* all-zero entry terminates the table */
+ }
+};
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+#endif /* CONFIG_SYSCTL */
+
+struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = /* TCP tracker descriptor for PF_INET */
+{
+ .l3proto = PF_INET,
+ .l4proto = IPPROTO_TCP,
+ .name = "tcp",
+ .pkt_to_tuple = tcp_pkt_to_tuple,
+ .invert_tuple = tcp_invert_tuple,
+ .print_tuple = tcp_print_tuple,
+ .print_conntrack = tcp_print_conntrack,
+ .packet = tcp_packet,
+ .new = tcp_new,
+ .error = tcp_error,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .to_nlattr = tcp_to_nlattr,
+ .from_nlattr = nlattr_to_tcp,
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nla_policy = nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_SYSCTL
+ .ctl_table_users = &tcp_sysctl_table_users, /* same counter, header and table as the PF_INET6 descriptor */
+ .ctl_table_header = &tcp_sysctl_header,
+ .ctl_table = tcp_sysctl_table,
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+ .ctl_compat_table = tcp_compat_sysctl_table, /* ip_conntrack_* names; set on this descriptor only */
+#endif
+#endif
+};
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
+
+struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = /* TCP tracker descriptor for PF_INET6; same callbacks as the PF_INET one */
+{
+ .l3proto = PF_INET6,
+ .l4proto = IPPROTO_TCP,
+ .name = "tcp",
+ .pkt_to_tuple = tcp_pkt_to_tuple,
+ .invert_tuple = tcp_invert_tuple,
+ .print_tuple = tcp_print_tuple,
+ .print_conntrack = tcp_print_conntrack,
+ .packet = tcp_packet,
+ .new = tcp_new,
+ .error = tcp_error,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .to_nlattr = tcp_to_nlattr,
+ .from_nlattr = nlattr_to_tcp,
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nla_policy = nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_SYSCTL
+ .ctl_table_users = &tcp_sysctl_table_users,
+ .ctl_table_header = &tcp_sysctl_header,
+ .ctl_table = tcp_sysctl_table, /* no ctl_compat_table is set for PF_INET6 */
+#endif
+};
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
new file mode 100644
index 0000000..7c2ca48
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -0,0 +1,231 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/udp.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_log.h>
+
+static unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ; /* applied by udp_packet() while IPS_SEEN_REPLY_BIT is clear */
+static unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ; /* applied by udp_packet() once IPS_SEEN_REPLY_BIT is set */
+
+/*
+ * Copy the source and destination port of the UDP header at @dataoff in
+ * @skb into @tuple, unchanged (network byte order).
+ * Returns false if the header cannot be read from the packet.
+ */
+static bool udp_pkt_to_tuple(const struct sk_buff *skb,
+			     unsigned int dataoff,
+			     struct nf_conntrack_tuple *tuple)
+{
+	struct udphdr hdr_copy;
+	const struct udphdr *uh;
+
+	/* Actually only the first 8 bytes are needed. */
+	uh = skb_header_pointer(skb, dataoff, sizeof(hdr_copy), &hdr_copy);
+	if (!uh)
+		return false;
+
+	tuple->src.u.udp.port = uh->source;
+	tuple->dst.u.udp.port = uh->dest;
+	return true;
+}
+
+static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
+{ /* Write into tuple the UDP ports of orig with source and destination swapped */
+ tuple->src.u.udp.port = orig->dst.u.udp.port;
+ tuple->dst.u.udp.port = orig->src.u.udp.port;
+ return true; /* no failure path */
+}
+
+/* Write the UDP ports of @tuple, converted to host byte order, to @s. */
+static int udp_print_tuple(struct seq_file *s,
+			   const struct nf_conntrack_tuple *tuple)
+{
+	const u_int16_t sport = ntohs(tuple->src.u.udp.port);
+	const u_int16_t dport = ntohs(tuple->dst.u.udp.port);
+
+	return seq_printf(s, "sport=%hu dport=%hu ", sport, dport);
+}
+
+/*
+ * Per-packet handler: refresh the conntrack timeout and return the
+ * verdict, which is always NF_ACCEPT.  May set IPS_ASSURED_BIT in
+ * ct->status.
+ */
+static int udp_packet(struct nf_conn *ct,
+		      const struct sk_buff *skb,
+		      unsigned int dataoff,
+		      enum ip_conntrack_info ctinfo,
+		      u_int8_t pf,
+		      unsigned int hooknum)
+{
+	/* No reply seen so far: keep the short timeout */
+	if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout);
+		return NF_ACCEPT;
+	}
+
+	/* Traffic has been seen both ways, so this is some kind of UDP
+	 * stream: extend the timeout. */
+	nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream);
+
+	/* Also more likely to be important, and not a probe; report the
+	 * status change only when the bit was newly set */
+	if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+		nf_conntrack_event_cache(IPCT_STATUS, ct);
+
+	return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol is found.
+ * Always returns true; nothing in ct is initialised here. */
+static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
+ unsigned int dataoff)
+{
+ return true;
+}
+
+/*
+ * Sanity-check a UDP packet.
+ *
+ * Returns NF_ACCEPT when the packet passes, or -NF_ACCEPT when the header
+ * is unreadable, the length field is inconsistent with the packet, or the
+ * checksum is bad.  Each rejection is logged if LOG_INVALID() says so.
+ * @ctinfo is not used.
+ */
+static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
+		     enum ip_conntrack_info *ctinfo,
+		     u_int8_t pf,
+		     unsigned int hooknum)
+{
+	const unsigned int avail = skb->len - dataoff;
+	struct udphdr hdr_copy;
+	const struct udphdr *uh;
+	unsigned int claimed;
+
+	/* Is the header too small? */
+	uh = skb_header_pointer(skb, dataoff, sizeof(hdr_copy), &hdr_copy);
+	if (!uh) {
+		if (LOG_INVALID(net, IPPROTO_UDP))
+			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_udp: short packet ");
+		return -NF_ACCEPT;
+	}
+
+	/* The length field must cover the header and fit in the packet */
+	claimed = ntohs(uh->len);
+	if (claimed < sizeof(*uh) || claimed > avail) {
+		if (LOG_INVALID(net, IPPROTO_UDP))
+			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_udp: truncated/malformed packet ");
+		return -NF_ACCEPT;
+	}
+
+	/* A zero checksum field means the packet carries no checksum */
+	if (!uh->check)
+		return NF_ACCEPT;
+
+	/* Checksums are only verified when the sysctl enables it and only
+	 * at PRE_ROUTING; on the outgoing path the checksum is assumed to
+	 * be correct.
+	 * FIXME: Source route IP option packets --RR */
+	if (!net->ct.sysctl_checksum || hooknum != NF_INET_PRE_ROUTING)
+		return NF_ACCEPT;
+
+	if (nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
+		if (LOG_INVALID(net, IPPROTO_UDP))
+			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_udp: bad UDP checksum ");
+		return -NF_ACCEPT;
+	}
+
+	return NF_ACCEPT;
+}
+
+#ifdef CONFIG_SYSCTL
+static unsigned int udp_sysctl_table_users; /* referenced by both the udp4 and udp6 descriptors below */
+static struct ctl_table_header *udp_sysctl_header; /* likewise shared by both descriptors */
+static struct ctl_table udp_sysctl_table[] = { /* sysctls exposing the two UDP timeouts via proc_dointvec_jiffies */
+ {
+ .procname = "nf_conntrack_udp_timeout",
+ .data = &nf_ct_udp_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_udp_timeout_stream",
+ .data = &nf_ct_udp_timeout_stream,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = 0 /* all-zero entry terminates the table */
+ }
+};
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+static struct ctl_table udp_compat_sysctl_table[] = { /* ip_conntrack_udp_* names for the same two variables as udp_sysctl_table */
+ {
+ .procname = "ip_conntrack_udp_timeout",
+ .data = &nf_ct_udp_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .procname = "ip_conntrack_udp_timeout_stream",
+ .data = &nf_ct_udp_timeout_stream,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = 0 /* all-zero entry terminates the table */
+ }
+};
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+#endif /* CONFIG_SYSCTL */
+
+struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = /* UDP tracker descriptor for PF_INET */
+{
+ .l3proto = PF_INET,
+ .l4proto = IPPROTO_UDP,
+ .name = "udp",
+ .pkt_to_tuple = udp_pkt_to_tuple,
+ .invert_tuple = udp_invert_tuple,
+ .print_tuple = udp_print_tuple,
+ .packet = udp_packet,
+ .new = udp_new,
+ .error = udp_error,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nla_policy = nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_SYSCTL
+ .ctl_table_users = &udp_sysctl_table_users, /* same counter, header and table as the PF_INET6 descriptor */
+ .ctl_table_header = &udp_sysctl_header,
+ .ctl_table = udp_sysctl_table,
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+ .ctl_compat_table = udp_compat_sysctl_table, /* ip_conntrack_* names; set on this descriptor only */
+#endif
+#endif
+};
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4);
+
+struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = /* UDP tracker descriptor for PF_INET6; same callbacks as the PF_INET one */
+{
+ .l3proto = PF_INET6,
+ .l4proto = IPPROTO_UDP,
+ .name = "udp",
+ .pkt_to_tuple = udp_pkt_to_tuple,
+ .invert_tuple = udp_invert_tuple,
+ .print_tuple = udp_print_tuple,
+ .packet = udp_packet,
+ .new = udp_new,
+ .error = udp_error,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nla_policy = nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_SYSCTL
+ .ctl_table_users = &udp_sysctl_table_users,
+ .ctl_table_header = &udp_sysctl_header,
+ .ctl_table = udp_sysctl_table, /* no ctl_compat_table is set for PF_INET6 */
+#endif
+};
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6);
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
new file mode 100644
index 0000000..d22d839
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -0,0 +1,242 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2007 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/udp.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_log.h>
+
+static unsigned int nf_ct_udplite_timeout __read_mostly = 30*HZ; /* applied by udplite_packet() while IPS_SEEN_REPLY_BIT is clear */
+static unsigned int nf_ct_udplite_timeout_stream __read_mostly = 180*HZ; /* applied by udplite_packet() once IPS_SEEN_REPLY_BIT is set */
+
+/*
+ * Copy the source and destination port of the header at @dataoff in @skb
+ * into @tuple, unchanged (network byte order).  The header is read as a
+ * struct udphdr and the ports go into the udp member of the tuple union.
+ * Returns false if the header cannot be read from the packet.
+ */
+static bool udplite_pkt_to_tuple(const struct sk_buff *skb,
+				 unsigned int dataoff,
+				 struct nf_conntrack_tuple *tuple)
+{
+	struct udphdr hdr_copy;
+	const struct udphdr *uh;
+
+	uh = skb_header_pointer(skb, dataoff, sizeof(hdr_copy), &hdr_copy);
+	if (!uh)
+		return false;
+
+	tuple->src.u.udp.port = uh->source;
+	tuple->dst.u.udp.port = uh->dest;
+	return true;
+}
+
+static bool udplite_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
+{ /* Write into tuple the ports of orig with source and destination swapped */
+ tuple->src.u.udp.port = orig->dst.u.udp.port;
+ tuple->dst.u.udp.port = orig->src.u.udp.port;
+ return true; /* no failure path */
+}
+
+/* Write the ports of @tuple, converted to host byte order, to @s. */
+static int udplite_print_tuple(struct seq_file *s,
+			       const struct nf_conntrack_tuple *tuple)
+{
+	const u_int16_t sport = ntohs(tuple->src.u.udp.port);
+	const u_int16_t dport = ntohs(tuple->dst.u.udp.port);
+
+	return seq_printf(s, "sport=%hu dport=%hu ", sport, dport);
+}
+
+/*
+ * Per-packet handler: refresh the conntrack timeout and return the
+ * verdict, which is always NF_ACCEPT.  May set IPS_ASSURED_BIT in
+ * ct->status.
+ */
+static int udplite_packet(struct nf_conn *ct,
+			  const struct sk_buff *skb,
+			  unsigned int dataoff,
+			  enum ip_conntrack_info ctinfo,
+			  u_int8_t pf,
+			  unsigned int hooknum)
+{
+	/* No reply seen so far: keep the short timeout */
+	if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout);
+		return NF_ACCEPT;
+	}
+
+	/* Traffic has been seen both ways, so this is some kind of UDP
+	 * stream: extend the timeout. */
+	nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout_stream);
+
+	/* Also more likely to be important, and not a probe; report the
+	 * status change only when the bit was newly set */
+	if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+		nf_conntrack_event_cache(IPCT_STATUS, ct);
+
+	return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol is found.
+ * Always returns true; nothing in ct is initialised here. */
+static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb,
+ unsigned int dataoff)
+{
+ return true;
+}
+
+/*
+ * Sanity-check a UDP-Lite packet.
+ *
+ * Returns NF_ACCEPT when the packet passes, or -NF_ACCEPT when the header
+ * is unreadable, the checksum coverage is invalid, the checksum field is
+ * zero, or the checksum is bad.  Each rejection is logged if LOG_INVALID()
+ * says so.  @ctinfo is not used.
+ */
+static int udplite_error(struct net *net,
+			 struct sk_buff *skb,
+			 unsigned int dataoff,
+			 enum ip_conntrack_info *ctinfo,
+			 u_int8_t pf,
+			 unsigned int hooknum)
+{
+	const unsigned int avail = skb->len - dataoff;
+	struct udphdr hdr_copy;
+	const struct udphdr *uh;
+	unsigned int cscov;
+
+	/* Is the header too small? */
+	uh = skb_header_pointer(skb, dataoff, sizeof(hdr_copy), &hdr_copy);
+	if (!uh) {
+		if (LOG_INVALID(net, IPPROTO_UDPLITE))
+			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_udplite: short packet ");
+		return -NF_ACCEPT;
+	}
+
+	/* The length field holds the checksum coverage; zero selects
+	 * everything from the header to the end of the packet */
+	cscov = ntohs(uh->len);
+	if (cscov == 0) {
+		cscov = avail;
+	} else if (cscov < sizeof(*uh) || cscov > avail) {
+		if (LOG_INVALID(net, IPPROTO_UDPLITE))
+			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_udplite: invalid checksum coverage ");
+		return -NF_ACCEPT;
+	}
+
+	/* UDPLITE mandates checksums */
+	if (!uh->check) {
+		if (LOG_INVALID(net, IPPROTO_UDPLITE))
+			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_udplite: checksum missing ");
+		return -NF_ACCEPT;
+	}
+
+	/* Checksums are only verified when the sysctl enables it and only
+	 * at PRE_ROUTING */
+	if (!net->ct.sysctl_checksum || hooknum != NF_INET_PRE_ROUTING)
+		return NF_ACCEPT;
+
+	/* NOTE(review): the protocol passed here is IPPROTO_UDP, not
+	 * IPPROTO_UDPLITE - confirm this is what nf_checksum_partial()
+	 * expects for the pseudo-header. */
+	if (nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
+				pf)) {
+		if (LOG_INVALID(net, IPPROTO_UDPLITE))
+			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_udplite: bad UDPLite checksum ");
+		return -NF_ACCEPT;
+	}
+
+	return NF_ACCEPT;
+}
+
+#ifdef CONFIG_SYSCTL
+static unsigned int udplite_sysctl_table_users; /* referenced by both the udplite4 and udplite6 descriptors below */
+static struct ctl_table_header *udplite_sysctl_header; /* likewise shared by both descriptors */
+static struct ctl_table udplite_sysctl_table[] = { /* sysctls exposing the two UDP-Lite timeouts via proc_dointvec_jiffies */
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nf_conntrack_udplite_timeout",
+ .data = &nf_ct_udplite_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nf_conntrack_udplite_timeout_stream",
+ .data = &nf_ct_udplite_timeout_stream,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = 0 /* all-zero entry terminates the table */
+ }
+};
+#endif /* CONFIG_SYSCTL */
+
+static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = /* UDP-Lite tracker descriptor for PF_INET; registered by nf_conntrack_proto_udplite_init() */
+{
+ .l3proto = PF_INET,
+ .l4proto = IPPROTO_UDPLITE,
+ .name = "udplite",
+ .pkt_to_tuple = udplite_pkt_to_tuple,
+ .invert_tuple = udplite_invert_tuple,
+ .print_tuple = udplite_print_tuple,
+ .packet = udplite_packet,
+ .new = udplite_new,
+ .error = udplite_error,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nla_policy = nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_SYSCTL
+ .ctl_table_users = &udplite_sysctl_table_users, /* same counter, header and table as the PF_INET6 descriptor */
+ .ctl_table_header = &udplite_sysctl_header,
+ .ctl_table = udplite_sysctl_table,
+#endif
+};
+
+static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = /* UDP-Lite tracker descriptor for PF_INET6; same callbacks as the PF_INET one */
+{
+ .l3proto = PF_INET6,
+ .l4proto = IPPROTO_UDPLITE,
+ .name = "udplite",
+ .pkt_to_tuple = udplite_pkt_to_tuple,
+ .invert_tuple = udplite_invert_tuple,
+ .print_tuple = udplite_print_tuple,
+ .packet = udplite_packet,
+ .new = udplite_new,
+ .error = udplite_error,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nla_policy = nf_ct_port_nla_policy,
+#endif
+#ifdef CONFIG_SYSCTL
+ .ctl_table_users = &udplite_sysctl_table_users,
+ .ctl_table_header = &udplite_sysctl_header,
+ .ctl_table = udplite_sysctl_table,
+#endif
+};
+
+/*
+ * Module init: register the PF_INET and then the PF_INET6 UDP-Lite
+ * tracker.  If the second registration fails the first one is undone.
+ * Returns 0 on success or the negative error from the failed registration.
+ */
+static int __init nf_conntrack_proto_udplite_init(void)
+{
+	int err;
+
+	err = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udplite4);
+	if (err < 0)
+		return err;
+
+	err = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udplite6);
+	if (err < 0) {
+		/* roll back the PF_INET registration */
+		nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite4);
+		return err;
+	}
+
+	return 0;
+}
+
+static void __exit nf_conntrack_proto_udplite_exit(void)
+{ /* Module exit: unregister both trackers, in the reverse of the order used by init */
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite6);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite4);
+}
+
+module_init(nf_conntrack_proto_udplite_init);
+module_exit(nf_conntrack_proto_udplite_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
new file mode 100644
index 0000000..a94294b
--- /dev/null
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -0,0 +1,236 @@
+/* SANE connection tracking helper
+ * (SANE = Scanner Access Now Easy)
+ * For documentation about the SANE network protocol see
+ * http://www.sane-project.org/html/doc015.html
+ */
+
+/* Copyright (C) 2007 Red Hat, Inc.
+ * Author: Michal Schmidt <mschmidt@redhat.com>
+ * Based on the FTP conntrack helper (net/netfilter/nf_conntrack_ftp.c):
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (C) 2003 Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_sane.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Schmidt <mschmidt@redhat.com>");
+MODULE_DESCRIPTION("SANE connection tracking helper");
+
+static char *sane_buffer; /* scratch buffer for help(): allocated in module init, freed in nf_conntrack_sane_fini() */
+
+static DEFINE_SPINLOCK(nf_sane_lock); /* held by help() for as long as it uses sane_buffer */
+
+#define MAX_PORTS 8 /* capacity of ports[] and of the helper arrays */
+static u_int16_t ports[MAX_PORTS]; /* ports to attach the helper to; SANE_PORT is filled in at init if none were given */
+static unsigned int ports_c; /* number of valid entries in ports[] */
+module_param_array(ports, ushort, &ports_c, 0400);
+
+struct sane_request { /* layout help() applies to original-direction payloads of exactly this size */
+ __be32 RPC_code; /* network byte order; help() compares it with htonl(SANE_NET_START) */
+#define SANE_NET_START 7 /* RPC code */
+
+ __be32 handle; /* not examined by help() */
+};
+
+struct sane_reply_net_start { /* start of the reply to SANE_NET_START, as read by help() */
+ __be32 status; /* network byte order; must equal htonl(SANE_STATUS_SUCCESS) for an expectation to be set up */
+#define SANE_STATUS_SUCCESS 0
+
+ __be16 zero; /* help() ignores the reply unless this is 0 */
+ __be16 port; /* passed to nf_ct_expect_init() as the destination port of the expected connection */
+ /* other fields aren't interesting for conntrack */
+};
+
+/*
+ * Conntrack helper callback for SANE control connections.
+ *
+ * Watches the original direction for a SANE_NET_START request and, on the
+ * next reply, sets up an expectation for a TCP connection between the same
+ * two addresses to the port named in that reply.
+ *
+ * Returns NF_ACCEPT, or NF_DROP when the expectation cannot be allocated
+ * or registered.
+ */
+static int help(struct sk_buff *skb,
+		unsigned int protoff,
+		struct nf_conn *ct,
+		enum ip_conntrack_info ctinfo)
+{
+	struct nf_ct_sane_master *sane_info = &nfct_help(ct)->help.ct_sane_info;
+	const int dir = CTINFO2DIR(ctinfo);
+	struct sane_reply_net_start *reply;
+	struct nf_conntrack_expect *exp;
+	struct nf_conntrack_tuple *tuple;
+	struct sane_request *req;
+	unsigned int dataoff, datalen;
+	const struct tcphdr *th;
+	struct tcphdr _tcph;
+	int verdict = NF_ACCEPT;
+	void *payload;
+
+	/* Until there's been traffic both ways, don't look in packets. */
+	if (ctinfo != IP_CT_ESTABLISHED &&
+	    ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY)
+		return NF_ACCEPT;
+
+	/* Not a full tcp header? */
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+	if (!th)
+		return NF_ACCEPT;
+
+	/* No data? */
+	dataoff = protoff + th->doff * 4;
+	if (dataoff >= skb->len)
+		return NF_ACCEPT;
+
+	datalen = skb->len - dataoff;
+
+	/* sane_buffer is shared, so it is only used under nf_sane_lock */
+	spin_lock_bh(&nf_sane_lock);
+	payload = skb_header_pointer(skb, dataoff, datalen, sane_buffer);
+	BUG_ON(payload == NULL);
+
+	if (dir == IP_CT_DIR_ORIGINAL) {
+		/* Only payloads of exactly request size are examined */
+		if (datalen == sizeof(struct sane_request)) {
+			req = payload;
+			/* SANE_NET_START makes the next reply interesting;
+			 * any other command resets the state */
+			if (req->RPC_code == htonl(SANE_NET_START))
+				sane_info->state = SANE_STATE_START_REQUESTED;
+			else
+				sane_info->state = SANE_STATE_NORMAL;
+		}
+		goto out;
+	}
+
+	/* Is it a reply to an uninteresting command? */
+	if (sane_info->state != SANE_STATE_START_REQUESTED)
+		goto out;
+
+	/* It's a reply to SANE_NET_START. */
+	sane_info->state = SANE_STATE_NORMAL;
+
+	if (datalen < sizeof(struct sane_reply_net_start)) {
+		pr_debug("nf_ct_sane: NET_START reply too short\n");
+		goto out;
+	}
+
+	reply = payload;
+	if (reply->status != htonl(SANE_STATUS_SUCCESS)) {
+		/* saned refused the command */
+		pr_debug("nf_ct_sane: unsuccessful SANE_STATUS = %u\n",
+			 ntohl(reply->status));
+		goto out;
+	}
+
+	/* Invalid saned reply? Ignore it. */
+	if (reply->zero != 0)
+		goto out;
+
+	exp = nf_ct_expect_alloc(ct);
+	if (!exp) {
+		verdict = NF_DROP;
+		goto out;
+	}
+
+	/* Same endpoints as the original direction; only the destination
+	 * port, taken from the reply, is given */
+	tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
+			  &tuple->src.u3, &tuple->dst.u3,
+			  IPPROTO_TCP, NULL, &reply->port);
+
+	pr_debug("nf_ct_sane: expect: ");
+	nf_ct_dump_tuple(&exp->tuple);
+
+	/* Can't expect this? Best to drop packet now. */
+	if (nf_ct_expect_related(exp) != 0)
+		verdict = NF_DROP;
+
+	nf_ct_expect_put(exp);
+
+out:
+	spin_unlock_bh(&nf_sane_lock);
+	return verdict;
+}
+
+static struct nf_conntrack_helper sane[MAX_PORTS][2] __read_mostly; /* one helper per port; index [0] is PF_INET, [1] is PF_INET6 (set in module init) */
+static char sane_names[MAX_PORTS][2][sizeof("sane-65535")] __read_mostly; /* storage for each helper's name; sized for the longest "sane-<port>" string */
+
+static const struct nf_conntrack_expect_policy sane_exp_policy = { /* assigned to every helper in module init */
+ .max_expected = 1,
+ .timeout = 5 * 60,
+};
+
+/* don't make this __exit, since it's called from __init ! */
+static void nf_conntrack_sane_fini(void)
+{
+	struct nf_conntrack_helper *helper;
+	int port_idx, fam;
+
+	/* Unregister the helper of every configured port, for both
+	 * address families, then release the shared buffer */
+	for (port_idx = 0; port_idx < ports_c; port_idx++) {
+		for (fam = 0; fam < 2; fam++) {
+			helper = &sane[port_idx][fam];
+
+			pr_debug("nf_ct_sane: unregistering helper for pf: %d "
+				 "port: %d\n",
+				 helper->tuple.src.l3num, ports[port_idx]);
+			nf_conntrack_helper_unregister(helper);
+		}
+	}
+
+	kfree(sane_buffer);
+}
+
+/*
+ * Module init: allocate the scratch buffer used by help() and register
+ * one helper per configured port and address family (PF_INET, PF_INET6).
+ * If no port was given as a module parameter, SANE_PORT is used.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, or the
+ * error returned by nf_conntrack_helper_register().  On a registration
+ * failure only the helpers registered so far are unregistered;
+ * nf_conntrack_sane_fini() is not used for this because it unregisters
+ * every slot of every configured port, including the one that just failed
+ * and the ones that were never reached.
+ */
+static int __init nf_conntrack_sane_init(void)
+{
+	int i, j, n, ret;
+	char *tmpname;
+
+	sane_buffer = kmalloc(65536, GFP_KERNEL);
+	if (!sane_buffer)
+		return -ENOMEM;
+
+	if (ports_c == 0)
+		ports[ports_c++] = SANE_PORT;
+
+	/* FIXME should be configurable whether IPv4 and IPv6 connections
+		 are tracked or not - YK */
+	for (i = 0; i < ports_c; i++) {
+		sane[i][0].tuple.src.l3num = PF_INET;
+		sane[i][1].tuple.src.l3num = PF_INET6;
+		for (j = 0; j < 2; j++) {
+			sane[i][j].tuple.src.u.tcp.port = htons(ports[i]);
+			sane[i][j].tuple.dst.protonum = IPPROTO_TCP;
+			sane[i][j].expect_policy = &sane_exp_policy;
+			sane[i][j].me = THIS_MODULE;
+			sane[i][j].help = help;
+			/* ports[] is 16 bit, so the name always fits in
+			 * sizeof("sane-65535") bytes */
+			tmpname = &sane_names[i][j][0];
+			if (ports[i] == SANE_PORT)
+				sprintf(tmpname, "sane");
+			else
+				sprintf(tmpname, "sane-%d", ports[i]);
+			sane[i][j].name = tmpname;
+
+			pr_debug("nf_ct_sane: registering helper for pf: %d "
+				 "port: %d\n",
+				 sane[i][j].tuple.src.l3num, ports[i]);
+			ret = nf_conntrack_helper_register(&sane[i][j]);
+			if (ret) {
+				printk(KERN_ERR "nf_ct_sane: failed to "
+				       "register helper for pf: %d port: %d\n",
+				       sane[i][j].tuple.src.l3num, ports[i]);
+				goto err_unregister;
+			}
+		}
+	}
+
+	return 0;
+
+err_unregister:
+	/* sane[i][j] is the helper that failed; everything before it in
+	 * registration order (index i * 2 + j) was registered */
+	for (n = i * 2 + j; n-- > 0; )
+		nf_conntrack_helper_unregister(&sane[n / 2][n % 2]);
+	kfree(sane_buffer);
+	return ret;
+}
+
+module_init(nf_conntrack_sane_init);
+module_exit(nf_conntrack_sane_fini);
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
new file mode 100644
index 0000000..6813f1c
--- /dev/null
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -0,0 +1,1377 @@
+/* SIP extension for IP connection tracking.
+ *
+ * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
+ * based on RR's ip_conntrack_ftp.c and other modules.
+ * (C) 2007 United Security Providers
+ * (C) 2007, 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/skbuff.h>
+#include <linux/inet.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_sip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
+MODULE_DESCRIPTION("SIP connection tracking helper");
+MODULE_ALIAS("ip_conntrack_sip");
+
+/* Upper bound on the number of entries accepted by the "ports" parameter. */
+#define MAX_PORTS 8
+/* Port list given via the "ports" module parameter; ports_c receives the
+ * number of entries supplied. */
+static unsigned short ports[MAX_PORTS];
+static unsigned int ports_c;
+module_param_array(ports, ushort, &ports_c, 0400);
+MODULE_PARM_DESC(ports, "port numbers of SIP servers");
+
+static unsigned int sip_timeout __read_mostly = SIP_TIMEOUT;
+module_param(sip_timeout, uint, 0600);
+MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session");
+
+/* When non-zero, the signalling expectation created for a REGISTER request
+ * is given a fixed source address instead of a wildcard one. */
+static int sip_direct_signalling __read_mostly = 1;
+module_param(sip_direct_signalling, int, 0600);
+MODULE_PARM_DESC(sip_direct_signalling, "expect incoming calls from registrar "
+ "only (default 1)");
+
+/* When non-zero, media expectations are only created if the media address
+ * equals the source address of the signalling packet. */
+static int sip_direct_media __read_mostly = 1;
+module_param(sip_direct_media, int, 0600);
+MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling "
+ "endpoints only (default 1)");
+
+/* Hook pointers exported (GPL only) so that another module can install NAT
+ * handlers. Where the code in this file calls one of them, it first loads the
+ * pointer with rcu_dereference() and skips the call while it is NULL. */
+unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
+ const char **dptr,
+ unsigned int *datalen) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
+
+/* Called for signalling expectations created from REGISTER requests. */
+unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
+ const char **dptr,
+ unsigned int *datalen,
+ struct nf_conntrack_expect *exp,
+ unsigned int matchoff,
+ unsigned int matchlen) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook);
+
+/* Called to update a media connection address in the SDP body. */
+unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb,
+ const char **dptr,
+ unsigned int dataoff,
+ unsigned int *datalen,
+ enum sdp_header_types type,
+ enum sdp_header_types term,
+ const union nf_inet_addr *addr)
+ __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook);
+
+/* Called to rewrite the media port when an existing expectation is reused. */
+unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb,
+ const char **dptr,
+ unsigned int *datalen,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ u_int16_t port) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook);
+
+/* Called once per SDP body to update session-level addresses. */
+unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
+ const char **dptr,
+ unsigned int dataoff,
+ unsigned int *datalen,
+ const union nf_inet_addr *addr)
+ __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook);
+
+/* Called with the freshly initialised RTP and RTCP expectations of a media
+ * stream; its return value becomes the verdict for the packet. */
+unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb,
+ const char **dptr,
+ unsigned int *datalen,
+ struct nf_conntrack_expect *rtp_exp,
+ struct nf_conntrack_expect *rtcp_exp,
+ unsigned int mediaoff,
+ unsigned int medialen,
+ union nf_inet_addr *rtp_addr)
+ __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_sdp_media_hook);
+
+/* Length of the run of alphabetic characters starting at dptr, never looking
+ * at or beyond limit. ct and shift are unused; they are only part of the
+ * common match_len callback signature. */
+static int string_len(const struct nf_conn *ct, const char *dptr,
+		      const char *limit, int *shift)
+{
+	const char *p;
+
+	for (p = dptr; p < limit; p++) {
+		if (!isalpha(*p))
+			break;
+	}
+	return p - dptr;
+}
+
+/* Length of the run of decimal digits starting at dptr, bounded by limit.
+ * ct and shift are unused. */
+static int digits_len(const struct nf_conn *ct, const char *dptr,
+		      const char *limit, int *shift)
+{
+	const char *first = dptr;
+
+	while (dptr < limit) {
+		if (!isdigit(*dptr))
+			break;
+		dptr++;
+	}
+	return dptr - first;
+}
+
+/* Length of "<media type> <port>": an alphabetic word, a single blank and a
+ * run of digits. Returns 0 when the blank after the media type is missing. */
+static int media_len(const struct nf_conn *ct, const char *dptr,
+		     const char *limit, int *shift)
+{
+	int type_len = string_len(ct, dptr, limit, shift);
+	const char *sep = dptr + type_len;
+
+	/* The media type has to be followed by a blank inside the data. */
+	if (sep >= limit || *sep != ' ')
+		return 0;
+
+	return type_len + 1 + digits_len(ct, sep + 1, limit, shift);
+}
+
+/*
+ * Parse a textual IPv4 or IPv6 address (chosen by the conntrack's layer 3
+ * protocol) from [cp, limit) into *addr, which is zeroed first.
+ *
+ * Returns 1 on success and, if endp is non-NULL, stores the position right
+ * after the address there. Returns 0 if nothing could be parsed; *endp is
+ * left untouched in that case.
+ */
+static int parse_addr(const struct nf_conn *ct, const char *cp,
+		      const char **endp, union nf_inet_addr *addr,
+		      const char *limit)
+{
+	const char *stop;
+	int ok = 0;
+
+	memset(addr, 0, sizeof(*addr));
+	switch (nf_ct_l3num(ct)) {
+	case AF_INET:
+		ok = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &stop);
+		break;
+	case AF_INET6:
+		ok = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &stop);
+		break;
+	default:
+		BUG();
+	}
+
+	if (!ok || stop == cp)
+		return 0;
+	if (endp != NULL)
+		*endp = stop;
+	return 1;
+}
+
+/*
+ * Skip an IP address with an optional ":port" suffix.
+ *
+ * Returns the number of bytes covered by the address (plus ':' and port
+ * digits when present), or 0 if no address can be parsed at dptr. ct selects
+ * the address family via parse_addr(); shift is only handed on to
+ * digits_len().
+ */
+static int epaddr_len(const struct nf_conn *ct, const char *dptr,
+		      const char *limit, int *shift)
+{
+	union nf_inet_addr addr;
+	const char *aux = dptr;
+
+	if (!parse_addr(ct, dptr, &dptr, &addr, limit)) {
+		pr_debug("ip: %s parse failed.!\n", dptr);
+		return 0;
+	}
+
+	/* Port number. The address may end exactly at limit, so check the
+	 * bound before looking at the byte that follows it. */
+	if (dptr < limit && *dptr == ':') {
+		dptr++;
+		dptr += digits_len(ct, dptr, limit, shift);
+	}
+	return dptr - aux;
+}
+
+/* Get address length, skipping user info.
+ *
+ * If an '@' is found before the end of the line, *shift is increased by the
+ * number of bytes up to and including the '@' and the address is measured
+ * from the byte after it. Otherwise *shift keeps the value it had on entry
+ * and the address is measured from dptr. Returns the result of epaddr_len()
+ * for that position, i.e. 0 if no address could be parsed there. */
+static int skp_epaddr_len(const struct nf_conn *ct, const char *dptr,
+ const char *limit, int *shift)
+{
+ const char *start = dptr;
+ int s = *shift;
+
+ /* Search for @, but stop at the end of the line.
+ * We are inside a sip: URI, so we don't need to worry about
+ * continuation lines. */
+ while (dptr < limit &&
+ *dptr != '@' && *dptr != '\r' && *dptr != '\n') {
+ (*shift)++;
+ dptr++;
+ }
+
+ if (dptr < limit && *dptr == '@') {
+ dptr++;
+ (*shift)++;
+ } else {
+ /* No user part: undo the scan and start over at the beginning. */
+ dptr = start;
+ *shift = s;
+ }
+
+ return epaddr_len(ct, dptr, limit, shift);
+}
+
+/* Parse a SIP request line of the form:
+ *
+ * Request-Line = Method SP Request-URI SP SIP-Version CRLF
+ *
+ * and return the offset and length of the address contained in the Request-URI.
+ *
+ * On success (return value 1) *matchoff / *matchlen describe the address
+ * including an optional ":port", *addr holds the parsed address and *port
+ * the port in network byte order (SIP_PORT if the URI carries none).
+ * Returns 0 if there is no method, no data after it, or no address could be
+ * located, and -1 if the line ends before a "sip:" URI is seen or if the
+ * address or port is invalid.
+ */
+int ct_sip_parse_request(const struct nf_conn *ct,
+			 const char *dptr, unsigned int datalen,
+			 unsigned int *matchoff, unsigned int *matchlen,
+			 union nf_inet_addr *addr, __be16 *port)
+{
+	const char *start = dptr, *limit = dptr + datalen, *end;
+	unsigned int mlen;
+	unsigned int p;
+	int shift = 0;
+
+	/* Skip method and following whitespace */
+	mlen = string_len(ct, dptr, limit, NULL);
+	if (!mlen)
+		return 0;
+	dptr += mlen;
+	if (++dptr >= limit)
+		return 0;
+
+	/* Find SIP URI. The scheme itself has to be skipped as well: a URI
+	 * without a user part ("sip:host") has no '@' to skip forward to, so
+	 * address parsing would otherwise start at "sip:" and fail. Only the
+	 * search is shortened by the scheme length; limit stays untouched so
+	 * that the address and port parsing below see the complete data. */
+	for (; dptr < limit - strlen("sip:"); dptr++) {
+		if (*dptr == '\r' || *dptr == '\n')
+			return -1;
+		if (strnicmp(dptr, "sip:", strlen("sip:")) == 0) {
+			dptr += strlen("sip:");
+			break;
+		}
+	}
+	if (!skp_epaddr_len(ct, dptr, limit, &shift))
+		return 0;
+	/* shift is the length of the user part including '@', if any */
+	dptr += shift;
+
+	if (!parse_addr(ct, dptr, &end, addr, limit))
+		return -1;
+	if (end < limit && *end == ':') {
+		end++;
+		p = simple_strtoul(end, (char **)&end, 10);
+		if (p < 1024 || p > 65535)
+			return -1;
+		*port = htons(p);
+	} else
+		*port = htons(SIP_PORT);
+
+	if (end == dptr)
+		return 0;
+	*matchoff = dptr - start;
+	*matchlen = end - dptr;
+	return 1;
+}
+EXPORT_SYMBOL_GPL(ct_sip_parse_request);
+
+/* SIP header parsing: SIP headers are located at the beginning of a line, but
+ * may span several lines, in which case the continuation lines begin with a
+ * whitespace character. RFC 2543 allows lines to be terminated with CR, LF or
+ * CRLF, RFC 3261 allows only CRLF, we support both.
+ *
+ * Headers are followed by (optionally) whitespace, a colon, again (optionally)
+ * whitespace and the values. Whitespace in this context means any amount of
+ * tabs, spaces and continuation lines, which are treated as a single whitespace
+ * character.
+ *
+ * Some headers may appear multiple times. A comma separated list of values is
+ * equivalent to multiple headers.
+ */
+/* Header descriptions, indexed by enum sip_header_types. Judging by how the
+ * entries are used further down, the SIP_HDR() arguments are: full header
+ * name, compact name (or NULL), a string to search for inside the value (or
+ * NULL) and the function measuring the part of interest -- confirm against
+ * the macro definition. */
+static const struct sip_header ct_sip_hdrs[] = {
+ [SIP_HDR_CSEQ] = SIP_HDR("CSeq", NULL, NULL, digits_len),
+ [SIP_HDR_FROM] = SIP_HDR("From", "f", "sip:", skp_epaddr_len),
+ [SIP_HDR_TO] = SIP_HDR("To", "t", "sip:", skp_epaddr_len),
+ [SIP_HDR_CONTACT] = SIP_HDR("Contact", "m", "sip:", skp_epaddr_len),
+ [SIP_HDR_VIA] = SIP_HDR("Via", "v", "UDP ", epaddr_len),
+ [SIP_HDR_EXPIRES] = SIP_HDR("Expires", NULL, NULL, digits_len),
+ [SIP_HDR_CONTENT_LENGTH] = SIP_HDR("Content-Length", "l", NULL, digits_len),
+};
+
+/* Step over the line terminator at dptr (CR, LF or CRLF) and, if the next
+ * line starts with a space or tab, skip that leading whitespace.
+ *
+ * Returns a pointer to the first byte after the leading whitespace of the
+ * continuation line (this may be limit if only whitespace is left), or NULL
+ * if the data ends right after the terminator or the next line is not a
+ * continuation line. */
+static const char *sip_follow_continuation(const char *dptr, const char *limit)
+{
+ /* Walk past newline */
+ if (++dptr >= limit)
+ return NULL;
+
+ /* Skip '\n' in CR LF */
+ if (*(dptr - 1) == '\r' && *dptr == '\n') {
+ if (++dptr >= limit)
+ return NULL;
+ }
+
+ /* Continuation line? */
+ if (*dptr != ' ' && *dptr != '\t')
+ return NULL;
+
+ /* skip leading whitespace */
+ for (; dptr < limit; dptr++) {
+ if (*dptr != ' ' && *dptr != '\t')
+ break;
+ }
+ return dptr;
+}
+
+/*
+ * Skip blanks and tabs, following at most one line continuation.
+ *
+ * Returns a pointer to the first byte that is not whitespace (or limit if
+ * the data ends first), or NULL if a line terminator is reached that is not
+ * followed by a continuation line.
+ */
+static const char *sip_skip_whitespace(const char *dptr, const char *limit)
+{
+	for (; dptr < limit; dptr++) {
+		if (*dptr == ' ' || *dptr == '\t')
+			continue;
+		if (*dptr != '\r' && *dptr != '\n')
+			break;
+		/* sip_follow_continuation() already returns the first
+		 * non-whitespace byte of the continuation line (or NULL).
+		 * Stop right here: going through the loop increment once
+		 * more would step over that byte. */
+		dptr = sip_follow_continuation(dptr, limit);
+		break;
+	}
+	return dptr;
+}
+
+/* Search within a SIP header value, dealing with continuation lines.
+ *
+ * Looks for the len bytes at needle, compared case-insensitively. Only
+ * start positions strictly below limit - len are tried. Returns a pointer to
+ * the match, or NULL if none is found before the end of the data or before a
+ * line terminator that is not followed by a continuation line. */
+static const char *ct_sip_header_search(const char *dptr, const char *limit,
+ const char *needle, unsigned int len)
+{
+ for (limit -= len; dptr < limit; dptr++) {
+ if (*dptr == '\r' || *dptr == '\n') {
+ dptr = sip_follow_continuation(dptr, limit);
+ if (dptr == NULL)
+ break;
+ continue;
+ }
+
+ if (strnicmp(dptr, needle, len) == 0)
+ return dptr;
+ }
+ return NULL;
+}
+
+/*
+ * Find the first header of the given type after dataoff and locate the part
+ * of its value described by the ct_sip_hdrs[] entry.
+ *
+ * Headers are only recognised at the beginning of a line, so the scan first
+ * advances to the next line terminator; continuation lines are skipped.
+ *
+ * Returns 1 with *matchoff / *matchlen (offsets relative to dptr) describing
+ * the match, 0 if no such header is found, and -1 if the header is present
+ * but the searched string or the value could not be located; *matchoff then
+ * holds the offset of the header value.
+ */
+int ct_sip_get_header(const struct nf_conn *ct, const char *dptr,
+		      unsigned int dataoff, unsigned int datalen,
+		      enum sip_header_types type,
+		      unsigned int *matchoff, unsigned int *matchlen)
+{
+	const struct sip_header *hdr = &ct_sip_hdrs[type];
+	const char *start = dptr, *limit = dptr + datalen;
+	int shift = 0;
+
+	for (dptr += dataoff; dptr < limit; dptr++) {
+		/* Find beginning of line */
+		if (*dptr != '\r' && *dptr != '\n')
+			continue;
+		if (++dptr >= limit)
+			break;
+		if (*(dptr - 1) == '\r' && *dptr == '\n') {
+			if (++dptr >= limit)
+				break;
+		}
+
+		/* Skip continuation lines */
+		if (*dptr == ' ' || *dptr == '\t')
+			continue;
+
+		/* Find header. Compact headers must be followed by a
+		 * non-alphabetic character to avoid mismatches. The byte to
+		 * test is the one directly behind the compact name, i.e. at
+		 * offset clen; the length check in front of it guarantees
+		 * that this byte is still inside the data. */
+		if (limit - dptr >= hdr->len &&
+		    strnicmp(dptr, hdr->name, hdr->len) == 0)
+			dptr += hdr->len;
+		else if (hdr->cname && limit - dptr >= hdr->clen + 1 &&
+			 strnicmp(dptr, hdr->cname, hdr->clen) == 0 &&
+			 !isalpha(*(dptr + hdr->clen)))
+			dptr += hdr->clen;
+		else
+			continue;
+
+		/* Find and skip colon */
+		dptr = sip_skip_whitespace(dptr, limit);
+		if (dptr == NULL)
+			break;
+		if (*dptr != ':' || ++dptr >= limit)
+			break;
+
+		/* Skip whitespace after colon */
+		dptr = sip_skip_whitespace(dptr, limit);
+		if (dptr == NULL)
+			break;
+
+		/* Record the value start first so that callers can continue
+		 * behind this header even if parsing the value fails. */
+		*matchoff = dptr - start;
+		if (hdr->search) {
+			dptr = ct_sip_header_search(dptr, limit, hdr->search,
+						    hdr->slen);
+			if (!dptr)
+				return -1;
+			dptr += hdr->slen;
+		}
+
+		*matchlen = hdr->match_len(ct, dptr, limit, &shift);
+		if (!*matchlen)
+			return -1;
+		*matchoff = dptr - start + shift;
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ct_sip_get_header);
+
+/* Get next header field in a list of comma separated values.
+ *
+ * Starting at dataoff, looks for a ',' and then for the header's search
+ * string behind it. Returns 1 with *matchoff / *matchlen set on success, 0
+ * if no comma or no search string is found, and -1 if the value behind the
+ * search string could not be measured (*matchoff is set in that case too). */
+static int ct_sip_next_header(const struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ enum sip_header_types type,
+ unsigned int *matchoff, unsigned int *matchlen)
+{
+ const struct sip_header *hdr = &ct_sip_hdrs[type];
+ const char *start = dptr, *limit = dptr + datalen;
+ int shift = 0;
+
+ dptr += dataoff;
+
+ dptr = ct_sip_header_search(dptr, limit, ",", strlen(","));
+ if (!dptr)
+ return 0;
+
+ dptr = ct_sip_header_search(dptr, limit, hdr->search, hdr->slen);
+ if (!dptr)
+ return 0;
+ dptr += hdr->slen;
+
+ *matchoff = dptr - start;
+ *matchlen = hdr->match_len(ct, dptr, limit, &shift);
+ if (!*matchlen)
+ return -1;
+ *matchoff += shift;
+ return 1;
+}
+
+/* Walk through headers until a parsable one is found or no header of the
+ * given type is left.
+ *
+ * in_header may be NULL. If it is non-NULL and *in_header is set, the
+ * remaining comma separated values of the current header are tried first;
+ * *in_header is cleared once they are exhausted and set again when a new
+ * header has been found. Returns 1 with *matchoff / *matchlen set, or 0 if
+ * no header of the given type is left. */
+static int ct_sip_walk_headers(const struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ enum sip_header_types type, int *in_header,
+ unsigned int *matchoff, unsigned int *matchlen)
+{
+ int ret;
+
+ if (in_header && *in_header) {
+ while (1) {
+ ret = ct_sip_next_header(ct, dptr, dataoff, datalen,
+ type, matchoff, matchlen);
+ if (ret > 0)
+ return ret;
+ if (ret == 0)
+ break;
+ /* NOTE(review): *matchoff is measured from dptr, not from
+ * dataoff, so this adds an absolute offset to dataoff --
+ * confirm this is intended. */
+ dataoff += *matchoff;
+ }
+ *in_header = 0;
+ }
+
+ while (1) {
+ ret = ct_sip_get_header(ct, dptr, dataoff, datalen,
+ type, matchoff, matchlen);
+ if (ret > 0)
+ break;
+ if (ret == 0)
+ return ret;
+ /* Unparsable header: continue the search behind it. */
+ dataoff += *matchoff;
+ }
+
+ if (in_header)
+ *in_header = 1;
+ return 1;
+}
+
+/* Locate a SIP header, parse the URI and return the offset and length of
+ * the address as well as the address and port themselves. A stream of
+ * headers can be parsed by handing in a non-NULL dataoff and in_header
+ * pointer.
+ *
+ * Returns 1 on success, 0 if no (further) header of the given type exists
+ * and -1 if the address or the port is invalid. The port is returned in
+ * network byte order and defaults to SIP_PORT. On success *dataoff, if
+ * given, is set to the offset right behind the parsed address and port.
+ */
+int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
+			    unsigned int *dataoff, unsigned int datalen,
+			    enum sip_header_types type, int *in_header,
+			    unsigned int *matchoff, unsigned int *matchlen,
+			    union nf_inet_addr *addr, __be16 *port)
+{
+	const char *c, *limit = dptr + datalen;
+	unsigned int p;
+	int ret;
+
+	ret = ct_sip_walk_headers(ct, dptr, dataoff ? *dataoff : 0, datalen,
+				  type, in_header, matchoff, matchlen);
+	WARN_ON(ret < 0);
+	if (ret == 0)
+		return ret;
+
+	if (!parse_addr(ct, dptr + *matchoff, &c, addr, limit))
+		return -1;
+	/* The address may end exactly at limit: check the bound before
+	 * looking at the following byte, as ct_sip_parse_request() does. */
+	if (c < limit && *c == ':') {
+		c++;
+		p = simple_strtoul(c, (char **)&c, 10);
+		if (p < 1024 || p > 65535)
+			return -1;
+		*port = htons(p);
+	} else
+		*port = htons(SIP_PORT);
+
+	if (dataoff)
+		*dataoff = c - dptr;
+	return 1;
+}
+EXPORT_SYMBOL_GPL(ct_sip_parse_header_uri);
+
+/* Parse address from header parameter and return address, offset and length.
+ *
+ * The search for name starts at dataoff and is confined to the current
+ * value, i.e. it ends at the next ',' if there is one. name has to include
+ * everything in front of the address. Returns 1 on success, 0 if the
+ * parameter is missing or not followed by a parsable address. */
+int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ const char *name,
+ unsigned int *matchoff, unsigned int *matchlen,
+ union nf_inet_addr *addr)
+{
+ const char *limit = dptr + datalen;
+ const char *start, *end;
+
+ /* Do not look beyond the next list separator. */
+ limit = ct_sip_header_search(dptr + dataoff, limit, ",", strlen(","));
+ if (!limit)
+ limit = dptr + datalen;
+
+ start = ct_sip_header_search(dptr + dataoff, limit, name, strlen(name));
+ if (!start)
+ return 0;
+
+ start += strlen(name);
+ if (!parse_addr(ct, start, &end, addr, limit))
+ return 0;
+ *matchoff = start - dptr;
+ *matchlen = end - start;
+ return 1;
+}
+EXPORT_SYMBOL_GPL(ct_sip_parse_address_param);
+
+/* Parse numerical header parameter and return value, offset and length.
+ *
+ * The search for name starts at dataoff and ends at the next ',' if there is
+ * one. Returns 1 if the parameter was found and at least one digit was
+ * converted, otherwise 0; this function never returns a negative value.
+ * matchoff and matchlen are optional, but are only filled in when both are
+ * non-NULL. Note that *val is written even when no digit was converted. */
+int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ const char *name,
+ unsigned int *matchoff, unsigned int *matchlen,
+ unsigned int *val)
+{
+ const char *limit = dptr + datalen;
+ const char *start;
+ char *end;
+
+ /* Do not look beyond the next list separator. */
+ limit = ct_sip_header_search(dptr + dataoff, limit, ",", strlen(","));
+ if (!limit)
+ limit = dptr + datalen;
+
+ start = ct_sip_header_search(dptr + dataoff, limit, name, strlen(name));
+ if (!start)
+ return 0;
+
+ start += strlen(name);
+ /* Base 0: the number base is derived from the prefix of the value. */
+ *val = simple_strtoul(start, &end, 0);
+ if (start == end)
+ return 0;
+ if (matchoff && matchlen) {
+ *matchoff = start - dptr;
+ *matchlen = end - start;
+ }
+ return 1;
+}
+EXPORT_SYMBOL_GPL(ct_sip_parse_numerical_param);
+
+/* SDP header parsing: a SDP session description contains an ordered set of
+ * headers, starting with a section containing general session parameters,
+ * optionally followed by multiple media descriptions.
+ *
+ * SDP headers always start at the beginning of a line. According to RFC 2327:
+ * "The sequence CRLF (0x0d0a) is used to end a record, although parsers should
+ * be tolerant and also accept records terminated with a single newline
+ * character". We handle both cases.
+ */
+/* SDP line descriptions, indexed by enum sdp_header_types. The SDP_HDR()
+ * arguments appear to be: line prefix, a string to search for inside the
+ * line (or NULL) and the function measuring the part of interest -- confirm
+ * against the macro definition. */
+static const struct sip_header ct_sdp_hdrs[] = {
+ [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len),
+ [SDP_HDR_OWNER_IP4] = SDP_HDR("o=", "IN IP4 ", epaddr_len),
+ [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", epaddr_len),
+ [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", epaddr_len),
+ [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", epaddr_len),
+ [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len),
+};
+
+/* Case-sensitive search for needle (len bytes) within a single SDP line.
+ * The scan stops at the first CR or LF and only tries start positions below
+ * limit - len. Returns the match position or NULL. */
+static const char *ct_sdp_header_search(const char *dptr, const char *limit,
+					const char *needle, unsigned int len)
+{
+	const char *last = limit - len;
+
+	while (dptr < last && *dptr != '\r' && *dptr != '\n') {
+		if (!strncmp(dptr, needle, len))
+			return dptr;
+		dptr++;
+	}
+	return NULL;
+}
+
+/* Locate a SDP header (optionally a substring within the header value),
+ * optionally stopping at the first occurrence of the term header, parse
+ * it and return the offset and length of the data we're interested in.
+ *
+ * Only lines that start after dataoff are examined. Returns 1 with
+ * *matchoff / *matchlen (relative to dptr) set, 0 if the header is not found
+ * before the end of the data or before the term header, and -1 if the header
+ * is found but the searched string or the value cannot be located.
+ */
+int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ enum sdp_header_types type,
+ enum sdp_header_types term,
+ unsigned int *matchoff, unsigned int *matchlen)
+{
+ const struct sip_header *hdr = &ct_sdp_hdrs[type];
+ const struct sip_header *thdr = &ct_sdp_hdrs[term];
+ const char *start = dptr, *limit = dptr + datalen;
+ int shift = 0;
+
+ for (dptr += dataoff; dptr < limit; dptr++) {
+ /* Find beginning of line */
+ if (*dptr != '\r' && *dptr != '\n')
+ continue;
+ if (++dptr >= limit)
+ break;
+ if (*(dptr - 1) == '\r' && *dptr == '\n') {
+ if (++dptr >= limit)
+ break;
+ }
+
+ /* The terminating header ends the search, the wanted header is
+ * processed below, any other line is skipped. */
+ if (term != SDP_HDR_UNSPEC &&
+ limit - dptr >= thdr->len &&
+ strnicmp(dptr, thdr->name, thdr->len) == 0)
+ break;
+ else if (limit - dptr >= hdr->len &&
+ strnicmp(dptr, hdr->name, hdr->len) == 0)
+ dptr += hdr->len;
+ else
+ continue;
+
+ *matchoff = dptr - start;
+ if (hdr->search) {
+ dptr = ct_sdp_header_search(dptr, limit, hdr->search,
+ hdr->slen);
+ if (!dptr)
+ return -1;
+ dptr += hdr->slen;
+ }
+
+ *matchlen = hdr->match_len(ct, dptr, limit, &shift);
+ if (!*matchlen)
+ return -1;
+ *matchoff = dptr - start + shift;
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ct_sip_get_sdp_header);
+
+/* Locate an SDP header with ct_sip_get_sdp_header() and parse the matched
+ * text as an address. Returns 1 with *addr filled in, the (<= 0) result of
+ * the header lookup if that did not succeed, or -1 if the matched text is
+ * not a valid address. */
+static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr,
+				 unsigned int dataoff, unsigned int datalen,
+				 enum sdp_header_types type,
+				 enum sdp_header_types term,
+				 unsigned int *matchoff, unsigned int *matchlen,
+				 union nf_inet_addr *addr)
+{
+	const char *val;
+	int ret;
+
+	ret = ct_sip_get_sdp_header(ct, dptr, dataoff, datalen, type, term,
+				    matchoff, matchlen);
+	if (ret <= 0)
+		return ret;
+
+	/* Parse exactly the matched span, nothing beyond it. */
+	val = dptr + *matchoff;
+	return parse_addr(ct, val, NULL, addr, val + *matchlen) ? 1 : -1;
+}
+
+/* Re-arm the signalling expectation of ct that matches addr/port.
+ *
+ * Walks the expectations of ct under nf_conntrack_lock. The first one of
+ * class SIP_EXPECT_SIGNALLING with the given destination address and port
+ * whose timer could be stopped has NF_CT_EXPECT_INACTIVE cleared and its
+ * timer restarted to fire in `expires` seconds. Returns 1 if such an
+ * expectation was refreshed, otherwise 0. */
+static int refresh_signalling_expectation(struct nf_conn *ct,
+ union nf_inet_addr *addr,
+ __be16 port,
+ unsigned int expires)
+{
+ struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_expect *exp;
+ struct hlist_node *n, *next;
+ int found = 0;
+
+ spin_lock_bh(&nf_conntrack_lock);
+ hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
+ if (exp->class != SIP_EXPECT_SIGNALLING ||
+ !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) ||
+ exp->tuple.dst.u.udp.port != port)
+ continue;
+ /* Skip expectations whose timer could not be stopped. */
+ if (!del_timer(&exp->timeout))
+ continue;
+ exp->flags &= ~NF_CT_EXPECT_INACTIVE;
+ exp->timeout.expires = jiffies + expires * HZ;
+ add_timer(&exp->timeout);
+ found = 1;
+ break;
+ }
+ spin_unlock_bh(&nf_conntrack_lock);
+ return found;
+}
+
+/* Remove expectations of ct under nf_conntrack_lock.
+ *
+ * With media == true every expectation whose class is not
+ * SIP_EXPECT_SIGNALLING is removed; with media == false only the first
+ * SIP_EXPECT_SIGNALLING expectation is. Expectations whose timer could not
+ * be stopped are left alone. */
+static void flush_expectations(struct nf_conn *ct, bool media)
+{
+ struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_expect *exp;
+ struct hlist_node *n, *next;
+
+ spin_lock_bh(&nf_conntrack_lock);
+ hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
+ /* Skip entries of the kind we were not asked to flush. */
+ if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media)
+ continue;
+ if (!del_timer(&exp->timeout))
+ continue;
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+ if (!media)
+ break;
+ }
+ spin_unlock_bh(&nf_conntrack_lock);
+}
+
+/* Set up the RTP and RTCP expectations for one media stream.
+ *
+ * daddr and port (network byte order) are the media address and port taken
+ * from the SDP body, class the expectation class of the media type, and
+ * mediaoff/medialen the location handed on to the NAT hooks. daddr may be
+ * rewritten when an already existing expectation is reused.
+ *
+ * Returns NF_ACCEPT if the expectations were registered or nothing had to be
+ * done, NF_DROP on allocation or registration failure, or the verdict of the
+ * NAT media hook when that is used. */
+static int set_expected_rtp_rtcp(struct sk_buff *skb,
+ const char **dptr, unsigned int *datalen,
+ union nf_inet_addr *daddr, __be16 port,
+ enum sip_expectation_classes class,
+ unsigned int mediaoff, unsigned int medialen)
+{
+ struct nf_conntrack_expect *exp, *rtp_exp, *rtcp_exp;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct net *net = nf_ct_net(ct);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ union nf_inet_addr *saddr;
+ struct nf_conntrack_tuple tuple;
+ int direct_rtp = 0, skip_expect = 0, ret = NF_DROP;
+ u_int16_t base_port;
+ __be16 rtp_port, rtcp_port;
+ typeof(nf_nat_sdp_port_hook) nf_nat_sdp_port;
+ typeof(nf_nat_sdp_media_hook) nf_nat_sdp_media;
+
+ /* With sip_direct_media set, media is only expected if its address is
+ * the sender of this packet, and only from the other endpoint of the
+ * signalling connection. Otherwise the source is left open. */
+ saddr = NULL;
+ if (sip_direct_media) {
+ if (!nf_inet_addr_cmp(daddr, &ct->tuplehash[dir].tuple.src.u3))
+ return NF_ACCEPT;
+ saddr = &ct->tuplehash[!dir].tuple.src.u3;
+ }
+
+ /* We need to check whether the registration exists before attempting
+ * to register it since we can see the same media description multiple
+ * times on different connections in case multiple endpoints receive
+ * the same call.
+ *
+ * RTP optimization: if we find a matching media channel expectation
+ * and both the expectation and this connection are SNATed, we assume
+ * both sides can reach each other directly and use the final
+ * destination address from the expectation. We still need to keep
+ * the NATed expectations for media that might arrive from the
+ * outside, and additionally need to expect the direct RTP stream
+ * in case it passes through us even without NAT.
+ */
+ memset(&tuple, 0, sizeof(tuple));
+ if (saddr)
+ tuple.src.u3 = *saddr;
+ tuple.src.l3num = nf_ct_l3num(ct);
+ tuple.dst.protonum = IPPROTO_UDP;
+ tuple.dst.u3 = *daddr;
+ tuple.dst.u.udp.port = port;
+
+ /* The lookup runs at most twice: the second pass happens after the
+ * tuple has been switched to the saved (pre-NAT) destination. */
+ rcu_read_lock();
+ do {
+ exp = __nf_ct_expect_find(net, &tuple);
+
+ if (!exp || exp->master == ct ||
+ nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
+ exp->class != class)
+ break;
+#ifdef CONFIG_NF_NAT_NEEDED
+ if (exp->tuple.src.l3num == AF_INET && !direct_rtp &&
+ (exp->saved_ip != exp->tuple.dst.u3.ip ||
+ exp->saved_proto.udp.port != exp->tuple.dst.u.udp.port) &&
+ ct->status & IPS_NAT_MASK) {
+ daddr->ip = exp->saved_ip;
+ tuple.dst.u3.ip = exp->saved_ip;
+ tuple.dst.u.udp.port = exp->saved_proto.udp.port;
+ direct_rtp = 1;
+ } else
+#endif
+ skip_expect = 1;
+ } while (!skip_expect);
+ rcu_read_unlock();
+
+ /* RTP uses the even port, RTCP the odd port right above it. */
+ base_port = ntohs(tuple.dst.u.udp.port) & ~1;
+ rtp_port = htons(base_port);
+ rtcp_port = htons(base_port + 1);
+
+ if (direct_rtp) {
+ nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook);
+ if (nf_nat_sdp_port &&
+ !nf_nat_sdp_port(skb, dptr, datalen,
+ mediaoff, medialen, ntohs(rtp_port)))
+ goto err1;
+ }
+
+ /* An equivalent expectation already exists for another connection. */
+ if (skip_expect)
+ return NF_ACCEPT;
+
+ rtp_exp = nf_ct_expect_alloc(ct);
+ if (rtp_exp == NULL)
+ goto err1;
+ nf_ct_expect_init(rtp_exp, class, nf_ct_l3num(ct), saddr, daddr,
+ IPPROTO_UDP, NULL, &rtp_port);
+
+ rtcp_exp = nf_ct_expect_alloc(ct);
+ if (rtcp_exp == NULL)
+ goto err2;
+ nf_ct_expect_init(rtcp_exp, class, nf_ct_l3num(ct), saddr, daddr,
+ IPPROTO_UDP, NULL, &rtcp_port);
+
+ nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook);
+ if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp)
+ ret = nf_nat_sdp_media(skb, dptr, datalen, rtp_exp, rtcp_exp,
+ mediaoff, medialen, daddr);
+ else {
+ /* Register both or neither: undo RTP if RTCP fails. */
+ if (nf_ct_expect_related(rtp_exp) == 0) {
+ if (nf_ct_expect_related(rtcp_exp) != 0)
+ nf_ct_unexpect_related(rtp_exp);
+ else
+ ret = NF_ACCEPT;
+ }
+ }
+ /* Drop the references obtained from nf_ct_expect_alloc(). */
+ nf_ct_expect_put(rtcp_exp);
+err2:
+ nf_ct_expect_put(rtp_exp);
+err1:
+ return ret;
+}
+
+/* Media types for which expectations are created, with the expectation class
+ * used for each. The names include the blank that follows the type in the
+ * media description. */
+static const struct sdp_media_type sdp_media_types[] = {
+ SDP_MEDIA_TYPE("audio ", SIP_EXPECT_AUDIO),
+ SDP_MEDIA_TYPE("video ", SIP_EXPECT_VIDEO),
+};
+
+/* Look up the entry of sdp_media_types[] whose name is a prefix of the
+ * matchlen bytes at dptr + matchoff. Returns NULL for unknown media types. */
+static const struct sdp_media_type *sdp_media_type(const char *dptr,
+						   unsigned int matchoff,
+						   unsigned int matchlen)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(sdp_media_types); idx++) {
+		const struct sdp_media_type *cand = &sdp_media_types[idx];
+
+		if (matchlen >= cand->len &&
+		    strncmp(dptr + matchoff, cand->name, cand->len) == 0)
+			return cand;
+	}
+	return NULL;
+}
+
+/* Process the SDP body of a SIP message: create RTP/RTCP expectations for
+ * the known media types and let the NAT hooks, if installed, rewrite the
+ * addresses.
+ *
+ * Returns NF_ACCEPT if the message carries no session description or
+ * everything went fine, NF_DROP for a media port outside 1024..65535 or a
+ * media description without any connection address, and otherwise the
+ * verdict of set_expected_rtp_rtcp() or of a NAT hook. The CSeq number is
+ * remembered as invite_cseq when at least one media stream was set up. */
+static int process_sdp(struct sk_buff *skb,
+ const char **dptr, unsigned int *datalen,
+ unsigned int cseq)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn_help *help = nfct_help(ct);
+ unsigned int matchoff, matchlen;
+ unsigned int mediaoff, medialen;
+ unsigned int sdpoff;
+ unsigned int caddr_len, maddr_len;
+ unsigned int i;
+ union nf_inet_addr caddr, maddr, rtp_addr;
+ unsigned int port;
+ enum sdp_header_types c_hdr;
+ const struct sdp_media_type *t;
+ int ret = NF_ACCEPT;
+ typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr;
+ typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session;
+
+ nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook);
+ c_hdr = nf_ct_l3num(ct) == AF_INET ? SDP_HDR_CONNECTION_IP4 :
+ SDP_HDR_CONNECTION_IP6;
+
+ /* Find beginning of session description */
+ if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
+ SDP_HDR_VERSION, SDP_HDR_UNSPEC,
+ &matchoff, &matchlen) <= 0)
+ return NF_ACCEPT;
+ sdpoff = matchoff;
+
+ /* The connection information is contained in the session description
+ * and/or once per media description. The first media description marks
+ * the end of the session description. */
+ caddr_len = 0;
+ if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen,
+ c_hdr, SDP_HDR_MEDIA,
+ &matchoff, &matchlen, &caddr) > 0)
+ caddr_len = matchlen;
+
+ /* i counts the media streams that were set up; it is only advanced
+ * at the end of the loop body. */
+ mediaoff = sdpoff;
+ for (i = 0; i < ARRAY_SIZE(sdp_media_types); ) {
+ if (ct_sip_get_sdp_header(ct, *dptr, mediaoff, *datalen,
+ SDP_HDR_MEDIA, SDP_HDR_UNSPEC,
+ &mediaoff, &medialen) <= 0)
+ break;
+
+ /* Get media type and port number. A media port value of zero
+ * indicates an inactive stream. */
+ t = sdp_media_type(*dptr, mediaoff, medialen);
+ if (!t) {
+ mediaoff += medialen;
+ continue;
+ }
+ /* Step over the media type so that mediaoff/medialen describe
+ * the port number only. */
+ mediaoff += t->len;
+ medialen -= t->len;
+
+ port = simple_strtoul(*dptr + mediaoff, NULL, 10);
+ if (port == 0)
+ continue;
+ if (port < 1024 || port > 65535)
+ return NF_DROP;
+
+ /* The media description overrides the session description. */
+ maddr_len = 0;
+ if (ct_sip_parse_sdp_addr(ct, *dptr, mediaoff, *datalen,
+ c_hdr, SDP_HDR_MEDIA,
+ &matchoff, &matchlen, &maddr) > 0) {
+ maddr_len = matchlen;
+ memcpy(&rtp_addr, &maddr, sizeof(rtp_addr));
+ } else if (caddr_len)
+ memcpy(&rtp_addr, &caddr, sizeof(rtp_addr));
+ else
+ return NF_DROP;
+
+ ret = set_expected_rtp_rtcp(skb, dptr, datalen,
+ &rtp_addr, htons(port), t->class,
+ mediaoff, medialen);
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ /* Update media connection address if present */
+ if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) {
+ ret = nf_nat_sdp_addr(skb, dptr, mediaoff, datalen,
+ c_hdr, SDP_HDR_MEDIA, &rtp_addr);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+ i++;
+ }
+
+ /* Update session connection and owner addresses */
+ /* NOTE(review): rtp_addr is only written inside the loop above; if no
+ * media stream was processed it is handed to the hook uninitialised
+ * -- confirm whether that can happen in practice. */
+ nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook);
+ if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK)
+ ret = nf_nat_sdp_session(skb, dptr, sdpoff, datalen, &rtp_addr);
+
+ if (ret == NF_ACCEPT && i > 0)
+ help->help.ct_sip_info.invite_cseq = cseq;
+
+ return ret;
+}
+/* Response to an INVITE: provisional (1xx) and success (2xx) responses have
+ * their SDP body processed. Any other response flushes the media
+ * expectations if it answers the INVITE whose CSeq was recorded. */
+static int process_invite_response(struct sk_buff *skb,
+				   const char **dptr, unsigned int *datalen,
+				   unsigned int cseq, unsigned int code)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conn_help *help = nfct_help(ct);
+
+	if (code >= 100 && code <= 299)
+		return process_sdp(skb, dptr, datalen, cseq);
+
+	if (help->help.ct_sip_info.invite_cseq == cseq)
+		flush_expectations(ct, true);
+	return NF_ACCEPT;
+}
+
+/* Response to an UPDATE: 1xx and 2xx responses have their SDP body
+ * processed. Any other response flushes the media expectations if its CSeq
+ * equals the recorded invite_cseq. */
+static int process_update_response(struct sk_buff *skb,
+				   const char **dptr, unsigned int *datalen,
+				   unsigned int cseq, unsigned int code)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conn_help *help = nfct_help(ct);
+
+	if (code >= 100 && code <= 299)
+		return process_sdp(skb, dptr, datalen, cseq);
+
+	if (help->help.ct_sip_info.invite_cseq == cseq)
+		flush_expectations(ct, true);
+	return NF_ACCEPT;
+}
+
+/* Response to a PRACK: 1xx and 2xx responses have their SDP body processed.
+ * Any other response flushes the media expectations if its CSeq equals the
+ * recorded invite_cseq. */
+static int process_prack_response(struct sk_buff *skb,
+				  const char **dptr, unsigned int *datalen,
+				  unsigned int cseq, unsigned int code)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conn_help *help = nfct_help(ct);
+
+	if (code >= 100 && code <= 299)
+		return process_sdp(skb, dptr, datalen, cseq);
+
+	if (help->help.ct_sip_info.invite_cseq == cseq)
+		flush_expectations(ct, true);
+	return NF_ACCEPT;
+}
+
+/* BYE request: drop all media expectations of the connection. dptr, datalen
+ * and cseq are unused; the packet is always accepted. */
+static int process_bye_request(struct sk_buff *skb,
+ const char **dptr, unsigned int *datalen,
+ unsigned int cseq)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+ flush_expectations(ct, true);
+ return NF_ACCEPT;
+}
+
+/* Parse a REGISTER request and create a permanent expectation for incoming
+ * signalling connections. The expectation is marked inactive and is activated
+ * when receiving a response indicating success from the registrar.
+ *
+ * Returns NF_ACCEPT when nothing has to be done or the expectation was set
+ * up, NF_DROP if the Contact header cannot be parsed or the expectation
+ * cannot be allocated or registered, or the verdict of the NAT hook. The
+ * CSeq number is stored as register_cseq whenever the function runs to its
+ * end with NF_ACCEPT.
+ */
+static int process_register_request(struct sk_buff *skb,
+ const char **dptr, unsigned int *datalen,
+ unsigned int cseq)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn_help *help = nfct_help(ct);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ unsigned int matchoff, matchlen;
+ struct nf_conntrack_expect *exp;
+ union nf_inet_addr *saddr, daddr;
+ __be16 port;
+ unsigned int expires = 0;
+ int ret;
+ typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect;
+
+ /* Expected connections can not register again. */
+ if (ct->status & IPS_EXPECTED)
+ return NF_ACCEPT;
+
+ /* We must check the expiration time: a value of zero signals the
+ * registrar to release the binding. We'll remove our expectation
+ * when receiving the new bindings in the response, but we don't
+ * want to create new ones.
+ *
+ * The expiration time may be contained in Expires: header, the
+ * Contact: header parameters or the URI parameters.
+ */
+ if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_EXPIRES,
+ &matchoff, &matchlen) > 0)
+ expires = simple_strtoul(*dptr + matchoff, NULL, 10);
+
+ ret = ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
+ SIP_HDR_CONTACT, NULL,
+ &matchoff, &matchlen, &daddr, &port);
+ if (ret < 0)
+ return NF_DROP;
+ else if (ret == 0)
+ return NF_ACCEPT;
+
+ /* We don't support third-party registrations */
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, &daddr))
+ return NF_ACCEPT;
+
+ /* An "expires=" parameter behind the contact address overrides the
+ * Expires header. As written in this file, the parser only returns 0
+ * or 1, so the error branch is not taken. */
+ if (ct_sip_parse_numerical_param(ct, *dptr,
+ matchoff + matchlen, *datalen,
+ "expires=", NULL, NULL, &expires) < 0)
+ return NF_DROP;
+
+ if (expires == 0) {
+ ret = NF_ACCEPT;
+ goto store_cseq;
+ }
+
+ exp = nf_ct_expect_alloc(ct);
+ if (!exp)
+ return NF_DROP;
+
+ /* With sip_direct_signalling set the expectation gets a fixed source
+ * address, otherwise the source is left open. */
+ saddr = NULL;
+ if (sip_direct_signalling)
+ saddr = &ct->tuplehash[!dir].tuple.src.u3;
+
+ nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct),
+ saddr, &daddr, IPPROTO_UDP, NULL, &port);
+ exp->timeout.expires = sip_timeout * HZ;
+ exp->helper = nfct_help(ct)->helper;
+ exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
+
+ nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook);
+ if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK)
+ ret = nf_nat_sip_expect(skb, dptr, datalen, exp,
+ matchoff, matchlen);
+ else {
+ if (nf_ct_expect_related(exp) != 0)
+ ret = NF_DROP;
+ else
+ ret = NF_ACCEPT;
+ }
+ /* Drop the reference obtained from nf_ct_expect_alloc(). */
+ nf_ct_expect_put(exp);
+
+store_cseq:
+ if (ret == NF_ACCEPT)
+ help->help.ct_sip_info.register_cseq = cseq;
+ return ret;
+}
+
+/* Handle the response to a REGISTER request.
+ *
+ * Responses whose CSeq differs from the stored register_cseq and
+ * provisional (1xx) responses are ignored. For a 2xx response the Contact
+ * headers are walked; the first binding that refers to this connection's
+ * destination address and has a non-zero expiry refreshes the matching
+ * signalling expectation. In all remaining cases (other response codes, no
+ * usable binding, expiry of zero, nothing to refresh) one signalling
+ * expectation is flushed. Returns NF_DROP only if a Contact header cannot be
+ * parsed, otherwise NF_ACCEPT. */
+static int process_register_response(struct sk_buff *skb,
+ const char **dptr, unsigned int *datalen,
+ unsigned int cseq, unsigned int code)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn_help *help = nfct_help(ct);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ union nf_inet_addr addr;
+ __be16 port;
+ unsigned int matchoff, matchlen, dataoff = 0;
+ unsigned int expires = 0;
+ int in_contact = 0, ret;
+
+ /* According to RFC 3261, "UAs MUST NOT send a new registration until
+ * they have received a final response from the registrar for the
+ * previous one or the previous REGISTER request has timed out".
+ *
+ * However, some servers fail to detect retransmissions and send late
+ * responses, so we store the sequence number of the last valid
+ * request and compare it here.
+ */
+ if (help->help.ct_sip_info.register_cseq != cseq)
+ return NF_ACCEPT;
+
+ if (code >= 100 && code <= 199)
+ return NF_ACCEPT;
+ if (code < 200 || code > 299)
+ goto flush;
+
+ /* Default expiry from the Expires header; a per-contact "expires="
+ * parameter overrides it below. */
+ if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_EXPIRES,
+ &matchoff, &matchlen) > 0)
+ expires = simple_strtoul(*dptr + matchoff, NULL, 10);
+
+ /* dataoff and in_contact carry the position between iterations. */
+ while (1) {
+ unsigned int c_expires = expires;
+
+ ret = ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen,
+ SIP_HDR_CONTACT, &in_contact,
+ &matchoff, &matchlen,
+ &addr, &port);
+ if (ret < 0)
+ return NF_DROP;
+ else if (ret == 0)
+ break;
+
+ /* We don't support third-party registrations */
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &addr))
+ continue;
+
+ ret = ct_sip_parse_numerical_param(ct, *dptr,
+ matchoff + matchlen,
+ *datalen, "expires=",
+ NULL, NULL, &c_expires);
+ if (ret < 0)
+ return NF_DROP;
+ if (c_expires == 0)
+ break;
+ if (refresh_signalling_expectation(ct, &addr, port, c_expires))
+ return NF_ACCEPT;
+ }
+
+flush:
+ flush_expectations(ct, false);
+ return NF_ACCEPT;
+}
+
+/*
+ * Per-method dispatch table, scanned in order by process_sip_request()
+ * and process_sip_response().  Entries whose callback is NULL are
+ * skipped by the respective scanner.
+ * NOTE(review): SIP_HANDLER() arguments are presumably
+ * (method, request callback, response callback) -- confirm against the
+ * macro definition.
+ */
+static const struct sip_handler sip_handlers[] = {
+ SIP_HANDLER("INVITE", process_sdp, process_invite_response),
+ SIP_HANDLER("UPDATE", process_sdp, process_update_response),
+ SIP_HANDLER("ACK", process_sdp, NULL),
+ SIP_HANDLER("PRACK", process_sdp, process_prack_response),
+ SIP_HANDLER("BYE", process_bye_request, NULL),
+ SIP_HANDLER("REGISTER", process_register_request, process_register_response),
+};
+
+/*
+ * Dispatch a SIP response to the response callback of the method named
+ * in its CSeq: header.
+ *
+ * Returns NF_ACCEPT for messages too short to hold a status line or
+ * whose method has no response callback, NF_DROP when the status code
+ * or CSeq number is missing or zero, otherwise the callback's verdict.
+ */
+static int process_sip_response(struct sk_buff *skb,
+				const char **dptr, unsigned int *datalen)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	const struct sip_handler *h;
+	unsigned int hdr_off, hdr_len;
+	unsigned int status, seq, method_off, idx;
+
+	if (*datalen < strlen("SIP/2.0 200"))
+		return NF_ACCEPT;
+
+	/* Status code follows the version token */
+	status = simple_strtoul(*dptr + strlen("SIP/2.0 "), NULL, 10);
+	if (status == 0)
+		return NF_DROP;
+
+	if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ,
+			      &hdr_off, &hdr_len) <= 0)
+		return NF_DROP;
+
+	seq = simple_strtoul(*dptr + hdr_off, NULL, 10);
+	if (seq == 0)
+		return NF_DROP;
+
+	/* Method name is looked up one byte past the matched CSeq value */
+	method_off = hdr_off + hdr_len + 1;
+
+	for (idx = 0; idx < ARRAY_SIZE(sip_handlers); idx++) {
+		h = &sip_handlers[idx];
+		if (h->response != NULL &&
+		    *datalen >= method_off + h->len &&
+		    strnicmp(*dptr + method_off, h->method, h->len) == 0)
+			return h->response(skb, dptr, datalen, seq, status);
+	}
+	return NF_ACCEPT;
+}
+
+/*
+ * Dispatch a SIP request to the request callback of the first table
+ * entry whose method name prefixes the message.
+ *
+ * Returns NF_ACCEPT when no entry matches, NF_DROP when the matching
+ * request has no usable (non-zero) CSeq, otherwise the callback's
+ * verdict.
+ */
+static int process_sip_request(struct sk_buff *skb,
+			       const char **dptr, unsigned int *datalen)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	const struct sip_handler *h;
+	unsigned int hdr_off, hdr_len;
+	unsigned int seq, idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(sip_handlers); idx++) {
+		h = &sip_handlers[idx];
+
+		/* Skip entries without a request callback or a name match */
+		if (h->request == NULL)
+			continue;
+		if (*datalen < h->len)
+			continue;
+		if (strnicmp(*dptr, h->method, h->len) != 0)
+			continue;
+
+		if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ,
+				      &hdr_off, &hdr_len) <= 0)
+			return NF_DROP;
+
+		seq = simple_strtoul(*dptr + hdr_off, NULL, 10);
+		if (seq == 0)
+			return NF_DROP;
+
+		return h->request(skb, dptr, datalen, seq);
+	}
+	return NF_ACCEPT;
+}
+
+/*
+ * Conntrack helper entry point for SIP over UDP.
+ *
+ * Refreshes the connection timeout, classifies the payload as request
+ * or response by its "SIP/2.0 " prefix and dispatches accordingly.  When
+ * the verdict is NF_ACCEPT and the connection has a NAT status bit set,
+ * the NAT hook (if registered) gets to process the packet and may turn
+ * the verdict into NF_DROP.
+ *
+ * Packets without payload, non-linear skbs and payloads shorter than a
+ * minimal status line are accepted unprocessed.
+ */
+static int sip_help(struct sk_buff *skb,
+		    unsigned int protoff,
+		    struct nf_conn *ct,
+		    enum ip_conntrack_info ctinfo)
+{
+	typeof(nf_nat_sip_hook) nat_fixup;
+	unsigned int payload_off, datalen;
+	const char *dptr;
+	int verdict;
+
+	/* No Data ? */
+	payload_off = protoff + sizeof(struct udphdr);
+	if (payload_off >= skb->len)
+		return NF_ACCEPT;
+
+	nf_ct_refresh(ct, skb, sip_timeout * HZ);
+
+	if (skb_is_nonlinear(skb)) {
+		pr_debug("Copy of skbuff not supported yet.\n");
+		return NF_ACCEPT;
+	}
+	dptr = skb->data + payload_off;
+
+	datalen = skb->len - payload_off;
+	if (datalen < strlen("SIP/2.0 200"))
+		return NF_ACCEPT;
+
+	if (strnicmp(dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) == 0)
+		verdict = process_sip_response(skb, &dptr, &datalen);
+	else
+		verdict = process_sip_request(skb, &dptr, &datalen);
+
+	/* The NAT hook only runs for accepted packets of NATed connections */
+	if (verdict != NF_ACCEPT || !(ct->status & IPS_NAT_MASK))
+		return verdict;
+
+	nat_fixup = rcu_dereference(nf_nat_sip_hook);
+	if (nat_fixup && !nat_fixup(skb, &dptr, &datalen))
+		verdict = NF_DROP;
+
+	return verdict;
+}
+
+static struct nf_conntrack_helper sip[MAX_PORTS][2] __read_mostly;
+static char sip_names[MAX_PORTS][2][sizeof("sip-65535")] __read_mostly;
+
+/*
+ * Expectation limits per SIP expectation class, indexed by the
+ * SIP_EXPECT_* class value and installed as expect_policy on every
+ * helper in nf_conntrack_sip_init().
+ * NOTE(review): .timeout is presumably in seconds -- confirm.
+ */
+static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = {
+ [SIP_EXPECT_SIGNALLING] = {
+ .max_expected = 1,
+ .timeout = 3 * 60,
+ },
+ [SIP_EXPECT_AUDIO] = {
+ .max_expected = 2 * IP_CT_DIR_MAX,
+ .timeout = 3 * 60,
+ },
+ [SIP_EXPECT_VIDEO] = {
+ .max_expected = 2 * IP_CT_DIR_MAX,
+ .timeout = 3 * 60,
+ },
+};
+
+/*
+ * Unregister the helpers of every configured port, for both address
+ * family slots.  Slots whose .me owner pointer is NULL are left alone.
+ */
+static void nf_conntrack_sip_fini(void)
+{
+	int port_idx, fam;
+
+	for (port_idx = 0; port_idx < ports_c; port_idx++) {
+		for (fam = 0; fam < 2; fam++) {
+			if (sip[port_idx][fam].me != NULL)
+				nf_conntrack_helper_unregister(&sip[port_idx][fam]);
+		}
+	}
+}
+
+/*
+ * Module init: register one SIP helper per configured port and address
+ * family (slot 0: AF_INET, slot 1: AF_INET6).  When no port was given
+ * as module parameter, SIP_PORT is used.
+ *
+ * Returns 0 on success, or the error returned by
+ * nf_conntrack_helper_register().  On failure only the helpers that
+ * were registered before the failing one are unregistered again.
+ */
+static int __init nf_conntrack_sip_init(void)
+{
+	int i, j, ret;
+	char *tmpname;
+
+	if (ports_c == 0)
+		ports[ports_c++] = SIP_PORT;
+
+	for (i = 0; i < ports_c; i++) {
+		memset(&sip[i], 0, sizeof(sip[i]));
+
+		sip[i][0].tuple.src.l3num = AF_INET;
+		sip[i][1].tuple.src.l3num = AF_INET6;
+		for (j = 0; j < 2; j++) {
+			sip[i][j].tuple.dst.protonum = IPPROTO_UDP;
+			sip[i][j].tuple.src.u.udp.port = htons(ports[i]);
+			sip[i][j].expect_policy = sip_exp_policy;
+			sip[i][j].expect_class_max = SIP_EXPECT_MAX;
+			sip[i][j].me = THIS_MODULE;
+			sip[i][j].help = sip_help;
+
+			/* Default port gets the plain name, others "sip-<index>" */
+			tmpname = &sip_names[i][j][0];
+			if (ports[i] == SIP_PORT)
+				sprintf(tmpname, "sip");
+			else
+				sprintf(tmpname, "sip-%u", i);
+			sip[i][j].name = tmpname;
+
+			pr_debug("port #%u: %u\n", i, ports[i]);
+
+			ret = nf_conntrack_helper_register(&sip[i][j]);
+			if (ret)
+				goto err_unwind;
+		}
+	}
+	return 0;
+
+err_unwind:
+	printk("nf_ct_sip: failed to register helper "
+	       "for pf: %u port: %u\n",
+	       sip[i][j].tuple.src.l3num, ports[i]);
+
+	/*
+	 * Roll back explicitly instead of calling nf_conntrack_sip_fini():
+	 * that routine keys off .me, which is already set for the helper
+	 * that just failed, so it would unregister a helper that was never
+	 * registered.
+	 */
+	while (j-- > 0)
+		nf_conntrack_helper_unregister(&sip[i][j]);
+	while (i-- > 0) {
+		for (j = 0; j < 2; j++)
+			nf_conntrack_helper_unregister(&sip[i][j]);
+	}
+	return ret;
+}
+
+module_init(nf_conntrack_sip_init);
+module_exit(nf_conntrack_sip_fini);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
new file mode 100644
index 0000000..98106d4
--- /dev/null
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -0,0 +1,519 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/percpu.h>
+#include <linux/netdevice.h>
+#include <net/net_namespace.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+
+MODULE_LICENSE("GPL");
+
+#ifdef CONFIG_PROC_FS
+/*
+ * Print @tuple through the layer 3 and then the layer 4 protocol
+ * printers.  The layer 4 printer is not called when the layer 3 printer
+ * returns non-zero.  Returns 1 if either printer returned non-zero,
+ * 0 otherwise.
+ */
+int
+print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
+	    const struct nf_conntrack_l3proto *l3proto,
+	    const struct nf_conntrack_l4proto *l4proto)
+{
+	if (l3proto->print_tuple(s, tuple))
+		return 1;
+
+	return l4proto->print_tuple(s, tuple) != 0;
+}
+EXPORT_SYMBOL_GPL(print_tuple);
+
+/* Per-open iterator state for the nf_conntrack seq_file */
+struct ct_iter_state {
+ /* NOTE(review): presumably must stay first for seq_open_net() -- confirm */
+ struct seq_net_private p;
+ /* index of the hash bucket currently being walked */
+ unsigned int bucket;
+};
+
+/*
+ * Return the first node of the first non-empty conntrack hash bucket,
+ * leaving the iterator's bucket index on that bucket, or NULL when the
+ * whole table is empty.
+ */
+static struct hlist_node *ct_get_first(struct seq_file *seq)
+{
+	struct net *net = seq_file_net(seq);
+	struct ct_iter_state *iter = seq->private;
+	struct hlist_node *node;
+
+	iter->bucket = 0;
+	while (iter->bucket < nf_conntrack_htable_size) {
+		node = rcu_dereference(net->ct.hash[iter->bucket].first);
+		if (node != NULL)
+			return node;
+		iter->bucket++;
+	}
+	return NULL;
+}
+
+/*
+ * Return the node following @head, moving on to the next non-empty
+ * bucket when the current chain is exhausted.  Returns NULL once the
+ * last bucket has been passed.
+ */
+static struct hlist_node *ct_get_next(struct seq_file *seq,
+				      struct hlist_node *head)
+{
+	struct net *net = seq_file_net(seq);
+	struct ct_iter_state *iter = seq->private;
+	struct hlist_node *node = rcu_dereference(head->next);
+
+	for (;;) {
+		if (node != NULL)
+			return node;
+		if (++iter->bucket >= nf_conntrack_htable_size)
+			return NULL;
+		node = rcu_dereference(net->ct.hash[iter->bucket].first);
+	}
+}
+
+/*
+ * Return the node at position @pos (counted from the first node of the
+ * table), or NULL when the table holds fewer than @pos + 1 nodes.
+ */
+static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct hlist_node *node = ct_get_first(seq);
+
+	while (node != NULL && pos != 0) {
+		node = ct_get_next(seq, node);
+		if (node == NULL)
+			break;
+		pos--;
+	}
+
+	if (pos != 0)
+		return NULL;
+	return node;
+}
+
+/*
+ * seq_file start callback: take the RCU read lock (dropped again in
+ * ct_seq_stop()) and return the hash node at position *pos.
+ */
+static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ rcu_read_lock();
+ return ct_get_idx(seq, *pos);
+}
+
+/* seq_file next callback: bump *pos and return the node following @v */
+static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return ct_get_next(s, v);
+}
+
+/* seq_file stop callback: drop the RCU read lock taken in ct_seq_start() */
+static void ct_seq_stop(struct seq_file *s, void *v)
+ __releases(RCU)
+{
+ rcu_read_unlock();
+}
+
+/*
+ * seq_file show callback: print one conntrack entry as a single line.
+ *
+ * @v is a tuple hash node; only nodes of the original direction produce
+ * output, the others return 0 without printing so that each connection
+ * appears once.
+ *
+ * Returns 0 on success, -ENOSPC when any of the print steps reports
+ * failure.
+ */
+static int ct_seq_show(struct seq_file *s, void *v)
+{
+ const struct nf_conntrack_tuple_hash *hash = v;
+ const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
+
+ NF_CT_ASSERT(ct);
+
+ /* we only want to print DIR_ORIGINAL */
+ if (NF_CT_DIRECTION(hash))
+ return 0;
+
+ l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
+ NF_CT_ASSERT(l3proto);
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ NF_CT_ASSERT(l4proto);
+
+ /* protocol names/numbers and remaining timeout in seconds (0 if no timer is pending) */
+ if (seq_printf(s, "%-8s %u %-8s %u %ld ",
+ l3proto->name, nf_ct_l3num(ct),
+ l4proto->name, nf_ct_protonum(ct),
+ timer_pending(&ct->timeout)
+ ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
+ return -ENOSPC;
+
+ /* optional protocol specific state */
+ if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
+ return -ENOSPC;
+
+ if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ l3proto, l4proto))
+ return -ENOSPC;
+
+ if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
+ return -ENOSPC;
+
+ if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
+ if (seq_printf(s, "[UNREPLIED] "))
+ return -ENOSPC;
+
+ if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ l3proto, l4proto))
+ return -ENOSPC;
+
+ if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
+ return -ENOSPC;
+
+ if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ if (seq_printf(s, "[ASSURED] "))
+ return -ENOSPC;
+
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+ if (seq_printf(s, "mark=%u ", ct->mark))
+ return -ENOSPC;
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ if (seq_printf(s, "secmark=%u ", ct->secmark))
+ return -ENOSPC;
+#endif
+
+ /* reference count, also terminates the line */
+ if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
+ return -ENOSPC;
+
+ return 0;
+}
+
+/* Iterator callbacks for the per-net "nf_conntrack" connection listing */
+static const struct seq_operations ct_seq_ops = {
+ .start = ct_seq_start,
+ .next = ct_seq_next,
+ .stop = ct_seq_stop,
+ .show = ct_seq_show
+};
+
+/*
+ * open() handler of the connection listing: set up a net-aware seq_file
+ * with a struct ct_iter_state as private data.
+ */
+static int ct_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &ct_seq_ops,
+ sizeof(struct ct_iter_state));
+}
+
+/* File operations of the per-net "nf_conntrack" proc entry */
+static const struct file_operations ct_file_ops = {
+ .owner = THIS_MODULE,
+ .open = ct_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+/*
+ * seq_file start callback of the per-CPU statistics file.
+ *
+ * Position 0 yields SEQ_START_TOKEN (the header line); position N > 0
+ * yields the statistics of the first possible CPU with id >= N - 1 and
+ * stores that CPU's id + 1 back into *pos.  Returns NULL past the last
+ * CPU.
+ */
+static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct net *net = seq_file_net(seq);
+	int cpu;
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	cpu = *pos - 1;
+	while (cpu < NR_CPUS) {
+		if (cpu_possible(cpu)) {
+			*pos = cpu + 1;
+			return per_cpu_ptr(net->ct.stat, cpu);
+		}
+		++cpu;
+	}
+
+	return NULL;
+}
+
+/*
+ * seq_file next callback of the per-CPU statistics file: return the
+ * statistics of the first possible CPU with id >= *pos and store that
+ * CPU's id + 1 back into *pos, or return NULL when none is left.
+ */
+static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct net *net = seq_file_net(seq);
+	int cpu = *pos;
+
+	while (cpu < NR_CPUS) {
+		if (cpu_possible(cpu)) {
+			*pos = cpu + 1;
+			return per_cpu_ptr(net->ct.stat, cpu);
+		}
+		++cpu;
+	}
+
+	return NULL;
+}
+
+/* seq_file stop callback: nothing to release for the per-CPU statistics */
+static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+/*
+ * seq_file show callback of the per-CPU statistics file.
+ *
+ * Prints the column header for SEQ_START_TOKEN, otherwise one line of
+ * zero-padded hexadecimal counters for the CPU whose statistics @v
+ * points to.  The first column is the per-net connection count, which
+ * is not per-CPU and therefore repeats on every line.  Always returns 0.
+ */
+static int ct_cpu_seq_show(struct seq_file *seq, void *v)
+{
+ struct net *net = seq_file_net(seq);
+ unsigned int nr_conntracks = atomic_read(&net->ct.count);
+ const struct ip_conntrack_stat *st = v;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
+ return 0;
+ }
+
+ /* value order must match the header line above */
+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
+ "%08x %08x %08x %08x %08x %08x %08x %08x \n",
+ nr_conntracks,
+ st->searched,
+ st->found,
+ st->new,
+ st->invalid,
+ st->ignore,
+ st->delete,
+ st->delete_list,
+ st->insert,
+ st->insert_failed,
+ st->drop,
+ st->early_drop,
+ st->error,
+
+ st->expect_new,
+ st->expect_create,
+ st->expect_delete
+ );
+ return 0;
+}
+
+/* Iterator callbacks for the per-CPU conntrack statistics file */
+static const struct seq_operations ct_cpu_seq_ops = {
+ .start = ct_cpu_seq_start,
+ .next = ct_cpu_seq_next,
+ .stop = ct_cpu_seq_stop,
+ .show = ct_cpu_seq_show,
+};
+
+/*
+ * open() handler of the statistics file: set up a net-aware seq_file;
+ * no iterator state beyond struct seq_net_private is needed.
+ */
+static int ct_cpu_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &ct_cpu_seq_ops,
+ sizeof(struct seq_net_private));
+}
+
+/* File operations of the "nf_conntrack" entry below net->proc_net_stat */
+static const struct file_operations ct_cpu_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = ct_cpu_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+/*
+ * Create the two per-net proc entries named "nf_conntrack": the
+ * connection listing (mode 0440) and the per-CPU statistics below
+ * net->proc_net_stat (mode S_IRUGO).
+ *
+ * Returns 0 on success, -ENOMEM if either entry cannot be created; the
+ * first entry is removed again when the second one fails.
+ */
+static int nf_conntrack_standalone_init_proc(struct net *net)
+{
+	if (!proc_net_fops_create(net, "nf_conntrack", 0440, &ct_file_ops))
+		return -ENOMEM;
+
+	if (!proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat,
+			 &ct_cpu_seq_fops)) {
+		proc_net_remove(net, "nf_conntrack");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Remove both proc entries, in the reverse order of their creation in
+ * nf_conntrack_standalone_init_proc().
+ */
+static void nf_conntrack_standalone_fini_proc(struct net *net)
+{
+ remove_proc_entry("nf_conntrack", net->proc_net_stat);
+ proc_net_remove(net, "nf_conntrack");
+}
+#else
+/* Stub used when CONFIG_PROC_FS is not set: nothing to create */
+static int nf_conntrack_standalone_init_proc(struct net *net)
+{
+ return 0;
+}
+
+/* Stub used when CONFIG_PROC_FS is not set: nothing to remove */
+static void nf_conntrack_standalone_fini_proc(struct net *net)
+{
+}
+#endif /* CONFIG_PROC_FS */
+
+/* Sysctl support */
+
+#ifdef CONFIG_SYSCTL
+/* Log invalid packets of a given protocol */
+static int log_invalid_proto_min = 0;
+static int log_invalid_proto_max = 255;
+
+static struct ctl_table_header *nf_ct_netfilter_header;
+
+/*
+ * Template for the per-net conntrack sysctls.
+ *
+ * nf_conntrack_standalone_init_sysctl() duplicates this table for every
+ * network namespace and redirects the .data pointers of entries [1],
+ * [3] and [4] to that namespace.  The entry order is therefore part of
+ * the contract: keep the indices used there in sync when adding,
+ * removing or reordering entries.
+ */
+static ctl_table nf_ct_sysctl_table[] = {
+ {
+ .ctl_name = NET_NF_CONNTRACK_MAX,
+ .procname = "nf_conntrack_max",
+ .data = &nf_conntrack_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ /* [1]: .data is replaced per net namespace */
+ {
+ .ctl_name = NET_NF_CONNTRACK_COUNT,
+ .procname = "nf_conntrack_count",
+ .data = &init_net.ct.count,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_BUCKETS,
+ .procname = "nf_conntrack_buckets",
+ .data = &nf_conntrack_htable_size,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ },
+ /* [3]: .data is replaced per net namespace */
+ {
+ .ctl_name = NET_NF_CONNTRACK_CHECKSUM,
+ .procname = "nf_conntrack_checksum",
+ .data = &init_net.ct.sysctl_checksum,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ /* [4]: .data is replaced per net namespace; value limited to 0..255 */
+ {
+ .ctl_name = NET_NF_CONNTRACK_LOG_INVALID,
+ .procname = "nf_conntrack_log_invalid",
+ .data = &init_net.ct.sysctl_log_invalid,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &log_invalid_proto_min,
+ .extra2 = &log_invalid_proto_max,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nf_conntrack_expect_max",
+ .data = &nf_ct_expect_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ { .ctl_name = 0 }
+};
+
+/*
+ * Binary sysctl number used for the "nf_conntrack_max" entry in the
+ * table below.
+ * NOTE(review): the same identifier is already used by
+ * nf_ct_sysctl_table before this point, so it presumably also exists as
+ * an enumerator with a different value in a header -- confirm before
+ * moving or removing this define.
+ */
+#define NET_NF_CONNTRACK_MAX 2089
+
+/*
+ * Second "nf_conntrack_max" entry sharing the nf_conntrack_max variable;
+ * registered under nf_ct_path for init_net only.
+ */
+static ctl_table nf_ct_netfilter_table[] = {
+ {
+ .ctl_name = NET_NF_CONNTRACK_MAX,
+ .procname = "nf_conntrack_max",
+ .data = &nf_conntrack_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ { .ctl_name = 0 }
+};
+
+/* sysctl path "net" under which nf_ct_netfilter_table is registered */
+static struct ctl_path nf_ct_path[] = {
+ { .procname = "net", .ctl_name = CTL_NET, },
+ { }
+};
+
+/*
+ * Register the conntrack sysctls for @net.
+ *
+ * For init_net, nf_ct_netfilter_table is additionally registered under
+ * nf_ct_path.  Every namespace gets its own copy of nf_ct_sysctl_table
+ * with the namespace specific .data pointers patched in.
+ *
+ * Returns 0 on success, -ENOMEM on any failure (after undoing the steps
+ * already taken).
+ */
+static int nf_conntrack_standalone_init_sysctl(struct net *net)
+{
+ struct ctl_table *table;
+
+ if (net_eq(net, &init_net)) {
+ nf_ct_netfilter_header =
+ register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table);
+ if (!nf_ct_netfilter_header)
+ goto out;
+ }
+
+ table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ goto out_kmemdup;
+
+ /* indices refer to the entry order of nf_ct_sysctl_table */
+ table[1].data = &net->ct.count;
+ table[3].data = &net->ct.sysctl_checksum;
+ table[4].data = &net->ct.sysctl_log_invalid;
+
+ net->ct.sysctl_header = register_net_sysctl_table(net,
+ nf_net_netfilter_sysctl_path, table);
+ if (!net->ct.sysctl_header)
+ goto out_unregister_netfilter;
+
+ return 0;
+
+out_unregister_netfilter:
+ kfree(table);
+out_kmemdup:
+ /* the nf_ct_path registration only exists for init_net */
+ if (net_eq(net, &init_net))
+ unregister_sysctl_table(nf_ct_netfilter_header);
+out:
+ printk("nf_conntrack: can't register to sysctl.\n");
+ return -ENOMEM;
+}
+
+/*
+ * Undo nf_conntrack_standalone_init_sysctl(): unregister the sysctl
+ * tables of @net and free the per-net table copy.
+ */
+static void nf_conntrack_standalone_fini_sysctl(struct net *net)
+{
+ struct ctl_table *table;
+
+ if (net_eq(net, &init_net))
+ unregister_sysctl_table(nf_ct_netfilter_header);
+ /*
+ * Fetch the table pointer before unregistering.
+ * NOTE(review): presumably the header must not be dereferenced
+ * afterwards -- confirm.
+ */
+ table = net->ct.sysctl_header->ctl_table_arg;
+ unregister_net_sysctl_table(net->ct.sysctl_header);
+ kfree(table);
+}
+#else
+/* Stub used when CONFIG_SYSCTL is not set: nothing to register */
+static int nf_conntrack_standalone_init_sysctl(struct net *net)
+{
+ return 0;
+}
+
+/* Stub used when CONFIG_SYSCTL is not set: nothing to unregister */
+static void nf_conntrack_standalone_fini_sysctl(struct net *net)
+{
+}
+#endif /* CONFIG_SYSCTL */
+
+/*
+ * Per-net init: bring up the conntrack core, the proc entries and the
+ * sysctls for @net, in that order.  A failing step undoes the earlier
+ * ones in reverse order and its error code is returned.
+ */
+static int nf_conntrack_net_init(struct net *net)
+{
+ int ret;
+
+ ret = nf_conntrack_init(net);
+ if (ret < 0)
+ goto out_init;
+ ret = nf_conntrack_standalone_init_proc(net);
+ if (ret < 0)
+ goto out_proc;
+ /* defaults, set before the sysctls exposing them are registered */
+ net->ct.sysctl_checksum = 1;
+ net->ct.sysctl_log_invalid = 0;
+ ret = nf_conntrack_standalone_init_sysctl(net);
+ if (ret < 0)
+ goto out_sysctl;
+ return 0;
+
+out_sysctl:
+ nf_conntrack_standalone_fini_proc(net);
+out_proc:
+ nf_conntrack_cleanup(net);
+out_init:
+ return ret;
+}
+
+/* Per-net exit: tear down in the reverse order of nf_conntrack_net_init() */
+static void nf_conntrack_net_exit(struct net *net)
+{
+ nf_conntrack_standalone_fini_sysctl(net);
+ nf_conntrack_standalone_fini_proc(net);
+ nf_conntrack_cleanup(net);
+}
+
+/* Per-net hooks registered by nf_conntrack_standalone_init() */
+static struct pernet_operations nf_conntrack_net_ops = {
+ .init = nf_conntrack_net_init,
+ .exit = nf_conntrack_net_exit,
+};
+
+/* Module init: register the per-net operations; returns their result */
+static int __init nf_conntrack_standalone_init(void)
+{
+ return register_pernet_subsys(&nf_conntrack_net_ops);
+}
+
+/* Module exit: unregister the per-net operations */
+static void __exit nf_conntrack_standalone_fini(void)
+{
+ unregister_pernet_subsys(&nf_conntrack_net_ops);
+}
+
+module_init(nf_conntrack_standalone_init);
+module_exit(nf_conntrack_standalone_fini);
+
+/*
+ * Some modules need us, but don't depend directly on any symbol.
+ * They should call this intentionally empty function so that they
+ * reference an exported symbol of this module.
+ */
+void need_conntrack(void)
+{
+}
+EXPORT_SYMBOL_GPL(need_conntrack);
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
new file mode 100644
index 0000000..f57f6e7
--- /dev/null
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -0,0 +1,152 @@
+/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_tftp.h>
+
+MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
+MODULE_DESCRIPTION("TFTP connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_conntrack_tftp");
+
+#define MAX_PORTS 8
+static unsigned short ports[MAX_PORTS];
+static unsigned int ports_c;
+module_param_array(ports, ushort, &ports_c, 0400);
+MODULE_PARM_DESC(ports, "Port numbers of TFTP servers");
+
+/*
+ * Optional NAT hook.  tftp_help() reads it with rcu_dereference() and,
+ * when it is set and the connection has a NAT status bit, calls it
+ * instead of registering the expectation itself.
+ */
+unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ struct nf_conntrack_expect *exp) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_tftp_hook);
+
+/*
+ * Conntrack helper entry point for TFTP.
+ *
+ * For read and write requests an expectation is set up from the reply
+ * tuple of the connection (any source port), either through the NAT
+ * hook or directly.  All other opcodes are only logged via pr_debug().
+ *
+ * Returns NF_ACCEPT, NF_DROP when the expectation cannot be allocated
+ * or registered, or the NAT hook's verdict.
+ */
+static int tftp_help(struct sk_buff *skb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ const struct tftphdr *tfh;
+ struct tftphdr _tftph;
+ struct nf_conntrack_expect *exp;
+ struct nf_conntrack_tuple *tuple;
+ unsigned int ret = NF_ACCEPT;
+ typeof(nf_nat_tftp_hook) nf_nat_tftp;
+
+ /* TFTP header follows the UDP header; accept packets too short for it */
+ tfh = skb_header_pointer(skb, protoff + sizeof(struct udphdr),
+ sizeof(_tftph), &_tftph);
+ if (tfh == NULL)
+ return NF_ACCEPT;
+
+ switch (ntohs(tfh->opcode)) {
+ case TFTP_OPCODE_READ:
+ case TFTP_OPCODE_WRITE:
+ /* RRQ and WRQ works the same way */
+ nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+ exp = nf_ct_expect_alloc(ct);
+ if (exp == NULL)
+ return NF_DROP;
+ /* addresses and destination port of the reply tuple, no source port */
+ tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
+ nf_ct_l3num(ct),
+ &tuple->src.u3, &tuple->dst.u3,
+ IPPROTO_UDP, NULL, &tuple->dst.u.udp.port);
+
+ pr_debug("expect: ");
+ nf_ct_dump_tuple(&exp->tuple);
+
+ nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook);
+ if (nf_nat_tftp && ct->status & IPS_NAT_MASK)
+ ret = nf_nat_tftp(skb, ctinfo, exp);
+ else if (nf_ct_expect_related(exp) != 0)
+ ret = NF_DROP;
+ /* drop the reference obtained from nf_ct_expect_alloc() */
+ nf_ct_expect_put(exp);
+ break;
+ case TFTP_OPCODE_DATA:
+ case TFTP_OPCODE_ACK:
+ pr_debug("Data/ACK opcode\n");
+ break;
+ case TFTP_OPCODE_ERROR:
+ pr_debug("Error opcode\n");
+ break;
+ default:
+ pr_debug("Unknown opcode\n");
+ }
+ return ret;
+}
+
+static struct nf_conntrack_helper tftp[MAX_PORTS][2] __read_mostly;
+static char tftp_names[MAX_PORTS][2][sizeof("tftp-65535")] __read_mostly;
+
+/*
+ * Expectation policy shared by all TFTP helpers.
+ * NOTE(review): .timeout is presumably in seconds -- confirm.
+ */
+static const struct nf_conntrack_expect_policy tftp_exp_policy = {
+ .max_expected = 1,
+ .timeout = 5 * 60,
+};
+
+/*
+ * Unregister both address family helpers of every configured port.
+ */
+static void nf_conntrack_tftp_fini(void)
+{
+	int port_idx;
+
+	for (port_idx = 0; port_idx < ports_c; port_idx++) {
+		nf_conntrack_helper_unregister(&tftp[port_idx][0]);
+		nf_conntrack_helper_unregister(&tftp[port_idx][1]);
+	}
+}
+
+/*
+ * Module init: register one TFTP helper per configured port and address
+ * family (slot 0: AF_INET, slot 1: AF_INET6).  When no port was given
+ * as module parameter, TFTP_PORT is used.
+ *
+ * Returns 0 on success, or the error returned by
+ * nf_conntrack_helper_register().  On failure only the helpers that
+ * were registered before the failing one are unregistered again.
+ */
+static int __init nf_conntrack_tftp_init(void)
+{
+	int i, j, ret;
+	char *tmpname;
+
+	if (ports_c == 0)
+		ports[ports_c++] = TFTP_PORT;
+
+	for (i = 0; i < ports_c; i++) {
+		memset(&tftp[i], 0, sizeof(tftp[i]));
+
+		tftp[i][0].tuple.src.l3num = AF_INET;
+		tftp[i][1].tuple.src.l3num = AF_INET6;
+		for (j = 0; j < 2; j++) {
+			tftp[i][j].tuple.dst.protonum = IPPROTO_UDP;
+			tftp[i][j].tuple.src.u.udp.port = htons(ports[i]);
+			tftp[i][j].expect_policy = &tftp_exp_policy;
+			tftp[i][j].me = THIS_MODULE;
+			tftp[i][j].help = tftp_help;
+
+			/* Default port gets the plain name, others "tftp-<index>" */
+			tmpname = &tftp_names[i][j][0];
+			if (ports[i] == TFTP_PORT)
+				sprintf(tmpname, "tftp");
+			else
+				sprintf(tmpname, "tftp-%u", i);
+			tftp[i][j].name = tmpname;
+
+			ret = nf_conntrack_helper_register(&tftp[i][j]);
+			if (ret)
+				goto err_unwind;
+		}
+	}
+	return 0;
+
+err_unwind:
+	printk("nf_ct_tftp: failed to register helper "
+	       "for pf: %u port: %u\n",
+	       tftp[i][j].tuple.src.l3num, ports[i]);
+
+	/*
+	 * Roll back explicitly instead of calling nf_conntrack_tftp_fini():
+	 * that routine unregisters every slot below ports_c, including the
+	 * helper that just failed and all helpers not reached yet.
+	 */
+	while (j-- > 0)
+		nf_conntrack_helper_unregister(&tftp[i][j]);
+	while (i-- > 0) {
+		for (j = 0; j < 2; j++)
+			nf_conntrack_helper_unregister(&tftp[i][j]);
+	}
+	return ret;
+}
+
+module_init(nf_conntrack_tftp_init);
+module_exit(nf_conntrack_tftp_fini);
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
new file mode 100644
index 0000000..bf66099
--- /dev/null
+++ b/net/netfilter/nf_internals.h
@@ -0,0 +1,38 @@
+#ifndef _NF_INTERNALS_H
+#define _NF_INTERNALS_H
+
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+
+/*
+ * Debug output helper: expands to printk() with CONFIG_NETFILTER_DEBUG
+ * and to nothing otherwise (arguments are then not evaluated).
+ */
+#ifdef CONFIG_NETFILTER_DEBUG
+#define NFDEBUG(format, args...) printk(format , ## args)
+#else
+#define NFDEBUG(format, args...)
+#endif
+
+
+/* core.c */
+extern unsigned int nf_iterate(struct list_head *head,
+ struct sk_buff *skb,
+ unsigned int hook,
+ const struct net_device *indev,
+ const struct net_device *outdev,
+ struct list_head **i,
+ int (*okfn)(struct sk_buff *),
+ int hook_thresh);
+
+/* nf_queue.c */
+extern int nf_queue(struct sk_buff *skb,
+ struct list_head *elem,
+ u_int8_t pf, unsigned int hook,
+ struct net_device *indev,
+ struct net_device *outdev,
+ int (*okfn)(struct sk_buff *),
+ unsigned int queuenum);
+extern int __init netfilter_queue_init(void);
+
+/* nf_log.c */
+extern int __init netfilter_log_init(void);
+
+#endif
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
new file mode 100644
index 0000000..fa8ae5d
--- /dev/null
+++ b/net/netfilter/nf_log.c
@@ -0,0 +1,172 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/seq_file.h>
+#include <net/protocol.h>
+#include <net/netfilter/nf_log.h>
+
+#include "nf_internals.h"
+
+/* Internal logging interface, which relies on the real
+ LOG target modules */
+
+#define NF_LOG_PREFIXLEN 128
+
+static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly;
+static DEFINE_MUTEX(nf_log_mutex);
+
+/*
+ * Register @logger as the logger for protocol family @pf.
+ *
+ * Returns 0 on success, -EINVAL if @pf is out of range, -EEXIST if the
+ * same logger is already registered for @pf, -EBUSY if a different
+ * logger is registered, or the negative value returned by
+ * mutex_lock_interruptible() when taking the lock fails.
+ */
+int nf_log_register(u_int8_t pf, const struct nf_logger *logger)
+{
+ int ret;
+
+ if (pf >= ARRAY_SIZE(nf_loggers))
+ return -EINVAL;
+
+ /* Any setup of logging members must be done before
+ * substituting pointer. */
+ ret = mutex_lock_interruptible(&nf_log_mutex);
+ if (ret < 0)
+ return ret;
+
+ /* on the success path ret keeps the value returned by the lock call */
+ if (!nf_loggers[pf])
+ rcu_assign_pointer(nf_loggers[pf], logger);
+ else if (nf_loggers[pf] == logger)
+ ret = -EEXIST;
+ else
+ ret = -EBUSY;
+
+ mutex_unlock(&nf_log_mutex);
+ return ret;
+}
+EXPORT_SYMBOL(nf_log_register);
+
+/*
+ * Clear the logger registered for protocol family @pf, whichever it is,
+ * and wait for an RCU grace period.  Out-of-range values of @pf are
+ * ignored.
+ */
+void nf_log_unregister_pf(u_int8_t pf)
+{
+	if (pf < ARRAY_SIZE(nf_loggers)) {
+		mutex_lock(&nf_log_mutex);
+		rcu_assign_pointer(nf_loggers[pf], NULL);
+		mutex_unlock(&nf_log_mutex);
+
+		/* Give time to concurrent readers. */
+		synchronize_rcu();
+	}
+}
+EXPORT_SYMBOL(nf_log_unregister_pf);
+
+/*
+ * Remove @logger from every protocol family it is registered for and
+ * wait for an RCU grace period before returning.
+ */
+void nf_log_unregister(const struct nf_logger *logger)
+{
+ int i;
+
+ mutex_lock(&nf_log_mutex);
+ for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) {
+ if (nf_loggers[i] == logger)
+ rcu_assign_pointer(nf_loggers[i], NULL);
+ }
+ mutex_unlock(&nf_log_mutex);
+
+ synchronize_rcu();
+}
+EXPORT_SYMBOL(nf_log_unregister);
+
+/*
+ * Log a packet through the logger registered for protocol family @pf.
+ *
+ * The printf-style @fmt and its arguments are formatted into a prefix
+ * of at most NF_LOG_PREFIXLEN bytes (including the terminating NUL)
+ * which is passed to the logger's logfn() together with the remaining
+ * arguments.  Nothing happens when no logger is registered for @pf or
+ * when @pf is out of range.
+ */
+void nf_log_packet(u_int8_t pf,
+		   unsigned int hooknum,
+		   const struct sk_buff *skb,
+		   const struct net_device *in,
+		   const struct net_device *out,
+		   const struct nf_loginfo *loginfo,
+		   const char *fmt, ...)
+{
+	va_list args;
+	char prefix[NF_LOG_PREFIXLEN];
+	const struct nf_logger *logger;
+
+	/* Same range check as nf_log_register()/nf_log_unregister_pf() */
+	if (pf >= ARRAY_SIZE(nf_loggers))
+		return;
+
+	rcu_read_lock();
+	logger = rcu_dereference(nf_loggers[pf]);
+	if (logger) {
+		va_start(args, fmt);
+		vsnprintf(prefix, sizeof(prefix), fmt, args);
+		va_end(args);
+		logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_log_packet);
+
+#ifdef CONFIG_PROC_FS
+/*
+ * seq_file start callback: take the RCU read lock (dropped in
+ * seq_stop()) and use the position pointer itself as iterator cookie;
+ * *pos is the index into nf_loggers[].  Returns NULL past the end.
+ */
+static void *seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ rcu_read_lock();
+
+ if (*pos >= ARRAY_SIZE(nf_loggers))
+ return NULL;
+
+ return pos;
+}
+
+/* seq_file next callback: advance *pos; NULL once past the last slot */
+static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ (*pos)++;
+
+ if (*pos >= ARRAY_SIZE(nf_loggers))
+ return NULL;
+
+ return pos;
+}
+
+/* seq_file stop callback: drop the RCU read lock taken in seq_start() */
+static void seq_stop(struct seq_file *s, void *v)
+ __releases(RCU)
+{
+ rcu_read_unlock();
+}
+
+/*
+ * seq_file show callback: print the slot index followed by the name of
+ * the logger registered there, or "NONE" for an empty slot.  Returns
+ * the result of seq_printf().
+ */
+static int seq_show(struct seq_file *s, void *v)
+{
+ /* the cookie is the position pointer handed out by seq_start/seq_next */
+ loff_t *pos = v;
+ const struct nf_logger *logger;
+
+ logger = rcu_dereference(nf_loggers[*pos]);
+
+ if (!logger)
+ return seq_printf(s, "%2lld NONE\n", *pos);
+
+ return seq_printf(s, "%2lld %s\n", *pos, logger->name);
+}
+
+/* Iterator callbacks for the "nf_log" proc file */
+static const struct seq_operations nflog_seq_ops = {
+ .start = seq_start,
+ .next = seq_next,
+ .stop = seq_stop,
+ .show = seq_show,
+};
+
+/* open() handler of the "nf_log" proc file */
+static int nflog_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &nflog_seq_ops);
+}
+
+/* File operations of the "nf_log" proc file */
+static const struct file_operations nflog_file_ops = {
+ .owner = THIS_MODULE,
+ .open = nflog_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+#endif /* PROC_FS */
+
+
+/*
+ * Create the "nf_log" proc entry below proc_net_netfilter when procfs
+ * is enabled.  Returns 0 on success (or without procfs), -1 when the
+ * entry cannot be created.
+ * NOTE(review): -1 is not a meaningful errno value; -ENOMEM would be
+ * more conventional if all callers only test for a negative result --
+ * confirm before changing.
+ */
+int __init netfilter_log_init(void)
+{
+#ifdef CONFIG_PROC_FS
+ if (!proc_create("nf_log", S_IRUGO,
+ proc_net_netfilter, &nflog_file_ops))
+ return -1;
+#endif
+ return 0;
+}
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
new file mode 100644
index 0000000..4f2310c
--- /dev/null
+++ b/net/netfilter/nf_queue.c
@@ -0,0 +1,357 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/seq_file.h>
+#include <linux/rcupdate.h>
+#include <net/protocol.h>
+#include <net/netfilter/nf_queue.h>
+
+#include "nf_internals.h"
+
+/*
+ * A queue handler may be registered for each protocol. Each is protected by
+ * a long-term mutex. The handler must provide an outfn() to accept packets
+ * for queueing and must reinject all packets it receives, no matter what.
+ */
+static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly;
+
+static DEFINE_MUTEX(queue_handler_mutex);
+
+ /*
+  * Install @qh as the queue handler for protocol family @pf.
+  *
+  * Returns 0 on success, -EINVAL if @pf is outside the handler table,
+  * -EEXIST if @qh already occupies the slot and -EBUSY if a different
+  * handler does.
+  */
+ int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
+ {
+ 	const struct nf_queue_handler *cur;
+ 	int ret = 0;
+
+ 	if (pf >= ARRAY_SIZE(queue_handler))
+ 		return -EINVAL;
+
+ 	mutex_lock(&queue_handler_mutex);
+ 	cur = queue_handler[pf];
+ 	if (cur == qh)
+ 		ret = -EEXIST;
+ 	else if (cur != NULL)
+ 		ret = -EBUSY;
+ 	else
+ 		rcu_assign_pointer(queue_handler[pf], qh);
+ 	mutex_unlock(&queue_handler_mutex);
+
+ 	return ret;
+ }
+ EXPORT_SYMBOL(nf_register_queue_handler);
+
+ /*
+ * Remove the queue handler for protocol family @pf and wait for an RCU grace
+ * period (synchronize_rcu()) before returning.
+ *
+ * Returns 0 on success, or -EINVAL if @pf is outside the handler table or a
+ * handler other than @qh is currently registered for it. An already empty
+ * slot is not treated as an error.
+ *
+ * The caller must flush their queue before this.
+ */
+ int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
+ {
+ if (pf >= ARRAY_SIZE(queue_handler))
+ return -EINVAL;
+
+ mutex_lock(&queue_handler_mutex);
+ if (queue_handler[pf] && queue_handler[pf] != qh) {
+ mutex_unlock(&queue_handler_mutex);
+ return -EINVAL;
+ }
+
+ rcu_assign_pointer(queue_handler[pf], NULL);
+ mutex_unlock(&queue_handler_mutex);
+
+ /* let RCU readers that may still hold the old pointer finish */
+ synchronize_rcu();
+
+ return 0;
+ }
+ EXPORT_SYMBOL(nf_unregister_queue_handler);
+
+ /*
+ * Clear every protocol slot that currently points at @qh, then wait for an
+ * RCU grace period before returning.
+ */
+ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
+ {
+ u_int8_t pf;
+
+ mutex_lock(&queue_handler_mutex);
+ for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) {
+ if (queue_handler[pf] == qh)
+ rcu_assign_pointer(queue_handler[pf], NULL);
+ }
+ mutex_unlock(&queue_handler_mutex);
+
+ synchronize_rcu();
+ }
+ EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
+
+ /*
+ * Drop the references __nf_queue() took when it queued @entry: the in/out
+ * devices, the bridge physical in/out devices (CONFIG_BRIDGE_NETFILTER only)
+ * and the module that owns the queueing hook. Does not free @entry or its skb.
+ */
+ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
+ {
+ /* Release those devices we held, or Alexey will kill me. */
+ if (entry->indev)
+ dev_put(entry->indev);
+ if (entry->outdev)
+ dev_put(entry->outdev);
+ #ifdef CONFIG_BRIDGE_NETFILTER
+ if (entry->skb->nf_bridge) {
+ struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
+
+ if (nf_bridge->physindev)
+ dev_put(nf_bridge->physindev);
+ if (nf_bridge->physoutdev)
+ dev_put(nf_bridge->physoutdev);
+ }
+ #endif
+ /* Drop reference to owner of hook which queued us. */
+ module_put(entry->elem->owner);
+ }
+
+/*
+ * Any packet that leaves via this function must come back
+ * through nf_reinject().
+ */
+static int __nf_queue(struct sk_buff *skb,
+ struct list_head *elem,
+ u_int8_t pf, unsigned int hook,
+ struct net_device *indev,
+ struct net_device *outdev,
+ int (*okfn)(struct sk_buff *),
+ unsigned int queuenum)
+{
+ int status;
+ struct nf_queue_entry *entry = NULL;
+#ifdef CONFIG_BRIDGE_NETFILTER
+ struct net_device *physindev;
+ struct net_device *physoutdev;
+#endif
+ const struct nf_afinfo *afinfo;
+ const struct nf_queue_handler *qh;
+
+ /* QUEUE == DROP if noone is waiting, to be safe. */
+ rcu_read_lock();
+
+ qh = rcu_dereference(queue_handler[pf]);
+ if (!qh)
+ goto err_unlock;
+
+ afinfo = nf_get_afinfo(pf);
+ if (!afinfo)
+ goto err_unlock;
+
+ entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
+ if (!entry)
+ goto err_unlock;
+
+ *entry = (struct nf_queue_entry) {
+ .skb = skb,
+ .elem = list_entry(elem, struct nf_hook_ops, list),
+ .pf = pf,
+ .hook = hook,
+ .indev = indev,
+ .outdev = outdev,
+ .okfn = okfn,
+ };
+
+ /* If it's going away, ignore hook. */
+ if (!try_module_get(entry->elem->owner)) {
+ rcu_read_unlock();
+ kfree(entry);
+ return 0;
+ }
+
+ /* Bump dev refs so they don't vanish while packet is out */
+ if (indev)
+ dev_hold(indev);
+ if (outdev)
+ dev_hold(outdev);
+#ifdef CONFIG_BRIDGE_NETFILTER
+ if (skb->nf_bridge) {
+ physindev = skb->nf_bridge->physindev;
+ if (physindev)
+ dev_hold(physindev);
+ physoutdev = skb->nf_bridge->physoutdev;
+ if (physoutdev)
+ dev_hold(physoutdev);
+ }
+#endif
+ afinfo->saveroute(skb, entry);
+ status = qh->outfn(entry, queuenum);
+
+ rcu_read_unlock();
+
+ if (status < 0) {
+ nf_queue_entry_release_refs(entry);
+ goto err;
+ }
+
+ return 1;
+
+err_unlock:
+ rcu_read_unlock();
+err:
+ kfree_skb(skb);
+ kfree(entry);
+ return 1;
+}
+
+ /*
+ * Queue @skb for the handler of @pf. Non-GSO packets go straight to
+ * __nf_queue() and its return value is passed on. GSO packets are segmented
+ * first (the original skb is freed) and every segment is queued on its own;
+ * in that case the function always returns 1, and a segment that
+ * __nf_queue() declined (returned 0) is freed here.
+ */
+ int nf_queue(struct sk_buff *skb,
+ struct list_head *elem,
+ u_int8_t pf, unsigned int hook,
+ struct net_device *indev,
+ struct net_device *outdev,
+ int (*okfn)(struct sk_buff *),
+ unsigned int queuenum)
+ {
+ struct sk_buff *segs;
+
+ if (!skb_is_gso(skb))
+ return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+ queuenum);
+
+ /* set skb->protocol for the families handled here before segmenting */
+ switch (pf) {
+ case AF_INET:
+ skb->protocol = htons(ETH_P_IP);
+ break;
+ case AF_INET6:
+ skb->protocol = htons(ETH_P_IPV6);
+ break;
+ }
+
+ segs = skb_gso_segment(skb, 0);
+ kfree_skb(skb);
+ if (IS_ERR(segs))
+ return 1;
+
+ do {
+ struct sk_buff *nskb = segs->next;
+
+ /* unlink the segment from the list before handing it over */
+ segs->next = NULL;
+ if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn,
+ queuenum))
+ kfree_skb(segs);
+ segs = nskb;
+ } while (segs);
+ return 1;
+ }
+
+ /*
+ * Resume processing of a previously queued packet with the given @verdict.
+ *
+ * The references held by @entry are released first. NF_REPEAT re-runs the
+ * hook that queued the packet (by stepping back one list element) and is
+ * then treated as NF_ACCEPT. On NF_ACCEPT the packet is re-routed through
+ * afinfo->reroute() (failure turns the verdict into NF_DROP) and the
+ * remaining hooks are traversed with nf_iterate(). Depending on the final
+ * verdict the packet is passed to okfn(), left alone (NF_STOLEN), queued
+ * again, or freed. @entry itself is always freed before returning.
+ */
+ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
+ {
+ struct sk_buff *skb = entry->skb;
+ struct list_head *elem = &entry->elem->list;
+ const struct nf_afinfo *afinfo;
+
+ rcu_read_lock();
+
+ nf_queue_entry_release_refs(entry);
+
+ /* Continue traversal iff userspace said ok... */
+ if (verdict == NF_REPEAT) {
+ elem = elem->prev;
+ verdict = NF_ACCEPT;
+ }
+
+ if (verdict == NF_ACCEPT) {
+ afinfo = nf_get_afinfo(entry->pf);
+ if (!afinfo || afinfo->reroute(skb, entry) < 0)
+ verdict = NF_DROP;
+ }
+
+ if (verdict == NF_ACCEPT) {
+ next_hook:
+ verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook],
+ skb, entry->hook,
+ entry->indev, entry->outdev, &elem,
+ entry->okfn, INT_MIN);
+ }
+
+ switch (verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ case NF_STOP:
+ local_bh_disable();
+ entry->okfn(skb);
+ local_bh_enable();
+ /* fall through: nothing further to do once okfn() has the skb */
+ case NF_STOLEN:
+ break;
+ case NF_QUEUE:
+ /* 0 means the hook's module is going away: skip it and move on */
+ if (!__nf_queue(skb, elem, entry->pf, entry->hook,
+ entry->indev, entry->outdev, entry->okfn,
+ verdict >> NF_VERDICT_BITS))
+ goto next_hook;
+ break;
+ default:
+ kfree_skb(skb);
+ }
+ rcu_read_unlock();
+ kfree(entry);
+ return;
+ }
+ EXPORT_SYMBOL(nf_reinject);
+
+#ifdef CONFIG_PROC_FS
+ /*
+  * seq_file ->start callback for the nf_queue proc entry: return the
+  * position cursor, or NULL once it is past the last queue_handler[] slot.
+  */
+ static void *seq_start(struct seq_file *seq, loff_t *pos)
+ {
+ 	return *pos < ARRAY_SIZE(queue_handler) ? pos : NULL;
+ }
+
+ /*
+  * seq_file ->next callback: advance the cursor to the following
+  * queue_handler[] slot; returning NULL ends the walk.
+  */
+ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
+ {
+ 	loff_t idx = ++(*pos);
+
+ 	if (idx < ARRAY_SIZE(queue_handler))
+ 		return pos;
+
+ 	return NULL;
+ }
+
+ /* seq_file ->stop callback: nothing to release, seq_show() does its own RCU locking. */
+ static void seq_stop(struct seq_file *s, void *v)
+ {
+
+ }
+
+ /*
+ * seq_file ->show callback: print one line for the queue_handler[] slot
+ * selected by the cursor in @v -- the slot index followed by the handler's
+ * name, or "NONE" when the slot is empty. Returns the seq_printf() result.
+ */
+ static int seq_show(struct seq_file *s, void *v)
+ {
+ int ret;
+ loff_t *pos = v;
+ const struct nf_queue_handler *qh;
+
+ /* hold the RCU read lock across the dereference and the use of qh->name */
+ rcu_read_lock();
+ qh = rcu_dereference(queue_handler[*pos]);
+ if (!qh)
+ ret = seq_printf(s, "%2lld NONE\n", *pos);
+ else
+ ret = seq_printf(s, "%2lld %s\n", *pos, qh->name);
+ rcu_read_unlock();
+
+ return ret;
+ }
+
+ /* Iterator callbacks handed to seq_open() by nfqueue_open(). */
+ static const struct seq_operations nfqueue_seq_ops = {
+ .start = seq_start,
+ .next = seq_next,
+ .stop = seq_stop,
+ .show = seq_show,
+ };
+
+ /* ->open handler: attach the nfqueue_seq_ops iterator to @file via seq_open(). */
+ static int nfqueue_open(struct inode *inode, struct file *file)
+ {
+ return seq_open(file, &nfqueue_seq_ops);
+ }
+
+ /* File operations registered for the "nf_queue" proc entry in netfilter_queue_init(). */
+ static const struct file_operations nfqueue_file_ops = {
+ .owner = THIS_MODULE,
+ .open = nfqueue_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+ };
+#endif /* PROC_FS */
+
+
+ /*
+  * Create the "nf_queue" entry under proc_net_netfilter (only when
+  * CONFIG_PROC_FS is set).
+  *
+  * Returns 0 on success, or -ENOMEM when the proc entry could not be
+  * created.  A real errno is used instead of a bare -1, which would read
+  * as -EPERM to anything interpreting the value as an error code.
+  */
+ int __init netfilter_queue_init(void)
+ {
+ #ifdef CONFIG_PROC_FS
+ 	if (!proc_create("nf_queue", S_IRUGO,
+ 			 proc_net_netfilter, &nfqueue_file_ops))
+ 		return -ENOMEM;
+ #endif
+ 	return 0;
+ }
+
+
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
new file mode 100644
index 0000000..8ab829f
--- /dev/null
+++ b/net/netfilter/nf_sockopt.c
@@ -0,0 +1,169 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/mutex.h>
+#include <net/sock.h>
+
+#include "nf_internals.h"
+
+/* Sockopts only registered and called from user context, so
+ net locking would be overkill. Also, [gs]etsockopt calls may
+ sleep. */
+static DEFINE_MUTEX(nf_sockopt_mutex);
+static LIST_HEAD(nf_sockopts);
+
+ /*
+  * Return 1 if the half-open ranges [min1, max1) and [min2, max2) share at
+  * least one value, 0 otherwise.
+  */
+ static inline int overlap(int min1, int max1, int min2, int max2)
+ {
+ 	if (max1 <= min2)
+ 		return 0;
+
+ 	return min1 < max2;
+ }
+
+ /*
+ * Functions to register sockopt ranges (exclusive).
+ *
+ * Adds @reg to the nf_sockopts list unless an already registered entry for
+ * the same protocol family overlaps its set or get option range.
+ *
+ * Returns 0 on success, -EBUSY on overlap, or -EINTR if waiting for
+ * nf_sockopt_mutex was interrupted.
+ */
+ int nf_register_sockopt(struct nf_sockopt_ops *reg)
+ {
+ struct nf_sockopt_ops *ops;
+ int ret = 0;
+
+ if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
+ return -EINTR;
+
+ list_for_each_entry(ops, &nf_sockopts, list) {
+ if (ops->pf == reg->pf
+ && (overlap(ops->set_optmin, ops->set_optmax,
+ reg->set_optmin, reg->set_optmax)
+ || overlap(ops->get_optmin, ops->get_optmax,
+ reg->get_optmin, reg->get_optmax))) {
+ NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
+ ops->set_optmin, ops->set_optmax,
+ ops->get_optmin, ops->get_optmax,
+ reg->set_optmin, reg->set_optmax,
+ reg->get_optmin, reg->get_optmax);
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+
+ list_add(&reg->list, &nf_sockopts);
+ out:
+ mutex_unlock(&nf_sockopt_mutex);
+ return ret;
+ }
+ EXPORT_SYMBOL(nf_register_sockopt);
+
+ /* Remove @reg from the nf_sockopts list under nf_sockopt_mutex. */
+ void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
+ {
+ mutex_lock(&nf_sockopt_mutex);
+ list_del(&reg->list);
+ mutex_unlock(&nf_sockopt_mutex);
+ }
+ EXPORT_SYMBOL(nf_unregister_sockopt);
+
+ /*
+ * Look up the registered sockopt ops for family @pf whose get range (when
+ * @get is non-zero) or set range (otherwise) contains option @val.
+ *
+ * On success the ops are returned with a reference held on ops->owner; the
+ * caller must module_put() it. Returns ERR_PTR(-ENOPROTOOPT) when nothing
+ * matches or when the owner module of a same-family entry cannot be pinned,
+ * and ERR_PTR(-EINTR) if waiting for nf_sockopt_mutex was interrupted.
+ * @sk is not used here.
+ */
+ static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf,
+ int val, int get)
+ {
+ struct nf_sockopt_ops *ops;
+
+ if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
+ return ERR_PTR(-EINTR);
+
+ list_for_each_entry(ops, &nf_sockopts, list) {
+ if (ops->pf == pf) {
+ /* pin the owner before inspecting the ranges */
+ if (!try_module_get(ops->owner))
+ goto out_nosup;
+
+ if (get) {
+ if (val >= ops->get_optmin &&
+ val < ops->get_optmax)
+ goto out;
+ } else {
+ if (val >= ops->set_optmin &&
+ val < ops->set_optmax)
+ goto out;
+ }
+ /* not this entry: give the reference back and keep looking */
+ module_put(ops->owner);
+ }
+ }
+ out_nosup:
+ ops = ERR_PTR(-ENOPROTOOPT);
+ out:
+ mutex_unlock(&nf_sockopt_mutex);
+ return ops;
+ }
+
+ /*
+  * Common back end of nf_getsockopt()/nf_setsockopt(): find the ops that
+  * cover option @val for family @pf, invoke their get (@get != 0) or set
+  * handler, and release the module reference taken by the lookup.
+  * Returns the handler's result or the lookup error.
+  */
+ static int nf_sockopt(struct sock *sk, u_int8_t pf, int val,
+ 		      char __user *opt, int *len, int get)
+ {
+ 	struct nf_sockopt_ops *ops = nf_sockopt_find(sk, pf, val, get);
+ 	int ret;
+
+ 	if (IS_ERR(ops))
+ 		return PTR_ERR(ops);
+
+ 	ret = get ? ops->get(sk, val, opt, len)
+ 		  : ops->set(sk, val, opt, *len);
+
+ 	module_put(ops->owner);
+ 	return ret;
+ }
+
+ /* setsockopt() entry point: dispatch option @val of family @pf to its registered set handler. */
+ int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt,
+ int len)
+ {
+ return nf_sockopt(sk, pf, val, opt, &len, 0);
+ }
+ EXPORT_SYMBOL(nf_setsockopt);
+
+ /* getsockopt() entry point: dispatch option @val of family @pf to its registered get handler. */
+ int nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt,
+ int *len)
+ {
+ return nf_sockopt(sk, pf, val, opt, len, 1);
+ }
+ EXPORT_SYMBOL(nf_getsockopt);
+
+#ifdef CONFIG_COMPAT
+ /*
+ * CONFIG_COMPAT variant of nf_sockopt(): same lookup and reference handling,
+ * but the ops' compat_get/compat_set handler is preferred when one is
+ * provided, falling back to the regular get/set handler otherwise.
+ */
+ static int compat_nf_sockopt(struct sock *sk, u_int8_t pf, int val,
+ char __user *opt, int *len, int get)
+ {
+ struct nf_sockopt_ops *ops;
+ int ret;
+
+ ops = nf_sockopt_find(sk, pf, val, get);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+
+ if (get) {
+ if (ops->compat_get)
+ ret = ops->compat_get(sk, val, opt, len);
+ else
+ ret = ops->get(sk, val, opt, len);
+ } else {
+ if (ops->compat_set)
+ ret = ops->compat_set(sk, val, opt, *len);
+ else
+ ret = ops->set(sk, val, opt, *len);
+ }
+
+ /* drop the owner reference taken by nf_sockopt_find() */
+ module_put(ops->owner);
+ return ret;
+ }
+
+ /* Compat setsockopt() entry point; see compat_nf_sockopt(). */
+ int compat_nf_setsockopt(struct sock *sk, u_int8_t pf,
+ int val, char __user *opt, int len)
+ {
+ return compat_nf_sockopt(sk, pf, val, opt, &len, 0);
+ }
+ EXPORT_SYMBOL(compat_nf_setsockopt);
+
+ /* Compat getsockopt() entry point; see compat_nf_sockopt(). */
+ int compat_nf_getsockopt(struct sock *sk, u_int8_t pf,
+ int val, char __user *opt, int *len)
+ {
+ return compat_nf_sockopt(sk, pf, val, opt, len, 1);
+ }
+ EXPORT_SYMBOL(compat_nf_getsockopt);
+#endif
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
new file mode 100644
index 0000000..cdc97f3
--- /dev/null
+++ b/net/netfilter/nf_tproxy_core.c
@@ -0,0 +1,95 @@
+/*
+ * Transparent proxy support for Linux/iptables
+ *
+ * Copyright (c) 2006-2007 BalaBit IT Ltd.
+ * Author: Balazs Scheidler, Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+
+#include <linux/net.h>
+#include <linux/if.h>
+#include <linux/netdevice.h>
+#include <net/udp.h>
+#include <net/netfilter/nf_tproxy_core.h>
+
+ /*
+ * Look up the IPv4 socket matching the given addresses and ports in @net.
+ *
+ * For IPPROTO_TCP the lookup is restricted to listening sockets when
+ * @listening_only is set, otherwise __inet_lookup() is used. IPPROTO_UDP uses
+ * udp4_lib_lookup(). Any other @protocol triggers WARN_ON() and yields NULL.
+ * @in supplies the interface index for the lookup and is dereferenced
+ * unconditionally, so it must not be NULL.
+ *
+ * Returns the socket found, or NULL.
+ */
+ struct sock *
+ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
+ const __be32 saddr, const __be32 daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in, bool listening_only)
+ {
+ struct sock *sk;
+
+ /* look up socket */
+ switch (protocol) {
+ case IPPROTO_TCP:
+ if (listening_only)
+ sk = __inet_lookup_listener(net, &tcp_hashinfo,
+ daddr, ntohs(dport),
+ in->ifindex);
+ else
+ sk = __inet_lookup(net, &tcp_hashinfo,
+ saddr, sport, daddr, dport,
+ in->ifindex);
+ break;
+ case IPPROTO_UDP:
+ sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ break;
+ default:
+ WARN_ON(1);
+ sk = NULL;
+ }
+
+ pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, listener only: %d, sock %p\n",
+ protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), listening_only, sk);
+
+ return sk;
+ }
+ EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4);
+
+ /*
+ * skb destructor installed by nf_tproxy_assign_sock(): detach the socket from
+ * @skb, clear the destructor and release the socket via nf_tproxy_put_sock().
+ */
+ static void
+ nf_tproxy_destructor(struct sk_buff *skb)
+ {
+ struct sock *sk = skb->sk;
+
+ skb->sk = NULL;
+ skb->destructor = NULL;
+
+ if (sk)
+ nf_tproxy_put_sock(sk);
+ }
+
+ /*
+  * Attach @sk to @skb if the socket has the transparent flag set.
+  *
+  * Consumes @sk either way: on success the reference moves to the skb and
+  * is dropped by nf_tproxy_destructor(); otherwise it is put right here.
+  * Returns 1 if the socket was attached, 0 if not.
+  */
+ int
+ nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
+ {
+ 	if (!inet_sk(sk)->transparent) {
+ 		nf_tproxy_put_sock(sk);
+ 		return 0;
+ 	}
+
+ 	skb->sk = sk;
+ 	skb->destructor = nf_tproxy_destructor;
+ 	return 1;
+ }
+ EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock);
+
+ /* Module init: only prints the version/copyright banner; always returns 0. */
+ static int __init nf_tproxy_init(void)
+ {
+ pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n");
+ pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n");
+ return 0;
+ }
+
+module_init(nf_tproxy_init);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs");
+MODULE_DESCRIPTION("Transparent proxy support core routines");
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
new file mode 100644
index 0000000..9c0ba17
--- /dev/null
+++ b/net/netfilter/nfnetlink.c
@@ -0,0 +1,207 @@
+/* Netfilter messages via netlink socket. Allows for user space
+ * protocol helpers and general trouble making from userspace.
+ *
+ * (C) 2001 by Jay Schulist <jschlst@samba.org>,
+ * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2005,2007 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * Initial netfilter messages via netlink development funded and
+ * generally made possible by Network Robots, Inc. (www.networkrobots.com)
+ *
+ * Further development of this code funded by Astaro AG (http://www.astaro.com)
+ *
+ * This software may be used and distributed according to the terms
+ * of the GNU General Public License, incorporated herein by reference.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fcntl.h>
+#include <linux/skbuff.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <linux/init.h>
+
+#include <linux/netlink.h>
+#include <linux/netfilter/nfnetlink.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
+
+static char __initdata nfversion[] = "0.30";
+
+static struct sock *nfnl = NULL;
+static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
+static DEFINE_MUTEX(nfnl_mutex);
+
+ /* Take nfnl_mutex, which serialises access to subsys_table and message processing. */
+ void nfnl_lock(void)
+ {
+ mutex_lock(&nfnl_mutex);
+ }
+ EXPORT_SYMBOL_GPL(nfnl_lock);
+
+ /* Release nfnl_mutex taken by nfnl_lock(). */
+ void nfnl_unlock(void)
+ {
+ mutex_unlock(&nfnl_mutex);
+ }
+ EXPORT_SYMBOL_GPL(nfnl_unlock);
+
+ /*
+  * Register subsystem @n in subsys_table under its subsys_id.
+  * Returns 0 on success or -EBUSY if that id is already taken.
+  */
+ int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
+ {
+ 	int err = 0;
+
+ 	nfnl_lock();
+ 	if (subsys_table[n->subsys_id])
+ 		err = -EBUSY;
+ 	else
+ 		subsys_table[n->subsys_id] = n;
+ 	nfnl_unlock();
+
+ 	return err;
+ }
+ EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
+
+ /*
+ * Clear the subsys_table slot for @n->subsys_id. The slot is cleared without
+ * checking that it actually holds @n. Always returns 0.
+ */
+ int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n)
+ {
+ nfnl_lock();
+ subsys_table[n->subsys_id] = NULL;
+ nfnl_unlock();
+
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
+
+ /*
+ * Map a netlink message @type to its registered subsystem. Returns NULL when
+ * the subsystem id encoded in @type is out of range or nothing is registered
+ * for it.
+ */
+ static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type)
+ {
+ u_int8_t subsys_id = NFNL_SUBSYS_ID(type);
+
+ if (subsys_id >= NFNL_SUBSYS_COUNT)
+ return NULL;
+
+ return subsys_table[subsys_id];
+ }
+
+ /*
+ * Return the callback of subsystem @ss selected by the message-type part of
+ * @type, or NULL when that index is not below ss->cb_count.
+ */
+ static inline const struct nfnl_callback *
+ nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss)
+ {
+ u_int8_t cb_id = NFNL_MSG_TYPE(type);
+
+ if (cb_id >= ss->cb_count)
+ return NULL;
+
+ return &ss->cb[cb_id];
+ }
+
+ /* Thin wrapper: netlink_has_listeners() on the nfnetlink kernel socket for @group. */
+ int nfnetlink_has_listeners(unsigned int group)
+ {
+ return netlink_has_listeners(nfnl, group);
+ }
+ EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
+
+ /*
+ * Thin wrapper: nlmsg_notify() on the nfnetlink kernel socket, with the
+ * allocation flags chosen by gfp_any().
+ */
+ int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
+ {
+ return nlmsg_notify(nfnl, skb, pid, group, echo, gfp_any());
+ }
+ EXPORT_SYMBOL_GPL(nfnetlink_send);
+
+ /* Thin wrapper: netlink_unicast() of @skb to @pid on the nfnetlink kernel socket. */
+ int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
+ {
+ return netlink_unicast(nfnl, skb, pid, flags);
+ }
+ EXPORT_SYMBOL_GPL(nfnetlink_unicast);
+
+ /*
+ * Process one complete nfnetlink message.
+ *
+ * Called via netlink_rcv_skb() from nfnetlink_rcv(), i.e. with nfnl_mutex
+ * held. Checks CAP_NET_ADMIN, resolves the subsystem (trying to load its
+ * module if it is missing and CONFIG_MODULES is set) and the callback,
+ * parses the attributes following the nfgenmsg header and invokes the
+ * callback. A callback returning -EAGAIN causes the whole lookup to be
+ * replayed.
+ *
+ * Returns 0 for messages too short to carry an nfgenmsg, -EPERM without the
+ * capability, -EINVAL for an unknown subsystem or callback, an nla_parse()
+ * error, or otherwise the callback's return value.
+ */
+ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+ {
+ const struct nfnl_callback *nc;
+ const struct nfnetlink_subsystem *ss;
+ int type, err;
+
+ if (security_netlink_recv(skb, CAP_NET_ADMIN))
+ return -EPERM;
+
+ /* All the messages must at least contain nfgenmsg */
+ if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg)))
+ return 0;
+
+ type = nlh->nlmsg_type;
+ replay:
+ ss = nfnetlink_get_subsys(type);
+ if (!ss) {
+ #ifdef CONFIG_MODULES
+ /* drop the mutex while request_module() runs, then look again */
+ nfnl_unlock();
+ request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
+ nfnl_lock();
+ ss = nfnetlink_get_subsys(type);
+ if (!ss)
+ #endif
+ return -EINVAL;
+ }
+
+ nc = nfnetlink_find_client(type, ss);
+ if (!nc)
+ return -EINVAL;
+
+ {
+ int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
+ u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
+ u_int16_t attr_count = ss->cb[cb_id].attr_count;
+ /* on-stack attribute table sized by the callback's attr_count */
+ struct nlattr *cda[attr_count+1];
+
+ if (likely(nlh->nlmsg_len >= min_len)) {
+ struct nlattr *attr = (void *)nlh + NLMSG_ALIGN(min_len);
+ int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+
+ err = nla_parse(cda, attr_count, attr, attrlen,
+ ss->cb[cb_id].policy);
+ if (err < 0)
+ return err;
+ } else
+ return -EINVAL;
+
+ err = nc->call(nfnl, skb, nlh, cda);
+ if (err == -EAGAIN)
+ goto replay;
+ return err;
+ }
+ }
+
+ /*
+ * Input callback of the nfnetlink kernel socket: run every message in @skb
+ * through nfnetlink_rcv_msg() while holding nfnl_mutex.
+ */
+ static void nfnetlink_rcv(struct sk_buff *skb)
+ {
+ nfnl_lock();
+ netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
+ nfnl_unlock();
+ }
+
+ /* Module exit: announce removal and release the nfnetlink kernel socket. */
+ static void __exit nfnetlink_exit(void)
+ {
+ 	printk("Removing netfilter NETLINK layer.\n");
+ 	netlink_kernel_release(nfnl);
+ }
+
+ /*
+  * Module init: create the NETLINK_NETFILTER kernel socket in init_net
+  * with nfnetlink_rcv() as its input callback.
+  *
+  * Returns 0 on success or -ENOMEM if the socket could not be created.
+  * A real errno matters here: the value is reported to whoever loads the
+  * module, and a bare -1 would show up as "Operation not permitted".
+  */
+ static int __init nfnetlink_init(void)
+ {
+ 	printk("Netfilter messages via NETLINK v%s.\n", nfversion);
+
+ 	nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX,
+ 				     nfnetlink_rcv, NULL, THIS_MODULE);
+ 	if (!nfnl) {
+ 		printk(KERN_ERR "cannot initialize nfnetlink!\n");
+ 		return -ENOMEM;
+ 	}
+
+ 	return 0;
+ }
+
+module_init(nfnetlink_init);
+module_exit(nfnetlink_exit);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
new file mode 100644
index 0000000..41e0105
--- /dev/null
+++ b/net/netfilter/nfnetlink_log.c
@@ -0,0 +1,982 @@
+/*
+ * This is a module which is used for logging packets to userspace via
+ * nfnetlink.
+ *
+ * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ *
+ * Based on the old ipv4-only ipt_ULOG.c:
+ * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netlink.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_log.h>
+#include <linux/spinlock.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/security.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
+#include <net/sock.h>
+#include <net/netfilter/nf_log.h>
+
+#include <asm/atomic.h>
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+#include "../bridge/br_private.h"
+#endif
+
+#define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE
+#define NFULNL_TIMEOUT_DEFAULT HZ /* every second */
+#define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */
+#define NFULNL_COPY_RANGE_MAX 0xFFFF /* max packet size is limited by 16-bit struct nfattr nfa_len field */
+
+ /*
+  * Rate-limited printk(): prints only when net_ratelimit() allows it.
+  * There is deliberately no semicolon after "while (0)", so that
+  * "PRINTR(...);" expands to exactly one statement and stays safe in an
+  * un-braced if/else.
+  */
+ #define PRINTR(x, args...)	do { if (net_ratelimit()) \
+ 				     printk(x, ## args); } while (0)
+
+ /*
+ * State of one logging group: a batch buffer (skb) that collects log
+ * messages for a peer, plus the per-group settings. Instances are
+ * reference counted via ->use and linked into instance_table by ->hlist.
+ */
+ struct nfulnl_instance {
+ struct hlist_node hlist; /* global list of instances */
+ spinlock_t lock;
+ atomic_t use; /* use count */
+
+ unsigned int qlen; /* number of nlmsgs in skb */
+ struct sk_buff *skb; /* pre-allocated skb */
+ struct timer_list timer;
+ int peer_pid; /* PID of the peer process */
+
+ /* configurable parameters */
+ unsigned int flushtimeout; /* timeout until queue flush */
+ unsigned int nlbufsiz; /* netlink buffer allocation size */
+ unsigned int qthreshold; /* threshold of the queue */
+ u_int32_t copy_range;
+ u_int32_t seq; /* instance-local sequential counter */
+ u_int16_t group_num; /* number of this queue */
+ u_int16_t flags;
+ u_int8_t copy_mode;
+ };
+
+static DEFINE_RWLOCK(instances_lock);
+static atomic_t global_seq;
+
+#define INSTANCE_BUCKETS 16
+static struct hlist_head instance_table[INSTANCE_BUCKETS];
+static unsigned int hash_init;
+
+ /* Bucket index for @group_num: its low byte modulo INSTANCE_BUCKETS. */
+ static inline u_int8_t instance_hashfn(u_int16_t group_num)
+ {
+ 	unsigned int low_byte = group_num & 0xff;
+
+ 	return low_byte % INSTANCE_BUCKETS;
+ }
+
+ /*
+ * Find the instance for @group_num in its hash bucket. Returns NULL when
+ * there is none. Takes no lock and no reference itself; the callers in this
+ * file hold instances_lock around it.
+ */
+ static struct nfulnl_instance *
+ __instance_lookup(u_int16_t group_num)
+ {
+ struct hlist_head *head;
+ struct hlist_node *pos;
+ struct nfulnl_instance *inst;
+
+ head = &instance_table[instance_hashfn(group_num)];
+ hlist_for_each_entry(inst, pos, head, hlist) {
+ if (inst->group_num == group_num)
+ return inst;
+ }
+ return NULL;
+ }
+
+ /* Take an additional reference on @inst. */
+ static inline void
+ instance_get(struct nfulnl_instance *inst)
+ {
+ atomic_inc(&inst->use);
+ }
+
+ /*
+ * Look up the instance for @group_num under the instances read lock and, if
+ * found, return it with an extra reference that the caller must drop with
+ * instance_put(). Returns NULL when no such instance exists.
+ */
+ static struct nfulnl_instance *
+ instance_lookup_get(u_int16_t group_num)
+ {
+ struct nfulnl_instance *inst;
+
+ read_lock_bh(&instances_lock);
+ inst = __instance_lookup(group_num);
+ if (inst)
+ instance_get(inst);
+ read_unlock_bh(&instances_lock);
+
+ return inst;
+ }
+
+ /*
+  * Drop one reference on @inst (NULL is tolerated).  When the last
+  * reference goes away the instance is freed and the module reference
+  * taken in instance_create() is released.
+  */
+ static void
+ instance_put(struct nfulnl_instance *inst)
+ {
+ 	if (!inst || !atomic_dec_and_test(&inst->use))
+ 		return;
+
+ 	kfree(inst);
+ 	module_put(THIS_MODULE);
+ }
+
+static void nfulnl_timer(unsigned long data);
+
+ /*
+ * Allocate an instance for @group_num owned by peer @pid, initialise it with
+ * the default settings and insert it into instance_table.
+ *
+ * The instance is returned with a use count of two: one reference for the
+ * table and one for the caller, who is expected to instance_put() it. A
+ * module reference is taken per instance and released in instance_put().
+ *
+ * Returns the new instance, or ERR_PTR(-EEXIST) if the group already has
+ * one, ERR_PTR(-ENOMEM) on allocation failure, or ERR_PTR(-EAGAIN) if the
+ * module reference could not be taken.
+ */
+ static struct nfulnl_instance *
+ instance_create(u_int16_t group_num, int pid)
+ {
+ struct nfulnl_instance *inst;
+ int err;
+
+ write_lock_bh(&instances_lock);
+ if (__instance_lookup(group_num)) {
+ err = -EEXIST;
+ goto out_unlock;
+ }
+
+ /* GFP_ATOMIC: allocated while holding instances_lock with BHs disabled */
+ inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
+ if (!inst) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ if (!try_module_get(THIS_MODULE)) {
+ kfree(inst);
+ err = -EAGAIN;
+ goto out_unlock;
+ }
+
+ INIT_HLIST_NODE(&inst->hlist);
+ spin_lock_init(&inst->lock);
+ /* needs to be two, since we _put() after creation */
+ atomic_set(&inst->use, 2);
+
+ setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
+
+ inst->peer_pid = pid;
+ inst->group_num = group_num;
+
+ inst->qthreshold = NFULNL_QTHRESH_DEFAULT;
+ inst->flushtimeout = NFULNL_TIMEOUT_DEFAULT;
+ inst->nlbufsiz = NFULNL_NLBUFSIZ_DEFAULT;
+ inst->copy_mode = NFULNL_COPY_PACKET;
+ inst->copy_range = NFULNL_COPY_RANGE_MAX;
+
+ hlist_add_head(&inst->hlist,
+ &instance_table[instance_hashfn(group_num)]);
+
+ write_unlock_bh(&instances_lock);
+
+ return inst;
+
+ out_unlock:
+ write_unlock_bh(&instances_lock);
+ return ERR_PTR(err);
+ }
+
+static void __nfulnl_flush(struct nfulnl_instance *inst);
+
+ /*
+ * Unlink @inst from the instance table, flush any pending batch and drop one
+ * reference. Does not take instances_lock itself; instance_destroy() wraps
+ * this call in the write lock.
+ */
+ static void
+ __instance_destroy(struct nfulnl_instance *inst)
+ {
+ /* first pull it out of the global list */
+ hlist_del(&inst->hlist);
+
+ /* then flush all pending packets from skb */
+
+ spin_lock_bh(&inst->lock);
+ if (inst->skb)
+ __nfulnl_flush(inst);
+ spin_unlock_bh(&inst->lock);
+
+ /* and finally put the refcount */
+ instance_put(inst);
+ }
+
+ /* Locked wrapper: run __instance_destroy() under the instances write lock. */
+ static inline void
+ instance_destroy(struct nfulnl_instance *inst)
+ {
+ write_lock_bh(&instances_lock);
+ __instance_destroy(inst);
+ write_unlock_bh(&instances_lock);
+ }
+
+ /*
+  * Set the copy mode of @inst.  NFULNL_COPY_NONE and NFULNL_COPY_META
+  * force the copy range to 0; NFULNL_COPY_PACKET stores @range capped at
+  * NFULNL_COPY_RANGE_MAX.  Returns 0, or -EINVAL for an unknown @mode
+  * (the instance is left unchanged in that case).
+  */
+ static int
+ nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode,
+ 		unsigned int range)
+ {
+ 	int status = 0;
+
+ 	spin_lock_bh(&inst->lock);
+
+ 	if (mode == NFULNL_COPY_NONE || mode == NFULNL_COPY_META) {
+ 		inst->copy_mode = mode;
+ 		inst->copy_range = 0;
+ 	} else if (mode == NFULNL_COPY_PACKET) {
+ 		inst->copy_mode = mode;
+ 		inst->copy_range = min_t(unsigned int,
+ 					 range, NFULNL_COPY_RANGE_MAX);
+ 	} else {
+ 		status = -EINVAL;
+ 	}
+
+ 	spin_unlock_bh(&inst->lock);
+
+ 	return status;
+ }
+
+ /*
+  * Set the netlink buffer allocation size of @inst.  Accepts values from
+  * NFULNL_NLBUFSIZ_DEFAULT up to 131072 inclusive; anything else yields
+  * -ERANGE and leaves the instance unchanged.  Returns 0 on success.
+  */
+ static int
+ nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz)
+ {
+ 	int status = 0;
+
+ 	spin_lock_bh(&inst->lock);
+ 	if (nlbufsiz < NFULNL_NLBUFSIZ_DEFAULT || nlbufsiz > 131072)
+ 		status = -ERANGE;
+ 	else
+ 		inst->nlbufsiz = nlbufsiz;
+ 	spin_unlock_bh(&inst->lock);
+
+ 	return status;
+ }
+
+ /* Store @timeout as the flush timeout of @inst (under inst->lock). Always returns 0. */
+ static int
+ nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout)
+ {
+ spin_lock_bh(&inst->lock);
+ inst->flushtimeout = timeout;
+ spin_unlock_bh(&inst->lock);
+
+ return 0;
+ }
+
+ /* Store @qthresh as the queue threshold of @inst (under inst->lock). Always returns 0. */
+ static int
+ nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh)
+ {
+ spin_lock_bh(&inst->lock);
+ inst->qthreshold = qthresh;
+ spin_unlock_bh(&inst->lock);
+
+ return 0;
+ }
+
+ /* Replace the flags of @inst with @flags (under inst->lock). Always returns 0. */
+ static int
+ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags)
+ {
+ spin_lock_bh(&inst->lock);
+ inst->flags = flags;
+ spin_unlock_bh(&inst->lock);
+
+ return 0;
+ }
+
+ /*
+ * Allocate a batch buffer of max(@inst_size, @pkt_size) bytes with
+ * GFP_ATOMIC. If that fails and the requested size was larger than
+ * @pkt_size, retry with just @pkt_size. Returns the skb, or NULL when no
+ * allocation succeeded.
+ */
+ static struct sk_buff *
+ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size)
+ {
+ struct sk_buff *skb;
+ unsigned int n;
+
+ /* alloc skb which should be big enough for a whole multipart
+ * message. WARNING: has to be <= 128k due to slab restrictions */
+
+ n = max(inst_size, pkt_size);
+ skb = alloc_skb(n, GFP_ATOMIC);
+ if (!skb) {
+ /* NOTE(review): logs inst_size, although n bytes were requested */
+ PRINTR("nfnetlink_log: can't alloc whole buffer (%u bytes)\n",
+ inst_size);
+
+ if (n > pkt_size) {
+ /* try to allocate only as much as we need for current
+ * packet */
+
+ skb = alloc_skb(pkt_size, GFP_ATOMIC);
+ if (!skb)
+ PRINTR("nfnetlink_log: can't even alloc %u "
+ "bytes\n", pkt_size);
+ }
+ }
+
+ return skb;
+ }
+
+ /*
+  * Hand the instance's batch buffer to the peer and reset the batch.
+  *
+  * When more than one message is queued, an NLMSG_DONE trailer is
+  * appended first.  If the trailer does not fit, the buffer cannot be
+  * sent; it is freed here instead of being left attached to the
+  * instance, where it would never be flushed and would leak once the
+  * instance is destroyed.  Either way inst->skb and inst->qlen are reset
+  * on return.
+  *
+  * Callers must make sure inst->skb is non-NULL; the callers in this
+  * file also hold inst->lock.
+  *
+  * Returns the nfnetlink_unicast() result, or -1 if the trailer could
+  * not be added.
+  */
+ static int
+ __nfulnl_send(struct nfulnl_instance *inst)
+ {
+ 	int status = -1;
+
+ 	/* NLMSG_PUT() jumps to nlmsg_failure when the skb lacks tailroom */
+ 	if (inst->qlen > 1)
+ 		NLMSG_PUT(inst->skb, 0, 0,
+ 			  NLMSG_DONE,
+ 			  sizeof(struct nfgenmsg));
+
+ 	status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT);
+ 	goto out;
+
+ nlmsg_failure:
+ 	/* never handed to netlink, so the buffer is still ours to free */
+ 	kfree_skb(inst->skb);
+ out:
+ 	inst->qlen = 0;
+ 	inst->skb = NULL;
+ 	return status;
+ }
+
+ /*
+ * Cancel a pending flush timer (dropping the reference it holds) and send
+ * whatever is currently batched in inst->skb.
+ */
+ static void
+ __nfulnl_flush(struct nfulnl_instance *inst)
+ {
+ /* timer holds a reference */
+ if (del_timer(&inst->timer))
+ instance_put(inst);
+ if (inst->skb)
+ __nfulnl_send(inst);
+ }
+
+ /*
+ * Flush timer callback; @data is the instance pointer given to
+ * setup_timer(). Sends the pending batch under inst->lock and then drops the
+ * reference that was held on behalf of the timer.
+ */
+ static void
+ nfulnl_timer(unsigned long data)
+ {
+ struct nfulnl_instance *inst = (struct nfulnl_instance *)data;
+
+ spin_lock_bh(&inst->lock);
+ if (inst->skb)
+ __nfulnl_send(inst);
+ spin_unlock_bh(&inst->lock);
+ instance_put(inst);
+ }
+
+ /*
+  * Append one NFULNL_MSG_PACKET netlink message describing @skb to the
+  * instance's batch buffer (inst->skb).
+  *
+  * The message carries the packet header attribute, the optional @prefix
+  * (@plen bytes, which the caller computes including the terminating NUL),
+  * in/out interface indexes (with the bridge/physical device variants when
+  * CONFIG_BRIDGE_NETFILTER is set), mark, hardware address and header,
+  * timestamp, UID/GID of the owning socket, the sequence numbers selected
+  * by inst->flags and finally @data_len bytes of packet payload.
+  *
+  * The on-stack structures that are copied into the message are zeroed
+  * before they are filled in, so that bytes not explicitly set here never
+  * carry stale kernel stack contents out to the listener.
+  *
+  * @li is accepted for interface symmetry and not used here.
+  *
+  * This is an inline function, we don't really care about a long
+  * list of arguments.
+  *
+  * Returns 0 on success, -1 if the message did not fit into inst->skb.
+  */
+ static inline int
+ __build_packet_message(struct nfulnl_instance *inst,
+ 			const struct sk_buff *skb,
+ 			unsigned int data_len,
+ 			u_int8_t pf,
+ 			unsigned int hooknum,
+ 			const struct net_device *indev,
+ 			const struct net_device *outdev,
+ 			const struct nf_loginfo *li,
+ 			const char *prefix, unsigned int plen)
+ {
+ 	struct nfulnl_msg_packet_hdr pmsg;
+ 	struct nlmsghdr *nlh;
+ 	struct nfgenmsg *nfmsg;
+ 	sk_buff_data_t old_tail = inst->skb->tail;
+
+ 	/* NLMSG_PUT()/NLA_PUT*() jump to the failure labels when out of room */
+ 	nlh = NLMSG_PUT(inst->skb, 0, 0,
+ 			NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
+ 			sizeof(struct nfgenmsg));
+ 	nfmsg = NLMSG_DATA(nlh);
+ 	nfmsg->nfgen_family = pf;
+ 	nfmsg->version = NFNETLINK_V0;
+ 	nfmsg->res_id = htons(inst->group_num);
+
+ 	/* zero first: the whole struct, not just these two fields, is copied */
+ 	memset(&pmsg, 0, sizeof(pmsg));
+ 	pmsg.hw_protocol = skb->protocol;
+ 	pmsg.hook = hooknum;
+
+ 	NLA_PUT(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg);
+
+ 	if (prefix)
+ 		NLA_PUT(inst->skb, NFULA_PREFIX, plen, prefix);
+
+ 	if (indev) {
+ #ifndef CONFIG_BRIDGE_NETFILTER
+ 		NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
+ 			     htonl(indev->ifindex));
+ #else
+ 		if (pf == PF_BRIDGE) {
+ 			/* Case 1: indev is the physical input device, we need
+ 			 * to look for the bridge group (when called from
+ 			 * netfilter_bridge) */
+ 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
+ 				     htonl(indev->ifindex));
+ 			/* this is the bridge group "brX" */
+ 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
+ 				     htonl(indev->br_port->br->dev->ifindex));
+ 		} else {
+ 			/* Case 2: indev is bridge group, we need to look for
+ 			 * physical device (when called from ipv4) */
+ 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
+ 				     htonl(indev->ifindex));
+ 			if (skb->nf_bridge && skb->nf_bridge->physindev)
+ 				NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
+ 					     htonl(skb->nf_bridge->physindev->ifindex));
+ 		}
+ #endif
+ 	}
+
+ 	if (outdev) {
+ #ifndef CONFIG_BRIDGE_NETFILTER
+ 		NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
+ 			     htonl(outdev->ifindex));
+ #else
+ 		if (pf == PF_BRIDGE) {
+ 			/* Case 1: outdev is physical output device, we need to
+ 			 * look for bridge group (when called from
+ 			 * netfilter_bridge) */
+ 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
+ 				     htonl(outdev->ifindex));
+ 			/* this is the bridge group "brX" */
+ 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
+ 				     htonl(outdev->br_port->br->dev->ifindex));
+ 		} else {
+ 			/* Case 2: outdev is a bridge group, we need to look
+ 			 * for physical device (when called from ipv4) */
+ 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
+ 				     htonl(outdev->ifindex));
+ 			if (skb->nf_bridge && skb->nf_bridge->physoutdev)
+ 				NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
+ 					     htonl(skb->nf_bridge->physoutdev->ifindex));
+ 		}
+ #endif
+ 	}
+
+ 	if (skb->mark)
+ 		NLA_PUT_BE32(inst->skb, NFULA_MARK, htonl(skb->mark));
+
+ 	if (indev && skb->dev) {
+ 		struct nfulnl_msg_packet_hw phw;
+ 		int len;
+
+ 		/* dev_parse_header() only fills part of hw_addr; clear the rest */
+ 		memset(&phw, 0, sizeof(phw));
+ 		len = dev_parse_header(skb, phw.hw_addr);
+ 		if (len > 0) {
+ 			phw.hw_addrlen = htons(len);
+ 			NLA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw);
+ 		}
+ 	}
+
+ 	if (indev && skb_mac_header_was_set(skb)) {
+ 		NLA_PUT_BE16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type));
+ 		NLA_PUT_BE16(inst->skb, NFULA_HWLEN,
+ 			     htons(skb->dev->hard_header_len));
+ 		NLA_PUT(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len,
+ 			skb_mac_header(skb));
+ 	}
+
+ 	if (skb->tstamp.tv64) {
+ 		struct nfulnl_msg_packet_timestamp ts;
+ 		struct timeval tv = ktime_to_timeval(skb->tstamp);
+ 		ts.sec = cpu_to_be64(tv.tv_sec);
+ 		ts.usec = cpu_to_be64(tv.tv_usec);
+
+ 		NLA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts);
+ 	}
+
+ 	/* UID */
+ 	if (skb->sk) {
+ 		read_lock_bh(&skb->sk->sk_callback_lock);
+ 		if (skb->sk->sk_socket && skb->sk->sk_socket->file) {
+ 			__be32 uid = htonl(skb->sk->sk_socket->file->f_uid);
+ 			__be32 gid = htonl(skb->sk->sk_socket->file->f_gid);
+ 			/* need to unlock here since NLA_PUT may goto */
+ 			read_unlock_bh(&skb->sk->sk_callback_lock);
+ 			NLA_PUT_BE32(inst->skb, NFULA_UID, uid);
+ 			NLA_PUT_BE32(inst->skb, NFULA_GID, gid);
+ 		} else
+ 			read_unlock_bh(&skb->sk->sk_callback_lock);
+ 	}
+
+ 	/* local sequence number */
+ 	if (inst->flags & NFULNL_CFG_F_SEQ)
+ 		NLA_PUT_BE32(inst->skb, NFULA_SEQ, htonl(inst->seq++));
+
+ 	/* global sequence number */
+ 	if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
+ 		NLA_PUT_BE32(inst->skb, NFULA_SEQ_GLOBAL,
+ 			     htonl(atomic_inc_return(&global_seq)));
+
+ 	if (data_len) {
+ 		struct nlattr *nla;
+ 		int size = nla_attr_size(data_len);
+
+ 		if (skb_tailroom(inst->skb) < nla_total_size(data_len)) {
+ 			printk(KERN_WARNING "nfnetlink_log: no tailroom!\n");
+ 			goto nlmsg_failure;
+ 		}
+
+ 		/* payload attribute is built by hand so the data is copied once */
+ 		nla = (struct nlattr *)skb_put(inst->skb, nla_total_size(data_len));
+ 		nla->nla_type = NFULA_PAYLOAD;
+ 		nla->nla_len = size;
+
+ 		if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
+ 			BUG();
+ 	}
+
+ 	/* message length is everything appended since entry */
+ 	nlh->nlmsg_len = inst->skb->tail - old_tail;
+ 	return 0;
+
+ nlmsg_failure:
+ nla_put_failure:
+ 	PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n");
+ 	return -1;
+ }
+
+/* Acknowledge the current netlink message with (err) via netlink_ack() and
+ * return from the enclosing function.  Expands to a bare `return;`, and uses
+ * the identifiers `skb` and `nlh` from the caller's scope. */
+#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
+
+/* Fallback log parameters: nfulnl_log_packet() uses these when the caller
+ * passes no loginfo or one whose type is not NF_LOG_TYPE_ULOG. */
+static struct nf_loginfo default_loginfo = {
+ .type = NF_LOG_TYPE_ULOG,
+ .u = {
+ .ulog = {
+ .copy_len = 0xffff,
+ .group = 0,
+ .qthreshold = 1,
+ },
+ },
+};
+
+/*
+ * Log handler for the internal netfilter logging API (installed as
+ * nfulnl_logger.logfn).
+ *
+ * Picks the log parameters (li_user if it is of type NF_LOG_TYPE_ULOG,
+ * default_loginfo otherwise), looks up the instance bound to that group,
+ * computes a worst-case message size, appends the message to the instance's
+ * pending skb and then either flushes it or arms the flush timer.
+ * Returns silently when no instance exists for the group.
+ *
+ * pf:       protocol family of the packet
+ * hooknum:  netfilter hook number
+ * skb:      packet being logged
+ * in, out:  input / output device, forwarded to __build_packet_message()
+ * li_user:  per-rule log parameters, may be NULL
+ * prefix:   optional NUL-terminated prefix string, may be NULL
+ */
+static void
+nfulnl_log_packet(u_int8_t pf,
+		  unsigned int hooknum,
+		  const struct sk_buff *skb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  const struct nf_loginfo *li_user,
+		  const char *prefix)
+{
+	unsigned int size, data_len;
+	struct nfulnl_instance *inst;
+	const struct nf_loginfo *li;
+	unsigned int qthreshold;
+	unsigned int plen;
+
+	if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
+		li = li_user;
+	else
+		li = &default_loginfo;
+
+	inst = instance_lookup_get(li->u.ulog.group);
+	if (!inst)
+		return;
+
+	plen = 0;
+	if (prefix)
+		plen = strlen(prefix) + 1;
+
+	/* FIXME: do we want to make the size calculation conditional based on
+	 * what is actually present?  way more branches and checks, but more
+	 * memory efficient... */
+	size =    NLMSG_SPACE(sizeof(struct nfgenmsg))
+		+ nla_total_size(sizeof(struct nfulnl_msg_packet_hdr))
+		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
+		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
+#ifdef CONFIG_BRIDGE_NETFILTER
+		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
+		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
+#endif
+		+ nla_total_size(sizeof(u_int32_t))	/* mark */
+		+ nla_total_size(sizeof(u_int32_t))	/* uid */
+		+ nla_total_size(sizeof(u_int32_t))	/* gid */
+		+ nla_total_size(plen)			/* prefix */
+		+ nla_total_size(sizeof(struct nfulnl_msg_packet_hw))
+		+ nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp));
+
+	spin_lock_bh(&inst->lock);
+
+	if (inst->flags & NFULNL_CFG_F_SEQ)
+		size += nla_total_size(sizeof(u_int32_t));
+	if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
+		size += nla_total_size(sizeof(u_int32_t));
+
+	qthreshold = inst->qthreshold;
+	/* per-rule qthreshold overrides per-instance */
+	if (qthreshold > li->u.ulog.qthreshold)
+		qthreshold = li->u.ulog.qthreshold;
+
+	switch (inst->copy_mode) {
+	case NFULNL_COPY_META:
+	case NFULNL_COPY_NONE:
+		data_len = 0;
+		break;
+
+	case NFULNL_COPY_PACKET:
+		if (inst->copy_range == 0
+		    || inst->copy_range > skb->len)
+			data_len = skb->len;
+		else
+			data_len = inst->copy_range;
+
+		size += nla_total_size(data_len);
+		break;
+
+	default:
+		goto unlock_and_release;
+	}
+
+	/* Compare as "needed > available" with the addition on the left:
+	 * the former "tailroom - sizeof()" form is evaluated in an unsigned
+	 * type and wraps to a huge value once the tailroom drops below
+	 * sizeof(struct nfgenmsg), which suppressed the flush exactly when
+	 * the buffer was full. */
+	if (inst->skb &&
+	    size + sizeof(struct nfgenmsg) > skb_tailroom(inst->skb)) {
+		/* either the queue len is too high or we don't have
+		 * enough room in the skb left. flush to userspace. */
+		__nfulnl_flush(inst);
+	}
+
+	if (!inst->skb) {
+		inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size);
+		if (!inst->skb)
+			goto alloc_failure;
+	}
+
+	inst->qlen++;
+
+	__build_packet_message(inst, skb, data_len, pf,
+				hooknum, in, out, li, prefix, plen);
+
+	if (inst->qlen >= qthreshold)
+		__nfulnl_flush(inst);
+	/* timer_pending always called within inst->lock, so there
+	 * is no chance of a race here */
+	else if (!timer_pending(&inst->timer)) {
+		/* the armed timer holds its own reference on the instance */
+		instance_get(inst);
+		inst->timer.expires = jiffies + (inst->flushtimeout*HZ/100);
+		add_timer(&inst->timer);
+	}
+
+unlock_and_release:
+	spin_unlock_bh(&inst->lock);
+	instance_put(inst);
+	return;
+
+alloc_failure:
+	/* FIXME: statistics */
+	goto unlock_and_release;
+}
+
+/*
+ * Netlink notifier callback.  On NETLINK_URELEASE of a NETLINK_NETFILTER
+ * socket with a non-zero pid, walks every hash bucket under instances_lock
+ * and destroys the instances whose peer_pid matches (init_net only).
+ * Always returns NOTIFY_DONE.
+ */
+static int
+nfulnl_rcv_nl_event(struct notifier_block *this,
+		    unsigned long event, void *ptr)
+{
+	struct netlink_notify *notify = ptr;
+	struct hlist_node *pos, *nxt;
+	struct nfulnl_instance *inst;
+	int bucket;
+
+	if (event != NETLINK_URELEASE)
+		return NOTIFY_DONE;
+	if (notify->protocol != NETLINK_NETFILTER || !notify->pid)
+		return NOTIFY_DONE;
+
+	/* destroy all instances for this pid */
+	write_lock_bh(&instances_lock);
+	for (bucket = 0; bucket < INSTANCE_BUCKETS; bucket++) {
+		struct hlist_head *head = &instance_table[bucket];
+
+		hlist_for_each_entry_safe(inst, pos, nxt, head, hlist) {
+			if (notify->net != &init_net)
+				continue;
+			if (notify->pid == inst->peer_pid)
+				__instance_destroy(inst);
+		}
+	}
+	write_unlock_bh(&instances_lock);
+
+	return NOTIFY_DONE;
+}
+
+/* Netlink notifier block; registered in nfnetlink_log_init() and
+ * unregistered in nfnetlink_log_fini(). */
+static struct notifier_block nfulnl_rtnl_notifier = {
+ .notifier_call = nfulnl_rcv_nl_event,
+};
+
+/* Callback for NFULNL_MSG_PACKET messages (see nfulnl_cb): always rejects
+ * the message with -ENOTSUPP; all arguments are ignored. */
+static int
+nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct nlattr *nfqa[])
+{
+ return -ENOTSUPP;
+}
+
+/* nf_logger descriptor for this module; passed to nf_log_register() on
+ * NFULNL_CFG_CMD_PF_BIND and to nf_log_unregister() at module exit. */
+static const struct nf_logger nfulnl_logger = {
+ .name = "nfnetlink_log",
+ .logfn = &nfulnl_log_packet,
+ .me = THIS_MODULE,
+};
+
+/* Attribute policy for NFULNL_MSG_CONFIG messages (referenced from
+ * nfulnl_cb): struct-carrying attributes are given by length, the scalar
+ * ones by netlink type. */
+static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = {
+ [NFULA_CFG_CMD] = { .len = sizeof(struct nfulnl_msg_config_cmd) },
+ [NFULA_CFG_MODE] = { .len = sizeof(struct nfulnl_msg_config_mode) },
+ [NFULA_CFG_TIMEOUT] = { .type = NLA_U32 },
+ [NFULA_CFG_QTHRESH] = { .type = NLA_U32 },
+ [NFULA_CFG_NLBUFSIZ] = { .type = NLA_U32 },
+ [NFULA_CFG_FLAGS] = { .type = NLA_U16 },
+};
+
+/*
+ * Handler for NFULNL_MSG_CONFIG.
+ *
+ * The group number is taken from nfmsg->res_id.  PF_BIND / PF_UNBIND
+ * commands are handled first and return immediately.  Otherwise the
+ * instance for the group is looked up (taking a reference); a caller whose
+ * netlink pid differs from the instance's peer_pid gets -EPERM.  BIND
+ * creates the instance, UNBIND destroys it, and the remaining attributes
+ * (mode, timeout, buffer size, queue threshold, flags) are applied to the
+ * instance, each requiring that one exists (-ENODEV otherwise).
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int
+nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
+		   struct nlmsghdr *nlh, struct nlattr *nfula[])
+{
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int16_t group_num = ntohs(nfmsg->res_id);
+	struct nfulnl_instance *inst;
+	struct nfulnl_msg_config_cmd *cmd = NULL;
+	int ret = 0;
+
+	if (nfula[NFULA_CFG_CMD]) {
+		u_int8_t pf = nfmsg->nfgen_family;
+		cmd = nla_data(nfula[NFULA_CFG_CMD]);
+
+		/* Commands without queue context */
+		switch (cmd->command) {
+		case NFULNL_CFG_CMD_PF_BIND:
+			return nf_log_register(pf, &nfulnl_logger);
+		case NFULNL_CFG_CMD_PF_UNBIND:
+			nf_log_unregister_pf(pf);
+			return 0;
+		}
+	}
+
+	inst = instance_lookup_get(group_num);
+	if (inst && inst->peer_pid != NETLINK_CB(skb).pid) {
+		ret = -EPERM;
+		goto out_put;
+	}
+
+	if (cmd != NULL) {
+		switch (cmd->command) {
+		case NFULNL_CFG_CMD_BIND:
+			if (inst) {
+				ret = -EBUSY;
+				goto out_put;
+			}
+
+			inst = instance_create(group_num,
+					       NETLINK_CB(skb).pid);
+			if (IS_ERR(inst)) {
+				ret = PTR_ERR(inst);
+				goto out;
+			}
+			break;
+		case NFULNL_CFG_CMD_UNBIND:
+			if (!inst) {
+				ret = -ENODEV;
+				goto out;
+			}
+
+			instance_destroy(inst);
+			/* Still drop the reference taken by
+			 * instance_lookup_get() above; jumping to "out"
+			 * here leaked it. */
+			goto out_put;
+		default:
+			/* Do not apply the remaining attributes of a
+			 * message whose command we reject. */
+			ret = -ENOTSUPP;
+			goto out_put;
+		}
+	}
+
+	if (nfula[NFULA_CFG_MODE]) {
+		struct nfulnl_msg_config_mode *params;
+		params = nla_data(nfula[NFULA_CFG_MODE]);
+
+		if (!inst) {
+			ret = -ENODEV;
+			goto out;
+		}
+		nfulnl_set_mode(inst, params->copy_mode,
+				ntohl(params->copy_range));
+	}
+
+	if (nfula[NFULA_CFG_TIMEOUT]) {
+		__be32 timeout = nla_get_be32(nfula[NFULA_CFG_TIMEOUT]);
+
+		if (!inst) {
+			ret = -ENODEV;
+			goto out;
+		}
+		nfulnl_set_timeout(inst, ntohl(timeout));
+	}
+
+	if (nfula[NFULA_CFG_NLBUFSIZ]) {
+		__be32 nlbufsiz = nla_get_be32(nfula[NFULA_CFG_NLBUFSIZ]);
+
+		if (!inst) {
+			ret = -ENODEV;
+			goto out;
+		}
+		nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz));
+	}
+
+	if (nfula[NFULA_CFG_QTHRESH]) {
+		__be32 qthresh = nla_get_be32(nfula[NFULA_CFG_QTHRESH]);
+
+		if (!inst) {
+			ret = -ENODEV;
+			goto out;
+		}
+		nfulnl_set_qthresh(inst, ntohl(qthresh));
+	}
+
+	if (nfula[NFULA_CFG_FLAGS]) {
+		__be16 flags = nla_get_be16(nfula[NFULA_CFG_FLAGS]);
+
+		if (!inst) {
+			ret = -ENODEV;
+			goto out;
+		}
+		nfulnl_set_flags(inst, ntohs(flags));
+	}
+
+out_put:
+	/* inst may be NULL here (no command, no attributes, no instance) */
+	instance_put(inst);
+out:
+	return ret;
+}
+
+/* Per-message-type dispatch table: packet messages from userspace are
+ * rejected, config messages are validated against nfula_cfg_policy. */
+static const struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = {
+ [NFULNL_MSG_PACKET] = { .call = nfulnl_recv_unsupp,
+ .attr_count = NFULA_MAX, },
+ [NFULNL_MSG_CONFIG] = { .call = nfulnl_recv_config,
+ .attr_count = NFULA_CFG_MAX,
+ .policy = nfula_cfg_policy },
+};
+
+/* nfnetlink subsystem descriptor, (un)registered in module init/exit. */
+static const struct nfnetlink_subsystem nfulnl_subsys = {
+ .name = "log",
+ .subsys_id = NFNL_SUBSYS_ULOG,
+ .cb_count = NFULNL_MSG_MAX,
+ .cb = nfulnl_cb,
+};
+
+#ifdef CONFIG_PROC_FS
+/* seq_file private state: index of the instance_table bucket the iterator
+ * is currently walking. */
+struct iter_state {
+ unsigned int bucket;
+};
+
+/* Position the iterator on the first non-empty bucket and return that
+ * bucket's first node.  Returns NULL when st is NULL or when every bucket
+ * is empty (st->bucket is then INSTANCE_BUCKETS). */
+static struct hlist_node *get_first(struct iter_state *st)
+{
+	if (st == NULL)
+		return NULL;
+
+	st->bucket = 0;
+	while (st->bucket < INSTANCE_BUCKETS) {
+		struct hlist_head *head = &instance_table[st->bucket];
+
+		if (!hlist_empty(head))
+			return head->first;
+		st->bucket++;
+	}
+	return NULL;
+}
+
+/* Return the node following h, moving on to the first node of later
+ * buckets when the current chain is exhausted; NULL once the last bucket
+ * has been passed. */
+static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
+{
+	struct hlist_node *nxt;
+
+	for (nxt = h->next; nxt == NULL;
+	     nxt = instance_table[st->bucket].first) {
+		st->bucket++;
+		if (st->bucket >= INSTANCE_BUCKETS)
+			return NULL;
+	}
+	return nxt;
+}
+
+/* Return the pos-th node (0-based) in iteration order, or NULL when fewer
+ * than pos + 1 nodes exist. */
+static struct hlist_node *get_idx(struct iter_state *st, loff_t pos)
+{
+	struct hlist_node *node = get_first(st);
+
+	while (node && pos) {
+		node = get_next(st, node);
+		if (node)
+			pos--;
+	}
+
+	if (pos)
+		return NULL;
+	return node;
+}
+
+/* seq_file .start: takes instances_lock for reading (released in
+ * seq_stop) and returns the node at index *pos, or NULL. */
+static void *seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(instances_lock)
+{
+ read_lock_bh(&instances_lock);
+ return get_idx(seq->private, *pos);
+}
+
+/* seq_file .next: bumps *pos and returns the node after v, or NULL. */
+static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return get_next(s->private, v);
+}
+
+/* seq_file .stop: drops the read lock taken in seq_start. */
+static void seq_stop(struct seq_file *s, void *v)
+ __releases(instances_lock)
+{
+ read_unlock_bh(&instances_lock);
+}
+
+/* seq_file .show: prints one line per instance: group number, peer pid,
+ * queue length, copy mode, copy range, flush timeout and use count.
+ * NOTE(review): v is the hlist_node pointer produced by get_first/get_next
+ * and is used directly as the instance pointer -- presumably hlist is the
+ * first member of struct nfulnl_instance; the struct is not visible here,
+ * confirm.  Field signedness versus the %d conversions likewise cannot be
+ * checked from this chunk. */
+static int seq_show(struct seq_file *s, void *v)
+{
+ const struct nfulnl_instance *inst = v;
+
+ return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n",
+ inst->group_num,
+ inst->peer_pid, inst->qlen,
+ inst->copy_mode, inst->copy_range,
+ inst->flushtimeout, atomic_read(&inst->use));
+}
+
+/* Iterator callbacks for the "nfnetlink_log" proc file. */
+static const struct seq_operations nful_seq_ops = {
+ .start = seq_start,
+ .next = seq_next,
+ .stop = seq_stop,
+ .show = seq_show,
+};
+
+/* open() for the proc file: sets up the seq_file with a private
+ * struct iter_state allocated by seq_open_private(). */
+static int nful_open(struct inode *inode, struct file *file)
+{
+ return seq_open_private(file, &nful_seq_ops,
+ sizeof(struct iter_state));
+}
+
+/* File operations of the proc entry created in nfnetlink_log_init();
+ * release uses seq_release_private to match seq_open_private in open. */
+static const struct file_operations nful_file_ops = {
+ .owner = THIS_MODULE,
+ .open = nful_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+#endif /* PROC_FS */
+
+/*
+ * Module init: initialises the instance hash table and hash seed, then
+ * registers the netlink notifier, the nfnetlink subsystem and (with
+ * CONFIG_PROC_FS) the "nfnetlink_log" proc entry.  On failure the steps
+ * already done are undone in reverse order and a negative errno is
+ * returned.
+ */
+static int __init nfnetlink_log_init(void)
+{
+	int i, status;
+
+	for (i = 0; i < INSTANCE_BUCKETS; i++)
+		INIT_HLIST_HEAD(&instance_table[i]);
+
+	/* it's not really all that important to have a random value, so
+	 * we can do this from the init function, even if there hasn't
+	 * been that much entropy yet */
+	get_random_bytes(&hash_init, sizeof(hash_init));
+
+	netlink_register_notifier(&nfulnl_rtnl_notifier);
+	status = nfnetlink_subsys_register(&nfulnl_subsys);
+	if (status < 0) {
+		printk(KERN_ERR "log: failed to create netlink socket\n");
+		goto cleanup_netlink_notifier;
+	}
+
+#ifdef CONFIG_PROC_FS
+	if (!proc_create("nfnetlink_log", 0440,
+			 proc_net_netfilter, &nful_file_ops)) {
+		/* status still holds the successful result of the subsystem
+		 * registration here; without this assignment the module
+		 * would report success after tearing everything down. */
+		status = -ENOMEM;
+		goto cleanup_subsys;
+	}
+#endif
+	return status;
+
+#ifdef CONFIG_PROC_FS
+cleanup_subsys:
+	nfnetlink_subsys_unregister(&nfulnl_subsys);
+#endif
+cleanup_netlink_notifier:
+	netlink_unregister_notifier(&nfulnl_rtnl_notifier);
+	return status;
+}
+
+/* Module exit: unregisters the logger first, then removes the proc entry,
+ * the nfnetlink subsystem and the netlink notifier. */
+static void __exit nfnetlink_log_fini(void)
+{
+ nf_log_unregister(&nfulnl_logger);
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("nfnetlink_log", proc_net_netfilter);
+#endif
+ nfnetlink_subsys_unregister(&nfulnl_subsys);
+ netlink_unregister_notifier(&nfulnl_rtnl_notifier);
+}
+
+MODULE_DESCRIPTION("netfilter userspace logging");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG);
+
+module_init(nfnetlink_log_init);
+module_exit(nfnetlink_log_fini);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
new file mode 100644
index 0000000..8c86011
--- /dev/null
+++ b/net/netfilter/nfnetlink_queue.c
@@ -0,0 +1,943 @@
+/*
+ * This is a module which is used for queueing packets and communicating with
+ * userspace via nfnetlink.
+ *
+ * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ * (C) 2007 by Patrick McHardy <kaber@trash.net>
+ *
+ * Based on the old ipv4-only ip_queue.c:
+ * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
+ * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/proc_fs.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_queue.h>
+#include <linux/list.h>
+#include <net/sock.h>
+#include <net/netfilter/nf_queue.h>
+
+#include <asm/atomic.h>
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+#include "../bridge/br_private.h"
+#endif
+
+/* Initial queue_maxlen given to every instance by instance_create(). */
+#define NFQNL_QMAX_DEFAULT 1024
+
+/* One userspace queue.  Instances live in instance_table, are looked up
+ * with RCU list primitives and are freed from an RCU callback. */
+struct nfqnl_instance {
+ struct hlist_node hlist; /* global list of queues */
+ struct rcu_head rcu; /* used by __instance_destroy() for call_rcu() */
+
+ int peer_pid; /* netlink pid that created the queue */
+ unsigned int queue_maxlen; /* enqueue is refused once queue_total reaches this */
+ unsigned int copy_range; /* payload byte limit in NFQNL_COPY_PACKET mode */
+ unsigned int queue_total; /* entries currently on queue_list */
+ unsigned int queue_dropped; /* packets refused because the queue was full */
+ unsigned int queue_user_dropped; /* packets whose netlink unicast failed */
+
+ unsigned int id_sequence; /* 'sequence' of pkt ids */
+
+ u_int16_t queue_num; /* number of this queue */
+ u_int8_t copy_mode;
+
+ spinlock_t lock; /* taken around queue_list, counters and copy settings */
+
+ struct list_head queue_list; /* packets in queue */
+};
+
+/* Predicate used by nfqnl_flush(): non-zero selects the entry for removal. */
+typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
+
+/* Taken around additions to and removals from instance_table. */
+static DEFINE_SPINLOCK(instances_lock);
+
+/* Hash table of all queue instances, indexed by instance_hashfn(). */
+#define INSTANCE_BUCKETS 16
+static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly;
+
+/* Map a queue number to its instance_table bucket: OR the high byte into
+ * the low bits, then reduce modulo INSTANCE_BUCKETS. */
+static inline u_int8_t instance_hashfn(u_int16_t queue_num)
+{
+	const int folded = (queue_num >> 8) | queue_num;
+
+	return folded % INSTANCE_BUCKETS;
+}
+
+/* Find the instance with the given queue number using the RCU list
+ * iterator; returns NULL when none exists.  No reference is taken. */
+static struct nfqnl_instance *
+instance_lookup(u_int16_t queue_num)
+{
+	struct hlist_head *bucket = &instance_table[instance_hashfn(queue_num)];
+	struct nfqnl_instance *cur, *found = NULL;
+	struct hlist_node *pos;
+
+	hlist_for_each_entry_rcu(cur, pos, bucket, hlist) {
+		if (cur->queue_num != queue_num)
+			continue;
+		found = cur;
+		break;
+	}
+	return found;
+}
+
+/*
+ * Allocate a new queue instance for queue_num owned by netlink pid `pid`
+ * and insert it into instance_table.
+ *
+ * Runs entirely under instances_lock, hence the GFP_ATOMIC allocation.
+ * A module reference is taken for each instance; it is dropped again in
+ * instance_destroy_rcu().
+ *
+ * Returns the new instance, or ERR_PTR(-EEXIST) if the queue number is
+ * already in use, ERR_PTR(-ENOMEM) on allocation failure, or
+ * ERR_PTR(-EAGAIN) if the module reference could not be taken.
+ */
+static struct nfqnl_instance *
+instance_create(u_int16_t queue_num, int pid)
+{
+	struct nfqnl_instance *inst;
+	unsigned int h;
+	int err;
+
+	spin_lock(&instances_lock);
+	if (instance_lookup(queue_num)) {
+		err = -EEXIST;
+		goto out_unlock;
+	}
+
+	inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
+	if (!inst) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+
+	inst->queue_num = queue_num;
+	inst->peer_pid = pid;
+	inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
+	/* Same 16-bit ceiling nfqnl_set_mode() enforces (struct nlattr has
+	 * a 16bit nla_len); the previous 0xfffff had one 'f' too many. */
+	inst->copy_range = 0xffff;
+	inst->copy_mode = NFQNL_COPY_NONE;
+	spin_lock_init(&inst->lock);
+	INIT_LIST_HEAD(&inst->queue_list);
+	INIT_RCU_HEAD(&inst->rcu);
+
+	if (!try_module_get(THIS_MODULE)) {
+		err = -EAGAIN;
+		goto out_free;
+	}
+
+	h = instance_hashfn(queue_num);
+	hlist_add_head_rcu(&inst->hlist, &instance_table[h]);
+
+	spin_unlock(&instances_lock);
+
+	return inst;
+
+out_free:
+	kfree(inst);
+out_unlock:
+	spin_unlock(&instances_lock);
+	return ERR_PTR(err);
+}
+
+/* Forward declaration: instance_destroy_rcu() below needs nfqnl_flush(),
+ * which is defined further down. */
+static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
+ unsigned long data);
+
+/* RCU callback scheduled by __instance_destroy(): drops every packet still
+ * queued (NULL cmpfn matches all), frees the instance and releases the
+ * module reference taken in instance_create(). */
+static void
+instance_destroy_rcu(struct rcu_head *head)
+{
+ struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
+ rcu);
+
+ nfqnl_flush(inst, NULL, 0);
+ kfree(inst);
+ module_put(THIS_MODULE);
+}
+
+/* Unlink the instance from its hash chain and defer the actual teardown to
+ * instance_destroy_rcu().  The callers visible here hold instances_lock. */
+static void
+__instance_destroy(struct nfqnl_instance *inst)
+{
+ hlist_del_rcu(&inst->hlist);
+ call_rcu(&inst->rcu, instance_destroy_rcu);
+}
+
+/* Locked wrapper: takes instances_lock around __instance_destroy(). */
+static void
+instance_destroy(struct nfqnl_instance *inst)
+{
+ spin_lock(&instances_lock);
+ __instance_destroy(inst);
+ spin_unlock(&instances_lock);
+}
+
+/* Append entry to the queue's packet list and bump queue_total.  Takes no
+ * lock itself; the caller in this file (nfqnl_enqueue_packet) holds
+ * queue->lock. */
+static inline void
+__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
+{
+ list_add_tail(&entry->list, &queue->queue_list);
+ queue->queue_total++;
+}
+
+/* Under queue->lock, find the queued entry whose id matches, unlink it and
+ * decrement queue_total.  Returns the entry, or NULL if no entry has that
+ * id. */
+static struct nf_queue_entry *
+find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
+{
+	struct nf_queue_entry *cur, *found = NULL;
+
+	spin_lock_bh(&queue->lock);
+
+	list_for_each_entry(cur, &queue->queue_list, list) {
+		if (cur->id != id)
+			continue;
+
+		/* unlinking inside the walk is fine: we leave the loop
+		 * right away and never follow cur's links again */
+		list_del(&cur->list);
+		queue->queue_total--;
+		found = cur;
+		break;
+	}
+
+	spin_unlock_bh(&queue->lock);
+
+	return found;
+}
+
+/* Remove queued entries and reinject them with NF_DROP.  With a NULL cmpfn
+ * every entry is removed; otherwise only those for which
+ * cmpfn(entry, data) returns non-zero.  The whole walk, including the
+ * nf_reinject() calls, runs under queue->lock. */
+static void
+nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
+{
+ struct nf_queue_entry *entry, *next;
+
+ spin_lock_bh(&queue->lock);
+ list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
+ if (!cmpfn || cmpfn(entry, data)) {
+ list_del(&entry->list);
+ queue->queue_total--;
+ nf_reinject(entry, NF_DROP);
+ }
+ }
+ spin_unlock_bh(&queue->lock);
+}
+
+/*
+ * Build the NFQNL_MSG_PACKET netlink message describing `entry`.
+ *
+ * Computes a worst-case message size, decides under queue->lock how much
+ * payload to copy (per the queue's copy_mode / copy_range) and assigns the
+ * entry its packet id, then allocates an skb with GFP_ATOMIC and fills in
+ * the packet header, interface indexes, mark, hardware address, timestamp
+ * and payload attributes.
+ *
+ * Returns the new skb, or NULL on checksum-help, allocation or attribute
+ * failure (any partly built skb is freed).
+ */
+static struct sk_buff *
+nfqnl_build_packet_message(struct nfqnl_instance *queue,
+			   struct nf_queue_entry *entry)
+{
+	sk_buff_data_t old_tail;
+	size_t size;
+	size_t data_len = 0;
+	struct sk_buff *skb;
+	struct nfqnl_msg_packet_hdr pmsg;
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	struct sk_buff *entskb = entry->skb;
+	struct net_device *indev;
+	struct net_device *outdev;
+
+	size =    NLMSG_SPACE(sizeof(struct nfgenmsg))
+		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
+		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
+		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
+#ifdef CONFIG_BRIDGE_NETFILTER
+		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
+		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
+#endif
+		+ nla_total_size(sizeof(u_int32_t))	/* mark */
+		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
+		+ nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
+
+	outdev = entry->outdev;
+
+	spin_lock_bh(&queue->lock);
+
+	switch ((enum nfqnl_config_mode)queue->copy_mode) {
+	case NFQNL_COPY_META:
+	case NFQNL_COPY_NONE:
+		break;
+
+	case NFQNL_COPY_PACKET:
+		if ((entskb->ip_summed == CHECKSUM_PARTIAL ||
+		     entskb->ip_summed == CHECKSUM_COMPLETE) &&
+		    skb_checksum_help(entskb)) {
+			spin_unlock_bh(&queue->lock);
+			return NULL;
+		}
+		if (queue->copy_range == 0
+		    || queue->copy_range > entskb->len)
+			data_len = entskb->len;
+		else
+			data_len = queue->copy_range;
+
+		size += nla_total_size(data_len);
+		break;
+	}
+
+	entry->id = queue->id_sequence++;
+
+	spin_unlock_bh(&queue->lock);
+
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (!skb)
+		goto nlmsg_failure;
+
+	old_tail = skb->tail;
+	nlh = NLMSG_PUT(skb, 0, 0,
+			NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
+			sizeof(struct nfgenmsg));
+	nfmsg = NLMSG_DATA(nlh);
+	nfmsg->nfgen_family = entry->pf;
+	nfmsg->version = NFNETLINK_V0;
+	nfmsg->res_id = htons(queue->queue_num);
+
+	pmsg.packet_id		= htonl(entry->id);
+	pmsg.hw_protocol	= entskb->protocol;
+	pmsg.hook		= entry->hook;
+
+	NLA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg);
+
+	indev = entry->indev;
+	if (indev) {
+#ifndef CONFIG_BRIDGE_NETFILTER
+		NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex));
+#else
+		if (entry->pf == PF_BRIDGE) {
+			/* Case 1: indev is physical input device, we need to
+			 * look for bridge group (when called from
+			 * netfilter_bridge) */
+			NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
+				     htonl(indev->ifindex));
+			/* this is the bridge group "brX" */
+			NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
+				     htonl(indev->br_port->br->dev->ifindex));
+		} else {
+			/* Case 2: indev is bridge group, we need to look for
+			 * physical device (when called from ipv4) */
+			NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
+				     htonl(indev->ifindex));
+			if (entskb->nf_bridge && entskb->nf_bridge->physindev)
+				NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
+					     htonl(entskb->nf_bridge->physindev->ifindex));
+		}
+#endif
+	}
+
+	if (outdev) {
+#ifndef CONFIG_BRIDGE_NETFILTER
+		NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex));
+#else
+		if (entry->pf == PF_BRIDGE) {
+			/* Case 1: outdev is physical output device, we need to
+			 * look for bridge group (when called from
+			 * netfilter_bridge) */
+			NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
+				     htonl(outdev->ifindex));
+			/* this is the bridge group "brX" */
+			NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
+				     htonl(outdev->br_port->br->dev->ifindex));
+		} else {
+			/* Case 2: outdev is bridge group, we need to look for
+			 * physical output device (when called from ipv4) */
+			NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
+				     htonl(outdev->ifindex));
+			if (entskb->nf_bridge && entskb->nf_bridge->physoutdev)
+				NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
+					     htonl(entskb->nf_bridge->physoutdev->ifindex));
+		}
+#endif
+	}
+
+	if (entskb->mark)
+		NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark));
+
+	if (indev && entskb->dev) {
+		struct nfqnl_msg_packet_hw phw;
+		int len;
+
+		/* The whole struct is copied to userspace below, but only
+		 * hw_addrlen and the first `len` bytes of hw_addr get
+		 * written; zero it first so no stale stack bytes leak. */
+		memset(&phw, 0, sizeof(phw));
+		len = dev_parse_header(entskb, phw.hw_addr);
+		if (len) {
+			phw.hw_addrlen = htons(len);
+			NLA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
+		}
+	}
+
+	if (entskb->tstamp.tv64) {
+		struct nfqnl_msg_packet_timestamp ts;
+		struct timeval tv = ktime_to_timeval(entskb->tstamp);
+		ts.sec = cpu_to_be64(tv.tv_sec);
+		ts.usec = cpu_to_be64(tv.tv_usec);
+
+		NLA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
+	}
+
+	if (data_len) {
+		struct nlattr *nla;
+		int sz = nla_attr_size(data_len);
+
+		if (skb_tailroom(skb) < nla_total_size(data_len)) {
+			printk(KERN_WARNING "nf_queue: no tailroom!\n");
+			goto nlmsg_failure;
+		}
+
+		/* payload attribute is built by hand so the packet bytes
+		 * can be copied straight into the message */
+		nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len));
+		nla->nla_type = NFQA_PAYLOAD;
+		nla->nla_len = sz;
+
+		if (skb_copy_bits(entskb, 0, nla_data(nla), data_len))
+			BUG();
+	}
+
+	nlh->nlmsg_len = skb->tail - old_tail;
+	return skb;
+
+nlmsg_failure:
+nla_put_failure:
+	if (skb)
+		kfree_skb(skb);
+	if (net_ratelimit())
+		printk(KERN_ERR "nf_queue: error creating packet message\n");
+	return NULL;
+}
+
+/* Queue handler entry point (nfqh.outfn): build the netlink message for
+ * entry, unicast it to the queue's peer and put the entry on the queue.
+ * Returns 0 on success and -1 when the queue does not exist, is in
+ * NFQNL_COPY_NONE mode, has no peer, is full, or the message could not be
+ * built or sent. */
+static int
+nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+{
+ struct sk_buff *nskb;
+ struct nfqnl_instance *queue;
+ int err;
+
+ /* rcu_read_lock()ed by nf_hook_slow() */
+ queue = instance_lookup(queuenum);
+ if (!queue)
+ goto err_out;
+
+ if (queue->copy_mode == NFQNL_COPY_NONE)
+ goto err_out;
+
+ nskb = nfqnl_build_packet_message(queue, entry);
+ if (nskb == NULL)
+ goto err_out;
+
+ spin_lock_bh(&queue->lock);
+
+ if (!queue->peer_pid)
+ goto err_out_free_nskb;
+
+ if (queue->queue_total >= queue->queue_maxlen) {
+ queue->queue_dropped++;
+ if (net_ratelimit())
+ printk(KERN_WARNING "nf_queue: full at %d entries, "
+ "dropping packets(s). Dropped: %d\n",
+ queue->queue_total, queue->queue_dropped);
+ goto err_out_free_nskb;
+ }
+
+ /* nfnetlink_unicast will either free the nskb or add it to a socket */
+ err = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT);
+ if (err < 0) {
+ queue->queue_user_dropped++;
+ /* nskb is already consumed, so skip the kfree_skb() label */
+ goto err_out_unlock;
+ }
+
+ __enqueue_entry(queue, entry);
+
+ spin_unlock_bh(&queue->lock);
+ return 0;
+
+err_out_free_nskb:
+ kfree_skb(nskb);
+err_out_unlock:
+ spin_unlock_bh(&queue->lock);
+err_out:
+ return -1;
+}
+
+/* Replace the contents of e->skb with the data_len bytes at data.
+ * Shrinks the skb with pskb_trim() or grows it (reallocating with
+ * skb_copy_expand() when the tailroom is insufficient), makes it writable,
+ * copies the data in and resets ip_summed to CHECKSUM_NONE.
+ * Returns 0, -ENOMEM on trim/allocation/writable failure, or -EINVAL when
+ * a grown packet would exceed 0xFFFF bytes. */
+static int
+nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e)
+{
+ struct sk_buff *nskb;
+ int diff;
+
+ diff = data_len - e->skb->len;
+ if (diff < 0) {
+ if (pskb_trim(e->skb, data_len))
+ return -ENOMEM;
+ } else if (diff > 0) {
+ if (data_len > 0xFFFF)
+ return -EINVAL;
+ if (diff > skb_tailroom(e->skb)) {
+ nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
+ diff, GFP_ATOMIC);
+ if (!nskb) {
+ printk(KERN_WARNING "nf_queue: OOM "
+ "in mangle, dropping packet\n");
+ return -ENOMEM;
+ }
+ /* the entry now owns the expanded copy */
+ kfree_skb(e->skb);
+ e->skb = nskb;
+ }
+ skb_put(e->skb, diff);
+ }
+ if (!skb_make_writable(e->skb, data_len))
+ return -ENOMEM;
+ skb_copy_to_linear_data(e->skb, data, data_len);
+ e->skb->ip_summed = CHECKSUM_NONE;
+ return 0;
+}
+
+/* Set the queue's copy mode and range under queue->lock.  NONE and META
+ * force the range to 0; PACKET stores the range capped at 0xffff.
+ * Returns 0, or -EINVAL for an unknown mode (queue left untouched). */
+static int
+nfqnl_set_mode(struct nfqnl_instance *queue,
+	       unsigned char mode, unsigned int range)
+{
+	int rc = 0;
+
+	spin_lock_bh(&queue->lock);
+	if (mode == NFQNL_COPY_NONE || mode == NFQNL_COPY_META) {
+		queue->copy_mode = mode;
+		queue->copy_range = 0;
+	} else if (mode == NFQNL_COPY_PACKET) {
+		queue->copy_mode = mode;
+		/* we're using struct nlattr which has 16bit nla_len */
+		queue->copy_range = (range > 0xffff) ? 0xffff : range;
+	} else {
+		rc = -EINVAL;
+	}
+	spin_unlock_bh(&queue->lock);
+
+	return rc;
+}
+
+/* nfqnl_cmpfn predicate: returns 1 when the entry's input or output
+ * device -- or, with CONFIG_BRIDGE_NETFILTER, its physical bridge
+ * input/output device -- has the given ifindex, 0 otherwise. */
+static int
+dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
+{
+	if (entry->indev && entry->indev->ifindex == ifindex)
+		return 1;
+	if (entry->outdev && entry->outdev->ifindex == ifindex)
+		return 1;
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (entry->skb->nf_bridge) {
+		struct net_device *physin = entry->skb->nf_bridge->physindev;
+
+		if (physin && physin->ifindex == ifindex)
+			return 1;
+		if (entry->skb->nf_bridge->physoutdev &&
+		    entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
+			return 1;
+	}
+#endif
+	return 0;
+}
+
+/* Drop, from every queue instance, all packets whose indev or outdev
+ * matches ifindex.  The instance table is walked under rcu_read_lock(). */
+static void
+nfqnl_dev_drop(int ifindex)
+{
+	struct nfqnl_instance *queue;
+	struct hlist_node *pos;
+	int bucket;
+
+	rcu_read_lock();
+
+	for (bucket = 0; bucket < INSTANCE_BUCKETS; bucket++) {
+		struct hlist_head *chain = &instance_table[bucket];
+
+		hlist_for_each_entry_rcu(queue, pos, chain, hlist)
+			nfqnl_flush(queue, dev_cmp, ifindex);
+	}
+
+	rcu_read_unlock();
+}
+
+/* Acknowledge the current netlink message with (err) via netlink_ack() and
+ * return from the enclosing function.  Expands to a bare `return;`, and uses
+ * the identifiers `skb` and `nlh` from the caller's scope. */
+#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
+
+/* Netdevice notifier callback: when a device in init_net goes down
+ * (NETDEV_DOWN), drop every queued packet that refers to it.  Always
+ * returns NOTIFY_DONE. */
+static int
+nfqnl_rcv_dev_event(struct notifier_block *this,
+		    unsigned long event, void *ptr)
+{
+	struct net_device *netdev = ptr;
+
+	/* Drop any packets associated with the downed device */
+	if (net_eq(dev_net(netdev), &init_net) && event == NETDEV_DOWN)
+		nfqnl_dev_drop(netdev->ifindex);
+
+	return NOTIFY_DONE;
+}
+
+/* Netdevice notifier block; registered in nfnetlink_queue_init() and
+ * unregistered in nfnetlink_queue_fini(). */
+static struct notifier_block nfqnl_dev_notifier = {
+ .notifier_call = nfqnl_rcv_dev_event,
+};
+
+/*
+ * Netlink notifier callback.  On NETLINK_URELEASE of a NETLINK_NETFILTER
+ * socket with a non-zero pid, walks every hash bucket under instances_lock
+ * and destroys the queue instances whose peer_pid matches (init_net only).
+ * Always returns NOTIFY_DONE.
+ */
+static int
+nfqnl_rcv_nl_event(struct notifier_block *this,
+		   unsigned long event, void *ptr)
+{
+	struct netlink_notify *notify = ptr;
+	struct hlist_node *pos, *nxt;
+	struct nfqnl_instance *inst;
+	int bucket;
+
+	if (event != NETLINK_URELEASE)
+		return NOTIFY_DONE;
+	if (notify->protocol != NETLINK_NETFILTER || !notify->pid)
+		return NOTIFY_DONE;
+
+	/* destroy all instances for this pid */
+	spin_lock(&instances_lock);
+	for (bucket = 0; bucket < INSTANCE_BUCKETS; bucket++) {
+		struct hlist_head *head = &instance_table[bucket];
+
+		hlist_for_each_entry_safe(inst, pos, nxt, head, hlist) {
+			if (notify->net != &init_net)
+				continue;
+			if (notify->pid == inst->peer_pid)
+				__instance_destroy(inst);
+		}
+	}
+	spin_unlock(&instances_lock);
+
+	return NOTIFY_DONE;
+}
+
+/* Netlink notifier block; registered in nfnetlink_queue_init() and
+ * unregistered in nfnetlink_queue_fini(). */
+static struct notifier_block nfqnl_rtnl_notifier = {
+ .notifier_call = nfqnl_rcv_nl_event,
+};
+
+/* Attribute policy for NFQNL_MSG_VERDICT messages (referenced from
+ * nfqnl_cb). */
+static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
+ [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
+ [NFQA_MARK] = { .type = NLA_U32 },
+ [NFQA_PAYLOAD] = { .type = NLA_UNSPEC },
+};
+
+/* Handler for NFQNL_MSG_VERDICT: apply userspace's verdict to a queued
+ * packet.
+ *
+ * Returns 0 after reinjecting the packet, or -ENODEV (no such queue),
+ * -EPERM (sender is not the queue's peer), -EINVAL (missing verdict header
+ * or verdict out of range) or -ENOENT (no queued packet with that id). */
+static int
+nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct nlattr *nfqa[])
+{
+ struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ u_int16_t queue_num = ntohs(nfmsg->res_id);
+
+ struct nfqnl_msg_verdict_hdr *vhdr;
+ struct nfqnl_instance *queue;
+ unsigned int verdict;
+ struct nf_queue_entry *entry;
+ int err;
+
+ /* queue is only dereferenced inside this RCU read-side section */
+ rcu_read_lock();
+ queue = instance_lookup(queue_num);
+ if (!queue) {
+ err = -ENODEV;
+ goto err_out_unlock;
+ }
+
+ if (queue->peer_pid != NETLINK_CB(skb).pid) {
+ err = -EPERM;
+ goto err_out_unlock;
+ }
+
+ if (!nfqa[NFQA_VERDICT_HDR]) {
+ err = -EINVAL;
+ goto err_out_unlock;
+ }
+
+ vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
+ verdict = ntohl(vhdr->verdict);
+
+ if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) {
+ err = -EINVAL;
+ goto err_out_unlock;
+ }
+
+ entry = find_dequeue_entry(queue, ntohl(vhdr->id));
+ if (entry == NULL) {
+ err = -ENOENT;
+ goto err_out_unlock;
+ }
+ /* entry is off the queue now; the queue itself is not used below */
+ rcu_read_unlock();
+
+ if (nfqa[NFQA_PAYLOAD]) {
+ /* a failed payload replacement turns the verdict into a drop */
+ if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]),
+ nla_len(nfqa[NFQA_PAYLOAD]), entry) < 0)
+ verdict = NF_DROP;
+ }
+
+ if (nfqa[NFQA_MARK])
+ entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
+
+ nf_reinject(entry, verdict);
+ return 0;
+
+err_out_unlock:
+ rcu_read_unlock();
+ return err;
+}
+
+/* Callback for NFQNL_MSG_PACKET messages (see nfqnl_cb): always rejects
+ * the message with -ENOTSUPP; all arguments are ignored. */
+static int
+nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct nlattr *nfqa[])
+{
+ return -ENOTSUPP;
+}
+
+/* Attribute policy for NFQNL_MSG_CONFIG messages (referenced from
+ * nfqnl_cb).  NFQA_CFG_QUEUE_MAXLEN is listed so that its length is
+ * validated before nfqnl_recv_config() reads it as a 32-bit value. */
+static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
+	[NFQA_CFG_CMD]		= { .len = sizeof(struct nfqnl_msg_config_cmd) },
+	[NFQA_CFG_PARAMS]	= { .len = sizeof(struct nfqnl_msg_config_params) },
+	[NFQA_CFG_QUEUE_MAXLEN]	= { .type = NLA_U32 },
+};
+
+/* Queue handler descriptor handed to nf_register_queue_handler() on
+ * NFQNL_CFG_CMD_PF_BIND; outfn is called to enqueue a packet. */
+static const struct nf_queue_handler nfqh = {
+ .name = "nf_queue",
+ .outfn = &nfqnl_enqueue_packet,
+};
+
+/* Handler for NFQNL_MSG_CONFIG.
+ *
+ * PF_BIND / PF_UNBIND commands (un)register the queue handler for cmd->pf
+ * and return immediately.  Otherwise the queue named by nfmsg->res_id is
+ * looked up under rcu_read_lock(); a sender whose netlink pid differs from
+ * the queue's peer_pid gets -EPERM.  BIND creates the queue, UNBIND
+ * destroys it, and the PARAMS / QUEUE_MAXLEN attributes are then applied.
+ *
+ * Returns 0 or a negative errno (-EPERM, -EBUSY, -ENODEV, -ENOTSUPP, or
+ * whatever instance_create() reports). */
+static int
+nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct nlattr *nfqa[])
+{
+ struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ u_int16_t queue_num = ntohs(nfmsg->res_id);
+ struct nfqnl_instance *queue;
+ struct nfqnl_msg_config_cmd *cmd = NULL;
+ int ret = 0;
+
+ if (nfqa[NFQA_CFG_CMD]) {
+ cmd = nla_data(nfqa[NFQA_CFG_CMD]);
+
+ /* Commands without queue context - might sleep */
+ switch (cmd->command) {
+ case NFQNL_CFG_CMD_PF_BIND:
+ return nf_register_queue_handler(ntohs(cmd->pf),
+ &nfqh);
+ case NFQNL_CFG_CMD_PF_UNBIND:
+ return nf_unregister_queue_handler(ntohs(cmd->pf),
+ &nfqh);
+ }
+ }
+
+ rcu_read_lock();
+ queue = instance_lookup(queue_num);
+ if (queue && queue->peer_pid != NETLINK_CB(skb).pid) {
+ ret = -EPERM;
+ goto err_out_unlock;
+ }
+
+ if (cmd != NULL) {
+ switch (cmd->command) {
+ case NFQNL_CFG_CMD_BIND:
+ if (queue) {
+ ret = -EBUSY;
+ goto err_out_unlock;
+ }
+ queue = instance_create(queue_num, NETLINK_CB(skb).pid);
+ if (IS_ERR(queue)) {
+ ret = PTR_ERR(queue);
+ goto err_out_unlock;
+ }
+ break;
+ case NFQNL_CFG_CMD_UNBIND:
+ if (!queue) {
+ ret = -ENODEV;
+ goto err_out_unlock;
+ }
+ /* freeing is deferred via call_rcu(), so queue stays valid
+ * for the attribute handling below */
+ instance_destroy(queue);
+ break;
+ case NFQNL_CFG_CMD_PF_BIND:
+ case NFQNL_CFG_CMD_PF_UNBIND:
+ /* already handled (and returned) above */
+ break;
+ default:
+ /* ret is set, but the attributes below are still processed */
+ ret = -ENOTSUPP;
+ break;
+ }
+ }
+
+ if (nfqa[NFQA_CFG_PARAMS]) {
+ struct nfqnl_msg_config_params *params;
+
+ if (!queue) {
+ ret = -ENODEV;
+ goto err_out_unlock;
+ }
+ params = nla_data(nfqa[NFQA_CFG_PARAMS]);
+ /* the status returned by nfqnl_set_mode() is not propagated */
+ nfqnl_set_mode(queue, params->copy_mode,
+ ntohl(params->copy_range));
+ }
+
+ if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) {
+ __be32 *queue_maxlen;
+
+ if (!queue) {
+ ret = -ENODEV;
+ goto err_out_unlock;
+ }
+ queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
+ spin_lock_bh(&queue->lock);
+ queue->queue_maxlen = ntohl(*queue_maxlen);
+ spin_unlock_bh(&queue->lock);
+ }
+
+err_out_unlock:
+ rcu_read_unlock();
+ return ret;
+}
+
+/* Per-message-type dispatch table: packet messages from userspace are
+ * rejected; verdict and config messages are validated against their
+ * respective attribute policies. */
+static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
+ [NFQNL_MSG_PACKET] = { .call = nfqnl_recv_unsupp,
+ .attr_count = NFQA_MAX, },
+ [NFQNL_MSG_VERDICT] = { .call = nfqnl_recv_verdict,
+ .attr_count = NFQA_MAX,
+ .policy = nfqa_verdict_policy },
+ [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config,
+ .attr_count = NFQA_CFG_MAX,
+ .policy = nfqa_cfg_policy },
+};
+
+/* nfnetlink subsystem descriptor, (un)registered in module init/exit. */
+static const struct nfnetlink_subsystem nfqnl_subsys = {
+ .name = "nf_queue",
+ .subsys_id = NFNL_SUBSYS_QUEUE,
+ .cb_count = NFQNL_MSG_MAX,
+ .cb = nfqnl_cb,
+};
+
+#ifdef CONFIG_PROC_FS
+/* seq_file private state: index of the instance_table bucket the iterator
+ * is currently walking. */
+struct iter_state {
+ unsigned int bucket;
+};
+
+/* Position the iterator on the first non-empty bucket and return that
+ * bucket's first node.  Returns NULL when the seq_file has no private
+ * state or when every bucket is empty. */
+static struct hlist_node *get_first(struct seq_file *seq)
+{
+	struct iter_state *iter = seq->private;
+
+	if (iter == NULL)
+		return NULL;
+
+	iter->bucket = 0;
+	while (iter->bucket < INSTANCE_BUCKETS) {
+		struct hlist_head *head = &instance_table[iter->bucket];
+
+		if (!hlist_empty(head))
+			return head->first;
+		iter->bucket++;
+	}
+	return NULL;
+}
+
+/* Return the node following h, moving on to the first node of later
+ * buckets when the current chain is exhausted; NULL once the last bucket
+ * has been passed. */
+static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
+{
+	struct iter_state *iter = seq->private;
+	struct hlist_node *nxt;
+
+	for (nxt = h->next; nxt == NULL;
+	     nxt = instance_table[iter->bucket].first) {
+		iter->bucket++;
+		if (iter->bucket >= INSTANCE_BUCKETS)
+			return NULL;
+	}
+	return nxt;
+}
+
+/* Return the pos-th node (0-based) in iteration order, or NULL when fewer
+ * than pos + 1 nodes exist. */
+static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct hlist_node *node = get_first(seq);
+
+	while (node && pos) {
+		node = get_next(seq, node);
+		if (node)
+			pos--;
+	}
+
+	if (pos)
+		return NULL;
+	return node;
+}
+
+/* seq_file .start: takes instances_lock (released in seq_stop) and returns
+ * the node at index *pos, or NULL. */
+static void *seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(instances_lock)
+{
+ spin_lock(&instances_lock);
+ return get_idx(seq, *pos);
+}
+
+/* seq_file .next: bumps *pos and returns the node after v, or NULL. */
+static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return get_next(s, v);
+}
+
+/* seq_file .stop: drops the lock taken in seq_start. */
+static void seq_stop(struct seq_file *s, void *v)
+ __releases(instances_lock)
+{
+ spin_unlock(&instances_lock);
+}
+
+/* seq_file .show: prints one line per queue: queue number, peer pid,
+ * current queue length, copy mode, copy range, dropped count, user-dropped
+ * count, packet id sequence and a constant 1.
+ *
+ * v is the hlist_node pointer handed out by get_first/get_next; hlist is
+ * the first member of struct nfqnl_instance, so it doubles as the instance
+ * pointer.
+ *
+ * The unsigned int fields are printed with %u: id_sequence in particular
+ * runs through the full 32-bit range and would show up as a negative
+ * number with %d once it passes INT_MAX. */
+static int seq_show(struct seq_file *s, void *v)
+{
+	const struct nfqnl_instance *inst = v;
+
+	return seq_printf(s, "%5d %6d %5u %1d %5u %5u %5u %8u %2d\n",
+			  inst->queue_num,
+			  inst->peer_pid, inst->queue_total,
+			  inst->copy_mode, inst->copy_range,
+			  inst->queue_dropped, inst->queue_user_dropped,
+			  inst->id_sequence, 1);
+}
+
+/* Iterator callbacks for the "nfnetlink_queue" proc file. */
+static const struct seq_operations nfqnl_seq_ops = {
+ .start = seq_start,
+ .next = seq_next,
+ .stop = seq_stop,
+ .show = seq_show,
+};
+
+/* open() for the proc file: sets up the seq_file with a private
+ * struct iter_state allocated by seq_open_private(). */
+static int nfqnl_open(struct inode *inode, struct file *file)
+{
+ return seq_open_private(file, &nfqnl_seq_ops,
+ sizeof(struct iter_state));
+}
+
+/* File operations of the proc entry created in nfnetlink_queue_init();
+ * release uses seq_release_private to match seq_open_private in open. */
+static const struct file_operations nfqnl_file_ops = {
+ .owner = THIS_MODULE,
+ .open = nfqnl_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+#endif /* PROC_FS */
+
+/*
+ * Module init: initialises the instance hash table, then registers the
+ * netlink notifier, the nfnetlink subsystem, the "nfnetlink_queue" proc
+ * entry (with CONFIG_PROC_FS) and the netdevice notifier.  On failure the
+ * steps already done are undone in reverse order and a negative errno is
+ * returned.
+ */
+static int __init nfnetlink_queue_init(void)
+{
+	int i, status;
+
+	for (i = 0; i < INSTANCE_BUCKETS; i++)
+		INIT_HLIST_HEAD(&instance_table[i]);
+
+	netlink_register_notifier(&nfqnl_rtnl_notifier);
+	status = nfnetlink_subsys_register(&nfqnl_subsys);
+	if (status < 0) {
+		printk(KERN_ERR "nf_queue: failed to create netlink socket\n");
+		goto cleanup_netlink_notifier;
+	}
+
+#ifdef CONFIG_PROC_FS
+	if (!proc_create("nfnetlink_queue", 0440,
+			 proc_net_netfilter, &nfqnl_file_ops)) {
+		/* status still holds the successful result of the subsystem
+		 * registration here; without this assignment the module
+		 * would report success after tearing everything down. */
+		status = -ENOMEM;
+		goto cleanup_subsys;
+	}
+#endif
+
+	register_netdevice_notifier(&nfqnl_dev_notifier);
+	return status;
+
+#ifdef CONFIG_PROC_FS
+cleanup_subsys:
+	nfnetlink_subsys_unregister(&nfqnl_subsys);
+#endif
+cleanup_netlink_notifier:
+	netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+	return status;
+}
+
+/* Module exit: unregisters the queue handler first, then the netdevice
+ * notifier, the proc entry, the nfnetlink subsystem and the netlink
+ * notifier. */
+static void __exit nfnetlink_queue_fini(void)
+{
+ nf_unregister_queue_handlers(&nfqh);
+ unregister_netdevice_notifier(&nfqnl_dev_notifier);
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("nfnetlink_queue", proc_net_netfilter);
+#endif
+ nfnetlink_subsys_unregister(&nfqnl_subsys);
+ netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+}
+
+MODULE_DESCRIPTION("netfilter packet queue handler");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE);
+
+module_init(nfnetlink_queue_init);
+module_exit(nfnetlink_queue_fini);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
new file mode 100644
index 0000000..bfbf521
--- /dev/null
+++ b/net/netfilter/x_tables.c
@@ -0,0 +1,1078 @@
+/*
+ * x_tables core - Backend for {ip,ip6,arp}_tables
+ *
+ * Copyright (C) 2006-2006 Harald Welte <laforge@netfilter.org>
+ *
+ * Based on existing ip_tables code which is
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <linux/mutex.h>
+#include <linux/mm.h>
+#include <net/net_namespace.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_arp.h>
+
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
+
+#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+
+/* Node of the per-family singly linked list kept in xt_af.compat_offsets. */
+struct compat_delta {
+ struct compat_delta *next;
+ unsigned int offset; /* compared against the offset given to xt_compat_calc_jump() */
+ short delta; /* accumulated by xt_compat_calc_jump() when offset is lower */
+};
+
+/* Per-family state; the global "xt" array holds one entry per NFPROTO_* value. */
+struct xt_af {
+ struct mutex mutex; /* taken around accesses to the match/target lists and the table lists */
+ struct list_head match; /* registered struct xt_match entries */
+ struct list_head target; /* registered struct xt_target entries */
+#ifdef CONFIG_COMPAT
+ struct mutex compat_mutex; /* taken/released via xt_compat_lock()/xt_compat_unlock() */
+ struct compat_delta *compat_offsets; /* list built by xt_compat_add_offset() */
+#endif
+};
+
+static struct xt_af *xt;
+
+#ifdef DEBUG_IP_FIREWALL_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+/*
+ * Name prefix for each family.  Used in this file to build log messages
+ * ("%s_tables"), module names ("%st_%s") and proc file names.  Families not
+ * listed here are left NULL by the designated initializer.
+ */
+static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
+ [NFPROTO_UNSPEC] = "x",
+ [NFPROTO_IPV4] = "ip",
+ [NFPROTO_ARP] = "arp",
+ [NFPROTO_BRIDGE] = "eb",
+ [NFPROTO_IPV6] = "ip6",
+};
+
+/* Registration hooks for targets. */
+/*
+ * Add @target to the target list of the family named in target->family.
+ * Returns 0 on success, or the non-zero result of mutex_lock_interruptible()
+ * if the lock could not be taken.
+ */
+int
+xt_register_target(struct xt_target *target)
+{
+	struct xt_af *fam = &xt[target->family];
+	int ret = mutex_lock_interruptible(&fam->mutex);
+
+	if (ret == 0) {
+		list_add(&target->list, &fam->target);
+		mutex_unlock(&fam->mutex);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(xt_register_target);
+
+/* Unlink @target from its family's target list while holding the family mutex. */
+void
+xt_unregister_target(struct xt_target *target)
+{
+	struct xt_af *fam = &xt[target->family];
+
+	mutex_lock(&fam->mutex);
+	list_del(&target->list);
+	mutex_unlock(&fam->mutex);
+}
+EXPORT_SYMBOL(xt_unregister_target);
+
+/*
+ * Register the first @n entries of the @target array.  If one registration
+ * fails, the entries registered before it are unregistered again and the
+ * error is returned; otherwise returns 0.
+ */
+int
+xt_register_targets(struct xt_target *target, unsigned int n)
+{
+	unsigned int done;
+
+	for (done = 0; done < n; done++) {
+		int err = xt_register_target(&target[done]);
+
+		if (err == 0)
+			continue;
+		/* Roll back only what was actually registered. */
+		if (done > 0)
+			xt_unregister_targets(target, done);
+		return err;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(xt_register_targets);
+
+/* Unregister the first @n entries of the @target array, in array order. */
+void
+xt_unregister_targets(struct xt_target *target, unsigned int n)
+{
+	struct xt_target *cur = target;
+	struct xt_target *const end = target + n;
+
+	while (cur != end)
+		xt_unregister_target(cur++);
+}
+EXPORT_SYMBOL(xt_unregister_targets);
+
+/*
+ * Add @match to the match list of the family named in match->family.
+ * Returns 0 on success, or the non-zero result of mutex_lock_interruptible()
+ * if the lock could not be taken.
+ */
+int
+xt_register_match(struct xt_match *match)
+{
+	struct xt_af *fam = &xt[match->family];
+	int ret = mutex_lock_interruptible(&fam->mutex);
+
+	if (ret == 0) {
+		list_add(&match->list, &fam->match);
+		mutex_unlock(&fam->mutex);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(xt_register_match);
+
+/* Unlink @match from its family's match list while holding the family mutex. */
+void
+xt_unregister_match(struct xt_match *match)
+{
+	struct xt_af *fam = &xt[match->family];
+
+	mutex_lock(&fam->mutex);
+	list_del(&match->list);
+	mutex_unlock(&fam->mutex);
+}
+EXPORT_SYMBOL(xt_unregister_match);
+
+/*
+ * Register the first @n entries of the @match array.  If one registration
+ * fails, the entries registered before it are unregistered again and the
+ * error is returned; otherwise returns 0.
+ */
+int
+xt_register_matches(struct xt_match *match, unsigned int n)
+{
+	unsigned int done;
+
+	for (done = 0; done < n; done++) {
+		int err = xt_register_match(&match[done]);
+
+		if (err == 0)
+			continue;
+		/* Roll back only what was actually registered. */
+		if (done > 0)
+			xt_unregister_matches(match, done);
+		return err;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(xt_register_matches);
+
+/* Unregister the first @n entries of the @match array, in array order. */
+void
+xt_unregister_matches(struct xt_match *match, unsigned int n)
+{
+	struct xt_match *cur = match;
+	struct xt_match *const end = match + n;
+
+	while (cur != end)
+		xt_unregister_match(cur++);
+}
+EXPORT_SYMBOL(xt_unregister_matches);
+
+
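+/*
+ * The lookup helpers below look odd for two reasons: a module must not be
+ * loaded while the mutex is held (the freshly loaded module registers its
+ * extensions and takes the same mutex), and try_then_request_module()
+ * needs a single function it can call before and after the load attempt.
+ */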
+
+/*
+ * Look up a match by name and revision, taking a module reference on success.
+ *
+ * The list of family @af is searched first; when nothing usable is found and
+ * @af is not NFPROTO_UNSPEC, the family-independent list is searched as well.
+ * Returns the match, ERR_PTR(-EINTR) if the lock wait was interrupted, or
+ * ERR_PTR(err) where err is -EPROTOTYPE if the name exists in the last list
+ * searched with a different revision and 0 (i.e. NULL) otherwise.
+ */
+struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
+{
+	struct xt_match *cur, *found = NULL;
+	int err = 0;
+
+	if (mutex_lock_interruptible(&xt[af].mutex) != 0)
+		return ERR_PTR(-EINTR);
+
+	list_for_each_entry(cur, &xt[af].match, list) {
+		if (strcmp(cur->name, name) != 0)
+			continue;
+		if (cur->revision != revision) {
+			err = -EPROTOTYPE; /* Found something. */
+			continue;
+		}
+		if (try_module_get(cur->me)) {
+			found = cur;
+			break;
+		}
+	}
+	mutex_unlock(&xt[af].mutex);
+
+	if (found != NULL)
+		return found;
+
+	/* Try searching again in the family-independent list */
+	if (af != NFPROTO_UNSPEC)
+		return xt_find_match(NFPROTO_UNSPEC, name, revision);
+
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(xt_find_match);
+
+/*
+ * Look up a target by name and revision, taking a module reference on success.
+ *
+ * The list of family @af is searched first; when nothing usable is found and
+ * @af is not NFPROTO_UNSPEC, the family-independent list is searched as well.
+ * Returns the target, ERR_PTR(-EINTR) if the lock wait was interrupted, or
+ * ERR_PTR(err) where err is -EPROTOTYPE if the name exists in the last list
+ * searched with a different revision and 0 (i.e. NULL) otherwise.
+ */
+struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
+{
+	struct xt_target *cur, *found = NULL;
+	int err = 0;
+
+	if (mutex_lock_interruptible(&xt[af].mutex) != 0)
+		return ERR_PTR(-EINTR);
+
+	list_for_each_entry(cur, &xt[af].target, list) {
+		if (strcmp(cur->name, name) != 0)
+			continue;
+		if (cur->revision != revision) {
+			err = -EPROTOTYPE; /* Found something. */
+			continue;
+		}
+		if (try_module_get(cur->me)) {
+			found = cur;
+			break;
+		}
+	}
+	mutex_unlock(&xt[af].mutex);
+
+	if (found != NULL)
+		return found;
+
+	/* Try searching again in the family-independent list */
+	if (af != NFPROTO_UNSPEC)
+		return xt_find_target(NFPROTO_UNSPEC, name, revision);
+
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(xt_find_target);
+
+/*
+ * Find a target, requesting module "<prefix>t_<name>" if the first lookup
+ * yields nothing.  Unlike xt_find_target() this never returns an ERR_PTR:
+ * every failure is reported as NULL.
+ */
+struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
+{
+	struct xt_target *found;
+
+	found = try_then_request_module(xt_find_target(af, name, revision),
+					"%st_%s", xt_prefix[af], name);
+	return (IS_ERR(found) || !found) ? NULL : found;
+}
+EXPORT_SYMBOL_GPL(xt_request_find_target);
+
+/*
+ * Scan the match list of one family for @name.  Raises *bestp to the highest
+ * revision seen and returns 1 if @revision itself is registered, else 0.
+ * The caller must hold xt[af].mutex.
+ */
+static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
+{
+	const struct xt_match *m;
+	int have_rev = 0;
+
+	list_for_each_entry(m, &xt[af].match, list) {
+		if (strcmp(m->name, name) == 0) {
+			if (m->revision > *bestp)
+				*bestp = m->revision;
+			if (m->revision == revision)
+				have_rev = 1;
+		}
+	}
+
+	return have_rev;
+}
+
+/*
+ * Scan the target list of one family for @name.  Raises *bestp to the highest
+ * revision seen and returns 1 if @revision itself is registered, else 0.
+ * The caller must hold xt[af].mutex.
+ */
+static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
+{
+	const struct xt_target *t;
+	int have_rev = 0;
+
+	list_for_each_entry(t, &xt[af].target, list) {
+		if (strcmp(t->name, name) == 0) {
+			if (t->revision > *bestp)
+				*bestp = t->revision;
+			if (t->revision == revision)
+				have_rev = 1;
+		}
+	}
+
+	return have_rev;
+}
+
+/*
+ * Run the match or target revision scan for a single family with that
+ * family's own mutex held.  Returns 0, or -EINTR if the lock wait was
+ * interrupted (in which case *have_rev and *bestp are left untouched).
+ */
+static int xt_revision_scan(u8 af, const char *name, u8 revision, int target,
+			    int *bestp, int *have_rev)
+{
+	if (mutex_lock_interruptible(&xt[af].mutex) != 0)
+		return -EINTR;
+	if (target == 1)
+		*have_rev = target_revfn(af, name, revision, bestp);
+	else
+		*have_rev = match_revfn(af, name, revision, bestp);
+	mutex_unlock(&xt[af].mutex);
+	return 0;
+}
+
+/*
+ * Report whether revision @revision of extension @name exists.
+ *
+ * @target selects the target lists (1) or the match lists (anything else).
+ * Family @af is scanned first; if the revision is not there and @af is not
+ * NFPROTO_UNSPEC, the family-independent lists are scanned too.  Each list is
+ * walked under the mutex of the family it belongs to.
+ *
+ * Returns 0 with *err = -ENOENT if no extension of that name exists at all
+ * (the caller may then try loading a module).  Otherwise returns 1 with *err
+ * set to the highest revision seen, to -EPROTONOSUPPORT if the name exists but
+ * not in the requested revision, or to -EINTR if a lock wait was interrupted.
+ */
+int xt_find_revision(u8 af, const char *name, u8 revision, int target,
+		     int *err)
+{
+	int have_rev = 0, best = -1;
+
+	if (xt_revision_scan(af, name, revision, target,
+			     &best, &have_rev) != 0 ||
+	    (af != NFPROTO_UNSPEC && !have_rev &&
+	     xt_revision_scan(NFPROTO_UNSPEC, name, revision, target,
+			      &best, &have_rev) != 0)) {
+		*err = -EINTR;
+		return 1;
+	}
+
+	/* Nothing at all?  Return 0 to try loading module. */
+	if (best == -1) {
+		*err = -ENOENT;
+		return 0;
+	}
+
+	*err = best;
+	if (!have_rev)
+		*err = -EPROTONOSUPPORT;
+	return 1;
+}
+EXPORT_SYMBOL_GPL(xt_find_revision);
+
+/*
+ * Validate a match instance before it is accepted into a rule.
+ *
+ * Checks, in order: the aligned match size against @size (skipped when the
+ * match declares matchsize == -1), the table restriction, the hook mask and
+ * the protocol restriction (@proto must equal the required protocol and must
+ * not be inverted), and finally the match's own ->checkentry() hook.
+ *
+ * Returns 0 if every check passes, -EINVAL otherwise.
+ */
+int xt_check_match(struct xt_mtchk_param *par,
+ unsigned int size, u_int8_t proto, bool inv_proto)
+{
+ if (XT_ALIGN(par->match->matchsize) != size &&
+ par->match->matchsize != -1) {
+ /*
+ * ebt_among is exempt from centralized matchsize checking
+ * because it uses a dynamic-size data set.
+ */
+ printk("%s_tables: %s match: invalid size %Zu != %u\n",
+ xt_prefix[par->family], par->match->name,
+ XT_ALIGN(par->match->matchsize), size);
+ return -EINVAL;
+ }
+ if (par->match->table != NULL &&
+ strcmp(par->match->table, par->table) != 0) {
+ printk("%s_tables: %s match: only valid in %s table, not %s\n",
+ xt_prefix[par->family], par->match->name,
+ par->match->table, par->table);
+ return -EINVAL;
+ }
+ /* Every hook the rule is reachable from must be allowed by the match. */
+ if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
+ printk("%s_tables: %s match: bad hook_mask %#x/%#x\n",
+ xt_prefix[par->family], par->match->name,
+ par->hook_mask, par->match->hooks);
+ return -EINVAL;
+ }
+ if (par->match->proto && (par->match->proto != proto || inv_proto)) {
+ printk("%s_tables: %s match: only valid for protocol %u\n",
+ xt_prefix[par->family], par->match->name,
+ par->match->proto);
+ return -EINVAL;
+ }
+ /* ->checkentry() is optional; a false return rejects the match. */
+ if (par->match->checkentry != NULL && !par->match->checkentry(par))
+ return -EINVAL;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xt_check_match);
+
+#ifdef CONFIG_COMPAT
+/*
+ * Record an (offset, delta) pair for family @af.  The first pair becomes the
+ * list head; every later pair is linked in directly behind the head.
+ * Returns 0 on success or -ENOMEM if the node cannot be allocated.
+ */
+int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta)
+{
+	struct compat_delta *head;
+	struct compat_delta *node = kmalloc(sizeof(*node), GFP_KERNEL);
+
+	if (node == NULL)
+		return -ENOMEM;
+
+	node->offset = offset;
+	node->delta = delta;
+
+	head = xt[af].compat_offsets;
+	if (head == NULL) {
+		node->next = NULL;
+		xt[af].compat_offsets = node;
+	} else {
+		node->next = head->next;
+		head->next = node;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xt_compat_add_offset);
+
+/* Free every recorded (offset, delta) pair of family @af and empty the list. */
+void xt_compat_flush_offsets(u_int8_t af)
+{
+	struct compat_delta *node = xt[af].compat_offsets;
+
+	while (node != NULL) {
+		struct compat_delta *following = node->next;
+
+		kfree(node);
+		node = following;
+	}
+	xt[af].compat_offsets = NULL;
+}
+EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
+
+/*
+ * Sum the deltas of all pairs recorded for family @af whose offset lies
+ * strictly below @offset.
+ */
+short xt_compat_calc_jump(u_int8_t af, unsigned int offset)
+{
+	const struct compat_delta *node = xt[af].compat_offsets;
+	short total = 0;
+
+	while (node != NULL) {
+		if (node->offset < offset)
+			total += node->delta;
+		node = node->next;
+	}
+	return total;
+}
+EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
+
+/*
+ * Size difference between the native (XT_ALIGN) and compat (COMPAT_XT_ALIGN)
+ * layout of a match's data.  The compat size falls back to matchsize when the
+ * match declares no compatsize.
+ */
+int xt_compat_match_offset(const struct xt_match *match)
+{
+	u_int16_t csize;
+
+	if (match->compatsize)
+		csize = match->compatsize;
+	else
+		csize = match->matchsize;
+
+	return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize);
+}
+EXPORT_SYMBOL_GPL(xt_compat_match_offset);
+
+/*
+ * Convert one match entry from compat to native layout.
+ *
+ * @m points at the compat entry (it is reinterpreted as struct
+ * compat_xt_entry_match); the native entry is written at *dstptr.  On return
+ * *dstptr has been advanced past the native entry and *size has grown by the
+ * layout difference.  Always returns 0.
+ */
+int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+ unsigned int *size)
+{
+ const struct xt_match *match = m->u.kernel.match;
+ struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
+ int pad, off = xt_compat_match_offset(match);
+ u_int16_t msize = cm->u.user.match_size;
+
+ /* From here on m is the destination; cm still refers to the source. */
+ m = *dstptr;
+ memcpy(m, cm, sizeof(*cm));
+ if (match->compat_from_user)
+ match->compat_from_user(m->data, cm->data);
+ else
+ memcpy(m->data, cm->data, msize - sizeof(*cm));
+ /* Zero the alignment padding that follows the match data. */
+ pad = XT_ALIGN(match->matchsize) - match->matchsize;
+ if (pad > 0)
+ memset(m->data + match->matchsize, 0, pad);
+
+ /* Native entry size = compat entry size + layout difference. */
+ msize += off;
+ m->u.user.match_size = msize;
+
+ *size += off;
+ *dstptr += msize;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
+
+/*
+ * Copy one native match entry to user space in compat layout.
+ *
+ * The entry is written at *dstptr.  On success *dstptr is advanced past the
+ * compat entry, *size shrinks by the layout difference and 0 is returned.
+ * Returns -EFAULT if any user copy (or the match's ->compat_to_user() hook)
+ * fails.
+ */
+int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr,
+ unsigned int *size)
+{
+ const struct xt_match *match = m->u.kernel.match;
+ struct compat_xt_entry_match __user *cm = *dstptr;
+ int off = xt_compat_match_offset(match);
+ u_int16_t msize = m->u.user.match_size - off;
+
+ /*
+ * Copy the header, then overwrite the size with the compat size and
+ * the name field with the match's name.
+ */
+ if (copy_to_user(cm, m, sizeof(*cm)) ||
+ put_user(msize, &cm->u.user.match_size) ||
+ copy_to_user(cm->u.user.name, m->u.kernel.match->name,
+ strlen(m->u.kernel.match->name) + 1))
+ return -EFAULT;
+
+ if (match->compat_to_user) {
+ if (match->compat_to_user((void __user *)cm->data, m->data))
+ return -EFAULT;
+ } else {
+ if (copy_to_user(cm->data, m->data, msize - sizeof(*cm)))
+ return -EFAULT;
+ }
+
+ *size -= off;
+ *dstptr += msize;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
+#endif /* CONFIG_COMPAT */
+
+/*
+ * Validate a target instance before it is accepted into a rule.
+ *
+ * Checks, in order: the aligned target size against @size, the table
+ * restriction, the hook mask and the protocol restriction (@proto must equal
+ * the required protocol and must not be inverted), and finally the target's
+ * own ->checkentry() hook.
+ *
+ * Returns 0 if every check passes, -EINVAL otherwise.
+ */
+int xt_check_target(struct xt_tgchk_param *par,
+ unsigned int size, u_int8_t proto, bool inv_proto)
+{
+ if (XT_ALIGN(par->target->targetsize) != size) {
+ printk("%s_tables: %s target: invalid size %Zu != %u\n",
+ xt_prefix[par->family], par->target->name,
+ XT_ALIGN(par->target->targetsize), size);
+ return -EINVAL;
+ }
+ if (par->target->table != NULL &&
+ strcmp(par->target->table, par->table) != 0) {
+ printk("%s_tables: %s target: only valid in %s table, not %s\n",
+ xt_prefix[par->family], par->target->name,
+ par->target->table, par->table);
+ return -EINVAL;
+ }
+ /* Every hook the rule is reachable from must be allowed by the target. */
+ if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
+ printk("%s_tables: %s target: bad hook_mask %#x/%#x\n",
+ xt_prefix[par->family], par->target->name,
+ par->hook_mask, par->target->hooks);
+ return -EINVAL;
+ }
+ if (par->target->proto && (par->target->proto != proto || inv_proto)) {
+ printk("%s_tables: %s target: only valid for protocol %u\n",
+ xt_prefix[par->family], par->target->name,
+ par->target->proto);
+ return -EINVAL;
+ }
+ /* ->checkentry() is optional; a false return rejects the target. */
+ if (par->target->checkentry != NULL && !par->target->checkentry(par))
+ return -EINVAL;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xt_check_target);
+
+#ifdef CONFIG_COMPAT
+/*
+ * Size difference between the native (XT_ALIGN) and compat (COMPAT_XT_ALIGN)
+ * layout of a target's data.  The compat size falls back to targetsize when
+ * the target declares no compatsize.
+ */
+int xt_compat_target_offset(const struct xt_target *target)
+{
+	u_int16_t csize;
+
+	if (target->compatsize)
+		csize = target->compatsize;
+	else
+		csize = target->targetsize;
+
+	return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize);
+}
+EXPORT_SYMBOL_GPL(xt_compat_target_offset);
+
+/*
+ * Convert one target entry from compat to native layout.
+ *
+ * @t points at the compat entry (it is reinterpreted as struct
+ * compat_xt_entry_target); the native entry is written at *dstptr.  On return
+ * *dstptr has been advanced past the native entry and *size has grown by the
+ * layout difference.
+ */
+void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
+ unsigned int *size)
+{
+ const struct xt_target *target = t->u.kernel.target;
+ struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
+ int pad, off = xt_compat_target_offset(target);
+ u_int16_t tsize = ct->u.user.target_size;
+
+ /* From here on t is the destination; ct still refers to the source. */
+ t = *dstptr;
+ memcpy(t, ct, sizeof(*ct));
+ if (target->compat_from_user)
+ target->compat_from_user(t->data, ct->data);
+ else
+ memcpy(t->data, ct->data, tsize - sizeof(*ct));
+ /* Zero the alignment padding that follows the target data. */
+ pad = XT_ALIGN(target->targetsize) - target->targetsize;
+ if (pad > 0)
+ memset(t->data + target->targetsize, 0, pad);
+
+ /* Native entry size = compat entry size + layout difference. */
+ tsize += off;
+ t->u.user.target_size = tsize;
+
+ *size += off;
+ *dstptr += tsize;
+}
+EXPORT_SYMBOL_GPL(xt_compat_target_from_user);
+
+/*
+ * Copy one native target entry to user space in compat layout.
+ *
+ * The entry is written at *dstptr.  On success *dstptr is advanced past the
+ * compat entry, *size shrinks by the layout difference and 0 is returned.
+ * Returns -EFAULT if any user copy (or the target's ->compat_to_user() hook)
+ * fails.
+ */
+int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr,
+ unsigned int *size)
+{
+ const struct xt_target *target = t->u.kernel.target;
+ struct compat_xt_entry_target __user *ct = *dstptr;
+ int off = xt_compat_target_offset(target);
+ u_int16_t tsize = t->u.user.target_size - off;
+
+ /*
+ * Copy the header, then overwrite the size with the compat size and
+ * the name field with the target's name.
+ */
+ if (copy_to_user(ct, t, sizeof(*ct)) ||
+ put_user(tsize, &ct->u.user.target_size) ||
+ copy_to_user(ct->u.user.name, t->u.kernel.target->name,
+ strlen(t->u.kernel.target->name) + 1))
+ return -EFAULT;
+
+ if (target->compat_to_user) {
+ if (target->compat_to_user((void __user *)ct->data, t->data))
+ return -EFAULT;
+ } else {
+ if (copy_to_user(ct->data, t->data, tsize - sizeof(*ct)))
+ return -EFAULT;
+ }
+
+ *size -= off;
+ *dstptr += tsize;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
+#endif
+
+/*
+ * Allocate a table info block plus one @size byte entries buffer for every
+ * possible CPU, each on that CPU's memory node.  Buffers of up to PAGE_SIZE
+ * come from kmalloc_node(), larger ones from vmalloc_node().
+ *
+ * Returns the new table info, or NULL if @size is implausibly large or any
+ * allocation fails (everything allocated so far is released again).
+ */
+struct xt_table_info *xt_alloc_table_info(unsigned int size)
+{
+	struct xt_table_info *info;
+	int cpu;
+
+	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
+	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages)
+		return NULL;
+
+	info = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL);
+	if (info == NULL)
+		return NULL;
+
+	info->size = size;
+
+	for_each_possible_cpu(cpu) {
+		const int node = cpu_to_node(cpu);
+		void *mem;
+
+		if (size <= PAGE_SIZE)
+			mem = kmalloc_node(size, GFP_KERNEL, node);
+		else
+			mem = vmalloc_node(size, node);
+
+		info->entries[cpu] = mem;
+		if (mem == NULL) {
+			/* Slots not reached yet are still NULL from kzalloc(). */
+			xt_free_table_info(info);
+			return NULL;
+		}
+	}
+
+	return info;
+}
+EXPORT_SYMBOL(xt_alloc_table_info);
+
+/*
+ * Release a table info block and its per-CPU entries buffers.  The buffer
+ * size decides between kfree() and vfree(), mirroring the allocator choice
+ * made in xt_alloc_table_info().
+ */
+void xt_free_table_info(struct xt_table_info *info)
+{
+	const bool from_slab = info->size <= PAGE_SIZE;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		void *mem = info->entries[cpu];
+
+		if (from_slab)
+			kfree(mem);
+		else
+			vfree(mem);
+	}
+	kfree(info);
+}
+EXPORT_SYMBOL(xt_free_table_info);
+
+/*
+ * Find a table of family @af by name in namespace @net.
+ *
+ * On success the table is returned with a reference on its owning module and
+ * with xt[af].mutex STILL HELD; see xt_table_unlock() for the release.
+ * Returns NULL (mutex released) if no table matches, or ERR_PTR(-EINTR) if
+ * the lock wait was interrupted.
+ */
+struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
+				    const char *name)
+{
+	struct xt_table *cur;
+
+	if (mutex_lock_interruptible(&xt[af].mutex) != 0)
+		return ERR_PTR(-EINTR);
+
+	list_for_each_entry(cur, &net->xt.tables[af], list) {
+		if (strcmp(cur->name, name) != 0)
+			continue;
+		if (try_module_get(cur->me))
+			return cur;	/* mutex deliberately kept */
+	}
+
+	mutex_unlock(&xt[af].mutex);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(xt_find_table_lock);
+
+/* Release the mutex of the family @table belongs to. */
+void xt_table_unlock(struct xt_table *table)
+{
+	struct xt_af *fam = &xt[table->af];
+
+	mutex_unlock(&fam->mutex);
+}
+EXPORT_SYMBOL_GPL(xt_table_unlock);
+
+#ifdef CONFIG_COMPAT
+/* Take the compat mutex of family @af. */
+void xt_compat_lock(u_int8_t af)
+{
+	struct xt_af *fam = &xt[af];
+
+	mutex_lock(&fam->compat_mutex);
+}
+EXPORT_SYMBOL_GPL(xt_compat_lock);
+
+/* Release the compat mutex of family @af. */
+void xt_compat_unlock(u_int8_t af)
+{
+	struct xt_af *fam = &xt[af];
+
+	mutex_unlock(&fam->compat_mutex);
+}
+EXPORT_SYMBOL_GPL(xt_compat_unlock);
+#endif
+
+/*
+ * Swap @newinfo in as the private data of @table.
+ *
+ * The swap only happens if @num_counters equals the entry count of the
+ * current private data; otherwise *error is set to -EAGAIN and NULL is
+ * returned.  On success the previous table info is returned and its
+ * initial_entries value is carried over to @newinfo.  The check and the swap
+ * are done under the table's write lock with bottom halves disabled.
+ */
+struct xt_table_info *
+xt_replace_table(struct xt_table *table,
+		 unsigned int num_counters,
+		 struct xt_table_info *newinfo,
+		 int *error)
+{
+	struct xt_table_info *oldinfo;
+
+	/* Do the substitution. */
+	write_lock_bh(&table->lock);
+	oldinfo = table->private;
+
+	/* Check inside lock: is the old number correct? */
+	if (num_counters != oldinfo->number) {
+		duprintf("num_counters != table->private->number (%u/%u)\n",
+			 num_counters, oldinfo->number);
+		write_unlock_bh(&table->lock);
+		*error = -EAGAIN;
+		return NULL;
+	}
+
+	table->private = newinfo;
+	newinfo->initial_entries = oldinfo->initial_entries;
+	write_unlock_bh(&table->lock);
+
+	return oldinfo;
+}
+EXPORT_SYMBOL_GPL(xt_replace_table);
+
+/*
+ * Register a copy of @table in namespace @net.
+ *
+ * The caller's @table is duplicated so the same template can be registered
+ * more than once; the duplicate is what gets linked in and returned.
+ * @bootstrap is installed as temporary private data so that the initial
+ * ruleset @newinfo can be put in place through xt_replace_table().
+ *
+ * Returns the registered copy, or an ERR_PTR(): -ENOMEM if the copy cannot be
+ * allocated, the mutex_lock_interruptible() result if the lock is not taken,
+ * -EEXIST if a table of that name already exists, or the error reported by
+ * xt_replace_table().
+ */
+struct xt_table *xt_register_table(struct net *net, struct xt_table *table,
+ struct xt_table_info *bootstrap,
+ struct xt_table_info *newinfo)
+{
+ int ret;
+ struct xt_table_info *private;
+ struct xt_table *t;
+
+ /* Don't add one object to multiple lists. */
+ table = kmemdup(table, sizeof(struct xt_table), GFP_KERNEL);
+ if (!table) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = mutex_lock_interruptible(&xt[table->af].mutex);
+ if (ret != 0)
+ goto out_free;
+
+ /* Don't autoload: we'd eat our tail... */
+ list_for_each_entry(t, &net->xt.tables[table->af], list) {
+ if (strcmp(t->name, table->name) == 0) {
+ ret = -EEXIST;
+ goto unlock;
+ }
+ }
+
+ /* Simplifies replace_table code. */
+ table->private = bootstrap;
+ rwlock_init(&table->lock);
+ /* num_counters == 0 has to match bootstrap->number for this to succeed. */
+ if (!xt_replace_table(table, 0, newinfo, &ret))
+ goto unlock;
+
+ private = table->private;
+ duprintf("table->private->number = %u\n", private->number);
+
+ /* save number of initial entries */
+ private->initial_entries = private->number;
+
+ list_add(&table->list, &net->xt.tables[table->af]);
+ mutex_unlock(&xt[table->af].mutex);
+ return table;
+
+ /* Error unwinding: each label undoes one more step than the next. */
+ unlock:
+ mutex_unlock(&xt[table->af].mutex);
+out_free:
+ kfree(table);
+out:
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(xt_register_table);
+
+/*
+ * Unlink @table from its family list and free the table object itself.
+ * Returns the table's private data (table->private), which is NOT freed here.
+ */
+void *xt_unregister_table(struct xt_table *table)
+{
+ struct xt_table_info *private;
+
+ mutex_lock(&xt[table->af].mutex);
+ private = table->private;
+ list_del(&table->list);
+ mutex_unlock(&xt[table->af].mutex);
+ kfree(table);
+
+ return private;
+}
+EXPORT_SYMBOL_GPL(xt_unregister_table);
+
+#ifdef CONFIG_PROC_FS
+/* Per-open state of the "<prefix>_tables_names" seq_file. */
+struct xt_names_priv {
+ struct seq_net_private p; /* placed first; the file is opened with seq_open_net() */
+ u_int8_t af; /* family, filled in from the proc entry's data by xt_table_open() */
+};
+/*
+ * seq_file ->start: take the family mutex (dropped in xt_table_seq_stop())
+ * and position the iterator at *pos in the namespace's table list.
+ */
+static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct xt_names_priv *state = seq->private;
+	struct net *ns = seq_file_net(seq);
+
+	mutex_lock(&xt[state->af].mutex);
+	return seq_list_start(&ns->xt.tables[state->af], *pos);
+}
+
+/* seq_file ->next: advance to the following entry of the table list. */
+static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct xt_names_priv *state = seq->private;
+	struct net *ns = seq_file_net(seq);
+
+	return seq_list_next(v, &ns->xt.tables[state->af], pos);
+}
+
+/* seq_file ->stop: release the family mutex taken in xt_table_seq_start(). */
+static void xt_table_seq_stop(struct seq_file *seq, void *v)
+{
+	struct xt_names_priv *state = seq->private;
+
+	mutex_unlock(&xt[state->af].mutex);
+}
+
+/* seq_file ->show: print the table's name on its own line; skip empty names. */
+static int xt_table_seq_show(struct seq_file *seq, void *v)
+{
+	const struct xt_table *entry = list_entry(v, struct xt_table, list);
+
+	if (entry->name[0] == '\0')
+		return 0;
+	return seq_printf(seq, "%s\n", entry->name);
+}
+
+/* Iterator over the tables of one family; ->start locks, ->stop unlocks. */
+static const struct seq_operations xt_table_seq_ops = {
+ .start = xt_table_seq_start,
+ .next = xt_table_seq_next,
+ .stop = xt_table_seq_stop,
+ .show = xt_table_seq_show,
+};
+
+/*
+ * Open handler for the table names file: sets up a namespace-aware seq_file
+ * and stores the family taken from the proc entry's data in the private state.
+ */
+static int xt_table_open(struct inode *inode, struct file *file)
+{
+	struct xt_names_priv *state;
+	int ret = seq_open_net(inode, file, &xt_table_seq_ops,
+			       sizeof(struct xt_names_priv));
+
+	if (ret)
+		return ret;
+
+	state = ((struct seq_file *)file->private_data)->private;
+	state->af = (unsigned long)PDE(inode)->data;
+	return 0;
+}
+
+/*
+ * File operations of the table names file.  Release uses seq_release_net()
+ * to pair with seq_open_net() in xt_table_open().
+ */
+static const struct file_operations xt_table_ops = {
+ .owner = THIS_MODULE,
+ .open = xt_table_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+/*
+ * seq_file ->start: take the family mutex (dropped in xt_match_seq_stop())
+ * and position the iterator at *pos in the family's match list.  The family
+ * comes from the data of the proc entry stored in seq->private.
+ */
+static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct proc_dir_entry *entry = seq->private;
+	u_int16_t af = (unsigned long)entry->data;
+	struct xt_af *fam = &xt[af];
+
+	mutex_lock(&fam->mutex);
+	return seq_list_start(&fam->match, *pos);
+}
+
+/* seq_file ->next: advance to the following entry of the match list. */
+static void *xt_match_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct proc_dir_entry *entry = seq->private;
+	u_int16_t af = (unsigned long)entry->data;
+	struct xt_af *fam = &xt[af];
+
+	return seq_list_next(v, &fam->match, pos);
+}
+
+/* seq_file ->stop: release the family mutex taken in xt_match_seq_start(). */
+static void xt_match_seq_stop(struct seq_file *seq, void *v)
+{
+	struct proc_dir_entry *entry = seq->private;
+	u_int16_t af = (unsigned long)entry->data;
+	struct xt_af *fam = &xt[af];
+
+	mutex_unlock(&fam->mutex);
+}
+
+/* seq_file ->show: print the match's name on its own line; skip empty names. */
+static int xt_match_seq_show(struct seq_file *seq, void *v)
+{
+	const struct xt_match *entry = list_entry(v, struct xt_match, list);
+
+	if (entry->name[0] == '\0')
+		return 0;
+	return seq_printf(seq, "%s\n", entry->name);
+}
+
+/* Iterator over the matches of one family; ->start locks, ->stop unlocks. */
+static const struct seq_operations xt_match_seq_ops = {
+ .start = xt_match_seq_start,
+ .next = xt_match_seq_next,
+ .stop = xt_match_seq_stop,
+ .show = xt_match_seq_show,
+};
+
+/*
+ * Open handler for the matches file: on a successful seq_open() the proc
+ * entry is stored in seq->private so the iterator can read the family from it.
+ */
+static int xt_match_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &xt_match_seq_ops);
+
+	if (ret == 0)
+		((struct seq_file *)file->private_data)->private = PDE(inode);
+	return ret;
+}
+
+/* File operations of the "<prefix>_tables_matches" proc file. */
+static const struct file_operations xt_match_ops = {
+ .owner = THIS_MODULE,
+ .open = xt_match_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/*
+ * seq_file ->start: take the family mutex (dropped in xt_target_seq_stop())
+ * and position the iterator at *pos in the family's target list.  The family
+ * comes from the data of the proc entry stored in seq->private.
+ */
+static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct proc_dir_entry *entry = seq->private;
+	u_int16_t af = (unsigned long)entry->data;
+	struct xt_af *fam = &xt[af];
+
+	mutex_lock(&fam->mutex);
+	return seq_list_start(&fam->target, *pos);
+}
+
+/* seq_file ->next: advance to the following entry of the target list. */
+static void *xt_target_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct proc_dir_entry *entry = seq->private;
+	u_int16_t af = (unsigned long)entry->data;
+	struct xt_af *fam = &xt[af];
+
+	return seq_list_next(v, &fam->target, pos);
+}
+
+/* seq_file ->stop: release the family mutex taken in xt_target_seq_start(). */
+static void xt_target_seq_stop(struct seq_file *seq, void *v)
+{
+	struct proc_dir_entry *entry = seq->private;
+	u_int16_t af = (unsigned long)entry->data;
+	struct xt_af *fam = &xt[af];
+
+	mutex_unlock(&fam->mutex);
+}
+
+/* seq_file ->show: print the target's name on its own line; skip empty names. */
+static int xt_target_seq_show(struct seq_file *seq, void *v)
+{
+	const struct xt_target *entry = list_entry(v, struct xt_target, list);
+
+	if (entry->name[0] == '\0')
+		return 0;
+	return seq_printf(seq, "%s\n", entry->name);
+}
+
+/* Iterator over the targets of one family; ->start locks, ->stop unlocks. */
+static const struct seq_operations xt_target_seq_ops = {
+ .start = xt_target_seq_start,
+ .next = xt_target_seq_next,
+ .stop = xt_target_seq_stop,
+ .show = xt_target_seq_show,
+};
+
+/*
+ * Open handler for the targets file: on a successful seq_open() the proc
+ * entry is stored in seq->private so the iterator can read the family from it.
+ */
+static int xt_target_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &xt_target_seq_ops);
+
+	if (ret == 0)
+		((struct seq_file *)file->private_data)->private = PDE(inode);
+	return ret;
+}
+
+/* File operations of the "<prefix>_tables_targets" proc file. */
+static const struct file_operations xt_target_ops = {
+ .owner = THIS_MODULE,
+ .open = xt_target_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+#define FORMAT_TABLES "_tables_names"
+#define FORMAT_MATCHES "_tables_matches"
+#define FORMAT_TARGETS "_tables_targets"
+
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Per-namespace setup for family @af.  With CONFIG_PROC_FS this creates the
+ * three proc files "<prefix>_tables_names", "<prefix>_tables_matches" and
+ * "<prefix>_tables_targets" in net->proc_net, each carrying @af as its data.
+ *
+ * Returns 0 on success, -EINVAL if @af is out of range or has no name prefix,
+ * or -ENOMEM if a proc entry cannot be created (entries created before the
+ * failing one are removed again).
+ */
+int xt_proto_init(struct net *net, u_int8_t af)
+{
+#ifdef CONFIG_PROC_FS
+	char buf[XT_FUNCTION_MAXNAMELEN];
+	struct proc_dir_entry *proc;
+#endif
+
+	/* A family without a prefix string would hand NULL to strlcpy(). */
+	if (af >= ARRAY_SIZE(xt_prefix) || xt_prefix[af] == NULL)
+		return -EINVAL;
+
+
+#ifdef CONFIG_PROC_FS
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
+	strlcat(buf, FORMAT_TABLES, sizeof(buf));
+	proc = proc_create_data(buf, 0440, net->proc_net, &xt_table_ops,
+				(void *)(unsigned long)af);
+	if (!proc)
+		goto out;
+
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
+	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
+	proc = proc_create_data(buf, 0440, net->proc_net, &xt_match_ops,
+				(void *)(unsigned long)af);
+	if (!proc)
+		goto out_remove_tables;
+
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
+	strlcat(buf, FORMAT_TARGETS, sizeof(buf));
+	proc = proc_create_data(buf, 0440, net->proc_net, &xt_target_ops,
+				(void *)(unsigned long)af);
+	if (!proc)
+		goto out_remove_matches;
+#endif
+
+	return 0;
+
+#ifdef CONFIG_PROC_FS
+out_remove_matches:
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
+	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
+	proc_net_remove(net, buf);
+
+out_remove_tables:
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
+	strlcat(buf, FORMAT_TABLES, sizeof(buf));
+	proc_net_remove(net, buf);
+out:
+	/* A real errno instead of a bare -1, which callers would see as -EPERM. */
+	return -ENOMEM;
+#endif
+}
+EXPORT_SYMBOL_GPL(xt_proto_init);
+
+/*
+ * Per-namespace teardown for family @af: with CONFIG_PROC_FS, removes the
+ * three proc files created by xt_proto_init().  Does nothing for a family
+ * that is out of range or has no name prefix.
+ */
+void xt_proto_fini(struct net *net, u_int8_t af)
+{
+#ifdef CONFIG_PROC_FS
+	char buf[XT_FUNCTION_MAXNAMELEN];
+
+	/*
+	 * xt_proto_init() rejects out-of-range families, so there is nothing
+	 * to remove for them; a NULL prefix must not reach strlcpy() either.
+	 */
+	if (af >= ARRAY_SIZE(xt_prefix) || xt_prefix[af] == NULL)
+		return;
+
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
+	strlcat(buf, FORMAT_TABLES, sizeof(buf));
+	proc_net_remove(net, buf);
+
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
+	strlcat(buf, FORMAT_TARGETS, sizeof(buf));
+	proc_net_remove(net, buf);
+
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
+	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
+	proc_net_remove(net, buf);
+#endif /*CONFIG_PROC_FS*/
+}
+EXPORT_SYMBOL_GPL(xt_proto_fini);
+
+/* Namespace init hook: start every family's table list empty.  Returns 0. */
+static int __net_init xt_net_init(struct net *net)
+{
+	int af = 0;
+
+	while (af < NFPROTO_NUMPROTO) {
+		INIT_LIST_HEAD(&net->xt.tables[af]);
+		af++;
+	}
+	return 0;
+}
+
+/* Per-namespace hooks; only an init hook is provided, no exit hook. */
+static struct pernet_operations xt_net_ops = {
+ .init = xt_net_init,
+};
+
+/*
+ * Module init: allocate the per-family state array, initialise each entry's
+ * mutexes and lists, then register the per-namespace operations.
+ * Returns 0 on success, -ENOMEM if the array cannot be allocated, or the
+ * negative result of register_pernet_subsys() (the array is freed again).
+ */
+static int __init xt_init(void)
+{
+ int i, rv;
+
+ xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);
+ if (!xt)
+ return -ENOMEM;
+
+ /* kmalloc() does not zero memory, so every field is set up explicitly. */
+ for (i = 0; i < NFPROTO_NUMPROTO; i++) {
+ mutex_init(&xt[i].mutex);
+#ifdef CONFIG_COMPAT
+ mutex_init(&xt[i].compat_mutex);
+ xt[i].compat_offsets = NULL;
+#endif
+ INIT_LIST_HEAD(&xt[i].target);
+ INIT_LIST_HEAD(&xt[i].match);
+ }
+ rv = register_pernet_subsys(&xt_net_ops);
+ if (rv < 0)
+ kfree(xt);
+ return rv;
+}
+
+/* Module exit: unregister the per-namespace operations, then free the state array. */
+static void __exit xt_fini(void)
+{
+ unregister_pernet_subsys(&xt_net_ops);
+ kfree(xt);
+}
+
+module_init(xt_init);
+module_exit(xt_fini);
+
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
new file mode 100644
index 0000000..011bc80
--- /dev/null
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -0,0 +1,61 @@
+/*
+ * This is a module which is used for setting the skb->priority field
+ * of an skb for qdisc classification.
+ */
+
+/* (C) 2001-2002 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_CLASSIFY.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: Qdisc classification");
+MODULE_ALIAS("ipt_CLASSIFY");
+MODULE_ALIAS("ip6t_CLASSIFY");
+
+/*
+ * Target handler: copy the configured priority into skb->priority and let
+ * rule traversal continue.
+ */
+static unsigned int
+classify_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_classify_target_info *cfg = par->targinfo;
+
+	skb->priority = cfg->priority;
+
+	return XT_CONTINUE;
+}
+
+/*
+ * Registration record of the CLASSIFY target: family-independent, limited to
+ * the "mangle" table and to the LOCAL_OUT, FORWARD and POST_ROUTING hooks.
+ */
+static struct xt_target classify_tg_reg __read_mostly = {
+ .name = "CLASSIFY",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .table = "mangle",
+ .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_POST_ROUTING),
+ .target = classify_tg,
+ .targetsize = sizeof(struct xt_classify_target_info),
+ .me = THIS_MODULE,
+};
+
+/* Module init: register the CLASSIFY target; returns xt_register_target()'s result. */
+static int __init classify_tg_init(void)
+{
+ return xt_register_target(&classify_tg_reg);
+}
+
+/* Module exit: unregister the CLASSIFY target. */
+static void __exit classify_tg_exit(void)
+{
+ xt_unregister_target(&classify_tg_reg);
+}
+
+module_init(classify_tg_init);
+module_exit(classify_tg_exit);
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
new file mode 100644
index 0000000..d6e5ab4
--- /dev/null
+++ b/net/netfilter/xt_CONNMARK.c
@@ -0,0 +1,225 @@
+/*
+ * xt_CONNMARK - Netfilter module to modify the connection mark values
+ *
+ * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+ * by Henrik Nordstrom <hno@marasystems.com>
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ * Jan Engelhardt <jengelh@computergmbh.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
+MODULE_DESCRIPTION("Xtables: connection mark modification");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_CONNMARK");
+MODULE_ALIAS("ip6t_CONNMARK");
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_CONNMARK.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+
+/*
+ * Revision 0 target handler.  Depending on the configured mode it
+ *  - SET:     replaces the masked bits of the connection mark with ->mark,
+ *  - SAVE:    copies the masked bits of the packet mark into the connection,
+ *  - RESTORE: copies the masked bits of the connection mark into the packet.
+ * A mark-changed event is cached only when the connection mark really
+ * changes.  Packets without a conntrack entry are left alone.  Always
+ * returns XT_CONTINUE.
+ */
+static unsigned int
+connmark_tg_v0(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_connmark_target_info *cfg = par->targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	u_int32_t newmark;
+
+	if (!ct)
+		return XT_CONTINUE;
+
+	switch(cfg->mode) {
+	case XT_CONNMARK_SET:
+		newmark = (ct->mark & ~cfg->mask) | cfg->mark;
+		break;
+	case XT_CONNMARK_SAVE:
+		newmark = (ct->mark & ~cfg->mask) | (skb->mark & cfg->mask);
+		break;
+	case XT_CONNMARK_RESTORE:
+		/* Flip exactly those masked bits in which the two marks differ. */
+		skb->mark ^= (ct->mark ^ skb->mark) & cfg->mask;
+		return XT_CONTINUE;
+	default:
+		return XT_CONTINUE;
+	}
+
+	if (ct->mark != newmark) {
+		ct->mark = newmark;
+		nf_conntrack_event_cache(IPCT_MARK, ct);
+	}
+
+	return XT_CONTINUE;
+}
+
+/*
+ * Revision 1 target handler.  Depending on the configured mode it
+ *  - SET:     clears the ctmask bits of the connection mark and XORs in ctmark,
+ *  - SAVE:    clears the ctmask bits of the connection mark and XORs in the
+ *             nfmask bits of the packet mark,
+ *  - RESTORE: clears the nfmask bits of the packet mark and XORs in the
+ *             ctmask bits of the connection mark.
+ * A mark-changed event is cached only when the connection mark really
+ * changes.  Packets without a conntrack entry are left alone.  Always
+ * returns XT_CONTINUE.
+ */
+static unsigned int
+connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_connmark_tginfo1 *cfg = par->targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	u_int32_t newmark;
+
+	if (ct == NULL)
+		return XT_CONTINUE;
+
+	switch (cfg->mode) {
+	case XT_CONNMARK_SET:
+		newmark = (ct->mark & ~cfg->ctmask) ^ cfg->ctmark;
+		break;
+	case XT_CONNMARK_SAVE:
+		newmark = (ct->mark & ~cfg->ctmask) ^
+			  (skb->mark & cfg->nfmask);
+		break;
+	case XT_CONNMARK_RESTORE:
+		skb->mark = (skb->mark & ~cfg->nfmask) ^
+			    (ct->mark & cfg->ctmask);
+		return XT_CONTINUE;
+	default:
+		return XT_CONTINUE;
+	}
+
+	if (ct->mark != newmark) {
+		ct->mark = newmark;
+		nf_conntrack_event_cache(IPCT_MARK, ct);
+	}
+
+	return XT_CONTINUE;
+}
+
+/*
+ * Revision 0 checkentry hook.  Rejects RESTORE mode outside the "mangle"
+ * table and mark/mask values above 32 bits, then takes a reference on the
+ * conntrack support of the rule's family.  Returns true if the rule is
+ * acceptable.
+ */
+static bool connmark_tg_check_v0(const struct xt_tgchk_param *par)
+{
+	const struct xt_connmark_target_info *cfg = par->targinfo;
+
+	if (cfg->mode == XT_CONNMARK_RESTORE &&
+	    strcmp(par->table, "mangle") != 0) {
+		printk(KERN_WARNING "CONNMARK: restore can only be "
+		       "called from \"mangle\" table, not \"%s\"\n",
+		       par->table);
+		return false;
+	}
+
+	if (cfg->mark > 0xffffffff || cfg->mask > 0xffffffff) {
+		printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
+		return false;
+	}
+
+	if (nf_ct_l3proto_try_module_get(par->family) >= 0)
+		return true;
+
+	printk(KERN_WARNING "can't load conntrack support for "
+	       "proto=%u\n", par->family);
+	return false;
+}
+
+/*
+ * Revision 1 checkentry hook: take a reference on the conntrack support of
+ * the rule's family.  Returns false (after logging a warning) if that fails.
+ */
+static bool connmark_tg_check(const struct xt_tgchk_param *par)
+{
+	if (nf_ct_l3proto_try_module_get(par->family) >= 0)
+		return true;
+
+	printk(KERN_WARNING "cannot load conntrack support for "
+	       "proto=%u\n", par->family);
+	return false;
+}
+
+/* Destroy hook: drop the conntrack module reference taken by the checkentry hooks. */
+static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family);
+}
+
+#ifdef CONFIG_COMPAT
+/* Compat layout of struct xt_connmark_target_info: mark/mask are compat_ulong_t. */
+struct compat_xt_connmark_target_info {
+ compat_ulong_t mark, mask;
+ u_int8_t mode;
+ u_int8_t __pad1;
+ u_int16_t __pad2;
+};
+
+/*
+ * Convert revision 0 target data from the compat layout at @src to the native
+ * layout at @dst.  A native temporary is built first and then copied out.
+ */
+static void connmark_tg_compat_from_user_v0(void *dst, void *src)
+{
+ const struct compat_xt_connmark_target_info *cm = src;
+ struct xt_connmark_target_info m = {
+ .mark = cm->mark,
+ .mask = cm->mask,
+ .mode = cm->mode,
+ };
+ memcpy(dst, &m, sizeof(m));
+}
+
+/*
+ * Convert revision 0 target data from the native layout at @src to the compat
+ * layout and copy it to user space at @dst.
+ * Returns 0 on success or -EFAULT if the user copy fails.
+ */
+static int connmark_tg_compat_to_user_v0(void __user *dst, void *src)
+{
+ const struct xt_connmark_target_info *m = src;
+ struct compat_xt_connmark_target_info cm = {
+ .mark = m->mark,
+ .mask = m->mask,
+ .mode = m->mode,
+ };
+ return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
+/*
+ * Registration records of the CONNMARK target, both family-independent.
+ * Revision 0 uses struct xt_connmark_target_info and, under CONFIG_COMPAT,
+ * the compat converters; revision 1 uses struct xt_connmark_tginfo1 and
+ * declares no compat handling.
+ */
+static struct xt_target connmark_tg_reg[] __read_mostly = {
+ {
+ .name = "CONNMARK",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connmark_tg_check_v0,
+ .destroy = connmark_tg_destroy,
+ .target = connmark_tg_v0,
+ .targetsize = sizeof(struct xt_connmark_target_info),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_xt_connmark_target_info),
+ .compat_from_user = connmark_tg_compat_from_user_v0,
+ .compat_to_user = connmark_tg_compat_to_user_v0,
+#endif
+ .me = THIS_MODULE
+ },
+ {
+ .name = "CONNMARK",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connmark_tg_check,
+ .target = connmark_tg,
+ .targetsize = sizeof(struct xt_connmark_tginfo1),
+ .destroy = connmark_tg_destroy,
+ .me = THIS_MODULE,
+ },
+};
+
+/* Module init: register both CONNMARK revisions; returns xt_register_targets()'s result. */
+static int __init connmark_tg_init(void)
+{
+ return xt_register_targets(connmark_tg_reg,
+ ARRAY_SIZE(connmark_tg_reg));
+}
+
+/* Module exit: unregister both CONNMARK revisions. */
+static void __exit connmark_tg_exit(void)
+{
+ xt_unregister_targets(connmark_tg_reg, ARRAY_SIZE(connmark_tg_reg));
+}
+
+module_init(connmark_tg_init);
+module_exit(connmark_tg_exit);
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
new file mode 100644
index 0000000..b54c375
--- /dev/null
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -0,0 +1,144 @@
+/*
+ * This module is used to copy security markings from packets
+ * to connections, and restore security markings from connections
+ * back to packets. This would normally be performed in conjunction
+ * with the SECMARK target and state match.
+ *
+ * Based somewhat on CONNMARK:
+ * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+ * by Henrik Nordstrom <hno@marasystems.com>
+ *
+ * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_CONNSECMARK.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+
+#define PFX "CONNSECMARK: "
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris@redhat.com>");
+MODULE_DESCRIPTION("Xtables: target for copying between connection and security mark");
+MODULE_ALIAS("ipt_CONNSECMARK");
+MODULE_ALIAS("ip6t_CONNSECMARK");
+
+/*
+ * Save direction: when the packet carries a security mark and its
+ * connection does not, copy the packet's mark onto the connection and
+ * cache an IPCT_SECMARK conntrack event.
+ */
+static void secmark_save(const struct sk_buff *skb)
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+
+	/* An unmarked packet has nothing to propagate. */
+	if (!skb->secmark)
+		return;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	/* No conntrack entry, or the connection is already marked. */
+	if (!ct || ct->secmark)
+		return;
+
+	ct->secmark = skb->secmark;
+	nf_conntrack_event_cache(IPCT_SECMARK, ct);
+}
+
+/*
+ * Restore direction: when the packet has no security mark but its
+ * connection has one, copy the connection's mark onto the packet.
+ */
+static void secmark_restore(struct sk_buff *skb)
+{
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+
+	/* Never overwrite a mark the packet already carries. */
+	if (skb->secmark)
+		return;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || !ct->secmark)
+		return;
+
+	skb->secmark = ct->secmark;
+}
+
+/*
+ * CONNSECMARK target: dispatch on the configured mode to either save the
+ * packet's security mark to the connection or restore it from there.
+ * Any other mode hits BUG(); connsecmark_tg_check() rejects such rules.
+ * Always returns XT_CONTINUE.
+ */
+static unsigned int
+connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_connsecmark_target_info *info = par->targinfo;
+
+	if (info->mode == CONNSECMARK_SAVE)
+		secmark_save(skb);
+	else if (info->mode == CONNSECMARK_RESTORE)
+		secmark_restore(skb);
+	else
+		BUG();
+
+	return XT_CONTINUE;
+}
+
+/*
+ * Rule validation for CONNSECMARK.
+ *
+ * Accepts the rule only when it lives in the "mangle" or "security" table,
+ * the mode is CONNSECMARK_SAVE or CONNSECMARK_RESTORE, and a reference on
+ * the conntrack l3proto module for par->family can be taken. Each rejection
+ * is logged. Returns true when the rule is accepted, false otherwise.
+ */
+static bool connsecmark_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_connsecmark_target_info *info = par->targinfo;
+	bool table_ok;
+
+	table_ok = strcmp(par->table, "mangle") == 0 ||
+		   strcmp(par->table, "security") == 0;
+	if (!table_ok) {
+		printk(KERN_INFO PFX "target only valid in the \'mangle\' "
+		       "or \'security\' tables, not \'%s\'.\n", par->table);
+		return false;
+	}
+
+	if (info->mode != CONNSECMARK_SAVE &&
+	    info->mode != CONNSECMARK_RESTORE) {
+		printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode);
+		return false;
+	}
+
+	/* Released again in connsecmark_tg_destroy(). */
+	if (nf_ct_l3proto_try_module_get(par->family) >= 0)
+		return true;
+
+	printk(KERN_WARNING "can't load conntrack support for "
+	       "proto=%u\n", par->family);
+	return false;
+}
+
+/*
+ * Rule teardown: drop the conntrack l3proto module reference that
+ * connsecmark_tg_check() took for this family.
+ */
+static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family);
+}
+
+/* x_tables registration for "CONNSECMARK", revision 0, NFPROTO_UNSPEC. */
+static struct xt_target connsecmark_tg_reg __read_mostly = {
+ .name = "CONNSECMARK",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connsecmark_tg_check,
+ .destroy = connsecmark_tg_destroy,
+ .target = connsecmark_tg,
+ .targetsize = sizeof(struct xt_connsecmark_target_info),
+ .me = THIS_MODULE,
+};
+
+/* Module init: register the CONNSECMARK target with x_tables. */
+static int __init connsecmark_tg_init(void)
+{
+ return xt_register_target(&connsecmark_tg_reg);
+}
+
+/* Module exit: unregister the CONNSECMARK target. */
+static void __exit connsecmark_tg_exit(void)
+{
+ xt_unregister_target(&connsecmark_tg_reg);
+}
+
+module_init(connsecmark_tg_init);
+module_exit(connsecmark_tg_exit);
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
new file mode 100644
index 0000000..6a347e7
--- /dev/null
+++ b/net/netfilter/xt_DSCP.c
@@ -0,0 +1,210 @@
+/* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * See RFC2474 for a description of the DSCP field within the IP Header.
+*/
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/dsfield.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_DSCP.h>
+#include <linux/netfilter_ipv4/ipt_TOS.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: DSCP/TOS field modification");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_DSCP");
+MODULE_ALIAS("ip6t_DSCP");
+MODULE_ALIAS("ipt_TOS");
+MODULE_ALIAS("ip6t_TOS");
+
+/*
+ * DSCP target, IPv4: if the packet's DSCP differs from the configured one,
+ * make the IP header writable and replace the DSCP bits of the DS field.
+ *
+ * Returns XT_CONTINUE, or NF_DROP when the header cannot be made writable.
+ */
+static unsigned int
+dscp_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_DSCP_info *info = par->targinfo;
+	u_int8_t current_dscp;
+
+	current_dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
+	if (current_dscp == info->dscp)
+		return XT_CONTINUE;
+
+	if (!skb_make_writable(skb, sizeof(struct iphdr)))
+		return NF_DROP;
+
+	/* Header pointer is fetched again after skb_make_writable(). */
+	ipv4_change_dsfield(ip_hdr(skb), (__u8)(~XT_DSCP_MASK),
+			    info->dscp << XT_DSCP_SHIFT);
+	return XT_CONTINUE;
+}
+
+/*
+ * DSCP target, IPv6: if the packet's DSCP differs from the configured one,
+ * make the IPv6 header writable and replace the DSCP bits of the DS field.
+ *
+ * Returns XT_CONTINUE, or NF_DROP when the header cannot be made writable.
+ */
+static unsigned int
+dscp_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_DSCP_info *info = par->targinfo;
+	u_int8_t current_dscp;
+
+	current_dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
+	if (current_dscp == info->dscp)
+		return XT_CONTINUE;
+
+	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+		return NF_DROP;
+
+	/* Header pointer is fetched again after skb_make_writable(). */
+	ipv6_change_dsfield(ipv6_hdr(skb), (__u8)(~XT_DSCP_MASK),
+			    info->dscp << XT_DSCP_SHIFT);
+	return XT_CONTINUE;
+}
+
+/*
+ * Rule validation for DSCP: the requested value must not exceed
+ * XT_DSCP_MAX. Logs a warning and returns false otherwise.
+ */
+static bool dscp_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_DSCP_info *info = par->targinfo;
+
+	if (info->dscp <= XT_DSCP_MAX)
+		return true;
+
+	printk(KERN_WARNING "DSCP: dscp %x out of range\n", info->dscp);
+	return false;
+}
+
+/*
+ * TOS target, revision 0 (IPv4): when the TOS bits of the header differ
+ * from the configured value, rewrite them while keeping the precedence
+ * bits, and patch the IP header checksum for the changed byte.
+ *
+ * Returns XT_CONTINUE, or NF_DROP when the header cannot be made writable.
+ */
+static unsigned int
+tos_tg_v0(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct ipt_tos_target_info *info = par->targinfo;
+	struct iphdr *iph = ip_hdr(skb);
+	u_int8_t previous;
+
+	if ((iph->tos & IPTOS_TOS_MASK) == info->tos)
+		return XT_CONTINUE;
+
+	if (!skb_make_writable(skb, sizeof(struct iphdr)))
+		return NF_DROP;
+
+	/* Re-read the header pointer after skb_make_writable(). */
+	iph = ip_hdr(skb);
+	previous = iph->tos;
+	iph->tos = (previous & IPTOS_PREC_MASK) | info->tos;
+	csum_replace2(&iph->check, htons(previous), htons(iph->tos));
+
+	return XT_CONTINUE;
+}
+
+/*
+ * Rule validation for TOS revision 0: only the five named TOS values are
+ * accepted; anything else is logged and rejected.
+ */
+static bool tos_tg_check_v0(const struct xt_tgchk_param *par)
+{
+	const struct ipt_tos_target_info *info = par->targinfo;
+	const uint8_t tos = info->tos;
+
+	switch (tos) {
+	case IPTOS_LOWDELAY:
+	case IPTOS_THROUGHPUT:
+	case IPTOS_RELIABILITY:
+	case IPTOS_MINCOST:
+	case IPTOS_NORMALSVC:
+		return true;
+	default:
+		printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
+		return false;
+	}
+}
+
+/*
+ * TOS target, revision 1, IPv4: clear the bits selected by tos_mask in the
+ * DS field, XOR in tos_value, and rewrite the header only when the result
+ * differs from the current value.
+ *
+ * Returns XT_CONTINUE, or NF_DROP when the header cannot be made writable.
+ */
+static unsigned int
+tos_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_tos_target_info *info = par->targinfo;
+	u_int8_t before, after;
+
+	before = ipv4_get_dsfield(ip_hdr(skb));
+	after = (before & ~info->tos_mask) ^ info->tos_value;
+	if (before == after)
+		return XT_CONTINUE;
+
+	if (!skb_make_writable(skb, sizeof(struct iphdr)))
+		return NF_DROP;
+
+	/* Header pointer is fetched again after skb_make_writable(). */
+	ipv4_change_dsfield(ip_hdr(skb), 0, after);
+	return XT_CONTINUE;
+}
+
+/*
+ * TOS target, revision 1, IPv6: clear the bits selected by tos_mask in the
+ * DS field, XOR in tos_value, and rewrite the header only when the result
+ * differs from the current value.
+ *
+ * Returns XT_CONTINUE, or NF_DROP when the header cannot be made writable.
+ */
+static unsigned int
+tos_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_tos_target_info *info = par->targinfo;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+	u_int8_t orig, nv;
+
+	orig = ipv6_get_dsfield(iph);
+	/*
+	 * tos_mask names the bits to zero, so it must be inverted before the
+	 * AND, exactly as tos_tg() does for IPv4. Without the inversion the
+	 * IPv6 path kept only the bits it was asked to clear.
+	 */
+	nv = (orig & ~info->tos_mask) ^ info->tos_value;
+
+	if (orig != nv) {
+		/* This is an IPv6 header; size the writable area to match. */
+		if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+			return NF_DROP;
+		/* Re-read the header pointer after skb_make_writable(). */
+		iph = ipv6_hdr(skb);
+		ipv6_change_dsfield(iph, 0, nv);
+	}
+
+	return XT_CONTINUE;
+}
+
+/*
+ * x_tables registrations provided by this module, all restricted to the
+ * "mangle" table: "DSCP" for IPv4 and IPv6, "TOS" revision 0 for IPv4, and
+ * "TOS" revision 1 for IPv4 and IPv6. Only DSCP and TOS revision 0 have a
+ * checkentry hook.
+ */
+static struct xt_target dscp_tg_reg[] __read_mostly = {
+ {
+ .name = "DSCP",
+ .family = NFPROTO_IPV4,
+ .checkentry = dscp_tg_check,
+ .target = dscp_tg,
+ .targetsize = sizeof(struct xt_DSCP_info),
+ .table = "mangle",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "DSCP",
+ .family = NFPROTO_IPV6,
+ .checkentry = dscp_tg_check,
+ .target = dscp_tg6,
+ .targetsize = sizeof(struct xt_DSCP_info),
+ .table = "mangle",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "TOS",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .table = "mangle",
+ .target = tos_tg_v0,
+ .targetsize = sizeof(struct ipt_tos_target_info),
+ .checkentry = tos_tg_check_v0,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "TOS",
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .table = "mangle",
+ .target = tos_tg,
+ .targetsize = sizeof(struct xt_tos_target_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "TOS",
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .table = "mangle",
+ .target = tos_tg6,
+ .targetsize = sizeof(struct xt_tos_target_info),
+ .me = THIS_MODULE,
+ },
+};
+
+/* Module init: register every entry of dscp_tg_reg with x_tables. */
+static int __init dscp_tg_init(void)
+{
+ return xt_register_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg));
+}
+
+/* Module exit: unregister the same set of targets. */
+static void __exit dscp_tg_exit(void)
+{
+ xt_unregister_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg));
+}
+
+module_init(dscp_tg_init);
+module_exit(dscp_tg_exit);
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
new file mode 100644
index 0000000..67574bc
--- /dev/null
+++ b/net/netfilter/xt_MARK.c
@@ -0,0 +1,201 @@
+/*
+ * xt_MARK - Netfilter module to modify the NFMARK field of an skb
+ *
+ * (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ * Jan Engelhardt <jengelh@computergmbh.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_MARK.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("Xtables: packet mark modification");
+MODULE_ALIAS("ipt_MARK");
+MODULE_ALIAS("ip6t_MARK");
+
+/*
+ * MARK target, revision 0: overwrite skb->mark with the configured value.
+ * Always returns XT_CONTINUE.
+ */
+static unsigned int
+mark_tg_v0(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_mark_target_info *info = par->targinfo;
+
+	skb->mark = info->mark;
+
+	return XT_CONTINUE;
+}
+
+/*
+ * MARK target, revision 1: set, AND or OR the configured value into
+ * skb->mark depending on the mode. For any other mode the mark ends up 0,
+ * since the local starts at 0 and no branch assigns it.
+ * Always returns XT_CONTINUE.
+ */
+static unsigned int
+mark_tg_v1(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_mark_target_info_v1 *info = par->targinfo;
+	int mark = 0;
+
+	if (info->mode == XT_MARK_SET)
+		mark = info->mark;
+	else if (info->mode == XT_MARK_AND)
+		mark = skb->mark & info->mark;
+	else if (info->mode == XT_MARK_OR)
+		mark = skb->mark | info->mark;
+
+	skb->mark = mark;
+	return XT_CONTINUE;
+}
+
+/*
+ * MARK target, revision 2: clear the bits selected by the mask in
+ * skb->mark, then XOR in the configured mark. Always returns XT_CONTINUE.
+ */
+static unsigned int
+mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_mark_tginfo2 *tginfo = par->targinfo;
+
+	skb->mark = (skb->mark & ~tginfo->mask) ^ tginfo->mark;
+
+	return XT_CONTINUE;
+}
+
+/*
+ * Rule validation for MARK revision 0: the configured mark must fit in
+ * 32 bits. Logs a warning and returns false otherwise.
+ */
+static bool mark_tg_check_v0(const struct xt_tgchk_param *par)
+{
+	const struct xt_mark_target_info *info = par->targinfo;
+
+	if (info->mark <= 0xffffffff)
+		return true;
+
+	printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
+	return false;
+}
+
+/*
+ * Rule validation for MARK revision 1: the mode must be one of
+ * XT_MARK_SET, XT_MARK_AND or XT_MARK_OR, and the mark must fit in 32 bits.
+ * Each rejection is logged. Returns true when the rule is accepted.
+ */
+static bool mark_tg_check_v1(const struct xt_tgchk_param *par)
+{
+	const struct xt_mark_target_info_v1 *info = par->targinfo;
+
+	switch (info->mode) {
+	case XT_MARK_SET:
+	case XT_MARK_AND:
+	case XT_MARK_OR:
+		break;
+	default:
+		printk(KERN_WARNING "MARK: unknown mode %u\n",
+		       info->mode);
+		return false;
+	}
+
+	if (info->mark > 0xffffffff) {
+		printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
+		return false;
+	}
+
+	return true;
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * CONFIG_COMPAT counterpart of struct xt_mark_target_info (MARK
+ * revision 0): the mark is a compat_ulong_t here.
+ */
+struct compat_xt_mark_target_info {
+ compat_ulong_t mark;
+};
+
+/*
+ * Expand a compat revision 0 MARK target info found at @src into the
+ * native struct xt_mark_target_info and copy the result to @dst.
+ */
+static void mark_tg_compat_from_user_v0(void *dst, void *src)
+{
+	const struct compat_xt_mark_target_info *compat = src;
+	struct xt_mark_target_info native = {
+		.mark = compat->mark,
+	};
+
+	memcpy(dst, &native, sizeof(native));
+}
+
+/*
+ * Shrink the native revision 0 MARK target info at @src into the compat
+ * layout and copy it to the user buffer @dst.
+ *
+ * Returns 0 on success or -EFAULT when copy_to_user() reports a failure.
+ */
+static int mark_tg_compat_to_user_v0(void __user *dst, void *src)
+{
+	const struct xt_mark_target_info *native = src;
+	struct compat_xt_mark_target_info compat = {
+		.mark = native->mark,
+	};
+
+	if (copy_to_user(dst, &compat, sizeof(compat)))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * CONFIG_COMPAT counterpart of struct xt_mark_target_info_v1 (MARK
+ * revision 1): the mark is a compat_ulong_t here, and the bytes after mode
+ * are spelled out as explicit pad members.
+ */
+struct compat_xt_mark_target_info_v1 {
+ compat_ulong_t mark;
+ u_int8_t mode;
+ u_int8_t __pad1;
+ u_int16_t __pad2;
+};
+
+/*
+ * Expand a compat revision 1 MARK target info found at @src into the
+ * native struct xt_mark_target_info_v1 and copy the result to @dst.
+ */
+static void mark_tg_compat_from_user_v1(void *dst, void *src)
+{
+	const struct compat_xt_mark_target_info_v1 *compat = src;
+	struct xt_mark_target_info_v1 native = {
+		.mode = compat->mode,
+		.mark = compat->mark,
+	};
+
+	memcpy(dst, &native, sizeof(native));
+}
+
+/*
+ * Shrink the native revision 1 MARK target info at @src into the compat
+ * layout and copy it to the user buffer @dst.
+ *
+ * Returns 0 on success or -EFAULT when copy_to_user() reports a failure.
+ */
+static int mark_tg_compat_to_user_v1(void __user *dst, void *src)
+{
+	const struct xt_mark_target_info_v1 *native = src;
+	struct compat_xt_mark_target_info_v1 compat = {
+		.mode = native->mode,
+		.mark = native->mark,
+	};
+
+	if (copy_to_user(dst, &compat, sizeof(compat)))
+		return -EFAULT;
+	return 0;
+}
+#endif /* CONFIG_COMPAT */
+
+/*
+ * x_tables registrations for the "MARK" target, all NFPROTO_UNSPEC.
+ * Revisions 0 and 1 are restricted to the "mangle" table and carry
+ * CONFIG_COMPAT translation hooks; revision 2 sets neither a table nor a
+ * checkentry hook.
+ */
+static struct xt_target mark_tg_reg[] __read_mostly = {
+ {
+ .name = "MARK",
+ .family = NFPROTO_UNSPEC,
+ .revision = 0,
+ .checkentry = mark_tg_check_v0,
+ .target = mark_tg_v0,
+ .targetsize = sizeof(struct xt_mark_target_info),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_xt_mark_target_info),
+ .compat_from_user = mark_tg_compat_from_user_v0,
+ .compat_to_user = mark_tg_compat_to_user_v0,
+#endif
+ .table = "mangle",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "MARK",
+ .family = NFPROTO_UNSPEC,
+ .revision = 1,
+ .checkentry = mark_tg_check_v1,
+ .target = mark_tg_v1,
+ .targetsize = sizeof(struct xt_mark_target_info_v1),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_xt_mark_target_info_v1),
+ .compat_from_user = mark_tg_compat_from_user_v1,
+ .compat_to_user = mark_tg_compat_to_user_v1,
+#endif
+ .table = "mangle",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "MARK",
+ .revision = 2,
+ .family = NFPROTO_UNSPEC,
+ .target = mark_tg,
+ .targetsize = sizeof(struct xt_mark_tginfo2),
+ .me = THIS_MODULE,
+ },
+};
+
+/* Module init: register every entry of mark_tg_reg with x_tables. */
+static int __init mark_tg_init(void)
+{
+ return xt_register_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg));
+}
+
+/* Module exit: unregister the same set of targets. */
+static void __exit mark_tg_exit(void)
+{
+ xt_unregister_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg));
+}
+
+module_init(mark_tg_init);
+module_exit(mark_tg_exit);
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
new file mode 100644
index 0000000..50e3a52
--- /dev/null
+++ b/net/netfilter/xt_NFLOG.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_NFLOG.h>
+#include <net/netfilter/nf_log.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: packet logging to netlink using NFLOG");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_NFLOG");
+MODULE_ALIAS("ip6t_NFLOG");
+
+/*
+ * NFLOG target: fill an nf_loginfo of type NF_LOG_TYPE_ULOG from the rule
+ * (group, copy length, queue threshold) and hand the packet to
+ * nf_log_packet() with the rule's prefix. Always returns XT_CONTINUE.
+ */
+static unsigned int
+nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_nflog_info *info = par->targinfo;
+	struct nf_loginfo loginfo;
+
+	loginfo.type = NF_LOG_TYPE_ULOG;
+	loginfo.u.ulog.group = info->group;
+	loginfo.u.ulog.copy_len = info->len;
+	loginfo.u.ulog.qthreshold = info->threshold;
+
+	/* The prefix goes through "%s" so it is never used as a format. */
+	nf_log_packet(par->family, par->hooknum, skb, par->in, par->out,
+		      &loginfo, "%s", info->prefix);
+
+	return XT_CONTINUE;
+}
+
+/*
+ * Rule validation for NFLOG: reject flags outside XT_NFLOG_MASK and a
+ * prefix whose last byte is not NUL.
+ */
+static bool nflog_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_nflog_info *info = par->targinfo;
+	const size_t last = sizeof(info->prefix) - 1;
+
+	return !(info->flags & ~XT_NFLOG_MASK) &&
+	       info->prefix[last] == '\0';
+}
+
+/* x_tables registration for "NFLOG", revision 0, NFPROTO_UNSPEC. */
+static struct xt_target nflog_tg_reg __read_mostly = {
+ .name = "NFLOG",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = nflog_tg_check,
+ .target = nflog_tg,
+ .targetsize = sizeof(struct xt_nflog_info),
+ .me = THIS_MODULE,
+};
+
+/* Module init: register the NFLOG target with x_tables. */
+static int __init nflog_tg_init(void)
+{
+ return xt_register_target(&nflog_tg_reg);
+}
+
+/* Module exit: unregister the NFLOG target. */
+static void __exit nflog_tg_exit(void)
+{
+ xt_unregister_target(&nflog_tg_reg);
+}
+
+module_init(nflog_tg_init);
+module_exit(nflog_tg_exit);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
new file mode 100644
index 0000000..f9977b3
--- /dev/null
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -0,0 +1,69 @@
+/* iptables module for using new netfilter netlink queue
+ *
+ * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_arp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_NFQUEUE.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: packet forwarding to netlink");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_NFQUEUE");
+MODULE_ALIAS("ip6t_NFQUEUE");
+MODULE_ALIAS("arpt_NFQUEUE");
+
+/*
+ * NFQUEUE target: return the verdict that NF_QUEUE_NR() builds from the
+ * rule's queue number. The packet itself is not touched here.
+ */
+static unsigned int
+nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_NFQ_info *info = par->targinfo;
+	const unsigned int verdict = NF_QUEUE_NR(info->queuenum);
+
+	return verdict;
+}
+
+/*
+ * x_tables registrations for "NFQUEUE": the same handler and info size,
+ * registered once each for IPv4, IPv6 and ARP.
+ */
+static struct xt_target nfqueue_tg_reg[] __read_mostly = {
+ {
+ .name = "NFQUEUE",
+ .family = NFPROTO_IPV4,
+ .target = nfqueue_tg,
+ .targetsize = sizeof(struct xt_NFQ_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "NFQUEUE",
+ .family = NFPROTO_IPV6,
+ .target = nfqueue_tg,
+ .targetsize = sizeof(struct xt_NFQ_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "NFQUEUE",
+ .family = NFPROTO_ARP,
+ .target = nfqueue_tg,
+ .targetsize = sizeof(struct xt_NFQ_info),
+ .me = THIS_MODULE,
+ },
+};
+
+/* Module init: register every entry of nfqueue_tg_reg with x_tables. */
+static int __init nfqueue_tg_init(void)
+{
+ return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
+}
+
+/* Module exit: unregister the same set of targets. */
+static void __exit nfqueue_tg_exit(void)
+{
+ xt_unregister_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
+}
+
+module_init(nfqueue_tg_init);
+module_exit(nfqueue_tg_exit);
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
new file mode 100644
index 0000000..e7a0a54
--- /dev/null
+++ b/net/netfilter/xt_NOTRACK.c
@@ -0,0 +1,53 @@
+/* This is a module which is used for setting up fake conntracks
+ * on packets so that they are not seen by the conntrack/NAT code.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+
+MODULE_DESCRIPTION("Xtables: Disabling connection tracking for packets");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_NOTRACK");
+MODULE_ALIAS("ip6t_NOTRACK");
+
+/*
+ * NOTRACK target: attach the shared "untracked" conntrack entry to a packet
+ * that has no conntrack attached yet. Always returns XT_CONTINUE.
+ */
+static unsigned int
+notrack_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	/* Previously seen (loopback)? Ignore. */
+	if (skb->nfct)
+		return XT_CONTINUE;
+
+	/*
+	 * Attach the fake conntrack entry. If there is a real ct entry
+	 * corresponding to this packet, it will hang around until it times
+	 * out; we do not deal with it for performance reasons. JK
+	 */
+	skb->nfctinfo = IP_CT_NEW;
+	skb->nfct = &nf_conntrack_untracked.ct_general;
+	nf_conntrack_get(skb->nfct);
+
+	return XT_CONTINUE;
+}
+
+/*
+ * x_tables registration for "NOTRACK", revision 0, NFPROTO_UNSPEC,
+ * restricted to the "raw" table. No targetsize is set.
+ */
+static struct xt_target notrack_tg_reg __read_mostly = {
+ .name = "NOTRACK",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .target = notrack_tg,
+ .table = "raw",
+ .me = THIS_MODULE,
+};
+
+/* Module init: register the NOTRACK target with x_tables. */
+static int __init notrack_tg_init(void)
+{
+ return xt_register_target(&notrack_tg_reg);
+}
+
+/* Module exit: unregister the NOTRACK target. */
+static void __exit notrack_tg_exit(void)
+{
+ xt_unregister_target(&notrack_tg_reg);
+}
+
+module_init(notrack_tg_init);
+module_exit(notrack_tg_exit);
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
new file mode 100644
index 0000000..43f5676
--- /dev/null
+++ b/net/netfilter/xt_RATEEST.c
@@ -0,0 +1,183 @@
+/*
+ * (C) 2007 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/gen_stats.h>
+#include <linux/jhash.h>
+#include <linux/rtnetlink.h>
+#include <linux/random.h>
+#include <net/gen_stats.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_RATEEST.h>
+#include <net/netfilter/xt_rateest.h>
+
+/*
+ * Held by xt_rateest_lookup() and xt_rateest_put() while they walk the
+ * hash buckets, change an estimator's refcnt, or unlink and free it.
+ */
+static DEFINE_MUTEX(xt_rateest_mutex);
+
+/* Number of buckets; xt_rateest_hash() masks with RATEEST_HSIZE - 1. */
+#define RATEEST_HSIZE 16
+/* Estimators, bucketed by the hash of their name. */
+static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly;
+/* Seed passed to jhash(); filled by get_random_bytes() at module init. */
+static unsigned int jhash_rnd __read_mostly;
+
+/*
+ * Map an estimator name to a bucket index. jhash() is run over the full
+ * size of the name field of struct xt_rateest, seeded with jhash_rnd, and
+ * the result is reduced with RATEEST_HSIZE - 1.
+ */
+static unsigned int xt_rateest_hash(const char *name)
+{
+	const unsigned int hashed = jhash(name,
+					  FIELD_SIZEOF(struct xt_rateest, name),
+					  jhash_rnd);
+
+	return hashed & (RATEEST_HSIZE - 1);
+}
+
+/*
+ * Link @est into the bucket selected by its name.
+ *
+ * xt_rateest_lookup() and xt_rateest_put() walk and unlink these lists
+ * under xt_rateest_mutex, so the insertion must take the same mutex;
+ * adding to the list unlocked can corrupt it when rules are loaded
+ * concurrently. The caller must not hold xt_rateest_mutex.
+ *
+ * This protects the list itself only: xt_rateest_tg_checkentry() still
+ * performs its lookup and this insert as two separate critical sections.
+ */
+static void xt_rateest_hash_insert(struct xt_rateest *est)
+{
+	unsigned int h = xt_rateest_hash(est->name);
+
+	mutex_lock(&xt_rateest_mutex);
+	hlist_add_head(&est->list, &rateest_hash[h]);
+	mutex_unlock(&xt_rateest_mutex);
+}
+
+/*
+ * Find the estimator called @name.
+ *
+ * On a match the estimator's refcnt is incremented under xt_rateest_mutex
+ * and the estimator is returned; the caller drops that reference with
+ * xt_rateest_put(). Returns NULL when no estimator has that name.
+ */
+struct xt_rateest *xt_rateest_lookup(const char *name)
+{
+	struct xt_rateest *found = NULL;
+	struct xt_rateest *est;
+	struct hlist_node *n;
+	unsigned int h = xt_rateest_hash(name);
+
+	mutex_lock(&xt_rateest_mutex);
+	hlist_for_each_entry(est, n, &rateest_hash[h], list) {
+		if (strcmp(est->name, name) != 0)
+			continue;
+		est->refcnt++;
+		found = est;
+		break;
+	}
+	mutex_unlock(&xt_rateest_mutex);
+
+	return found;
+}
+EXPORT_SYMBOL_GPL(xt_rateest_lookup);
+
+/*
+ * Drop one reference on @est. When the count reaches zero the estimator is
+ * unlinked from its bucket, its rate estimator is killed and the memory is
+ * freed, all under xt_rateest_mutex.
+ */
+void xt_rateest_put(struct xt_rateest *est)
+{
+	mutex_lock(&xt_rateest_mutex);
+	est->refcnt--;
+	if (est->refcnt == 0) {
+		hlist_del(&est->list);
+		gen_kill_estimator(&est->bstats, &est->rstats);
+		kfree(est);
+	}
+	mutex_unlock(&xt_rateest_mutex);
+}
+EXPORT_SYMBOL_GPL(xt_rateest_put);
+
+/*
+ * RATEEST target: account the packet (one packet, skb->len bytes) in the
+ * basic stats of the rule's estimator, under the estimator's lock.
+ * Always returns XT_CONTINUE.
+ */
+static unsigned int
+xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_rateest_target_info *info = par->targinfo;
+	struct xt_rateest *est = info->est;
+
+	spin_lock_bh(&est->lock);
+	est->bstats.bytes += skb->len;
+	est->bstats.packets++;
+	spin_unlock_bh(&est->lock);
+
+	return XT_CONTINUE;
+}
+
+/*
+ * Rule validation and setup for RATEEST.
+ *
+ * Looks up an estimator named info->name. If one exists, the rule's
+ * interval/ewma_log are compared against it and the existing estimator
+ * (with the reference taken by the lookup) is stored in info->est.
+ * Otherwise a new estimator is allocated with refcnt 1, a rate estimator
+ * is created for it, and it is stored in info->est and added to the hash.
+ *
+ * Returns true on success; false on a parameter mismatch, allocation
+ * failure or gen_new_estimator() failure.
+ */
+static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
+{
+ struct xt_rateest_target_info *info = par->targinfo;
+ struct xt_rateest *est;
+ /* On-stack attribute header + payload handed to gen_new_estimator(). */
+ struct {
+ struct nlattr opt;
+ struct gnet_estimator est;
+ } cfg;
+
+ est = xt_rateest_lookup(info->name);
+ if (est) {
+ /*
+ * If estimator parameters are specified, they must match the
+ * existing estimator.
+ */
+ /*
+ * NOTE(review): as written, a rule with both interval and
+ * ewma_log equal to zero is rejected too, which reads stricter
+ * than the comment above - confirm the intended behaviour.
+ */
+ if ((!info->interval && !info->ewma_log) ||
+ (info->interval != est->params.interval ||
+ info->ewma_log != est->params.ewma_log)) {
+ /* Give back the reference taken by the lookup. */
+ xt_rateest_put(est);
+ return false;
+ }
+ info->est = est;
+ return true;
+ }
+
+ est = kzalloc(sizeof(*est), GFP_KERNEL);
+ if (!est)
+ goto err1;
+
+ strlcpy(est->name, info->name, sizeof(est->name));
+ spin_lock_init(&est->lock);
+ est->refcnt = 1;
+ est->params.interval = info->interval;
+ est->params.ewma_log = info->ewma_log;
+
+ /* Describe the estimator as a TCA_STATS_RATE_EST attribute. */
+ cfg.opt.nla_len = nla_attr_size(sizeof(cfg.est));
+ cfg.opt.nla_type = TCA_STATS_RATE_EST;
+ cfg.est.interval = info->interval;
+ cfg.est.ewma_log = info->ewma_log;
+
+ if (gen_new_estimator(&est->bstats, &est->rstats, &est->lock,
+ &cfg.opt) < 0)
+ goto err2;
+
+ info->est = est;
+ xt_rateest_hash_insert(est);
+
+ return true;
+
+err2:
+ /* The estimator was never published, so a plain kfree() is enough. */
+ kfree(est);
+err1:
+ return false;
+}
+
+/* Rule teardown: drop the rule's reference on its estimator. */
+static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	struct xt_rateest_target_info *tginfo = par->targinfo;
+
+	xt_rateest_put(tginfo->est);
+}
+
+/* x_tables registration for "RATEEST", revision 0, NFPROTO_UNSPEC. */
+static struct xt_target xt_rateest_tg_reg __read_mostly = {
+ .name = "RATEEST",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .target = xt_rateest_tg,
+ .checkentry = xt_rateest_tg_checkentry,
+ .destroy = xt_rateest_tg_destroy,
+ .targetsize = sizeof(struct xt_rateest_target_info),
+ .me = THIS_MODULE,
+};
+
+/*
+ * Module init: seed the name hash, initialise every bucket head, then
+ * register the RATEEST target. Returns xt_register_target()'s result.
+ */
+static int __init xt_rateest_tg_init(void)
+{
+	unsigned int bucket;
+
+	get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
+
+	for (bucket = 0; bucket < ARRAY_SIZE(rateest_hash); bucket++)
+		INIT_HLIST_HEAD(&rateest_hash[bucket]);
+
+	return xt_register_target(&xt_rateest_tg_reg);
+}
+
+/* Module exit: unregister the RATEEST target. */
+static void __exit xt_rateest_tg_fini(void)
+{
+ xt_unregister_target(&xt_rateest_tg_reg);
+}
+
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: packet rate estimator");
+MODULE_ALIAS("ipt_RATEEST");
+MODULE_ALIAS("ip6t_RATEEST");
+module_init(xt_rateest_tg_init);
+module_exit(xt_rateest_tg_fini);
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
new file mode 100644
index 0000000..7a6f9e6
--- /dev/null
+++ b/net/netfilter/xt_SECMARK.c
@@ -0,0 +1,146 @@
+/*
+ * Module for modifying the secmark field of the skb, for use by
+ * security subsystems.
+ *
+ * Based on the nfmark match by:
+ * (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
+ *
+ * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/selinux.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_SECMARK.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris@redhat.com>");
+MODULE_DESCRIPTION("Xtables: packet security mark modification");
+MODULE_ALIAS("ipt_SECMARK");
+MODULE_ALIAS("ip6t_SECMARK");
+
+#define PFX "SECMARK: "
+
+/*
+ * Module-wide SECMARK mode. Starts at 0; secmark_tg_check() sets it from the
+ * first accepted rule and rejects later rules whose mode differs.
+ */
+static u8 mode;
+
+/*
+ * SECMARK target: write the rule's SELinux SID into skb->secmark.
+ *
+ * The rule's mode must equal the module-wide mode, and that mode must be
+ * SECMARK_MODE_SEL; either violation is a BUG. Always returns XT_CONTINUE.
+ */
+static unsigned int
+secmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_secmark_target_info *info = par->targinfo;
+
+	BUG_ON(info->mode != mode);
+
+	if (mode != SECMARK_MODE_SEL)
+		BUG();
+
+	skb->secmark = info->u.sel.selsid;
+	return XT_CONTINUE;
+}
+
+/*
+ * SELinux part of rule validation.
+ *
+ * Forces NUL termination of the context string, converts it to a SID
+ * stored in the rule, and checks the relabel permission for that SID. On
+ * success the SELinux secmark refcount is incremented and true is
+ * returned; every failure returns false, logging all but a non-EINVAL
+ * conversion error.
+ */
+static bool checkentry_selinux(struct xt_secmark_target_info *info)
+{
+	struct xt_secmark_target_selinux_info *sel = &info->u.sel;
+	int err;
+
+	sel->selctx[SECMARK_SELCTX_MAX - 1] = '\0';
+
+	err = selinux_string_to_sid(sel->selctx, &sel->selsid);
+	if (err == -EINVAL)
+		printk(KERN_INFO PFX "invalid SELinux context \'%s\'\n",
+		       sel->selctx);
+	if (err)
+		return false;
+
+	if (sel->selsid == 0) {
+		printk(KERN_INFO PFX "unable to map SELinux context \'%s\'\n",
+		       sel->selctx);
+		return false;
+	}
+
+	if (selinux_secmark_relabel_packet_permission(sel->selsid)) {
+		printk(KERN_INFO PFX "unable to obtain relabeling permission\n");
+		return false;
+	}
+
+	/* Balanced by selinux_secmark_refcount_dec() in secmark_tg_destroy(). */
+	selinux_secmark_refcount_inc();
+	return true;
+}
+
+/*
+ * Rule validation for SECMARK.
+ *
+ * Accepts the rule only in the "mangle" or "security" table, only when its
+ * mode agrees with a module-wide mode that is already set, and only for
+ * SECMARK_MODE_SEL with a context that passes checkentry_selinux(). The
+ * first accepted rule fixes the module-wide mode.
+ */
+static bool secmark_tg_check(const struct xt_tgchk_param *par)
+{
+	struct xt_secmark_target_info *info = par->targinfo;
+	bool table_ok;
+
+	table_ok = strcmp(par->table, "mangle") == 0 ||
+		   strcmp(par->table, "security") == 0;
+	if (!table_ok) {
+		printk(KERN_INFO PFX "target only valid in the \'mangle\' "
+		       "or \'security\' tables, not \'%s\'.\n", par->table);
+		return false;
+	}
+
+	if (mode && mode != info->mode) {
+		printk(KERN_INFO PFX "mode already set to %hu cannot mix with "
+		       "rules for mode %hu\n", mode, info->mode);
+		return false;
+	}
+
+	if (info->mode != SECMARK_MODE_SEL) {
+		printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode);
+		return false;
+	}
+
+	if (!checkentry_selinux(info))
+		return false;
+
+	if (!mode)
+		mode = info->mode;
+	return true;
+}
+
+/*
+ * Rule teardown: in SELinux mode, drop the secmark refcount that
+ * checkentry_selinux() took for this rule.
+ */
+static void secmark_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	if (mode == SECMARK_MODE_SEL)
+		selinux_secmark_refcount_dec();
+}
+
+/* x_tables registration for "SECMARK", revision 0, NFPROTO_UNSPEC. */
+static struct xt_target secmark_tg_reg __read_mostly = {
+ .name = "SECMARK",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = secmark_tg_check,
+ .destroy = secmark_tg_destroy,
+ .target = secmark_tg,
+ .targetsize = sizeof(struct xt_secmark_target_info),
+ .me = THIS_MODULE,
+};
+
+/* Module init: register the SECMARK target with x_tables. */
+static int __init secmark_tg_init(void)
+{
+ return xt_register_target(&secmark_tg_reg);
+}
+
+/* Module exit: unregister the SECMARK target. */
+static void __exit secmark_tg_exit(void)
+{
+ xt_unregister_target(&secmark_tg_reg);
+}
+
+module_init(secmark_tg_init);
+module_exit(secmark_tg_exit);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
new file mode 100644
index 0000000..4f3b1f8
--- /dev/null
+++ b/net/netfilter/xt_TCPMSS.c
@@ -0,0 +1,314 @@
+/*
+ * This is a module which is used for setting the MSS option in TCP packets.
+ *
+ * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <net/dst.h>
+#include <net/flow.h>
+#include <net/ipv6.h>
+#include <net/route.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_tcpudp.h>
+#include <linux/netfilter/xt_TCPMSS.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment");
+MODULE_ALIAS("ipt_TCPMSS");
+MODULE_ALIAS("ip6t_TCPMSS");
+
+/*
+ * Number of bytes to advance past the TCP option at opt[offset].
+ *
+ * Kinds up to TCPOPT_NOP are treated as one byte long.  For any other kind
+ * the following length octet is used, except that a length of zero is
+ * reported as 1 so that a caller's loop always makes progress.
+ */
+static inline unsigned int
+optlen(const u_int8_t *opt, unsigned int offset)
+{
+	const u_int8_t kind = opt[offset];
+
+	if (kind <= TCPOPT_NOP)
+		return 1;
+
+	/* Beware zero-length options: make finite progress */
+	return opt[offset + 1] ? opt[offset + 1] : 1;
+}
+
+/*
+ * Lower or insert the MSS option of the TCP segment that starts @tcphoff
+ * bytes past the network header of @skb.
+ *
+ * The target MSS is info->mss, or, when info->mss is XT_TCPMSS_CLAMP_PMTU,
+ * the smaller of the outgoing path MTU and @in_mtu, minus @minlen.  An MSS
+ * option that is already present is only ever lowered, never raised.
+ *
+ * Returns -1 on error, 0 when the packet length did not change, and
+ * TCPOLEN_MSS when a new option was inserted, in which case the caller has
+ * to add that amount to the network-layer length field.
+ */
+static int
+tcpmss_mangle_packet(struct sk_buff *skb,
+		     const struct xt_tcpmss_info *info,
+		     unsigned int in_mtu,
+		     unsigned int tcphoff,
+		     unsigned int minlen)
+{
+	struct tcphdr *tcph;
+	unsigned int tcplen, tcp_hdrlen, i;
+	__be16 oldval;
+	u16 newmss;
+	u8 *opt;
+
+	if (!skb_make_writable(skb, skb->len))
+		return -1;
+
+	/*
+	 * A full base TCP header must be present before tcph->doff may be
+	 * trusted.  Without this check a short segment whose doff matches
+	 * its length would make "tcplen - sizeof(struct tcphdr)" in the
+	 * memmove() below wrap around to a huge value.
+	 */
+	if (skb->len < tcphoff ||
+	    skb->len - tcphoff < sizeof(struct tcphdr))
+		return -1;
+
+	tcplen = skb->len - tcphoff;
+	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+	tcp_hdrlen = tcph->doff * 4;
+
+	/* Since it passed flags test in tcp match, we know it is
+	   not a fragment, and has data >= tcp header length.  SYN
+	   packets should not contain data: if they did, then we risk
+	   running over MTU, sending Frag Needed and breaking things
+	   badly. --RR
+	   Together with the length check above, this also rejects any
+	   doff smaller than the base header. */
+	if (tcplen != tcp_hdrlen) {
+		if (net_ratelimit())
+			printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n",
+			       skb->len);
+		return -1;
+	}
+
+	if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
+		if (dst_mtu(skb->dst) <= minlen) {
+			if (net_ratelimit())
+				printk(KERN_ERR "xt_TCPMSS: "
+				       "unknown or invalid path-MTU (%u)\n",
+				       dst_mtu(skb->dst));
+			return -1;
+		}
+		if (in_mtu <= minlen) {
+			if (net_ratelimit())
+				printk(KERN_ERR "xt_TCPMSS: unknown or "
+				       "invalid path-MTU (%u)\n", in_mtu);
+			return -1;
+		}
+		newmss = min(dst_mtu(skb->dst), in_mtu) - minlen;
+	} else
+		newmss = info->mss;
+
+	/*
+	 * Walk the options.  The bound guarantees that a complete MSS option
+	 * still fits at offset i, which also keeps the length octet read by
+	 * optlen() inside the header.
+	 */
+	opt = (u_int8_t *)tcph;
+	for (i = sizeof(struct tcphdr); i + TCPOLEN_MSS <= tcp_hdrlen;
+	     i += optlen(opt, i)) {
+		if (opt[i] == TCPOPT_MSS && opt[i+1] == TCPOLEN_MSS) {
+			u_int16_t oldmss;
+
+			oldmss = (opt[i+2] << 8) | opt[i+3];
+
+			/* Never increase MSS, even when setting it, as
+			 * doing so results in problems for hosts that rely
+			 * on MSS being set correctly.
+			 */
+			if (oldmss <= newmss)
+				return 0;
+
+			opt[i+2] = (newmss & 0xff00) >> 8;
+			opt[i+3] = newmss & 0x00ff;
+
+			inet_proto_csum_replace2(&tcph->check, skb,
+						 htons(oldmss), htons(newmss),
+						 0);
+			return 0;
+		}
+	}
+
+	/*
+	 * tcph->doff is a 4-bit field: a 60-byte header has no room for one
+	 * more option, and incrementing doff would wrap it to 0.  Leave such
+	 * a packet unmodified.
+	 */
+	if (tcp_hdrlen >= 15 * 4)
+		return 0;
+
+	/*
+	 * MSS Option not found ?! add it..
+	 */
+	if (skb_tailroom(skb) < TCPOLEN_MSS) {
+		if (pskb_expand_head(skb, 0,
+				     TCPOLEN_MSS - skb_tailroom(skb),
+				     GFP_ATOMIC))
+			return -1;
+		/* the head may have been reallocated: look the header up again */
+		tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+	}
+
+	skb_put(skb, TCPOLEN_MSS);
+
+	/* shift the existing options back to open a slot right after the
+	 * base header */
+	opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
+	memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
+
+	/* checksum update for the changed TCP length (pseudo-header) */
+	inet_proto_csum_replace2(&tcph->check, skb,
+				 htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
+	opt[0] = TCPOPT_MSS;
+	opt[1] = TCPOLEN_MSS;
+	opt[2] = (newmss & 0xff00) >> 8;
+	opt[3] = newmss & 0x00ff;
+
+	/* checksum update for the four new option bytes */
+	inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
+
+	/* checksum update for the 16-bit word that contains doff */
+	oldval = ((__be16 *)tcph)[6];
+	tcph->doff += TCPOLEN_MSS/4;
+	inet_proto_csum_replace2(&tcph->check, skb,
+				 oldval, ((__be16 *)tcph)[6], 0);
+	return TCPOLEN_MSS;
+}
+
+/*
+ * Look up a route towards the packet's source address and return the MTU
+ * of that route.  Returns ~0U when no afinfo is registered for @family or
+ * the lookup leaves the route pointer NULL.
+ */
+static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
+				    unsigned int family)
+{
+	const struct nf_afinfo *afinfo;
+	struct rtable *route = NULL;
+	struct flowi fl = {};
+	u_int32_t mtu;
+
+	/* the flow's destination is the sender of this packet */
+	if (family != PF_INET)
+		fl.fl6_dst = ipv6_hdr(skb)->saddr;
+	else
+		fl.fl4_dst = ip_hdr(skb)->saddr;
+
+	rcu_read_lock();
+	afinfo = nf_get_afinfo(family);
+	if (afinfo != NULL)
+		afinfo->route((struct dst_entry **)&route, &fl);
+	rcu_read_unlock();
+
+	if (route == NULL)
+		return ~0U;
+
+	mtu = dst_mtu(&route->u.dst);
+	dst_release(&route->u.dst);
+	return mtu;
+}
+
+/*
+ * IPv4 target hook.  Runs tcpmss_mangle_packet() on the packet; a negative
+ * result drops the packet, a positive result is the number of bytes that
+ * were added, which is folded into tot_len and the IP header checksum.
+ */
+static unsigned int
+tcpmss_tg4(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	struct iphdr *iph = ip_hdr(skb);
+	__be16 newlen;
+	int added;
+
+	added = tcpmss_mangle_packet(skb, par->targinfo,
+				     tcpmss_reverse_mtu(skb, PF_INET),
+				     iph->ihl * 4,
+				     sizeof(*iph) + sizeof(struct tcphdr));
+	if (added < 0)
+		return NF_DROP;
+	if (added == 0)
+		return XT_CONTINUE;
+
+	/* fetch the header again after the packet has been mangled */
+	iph = ip_hdr(skb);
+	newlen = htons(ntohs(iph->tot_len) + added);
+	csum_replace2(&iph->check, iph->tot_len, newlen);
+	iph->tot_len = newlen;
+	return XT_CONTINUE;
+}
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+/*
+ * IPv6 target hook.  Finds the offset behind the extension headers with
+ * ipv6_skip_exthdr(), runs tcpmss_mangle_packet() there and, when bytes
+ * were added, grows payload_len by the same amount.
+ */
+static unsigned int
+tcpmss_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	u8 nexthdr = ipv6h->nexthdr;
+	int tcphoff;
+	int added;
+
+	tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
+	if (tcphoff < 0)
+		return NF_DROP;
+
+	added = tcpmss_mangle_packet(skb, par->targinfo,
+				     tcpmss_reverse_mtu(skb, PF_INET6),
+				     tcphoff,
+				     sizeof(*ipv6h) + sizeof(struct tcphdr));
+	if (added < 0)
+		return NF_DROP;
+	if (added == 0)
+		return XT_CONTINUE;
+
+	/* fetch the header again after the packet has been mangled */
+	ipv6h = ipv6_hdr(skb);
+	ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + added);
+	return XT_CONTINUE;
+}
+#endif
+
+#define TH_SYN 0x02
+
+/*
+ * Must specify -p tcp --syn
+ *
+ * Match-iterator callback: true when @m is the "tcp" match, its flag
+ * comparison value includes SYN, and the flags test is not inverted.
+ */
+static inline bool find_syn_match(const struct xt_entry_match *m)
+{
+	const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data;
+
+	return strcmp(m->u.kernel.match->name, "tcp") == 0 &&
+	       (tcpinfo->flg_cmp & TH_SYN) &&
+	       !(tcpinfo->invflags & XT_TCP_INV_FLAGS);
+}
+
+/*
+ * IPv4 rule check.  Path-MTU clamping is accepted only from the FORWARD,
+ * OUTPUT and POSTROUTING hooks, and the rule must contain a match that
+ * find_syn_match() accepts.
+ */
+static bool tcpmss_tg4_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_tcpmss_info *info = par->targinfo;
+	const struct ipt_entry *e = par->entryinfo;
+	const unsigned int pmtu_hooks = (1 << NF_INET_FORWARD) |
+					(1 << NF_INET_LOCAL_OUT) |
+					(1 << NF_INET_POST_ROUTING);
+
+	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
+	    (par->hook_mask & ~pmtu_hooks) != 0) {
+		printk("xt_TCPMSS: path-MTU clamping only supported in "
+		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
+		return false;
+	}
+
+	if (!IPT_MATCH_ITERATE(e, find_syn_match)) {
+		printk("xt_TCPMSS: Only works on TCP SYN packets\n");
+		return false;
+	}
+	return true;
+}
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+/*
+ * IPv6 rule check, same rules as the IPv4 variant: path-MTU clamping only
+ * from FORWARD, OUTPUT and POSTROUTING, and a match accepted by
+ * find_syn_match() must be part of the rule.
+ */
+static bool tcpmss_tg6_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_tcpmss_info *info = par->targinfo;
+	const struct ip6t_entry *e = par->entryinfo;
+	const unsigned int pmtu_hooks = (1 << NF_INET_FORWARD) |
+					(1 << NF_INET_LOCAL_OUT) |
+					(1 << NF_INET_POST_ROUTING);
+
+	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
+	    (par->hook_mask & ~pmtu_hooks) != 0) {
+		printk("xt_TCPMSS: path-MTU clamping only supported in "
+		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
+		return false;
+	}
+
+	if (!IP6T_MATCH_ITERATE(e, find_syn_match)) {
+		printk("xt_TCPMSS: Only works on TCP SYN packets\n");
+		return false;
+	}
+	return true;
+}
+#endif
+
+/*
+ * "TCPMSS" target descriptors: one for IPv4 and, when ip6tables is
+ * configured, one for IPv6.  Both are restricted to IPPROTO_TCP.
+ */
+static struct xt_target tcpmss_tg_reg[] __read_mostly = {
+	{
+		.name		= "TCPMSS",
+		.family		= NFPROTO_IPV4,
+		.proto		= IPPROTO_TCP,
+		.targetsize	= sizeof(struct xt_tcpmss_info),
+		.checkentry	= tcpmss_tg4_check,
+		.target		= tcpmss_tg4,
+		.me		= THIS_MODULE,
+	},
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+	{
+		.name		= "TCPMSS",
+		.family		= NFPROTO_IPV6,
+		.proto		= IPPROTO_TCP,
+		.targetsize	= sizeof(struct xt_tcpmss_info),
+		.checkentry	= tcpmss_tg6_check,
+		.target		= tcpmss_tg6,
+		.me		= THIS_MODULE,
+	},
+#endif
+};
+
+/* Module entry point: register every descriptor in tcpmss_tg_reg[]. */
+static int __init tcpmss_tg_init(void)
+{
+	int err;
+
+	err = xt_register_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
+	return err;
+}
+
+/* Module exit point: unregister every descriptor in tcpmss_tg_reg[]. */
+static void __exit tcpmss_tg_exit(void)
+{
+ xt_unregister_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
+}
+
+module_init(tcpmss_tg_init);
+module_exit(tcpmss_tg_exit);
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
new file mode 100644
index 0000000..9dd8c8e
--- /dev/null
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -0,0 +1,143 @@
+/*
+ * A module for stripping a specific TCP option from TCP packets.
+ *
+ * Copyright (C) 2007 Sven Schnelle <svens@bitebene.org>
+ * Copyright © CC Computer Consultants GmbH, 2007
+ * Contact: Jan Engelhardt <jengelh@computergmbh.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_TCPOPTSTRIP.h>
+
+/*
+ * Length in bytes of the TCP option found at opt[offset]: 1 for kinds up to
+ * TCPOPT_NOP, otherwise the value of the following length octet, with a
+ * zero length reported as 1.
+ */
+static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset)
+{
+	unsigned int len = 1;
+
+	/* Beware zero-length options: make finite progress */
+	if (opt[offset] > TCPOPT_NOP && opt[offset+1] != 0)
+		len = opt[offset+1];
+
+	return len;
+}
+
+/*
+ * Overwrite every TCP option whose kind is set in info->strip_bmap with
+ * TCPOPT_NOP bytes and patch the TCP checksum accordingly.
+ *
+ * @tcphoff is the offset of the TCP header from the network header.
+ * @minlen is accepted for the callers' sake but not used here.
+ *
+ * The header length is taken from the header located through @tcphoff and
+ * checked against the packet length, instead of relying on the skb's
+ * transport-header pointer.
+ *
+ * Returns NF_DROP when the packet cannot be made writable or is too short
+ * for the header it announces, XT_CONTINUE otherwise.
+ */
+static unsigned int
+tcpoptstrip_mangle_packet(struct sk_buff *skb,
+			  const struct xt_tcpoptstrip_target_info *info,
+			  unsigned int tcphoff, unsigned int minlen)
+{
+	unsigned int optl, i, j, hdrlen;
+	struct tcphdr *tcph;
+	u_int16_t n, o;
+	u_int8_t *opt;
+
+	if (!skb_make_writable(skb, skb->len))
+		return NF_DROP;
+
+	/* the base header must be present before doff can be read */
+	if (skb->len < tcphoff ||
+	    skb->len - tcphoff < sizeof(struct tcphdr))
+		return NF_DROP;
+
+	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+	opt  = (u_int8_t *)tcph;
+
+	/* a header that claims to be longer than the packet is bogus */
+	hdrlen = tcph->doff * 4;
+	if (hdrlen > skb->len - tcphoff)
+		return NF_DROP;
+
+	/*
+	 * Walk through all TCP options - if we find some option to remove,
+	 * set all octets to %TCPOPT_NOP and adjust checksum.
+	 */
+	for (i = sizeof(struct tcphdr); i < hdrlen; i += optl) {
+		/* a multi-byte option needs its length octet inside the
+		 * header; otherwise optlen() would read past the end */
+		if (opt[i] > TCPOPT_NOP && i + 1 >= hdrlen)
+			break;
+
+		optl = optlen(opt, i);
+
+		if (i + optl > hdrlen)
+			break;
+
+		if (!tcpoptstrip_test_bit(info->strip_bmap, opt[i]))
+			continue;
+
+		for (j = 0; j < optl; ++j) {
+			o = opt[i+j];
+			n = TCPOPT_NOP;
+			/* bytes at even offsets form the high half of the
+			 * 16-bit checksum word */
+			if ((i + j) % 2 == 0) {
+				o <<= 8;
+				n <<= 8;
+			}
+			inet_proto_csum_replace2(&tcph->check, skb, htons(o),
+						 htons(n), 0);
+		}
+		memset(opt + i, TCPOPT_NOP, optl);
+	}
+
+	return XT_CONTINUE;
+}
+
+/* IPv4 target hook: the TCP header starts right after the IP header. */
+static unsigned int
+tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const unsigned int tcphoff = ip_hdrlen(skb);
+	const unsigned int minlen = sizeof(struct iphdr) +
+				    sizeof(struct tcphdr);
+
+	return tcpoptstrip_mangle_packet(skb, par->targinfo, tcphoff, minlen);
+}
+
+#if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
+/*
+ * IPv6 target hook: skip the extension headers with ipv6_skip_exthdr() and
+ * strip options at the resulting offset; drop the packet when the skip
+ * fails.
+ */
+static unsigned int
+tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct ipv6hdr *ip6 = ipv6_hdr(skb);
+	u_int8_t proto = ip6->nexthdr;
+	int off;
+
+	off = ipv6_skip_exthdr(skb, sizeof(*ip6), &proto);
+	if (off < 0)
+		return NF_DROP;
+
+	return tcpoptstrip_mangle_packet(skb, par->targinfo, off,
+					 sizeof(*ip6) + sizeof(struct tcphdr));
+}
+#endif
+
+/*
+ * "TCPOPTSTRIP" target descriptors for the "mangle" table: IPv4 always,
+ * IPv6 when the ip6tables mangle table is configured.
+ */
+static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = {
+	{
+		.name		= "TCPOPTSTRIP",
+		.table		= "mangle",
+		.family		= NFPROTO_IPV4,
+		.proto		= IPPROTO_TCP,
+		.targetsize	= sizeof(struct xt_tcpoptstrip_target_info),
+		.target		= tcpoptstrip_tg4,
+		.me		= THIS_MODULE,
+	},
+#if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
+	{
+		.name		= "TCPOPTSTRIP",
+		.table		= "mangle",
+		.family		= NFPROTO_IPV6,
+		.proto		= IPPROTO_TCP,
+		.targetsize	= sizeof(struct xt_tcpoptstrip_target_info),
+		.target		= tcpoptstrip_tg6,
+		.me		= THIS_MODULE,
+	},
+#endif
+};
+
+/* Module entry point: register every descriptor in tcpoptstrip_tg_reg[]. */
+static int __init tcpoptstrip_tg_init(void)
+{
+	int err;
+
+	err = xt_register_targets(tcpoptstrip_tg_reg,
+				  ARRAY_SIZE(tcpoptstrip_tg_reg));
+	return err;
+}
+
+/* Module exit point: unregister every descriptor in tcpoptstrip_tg_reg[]. */
+static void __exit tcpoptstrip_tg_exit(void)
+{
+ xt_unregister_targets(tcpoptstrip_tg_reg,
+ ARRAY_SIZE(tcpoptstrip_tg_reg));
+}
+
+module_init(tcpoptstrip_tg_init);
+module_exit(tcpoptstrip_tg_exit);
+MODULE_AUTHOR("Sven Schnelle <svens@bitebene.org>, Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_DESCRIPTION("Xtables: TCP option stripping");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_TCPOPTSTRIP");
+MODULE_ALIAS("ip6t_TCPOPTSTRIP");
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
new file mode 100644
index 0000000..1340c2f
--- /dev/null
+++ b/net/netfilter/xt_TPROXY.c
@@ -0,0 +1,102 @@
+/*
+ * Transparent proxy support for Linux/iptables
+ *
+ * Copyright (c) 2006-2007 BalaBit IT Ltd.
+ * Author: Balazs Scheidler, Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+#include <net/inet_sock.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/xt_TPROXY.h>
+
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <net/netfilter/nf_tproxy_core.h>
+
+/*
+ * TPROXY target hook.
+ *
+ * Reads the port pair at the start of the transport header, asks
+ * nf_tproxy_get_sock_v4() for a socket (using the rule's laddr/lport when
+ * they are non-zero, the packet's own destination otherwise) and attaches
+ * that socket to the packet.  On success the packet mark is rewritten from
+ * mark_mask/mark_value and the packet is accepted; in every other case the
+ * packet is dropped.
+ */
+static unsigned int
+tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_tproxy_target_info *tgi = par->targinfo;
+	const struct iphdr *iph = ip_hdr(skb);
+	struct udphdr _hdr, *hp;
+	struct sock *sk;
+
+	hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
+	if (hp == NULL)
+		return NF_DROP;
+
+	sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
+				   iph->saddr,
+				   tgi->laddr ? tgi->laddr : iph->daddr,
+				   hp->source,
+				   tgi->lport ? tgi->lport : hp->dest,
+				   par->in, true);
+
+	/* NOTE: assign_sock consumes our sk reference */
+	if (!sk || !nf_tproxy_assign_sock(skb, sk)) {
+		pr_debug("no socket, dropping: proto %u %08x:%u -> %08x:%u, mark: %x\n",
+			 iph->protocol, ntohl(iph->daddr), ntohs(hp->dest),
+			 ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark);
+		return NF_DROP;
+	}
+
+	/* This should be in a separate target, but we don't do multiple
+	   targets on the same rule yet */
+	skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
+
+	pr_debug("redirecting: proto %u %08x:%u -> %08x:%u, mark: %x\n",
+		 iph->protocol, ntohl(iph->daddr), ntohs(hp->dest),
+		 ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark);
+	return NF_ACCEPT;
+}
+
+/*
+ * Rule check: accept the rule only when its protocol is TCP or UDP and the
+ * protocol test is not inverted.
+ */
+static bool tproxy_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct ipt_ip *i = par->entryinfo;
+	const bool proto_ok = i->proto == IPPROTO_TCP ||
+			      i->proto == IPPROTO_UDP;
+
+	if (!proto_ok || (i->invflags & IPT_INV_PROTO)) {
+		pr_info("xt_TPROXY: Can be used only in combination with "
+			"either -p tcp or -p udp\n");
+		return false;
+	}
+	return true;
+}
+
+/*
+ * "TPROXY" target descriptor: AF_INET only, "mangle" table, PRE_ROUTING
+ * hook.
+ */
+static struct xt_target tproxy_tg_reg __read_mostly = {
+	.name		= "TPROXY",
+	.family		= AF_INET,
+	.table		= "mangle",
+	.hooks		= 1 << NF_INET_PRE_ROUTING,
+	.targetsize	= sizeof(struct xt_tproxy_target_info),
+	.checkentry	= tproxy_tg_check,
+	.target		= tproxy_tg,
+	.me		= THIS_MODULE,
+};
+
+/*
+ * Module entry point: call nf_defrag_ipv4_enable(), then register the
+ * TPROXY target and pass on the registration result.
+ */
+static int __init tproxy_tg_init(void)
+{
+	int err;
+
+	nf_defrag_ipv4_enable();
+	err = xt_register_target(&tproxy_tg_reg);
+	return err;
+}
+
+/* Module exit point: unregister the TPROXY target descriptor. */
+static void __exit tproxy_tg_exit(void)
+{
+ xt_unregister_target(&tproxy_tg_reg);
+}
+
+module_init(tproxy_tg_init);
+module_exit(tproxy_tg_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs");
+MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module.");
+MODULE_ALIAS("ipt_TPROXY");
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
new file mode 100644
index 0000000..fbb04b8
--- /dev/null
+++ b/net/netfilter/xt_TRACE.c
@@ -0,0 +1,40 @@
+/* This is a module which is used to mark packets for tracing.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+
+MODULE_DESCRIPTION("Xtables: packet flow tracing");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_TRACE");
+MODULE_ALIAS("ip6t_TRACE");
+
+/*
+ * TRACE target hook: set the nf_trace flag on the packet and return
+ * XT_CONTINUE.  The rule parameters in @par are not used.
+ */
+static unsigned int
+trace_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+ skb->nf_trace = 1;
+ return XT_CONTINUE;
+}
+
+/* "TRACE" target descriptor, limited to the "raw" table. */
+static struct xt_target trace_tg_reg __read_mostly = {
+	.name		= "TRACE",
+	.family		= NFPROTO_UNSPEC,
+	.revision	= 0,
+	.table		= "raw",
+	.target		= trace_tg,
+	.me		= THIS_MODULE,
+};
+
+/* Module entry point: register the TRACE target and pass on the result. */
+static int __init trace_tg_init(void)
+{
+	int err;
+
+	err = xt_register_target(&trace_tg_reg);
+	return err;
+}
+
+/* Module exit point: unregister the TRACE target descriptor. */
+static void __exit trace_tg_exit(void)
+{
+ xt_unregister_target(&trace_tg_reg);
+}
+
+module_init(trace_tg_init);
+module_exit(trace_tg_exit);
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
new file mode 100644
index 0000000..e821798
--- /dev/null
+++ b/net/netfilter/xt_comment.c
@@ -0,0 +1,45 @@
+/*
+ * Implements a dummy match to allow attaching comments to rules
+ *
+ * 2003-05-13 Brad Fisher (brad@info-link.net)
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_comment.h>
+
+MODULE_AUTHOR("Brad Fisher <brad@info-link.net>");
+MODULE_DESCRIPTION("Xtables: No-op match which can be tagged with a comment");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_comment");
+MODULE_ALIAS("ip6t_comment");
+
+/*
+ * Match hook for the "comment" match.  Neither the packet nor the match
+ * parameters are inspected; the result is always true.
+ */
+static bool
+comment_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+ /* We always match */
+ return true;
+}
+
+/* "comment" match descriptor passed to xt_register_match(). */
+static struct xt_match comment_mt_reg __read_mostly = {
+	.name		= "comment",
+	.family		= NFPROTO_UNSPEC,
+	.revision	= 0,
+	.matchsize	= sizeof(struct xt_comment_info),
+	.match		= comment_mt,
+	.me		= THIS_MODULE,
+};
+
+/* Module entry point: register the comment match and pass on the result. */
+static int __init comment_mt_init(void)
+{
+	int err;
+
+	err = xt_register_match(&comment_mt_reg);
+	return err;
+}
+
+/* Module exit point: unregister the comment match descriptor. */
+static void __exit comment_mt_exit(void)
+{
+ xt_unregister_match(&comment_mt_reg);
+}
+
+module_init(comment_mt_init);
+module_exit(comment_mt_exit);
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
new file mode 100644
index 0000000..955e659
--- /dev/null
+++ b/net/netfilter/xt_connbytes.c
@@ -0,0 +1,145 @@
+/* Kernel module to match connection tracking byte counter.
+ * GPL (C) 2002 Martin Devera (devik@cdi.cz).
+ */
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/skbuff.h>
+#include <linux/math64.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_connbytes.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: Number of packets/bytes per connection matching");
+MODULE_ALIAS("ipt_connbytes");
+MODULE_ALIAS("ip6t_connbytes");
+
+/*
+ * Match hook: compare a per-connection counter against the configured
+ * range.
+ *
+ * sinfo->direction selects the original, reply, or summed counters and
+ * sinfo->what selects packets, bytes, or bytes divided by packets.  An
+ * average over zero packets counts as 0.  Packets without a conntrack
+ * entry or without accounting counters never match.
+ *
+ * When count.to is non-zero the value must lie in [from, to]; otherwise it
+ * only has to be at least count.from.
+ */
+static bool
+connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_connbytes_info *sinfo = par->matchinfo;
+	const struct nf_conn_counter *counters, *orig, *reply;
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conn *ct;
+	u_int64_t bytes = 0;
+	u_int64_t pkts = 0;
+	u_int64_t what = 0;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		return false;
+
+	counters = nf_conn_acct_find(ct);
+	if (!counters)
+		return false;
+
+	orig  = &counters[IP_CT_DIR_ORIGINAL];
+	reply = &counters[IP_CT_DIR_REPLY];
+
+	/* first gather both totals for the requested direction ... */
+	switch (sinfo->direction) {
+	case XT_CONNBYTES_DIR_ORIGINAL:
+		bytes = orig->bytes;
+		pkts  = orig->packets;
+		break;
+	case XT_CONNBYTES_DIR_REPLY:
+		bytes = reply->bytes;
+		pkts  = reply->packets;
+		break;
+	case XT_CONNBYTES_DIR_BOTH:
+		bytes = orig->bytes + reply->bytes;
+		pkts  = orig->packets + reply->packets;
+		break;
+	}
+
+	/* ... then derive the quantity the rule asked for */
+	switch (sinfo->what) {
+	case XT_CONNBYTES_PKTS:
+		what = pkts;
+		break;
+	case XT_CONNBYTES_BYTES:
+		what = bytes;
+		break;
+	case XT_CONNBYTES_AVGPKT:
+		if (pkts != 0)
+			what = div64_u64(bytes, pkts);
+		break;
+	}
+
+	if (what < sinfo->count.from)
+		return false;
+
+	/* an upper bound of 0 means "no upper bound" */
+	return !sinfo->count.to || what <= sinfo->count.to;
+}
+
+/*
+ * Rule check: reject unknown 'what' or 'direction' selectors, then take a
+ * reference on the conntrack support for the rule's family via
+ * nf_ct_l3proto_try_module_get().
+ */
+static bool connbytes_mt_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_connbytes_info *sinfo = par->matchinfo;
+
+	switch (sinfo->what) {
+	case XT_CONNBYTES_PKTS:
+	case XT_CONNBYTES_BYTES:
+	case XT_CONNBYTES_AVGPKT:
+		break;
+	default:
+		return false;
+	}
+
+	switch (sinfo->direction) {
+	case XT_CONNBYTES_DIR_ORIGINAL:
+	case XT_CONNBYTES_DIR_REPLY:
+	case XT_CONNBYTES_DIR_BOTH:
+		break;
+	default:
+		return false;
+	}
+
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
+		printk(KERN_WARNING "can't load conntrack support for "
+		       "proto=%u\n", par->family);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Rule removal hook: drop the conntrack module reference for the rule's
+ * family, the counterpart of the try_module_get in connbytes_mt_check().
+ */
+static void connbytes_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family);
+}
+
+/* "connbytes" match descriptor passed to xt_register_match(). */
+static struct xt_match connbytes_mt_reg __read_mostly = {
+	.name		= "connbytes",
+	.family		= NFPROTO_UNSPEC,
+	.revision	= 0,
+	.matchsize	= sizeof(struct xt_connbytes_info),
+	.checkentry	= connbytes_mt_check,
+	.match		= connbytes_mt,
+	.destroy	= connbytes_mt_destroy,
+	.me		= THIS_MODULE,
+};
+
+/* Module entry point: register the connbytes match and pass on the result. */
+static int __init connbytes_mt_init(void)
+{
+	int err;
+
+	err = xt_register_match(&connbytes_mt_reg);
+	return err;
+}
+
+/* Module exit point: unregister the connbytes match descriptor. */
+static void __exit connbytes_mt_exit(void)
+{
+ xt_unregister_match(&connbytes_mt_reg);
+}
+
+module_init(connbytes_mt_init);
+module_exit(connbytes_mt_exit);
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
new file mode 100644
index 0000000..7f404cc
--- /dev/null
+++ b/net/netfilter/xt_connlimit.c
@@ -0,0 +1,296 @@
+/*
+ * netfilter module to limit the number of parallel tcp
+ * connections per IP address.
+ * (c) 2000 Gerd Knorr <kraxel@bytesex.org>
+ * Nov 2002: Martin Bene <martin.bene@icomedias.com>:
+ * only ignore TIME_WAIT or gone connections
+ * (C) CC Computer Consultants GmbH, 2007
+ * Contact: <jengelh@computergmbh.de>
+ *
+ * based on ...
+ *
+ * Kernel module to match connection tracking information.
+ * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au).
+ */
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/netfilter/nf_conntrack_tcp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_connlimit.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+/*
+ * We save the tuples of all connections we care about.  Each entry is
+ * linked into one bucket of xt_connlimit_data.iphash[].
+ */
+struct xt_connlimit_conn {
+ struct list_head list;
+ struct nf_conntrack_tuple tuple;
+};
+
+/*
+ * Per-rule private state: 256 list heads indexed by the 8-bit hash of the
+ * masked source address, plus the spinlock that connlimit_mt() holds
+ * around count_them().
+ */
+struct xt_connlimit_data {
+ struct list_head iphash[256];
+ spinlock_t lock;
+};
+
+/*
+ * Random seed mixed into the address hashes; filled in on first use by the
+ * hash helpers, which then set connlimit_rnd_inited.
+ */
+static u_int32_t connlimit_rnd;
+static bool connlimit_rnd_inited;
+
+/*
+ * Map an IPv4 address to a bucket index in the range 0..255, seeding the
+ * hash with connlimit_rnd (initialised here on first use).
+ */
+static inline unsigned int connlimit_iphash(__be32 addr)
+{
+	u_int32_t h;
+
+	if (unlikely(!connlimit_rnd_inited)) {
+		get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd));
+		connlimit_rnd_inited = true;
+	}
+
+	h = jhash_1word((__force __u32)addr, connlimit_rnd);
+	return h & 0xFF;
+}
+
+/*
+ * Map an IPv6 address to a bucket index in the range 0..255.  The address
+ * is ANDed with @mask word by word before hashing; the seed is
+ * connlimit_rnd (initialised here on first use).
+ */
+static inline unsigned int
+connlimit_iphash6(const union nf_inet_addr *addr,
+		  const union nf_inet_addr *mask)
+{
+	union nf_inet_addr masked;
+	unsigned int word;
+	u_int32_t h;
+
+	if (unlikely(!connlimit_rnd_inited)) {
+		get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd));
+		connlimit_rnd_inited = true;
+	}
+
+	for (word = 0; word < ARRAY_SIZE(addr->ip6); ++word)
+		masked.ip6[word] = addr->ip6[word] & mask->ip6[word];
+
+	h = jhash2((u32 *)masked.ip6, ARRAY_SIZE(masked.ip6), connlimit_rnd);
+	return h & 0xFF;
+}
+
+/*
+ * True for a TCP conntrack entry in state TIME_WAIT or CLOSE; always false
+ * for any other protocol.
+ */
+static inline bool already_closed(const struct nf_conn *conn)
+{
+	if (nf_ct_protonum(conn) != IPPROTO_TCP)
+		return 0;
+
+	return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
+	       conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
+}
+
+/*
+ * Return 1 when @addr and @u3 are equal after both have been ANDed with
+ * @mask, 0 otherwise.  NFPROTO_IPV4 compares the single 32-bit address,
+ * any other family compares all ip6[] words.
+ */
+static inline unsigned int
+same_source_net(const union nf_inet_addr *addr,
+		const union nf_inet_addr *mask,
+		const union nf_inet_addr *u3, u_int8_t family)
+{
+	unsigned int word;
+
+	if (family == NFPROTO_IPV4)
+		return (addr->ip & mask->ip) == (u3->ip & mask->ip);
+
+	for (word = 0; word < ARRAY_SIZE(addr->ip6); ++word) {
+		if ((addr->ip6[word] & mask->ip6[word]) !=
+		    (u3->ip6[word] & mask->ip6[word]))
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Count the saved connections that come from the same (masked) source
+ * network as @addr, pruning stale entries on the way.
+ *
+ * Walks the hash bucket selected by the masked address.  Entries whose
+ * conntrack can no longer be found, or is already closed, are unlinked and
+ * freed.  Unless @tuple is already present among the live entries it is
+ * appended to the bucket and counted as well.
+ *
+ * Returns the number of matching connections, or -ENOMEM when the new
+ * entry cannot be allocated.
+ */
+static int count_them(struct xt_connlimit_data *data,
+		      const struct nf_conntrack_tuple *tuple,
+		      const union nf_inet_addr *addr,
+		      const union nf_inet_addr *mask,
+		      const struct xt_match *match)
+{
+	const struct nf_conntrack_tuple_hash *found;
+	struct xt_connlimit_conn *entry, *next;
+	struct list_head *bucket;
+	bool need_add = true;
+	int count = 0;
+
+	if (match->family != NFPROTO_IPV6)
+		bucket = &data->iphash[connlimit_iphash(addr->ip & mask->ip)];
+	else
+		bucket = &data->iphash[connlimit_iphash6(addr, mask)];
+
+	rcu_read_lock();
+
+	/* check the saved connections */
+	list_for_each_entry_safe(entry, next, bucket, list) {
+		found = __nf_conntrack_find(&init_net, &entry->tuple);
+
+		/*
+		 * Either this one is gone, or we do not care about it any
+		 * more because it is closed already -> ditch it.
+		 */
+		if (found == NULL ||
+		    already_closed(nf_ct_tuplehash_to_ctrack(found))) {
+			list_del(&entry->list);
+			kfree(entry);
+			continue;
+		}
+
+		/*
+		 * Just to be sure we have it only once in the list.
+		 * We should not see tuples twice unless someone hooks
+		 * this into a table without "-p tcp --syn".
+		 */
+		if (nf_ct_tuple_equal(&entry->tuple, tuple))
+			need_add = false;
+
+		/* same source network -> be counted! */
+		if (same_source_net(addr, mask, &entry->tuple.src.u3,
+				    match->family))
+			++count;
+	}
+
+	rcu_read_unlock();
+
+	if (!need_add)
+		return count;
+
+	/* save the new connection in our list */
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	if (entry == NULL)
+		return -ENOMEM;
+	entry->tuple = *tuple;
+	list_add(&entry->list, bucket);
+
+	return count + 1;
+}
+
+/*
+ * Match hook: true when the number of tracked connections from the
+ * packet's source network exceeds info->limit, XORed with info->inverse.
+ *
+ * The tuple comes from the packet's conntrack entry when there is one,
+ * otherwise it is built from the packet itself.  When no tuple can be
+ * obtained, or count_them() reports an error, the packet is hot-dropped.
+ */
+static bool
+connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_connlimit_info *info = par->matchinfo;
+	struct nf_conntrack_tuple scratch;
+	const struct nf_conntrack_tuple *tuple_ptr = &scratch;
+	enum ip_conntrack_info ctinfo;
+	union nf_inet_addr addr;
+	const struct nf_conn *ct;
+	int connections;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL) {
+		if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+				       par->family, &scratch))
+			goto hotdrop;
+	} else {
+		tuple_ptr = &ct->tuplehash[0].tuple;
+	}
+
+	if (par->family != NFPROTO_IPV6) {
+		const struct iphdr *iph = ip_hdr(skb);
+
+		addr.ip = iph->saddr;
+	} else {
+		const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+		memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr));
+	}
+
+	spin_lock_bh(&info->data->lock);
+	connections = count_them(info->data, tuple_ptr, &addr,
+				 &info->mask, par->match);
+	spin_unlock_bh(&info->data->lock);
+
+	/* kmalloc failed, drop it entirely */
+	if (connections < 0)
+		goto hotdrop;
+
+	return (connections > info->limit) ^ info->inverse;
+
+hotdrop:
+	*par->hotdrop = true;
+	return false;
+}
+
+/*
+ * Rule check: take a reference on the conntrack support for the rule's
+ * family, then allocate and initialise the private hash table stored in
+ * info->data.  The module reference is dropped again if the allocation
+ * fails.
+ */
+static bool connlimit_mt_check(const struct xt_mtchk_param *par)
+{
+	struct xt_connlimit_info *info = par->matchinfo;
+	struct xt_connlimit_data *data;
+	unsigned int bucket;
+
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
+		printk(KERN_WARNING "cannot load conntrack support for "
+		       "address family %u\n", par->family);
+		return false;
+	}
+
+	/* init private data */
+	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	info->data = data;
+	if (data == NULL) {
+		nf_ct_l3proto_module_put(par->family);
+		return false;
+	}
+
+	spin_lock_init(&data->lock);
+	for (bucket = 0; bucket < ARRAY_SIZE(data->iphash); ++bucket)
+		INIT_LIST_HEAD(&data->iphash[bucket]);
+
+	return true;
+}
+
+/*
+ * Rule removal hook: drop the conntrack module reference, free every saved
+ * connection entry in every bucket, then free the private data itself.
+ */
+static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	const struct xt_connlimit_info *info = par->matchinfo;
+	struct xt_connlimit_data *data = info->data;
+	struct xt_connlimit_conn *entry, *next;
+	unsigned int bucket;
+
+	nf_ct_l3proto_module_put(par->family);
+
+	for (bucket = 0; bucket < ARRAY_SIZE(data->iphash); ++bucket) {
+		list_for_each_entry_safe(entry, next,
+					 &data->iphash[bucket], list) {
+			list_del(&entry->list);
+			kfree(entry);
+		}
+	}
+
+	kfree(data);
+}
+
+/* "connlimit" match descriptor passed to xt_register_match(). */
+static struct xt_match connlimit_mt_reg __read_mostly = {
+	.name		= "connlimit",
+	.family		= NFPROTO_UNSPEC,
+	.revision	= 0,
+	.matchsize	= sizeof(struct xt_connlimit_info),
+	.checkentry	= connlimit_mt_check,
+	.match		= connlimit_mt,
+	.destroy	= connlimit_mt_destroy,
+	.me		= THIS_MODULE,
+};
+
+/* Module entry point: register the connlimit match and pass on the result. */
+static int __init connlimit_mt_init(void)
+{
+	int err;
+
+	err = xt_register_match(&connlimit_mt_reg);
+	return err;
+}
+
+/* Module exit point: unregister the connlimit match descriptor. */
+static void __exit connlimit_mt_exit(void)
+{
+ xt_unregister_match(&connlimit_mt_reg);
+}
+
+module_init(connlimit_mt_init);
+module_exit(connlimit_mt_exit);
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: Number of connections matching");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_connlimit");
+MODULE_ALIAS("ip6t_connlimit");
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
new file mode 100644
index 0000000..86cacab
--- /dev/null
+++ b/net/netfilter/xt_connmark.c
@@ -0,0 +1,166 @@
+/*
+ * xt_connmark - Netfilter module to match connection mark values
+ *
+ * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+ * by Henrik Nordstrom <hno@marasystems.com>
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ * Jan Engelhardt <jengelh@computergmbh.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_connmark.h>
+
+MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
+MODULE_DESCRIPTION("Xtables: connection mark match");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_connmark");
+MODULE_ALIAS("ip6t_connmark");
+
+/*
+ * connmark_mt - revision 1 connection-mark match.
+ *
+ * Returns false when the packet has no conntrack entry.  Otherwise the
+ * result is whether (ct->mark & info->mask) equals info->mark, negated
+ * when info->invert is non-zero.
+ */
+static bool
+connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_connmark_mtinfo1 *info = par->matchinfo;
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL)
+		return false;
+
+	/*
+	 * Reduce invert to 0/1 first: XOR-ing with a raw value such as 2
+	 * would yield a non-zero (true) result for every packet.
+	 */
+	return ((ct->mark & info->mask) == info->mark) ^ !!info->invert;
+}
+
+/*
+ * connmark_mt_v0 - revision 0 connection-mark match.
+ *
+ * Same test as revision 1 but reads the older struct xt_connmark_info
+ * layout.  Returns false when the packet has no conntrack entry.
+ */
+static bool
+connmark_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_connmark_info *info = par->matchinfo;
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		return false;
+
+	/*
+	 * Reduce invert to 0/1 first: XOR-ing with a raw value such as 2
+	 * would yield a non-zero (true) result for every packet.
+	 */
+	return ((ct->mark & info->mask) == info->mark) ^ !!info->invert;
+}
+
+/*
+ * Rule validation for revision 0: reject mark/mask values that do not
+ * fit in 32 bits, then try to take a conntrack l3proto module reference
+ * for par->family.  Returns true only when both steps succeed.
+ */
+static bool connmark_mt_check_v0(const struct xt_mtchk_param *par)
+{
+	const struct xt_connmark_info *cm = par->matchinfo;
+	bool fits32 = cm->mark <= 0xffffffff && cm->mask <= 0xffffffff;
+
+	if (!fits32) {
+		printk(KERN_WARNING "connmark: only support 32bit mark\n");
+		return false;
+	}
+
+	if (nf_ct_l3proto_try_module_get(par->family) >= 0)
+		return true;
+
+	printk(KERN_WARNING "can't load conntrack support for "
+	       "proto=%u\n", par->family);
+	return false;
+}
+
+/*
+ * Rule validation for revision 1: succeeds when a conntrack l3proto
+ * module reference for par->family can be taken.
+ */
+static bool connmark_mt_check(const struct xt_mtchk_param *par)
+{
+	if (nf_ct_l3proto_try_module_get(par->family) >= 0)
+		return true;
+
+	printk(KERN_WARNING "cannot load conntrack support for "
+	       "proto=%u\n", par->family);
+	return false;
+}
+
+/* Rule teardown: release the l3proto module reference for par->family. */
+static void connmark_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family);
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Compat-ABI layout of struct xt_connmark_info: mark and mask are
+ * compat_ulong_t, with explicit padding members after invert.
+ */
+struct compat_xt_connmark_info {
+ compat_ulong_t mark, mask;
+ u_int8_t invert;
+ u_int8_t __pad1;
+ u_int16_t __pad2;
+};
+
+/* Expand a compat-layout rule at src into the native layout at dst. */
+static void connmark_mt_compat_from_user_v0(void *dst, void *src)
+{
+	const struct compat_xt_connmark_info *compat = src;
+	struct xt_connmark_info native = {
+		.mark	= compat->mark,
+		.mask	= compat->mask,
+		.invert	= compat->invert,
+	};
+
+	memcpy(dst, &native, sizeof(native));
+}
+
+/*
+ * Convert the native rule at src to the compat layout and copy it to the
+ * user buffer dst.  Returns 0 on success, -EFAULT if the copy fails.
+ */
+static int connmark_mt_compat_to_user_v0(void __user *dst, void *src)
+{
+	const struct xt_connmark_info *native = src;
+	/* Members not named here (the pad fields) are zero-initialized. */
+	struct compat_xt_connmark_info compat = {
+		.mark	= native->mark,
+		.mask	= native->mask,
+		.invert	= native->invert,
+	};
+
+	if (copy_to_user(dst, &compat, sizeof(compat)))
+		return -EFAULT;
+	return 0;
+}
+#endif /* CONFIG_COMPAT */
+
+/*
+ * Registration records for "connmark": revision 0 uses
+ * struct xt_connmark_info (with compat conversion when CONFIG_COMPAT is
+ * set), revision 1 uses struct xt_connmark_mtinfo1.
+ */
+static struct xt_match connmark_mt_reg[] __read_mostly = {
+ {
+ .name = "connmark",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connmark_mt_check_v0,
+ .match = connmark_mt_v0,
+ .destroy = connmark_mt_destroy,
+ .matchsize = sizeof(struct xt_connmark_info),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_xt_connmark_info),
+ .compat_from_user = connmark_mt_compat_from_user_v0,
+ .compat_to_user = connmark_mt_compat_to_user_v0,
+#endif
+ .me = THIS_MODULE
+ },
+ {
+ .name = "connmark",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = connmark_mt_check,
+ .match = connmark_mt,
+ .matchsize = sizeof(struct xt_connmark_mtinfo1),
+ .destroy = connmark_mt_destroy,
+ .me = THIS_MODULE,
+ },
+};
+
+/* Module init: register both connmark revisions. */
+static int __init connmark_mt_init(void)
+{
+ return xt_register_matches(connmark_mt_reg,
+ ARRAY_SIZE(connmark_mt_reg));
+}
+
+/* Module exit: unregister both connmark revisions. */
+static void __exit connmark_mt_exit(void)
+{
+ xt_unregister_matches(connmark_mt_reg, ARRAY_SIZE(connmark_mt_reg));
+}
+
+module_init(connmark_mt_init);
+module_exit(connmark_mt_exit);
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
new file mode 100644
index 0000000..0b7139f
--- /dev/null
+++ b/net/netfilter/xt_conntrack.c
@@ -0,0 +1,385 @@
+/*
+ * xt_conntrack - Netfilter module to match connection tracking
+ * information. (Superset of Rusty's minimalistic state match.)
+ *
+ * (C) 2001 Marc Boucher (marc@mbsi.ca).
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_conntrack.h>
+#include <net/netfilter/nf_conntrack.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_DESCRIPTION("Xtables: connection tracking state match");
+MODULE_ALIAS("ipt_conntrack");
+MODULE_ALIAS("ip6t_conntrack");
+
+/*
+ * conntrack_mt_v0 - revision 0 conntrack match (IPv4 address fields).
+ *
+ * Each criterion is evaluated only if its bit is set in sinfo->flags and
+ * may be negated through the same bit in sinfo->invflags.  Returns true
+ * when every enabled criterion passes.  A packet without a conntrack
+ * entry can only match if no criterion other than state is requested.
+ */
+static bool
+conntrack_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+ const struct xt_conntrack_info *sinfo = par->matchinfo;
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int statebit;
+
+ ct = nf_ct_get(skb, &ctinfo);
+
+/* XOR a "criterion failed" condition with the rule's invert bit. */
+#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & (invflg)))
+
+ /* Derive the state bit: untracked, tracked (from ctinfo), or invalid. */
+ if (ct == &nf_conntrack_untracked)
+ statebit = XT_CONNTRACK_STATE_UNTRACKED;
+ else if (ct)
+ statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
+ else
+ statebit = XT_CONNTRACK_STATE_INVALID;
+
+ if (sinfo->flags & XT_CONNTRACK_STATE) {
+ if (ct) {
+ /* NAT status is folded into the state bits being tested. */
+ if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
+ statebit |= XT_CONNTRACK_STATE_SNAT;
+ if (test_bit(IPS_DST_NAT_BIT, &ct->status))
+ statebit |= XT_CONNTRACK_STATE_DNAT;
+ }
+ if (FWINV((statebit & sinfo->statemask) == 0,
+ XT_CONNTRACK_STATE))
+ return false;
+ }
+
+ /* Everything below dereferences ct, so stop here without one. */
+ if (ct == NULL) {
+ if (sinfo->flags & ~XT_CONNTRACK_STATE)
+ return false;
+ return true;
+ }
+
+ if (sinfo->flags & XT_CONNTRACK_PROTO &&
+ FWINV(nf_ct_protonum(ct) !=
+ sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum,
+ XT_CONNTRACK_PROTO))
+ return false;
+
+ /* Masked IPv4 address comparisons, original then reply direction. */
+ if (sinfo->flags & XT_CONNTRACK_ORIGSRC &&
+ FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip &
+ sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
+ sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip,
+ XT_CONNTRACK_ORIGSRC))
+ return false;
+
+ if (sinfo->flags & XT_CONNTRACK_ORIGDST &&
+ FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip &
+ sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
+ sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip,
+ XT_CONNTRACK_ORIGDST))
+ return false;
+
+ if (sinfo->flags & XT_CONNTRACK_REPLSRC &&
+ FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip &
+ sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) !=
+ sinfo->tuple[IP_CT_DIR_REPLY].src.ip,
+ XT_CONNTRACK_REPLSRC))
+ return false;
+
+ if (sinfo->flags & XT_CONNTRACK_REPLDST &&
+ FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip &
+ sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) !=
+ sinfo->tuple[IP_CT_DIR_REPLY].dst.ip,
+ XT_CONNTRACK_REPLDST))
+ return false;
+
+ if (sinfo->flags & XT_CONNTRACK_STATUS &&
+ FWINV((ct->status & sinfo->statusmask) == 0,
+ XT_CONNTRACK_STATUS))
+ return false;
+
+ if(sinfo->flags & XT_CONNTRACK_EXPIRES) {
+ /* Remaining lifetime in seconds; 0 when no timer is pending. */
+ unsigned long expires = timer_pending(&ct->timeout) ?
+ (ct->timeout.expires - jiffies)/HZ : 0;
+
+ if (FWINV(!(expires >= sinfo->expires_min &&
+ expires <= sinfo->expires_max),
+ XT_CONNTRACK_EXPIRES))
+ return false;
+ }
+ return true;
+#undef FWINV
+}
+
+/*
+ * Compare kaddr with uaddr under umask for the given layer-3 protocol.
+ * Returns true when the masked addresses agree; any family other than
+ * IPv4/IPv6 never matches.
+ */
+static bool
+conntrack_addrcmp(const union nf_inet_addr *kaddr,
+		  const union nf_inet_addr *uaddr,
+		  const union nf_inet_addr *umask, unsigned int l3proto)
+{
+	switch (l3proto) {
+	case NFPROTO_IPV4:
+		return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0;
+	case NFPROTO_IPV6:
+		return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6,
+					    &uaddr->in6) == 0;
+	default:
+		return false;
+	}
+}
+
+/* Masked compare of the original-direction source address with the rule. */
+static inline bool
+conntrack_mt_origsrc(const struct nf_conn *ct,
+		     const struct xt_conntrack_mtinfo1 *info,
+		     u_int8_t family)
+{
+	const struct nf_conntrack_tuple *orig =
+		&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+
+	return conntrack_addrcmp(&orig->src.u3, &info->origsrc_addr,
+				 &info->origsrc_mask, family);
+}
+
+/* Masked compare of the original-direction destination address with the rule. */
+static inline bool
+conntrack_mt_origdst(const struct nf_conn *ct,
+		     const struct xt_conntrack_mtinfo1 *info,
+		     u_int8_t family)
+{
+	const struct nf_conntrack_tuple *orig =
+		&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+
+	return conntrack_addrcmp(&orig->dst.u3, &info->origdst_addr,
+				 &info->origdst_mask, family);
+}
+
+/* Masked compare of the reply-direction source address with the rule. */
+static inline bool
+conntrack_mt_replsrc(const struct nf_conn *ct,
+		     const struct xt_conntrack_mtinfo1 *info,
+		     u_int8_t family)
+{
+	const struct nf_conntrack_tuple *reply =
+		&ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+	return conntrack_addrcmp(&reply->src.u3, &info->replsrc_addr,
+				 &info->replsrc_mask, family);
+}
+
+/* Masked compare of the reply-direction destination address with the rule. */
+static inline bool
+conntrack_mt_repldst(const struct nf_conn *ct,
+		     const struct xt_conntrack_mtinfo1 *info,
+		     u_int8_t family)
+{
+	const struct nf_conntrack_tuple *reply =
+		&ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+	return conntrack_addrcmp(&reply->dst.u3, &info->repldst_addr,
+				 &info->repldst_mask, family);
+}
+
+/*
+ * Check the layer-4 protocol number and the four port criteria of a
+ * revision 1 rule against ct.  A criterion is tested only when its bit is
+ * set in info->match_flags; the same bit in info->invert_flags negates
+ * it.  Returns false as soon as one enabled criterion fails.
+ */
+static inline bool
+ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info,
+		    const struct nf_conn *ct)
+{
+	const struct nf_conntrack_tuple *orig =
+		&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	const struct nf_conntrack_tuple *reply =
+		&ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+	bool want;
+
+	if (info->match_flags & XT_CONNTRACK_PROTO) {
+		want = !(info->invert_flags & XT_CONNTRACK_PROTO);
+		if ((nf_ct_protonum(ct) == info->l4proto) != want)
+			return false;
+	}
+
+	/* Shortcut to match all recognized protocols by using ->src.all. */
+	if (info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) {
+		want = !(info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT);
+		if ((orig->src.u.all == info->origsrc_port) != want)
+			return false;
+	}
+
+	if (info->match_flags & XT_CONNTRACK_ORIGDST_PORT) {
+		want = !(info->invert_flags & XT_CONNTRACK_ORIGDST_PORT);
+		if ((orig->dst.u.all == info->origdst_port) != want)
+			return false;
+	}
+
+	if (info->match_flags & XT_CONNTRACK_REPLSRC_PORT) {
+		want = !(info->invert_flags & XT_CONNTRACK_REPLSRC_PORT);
+		if ((reply->src.u.all == info->replsrc_port) != want)
+			return false;
+	}
+
+	if (info->match_flags & XT_CONNTRACK_REPLDST_PORT) {
+		want = !(info->invert_flags & XT_CONNTRACK_REPLDST_PORT);
+		if ((reply->dst.u.all == info->repldst_port) != want)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * conntrack_mt - revision 1 conntrack match.
+ *
+ * Every criterion whose bit is set in info->match_flags is tested against
+ * the packet's conntrack entry; the same bit in info->invert_flags
+ * negates that test.  Returns true when all enabled criteria pass.
+ * Without a conntrack entry only the state criterion can be evaluated,
+ * and the packet matches only if state matching was requested (and
+ * passed).
+ */
+static bool
+conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_conntrack_mtinfo1 *info = par->matchinfo;
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conn *ct;
+	unsigned int statebit;
+
+	ct = nf_ct_get(skb, &ctinfo);
+
+	if (ct == &nf_conntrack_untracked)
+		statebit = XT_CONNTRACK_STATE_UNTRACKED;
+	else if (ct != NULL)
+		statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
+	else
+		statebit = XT_CONNTRACK_STATE_INVALID;
+
+	if (info->match_flags & XT_CONNTRACK_STATE) {
+		if (ct != NULL) {
+			/* NAT status is folded into the tested state bits. */
+			if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
+				statebit |= XT_CONNTRACK_STATE_SNAT;
+			if (test_bit(IPS_DST_NAT_BIT, &ct->status))
+				statebit |= XT_CONNTRACK_STATE_DNAT;
+		}
+		if (!!(info->state_mask & statebit) ^
+		    !(info->invert_flags & XT_CONNTRACK_STATE))
+			return false;
+	}
+
+	/* Everything below dereferences ct. */
+	if (ct == NULL)
+		return info->match_flags & XT_CONNTRACK_STATE;
+
+	/*
+	 * Same polarity as every other test in this function: with the
+	 * invert bit clear the packet must be in the original direction.
+	 * The previous "!!" here reversed that and rejected exactly the
+	 * packets a non-inverted direction rule asked for.
+	 */
+	if ((info->match_flags & XT_CONNTRACK_DIRECTION) &&
+	    (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^
+	    !(info->invert_flags & XT_CONNTRACK_DIRECTION))
+		return false;
+
+	if (info->match_flags & XT_CONNTRACK_ORIGSRC)
+		if (conntrack_mt_origsrc(ct, info, par->family) ^
+		    !(info->invert_flags & XT_CONNTRACK_ORIGSRC))
+			return false;
+
+	if (info->match_flags & XT_CONNTRACK_ORIGDST)
+		if (conntrack_mt_origdst(ct, info, par->family) ^
+		    !(info->invert_flags & XT_CONNTRACK_ORIGDST))
+			return false;
+
+	if (info->match_flags & XT_CONNTRACK_REPLSRC)
+		if (conntrack_mt_replsrc(ct, info, par->family) ^
+		    !(info->invert_flags & XT_CONNTRACK_REPLSRC))
+			return false;
+
+	if (info->match_flags & XT_CONNTRACK_REPLDST)
+		if (conntrack_mt_repldst(ct, info, par->family) ^
+		    !(info->invert_flags & XT_CONNTRACK_REPLDST))
+			return false;
+
+	if (!ct_proto_port_check(info, ct))
+		return false;
+
+	if ((info->match_flags & XT_CONNTRACK_STATUS) &&
+	    (!!(info->status_mask & ct->status) ^
+	    !(info->invert_flags & XT_CONNTRACK_STATUS)))
+		return false;
+
+	if (info->match_flags & XT_CONNTRACK_EXPIRES) {
+		/* Remaining lifetime in seconds; 0 when no timer is pending. */
+		unsigned long expires = 0;
+
+		if (timer_pending(&ct->timeout))
+			expires = (ct->timeout.expires - jiffies) / HZ;
+		if ((expires >= info->expires_min &&
+		    expires <= info->expires_max) ^
+		    !(info->invert_flags & XT_CONNTRACK_EXPIRES))
+			return false;
+	}
+	return true;
+}
+
+/*
+ * Rule validation: succeeds when a conntrack l3proto module reference
+ * for par->family can be taken.
+ */
+static bool conntrack_mt_check(const struct xt_mtchk_param *par)
+{
+	if (nf_ct_l3proto_try_module_get(par->family) >= 0)
+		return true;
+
+	printk(KERN_WARNING "can't load conntrack support for "
+	       "proto=%u\n", par->family);
+	return false;
+}
+
+/* Rule teardown: release the l3proto module reference for par->family. */
+static void conntrack_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family);
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Compat-ABI layout of struct xt_conntrack_info: the mask and expiry
+ * fields use compat_uint_t / compat_ulong_t.
+ */
+struct compat_xt_conntrack_info
+{
+ compat_uint_t statemask;
+ compat_uint_t statusmask;
+ struct ip_conntrack_old_tuple tuple[IP_CT_DIR_MAX];
+ struct in_addr sipmsk[IP_CT_DIR_MAX];
+ struct in_addr dipmsk[IP_CT_DIR_MAX];
+ compat_ulong_t expires_min;
+ compat_ulong_t expires_max;
+ u_int8_t flags;
+ u_int8_t invflags;
+};
+
+/* Expand a compat-layout revision 0 rule at src into the native layout at dst. */
+static void conntrack_mt_compat_from_user_v0(void *dst, void *src)
+{
+	const struct compat_xt_conntrack_info *compat = src;
+	struct xt_conntrack_info native = {
+		.statemask	= compat->statemask,
+		.statusmask	= compat->statusmask,
+		.expires_min	= compat->expires_min,
+		.expires_max	= compat->expires_max,
+		.flags		= compat->flags,
+		.invflags	= compat->invflags,
+	};
+
+	/* The array members are copied as raw bytes. */
+	memcpy(native.tuple, compat->tuple, sizeof(native.tuple));
+	memcpy(native.sipmsk, compat->sipmsk, sizeof(native.sipmsk));
+	memcpy(native.dipmsk, compat->dipmsk, sizeof(native.dipmsk));
+	memcpy(dst, &native, sizeof(native));
+}
+
+/*
+ * Convert the native revision 0 rule at src to the compat layout and copy
+ * it to the user buffer dst.  Returns 0 on success, -EFAULT if the copy
+ * fails.
+ */
+static int conntrack_mt_compat_to_user_v0(void __user *dst, void *src)
+{
+	const struct xt_conntrack_info *native = src;
+	struct compat_xt_conntrack_info compat = {
+		.statemask	= native->statemask,
+		.statusmask	= native->statusmask,
+		.expires_min	= native->expires_min,
+		.expires_max	= native->expires_max,
+		.flags		= native->flags,
+		.invflags	= native->invflags,
+	};
+
+	memcpy(compat.tuple, native->tuple, sizeof(compat.tuple));
+	memcpy(compat.sipmsk, native->sipmsk, sizeof(compat.sipmsk));
+	memcpy(compat.dipmsk, native->dipmsk, sizeof(compat.dipmsk));
+
+	if (copy_to_user(dst, &compat, sizeof(compat)))
+		return -EFAULT;
+	return 0;
+}
+#endif
+
+/*
+ * Registration records for "conntrack": revision 0 is IPv4-only and has
+ * compat conversion under CONFIG_COMPAT; revision 1 is registered with
+ * NFPROTO_UNSPEC.
+ */
+static struct xt_match conntrack_mt_reg[] __read_mostly = {
+ {
+ .name = "conntrack",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .match = conntrack_mt_v0,
+ .checkentry = conntrack_mt_check,
+ .destroy = conntrack_mt_destroy,
+ .matchsize = sizeof(struct xt_conntrack_info),
+ .me = THIS_MODULE,
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_xt_conntrack_info),
+ .compat_from_user = conntrack_mt_compat_from_user_v0,
+ .compat_to_user = conntrack_mt_compat_to_user_v0,
+#endif
+ },
+ {
+ .name = "conntrack",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .matchsize = sizeof(struct xt_conntrack_mtinfo1),
+ .match = conntrack_mt,
+ .checkentry = conntrack_mt_check,
+ .destroy = conntrack_mt_destroy,
+ .me = THIS_MODULE,
+ },
+};
+
+/* Module init: register both conntrack revisions. */
+static int __init conntrack_mt_init(void)
+{
+ return xt_register_matches(conntrack_mt_reg,
+ ARRAY_SIZE(conntrack_mt_reg));
+}
+
+/* Module exit: unregister both conntrack revisions. */
+static void __exit conntrack_mt_exit(void)
+{
+ xt_unregister_matches(conntrack_mt_reg, ARRAY_SIZE(conntrack_mt_reg));
+}
+
+module_init(conntrack_mt_init);
+module_exit(conntrack_mt_exit);
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
new file mode 100644
index 0000000..e5d3e86
--- /dev/null
+++ b/net/netfilter/xt_dccp.c
@@ -0,0 +1,181 @@
+/*
+ * iptables module for DCCP protocol header matching
+ *
+ * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <net/ip.h>
+#include <linux/dccp.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_dccp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: DCCP protocol packet match");
+MODULE_ALIAS("ipt_dccp");
+MODULE_ALIAS("ip6t_dccp");
+
+/*
+ * DCCHECK: true when `option` is not selected in `flag`; otherwise `cond`
+ * XOR-ed with whether `option` is set in `invflag`.  `cond` is evaluated
+ * only when the option is selected.
+ */
+#define DCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
+ || (!!((invflag) & (option)) ^ (cond)))
+
+/* Scratch buffer for DCCP option bytes; dccp_buflock is held around its use. */
+static unsigned char *dccp_optbuf;
+static DEFINE_SPINLOCK(dccp_buflock);
+
+/*
+ * dccp_find_option - look for option type `option` in a DCCP header.
+ * @option:  option type to search for
+ * @skb:     packet being matched
+ * @protoff: offset of the DCCP header within skb
+ * @dh:      already-fetched DCCP header
+ * @hotdrop: set to true when the header is inconsistent or the options
+ *           cannot be fetched
+ *
+ * Returns true when the option is present.  The option area is walked
+ * under dccp_buflock because the shared dccp_optbuf may be used as the
+ * copy target by skb_header_pointer().
+ */
+static inline bool
+dccp_find_option(u_int8_t option,
+		 const struct sk_buff *skb,
+		 unsigned int protoff,
+		 const struct dccp_hdr *dh,
+		 bool *hotdrop)
+{
+	/* dccph_doff counts 32-bit words; dccp_optbuf is sized for 256 * 4. */
+	const unsigned char *op;
+	unsigned int optoff = __dccp_hdr_len(dh);
+	unsigned int optlen;
+	unsigned int i;
+	bool found = false;
+
+	/* Data offset must at least cover the fixed header. */
+	if (dh->dccph_doff * 4 < optoff) {
+		*hotdrop = true;
+		return false;
+	}
+
+	/* Computed only after validation so the subtraction cannot wrap. */
+	optlen = dh->dccph_doff * 4 - optoff;
+	if (!optlen)
+		return false;
+
+	spin_lock_bh(&dccp_buflock);
+	op = skb_header_pointer(skb, protoff + optoff, optlen, dccp_optbuf);
+	if (op == NULL) {
+		/* If we don't have the whole header, drop packet. */
+		*hotdrop = true;
+		goto out;
+	}
+
+	for (i = 0; i < optlen; ) {
+		if (op[i] == option) {
+			found = true;
+			break;
+		}
+
+		/*
+		 * Types below 2 are a single byte.  Others carry a length
+		 * at op[i+1]; only read it if it lies inside the option
+		 * area, and always advance by at least one byte.
+		 */
+		if (op[i] < 2 || i + 1 >= optlen)
+			i++;
+		else
+			i += op[i+1]?:1;
+	}
+
+out:
+	spin_unlock_bh(&dccp_buflock);
+	return found;
+}
+
+
+/* True when the bit numbered by the header's dccph_type is set in typemask. */
+static inline bool
+match_types(const struct dccp_hdr *dh, u_int16_t typemask)
+{
+ return typemask & (1 << dh->dccph_type);
+}
+
+/* Thin wrapper: forwards all arguments to dccp_find_option(). */
+static inline bool
+match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff,
+ const struct dccp_hdr *dh, bool *hotdrop)
+{
+ return dccp_find_option(option, skb, protoff, dh, hotdrop);
+}
+
+/*
+ * dccp_mt - match DCCP source/destination port ranges, packet type and
+ * option presence, each subject to the rule's flags/invflags.
+ *
+ * Non-first fragments never match.  If the DCCP header cannot be fetched
+ * the packet is marked for hotdrop.  The criteria are evaluated in order
+ * and evaluation stops at the first one that fails.
+ */
+static bool
+dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_dccp_info *info = par->matchinfo;
+	struct dccp_hdr hdrbuf;
+	const struct dccp_hdr *dh;
+
+	if (par->fragoff != 0)
+		return false;
+
+	dh = skb_header_pointer(skb, par->thoff, sizeof(hdrbuf), &hdrbuf);
+	if (dh == NULL) {
+		*par->hotdrop = true;
+		return false;
+	}
+
+	if (!DCCHECK(ntohs(dh->dccph_sport) >= info->spts[0]
+		     && ntohs(dh->dccph_sport) <= info->spts[1],
+		     XT_DCCP_SRC_PORTS, info->flags, info->invflags))
+		return false;
+
+	if (!DCCHECK(ntohs(dh->dccph_dport) >= info->dpts[0]
+		     && ntohs(dh->dccph_dport) <= info->dpts[1],
+		     XT_DCCP_DEST_PORTS, info->flags, info->invflags))
+		return false;
+
+	if (!DCCHECK(match_types(dh, info->typemask),
+		     XT_DCCP_TYPE, info->flags, info->invflags))
+		return false;
+
+	return DCCHECK(match_option(info->option, skb, par->thoff, dh,
+				    par->hotdrop),
+		       XT_DCCP_OPTION, info->flags, info->invflags);
+}
+
+/*
+ * Rule validation: flags and invflags may only contain bits from
+ * XT_DCCP_VALID_FLAGS, and a criterion may only be inverted if it is
+ * also selected in flags.
+ */
+static bool dccp_mt_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_dccp_info *info = par->matchinfo;
+
+	if (info->flags & ~XT_DCCP_VALID_FLAGS)
+		return false;
+	if (info->invflags & ~XT_DCCP_VALID_FLAGS)
+		return false;
+
+	return !(info->invflags & ~info->flags);
+}
+
+/* Registration records for "dccp": one each for IPv4 and IPv6, same handlers. */
+static struct xt_match dccp_mt_reg[] __read_mostly = {
+ {
+ .name = "dccp",
+ .family = NFPROTO_IPV4,
+ .checkentry = dccp_mt_check,
+ .match = dccp_mt,
+ .matchsize = sizeof(struct xt_dccp_info),
+ .proto = IPPROTO_DCCP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "dccp",
+ .family = NFPROTO_IPV6,
+ .checkentry = dccp_mt_check,
+ .match = dccp_mt,
+ .matchsize = sizeof(struct xt_dccp_info),
+ .proto = IPPROTO_DCCP,
+ .me = THIS_MODULE,
+ },
+};
+
+/*
+ * Module init: allocate the option scratch buffer, then register the
+ * matches.  Returns -ENOMEM if the allocation fails, otherwise the
+ * registration result; the buffer is freed again if registration fails.
+ */
+static int __init dccp_mt_init(void)
+{
+	int err;
+
+	/* doff is 8 bits, so the maximum option size is (4*256).  Don't put
+	 * this in BSS since DaveM is worried about locked TLB's for kernel
+	 * BSS. */
+	dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
+	if (dccp_optbuf == NULL)
+		return -ENOMEM;
+
+	err = xt_register_matches(dccp_mt_reg, ARRAY_SIZE(dccp_mt_reg));
+	if (err != 0)
+		kfree(dccp_optbuf);
+
+	return err;
+}
+
+/* Module exit: unregister the matches first, then free the scratch buffer. */
+static void __exit dccp_mt_exit(void)
+{
+ xt_unregister_matches(dccp_mt_reg, ARRAY_SIZE(dccp_mt_reg));
+ kfree(dccp_optbuf);
+}
+
+module_init(dccp_mt_init);
+module_exit(dccp_mt_exit);
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
new file mode 100644
index 0000000..c3f8085
--- /dev/null
+++ b/net/netfilter/xt_dscp.c
@@ -0,0 +1,132 @@
+/* IP tables module for matching the value of the IPv4/IPv6 DSCP field
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/dsfield.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_dscp.h>
+#include <linux/netfilter_ipv4/ipt_tos.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: DSCP/TOS field match");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_dscp");
+MODULE_ALIAS("ip6t_dscp");
+MODULE_ALIAS("ipt_tos");
+MODULE_ALIAS("ip6t_tos");
+
+/*
+ * IPv4 DSCP match: compare the DS field shifted right by XT_DSCP_SHIFT
+ * with info->dscp, negated when info->invert is non-zero.
+ */
+static bool
+dscp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_dscp_info *info = par->matchinfo;
+	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
+	bool same = dscp == info->dscp;
+
+	return info->invert ? !same : same;
+}
+
+/*
+ * IPv6 DSCP match: compare the DS field shifted right by XT_DSCP_SHIFT
+ * with info->dscp, negated when info->invert is non-zero.
+ */
+static bool
+dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_dscp_info *info = par->matchinfo;
+	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
+	bool same = dscp == info->dscp;
+
+	return info->invert ? !same : same;
+}
+
+/* Rule validation: the requested DSCP value must not exceed XT_DSCP_MAX. */
+static bool dscp_mt_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_dscp_info *info = par->matchinfo;
+
+	if (info->dscp <= XT_DSCP_MAX)
+		return true;
+
+	printk(KERN_ERR "xt_dscp: dscp %x out of range\n", info->dscp);
+	return false;
+}
+
+/*
+ * Revision 0 TOS match (IPv4): compare the IPv4 header's tos byte with
+ * info->tos, negated when info->invert is non-zero.
+ */
+static bool
+tos_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct ipt_tos_info *info = par->matchinfo;
+
+	/*
+	 * Reduce invert to 0/1 (as tos_mt() does): XOR-ing with a raw
+	 * value such as 2 would make the rule match every packet.
+	 */
+	return (ip_hdr(skb)->tos == info->tos) ^ !!info->invert;
+}
+
+/*
+ * Revision 1 TOS match: take the IPv4 tos byte or, for any other
+ * registered family, the IPv6 DS field; mask it with info->tos_mask and
+ * compare with info->tos_value, negated when info->invert is non-zero.
+ */
+static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_tos_match_info *info = par->matchinfo;
+	unsigned int tos;
+
+	if (par->match->family == NFPROTO_IPV4)
+		tos = ip_hdr(skb)->tos;
+	else
+		tos = ipv6_get_dsfield(ipv6_hdr(skb));
+
+	return ((tos & info->tos_mask) == info->tos_value) ^ !!info->invert;
+}
+
+/*
+ * Registration records: "dscp" for IPv4 and IPv6, "tos" revision 0 for
+ * IPv4 only, and "tos" revision 1 for IPv4 and IPv6.
+ */
+static struct xt_match dscp_mt_reg[] __read_mostly = {
+ {
+ .name = "dscp",
+ .family = NFPROTO_IPV4,
+ .checkentry = dscp_mt_check,
+ .match = dscp_mt,
+ .matchsize = sizeof(struct xt_dscp_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "dscp",
+ .family = NFPROTO_IPV6,
+ .checkentry = dscp_mt_check,
+ .match = dscp_mt6,
+ .matchsize = sizeof(struct xt_dscp_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "tos",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .match = tos_mt_v0,
+ .matchsize = sizeof(struct ipt_tos_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "tos",
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .match = tos_mt,
+ .matchsize = sizeof(struct xt_tos_match_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "tos",
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .match = tos_mt,
+ .matchsize = sizeof(struct xt_tos_match_info),
+ .me = THIS_MODULE,
+ },
+};
+
+/* Module init: register all dscp/tos matches. */
+static int __init dscp_mt_init(void)
+{
+ return xt_register_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg));
+}
+
+/* Module exit: unregister all dscp/tos matches. */
+static void __exit dscp_mt_exit(void)
+{
+ xt_unregister_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg));
+}
+
+module_init(dscp_mt_init);
+module_exit(dscp_mt_exit);
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
new file mode 100644
index 0000000..6094399
--- /dev/null
+++ b/net/netfilter/xt_esp.c
@@ -0,0 +1,113 @@
+/* Kernel module to match ESP parameters. */
+
+/* (C) 1999-2000 Yon Uriarte <yon@astaro.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+
+#include <linux/netfilter/xt_esp.h>
+#include <linux/netfilter/x_tables.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
+MODULE_DESCRIPTION("Xtables: IPsec-ESP packet match");
+MODULE_ALIAS("ipt_esp");
+MODULE_ALIAS("ip6t_esp");
+
+#if 0
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+/*
+ * Returns true when spi lies within [min, max]; the outcome is flipped
+ * when invert is set.
+ */
+static inline bool
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
+{
+	bool matched;
+
+	duprintf("esp spi_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ',
+		 min, spi, max);
+	matched = (min <= spi && spi <= max) != invert;
+	duprintf(" result %s\n", matched ? "PASS" : "FAILED");
+	return matched;
+}
+
+/*
+ * ESP match: fetch the ESP header at par->thoff and test its SPI (in
+ * host byte order) against the rule's range, honouring XT_ESP_INV_SPI.
+ * Non-first fragments never match; a packet whose ESP header cannot be
+ * fetched is marked for hotdrop.
+ */
+static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_esp *espinfo = par->matchinfo;
+	struct ip_esp_hdr hdrbuf;
+	const struct ip_esp_hdr *esph;
+	bool negate;
+
+	/* Must not be a fragment. */
+	if (par->fragoff != 0)
+		return false;
+
+	esph = skb_header_pointer(skb, par->thoff, sizeof(hdrbuf), &hdrbuf);
+	if (esph == NULL) {
+		/* We've been asked to examine this packet, and we
+		 * can't.  Hence, no choice but to drop.
+		 */
+		duprintf("Dropping evil ESP tinygram.\n");
+		*par->hotdrop = true;
+		return false;
+	}
+
+	negate = !!(espinfo->invflags & XT_ESP_INV_SPI);
+	return spi_match(espinfo->spis[0], espinfo->spis[1],
+			 ntohl(esph->spi), negate);
+}
+
+/* Rule validation: invflags may only contain bits from XT_ESP_INV_MASK. */
+static bool esp_mt_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_esp *espinfo = par->matchinfo;
+
+	if (!(espinfo->invflags & ~XT_ESP_INV_MASK))
+		return true;
+
+	duprintf("xt_esp: unknown flags %X\n", espinfo->invflags);
+	return false;
+}
+
+/* Registration records for "esp": one each for IPv4 and IPv6, same handlers. */
+static struct xt_match esp_mt_reg[] __read_mostly = {
+ {
+ .name = "esp",
+ .family = NFPROTO_IPV4,
+ .checkentry = esp_mt_check,
+ .match = esp_mt,
+ .matchsize = sizeof(struct xt_esp),
+ .proto = IPPROTO_ESP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "esp",
+ .family = NFPROTO_IPV6,
+ .checkentry = esp_mt_check,
+ .match = esp_mt,
+ .matchsize = sizeof(struct xt_esp),
+ .proto = IPPROTO_ESP,
+ .me = THIS_MODULE,
+ },
+};
+
+/* Module init: register the esp matches. */
+static int __init esp_mt_init(void)
+{
+ return xt_register_matches(esp_mt_reg, ARRAY_SIZE(esp_mt_reg));
+}
+
+/* Module exit: unregister the esp matches. */
+static void __exit esp_mt_exit(void)
+{
+ xt_unregister_matches(esp_mt_reg, ARRAY_SIZE(esp_mt_reg));
+}
+
+module_init(esp_mt_init);
+module_exit(esp_mt_exit);
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
new file mode 100644
index 0000000..6fc4292
--- /dev/null
+++ b/net/netfilter/xt_hashlimit.c
@@ -0,0 +1,1020 @@
+/*
+ * xt_hashlimit - Netfilter module to limit the number of packets per time
+ * separately for each hashbucket (sourceip/sourceport/dstip/dstport)
+ *
+ * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ *
+ * Development of this code was funded by Astaro AG, http://www.astaro.com/
+ */
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/mm.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#endif
+
+#include <net/net_namespace.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/xt_hashlimit.h>
+#include <linux/mutex.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match");
+MODULE_ALIAS("ipt_hashlimit");
+MODULE_ALIAS("ip6t_hashlimit");
+
+/* need to declare this at the top */
+static struct proc_dir_entry *hashlimit_procdir4;
+static struct proc_dir_entry *hashlimit_procdir6;
+static const struct file_operations dl_file_ops;
+
+/* hash table crap */
+/*
+ * Hash key identifying one rate-limited flow.  The whole structure is fed
+ * to jhash2() as an array of u32 (hash_dst) and compared with memcmp()
+ * (dst_cmp), so hashlimit_init_dst() zeroes it before filling in fields.
+ */
+struct dsthash_dst {
+ union {
+ struct {
+ __be32 src;
+ __be32 dst;
+ } ip;
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+ struct {
+ __be32 src[4];
+ __be32 dst[4];
+ } ip6;
+#endif
+ };
+ __be16 src_port;
+ __be16 dst_port;
+};
+
+/*
+ * One tracked flow: the hash key plus its token-bucket state.  Entries are
+ * allocated from hashlimit_cachep and linked into a bucket of
+ * xt_hashlimit_htable.hash[].
+ */
+struct dsthash_ent {
+ /* static / read-only parts in the beginning */
+ struct hlist_node node;
+ struct dsthash_dst dst;
+
+ /* modified structure members in the end */
+ unsigned long expires; /* precalculated expiry time */
+ struct {
+ unsigned long prev; /* last modification */
+ u_int32_t credit; /* credits currently available */
+ u_int32_t credit_cap, cost; /* upper bound on credit; credits charged per matching packet */
+ } rateinfo;
+};
+
+/*
+ * A named hash table shared by every rule that uses the same name and
+ * address family.  Reference counted through 'use'; allocated with the
+ * bucket array appended directly after the structure.
+ */
+struct xt_hashlimit_htable {
+ struct hlist_node node; /* global list of all htables */
+ atomic_t use; /* reference count, see htable_find_get()/htable_put() */
+ u_int8_t family; /* NFPROTO_IPV4 or NFPROTO_IPV6 */
+
+ struct hashlimit_cfg1 cfg; /* config */
+
+ /* used internally */
+ spinlock_t lock; /* taken around accesses to hash[] buckets and count */
+ u_int32_t rnd; /* random seed for hash */
+ int rnd_initialized; /* non-zero once rnd has been seeded */
+ unsigned int count; /* number entries in table */
+ struct timer_list timer; /* timer for gc */
+
+ /* seq_file stuff */
+ struct proc_dir_entry *pde;
+
+ struct hlist_head hash[0]; /* hashtable itself */
+};
+
+static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */
+static DEFINE_MUTEX(hlimit_mutex); /* additional checkentry protection */
+static HLIST_HEAD(hashlimit_htables);
+static struct kmem_cache *hashlimit_cachep __read_mostly;
+
+/* Return true when the entry's key is byte-for-byte equal to *b. */
+static inline bool dst_cmp(const struct dsthash_ent *ent,
+			   const struct dsthash_dst *b)
+{
+	return memcmp(&ent->dst, b, sizeof(ent->dst)) == 0;
+}
+
+/*
+ * Map a key to a bucket index in [0, ht->cfg.size - 1].
+ *
+ * The key is hashed as an array of u32 words seeded with ht->rnd.  Rather
+ * than reducing with a modulo (a divide), the 32-bit hash is multiplied by
+ * the table size and the upper 32 bits of the 64-bit product are returned.
+ */
+static u_int32_t
+hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst)
+{
+	const u32 *words = (const u32 *)dst;
+	u_int32_t h = jhash2(words, sizeof(*dst)/sizeof(u32), ht->rnd);
+	u64 scaled = (u64)h * ht->cfg.size;
+
+	return scaled >> 32;
+}
+
+/*
+ * Look up the entry whose key equals *dst.
+ * Returns the entry, or NULL when the bucket holds no matching key.
+ */
+static struct dsthash_ent *
+dsthash_find(const struct xt_hashlimit_htable *ht,
+	     const struct dsthash_dst *dst)
+{
+	const struct hlist_head *bucket = &ht->hash[hash_dst(ht, dst)];
+	struct dsthash_ent *cur;
+	struct hlist_node *n;
+
+	/* Iterating an empty bucket is a no-op, so no emptiness test is needed. */
+	hlist_for_each_entry(cur, n, bucket, node) {
+		if (dst_cmp(cur, dst))
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Allocate a dsthash_ent, copy *dst into it and link it into the table.
+ *
+ * This function takes no lock itself; the callers in this file
+ * (hashlimit_mt, hashlimit_mt_v0) invoke it with ht->lock held.  Only the
+ * key is initialised here: expires and rateinfo are left for the caller.
+ *
+ * Returns the new entry, or NULL when the table already holds cfg.max
+ * entries or the allocation fails (both cases log a rate-limited message).
+ */
+static struct dsthash_ent *
+dsthash_alloc_init(struct xt_hashlimit_htable *ht,
+ const struct dsthash_dst *dst)
+{
+ struct dsthash_ent *ent;
+
+ /* initialize hash with random val at the time we allocate
+ * the first hashtable entry */
+ if (!ht->rnd_initialized) {
+ get_random_bytes(&ht->rnd, 4);
+ ht->rnd_initialized = 1;
+ }
+
+ if (ht->cfg.max && ht->count >= ht->cfg.max) {
+ /* FIXME: do something. question is what.. */
+ if (net_ratelimit())
+ printk(KERN_WARNING
+ "xt_hashlimit: max count of %u reached\n",
+ ht->cfg.max);
+ return NULL;
+ }
+
+ /* GFP_ATOMIC: must not sleep here */
+ ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
+ if (!ent) {
+ if (net_ratelimit())
+ printk(KERN_ERR
+ "xt_hashlimit: can't allocate dsthash_ent\n");
+ return NULL;
+ }
+ memcpy(&ent->dst, dst, sizeof(ent->dst));
+
+ /* the bucket is computed after rnd has been seeded above */
+ hlist_add_head(&ent->node, &ht->hash[hash_dst(ht, dst)]);
+ ht->count++;
+ return ent;
+}
+
+/* Unlink an entry from its bucket, return it to the slab cache and fix up the entry count. */
+static inline void
+dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
+{
+	ht->count--;
+	hlist_del(&ent->node);
+	kmem_cache_free(hashlimit_cachep, ent);
+}
+static void htable_gc(unsigned long htlong);
+
+/*
+ * Create the hash table for a revision-0 rule and publish it.
+ *
+ * The bucket count comes from minfo->cfg.size, or is derived from
+ * num_physpages when that is zero.  The table is vmalloc()ed, its config is
+ * copied field by field from the v0 config (src/dst masks are forced to the
+ * full address length), a proc entry named minfo->name is created, the gc
+ * timer is started and the table is added to hashlimit_htables.
+ *
+ * Returns 0 on success and -1 on allocation or proc-entry failure.
+ */
+static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family)
+{
+ struct xt_hashlimit_htable *hinfo;
+ unsigned int size;
+ unsigned int i;
+
+ if (minfo->cfg.size)
+ size = minfo->cfg.size;
+ else {
+ size = ((num_physpages << PAGE_SHIFT) / 16384) /
+ sizeof(struct list_head);
+ if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
+ size = 8192;
+ if (size < 16)
+ size = 16;
+ }
+ /* FIXME: don't use vmalloc() here or anywhere else -HW */
+ /* NOTE(review): sized with struct list_head although hash[] holds
+ * struct hlist_head elements. */
+ hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
+ sizeof(struct list_head) * size);
+ if (!hinfo) {
+ printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n");
+ return -1;
+ }
+ /* NOTE(review): on the proc failure path below, minfo->hinfo keeps
+ * pointing at the freed table. */
+ minfo->hinfo = hinfo;
+
+ /* copy match config into hashtable config */
+ hinfo->cfg.mode = minfo->cfg.mode;
+ hinfo->cfg.avg = minfo->cfg.avg;
+ hinfo->cfg.burst = minfo->cfg.burst;
+ hinfo->cfg.max = minfo->cfg.max;
+ hinfo->cfg.gc_interval = minfo->cfg.gc_interval;
+ hinfo->cfg.expire = minfo->cfg.expire;
+
+ if (family == NFPROTO_IPV4)
+ hinfo->cfg.srcmask = hinfo->cfg.dstmask = 32;
+ else
+ hinfo->cfg.srcmask = hinfo->cfg.dstmask = 128;
+
+ hinfo->cfg.size = size;
+ /* default the entry limit to 8 per bucket; never below one per bucket */
+ if (!hinfo->cfg.max)
+ hinfo->cfg.max = 8 * hinfo->cfg.size;
+ else if (hinfo->cfg.max < hinfo->cfg.size)
+ hinfo->cfg.max = hinfo->cfg.size;
+
+ for (i = 0; i < hinfo->cfg.size; i++)
+ INIT_HLIST_HEAD(&hinfo->hash[i]);
+
+ atomic_set(&hinfo->use, 1);
+ hinfo->count = 0;
+ hinfo->family = family;
+ hinfo->rnd_initialized = 0;
+ spin_lock_init(&hinfo->lock);
+ hinfo->pde = proc_create_data(minfo->name, 0,
+ (family == NFPROTO_IPV4) ?
+ hashlimit_procdir4 : hashlimit_procdir6,
+ &dl_file_ops, hinfo);
+ if (!hinfo->pde) {
+ vfree(hinfo);
+ return -1;
+ }
+
+ setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo);
+ hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
+ add_timer(&hinfo->timer);
+
+ spin_lock_bh(&hashlimit_lock);
+ hlist_add_head(&hinfo->node, &hashlimit_htables);
+ spin_unlock_bh(&hashlimit_lock);
+
+ return 0;
+}
+
+/*
+ * Create the hash table for a revision-1 rule and publish it.
+ *
+ * Same sequence as htable_create_v0(), except that the whole config
+ * (including the user-supplied src/dst masks) is copied with one memcpy().
+ *
+ * Returns 0 on success and -1 on allocation or proc-entry failure.
+ */
+static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family)
+{
+ struct xt_hashlimit_htable *hinfo;
+ unsigned int size;
+ unsigned int i;
+
+ if (minfo->cfg.size) {
+ size = minfo->cfg.size;
+ } else {
+ size = (num_physpages << PAGE_SHIFT) / 16384 /
+ sizeof(struct list_head);
+ if (num_physpages > 1024 * 1024 * 1024 / PAGE_SIZE)
+ size = 8192;
+ if (size < 16)
+ size = 16;
+ }
+ /* FIXME: don't use vmalloc() here or anywhere else -HW */
+ /* NOTE(review): sized with struct list_head although hash[] holds
+ * struct hlist_head elements. */
+ hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
+ sizeof(struct list_head) * size);
+ if (hinfo == NULL) {
+ printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n");
+ return -1;
+ }
+ /* NOTE(review): on the proc failure path below, minfo->hinfo keeps
+ * pointing at the freed table. */
+ minfo->hinfo = hinfo;
+
+ /* copy match config into hashtable config */
+ memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg));
+ hinfo->cfg.size = size;
+ /* default the entry limit to 8 per bucket; never below one per bucket */
+ if (hinfo->cfg.max == 0)
+ hinfo->cfg.max = 8 * hinfo->cfg.size;
+ else if (hinfo->cfg.max < hinfo->cfg.size)
+ hinfo->cfg.max = hinfo->cfg.size;
+
+ for (i = 0; i < hinfo->cfg.size; i++)
+ INIT_HLIST_HEAD(&hinfo->hash[i]);
+
+ atomic_set(&hinfo->use, 1);
+ hinfo->count = 0;
+ hinfo->family = family;
+ hinfo->rnd_initialized = 0;
+ spin_lock_init(&hinfo->lock);
+
+ hinfo->pde = proc_create_data(minfo->name, 0,
+ (family == NFPROTO_IPV4) ?
+ hashlimit_procdir4 : hashlimit_procdir6,
+ &dl_file_ops, hinfo);
+ if (hinfo->pde == NULL) {
+ vfree(hinfo);
+ return -1;
+ }
+
+ setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo);
+ hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
+ add_timer(&hinfo->timer);
+
+ spin_lock_bh(&hashlimit_lock);
+ hlist_add_head(&hinfo->node, &hashlimit_htables);
+ spin_unlock_bh(&hashlimit_lock);
+
+ return 0;
+}
+
+/* Selector for htable_selective_cleanup(): picks every entry unconditionally. */
+static bool select_all(const struct xt_hashlimit_htable *ht,
+		       const struct dsthash_ent *he)
+{
+	return true;
+}
+
+/* Selector for htable_selective_cleanup(): picks entries whose expiry time has been reached. */
+static bool select_gc(const struct xt_hashlimit_htable *ht,
+ const struct dsthash_ent *he)
+{
+ return time_after_eq(jiffies, he->expires);
+}
+
+/*
+ * Walk every bucket of the table under ht->lock and free each entry for
+ * which select() returns true.
+ */
+static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
+ bool (*select)(const struct xt_hashlimit_htable *ht,
+ const struct dsthash_ent *he))
+{
+ unsigned int i;
+
+ /* lock hash table and iterate over it */
+ spin_lock_bh(&ht->lock);
+ for (i = 0; i < ht->cfg.size; i++) {
+ struct dsthash_ent *dh;
+ struct hlist_node *pos, *n;
+ /* _safe variant: dsthash_free() unlinks the current node */
+ hlist_for_each_entry_safe(dh, pos, n, &ht->hash[i], node) {
+ if ((*select)(ht, dh))
+ dsthash_free(ht, dh);
+ }
+ }
+ spin_unlock_bh(&ht->lock);
+}
+
+/* hash table garbage collector, run by timer */
+/*
+ * Timer callback: htlong is the xt_hashlimit_htable pointer passed to
+ * setup_timer().  Frees expired entries, then re-arms the timer
+ * cfg.gc_interval milliseconds ahead.
+ */
+static void htable_gc(unsigned long htlong)
+{
+ struct xt_hashlimit_htable *ht = (struct xt_hashlimit_htable *)htlong;
+
+ htable_selective_cleanup(ht, select_gc);
+
+ /* re-add the timer accordingly */
+ ht->timer.expires = jiffies + msecs_to_jiffies(ht->cfg.gc_interval);
+ add_timer(&ht->timer);
+}
+
+/*
+ * Tear a table down: stop the gc timer, remove its proc entry, free every
+ * remaining entry and release the table memory.  Called from htable_put()
+ * after the table has been unlinked from hashlimit_htables.
+ */
+static void htable_destroy(struct xt_hashlimit_htable *hinfo)
+{
+ del_timer_sync(&hinfo->timer);
+
+ /* remove proc entry */
+ remove_proc_entry(hinfo->pde->name,
+ hinfo->family == NFPROTO_IPV4 ? hashlimit_procdir4 :
+ hashlimit_procdir6);
+ htable_selective_cleanup(hinfo, select_all);
+ vfree(hinfo);
+}
+
+/*
+ * Find an existing table by proc-entry name and address family.
+ * On a hit the table's use count is incremented and the table is returned;
+ * otherwise NULL is returned.  The global list is walked under
+ * hashlimit_lock.
+ */
+static struct xt_hashlimit_htable *htable_find_get(const char *name,
+						    u_int8_t family)
+{
+	struct xt_hashlimit_htable *cur;
+	struct xt_hashlimit_htable *found = NULL;
+	struct hlist_node *n;
+
+	spin_lock_bh(&hashlimit_lock);
+	hlist_for_each_entry(cur, n, &hashlimit_htables, node) {
+		if (strcmp(name, cur->pde->name) != 0)
+			continue;
+		if (cur->family != family)
+			continue;
+
+		atomic_inc(&cur->use);
+		found = cur;
+		break;
+	}
+	spin_unlock_bh(&hashlimit_lock);
+
+	return found;
+}
+
+/*
+ * Drop one reference to a table.  When the last reference goes away the
+ * table is unlinked from the global list and destroyed.
+ */
+static void htable_put(struct xt_hashlimit_htable *hinfo)
+{
+	if (!atomic_dec_and_test(&hinfo->use))
+		return;
+
+	spin_lock_bh(&hashlimit_lock);
+	hlist_del(&hinfo->node);
+	spin_unlock_bh(&hashlimit_lock);
+
+	htable_destroy(hinfo);
+}
+
+/* The algorithm used is the Simple Token Bucket Filter (TBF)
+ * see net/sched/sch_tbf.c in the linux source tree
+ */
+
+/* Rusty: This is my (non-mathematically-inclined) understanding of
+ this algorithm. The `average rate' in jiffies becomes your initial
+ amount of credit `credit' and the most credit you can ever have
+ `credit_cap'. The `peak rate' becomes the cost of passing the
+ test, `cost'.
+
+ `prev' tracks the last packet hit: you gain one credit per jiffy.
+ If you get credit balance more than this, the extra credit is
+ discarded. Every time the match passes, you lose `cost' credits;
+ if you don't have that many, the test fails.
+
+ See Alexey's formal explanation in net/sched/sch_tbf.c.
+
+ To get the maximum range, we multiply by this factor (ie. you get N
+ credits per jiffy). We want to allow a rate as low as 1 per day
+ (slowest userspace tool allows), which means
+ CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie.
+*/
+/* Largest credits-per-jiffy value for which one day's worth of jiffies
+ * still fits in 32 bits. */
+#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24))
+
+/* Repeated shift and or gives us all 1s, final shift and add 1 gives
+ * us the power of 2 below the theoretical max, so GCC simply does a
+ * shift. */
+#define _POW2_BELOW2(x) ((x)|((x)>>1))
+#define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2))
+#define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
+#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
+#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
+#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
+
+/* Credits earned per elapsed jiffy, see rateinfo_recalc(). */
+#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
+
+/* Precision saver. */
+/*
+ * Convert a user-supplied rate value (in units of 1/XT_HASHLIMIT_SCALE
+ * seconds) into internal credits.  To keep precision the multiplication is
+ * done first whenever it cannot overflow 32 bits; otherwise the division is
+ * done first.
+ */
+static inline u_int32_t
+user2credits(u_int32_t user)
+{
+	/* Multiply first as long as the product is known to fit. */
+	if (user <= 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
+		return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE;
+
+	/* Multiplying would overflow: divide first. */
+	return (user / XT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
+}
+
+/*
+ * Top up an entry's credit for the jiffies elapsed since the last update
+ * (CREDITS_PER_JIFFY each), clamp it to credit_cap and record 'now' as the
+ * new reference time.
+ */
+static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now)
+{
+	const u_int32_t cap = dh->rateinfo.credit_cap;
+	u_int32_t credit = dh->rateinfo.credit;
+
+	credit += (now - dh->rateinfo.prev) * CREDITS_PER_JIFFY;
+
+	dh->rateinfo.credit = credit > cap ? cap : credit;
+	dh->rateinfo.prev = now;
+}
+
+/*
+ * Keep the first l bits (most significant first, in network order) of the
+ * big-endian word a and clear the rest.
+ *
+ * l == 0 yields 0; l >= 32 returns a unchanged, which also keeps the shift
+ * count below from going negative.  The mask is built from an unsigned
+ * all-ones value because left-shifting the negative int ~0 is undefined
+ * behaviour in C.
+ */
+static inline __be32 maskl(__be32 a, unsigned int l)
+{
+	if (l == 0)
+		return 0;
+	if (l >= 32)
+		return a;
+
+	return htonl(ntohl(a) & (~(u32)0 << (32 - l)));
+}
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+/*
+ * Truncate the IPv6 address stored as four big-endian words in i[0..3] to
+ * its first p bits: the word containing the prefix boundary is masked with
+ * maskl() and all following words are zeroed.  p == 128 leaves the address
+ * untouched.
+ *
+ * Every case ends in its own break.  Without the break after the 64..95
+ * case, control fell into the 96..127 case and called maskl() with the
+ * wrapped-around length p - 96.
+ */
+static void hashlimit_ipv6_mask(__be32 *i, unsigned int p)
+{
+	switch (p) {
+	case 0 ... 31:
+		i[0] = maskl(i[0], p);
+		i[1] = i[2] = i[3] = 0;
+		break;
+	case 32 ... 63:
+		i[1] = maskl(i[1], p - 32);
+		i[2] = i[3] = 0;
+		break;
+	case 64 ... 95:
+		i[2] = maskl(i[2], p - 64);
+		i[3] = 0;
+		break;
+	case 96 ... 127:
+		i[3] = maskl(i[3], p - 96);
+		break;
+	case 128:
+		break;
+	}
+}
+#endif
+
+/*
+ * Build the hash key for a packet according to hinfo->cfg.mode.
+ *
+ * *dst is zeroed first, then the (masked) source/destination addresses are
+ * filled in if XT_HASHLIMIT_HASH_SIP/DIP are set.  If neither port flag is
+ * set the function returns early; otherwise the first two 16-bit words of
+ * the transport header are read as source and destination port for
+ * TCP/UDP/UDP-Lite/SCTP/DCCP, and zero is used for any other protocol.
+ *
+ * Returns 0 on success, -1 when the IPv6 extension headers cannot be
+ * skipped or the port words cannot be read from the skb.
+ */
+static int
+hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
+ struct dsthash_dst *dst,
+ const struct sk_buff *skb, unsigned int protoff)
+{
+ __be16 _ports[2], *ports;
+ u8 nexthdr;
+
+ /* the key is hashed and memcmp'd as a whole, so clear it entirely */
+ memset(dst, 0, sizeof(*dst));
+
+ switch (hinfo->family) {
+ case NFPROTO_IPV4:
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
+ dst->ip.dst = maskl(ip_hdr(skb)->daddr,
+ hinfo->cfg.dstmask);
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
+ dst->ip.src = maskl(ip_hdr(skb)->saddr,
+ hinfo->cfg.srcmask);
+
+ /* no port hashing requested: the key is complete */
+ if (!(hinfo->cfg.mode &
+ (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
+ return 0;
+ nexthdr = ip_hdr(skb)->protocol;
+ break;
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+ case NFPROTO_IPV6:
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) {
+ memcpy(&dst->ip6.dst, &ipv6_hdr(skb)->daddr,
+ sizeof(dst->ip6.dst));
+ hashlimit_ipv6_mask(dst->ip6.dst, hinfo->cfg.dstmask);
+ }
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP) {
+ memcpy(&dst->ip6.src, &ipv6_hdr(skb)->saddr,
+ sizeof(dst->ip6.src));
+ hashlimit_ipv6_mask(dst->ip6.src, hinfo->cfg.srcmask);
+ }
+
+ /* no port hashing requested: the key is complete */
+ if (!(hinfo->cfg.mode &
+ (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
+ return 0;
+ /* the caller's protoff is replaced by the offset found here */
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
+ if ((int)protoff < 0)
+ return -1;
+ break;
+#endif
+ default:
+ BUG();
+ return 0;
+ }
+
+ switch (nexthdr) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_SCTP:
+ case IPPROTO_DCCP:
+ ports = skb_header_pointer(skb, protoff, sizeof(_ports),
+ &_ports);
+ break;
+ default:
+ /* protocols not listed above hash with both ports as zero */
+ _ports[0] = _ports[1] = 0;
+ ports = _ports;
+ break;
+ }
+ if (!ports)
+ return -1;
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SPT)
+ dst->src_port = ports[0];
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DPT)
+ dst->dst_port = ports[1];
+ return 0;
+}
+
+/*
+ * Match callback for revision 0.
+ *
+ * Builds the hash key for the packet, then under hinfo->lock looks up (or
+ * creates) the flow entry, refreshes its credit and expiry, and charges
+ * 'cost' credits if enough are available.
+ *
+ * Returns true while the flow is under its limit, false once it is over.
+ * If the key cannot be built or no entry can be allocated, the packet is
+ * hot-dropped and false is returned.
+ */
+static bool
+hashlimit_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+ /* state is always taken from the rule copy that u.master points to */
+ const struct xt_hashlimit_info *r =
+ ((const struct xt_hashlimit_info *)par->matchinfo)->u.master;
+ struct xt_hashlimit_htable *hinfo = r->hinfo;
+ unsigned long now = jiffies;
+ struct dsthash_ent *dh;
+ struct dsthash_dst dst;
+
+ if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
+ goto hotdrop;
+
+ spin_lock_bh(&hinfo->lock);
+ dh = dsthash_find(hinfo, &dst);
+ if (!dh) {
+ dh = dsthash_alloc_init(hinfo, &dst);
+ if (!dh) {
+ spin_unlock_bh(&hinfo->lock);
+ goto hotdrop;
+ }
+
+ /* new flow: start with a full bucket (avg * burst credits) */
+ dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
+ dh->rateinfo.prev = jiffies;
+ dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
+ hinfo->cfg.burst);
+ dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg *
+ hinfo->cfg.burst);
+ dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
+ } else {
+ /* update expiration timeout */
+ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
+ rateinfo_recalc(dh, now);
+ }
+
+ if (dh->rateinfo.credit >= dh->rateinfo.cost) {
+ /* We're underlimit. */
+ dh->rateinfo.credit -= dh->rateinfo.cost;
+ spin_unlock_bh(&hinfo->lock);
+ return true;
+ }
+
+ spin_unlock_bh(&hinfo->lock);
+
+ /* default case: we're overlimit, thus don't match */
+ return false;
+
+hotdrop:
+ *par->hotdrop = true;
+ return false;
+}
+
+/*
+ * Match callback for revision 1.
+ *
+ * Same token-bucket handling as hashlimit_mt_v0(), with the verdict
+ * inverted when XT_HASHLIMIT_INVERT is set in the rule's mode: without the
+ * flag it matches while the flow is under its limit, with the flag it
+ * matches once the flow is over it.
+ *
+ * If the key cannot be built or no entry can be allocated, the packet is
+ * hot-dropped and false is returned.
+ */
+static bool
+hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+ const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+ struct xt_hashlimit_htable *hinfo = info->hinfo;
+ unsigned long now = jiffies;
+ struct dsthash_ent *dh;
+ struct dsthash_dst dst;
+
+ if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
+ goto hotdrop;
+
+ spin_lock_bh(&hinfo->lock);
+ dh = dsthash_find(hinfo, &dst);
+ if (dh == NULL) {
+ dh = dsthash_alloc_init(hinfo, &dst);
+ if (dh == NULL) {
+ spin_unlock_bh(&hinfo->lock);
+ goto hotdrop;
+ }
+
+ /* new flow: start with a full bucket (avg * burst credits) */
+ dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
+ dh->rateinfo.prev = jiffies;
+ dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
+ hinfo->cfg.burst);
+ dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg *
+ hinfo->cfg.burst);
+ dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
+ } else {
+ /* update expiration timeout */
+ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
+ rateinfo_recalc(dh, now);
+ }
+
+ if (dh->rateinfo.credit >= dh->rateinfo.cost) {
+ /* below the limit */
+ dh->rateinfo.credit -= dh->rateinfo.cost;
+ spin_unlock_bh(&hinfo->lock);
+ return !(info->cfg.mode & XT_HASHLIMIT_INVERT);
+ }
+
+ spin_unlock_bh(&hinfo->lock);
+ /* default match is underlimit - so over the limit, we need to invert */
+ return info->cfg.mode & XT_HASHLIMIT_INVERT;
+
+ hotdrop:
+ *par->hotdrop = true;
+ return false;
+}
+
+/*
+ * checkentry callback for revision 0.
+ *
+ * Rejects the rule when burst is zero or avg * burst overflows the credit
+ * conversion, when mode is zero or larger than the OR of the four hash
+ * flags, when gc_interval or expire is zero, or when the name is not
+ * NUL-terminated.  Otherwise it attaches the rule to an existing table of
+ * the same name and family, or creates one, under hlimit_mutex.
+ *
+ * Returns true when the rule is accepted.
+ */
+static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
+{
+ struct xt_hashlimit_info *r = par->matchinfo;
+
+ /* Check for overflow. */
+ if (r->cfg.burst == 0 ||
+ user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) {
+ printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n",
+ r->cfg.avg, r->cfg.burst);
+ return false;
+ }
+ if (r->cfg.mode == 0 ||
+ r->cfg.mode > (XT_HASHLIMIT_HASH_DPT |
+ XT_HASHLIMIT_HASH_DIP |
+ XT_HASHLIMIT_HASH_SIP |
+ XT_HASHLIMIT_HASH_SPT))
+ return false;
+ if (!r->cfg.gc_interval)
+ return false;
+ if (!r->cfg.expire)
+ return false;
+ if (r->name[sizeof(r->name) - 1] != '\0')
+ return false;
+
+ /* This is the best we've got: We cannot release and re-grab lock,
+ * since checkentry() is called before x_tables.c grabs xt_mutex.
+ * We also cannot grab the hashtable spinlock, since htable_create will
+ * call vmalloc, and that can sleep. And we cannot just re-search
+ * the list of htable's in htable_create(), since then we would
+ * create duplicate proc files. -HW */
+ mutex_lock(&hlimit_mutex);
+ r->hinfo = htable_find_get(r->name, par->match->family);
+ if (!r->hinfo && htable_create_v0(r, par->match->family) != 0) {
+ mutex_unlock(&hlimit_mutex);
+ return false;
+ }
+ mutex_unlock(&hlimit_mutex);
+
+ /* Ugly hack: For SMP, we only want to use one set */
+ r->u.master = r;
+ return true;
+}
+
+/*
+ * checkentry callback for revision 1.
+ *
+ * Rejects the rule when burst is zero or avg * burst overflows the credit
+ * conversion, when gc_interval or expire is zero, when the name is not
+ * NUL-terminated, or when a src/dst mask exceeds the address length of the
+ * family (32 for IPv4, 128 otherwise).  Otherwise it attaches the rule to
+ * an existing table of the same name and family, or creates one, under
+ * hlimit_mutex.
+ *
+ * NOTE(review): unlike hashlimit_mt_check_v0(), cfg.mode is not validated
+ * here - confirm whether that is intended.
+ *
+ * Returns true when the rule is accepted.
+ */
+static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+
+ /* Check for overflow. */
+ if (info->cfg.burst == 0 ||
+ user2credits(info->cfg.avg * info->cfg.burst) <
+ user2credits(info->cfg.avg)) {
+ printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n",
+ info->cfg.avg, info->cfg.burst);
+ return false;
+ }
+ if (info->cfg.gc_interval == 0 || info->cfg.expire == 0)
+ return false;
+ if (info->name[sizeof(info->name)-1] != '\0')
+ return false;
+ if (par->match->family == NFPROTO_IPV4) {
+ if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32)
+ return false;
+ } else {
+ if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128)
+ return false;
+ }
+
+ /* This is the best we've got: We cannot release and re-grab lock,
+ * since checkentry() is called before x_tables.c grabs xt_mutex.
+ * We also cannot grab the hashtable spinlock, since htable_create will
+ * call vmalloc, and that can sleep. And we cannot just re-search
+ * the list of htable's in htable_create(), since then we would
+ * create duplicate proc files. -HW */
+ mutex_lock(&hlimit_mutex);
+ info->hinfo = htable_find_get(info->name, par->match->family);
+ if (!info->hinfo && htable_create(info, par->match->family) != 0) {
+ mutex_unlock(&hlimit_mutex);
+ return false;
+ }
+ mutex_unlock(&hlimit_mutex);
+ return true;
+}
+
+/* destroy callback for revision 0: drop the rule's reference on its hash table. */
+static void
+hashlimit_mt_destroy_v0(const struct xt_mtdtor_param *par)
+{
+ const struct xt_hashlimit_info *r = par->matchinfo;
+
+ htable_put(r->hinfo);
+}
+
+/* destroy callback for revision 1: drop the rule's reference on its hash table. */
+static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+
+ htable_put(info->hinfo);
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Compat layout of the revision-0 match info: the two trailing members are
+ * compat_uptr_t here.  Used for .compatsize and by the compat copy helpers
+ * in this file.
+ */
+struct compat_xt_hashlimit_info {
+ char name[IFNAMSIZ];
+ struct hashlimit_cfg cfg;
+ compat_uptr_t hinfo;
+ compat_uptr_t master;
+};
+
+/*
+ * compat_from_user callback: copy the leading part of the compat structure
+ * (everything before 'hinfo') into dst, then zero the bytes that follow.
+ *
+ * NOTE(review): the number of bytes cleared is computed from the compat
+ * structure's size, not from the size of the destination structure -
+ * confirm this covers the fields that need clearing.
+ */
+static void hashlimit_mt_compat_from_user(void *dst, void *src)
+{
+ int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
+
+ memcpy(dst, src, off);
+ memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off);
+}
+
+/*
+ * compat_to_user callback: copy only the part of the match info that
+ * precedes 'hinfo' to user space.  Returns 0 on success, -EFAULT if the
+ * copy fails.
+ */
+static int hashlimit_mt_compat_to_user(void __user *dst, void *src)
+{
+ int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
+
+ return copy_to_user(dst, src, off) ? -EFAULT : 0;
+}
+#endif
+
+/*
+ * Registration table for the "hashlimit" match: revision 0 and revision 1
+ * for IPv4, plus the same pair for IPv6 when ip6tables support is
+ * configured.  Only the revision-0 entries carry compat handlers.
+ */
+static struct xt_match hashlimit_mt_reg[] __read_mostly = {
+ {
+ .name = "hashlimit",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .match = hashlimit_mt_v0,
+ .matchsize = sizeof(struct xt_hashlimit_info),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_xt_hashlimit_info),
+ .compat_from_user = hashlimit_mt_compat_from_user,
+ .compat_to_user = hashlimit_mt_compat_to_user,
+#endif
+ .checkentry = hashlimit_mt_check_v0,
+ .destroy = hashlimit_mt_destroy_v0,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "hashlimit",
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .match = hashlimit_mt,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo1),
+ .checkentry = hashlimit_mt_check,
+ .destroy = hashlimit_mt_destroy,
+ .me = THIS_MODULE,
+ },
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+ /* .revision is not set in this entry, so it is zero-initialised */
+ {
+ .name = "hashlimit",
+ .family = NFPROTO_IPV6,
+ .match = hashlimit_mt_v0,
+ .matchsize = sizeof(struct xt_hashlimit_info),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_xt_hashlimit_info),
+ .compat_from_user = hashlimit_mt_compat_from_user,
+ .compat_to_user = hashlimit_mt_compat_to_user,
+#endif
+ .checkentry = hashlimit_mt_check_v0,
+ .destroy = hashlimit_mt_destroy_v0,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "hashlimit",
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .match = hashlimit_mt,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo1),
+ .checkentry = hashlimit_mt_check,
+ .destroy = hashlimit_mt_destroy,
+ .me = THIS_MODULE,
+ },
+#endif
+};
+
+/* PROC stuff */
+/*
+ * seq_file start callback.  Takes htable->lock (released in dl_seq_stop)
+ * and returns a heap-allocated cursor holding the bucket index *pos.
+ *
+ * Returns NULL when *pos is past the last bucket and ERR_PTR(-ENOMEM) when
+ * the cursor cannot be allocated; in both cases the lock is still held on
+ * return.
+ */
+static void *dl_seq_start(struct seq_file *s, loff_t *pos)
+ __acquires(htable->lock)
+{
+ struct proc_dir_entry *pde = s->private;
+ struct xt_hashlimit_htable *htable = pde->data;
+ unsigned int *bucket;
+
+ spin_lock_bh(&htable->lock);
+ if (*pos >= htable->cfg.size)
+ return NULL;
+
+ /* GFP_ATOMIC: the spinlock taken above is held */
+ bucket = kmalloc(sizeof(unsigned int), GFP_ATOMIC);
+ if (!bucket)
+ return ERR_PTR(-ENOMEM);
+
+ *bucket = *pos;
+ return bucket;
+}
+
+/*
+ * seq_file next callback: advance the cursor to the following bucket and
+ * mirror the new index into *pos.  When the end of the table is reached the
+ * cursor is freed and NULL is returned.
+ */
+static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct proc_dir_entry *pde = s->private;
+ struct xt_hashlimit_htable *htable = pde->data;
+ unsigned int *bucket = (unsigned int *)v;
+
+ *pos = ++(*bucket);
+ if (*pos >= htable->cfg.size) {
+ kfree(v);
+ return NULL;
+ }
+ return bucket;
+}
+
+/*
+ * seq_file stop callback: free the bucket cursor and release the table
+ * lock taken in dl_seq_start().
+ *
+ * v is whatever the last start/next call returned.  That can be NULL, which
+ * kfree() accepts, but it can also be the ERR_PTR(-ENOMEM) that
+ * dl_seq_start() returns when its allocation fails.  An error pointer is
+ * not a heap address and must not be handed to kfree().
+ */
+static void dl_seq_stop(struct seq_file *s, void *v)
+	__releases(htable->lock)
+{
+	struct proc_dir_entry *pde = s->private;
+	struct xt_hashlimit_htable *htable = pde->data;
+	unsigned int *bucket = (unsigned int *)v;
+
+	if (!IS_ERR(bucket))
+		kfree(bucket);
+	spin_unlock_bh(&htable->lock);
+}
+
+/*
+ * Print one entry as a line of the proc file: seconds until expiry,
+ * "src:port->dst:port", then credit, credit_cap and cost.
+ *
+ * The entry's credit is recalculated first, so reading the proc file
+ * updates rateinfo.credit and rateinfo.prev.  Returns the value of
+ * seq_printf(); an unknown family triggers BUG().
+ */
+static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
+ struct seq_file *s)
+{
+ /* recalculate to show accurate numbers */
+ rateinfo_recalc(ent, jiffies);
+
+ switch (family) {
+ case NFPROTO_IPV4:
+ return seq_printf(s, "%ld %u.%u.%u.%u:%u->"
+ "%u.%u.%u.%u:%u %u %u %u\n",
+ (long)(ent->expires - jiffies)/HZ,
+ NIPQUAD(ent->dst.ip.src),
+ ntohs(ent->dst.src_port),
+ NIPQUAD(ent->dst.ip.dst),
+ ntohs(ent->dst.dst_port),
+ ent->rateinfo.credit, ent->rateinfo.credit_cap,
+ ent->rateinfo.cost);
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+ case NFPROTO_IPV6:
+ return seq_printf(s, "%ld " NIP6_FMT ":%u->"
+ NIP6_FMT ":%u %u %u %u\n",
+ (long)(ent->expires - jiffies)/HZ,
+ NIP6(*(struct in6_addr *)&ent->dst.ip6.src),
+ ntohs(ent->dst.src_port),
+ NIP6(*(struct in6_addr *)&ent->dst.ip6.dst),
+ ntohs(ent->dst.dst_port),
+ ent->rateinfo.credit, ent->rateinfo.credit_cap,
+ ent->rateinfo.cost);
+#endif
+ default:
+ BUG();
+ return 0;
+ }
+}
+
+/*
+ * seq_file show callback: print every entry of the bucket the cursor v
+ * points at.  Returns 1 as soon as printing one entry reports a non-zero
+ * result, 0 otherwise.
+ */
+static int dl_seq_show(struct seq_file *s, void *v)
+{
+	struct proc_dir_entry *pde = s->private;
+	struct xt_hashlimit_htable *htable = pde->data;
+	const unsigned int *idx = v;
+	struct hlist_head *bucket = &htable->hash[*idx];
+	struct dsthash_ent *cur;
+	struct hlist_node *n;
+
+	/* Walking an empty bucket simply does nothing. */
+	hlist_for_each_entry(cur, n, bucket, node) {
+		if (dl_seq_real_show(cur, htable->family, s) != 0)
+			return 1;
+	}
+
+	return 0;
+}
+
+/* seq_file iterator over the buckets of one hash table; installed by dl_proc_open(). */
+static const struct seq_operations dl_seq_ops = {
+ .start = dl_seq_start,
+ .next = dl_seq_next,
+ .stop = dl_seq_stop,
+ .show = dl_seq_show
+};
+
+/*
+ * open handler of the per-table proc file: set up the seq_file iterator
+ * and store the inode's proc_dir_entry as its private data.  Returns the
+ * result of seq_open().
+ */
+static int dl_proc_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *sf;
+	int ret;
+
+	ret = seq_open(file, &dl_seq_ops);
+	if (ret)
+		return ret;
+
+	sf = file->private_data;
+	sf->private = PDE(inode);
+	return ret;
+}
+
+/* File operations of the per-table proc file, passed to proc_create_data() when a table is created. */
+static const struct file_operations dl_file_ops = {
+ .owner = THIS_MODULE,
+ .open = dl_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+/*
+ * Module init: register the matches, create the slab cache for entries and
+ * the "ipt_hashlimit" (and, if configured, "ip6t_hashlimit") proc
+ * directories.  On failure everything set up so far is undone in reverse
+ * order through the err labels.
+ *
+ * Returns 0 on success, the error from xt_register_matches(), or -ENOMEM.
+ */
+static int __init hashlimit_mt_init(void)
+{
+ int err;
+
+ err = xt_register_matches(hashlimit_mt_reg,
+ ARRAY_SIZE(hashlimit_mt_reg));
+ if (err < 0)
+ goto err1;
+
+ /* preset the error code for the two failure paths below */
+ err = -ENOMEM;
+ hashlimit_cachep = kmem_cache_create("xt_hashlimit",
+ sizeof(struct dsthash_ent), 0, 0,
+ NULL);
+ if (!hashlimit_cachep) {
+ printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n");
+ goto err2;
+ }
+ hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", init_net.proc_net);
+ if (!hashlimit_procdir4) {
+ printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
+ "entry\n");
+ goto err3;
+ }
+ err = 0;
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+ hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net);
+ if (!hashlimit_procdir6) {
+ printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
+ "entry\n");
+ err = -ENOMEM;
+ }
+#endif
+ if (!err)
+ return 0;
+ /* only reached when the IPv6 proc directory could not be created */
+ remove_proc_entry("ipt_hashlimit", init_net.proc_net);
+err3:
+ kmem_cache_destroy(hashlimit_cachep);
+err2:
+ xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
+err1:
+ return err;
+
+}
+
+/*
+ * Module exit: remove the proc directories, destroy the entry slab cache
+ * and unregister the matches.
+ */
+static void __exit hashlimit_mt_exit(void)
+{
+ remove_proc_entry("ipt_hashlimit", init_net.proc_net);
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+ remove_proc_entry("ip6t_hashlimit", init_net.proc_net);
+#endif
+ kmem_cache_destroy(hashlimit_cachep);
+ xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
+}
+
+module_init(hashlimit_mt_init);
+module_exit(hashlimit_mt_exit);
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
new file mode 100644
index 0000000..64fc7f2
--- /dev/null
+++ b/net/netfilter/xt_helper.c
@@ -0,0 +1,97 @@
+/* iptables module to match on related connections */
+/*
+ * (C) 2001 Martin Josefsson <gandalf@wlug.westbo.se>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_helper.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: Related connection matching");
+MODULE_ALIAS("ipt_helper");
+MODULE_ALIAS("ip6t_helper");
+
+
+/*
+ * Match callback for the "helper" match.
+ *
+ * A packet is a candidate only if it belongs to a tracked connection that
+ * has a master connection with a helper attached; otherwise the result is
+ * just the rule's invert flag.  For candidates, an empty rule name matches
+ * any helper; a non-empty name is compared with strncmp() over the length
+ * of the helper's own name.  The invert flag is applied to that outcome.
+ */
+static bool
+helper_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_helper_info *info = par->matchinfo;
+	const bool invert = info->invert;
+	const struct nf_conntrack_helper *helper;
+	const struct nf_conn_help *master_help;
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	bool hit;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL || ct->master == NULL)
+		return invert;
+
+	master_help = nfct_help(ct->master);
+	if (master_help == NULL)
+		return invert;
+
+	/* rcu_read_lock()ed by nf_hook_slow */
+	helper = rcu_dereference(master_help->helper);
+	if (helper == NULL)
+		return invert;
+
+	if (info->name[0] == '\0')
+		hit = true;
+	else
+		hit = strncmp(helper->name, info->name,
+			      strlen(helper->name)) == 0;
+
+	return hit != invert;
+}
+
+/*
+ * checkentry callback: take a reference on the conntrack L3 protocol
+ * module for the rule's family (released in helper_mt_destroy) and force
+ * NUL-termination of the helper name.  Returns false if the conntrack
+ * support cannot be obtained.
+ */
+static bool helper_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_helper_info *info = par->matchinfo;
+
+ if (nf_ct_l3proto_try_module_get(par->family) < 0) {
+ printk(KERN_WARNING "can't load conntrack support for "
+ "proto=%u\n", par->family);
+ return false;
+ }
+ /* NOTE(review): hard-coded index; presumably the last byte of
+ * info->name - confirm against struct xt_helper_info. */
+ info->name[29] = '\0';
+ return true;
+}
+
+/* destroy callback: release the conntrack L3 protocol module reference taken in helper_mt_check(). */
+static void helper_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family);
+}
+
+/* Registration record for the "helper" match; NFPROTO_UNSPEC, so it is not tied to one address family. */
+static struct xt_match helper_mt_reg __read_mostly = {
+ .name = "helper",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = helper_mt_check,
+ .match = helper_mt,
+ .destroy = helper_mt_destroy,
+ .matchsize = sizeof(struct xt_helper_info),
+ .me = THIS_MODULE,
+};
+
+/* Module init: register the "helper" match; returns the result of xt_register_match(). */
+static int __init helper_mt_init(void)
+{
+ return xt_register_match(&helper_mt_reg);
+}
+
+/* Module exit: unregister the "helper" match. */
+static void __exit helper_mt_exit(void)
+{
+ xt_unregister_match(&helper_mt_reg);
+}
+
+module_init(helper_mt_init);
+module_exit(helper_mt_exit);
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
new file mode 100644
index 0000000..7ac54ea
--- /dev/null
+++ b/net/netfilter/xt_iprange.c
@@ -0,0 +1,174 @@
+/*
+ * xt_iprange - Netfilter module to match IP address ranges
+ *
+ * (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * (C) CC Computer Consultants GmbH, 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_iprange.h>
+#include <linux/netfilter_ipv4/ipt_iprange.h>
+
+/*
+ * Revision 0 (IPv4): compare the packet's source and/or destination
+ * address against the ranges in struct ipt_iprange_info.
+ *
+ * Addresses are compared in host byte order.  For each direction selected
+ * in info->flags the address must lie inside [min_ip, max_ip], or outside
+ * it when the corresponding *_INV flag is set.  Returns true only when
+ * every selected check passes.
+ */
+static bool
+iprange_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct ipt_iprange_info *info = par->matchinfo;
+	const struct iphdr *iph = ip_hdr(skb);
+	bool outside;
+
+	if (info->flags & IPRANGE_SRC) {
+		outside = ntohl(iph->saddr) < ntohl(info->src.min_ip) ||
+			  ntohl(iph->saddr) > ntohl(info->src.max_ip);
+		/* Fail when "outside the range" disagrees with the invert flag. */
+		if (outside != !!(info->flags & IPRANGE_SRC_INV)) {
+			pr_debug("src IP %u.%u.%u.%u NOT in range %s"
+				 "%u.%u.%u.%u-%u.%u.%u.%u\n",
+				 NIPQUAD(iph->saddr),
+				 info->flags & IPRANGE_SRC_INV ? "(INV) " : "",
+				 NIPQUAD(info->src.min_ip),
+				 NIPQUAD(info->src.max_ip));
+			return false;
+		}
+	}
+
+	if (info->flags & IPRANGE_DST) {
+		outside = ntohl(iph->daddr) < ntohl(info->dst.min_ip) ||
+			  ntohl(iph->daddr) > ntohl(info->dst.max_ip);
+		if (outside != !!(info->flags & IPRANGE_DST_INV)) {
+			pr_debug("dst IP %u.%u.%u.%u NOT in range %s"
+				 "%u.%u.%u.%u-%u.%u.%u.%u\n",
+				 NIPQUAD(iph->daddr),
+				 info->flags & IPRANGE_DST_INV ? "(INV) " : "",
+				 NIPQUAD(info->dst.min_ip),
+				 NIPQUAD(info->dst.max_ip));
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Revision 1 (IPv4): compare the packet's source and/or destination
+ * address against the ranges in struct xt_iprange_mtinfo.
+ *
+ * Addresses are compared in host byte order.  For each direction selected
+ * in info->flags, "m" is true when the address falls outside [min, max];
+ * the matching *_INV flag flips it.  Returns false as soon as one selected
+ * check fails, true otherwise.
+ */
+static bool
+iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_iprange_mtinfo *info = par->matchinfo;
+	const struct iphdr *iph = ip_hdr(skb);
+	bool m;
+
+	if (info->flags & IPRANGE_SRC) {
+		m  = ntohl(iph->saddr) < ntohl(info->src_min.ip);
+		m |= ntohl(iph->saddr) > ntohl(info->src_max.ip);
+		m ^= !!(info->flags & IPRANGE_SRC_INV);
+		if (m) {
+			/* Print the lower bound first (was src_max twice). */
+			pr_debug("src IP " NIPQUAD_FMT " NOT in range %s"
+				 NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
+				 NIPQUAD(iph->saddr),
+				 (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "",
+				 NIPQUAD(info->src_min.ip),
+				 NIPQUAD(info->src_max.ip));
+			return false;
+		}
+	}
+	if (info->flags & IPRANGE_DST) {
+		m  = ntohl(iph->daddr) < ntohl(info->dst_min.ip);
+		m |= ntohl(iph->daddr) > ntohl(info->dst_max.ip);
+		m ^= !!(info->flags & IPRANGE_DST_INV);
+		if (m) {
+			pr_debug("dst IP " NIPQUAD_FMT " NOT in range %s"
+				 NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
+				 NIPQUAD(iph->daddr),
+				 (info->flags & IPRANGE_DST_INV) ? "(INV) " : "",
+				 NIPQUAD(info->dst_min.ip),
+				 NIPQUAD(info->dst_max.ip));
+			return false;
+		}
+	}
+	return true;
+}
+
+/*
+ * Three-way comparison of two IPv6 addresses, one 32-bit word at a time
+ * starting with s6_addr32[0], each word converted to host byte order.
+ *
+ * Returns a negative value if a < b, 0 if they are equal and a positive
+ * value if a > b.
+ *
+ * The words are compared as unsigned values.  Returning their difference
+ * as an int gives the wrong sign whenever two words differ by more than
+ * INT_MAX (e.g. ff00:: against 0100::), which breaks the range test.
+ */
+static inline int
+iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b)
+{
+	unsigned int i;
+	u_int32_t x, y;
+
+	for (i = 0; i < 4; ++i) {
+		x = ntohl(a->s6_addr32[i]);
+		y = ntohl(b->s6_addr32[i]);
+		if (x != y)
+			return x < y ? -1 : 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Revision 1 (IPv6): compare the packet's source and/or destination
+ * address against the ranges in struct xt_iprange_mtinfo, using
+ * iprange_ipv6_sub() for ordering.
+ *
+ * For each direction selected in info->flags the address must lie inside
+ * [min, max], or outside it when the matching *_INV flag is set.
+ */
+static bool
+iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_iprange_mtinfo *info = par->matchinfo;
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	bool below, above;
+
+	if (info->flags & IPRANGE_SRC) {
+		below = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0;
+		above = iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0;
+		if ((below || above) != !!(info->flags & IPRANGE_SRC_INV))
+			return false;
+	}
+
+	if (info->flags & IPRANGE_DST) {
+		below = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0;
+		above = iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0;
+		if ((below || above) != !!(info->flags & IPRANGE_DST_INV))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * "iprange" registrations: revision 0 (IPv4, struct ipt_iprange_info) and
+ * revision 1 for IPv4 and IPv6 (struct xt_iprange_mtinfo).
+ */
+static struct xt_match iprange_mt_reg[] __read_mostly = {
+ {
+ .name = "iprange",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .match = iprange_mt_v0,
+ .matchsize = sizeof(struct ipt_iprange_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "iprange",
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .match = iprange_mt4,
+ .matchsize = sizeof(struct xt_iprange_mtinfo),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "iprange",
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .match = iprange_mt6,
+ .matchsize = sizeof(struct xt_iprange_mtinfo),
+ .me = THIS_MODULE,
+ },
+};
+
+/* Module load: register every entry of iprange_mt_reg. */
+static int __init iprange_mt_init(void)
+{
+ return xt_register_matches(iprange_mt_reg, ARRAY_SIZE(iprange_mt_reg));
+}
+
+/* Module unload: unregister every entry of iprange_mt_reg. */
+static void __exit iprange_mt_exit(void)
+{
+ xt_unregister_matches(iprange_mt_reg, ARRAY_SIZE(iprange_mt_reg));
+}
+
+module_init(iprange_mt_init);
+module_exit(iprange_mt_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>, Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching");
+MODULE_ALIAS("ipt_iprange");
+MODULE_ALIAS("ip6t_iprange");
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
new file mode 100644
index 0000000..c4871ca
--- /dev/null
+++ b/net/netfilter/xt_length.c
@@ -0,0 +1,70 @@
+/* Kernel module to match packet length. */
+/* (C) 1999-2001 James Morris <jmorris@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <net/ip.h>
+
+#include <linux/netfilter/xt_length.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_DESCRIPTION("Xtables: Packet length (Layer3,4,5) match");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_length");
+MODULE_ALIAS("ip6t_length");
+
+/*
+ * IPv4: match when the IP header's tot_len (host byte order) lies in
+ * [info->min, info->max]; the result is XORed with info->invert.
+ */
+static bool
+length_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_length_info *info = par->matchinfo;
+	const u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
+	const bool in_range = info->min <= pktlen && pktlen <= info->max;
+
+	return in_range ^ info->invert;
+}
+
+/*
+ * IPv6: match when payload_len plus the fixed IPv6 header size lies in
+ * [info->min, info->max]; the result is XORed with info->invert.
+ */
+static bool
+length_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_length_info *info = par->matchinfo;
+	/*
+	 * Keep the sum in a type wider than 16 bits: with a 16-bit total,
+	 * payload lengths close to 65535 wrap around once the header size
+	 * is added and would be compared as very short packets.
+	 */
+	const unsigned int pktlen = ntohs(ipv6_hdr(skb)->payload_len) +
+				    sizeof(struct ipv6hdr);
+
+	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
+}
+
+/* "length" registrations: one entry per family, both using struct xt_length_info. */
+static struct xt_match length_mt_reg[] __read_mostly = {
+ {
+ .name = "length",
+ .family = NFPROTO_IPV4,
+ .match = length_mt,
+ .matchsize = sizeof(struct xt_length_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "length",
+ .family = NFPROTO_IPV6,
+ .match = length_mt6,
+ .matchsize = sizeof(struct xt_length_info),
+ .me = THIS_MODULE,
+ },
+};
+
+/* Module load: register every entry of length_mt_reg. */
+static int __init length_mt_init(void)
+{
+ return xt_register_matches(length_mt_reg, ARRAY_SIZE(length_mt_reg));
+}
+
+/* Module unload: unregister every entry of length_mt_reg. */
+static void __exit length_mt_exit(void)
+{
+ xt_unregister_matches(length_mt_reg, ARRAY_SIZE(length_mt_reg));
+}
+
+module_init(length_mt_init);
+module_exit(length_mt_exit);
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
new file mode 100644
index 0000000..c908d69
--- /dev/null
+++ b/net/netfilter/xt_limit.c
@@ -0,0 +1,190 @@
+/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
+ * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_limit.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
+MODULE_DESCRIPTION("Xtables: rate-limit match");
+MODULE_ALIAS("ipt_limit");
+MODULE_ALIAS("ip6t_limit");
+
+/* The algorithm used is the Simple Token Bucket Filter (TBF)
+ * see net/sched/sch_tbf.c in the linux source tree
+ */
+
+static DEFINE_SPINLOCK(limit_lock);
+
+/* Rusty: This is my (non-mathematically-inclined) understanding of
+ this algorithm. The `average rate' in jiffies becomes your initial
+ amount of credit `credit' and the most credit you can ever have
+ `credit_cap'. The `peak rate' becomes the cost of passing the
+ test, `cost'.
+
+ `prev' tracks the last packet hit: you gain one credit per jiffy.
+ If you get credit balance more than this, the extra credit is
+ discarded. Every time the match passes, you lose `cost' credits;
+ if you don't have that many, the test fails.
+
+ See Alexey's formal explanation in net/sched/sch_tbf.c.
+
+ To get the maximum range, we multiply by this factor (ie. you get N
+ credits per jiffy). We want to allow a rate as low as 1 per day
+ (slowest userspace tool allows), which means
+ CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32. ie. */
+#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24))
+
+/* Repeated shift and or gives us all 1s, final shift and add 1 gives
+ * us the power of 2 below the theoretical max, so GCC simply does a
+ * shift. */
+/* Each _POW2_BELOWn(x) ORs x with its right shifts by 0..n-1 bits. */
+#define _POW2_BELOW2(x) ((x)|((x)>>1))
+#define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2))
+#define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
+#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
+#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
+#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
+
+/* Credits granted per jiffy: MAX_CPJ rounded down to a power of two. */
+#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
+
+/*
+ * Token-bucket rate limit.
+ *
+ * Under limit_lock, the shared counters (reached through ->master) gain
+ * CREDITS_PER_JIFFY for every jiffy elapsed since r->prev, are clamped to
+ * r->credit_cap, and are then charged r->cost if enough credit is left.
+ *
+ * Returns true (after deducting the cost) when the packet is within the
+ * limit, false otherwise.
+ */
+static bool
+limit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_rateinfo *info = par->matchinfo;
+	struct xt_rateinfo *r = info->master;
+	unsigned long now = jiffies;
+	unsigned long elapsed;
+	bool allowed = false;
+
+	spin_lock_bh(&limit_lock);
+
+	/* Store "now" as the last hit and take the time since the old one. */
+	elapsed = now - xchg(&r->prev, now);
+	r->credit += elapsed * CREDITS_PER_JIFFY;
+	if (r->credit > r->credit_cap)
+		r->credit = r->credit_cap;
+
+	if (r->credit >= r->cost) {
+		/* We're not limited. */
+		r->credit -= r->cost;
+		allowed = true;
+	}
+
+	spin_unlock_bh(&limit_lock);
+	return allowed;
+}
+
+/*
+ * Convert a user-supplied value into internal credits:
+ * user * HZ * CREDITS_PER_JIFFY / XT_LIMIT_SCALE.
+ *
+ * Precision saver: multiply first while the product fits in 32 bits,
+ * otherwise divide first and accept the rounding loss.
+ */
+static u_int32_t
+user2credits(u_int32_t user)
+{
+	if (user <= 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
+		return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE;
+
+	/* Multiplying would overflow: divide first. */
+	return (user / XT_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
+}
+
+/*
+ * Rule-insertion check for the "limit" match.
+ *
+ * Rejects a zero burst and avg/burst pairs whose credit cap would come
+ * out smaller than the cost of one packet (overflow).  Points r->master
+ * at the rule's own data and, when r->cost is still 0, initialises the
+ * counters with a full bucket.
+ */
+static bool limit_mt_check(const struct xt_mtchk_param *par)
+{
+	struct xt_rateinfo *r = par->matchinfo;
+	const u_int32_t cost = user2credits(r->avg);
+	const u_int32_t full = user2credits(r->avg * r->burst);
+
+	/* Check for overflow. */
+	if (r->burst == 0 || full < cost) {
+		printk("Overflow in xt_limit, try lower: %u/%u\n",
+		       r->avg, r->burst);
+		return false;
+	}
+
+	/* For SMP, we only want to use one set of counters. */
+	r->master = r;
+	if (r->cost != 0)
+		return true;
+
+	/* First use of this rule data: start with the bucket full. */
+	r->prev = jiffies;
+	r->credit = full;
+	r->credit_cap = full;
+	r->cost = cost;
+	return true;
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Compat layout of struct xt_rateinfo (CONFIG_COMPAT only): prev is a
+ * compat_ulong_t and master a plain u_int32_t.
+ */
+struct compat_xt_rateinfo {
+ u_int32_t avg;
+ u_int32_t burst;
+
+ compat_ulong_t prev;
+ u_int32_t credit;
+ u_int32_t credit_cap, cost;
+
+ u_int32_t master;
+};
+
+/* To keep the full "prev" timestamp, the upper 32 bits are stored in the
+ * master pointer, which does not need to be preserved. */
+static void limit_mt_compat_from_user(void *dst, void *src)
+{
+ const struct compat_xt_rateinfo *cm = src;
+ /* Rebuild the native struct; .master is left out of the initialiser. */
+ struct xt_rateinfo m = {
+ .avg = cm->avg,
+ .burst = cm->burst,
+ .prev = cm->prev | (unsigned long)cm->master << 32,
+ .credit = cm->credit,
+ .credit_cap = cm->credit_cap,
+ .cost = cm->cost,
+ };
+ memcpy(dst, &m, sizeof(m));
+}
+
+/*
+ * Convert a native struct xt_rateinfo to the compat layout and copy it to
+ * user space.  The low part of prev goes into .prev, the bits above 32
+ * into .master.  Returns 0, or -EFAULT when copy_to_user() fails.
+ */
+static int limit_mt_compat_to_user(void __user *dst, void *src)
+{
+ const struct xt_rateinfo *m = src;
+ struct compat_xt_rateinfo cm = {
+ .avg = m->avg,
+ .burst = m->burst,
+ .prev = m->prev,
+ .credit = m->credit,
+ .credit_cap = m->credit_cap,
+ .cost = m->cost,
+ .master = m->prev >> 32,
+ };
+ return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
+/*
+ * "limit" match, revision 0, family NFPROTO_UNSPEC.  The compat hooks are
+ * only present when CONFIG_COMPAT is set.
+ */
+static struct xt_match limit_mt_reg __read_mostly = {
+ .name = "limit",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .match = limit_mt,
+ .checkentry = limit_mt_check,
+ .matchsize = sizeof(struct xt_rateinfo),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_xt_rateinfo),
+ .compat_from_user = limit_mt_compat_from_user,
+ .compat_to_user = limit_mt_compat_to_user,
+#endif
+ .me = THIS_MODULE,
+};
+
+/* Module load: register the match; returns xt_register_match()'s result. */
+static int __init limit_mt_init(void)
+{
+ return xt_register_match(&limit_mt_reg);
+}
+
+/* Module unload: unregister the match again. */
+static void __exit limit_mt_exit(void)
+{
+ xt_unregister_match(&limit_mt_reg);
+}
+
+module_init(limit_mt_init);
+module_exit(limit_mt_exit);
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
new file mode 100644
index 0000000..c200711
--- /dev/null
+++ b/net/netfilter/xt_mac.c
@@ -0,0 +1,61 @@
+/* Kernel module to match MAC address parameters. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+#include <linux/etherdevice.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/xt_mac.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: MAC address match");
+MODULE_ALIAS("ipt_mac");
+MODULE_ALIAS("ip6t_mac");
+
+/*
+ * Match on the Ethernet source address.
+ *
+ * Never matches unless the MAC header pointer lies at or after skb->head
+ * and a full ETH_HLEN header fits before skb->data.  Otherwise the result
+ * is "source address equals info->srcaddr", XORed with info->invert.
+ */
+static bool mac_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_mac_info *info = par->matchinfo;
+
+	/* Is mac pointer valid? */
+	if (skb_mac_header(skb) < skb->head)
+		return false;
+	if (skb_mac_header(skb) + ETH_HLEN > skb->data)
+		return false;
+
+	/* If so, compare... */
+	return (!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr))
+	       ^ info->invert;
+}
+
+/*
+ * "mac" match, revision 0, family NFPROTO_UNSPEC, restricted through
+ * .hooks to PRE_ROUTING, LOCAL_IN and FORWARD.
+ */
+static struct xt_match mac_mt_reg __read_mostly = {
+ .name = "mac",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .match = mac_mt,
+ .matchsize = sizeof(struct xt_mac_info),
+ .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD),
+ .me = THIS_MODULE,
+};
+
+/* Module load: register the match; returns xt_register_match()'s result. */
+static int __init mac_mt_init(void)
+{
+ return xt_register_match(&mac_mt_reg);
+}
+
+/* Module unload: unregister the match again. */
+static void __exit mac_mt_exit(void)
+{
+ xt_unregister_match(&mac_mt_reg);
+}
+
+module_init(mac_mt_init);
+module_exit(mac_mt_exit);
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
new file mode 100644
index 0000000..10b9e34
--- /dev/null
+++ b/net/netfilter/xt_mark.c
@@ -0,0 +1,119 @@
+/*
+ * xt_mark - Netfilter module to match NFMARK value
+ *
+ * (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ * Jan Engelhardt <jengelh@computergmbh.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/xt_mark.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("Xtables: packet mark match");
+MODULE_ALIAS("ipt_mark");
+MODULE_ALIAS("ip6t_mark");
+
+/*
+ * Revision 0: match when (skb->mark & info->mask) equals info->mark; the
+ * result is XORed with info->invert.
+ */
+static bool
+mark_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_mark_info *info = par->matchinfo;
+	const bool equal = (skb->mark & info->mask) == info->mark;
+
+	return equal ^ info->invert;
+}
+
+/*
+ * Revision 1: same test as revision 0, but reading its parameters from
+ * struct xt_mark_mtinfo1.
+ */
+static bool
+mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_mark_mtinfo1 *info = par->matchinfo;
+	const bool equal = (skb->mark & info->mask) == info->mark;
+
+	return equal ^ info->invert;
+}
+
+/*
+ * Revision 0 rule check: accept only mark and mask values that fit in
+ * 32 bits; anything larger is rejected with a warning.
+ */
+static bool mark_mt_check_v0(const struct xt_mtchk_param *par)
+{
+	const struct xt_mark_info *minfo = par->matchinfo;
+
+	if (minfo->mark <= 0xffffffff && minfo->mask <= 0xffffffff)
+		return true;
+
+	printk(KERN_WARNING "mark: only supports 32bit mark\n");
+	return false;
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Compat layout of struct xt_mark_info (CONFIG_COMPAT only): mark and mask
+ * are compat_ulong_t, followed by explicit padding.
+ */
+struct compat_xt_mark_info {
+ compat_ulong_t mark, mask;
+ u_int8_t invert;
+ u_int8_t __pad1;
+ u_int16_t __pad2;
+};
+
+/* Build a native struct xt_mark_info from the compat layout at src and copy it to dst. */
+static void mark_mt_compat_from_user_v0(void *dst, void *src)
+{
+ const struct compat_xt_mark_info *cm = src;
+ struct xt_mark_info m = {
+ .mark = cm->mark,
+ .mask = cm->mask,
+ .invert = cm->invert,
+ };
+ memcpy(dst, &m, sizeof(m));
+}
+
+/*
+ * Convert a native struct xt_mark_info to the compat layout and copy it to
+ * user space.  Returns 0, or -EFAULT when copy_to_user() fails.
+ */
+static int mark_mt_compat_to_user_v0(void __user *dst, void *src)
+{
+ const struct xt_mark_info *m = src;
+ struct compat_xt_mark_info cm = {
+ .mark = m->mark,
+ .mask = m->mask,
+ .invert = m->invert,
+ };
+ return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
+/*
+ * "mark" registrations: revision 0 (struct xt_mark_info, with a checkentry
+ * hook and, under CONFIG_COMPAT, compat conversion) and revision 1
+ * (struct xt_mark_mtinfo1).
+ */
+static struct xt_match mark_mt_reg[] __read_mostly = {
+ {
+ .name = "mark",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = mark_mt_check_v0,
+ .match = mark_mt_v0,
+ .matchsize = sizeof(struct xt_mark_info),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_xt_mark_info),
+ .compat_from_user = mark_mt_compat_from_user_v0,
+ .compat_to_user = mark_mt_compat_to_user_v0,
+#endif
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "mark",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .match = mark_mt,
+ .matchsize = sizeof(struct xt_mark_mtinfo1),
+ .me = THIS_MODULE,
+ },
+};
+
+/* Module load: register every entry of mark_mt_reg. */
+static int __init mark_mt_init(void)
+{
+ return xt_register_matches(mark_mt_reg, ARRAY_SIZE(mark_mt_reg));
+}
+
+/* Module unload: unregister every entry of mark_mt_reg. */
+static void __exit mark_mt_exit(void)
+{
+ xt_unregister_matches(mark_mt_reg, ARRAY_SIZE(mark_mt_reg));
+}
+
+module_init(mark_mt_init);
+module_exit(mark_mt_exit);
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
new file mode 100644
index 0000000..d06bb2d
--- /dev/null
+++ b/net/netfilter/xt_multiport.c
@@ -0,0 +1,248 @@
+/* Kernel module to match one of a list of TCP/UDP(-Lite)/SCTP/DCCP ports:
+ ports are in the same place so we can treat them as equal. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/udp.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+
+#include <linux/netfilter/xt_multiport.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: multiple port matching for TCP, UDP, UDP-Lite, SCTP and DCCP");
+MODULE_ALIAS("ipt_multiport");
+MODULE_ALIAS("ip6t_multiport");
+
+#if 0
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+/*
+ * Returns true if one of the first `count' entries of `portlist' equals
+ * the source port (unless flags is XT_MULTIPORT_DESTINATION) or the
+ * destination port (unless flags is XT_MULTIPORT_SOURCE); false otherwise.
+ */
+static inline bool
+ports_match_v0(const u_int16_t *portlist, enum xt_multiport_flags flags,
+	       u_int8_t count, u_int16_t src, u_int16_t dst)
+{
+	const bool want_src = flags != XT_MULTIPORT_DESTINATION;
+	const bool want_dst = flags != XT_MULTIPORT_SOURCE;
+	unsigned int i;
+
+	for (i = 0; i < count; i++) {
+		const u_int16_t port = portlist[i];
+
+		if ((want_src && port == src) || (want_dst && port == dst))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Revision 1 port test.  Walks minfo->ports; an entry whose pflags[] slot
+ * is set opens a range that is closed by the following entry, otherwise
+ * the entry is a single port.  minfo->flags selects whether the source
+ * port, the destination port or either one is tested.
+ *
+ * Returns true ^ minfo->invert on the first hit and minfo->invert when
+ * nothing in the list matches.
+ *
+ * NOTE(review): a range flag on the final entry would read ports[count];
+ * presumably this is never generated by the rule source -- confirm.
+ */
+static inline bool
+ports_match_v1(const struct xt_multiport_v1 *minfo,
+	       u_int16_t src, u_int16_t dst)
+{
+	unsigned int i;
+	u_int16_t lo, hi;
+	bool src_hit, dst_hit, matched;
+
+	for (i = 0; i < minfo->count; i++) {
+		lo = minfo->ports[i];
+
+		if (minfo->pflags[i]) {
+			/* range port matching */
+			hi = minfo->ports[++i];
+			duprintf("src or dst matches with %d-%d?\n", lo, hi);
+		} else {
+			/* exact port matching */
+			hi = lo;
+			duprintf("src or dst matches with %d?\n", lo);
+		}
+
+		src_hit = src >= lo && src <= hi;
+		dst_hit = dst >= lo && dst <= hi;
+
+		switch (minfo->flags) {
+		case XT_MULTIPORT_SOURCE:
+			matched = src_hit;
+			break;
+		case XT_MULTIPORT_DESTINATION:
+			matched = dst_hit;
+			break;
+		case XT_MULTIPORT_EITHER:
+			matched = src_hit || dst_hit;
+			break;
+		default:
+			matched = false;
+			break;
+		}
+
+		if (matched)
+			return true ^ minfo->invert;
+	}
+
+	return minfo->invert;
+}
+
+/*
+ * Revision 0 match.  Packets with a non-zero fragment offset never match.
+ * The two 16-bit port fields at par->thoff are fetched and handed to
+ * ports_match_v0() in host byte order; if they cannot be read, the packet
+ * is flagged for dropping through *par->hotdrop.
+ */
+static bool
+multiport_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_multiport *multiinfo = par->matchinfo;
+	__be16 _ports[2];
+	const __be16 *pptr;
+
+	if (par->fragoff != 0)
+		return false;
+
+	pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports);
+	if (pptr != NULL)
+		return ports_match_v0(multiinfo->ports, multiinfo->flags,
+				      multiinfo->count,
+				      ntohs(pptr[0]), ntohs(pptr[1]));
+
+	/* We've been asked to examine this packet, and we
+	 * can't.  Hence, no choice but to drop.
+	 */
+	duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n");
+	*par->hotdrop = true;
+	return false;
+}
+
+/*
+ * Revision 1 match.  Packets with a non-zero fragment offset never match.
+ * The two 16-bit port fields at par->thoff are fetched and handed to
+ * ports_match_v1() in host byte order; if they cannot be read, the packet
+ * is flagged for dropping through *par->hotdrop.
+ */
+static bool
+multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_multiport_v1 *multiinfo = par->matchinfo;
+	__be16 _ports[2];
+	const __be16 *pptr;
+
+	if (par->fragoff != 0)
+		return false;
+
+	pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports);
+	if (pptr != NULL)
+		return ports_match_v1(multiinfo,
+				      ntohs(pptr[0]), ntohs(pptr[1]));
+
+	/* We've been asked to examine this packet, and we
+	 * can't.  Hence, no choice but to drop.
+	 */
+	duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n");
+	*par->hotdrop = true;
+	return false;
+}
+
+/*
+ * Shared rule validation.  Returns true only when the rule names a
+ * supported protocol (TCP, UDP, UDP-Lite, SCTP or DCCP) without protocol
+ * inversion, match_flags is one of the three known XT_MULTIPORT_* values
+ * and count does not exceed XT_MULTI_PORTS.
+ */
+static inline bool
+check(u_int16_t proto,
+      u_int8_t ip_invflags,
+      u_int8_t match_flags,
+      u_int8_t count)
+{
+	switch (proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_UDPLITE:
+	case IPPROTO_SCTP:
+	case IPPROTO_DCCP:
+		break;
+	default:
+		return false;
+	}
+
+	if (ip_invflags & XT_INV_PROTO)
+		return false;
+
+	switch (match_flags) {
+	case XT_MULTIPORT_SOURCE:
+	case XT_MULTIPORT_DESTINATION:
+	case XT_MULTIPORT_EITHER:
+		break;
+	default:
+		return false;
+	}
+
+	return count <= XT_MULTI_PORTS;
+}
+
+/* IPv4, revision 0: validate the rule's protocol and match data via check(). */
+static bool multiport_mt_check_v0(const struct xt_mtchk_param *par)
+{
+ const struct ipt_ip *ip = par->entryinfo;
+ const struct xt_multiport *multiinfo = par->matchinfo;
+
+ return check(ip->proto, ip->invflags, multiinfo->flags,
+ multiinfo->count);
+}
+
+/* IPv4, revision 1: validate the rule's protocol and match data via check(). */
+static bool multiport_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct ipt_ip *ip = par->entryinfo;
+ const struct xt_multiport_v1 *multiinfo = par->matchinfo;
+
+ return check(ip->proto, ip->invflags, multiinfo->flags,
+ multiinfo->count);
+}
+
+/* IPv6, revision 0: validate the rule's protocol and match data via check(). */
+static bool multiport_mt6_check_v0(const struct xt_mtchk_param *par)
+{
+ const struct ip6t_ip6 *ip = par->entryinfo;
+ const struct xt_multiport *multiinfo = par->matchinfo;
+
+ return check(ip->proto, ip->invflags, multiinfo->flags,
+ multiinfo->count);
+}
+
+/* IPv6, revision 1: validate the rule's protocol and match data via check(). */
+static bool multiport_mt6_check(const struct xt_mtchk_param *par)
+{
+ const struct ip6t_ip6 *ip = par->entryinfo;
+ const struct xt_multiport_v1 *multiinfo = par->matchinfo;
+
+ return check(ip->proto, ip->invflags, multiinfo->flags,
+ multiinfo->count);
+}
+
+/*
+ * "multiport" registrations: revisions 0 and 1 for IPv4 and for IPv6.
+ * Both families share the match functions; only the checkentry hooks are
+ * family-specific.
+ */
+static struct xt_match multiport_mt_reg[] __read_mostly = {
+ {
+ .name = "multiport",
+ .family = NFPROTO_IPV4,
+ .revision = 0,
+ .checkentry = multiport_mt_check_v0,
+ .match = multiport_mt_v0,
+ .matchsize = sizeof(struct xt_multiport),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "multiport",
+ .family = NFPROTO_IPV4,
+ .revision = 1,
+ .checkentry = multiport_mt_check,
+ .match = multiport_mt,
+ .matchsize = sizeof(struct xt_multiport_v1),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "multiport",
+ .family = NFPROTO_IPV6,
+ .revision = 0,
+ .checkentry = multiport_mt6_check_v0,
+ .match = multiport_mt_v0,
+ .matchsize = sizeof(struct xt_multiport),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "multiport",
+ .family = NFPROTO_IPV6,
+ .revision = 1,
+ .checkentry = multiport_mt6_check,
+ .match = multiport_mt,
+ .matchsize = sizeof(struct xt_multiport_v1),
+ .me = THIS_MODULE,
+ },
+};
+
+/* Module load: register every entry of multiport_mt_reg. */
+static int __init multiport_mt_init(void)
+{
+ return xt_register_matches(multiport_mt_reg,
+ ARRAY_SIZE(multiport_mt_reg));
+}
+
+/* Module unload: unregister every entry of multiport_mt_reg. */
+static void __exit multiport_mt_exit(void)
+{
+ xt_unregister_matches(multiport_mt_reg, ARRAY_SIZE(multiport_mt_reg));
+}
+
+module_init(multiport_mt_init);
+module_exit(multiport_mt_exit);
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
new file mode 100644
index 0000000..f19ebd9
--- /dev/null
+++ b/net/netfilter/xt_owner.c
@@ -0,0 +1,188 @@
+/*
+ * Kernel module to match various things tied to sockets associated with
+ * locally generated outgoing packets.
+ *
+ * (C) 2000 Marc Boucher <marc@mbsi.ca>
+ *
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ * <jengelh@computergmbh.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/file.h>
+#include <net/sock.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_owner.h>
+#include <linux/netfilter_ipv4/ipt_owner.h>
+#include <linux/netfilter_ipv6/ip6t_owner.h>
+
+/*
+ * Revision 0 (IPv4): match on the UID and/or GID recorded in the file
+ * behind the packet's socket.
+ *
+ * Never matches when the skb has no socket, the socket has no struct
+ * socket, or that has no file.  Each test selected in info->match must
+ * find equality, or inequality when the same bit is set in info->invert.
+ */
+static bool
+owner_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct ipt_owner_info *info = par->matchinfo;
+	const struct file *filp;
+	bool differs;
+
+	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
+		return false;
+
+	filp = skb->sk->sk_socket->file;
+	if (filp == NULL)
+		return false;
+
+	if (info->match & IPT_OWNER_UID) {
+		differs = filp->f_uid != info->uid;
+		if (differs != !!(info->invert & IPT_OWNER_UID))
+			return false;
+	}
+
+	if (info->match & IPT_OWNER_GID) {
+		differs = filp->f_gid != info->gid;
+		if (differs != !!(info->invert & IPT_OWNER_GID))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Revision 0 (IPv6): match on the UID and/or GID recorded in the file
+ * behind the packet's socket.
+ *
+ * Never matches when the skb has no socket, the socket has no struct
+ * socket, or that has no file.  Each test selected in info->match must
+ * find equality, or inequality when the same bit is set in info->invert.
+ */
+static bool
+owner_mt6_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct ip6t_owner_info *info = par->matchinfo;
+	const struct file *filp;
+	bool differs;
+
+	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
+		return false;
+
+	filp = skb->sk->sk_socket->file;
+	if (filp == NULL)
+		return false;
+
+	if (info->match & IP6T_OWNER_UID) {
+		differs = filp->f_uid != info->uid;
+		if (differs != !!(info->invert & IP6T_OWNER_UID))
+			return false;
+	}
+
+	if (info->match & IP6T_OWNER_GID) {
+		differs = filp->f_gid != info->gid;
+		if (differs != !!(info->invert & IP6T_OWNER_GID))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Revision 1: socket-existence, UID-range and GID-range matching.
+ *
+ * Without a socket the rule matches only if every requested test is
+ * inverted (info->match == info->invert).  With a socket, an inverted
+ * XT_OWNER_SOCKET test fails at once.  Without a file the rule matches
+ * only if the UID/GID bits of match and invert agree.  Otherwise each
+ * selected range test must hold, or fail when inverted.
+ */
+static bool
+owner_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_owner_match_info *info = par->matchinfo;
+	const struct file *filp;
+	bool in_range, inverted;
+
+	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
+		return (info->match ^ info->invert) == 0;
+
+	/*
+	 * Socket exists but user wanted ! --socket-exists.
+	 * (Single ampersands intended.)
+	 */
+	if (info->match & info->invert & XT_OWNER_SOCKET)
+		return false;
+
+	filp = skb->sk->sk_socket->file;
+	if (filp == NULL)
+		return ((info->match ^ info->invert) &
+		       (XT_OWNER_UID | XT_OWNER_GID)) == 0;
+
+	if (info->match & XT_OWNER_UID) {
+		in_range = filp->f_uid >= info->uid_min &&
+			   filp->f_uid <= info->uid_max;
+		inverted = !!(info->invert & XT_OWNER_UID);
+		if (in_range == inverted)
+			return false;
+	}
+
+	if (info->match & XT_OWNER_GID) {
+		in_range = filp->f_gid >= info->gid_min &&
+			   filp->f_gid <= info->gid_max;
+		inverted = !!(info->invert & XT_OWNER_GID);
+		if (in_range == inverted)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Revision 0 (IPv4) rule check: refuse rules that ask for PID, SID or
+ * command matching; a warning is logged and false is returned.
+ */
+static bool owner_mt_check_v0(const struct xt_mtchk_param *par)
+{
+	const struct ipt_owner_info *info = par->matchinfo;
+
+	if (!(info->match & (IPT_OWNER_PID | IPT_OWNER_SID | IPT_OWNER_COMM)))
+		return true;
+
+	printk(KERN_WARNING KBUILD_MODNAME
+	       ": PID, SID and command matching is not "
+	       "supported anymore\n");
+	return false;
+}
+
+/*
+ * Revision 0 (IPv6) rule check: refuse rules that ask for PID or SID
+ * matching; a warning is logged and false is returned.
+ */
+static bool owner_mt6_check_v0(const struct xt_mtchk_param *par)
+{
+	const struct ip6t_owner_info *info = par->matchinfo;
+
+	if (!(info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)))
+		return true;
+
+	printk(KERN_WARNING KBUILD_MODNAME
+	       ": PID and SID matching is not supported anymore\n");
+	return false;
+}
+
+/*
+ * "owner" registrations: revision 0 for IPv4 and IPv6 (legacy ipt/ip6t
+ * info structs, each with its own checkentry) and revision 1 with family
+ * NFPROTO_UNSPEC.  All entries are limited through .hooks to LOCAL_OUT
+ * and POST_ROUTING.
+ */
+static struct xt_match owner_mt_reg[] __read_mostly = {
+ {
+ .name = "owner",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .match = owner_mt_v0,
+ .matchsize = sizeof(struct ipt_owner_info),
+ .checkentry = owner_mt_check_v0,
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "owner",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .match = owner_mt6_v0,
+ .matchsize = sizeof(struct ip6t_owner_info),
+ .checkentry = owner_mt6_check_v0,
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "owner",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .match = owner_mt,
+ .matchsize = sizeof(struct xt_owner_match_info),
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING),
+ .me = THIS_MODULE,
+ },
+};
+
+/* Module load: register every entry of owner_mt_reg. */
+static int __init owner_mt_init(void)
+{
+ return xt_register_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg));
+}
+
+/* Module unload: unregister every entry of owner_mt_reg. */
+static void __exit owner_mt_exit(void)
+{
+ xt_unregister_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg));
+}
+
+module_init(owner_mt_init);
+module_exit(owner_mt_exit);
+MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_DESCRIPTION("Xtables: socket owner matching");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_owner");
+MODULE_ALIAS("ip6t_owner");
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
new file mode 100644
index 0000000..1bcdfc1
--- /dev/null
+++ b/net/netfilter/xt_physdev.c
@@ -0,0 +1,136 @@
+/* Kernel module to match the bridge port in and
+ * out device for IP packets coming into contact with a bridge. */
+
+/* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter/xt_physdev.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
+MODULE_DESCRIPTION("Xtables: Bridge physical device match");
+MODULE_ALIAS("ipt_physdev");
+MODULE_ALIAS("ip6t_physdev");
+
+/*
+ * Match a packet against the bridge-port criteria of a physdev rule.
+ *
+ * If the skb carries no bridge info, the packet matches only when every
+ * option selected in info->bitmask is also inverted.  Otherwise the BRIDGED,
+ * ISIN and ISOUT flags are tested first, followed by masked, word-wise
+ * comparisons of the physical in/out device names.
+ */
+static bool
+physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	int i;
+	/*
+	 * Stand-in name used when a device pointer is NULL.  It is read
+	 * through unsigned int pointers below, so it must be int-aligned;
+	 * a plain char array carries no such guarantee.
+	 */
+	static const char nulldevname[IFNAMSIZ]
+		__attribute__((aligned(sizeof(unsigned int))));
+	const struct xt_physdev_info *info = par->matchinfo;
+	bool ret;
+	const char *indev, *outdev;
+	const struct nf_bridge_info *nf_bridge;
+
+	/* Not a bridged IP packet or no info available yet:
+	 * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
+	 * the destination device will be a bridge. */
+	if (!(nf_bridge = skb->nf_bridge)) {
+		/* Return MATCH if the invert flags of the used options are on */
+		if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
+		    !(info->invert & XT_PHYSDEV_OP_BRIDGED))
+			return false;
+		if ((info->bitmask & XT_PHYSDEV_OP_ISIN) &&
+		    !(info->invert & XT_PHYSDEV_OP_ISIN))
+			return false;
+		if ((info->bitmask & XT_PHYSDEV_OP_ISOUT) &&
+		    !(info->invert & XT_PHYSDEV_OP_ISOUT))
+			return false;
+		if ((info->bitmask & XT_PHYSDEV_OP_IN) &&
+		    !(info->invert & XT_PHYSDEV_OP_IN))
+			return false;
+		if ((info->bitmask & XT_PHYSDEV_OP_OUT) &&
+		    !(info->invert & XT_PHYSDEV_OP_OUT))
+			return false;
+		return true;
+	}
+
+	/* This only makes sense in the FORWARD and POSTROUTING chains */
+	if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
+	    (!!(nf_bridge->mask & BRNF_BRIDGED) ^
+	    !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
+		return false;
+
+	if ((info->bitmask & XT_PHYSDEV_OP_ISIN &&
+	    (!nf_bridge->physindev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) ||
+	    (info->bitmask & XT_PHYSDEV_OP_ISOUT &&
+	    (!nf_bridge->physoutdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT))))
+		return false;
+
+	if (!(info->bitmask & XT_PHYSDEV_OP_IN))
+		goto match_outdev;
+	indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
+	/* ret becomes true as soon as any masked word differs (a mismatch) */
+	for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) {
+		ret |= (((const unsigned int *)indev)[i]
+			^ ((const unsigned int *)info->physindev)[i])
+			& ((const unsigned int *)info->in_mask)[i];
+	}
+
+	/* fail when "names equal" disagrees with "not inverted" */
+	if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN))
+		return false;
+
+match_outdev:
+	if (!(info->bitmask & XT_PHYSDEV_OP_OUT))
+		return true;
+	outdev = nf_bridge->physoutdev ?
+		 nf_bridge->physoutdev->name : nulldevname;
+	for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) {
+		ret |= (((const unsigned int *)outdev)[i]
+			^ ((const unsigned int *)info->physoutdev)[i])
+			& ((const unsigned int *)info->out_mask)[i];
+	}
+
+	/* ret is "mismatch": match when mismatch XOR not-inverted is true */
+	return ret ^ !(info->invert & XT_PHYSDEV_OP_OUT);
+}
+
+/*
+ * Validate a physdev rule.  The bitmask must select at least one known
+ * option and no unknown ones.  Using --physdev-out without a non-inverted
+ * "bridged" requirement in OUTPUT, FORWARD or POSTROUTING triggers a
+ * warning, and the rule is rejected when OUTPUT is among the hooks.
+ */
+static bool physdev_mt_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_physdev_info *info = par->matchinfo;
+	const unsigned int out_hooks = (1 << NF_INET_LOCAL_OUT) |
+				       (1 << NF_INET_FORWARD) |
+				       (1 << NF_INET_POST_ROUTING);
+
+	if ((info->bitmask & XT_PHYSDEV_OP_MASK) == 0)
+		return false;
+	if ((info->bitmask & ~XT_PHYSDEV_OP_MASK) != 0)
+		return false;
+
+	/* Nothing more to verify unless --physdev-out is in use. */
+	if (!(info->bitmask & XT_PHYSDEV_OP_OUT))
+		return true;
+	/* A plain (non-inverted) "bridged" requirement makes it safe. */
+	if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
+	    !(info->invert & XT_PHYSDEV_OP_BRIDGED))
+		return true;
+	if (!(par->hook_mask & out_hooks))
+		return true;
+
+	printk(KERN_WARNING "physdev match: using --physdev-out in the "
+	       "OUTPUT, FORWARD and POSTROUTING chains for non-bridged "
+	       "traffic is not supported anymore.\n");
+	return !(par->hook_mask & (1 << NF_INET_LOCAL_OUT));
+}
+
+/* Family-independent registration record for the "physdev" match. */
+static struct xt_match physdev_mt_reg __read_mostly = {
+	.name       = "physdev",
+	.family     = NFPROTO_UNSPEC,
+	.revision   = 0,
+	.matchsize  = sizeof(struct xt_physdev_info),
+	.match      = physdev_mt,
+	.checkentry = physdev_mt_check,
+	.me         = THIS_MODULE,
+};
+
+/* Module entry point: register the physdev match. */
+static int __init physdev_mt_init(void)
+{
+	struct xt_match *match = &physdev_mt_reg;
+
+	return xt_register_match(match);
+}
+
+/* Module exit point: unregister the physdev match. */
+static void __exit physdev_mt_exit(void)
+{
+	struct xt_match *match = &physdev_mt_reg;
+
+	xt_unregister_match(match);
+}
+
+module_init(physdev_mt_init);
+module_exit(physdev_mt_exit);
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
new file mode 100644
index 0000000..69da1d3
--- /dev/null
+++ b/net/netfilter/xt_pkttype.c
@@ -0,0 +1,65 @@
+/* (C) 1999-2001 Michal Ludvig <michal@logix.cz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <linux/netfilter/xt_pkttype.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Ludvig <michal@logix.cz>");
+MODULE_DESCRIPTION("Xtables: link layer packet type match");
+MODULE_ALIAS("ipt_pkttype");
+MODULE_ALIAS("ip6t_pkttype");
+
+/*
+ * Match on the link-layer packet type.  For PACKET_LOOPBACK skbs the type
+ * is derived from the destination address instead: multicast for an IPv4
+ * multicast address or an IPv6 address whose first byte is 0xFF, broadcast
+ * otherwise.
+ */
+static bool
+pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_pkttype_info *info = par->matchinfo;
+	u_int8_t type = skb->pkt_type;
+
+	if (type == PACKET_LOOPBACK) {
+		if (par->family == NFPROTO_IPV4 &&
+		    ipv4_is_multicast(ip_hdr(skb)->daddr))
+			type = PACKET_MULTICAST;
+		else if (par->family == NFPROTO_IPV6 &&
+			 ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
+			type = PACKET_MULTICAST;
+		else
+			type = PACKET_BROADCAST;
+	}
+
+	return (type == info->pkttype) ^ info->invert;
+}
+
+/* Family-independent registration record for the "pkttype" match. */
+static struct xt_match pkttype_mt_reg __read_mostly = {
+	.name      = "pkttype",
+	.family    = NFPROTO_UNSPEC,
+	.revision  = 0,
+	.matchsize = sizeof(struct xt_pkttype_info),
+	.match     = pkttype_mt,
+	.me        = THIS_MODULE,
+};
+
+/* Module entry point: register the pkttype match. */
+static int __init pkttype_mt_init(void)
+{
+	struct xt_match *match = &pkttype_mt_reg;
+
+	return xt_register_match(match);
+}
+
+/* Module exit point: unregister the pkttype match. */
+static void __exit pkttype_mt_exit(void)
+{
+	struct xt_match *match = &pkttype_mt_reg;
+
+	xt_unregister_match(match);
+}
+
+module_init(pkttype_mt_init);
+module_exit(pkttype_mt_exit);
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
new file mode 100644
index 0000000..328bd20
--- /dev/null
+++ b/net/netfilter/xt_policy.c
@@ -0,0 +1,191 @@
+/* IP tables module for matching IPsec policy
+ *
+ * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <net/xfrm.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/xt_policy.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: IPsec policy match");
+MODULE_LICENSE("GPL");
+
+/*
+ * Masked address comparison: true when a1 and a2 agree on every bit set in
+ * mask m.  Families other than IPv4 and IPv6 never compare equal.
+ */
+static inline bool
+xt_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *m,
+	    const union nf_inet_addr *a2, unsigned short family)
+{
+	if (family == NFPROTO_IPV4)
+		return ((a1->ip ^ a2->ip) & m->ip) == 0;
+	if (family == NFPROTO_IPV6)
+		return ipv6_masked_addr_cmp(&a1->in6, &m->in6, &a2->in6) == 0;
+	return false;
+}
+
+/*
+ * Test one xfrm state against one policy element.  A field is only compared
+ * when its e->match bit is set, and its e->invert bit flips the outcome.
+ * All of saddr, daddr, proto, mode, spi and reqid must pass.
+ */
+static bool
+match_xfrm_state(const struct xfrm_state *x, const struct xt_policy_elem *e,
+		 unsigned short family)
+{
+#define MATCH_ADDR(x,y,z)	(!e->match.x ||				       \
+	 (xt_addr_cmp(&e->x, &e->y, (const union nf_inet_addr *)(z), family) \
+	  ^ e->invert.x))
+#define MATCH(x,y)		(!e->match.x || ((e->x == (y)) ^ e->invert.x))
+
+	if (!MATCH_ADDR(saddr, smask, &x->props.saddr))
+		return false;
+	if (!MATCH_ADDR(daddr, dmask, &x->id.daddr))
+		return false;
+	if (!MATCH(proto, x->id.proto))
+		return false;
+	if (!MATCH(mode, x->props.mode))
+		return false;
+	if (!MATCH(spi, x->id.spi))
+		return false;
+	return MATCH(reqid, x->props.reqid);
+}
+
+/*
+ * Match the skb's inbound security path against the rule's policy elements.
+ *
+ * Returns -1 when the skb has no security path, 1 on a match and 0 otherwise.
+ * In non-strict mode a single state matching element 0 is enough.  In strict
+ * mode the path length must equal info->len and every state must match its
+ * element; the path is walked from the last entry backwards, so the last
+ * entry is compared with element 0, the one before it with element 1, etc.
+ */
+static int
+match_policy_in(const struct sk_buff *skb, const struct xt_policy_info *info,
+		unsigned short family)
+{
+	const struct xt_policy_elem *e;
+	const struct sec_path *sp = skb->sp;
+	int strict = info->flags & XT_POLICY_MATCH_STRICT;
+	int i, pos;
+
+	if (sp == NULL)
+		return -1;
+	if (strict && info->len != sp->len)
+		return 0;
+
+	for (i = sp->len - 1; i >= 0; i--) {
+		/*
+		 * Distance from the end of the path.  The previous
+		 * "i - sp->len + 1" was the negation of this and produced a
+		 * negative array index for every entry but the last.
+		 */
+		pos = strict ? sp->len - 1 - i : 0;
+		if (pos >= info->len)
+			return 0;
+		e = &info->pol[pos];
+
+		if (match_xfrm_state(sp->xvec[i], e, family)) {
+			if (!strict)
+				return 1;
+		} else if (strict)
+			return 0;
+	}
+
+	return strict ? 1 : 0;
+}
+
+/*
+ * Match the transforms queued on the skb's dst chain against the rule's
+ * policy elements.
+ *
+ * Returns -1 when the first dst has no xfrm, 1 on a match and 0 otherwise.
+ * Non-strict mode succeeds as soon as any state matches element 0; strict
+ * mode requires state i to match element i and the chain to contain exactly
+ * info->len transforms.
+ */
+static int
+match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
+		 unsigned short family)
+{
+	const struct dst_entry *dst = skb->dst;
+	const int strict = info->flags & XT_POLICY_MATCH_STRICT;
+	int i = 0;
+
+	if (dst->xfrm == NULL)
+		return -1;
+
+	while (dst != NULL && dst->xfrm != NULL) {
+		const int pos = strict ? i : 0;
+		bool hit;
+
+		if (pos >= info->len)
+			return 0;
+
+		hit = match_xfrm_state(dst->xfrm, &info->pol[pos], family);
+		if (hit && !strict)
+			return 1;
+		if (!hit && strict)
+			return 0;
+
+		dst = dst->child;
+		i++;
+	}
+
+	return strict ? i == info->len : 0;
+}
+
+/*
+ * Entry point of the policy match.  Picks the inbound or outbound matcher
+ * according to the rule flags.  With XT_POLICY_MATCH_NONE the rule matches
+ * only packets for which the matcher reported "no IPsec" (a negative
+ * result); without it such packets never match.
+ */
+static bool
+policy_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_policy_info *info = par->matchinfo;
+	const bool want_none = (info->flags & XT_POLICY_MATCH_NONE) != 0;
+	int ret;
+
+	ret = (info->flags & XT_POLICY_MATCH_IN) ?
+	      match_policy_in(skb, info, par->match->family) :
+	      match_policy_out(skb, info, par->match->family);
+
+	if (ret < 0)
+		return want_none;
+	if (want_none)
+		return false;
+	return ret;
+}
+
+/*
+ * Validate a policy rule: a direction must be selected, the direction must
+ * be compatible with the hooks the rule is attached to, and the number of
+ * policy elements must not exceed XT_POLICY_MAX_ELEM.
+ */
+static bool policy_mt_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_policy_info *info = par->matchinfo;
+	const unsigned int input_hooks = (1 << NF_INET_PRE_ROUTING) |
+					 (1 << NF_INET_LOCAL_IN);
+	const unsigned int output_hooks = (1 << NF_INET_POST_ROUTING) |
+					  (1 << NF_INET_LOCAL_OUT);
+
+	if ((info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT)) == 0) {
+		printk(KERN_ERR "xt_policy: neither incoming nor "
+		       "outgoing policy selected\n");
+		return false;
+	}
+	if ((par->hook_mask & input_hooks) &&
+	    (info->flags & XT_POLICY_MATCH_OUT)) {
+		printk(KERN_ERR "xt_policy: output policy not valid in "
+		       "PRE_ROUTING and INPUT\n");
+		return false;
+	}
+	if ((par->hook_mask & output_hooks) &&
+	    (info->flags & XT_POLICY_MATCH_IN)) {
+		printk(KERN_ERR "xt_policy: input policy not valid in "
+		       "POST_ROUTING and OUTPUT\n");
+		return false;
+	}
+	if (info->len > XT_POLICY_MAX_ELEM) {
+		printk(KERN_ERR "xt_policy: too many policy elements\n");
+		return false;
+	}
+	return true;
+}
+
+/* The "policy" match is registered once per address family. */
+static struct xt_match policy_mt_reg[] __read_mostly = {
+	{
+		.name       = "policy",
+		.family     = NFPROTO_IPV4,
+		.matchsize  = sizeof(struct xt_policy_info),
+		.match      = policy_mt,
+		.checkentry = policy_mt_check,
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "policy",
+		.family     = NFPROTO_IPV6,
+		.matchsize  = sizeof(struct xt_policy_info),
+		.match      = policy_mt,
+		.checkentry = policy_mt_check,
+		.me         = THIS_MODULE,
+	},
+};
+
+/* Module entry point: register both policy match variants. */
+static int __init policy_mt_init(void)
+{
+	const unsigned int count = ARRAY_SIZE(policy_mt_reg);
+
+	return xt_register_matches(policy_mt_reg, count);
+}
+
+/* Module exit point: unregister both policy match variants. */
+static void __exit policy_mt_exit(void)
+{
+	const unsigned int count = ARRAY_SIZE(policy_mt_reg);
+
+	xt_unregister_matches(policy_mt_reg, count);
+}
+
+module_init(policy_mt_init);
+module_exit(policy_mt_exit);
+MODULE_ALIAS("ipt_policy");
+MODULE_ALIAS("ip6t_policy");
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
new file mode 100644
index 0000000..c84fce5
--- /dev/null
+++ b/net/netfilter/xt_quota.c
@@ -0,0 +1,72 @@
+/*
+ * netfilter module to enforce network quotas
+ *
+ * Sam Johnston <samj@samj.net>
+ */
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_quota.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
+MODULE_DESCRIPTION("Xtables: countdown quota match");
+MODULE_ALIAS("ipt_quota");
+MODULE_ALIAS("ip6t_quota");
+
+static DEFINE_SPINLOCK(quota_lock);
+
+/*
+ * Charge the packet length against the shared quota counter.  While enough
+ * quota remains the packet is accounted and the result is the negation of
+ * the invert flag; once a packet does not fit, the quota is zeroed and the
+ * invert flag itself is returned.
+ */
+static bool
+quota_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_quota_info *rule = par->matchinfo;
+	struct xt_quota_info *q = rule->master;
+	bool ret = q->flags & XT_QUOTA_INVERT;
+
+	spin_lock_bh(&quota_lock);
+	if (q->quota < skb->len) {
+		/* we do not allow even small packets from now on */
+		q->quota = 0;
+	} else {
+		q->quota -= skb->len;
+		ret = !ret;
+	}
+	spin_unlock_bh(&quota_lock);
+
+	return ret;
+}
+
+/*
+ * Validate a quota rule: unknown flag bits are rejected.  On success the
+ * rule's master pointer is set to the rule itself.
+ */
+static bool quota_mt_check(const struct xt_mtchk_param *par)
+{
+	struct xt_quota_info *q = par->matchinfo;
+	const bool flags_ok = (q->flags & ~XT_QUOTA_MASK) == 0;
+
+	if (!flags_ok)
+		return false;
+
+	/* For SMP, we only want to use one set of counters. */
+	q->master = q;
+	return true;
+}
+
+/* Family-independent registration record for the "quota" match. */
+static struct xt_match quota_mt_reg __read_mostly = {
+	.name       = "quota",
+	.family     = NFPROTO_UNSPEC,
+	.revision   = 0,
+	.matchsize  = sizeof(struct xt_quota_info),
+	.checkentry = quota_mt_check,
+	.match      = quota_mt,
+	.me         = THIS_MODULE,
+};
+
+/* Module entry point: register the quota match. */
+static int __init quota_mt_init(void)
+{
+	struct xt_match *match = &quota_mt_reg;
+
+	return xt_register_match(match);
+}
+
+/* Module exit point: unregister the quota match. */
+static void __exit quota_mt_exit(void)
+{
+	struct xt_match *match = &quota_mt_reg;
+
+	xt_unregister_match(match);
+}
+
+module_init(quota_mt_init);
+module_exit(quota_mt_exit);
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
new file mode 100644
index 0000000..220a1d5
--- /dev/null
+++ b/net/netfilter/xt_rateest.c
@@ -0,0 +1,156 @@
+/*
+ * (C) 2007 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/gen_stats.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_rateest.h>
+#include <net/netfilter/xt_rateest.h>
+
+
+/*
+ * Compare the rates of estimator 1 with either fixed values
+ * (XT_RATEEST_MATCH_ABS) or the rates of estimator 2.
+ *
+ * With XT_RATEEST_MATCH_DELTA each estimator's reading is replaced by the
+ * amount by which the configured bps/pps value exceeds it (0 if it does
+ * not).  The BPS and PPS flags select which rates take part, info->mode
+ * selects <, > or ==, and XT_RATEEST_MATCH_INVERT flips the final result.
+ */
+static bool
+xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_rateest_match_info *info = par->matchinfo;
+	struct gnet_stats_rate_est *r;
+	u_int32_t bps1, bps2, pps1, pps2;
+	bool ret = true;
+
+	/* Snapshot estimator 1 under its lock. */
+	spin_lock_bh(&info->est1->lock);
+	r = &info->est1->rstats;
+	if (info->flags & XT_RATEEST_MATCH_DELTA) {
+		bps1 = info->bps1 >= r->bps ? info->bps1 - r->bps : 0;
+		pps1 = info->pps1 >= r->pps ? info->pps1 - r->pps : 0;
+	} else {
+		bps1 = r->bps;
+		pps1 = r->pps;
+	}
+	spin_unlock_bh(&info->est1->lock);
+
+	if (info->flags & XT_RATEEST_MATCH_ABS) {
+		bps2 = info->bps2;
+		pps2 = info->pps2;
+	} else {
+		/* Relative mode: snapshot estimator 2 the same way. */
+		spin_lock_bh(&info->est2->lock);
+		r = &info->est2->rstats;
+		if (info->flags & XT_RATEEST_MATCH_DELTA) {
+			bps2 = info->bps2 >= r->bps ? info->bps2 - r->bps : 0;
+			pps2 = info->pps2 >= r->pps ? info->pps2 - r->pps : 0;
+		} else {
+			bps2 = r->bps;
+			pps2 = r->pps;
+		}
+		spin_unlock_bh(&info->est2->lock);
+	}
+
+	switch (info->mode) {
+	case XT_RATEEST_MATCH_LT:
+		if (info->flags & XT_RATEEST_MATCH_BPS)
+			ret &= bps1 < bps2;
+		if (info->flags & XT_RATEEST_MATCH_PPS)
+			ret &= pps1 < pps2;
+		break;
+	case XT_RATEEST_MATCH_GT:
+		if (info->flags & XT_RATEEST_MATCH_BPS)
+			ret &= bps1 > bps2;
+		if (info->flags & XT_RATEEST_MATCH_PPS)
+			ret &= pps1 > pps2;
+		break;
+	case XT_RATEEST_MATCH_EQ:
+		if (info->flags & XT_RATEEST_MATCH_BPS)
+			ret &= bps1 == bps2;
+		if (info->flags & XT_RATEEST_MATCH_PPS)
+			/* was "pps2 == pps2", which is always true */
+			ret &= pps1 == pps2;
+		break;
+	}
+
+	ret ^= info->flags & XT_RATEEST_MATCH_INVERT ? true : false;
+	return ret;
+}
+
+/*
+ * Validate a rateest rule and resolve its estimators by name.
+ *
+ * Exactly one of ABS/REL must be set, at least one of BPS/PPS must be set
+ * and the mode must be EQ, LT or GT.  Estimator 1 is always looked up;
+ * estimator 2 only in REL mode.  If the second lookup fails, the reference
+ * obtained for estimator 1 is released again.
+ */
+static bool xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
+{
+	struct xt_rateest_match_info *info = par->matchinfo;
+	struct xt_rateest *est1, *est2 = NULL;
+
+	if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS |
+				     XT_RATEEST_MATCH_REL)) != 1)
+		return false;
+
+	if ((info->flags & (XT_RATEEST_MATCH_BPS | XT_RATEEST_MATCH_PPS)) == 0)
+		return false;
+
+	if (info->mode != XT_RATEEST_MATCH_EQ &&
+	    info->mode != XT_RATEEST_MATCH_LT &&
+	    info->mode != XT_RATEEST_MATCH_GT)
+		return false;
+
+	est1 = xt_rateest_lookup(info->name1);
+	if (est1 == NULL)
+		return false;
+
+	if (info->flags & XT_RATEEST_MATCH_REL) {
+		est2 = xt_rateest_lookup(info->name2);
+		if (est2 == NULL) {
+			xt_rateest_put(est1);
+			return false;
+		}
+	}
+
+	info->est1 = est1;
+	info->est2 = est2;
+	return true;
+}
+
+/* Release the estimator references taken by xt_rateest_mt_checkentry(). */
+static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	struct xt_rateest_match_info *info = par->matchinfo;
+	struct xt_rateest *second = info->est2;
+
+	xt_rateest_put(info->est1);
+	if (second != NULL)
+		xt_rateest_put(second);
+}
+
+/* Family-independent registration record for the "rateest" match. */
+static struct xt_match xt_rateest_mt_reg __read_mostly = {
+	.name       = "rateest",
+	.family     = NFPROTO_UNSPEC,
+	.revision   = 0,
+	.matchsize  = sizeof(struct xt_rateest_match_info),
+	.checkentry = xt_rateest_mt_checkentry,
+	.match      = xt_rateest_mt,
+	.destroy    = xt_rateest_mt_destroy,
+	.me         = THIS_MODULE,
+};
+
+/* Module entry point: register the rateest match. */
+static int __init xt_rateest_mt_init(void)
+{
+	struct xt_match *match = &xt_rateest_mt_reg;
+
+	return xt_register_match(match);
+}
+
+/* Module exit point: unregister the rateest match. */
+static void __exit xt_rateest_mt_fini(void)
+{
+	struct xt_match *match = &xt_rateest_mt_reg;
+
+	xt_unregister_match(match);
+}
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xtables rate estimator match");
+MODULE_ALIAS("ipt_rateest");
+MODULE_ALIAS("ip6t_rateest");
+module_init(xt_rateest_mt_init);
+module_exit(xt_rateest_mt_fini);
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
new file mode 100644
index 0000000..6741928
--- /dev/null
+++ b/net/netfilter/xt_realm.c
@@ -0,0 +1,54 @@
+/* IP tables module for matching the routing realm
+ *
+ * (C) 2003 by Sampsa Ranta <sampsa@netsonic.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <net/route.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/xt_realm.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("Sampsa Ranta <sampsa@netsonic.fi>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: Routing realm match");
+MODULE_ALIAS("ipt_realm");
+
+/*
+ * Match on the routing realm: the dst entry's tclassid, masked with
+ * info->mask, is compared with info->id; info->invert flips the result.
+ */
+static bool
+realm_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_realm_info *info = par->matchinfo;
+	const struct dst_entry *dst = skb->dst;
+	const bool same_realm = info->id == (dst->tclassid & info->mask);
+
+	return same_realm ^ info->invert;
+}
+
+/*
+ * Registration record for the "realm" match, restricted to POST_ROUTING,
+ * FORWARD, LOCAL_OUT and LOCAL_IN.
+ */
+static struct xt_match realm_mt_reg __read_mostly = {
+	.name      = "realm",
+	.family    = NFPROTO_UNSPEC,
+	.matchsize = sizeof(struct xt_realm_info),
+	.match     = realm_mt,
+	.hooks     = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_FORWARD) |
+		     (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN),
+	.me        = THIS_MODULE
+};
+
+/* Module entry point: register the realm match. */
+static int __init realm_mt_init(void)
+{
+	struct xt_match *match = &realm_mt_reg;
+
+	return xt_register_match(match);
+}
+
+/* Module exit point: unregister the realm match. */
+static void __exit realm_mt_exit(void)
+{
+	struct xt_match *match = &realm_mt_reg;
+
+	xt_unregister_match(match);
+}
+
+module_init(realm_mt_init);
+module_exit(realm_mt_exit);
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
new file mode 100644
index 0000000..280c471
--- /dev/null
+++ b/net/netfilter/xt_recent.c
@@ -0,0 +1,687 @@
+/*
+ * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This is a replacement of the old ipt_recent module, which carried the
+ * following copyright notice:
+ *
+ * Author: Stephen Frost <sfrost@snowman.net>
+ * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org
+ */
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/list.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/bitops.h>
+#include <linux/skbuff.h>
+#include <linux/inet.h>
+#include <net/net_namespace.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_recent.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_recent");
+MODULE_ALIAS("ip6t_recent");
+
+static unsigned int ip_list_tot = 100;
+static unsigned int ip_pkt_list_tot = 20;
+static unsigned int ip_list_hash_size = 0;
+static unsigned int ip_list_perms = 0644;
+static unsigned int ip_list_uid = 0;
+static unsigned int ip_list_gid = 0;
+module_param(ip_list_tot, uint, 0400);
+module_param(ip_pkt_list_tot, uint, 0400);
+module_param(ip_list_hash_size, uint, 0400);
+module_param(ip_list_perms, uint, 0400);
+module_param(ip_list_uid, uint, 0400);
+module_param(ip_list_gid, uint, 0400);
+MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
+MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)");
+MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
+MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files");
+MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/xt_recent/* files");
+MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/xt_recent/* files");
+
+struct recent_entry { /* one remembered address inside a recent_table */
+ struct list_head list; /* link in one of the table's iphash[] buckets */
+ struct list_head lru_list; /* link in the table's lru_list; updated entries move to the tail */
+ union nf_inet_addr addr; /* the remembered address */
+ u_int16_t family; /* NFPROTO_IPV4 or NFPROTO_IPV6 */
+ u_int8_t ttl; /* TTL / hop limit recorded for the entry */
+ u_int8_t index; /* slot in stamps[] used by the next update */
+ u_int16_t nstamps; /* number of stamps[] slots filled so far */
+ unsigned long stamps[0]; /* jiffies timestamps; ip_pkt_list_tot slots are allocated */
+};
+
+struct recent_table { /* one named list of recently seen addresses */
+ struct list_head list; /* link in the global "tables" list */
+ char name[XT_RECENT_NAME_LEN]; /* table name; also used as the proc file name */
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *proc_old, *proc; /* compat and current proc entries */
+#endif
+ unsigned int refcnt; /* incremented by recent_mt_check(), decremented by recent_mt_destroy() */
+ unsigned int entries; /* number of entries currently stored */
+ struct list_head lru_list; /* entries in update order; the head is evicted when the table is full */
+ struct list_head iphash[0]; /* ip_list_hash_size hash buckets */
+};
+
+static LIST_HEAD(tables); /* every recent_table, linked through ->list */
+static DEFINE_SPINLOCK(recent_lock); /* taken with _bh around entry access and changes to "tables" */
+static DEFINE_MUTEX(recent_mutex); /* held by recent_mt_check() and recent_mt_destroy() */
+
+#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+static struct proc_dir_entry *proc_old_dir;
+#endif
+static struct proc_dir_entry *recent_proc_dir;
+static const struct file_operations recent_old_fops, recent_mt_fops;
+#endif
+
+static u_int32_t hash_rnd;
+static bool hash_rnd_initted;
+
+/*
+ * Hash an IPv4 address to a bucket index by masking with
+ * ip_list_hash_size - 1.  The random seed is drawn on first use.
+ */
+static unsigned int recent_entry_hash4(const union nf_inet_addr *addr)
+{
+	u_int32_t hash;
+
+	if (!hash_rnd_initted) {
+		get_random_bytes(&hash_rnd, sizeof(hash_rnd));
+		hash_rnd_initted = true;
+	}
+	hash = jhash_1word((__force u32)addr->ip, hash_rnd);
+	return hash & (ip_list_hash_size - 1);
+}
+
+/*
+ * Hash an IPv6 address to a bucket index by masking with
+ * ip_list_hash_size - 1.  The random seed is drawn on first use.
+ */
+static unsigned int recent_entry_hash6(const union nf_inet_addr *addr)
+{
+	u_int32_t hash;
+
+	if (!hash_rnd_initted) {
+		get_random_bytes(&hash_rnd, sizeof(hash_rnd));
+		hash_rnd_initted = true;
+	}
+	hash = jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), hash_rnd);
+	return hash & (ip_list_hash_size - 1);
+}
+
+/*
+ * Find the entry for an address in a table.  Family and the full address
+ * union must be equal; the TTL must be equal as well unless either the
+ * requested or the stored TTL is 0.  Returns NULL when nothing matches.
+ */
+static struct recent_entry *
+recent_entry_lookup(const struct recent_table *table,
+		    const union nf_inet_addr *addrp, u_int16_t family,
+		    u_int8_t ttl)
+{
+	struct recent_entry *e;
+	const unsigned int h = (family == NFPROTO_IPV4) ?
+			       recent_entry_hash4(addrp) :
+			       recent_entry_hash6(addrp);
+
+	list_for_each_entry(e, &table->iphash[h], list) {
+		if (e->family != family)
+			continue;
+		if (memcmp(&e->addr, addrp, sizeof(e->addr)) != 0)
+			continue;
+		if (ttl == e->ttl || ttl == 0 || e->ttl == 0)
+			return e;
+	}
+	return NULL;
+}
+
+/* Unlink an entry from its hash bucket and the LRU list, then free it. */
+static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
+{
+	list_del(&e->lru_list);
+	list_del(&e->list);
+	t->entries--;
+	kfree(e);
+}
+
+/*
+ * Create an entry for an address and stamp it with the current jiffies.
+ * When the table already holds ip_list_tot entries, the entry at the head
+ * of the LRU list is dropped first.  Returns NULL if the atomic allocation
+ * fails.
+ */
+static struct recent_entry *
+recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr,
+		  u_int16_t family, u_int8_t ttl)
+{
+	struct recent_entry *e;
+	struct list_head *bucket;
+
+	if (t->entries >= ip_list_tot) {
+		e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
+		recent_entry_remove(t, e);
+	}
+
+	e = kmalloc(sizeof(*e) + ip_pkt_list_tot * sizeof(e->stamps[0]),
+		    GFP_ATOMIC);
+	if (e == NULL)
+		return NULL;
+
+	memcpy(&e->addr, addr, sizeof(e->addr));
+	e->family    = family;
+	e->ttl       = ttl;
+	e->index     = 1;
+	e->nstamps   = 1;
+	e->stamps[0] = jiffies;
+
+	if (family == NFPROTO_IPV4)
+		bucket = &t->iphash[recent_entry_hash4(addr)];
+	else
+		bucket = &t->iphash[recent_entry_hash6(addr)];
+	list_add_tail(&e->list, bucket);
+	list_add_tail(&e->lru_list, &t->lru_list);
+	t->entries++;
+	return e;
+}
+
+/*
+ * Record a new timestamp for an existing entry and move it to the tail of
+ * the table's LRU list.
+ *
+ * The slot index is wrapped BEFORE the store.  recent_entry_init() leaves
+ * index at 1, so wrapping afterwards made the first update write stamps[1]
+ * even when ip_pkt_list_tot is 1 and only stamps[0] was allocated.
+ */
+static void recent_entry_update(struct recent_table *t, struct recent_entry *e)
+{
+	e->index %= ip_pkt_list_tot;
+	e->stamps[e->index++] = jiffies;
+	if (e->index > e->nstamps)
+		e->nstamps = e->index;
+	list_move_tail(&e->lru_list, &t->lru_list);
+}
+
+/* Return the table with the given name, or NULL if none is registered. */
+static struct recent_table *recent_table_lookup(const char *name)
+{
+	struct recent_table *tbl;
+
+	list_for_each_entry(tbl, &tables, list) {
+		if (strcmp(tbl->name, name) == 0)
+			return tbl;
+	}
+	return NULL;
+}
+
+/* Remove and free every entry of a table, bucket by bucket. */
+static void recent_table_flush(struct recent_table *t)
+{
+	struct recent_entry *cur, *tmp;
+	unsigned int bucket;
+
+	for (bucket = 0; bucket < ip_list_hash_size; bucket++) {
+		list_for_each_entry_safe(cur, tmp, &t->iphash[bucket], list) {
+			recent_entry_remove(t, cur);
+		}
+	}
+}
+
+/*
+ * Packet-path handler of the recent match.
+ *
+ * Extracts the source or destination address (per info->side) and the
+ * TTL / hop limit, then, under recent_lock, looks the address up in the
+ * rule's table and applies the rule's operation:
+ *   SET          - add or refresh the entry; always toggles the result
+ *   REMOVE       - delete the entry if present
+ *   CHECK/UPDATE - count stamps within info->seconds and toggle the result
+ *                  once info->hit_count is reached; UPDATE also refreshes
+ *                  the entry on a hit
+ * The returned value starts out as info->invert.
+ */
+static bool
+recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_recent_mtinfo *info = par->matchinfo;
+	const bool use_dst = info->side == XT_RECENT_DEST;
+	struct recent_table *tbl;
+	struct recent_entry *ent;
+	union nf_inet_addr addr = {};
+	u_int8_t ttl;
+	bool ret = info->invert;
+
+	if (par->match->family == NFPROTO_IPV4) {
+		const struct iphdr *ip4h = ip_hdr(skb);
+
+		addr.ip = use_dst ? ip4h->daddr : ip4h->saddr;
+		ttl = ip4h->ttl;
+	} else {
+		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+
+		memcpy(&addr.in6, use_dst ? &ip6h->daddr : &ip6h->saddr,
+		       sizeof(addr.in6));
+		ttl = ip6h->hop_limit;
+	}
+
+	/* use TTL as seen before forwarding */
+	if (par->out != NULL && skb->sk == NULL)
+		ttl++;
+
+	spin_lock_bh(&recent_lock);
+	tbl = recent_table_lookup(info->name);
+	ent = recent_entry_lookup(tbl, &addr, par->match->family,
+				  (info->check_set & XT_RECENT_TTL) ? ttl : 0);
+	if (ent == NULL) {
+		/* Unknown address: only SET creates an entry. */
+		if (info->check_set & XT_RECENT_SET) {
+			ent = recent_entry_init(tbl, &addr,
+						par->match->family, ttl);
+			if (ent == NULL)
+				*par->hotdrop = true;
+			ret = !ret;
+		}
+		goto out;
+	}
+
+	if (info->check_set & XT_RECENT_SET) {
+		ret = !ret;
+	} else if (info->check_set & XT_RECENT_REMOVE) {
+		recent_entry_remove(tbl, ent);
+		ret = !ret;
+	} else if (info->check_set & (XT_RECENT_CHECK | XT_RECENT_UPDATE)) {
+		const unsigned long cutoff = jiffies - info->seconds * HZ;
+		unsigned int slot, hits = 0;
+
+		for (slot = 0; slot < ent->nstamps; slot++) {
+			/* skip stamps older than the time window */
+			if (info->seconds &&
+			    time_after(cutoff, ent->stamps[slot]))
+				continue;
+			hits++;
+			if (hits >= info->hit_count) {
+				ret = !ret;
+				break;
+			}
+		}
+	}
+
+	if ((info->check_set & XT_RECENT_SET) ||
+	    ((info->check_set & XT_RECENT_UPDATE) && ret)) {
+		recent_entry_update(tbl, ent);
+		ent->ttl = ttl;
+	}
+out:
+	spin_unlock_bh(&recent_lock);
+	return ret;
+}
+
+/*
+ * Validate a recent rule and attach it to its table, creating the table
+ * (and its proc entries) when the name is not yet known.
+ *
+ * Rejected: anything but exactly one of SET/REMOVE/CHECK/UPDATE, SET or
+ * REMOVE combined with seconds/hit_count, hit_count above ip_pkt_list_tot,
+ * and empty or unterminated table names.  Returns true on success.
+ */
+static bool recent_mt_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_recent_mtinfo *info = par->matchinfo;
+	struct recent_table *t;
+	unsigned i;
+	bool ret = false;
+
+	if (hweight8(info->check_set &
+		     (XT_RECENT_SET | XT_RECENT_REMOVE |
+		      XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1)
+		return false;
+	if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) &&
+	    (info->seconds || info->hit_count))
+		return false;
+	if (info->hit_count > ip_pkt_list_tot)
+		return false;
+	if (info->name[0] == '\0' ||
+	    strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN)
+		return false;
+
+	mutex_lock(&recent_mutex);
+	t = recent_table_lookup(info->name);
+	if (t != NULL) {
+		/* Table already exists: just take another reference. */
+		t->refcnt++;
+		ret = true;
+		goto out;
+	}
+
+	t = kzalloc(sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size,
+		    GFP_KERNEL);
+	if (t == NULL)
+		goto out;
+	t->refcnt = 1;
+	/* safe: the strnlen() check above guarantees termination */
+	strcpy(t->name, info->name);
+	INIT_LIST_HEAD(&t->lru_list);
+	for (i = 0; i < ip_list_hash_size; i++)
+		INIT_LIST_HEAD(&t->iphash[i]);
+#ifdef CONFIG_PROC_FS
+	t->proc = proc_create_data(t->name, ip_list_perms, recent_proc_dir,
+		  &recent_mt_fops, t);
+	if (t->proc == NULL) {
+		kfree(t);
+		goto out;
+	}
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+	t->proc_old = proc_create_data(t->name, ip_list_perms, proc_old_dir,
+		      &recent_old_fops, t);
+	if (t->proc_old == NULL) {
+		/*
+		 * Undo the entry created above in recent_proc_dir.  This
+		 * used to name proc_old_dir, where nothing was created, and
+		 * left the new entry pointing at the freed table.
+		 */
+		remove_proc_entry(t->name, recent_proc_dir);
+		kfree(t);
+		goto out;
+	}
+	t->proc_old->uid = ip_list_uid;
+	t->proc_old->gid = ip_list_gid;
+#endif
+	t->proc->uid = ip_list_uid;
+	t->proc->gid = ip_list_gid;
+#endif
+	spin_lock_bh(&recent_lock);
+	list_add_tail(&t->list, &tables);
+	spin_unlock_bh(&recent_lock);
+	ret = true;
+out:
+	mutex_unlock(&recent_mutex);
+	return ret;
+}
+
+/*
+ * Drop a rule's reference on its table.  When the last reference goes away
+ * the table is unlinked, its proc entries are removed, and all entries and
+ * the table itself are freed.
+ */
+static void recent_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	const struct xt_recent_mtinfo *info = par->matchinfo;
+	struct recent_table *tbl;
+
+	mutex_lock(&recent_mutex);
+	tbl = recent_table_lookup(info->name);
+	tbl->refcnt--;
+	if (tbl->refcnt != 0)
+		goto unlock;
+
+	spin_lock_bh(&recent_lock);
+	list_del(&tbl->list);
+	spin_unlock_bh(&recent_lock);
+#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+	remove_proc_entry(tbl->name, proc_old_dir);
+#endif
+	remove_proc_entry(tbl->name, recent_proc_dir);
+#endif
+	recent_table_flush(tbl);
+	kfree(tbl);
+unlock:
+	mutex_unlock(&recent_mutex);
+}
+
+#ifdef CONFIG_PROC_FS
+struct recent_iter_state { /* per-open seq_file cursor, stored in seq->private */
+ const struct recent_table *table; /* table being dumped; set in recent_seq_open() */
+ unsigned int bucket; /* hash bucket the iteration is currently in */
+};
+
+/*
+ * seq_file start: take recent_lock and return the entry at position *pos,
+ * counting through the hash buckets in order, or NULL when *pos lies beyond
+ * the last entry.  The lock stays held on return.
+ */
+static void *recent_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(recent_lock)
+{
+	struct recent_iter_state *st = seq->private;
+	const struct recent_table *t = st->table;
+	struct recent_entry *e;
+	loff_t remaining = *pos;
+
+	spin_lock_bh(&recent_lock);
+
+	st->bucket = 0;
+	while (st->bucket < ip_list_hash_size) {
+		list_for_each_entry(e, &t->iphash[st->bucket], list) {
+			if (remaining == 0)
+				return e;
+			remaining--;
+		}
+		st->bucket++;
+	}
+	return NULL;
+}
+
+/*
+ * seq_file next: advance to the entry after v, moving on to following
+ * buckets when the current one is exhausted.  Returns NULL, without
+ * touching *pos, once the last bucket has been passed.
+ */
+static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct recent_iter_state *st = seq->private;
+	const struct recent_table *t = st->table;
+	const struct recent_entry *cur = v;
+	const struct list_head *node = cur->list.next;
+
+	for (;;) {
+		/* a real entry, not the bucket's list head */
+		if (node != &t->iphash[st->bucket])
+			break;
+		st->bucket++;
+		if (st->bucket >= ip_list_hash_size)
+			return NULL;
+		node = t->iphash[st->bucket].next;
+	}
+	(*pos)++;
+	return list_entry(node, struct recent_entry, list);
+}
+
+/* seq_file stop: release the lock taken in recent_seq_start(). */
+static void recent_seq_stop(struct seq_file *s, void *v)
+	__releases(recent_lock)
+{
+	spin_unlock_bh(&recent_lock);
+}
+
+/*
+ * seq_file show: print one entry as a line containing the address, TTL,
+ * the most recent timestamp, the current slot index and then every stored
+ * timestamp.
+ */
+static int recent_seq_show(struct seq_file *seq, void *v)
+{
+	const struct recent_entry *e = v;
+	unsigned int i;
+
+	/*
+	 * Slot written most recently, i.e. the one before e->index.  Adding
+	 * ip_pkt_list_tot first keeps the subtraction from going below
+	 * zero when e->index is 0; "(e->index - 1) % ip_pkt_list_tot" was
+	 * evaluated in unsigned arithmetic and then picked the wrong slot.
+	 */
+	i = (e->index + ip_pkt_list_tot - 1) % ip_pkt_list_tot;
+	if (e->family == NFPROTO_IPV4)
+		seq_printf(seq, "src=" NIPQUAD_FMT " ttl: %u last_seen: %lu "
+			   "oldest_pkt: %u", NIPQUAD(e->addr.ip), e->ttl,
+			   e->stamps[i], e->index);
+	else
+		seq_printf(seq, "src=" NIP6_FMT " ttl: %u last_seen: %lu "
+			   "oldest_pkt: %u", NIP6(e->addr.in6), e->ttl,
+			   e->stamps[i], e->index);
+	for (i = 0; i < e->nstamps; i++)
+		seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]);
+	seq_printf(seq, "\n");
+	return 0;
+}
+
+/* seq_file iterator callbacks used to dump a table through /proc. */
+static const struct seq_operations recent_seq_ops = {
+	.start = recent_seq_start,
+	.stop  = recent_seq_stop,
+	.next  = recent_seq_next,
+	.show  = recent_seq_show,
+};
+
+/*
+ * Open handler for a table's proc file: allocate the iterator state and
+ * point it at the table stored in the proc entry's data.  Returns -ENOMEM
+ * when the state cannot be allocated.
+ */
+static int recent_seq_open(struct inode *inode, struct file *file)
+{
+	struct recent_iter_state *st =
+		__seq_open_private(file, &recent_seq_ops, sizeof(*st));
+
+	if (st == NULL)
+		return -ENOMEM;
+
+	st->table = PDE(inode)->data;
+	return 0;
+}
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+/*
+ * Open handler for the compat proc file: print a deprecation notice the
+ * first time it is used, then behave like recent_seq_open().
+ */
+static int recent_old_seq_open(struct inode *inode, struct file *filp)
+{
+	static bool warned_of_old;
+
+	if (unlikely(!warned_of_old)) {
+		warned_of_old = true;
+		printk(KERN_INFO KBUILD_MODNAME ": Use of /proc/net/ipt_recent"
+		       " is deprecated; use /proc/net/xt_recent.\n");
+	}
+	return recent_seq_open(inode, filp);
+}
+
+/*
+ * Write handler for the compat proc file (IPv4 only).
+ *
+ * Accepted input, after optional leading whitespace:
+ *   "clear"   - flush the table
+ *   "-a.b.c.d" - remove the address
+ *   "+a.b.c.d" or "a.b.c.d" - add the address, or refresh it if present
+ *
+ * Returns the number of bytes consumed, or -EFAULT if the user buffer
+ * cannot be read.  Input longer than the buffer is truncated.
+ */
+static ssize_t recent_old_proc_write(struct file *file,
+				     const char __user *input,
+				     size_t size, loff_t *loff)
+{
+	const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct recent_table *t = pde->data;
+	struct recent_entry *e;
+	/* one spare byte so that a full-length write can be terminated */
+	char buf[sizeof("+255.255.255.255") + 1], *c = buf;
+	__be32 addr;
+	int add;
+
+	if (size > sizeof(buf) - 1)
+		size = sizeof(buf) - 1;
+	if (copy_from_user(buf, input, size))
+		return -EFAULT;
+	/*
+	 * Terminate the copy: isspace(), strncmp() and in_aton() below all
+	 * scan until they hit a terminator and would otherwise run past buf.
+	 */
+	buf[size] = '\0';
+
+	while (isspace(*c))
+		c++;
+
+	if (size - (c - buf) < 5)
+		return c - buf;
+	if (!strncmp(c, "clear", 5)) {
+		c += 5;
+		spin_lock_bh(&recent_lock);
+		recent_table_flush(t);
+		spin_unlock_bh(&recent_lock);
+		return c - buf;
+	}
+
+	switch (*c) {
+	case '-':
+		add = 0;
+		c++;
+		break;
+	case '+':
+		c++;
+		/* fall through: an explicit '+' means "add", like no prefix */
+	default:
+		add = 1;
+		break;
+	}
+	addr = in_aton(c);
+
+	spin_lock_bh(&recent_lock);
+	e = recent_entry_lookup(t, (const void *)&addr, NFPROTO_IPV4, 0);
+	if (e == NULL) {
+		if (add)
+			recent_entry_init(t, (const void *)&addr,
+					  NFPROTO_IPV4, 0);
+	} else {
+		if (add)
+			recent_entry_update(t, e);
+		else
+			recent_entry_remove(t, e);
+	}
+	spin_unlock_bh(&recent_lock);
+	return size;
+}
+
+ /*
+  * File operations for the deprecated ipt_recent proc files: reads go
+  * through the seq_file helpers, writes through recent_old_proc_write().
+  */
+ static const struct file_operations recent_old_fops = {
+ .open = recent_old_seq_open,
+ .read = seq_read,
+ .write = recent_old_proc_write,
+ .release = seq_release_private,
+ .owner = THIS_MODULE,
+ };
+#endif
+
+ /*
+  * recent_mt_proc_write - ->write handler of the xt_recent proc files.
+  * @file:  proc file; the data field of its proc_dir_entry is the table
+  * @input: user-space buffer
+  * @size:  number of bytes offered by the caller
+  * @loff:  file position; only a write at position 0 is accepted
+  *
+  * The first character selects the operation:
+  *   '/'  flush the table
+  *   '-'  remove the address that follows
+  *   '+'  add the address that follows, or refresh it if present
+  * An address containing ':' is parsed as IPv6, anything else as IPv4.
+  *
+  * Returns the number of bytes consumed, 0 for an empty write, -EFAULT
+  * if the user buffer cannot be read, -ESPIPE for a non-zero position,
+  * or -EINVAL for an unknown command or an unparsable address.
+  */
+ static ssize_t
+ recent_mt_proc_write(struct file *file, const char __user *input,
+ 		     size_t size, loff_t *loff)
+ {
+ 	const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+ 	struct recent_table *t = pde->data;
+ 	struct recent_entry *e;
+ 	char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
+ 	const char *c = buf;
+ 	union nf_inet_addr addr;
+ 	u_int16_t family;
+ 	bool add, succ;
+
+ 	if (size == 0)
+ 		return 0;
+ 	if (size > sizeof(buf))
+ 		size = sizeof(buf);
+ 	if (copy_from_user(buf, input, size) != 0)
+ 		return -EFAULT;
+
+ 	/* Strict protocol! */
+ 	if (*loff != 0)
+ 		return -ESPIPE;
+ 	switch (*c) {
+ 	case '/': /* flush table */
+ 		spin_lock_bh(&recent_lock);
+ 		recent_table_flush(t);
+ 		spin_unlock_bh(&recent_lock);
+ 		return size;
+ 	case '-': /* remove address */
+ 		add = false;
+ 		break;
+ 	case '+': /* add address */
+ 		add = true;
+ 		break;
+ 	default:
+ 		printk(KERN_INFO KBUILD_MODNAME ": Need +ip, -ip or /\n");
+ 		return -EINVAL;
+ 	}
+
+ 	++c;
+ 	--size;
+
+ 	/*
+ 	 * An IPv4 parse fills in only the first four bytes.  Zero the whole
+ 	 * union first so the bytes handed to the lookup/init helpers are
+ 	 * deterministic rather than leftover stack contents.
+ 	 */
+ 	memset(&addr, 0, sizeof(addr));
+ 	if (strnchr(c, size, ':') != NULL) {
+ 		family = NFPROTO_IPV6;
+ 		succ   = in6_pton(c, size, (void *)&addr, '\n', NULL);
+ 	} else {
+ 		family = NFPROTO_IPV4;
+ 		succ   = in4_pton(c, size, (void *)&addr, '\n', NULL);
+ 	}
+
+ 	if (!succ) {
+ 		printk(KERN_INFO KBUILD_MODNAME ": illegal address written "
+ 		       "to procfs\n");
+ 		return -EINVAL;
+ 	}
+
+ 	spin_lock_bh(&recent_lock);
+ 	e = recent_entry_lookup(t, &addr, family, 0);
+ 	if (e == NULL) {
+ 		if (add)
+ 			recent_entry_init(t, &addr, family, 0);
+ 	} else {
+ 		if (add)
+ 			recent_entry_update(t, e);
+ 		else
+ 			recent_entry_remove(t, e);
+ 	}
+ 	spin_unlock_bh(&recent_lock);
+ 	/* Note we removed one above */
+ 	*loff += size + 1;
+ 	return size + 1;
+ }
+
+ /*
+  * File operations for the xt_recent proc files: reads go through the
+  * seq_file helpers, writes through recent_mt_proc_write().
+  */
+ static const struct file_operations recent_mt_fops = {
+ .open = recent_seq_open,
+ .read = seq_read,
+ .write = recent_mt_proc_write,
+ .release = seq_release_private,
+ .owner = THIS_MODULE,
+ };
+#endif /* CONFIG_PROC_FS */
+
+ /*
+  * Registration records for the "recent" match, revision 0.  The IPv4 and
+  * IPv6 entries differ only in .family and share all callbacks.
+  */
+ static struct xt_match recent_mt_reg[] __read_mostly = {
+ {
+ .name = "recent",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .match = recent_mt,
+ .matchsize = sizeof(struct xt_recent_mtinfo),
+ .checkentry = recent_mt_check,
+ .destroy = recent_mt_destroy,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "recent",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .match = recent_mt,
+ .matchsize = sizeof(struct xt_recent_mtinfo),
+ .checkentry = recent_mt_check,
+ .destroy = recent_mt_destroy,
+ .me = THIS_MODULE,
+ },
+ };
+
+ /*
+  * Module init: validates the list-size parameters, derives the hash size,
+  * registers the matches and, with CONFIG_PROC_FS, creates the
+  * "xt_recent" proc directory (plus "ipt_recent" when the compat option
+  * is enabled).
+  *
+  * Returns 0 on success, -EINVAL for unusable parameters, the error from
+  * xt_register_matches(), or -ENOMEM when a proc directory cannot be
+  * created; in the -ENOMEM cases everything set up so far is undone.
+  */
+ static int __init recent_mt_init(void)
+ {
+ int err;
+
+ if (!ip_list_tot || !ip_pkt_list_tot || ip_pkt_list_tot > 255)
+ return -EINVAL;
+ /* NOTE(review): for a very large ip_list_tot fls() may equal the width of int, which would make this shift overflow - confirm the parameter's range. */
+ ip_list_hash_size = 1 << fls(ip_list_tot);
+
+ err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
+ #ifdef CONFIG_PROC_FS
+ if (err)
+ return err;
+ recent_proc_dir = proc_mkdir("xt_recent", init_net.proc_net);
+ if (recent_proc_dir == NULL) {
+ xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
+ err = -ENOMEM;
+ }
+ #ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+ /* Do not attempt the compat directory if the main one already failed. */
+ if (err < 0)
+ return err;
+ proc_old_dir = proc_mkdir("ipt_recent", init_net.proc_net);
+ if (proc_old_dir == NULL) {
+ remove_proc_entry("xt_recent", init_net.proc_net);
+ xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
+ err = -ENOMEM;
+ }
+ #endif
+ #endif
+ return err;
+ }
+
+ /*
+  * Module exit: unregisters the matches and removes the proc directories
+  * created by recent_mt_init().  It is a bug for any table to still be on
+  * the "tables" list at this point.
+  */
+ static void __exit recent_mt_exit(void)
+ {
+ BUG_ON(!list_empty(&tables));
+ xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
+ #ifdef CONFIG_PROC_FS
+ #ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+ remove_proc_entry("ipt_recent", init_net.proc_net);
+ #endif
+ remove_proc_entry("xt_recent", init_net.proc_net);
+ #endif
+ }
+
+module_init(recent_mt_init);
+module_exit(recent_mt_exit);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
new file mode 100644
index 0000000..a189ada
--- /dev/null
+++ b/net/netfilter/xt_sctp.c
@@ -0,0 +1,196 @@
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <linux/sctp.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_sctp.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Kiran Kumar Immidi");
+MODULE_DESCRIPTION("Xtables: SCTP protocol packet match");
+MODULE_ALIAS("ipt_sctp");
+MODULE_ALIAS("ip6t_sctp");
+
+ /* duprintf() prints through printk() only when DEBUG_SCTP is defined; otherwise it expands to nothing. */
+ #ifdef DEBUG_SCTP
+ #define duprintf(format, args...) printk(format , ## args)
+ #else
+ #define duprintf(format, args...)
+ #endif
+
+ /*
+  * SCCHECK(cond, option, flag, invflag): true when `option` is not set in
+  * `flag`; otherwise `cond` XORed with whether `option` is set in
+  * `invflag`.  `cond` is not evaluated when `option` is not set in `flag`.
+  */
+ #define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
+ || (!!((invflag) & (option)) ^ (cond)))
+
+ /*
+  * match_flags - compare a chunk's flags with the rule's flag table.
+  *
+  * The first table entry whose chunktype equals @chunktype decides the
+  * result: true if @chunkflags masked with that entry's flag_mask equals
+  * its flag value.  A chunk type that has no entry matches.
+  */
+ static bool
+ match_flags(const struct xt_sctp_flag_info *flag_info,
+ 	    const int flag_count,
+ 	    u_int8_t chunktype,
+ 	    u_int8_t chunkflags)
+ {
+ 	int idx = 0;
+
+ 	while (idx < flag_count) {
+ 		const struct xt_sctp_flag_info *fi = &flag_info[idx++];
+
+ 		if (fi->chunktype != chunktype)
+ 			continue;
+ 		return (chunkflags & fi->flag_mask) == fi->flag;
+ 	}
+
+ 	/* No entry for this chunk type: nothing to object to. */
+ 	return true;
+ }
+
+ /*
+  * match_packet - walk the chunk headers of a packet and evaluate the
+  * rule's chunk-type condition.
+  * @skb:     packet
+  * @offset:  offset of the first chunk header within @skb
+  * @info:    rule data (chunk map, match type, flag table)
+  * @hotdrop: set to true when a chunk header cannot be read or has a
+  *           length of zero
+  *
+  * Result by info->chunk_match_type:
+  *   ANY  - true as soon as one chunk in the map passes match_flags()
+  *   ALL  - true if every type in the map was seen with passing flags
+  *   ONLY - false as soon as a chunk is outside the map or fails
+  *          match_flags(), true otherwise
+  */
+ static inline bool
+ match_packet(const struct sk_buff *skb,
+ unsigned int offset,
+ const struct xt_sctp_info *info,
+ bool *hotdrop)
+ {
+ u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)];
+ const sctp_chunkhdr_t *sch;
+ sctp_chunkhdr_t _sch;
+ int chunk_match_type = info->chunk_match_type;
+ const struct xt_sctp_flag_info *flag_info = info->flag_info;
+ int flag_count = info->flag_count;
+
+ #ifdef DEBUG_SCTP
+ int i = 0;
+ #endif
+
+ /* For ALL, work on a private copy of the map and clear types as they are seen. */
+ if (chunk_match_type == SCTP_CHUNK_MATCH_ALL)
+ SCTP_CHUNKMAP_COPY(chunkmapcopy, info->chunkmap);
+
+ do {
+ sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch);
+ if (sch == NULL || sch->length == 0) {
+ duprintf("Dropping invalid SCTP packet.\n");
+ *hotdrop = true;
+ return false;
+ }
+
+ duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n",
+ ++i, offset, sch->type, htons(sch->length), sch->flags);
+
+ /* Advance to the next chunk; the length is rounded up to a multiple of 4. */
+ offset += (ntohs(sch->length) + 3) & ~3;
+
+ duprintf("skb->len: %d\toffset: %d\n", skb->len, offset);
+
+ if (SCTP_CHUNKMAP_IS_SET(info->chunkmap, sch->type)) {
+ switch (chunk_match_type) {
+ case SCTP_CHUNK_MATCH_ANY:
+ if (match_flags(flag_info, flag_count,
+ sch->type, sch->flags)) {
+ return true;
+ }
+ break;
+
+ case SCTP_CHUNK_MATCH_ALL:
+ if (match_flags(flag_info, flag_count,
+ sch->type, sch->flags))
+ SCTP_CHUNKMAP_CLEAR(chunkmapcopy, sch->type);
+ break;
+
+ case SCTP_CHUNK_MATCH_ONLY:
+ if (!match_flags(flag_info, flag_count,
+ sch->type, sch->flags))
+ return false;
+ break;
+ }
+ } else {
+ switch (chunk_match_type) {
+ case SCTP_CHUNK_MATCH_ONLY:
+ return false;
+ }
+ }
+ } while (offset < skb->len);
+
+ /* All chunks inspected without an early verdict. */
+ switch (chunk_match_type) {
+ case SCTP_CHUNK_MATCH_ALL:
+ return SCTP_CHUNKMAP_IS_CLEAR(chunkmapcopy);
+ case SCTP_CHUNK_MATCH_ANY:
+ return false;
+ case SCTP_CHUNK_MATCH_ONLY:
+ return true;
+ }
+
+ /* This will never be reached, but required to stop compiler whine */
+ return false;
+ }
+
+ /*
+  * sctp_mt - match callback of the "sctp" match.
+  *
+  * Non-first fragments never match.  A packet whose SCTP header cannot be
+  * read is hot-dropped.  Otherwise the source-port, destination-port and
+  * chunk-type conditions are evaluated in that order through SCCHECK();
+  * the chunk walk only runs when both port conditions passed and the
+  * chunk-type option is enabled in info->flags.
+  */
+ static bool
+ sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ {
+ 	const struct xt_sctp_info *info = par->matchinfo;
+ 	const sctp_sctphdr_t *sh;
+ 	sctp_sctphdr_t _sh;
+
+ 	if (par->fragoff != 0) {
+ 		duprintf("Dropping non-first fragment.. FIXME\n");
+ 		return false;
+ 	}
+
+ 	sh = skb_header_pointer(skb, par->thoff, sizeof(_sh), &_sh);
+ 	if (sh == NULL) {
+ 		duprintf("Dropping evil TCP offset=0 tinygram.\n");
+ 		*par->hotdrop = true;
+ 		return false;
+ 	}
+ 	duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
+
+ 	if (!SCCHECK(ntohs(sh->source) >= info->spts[0]
+ 		     && ntohs(sh->source) <= info->spts[1],
+ 		     XT_SCTP_SRC_PORTS, info->flags, info->invflags))
+ 		return false;
+
+ 	if (!SCCHECK(ntohs(sh->dest) >= info->dpts[0]
+ 		     && ntohs(sh->dest) <= info->dpts[1],
+ 		     XT_SCTP_DEST_PORTS, info->flags, info->invflags))
+ 		return false;
+
+ 	return SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t),
+ 				    info, par->hotdrop),
+ 		       XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
+ }
+
+ /*
+  * sctp_mt_check - validate the rule data when a rule is inserted.
+  *
+  * Rejects unknown bits in flags or invflags, inversion bits without the
+  * corresponding flag, and - when chunk-type matching is requested - a
+  * chunk_match_type that has none of the ALL/ANY/ONLY bits set.
+  */
+ static bool sctp_mt_check(const struct xt_mtchk_param *par)
+ {
+ 	const struct xt_sctp_info *info = par->matchinfo;
+
+ 	if (info->flags & ~XT_SCTP_VALID_FLAGS)
+ 		return false;
+ 	if (info->invflags & ~XT_SCTP_VALID_FLAGS)
+ 		return false;
+ 	/* Only an enabled option may be inverted. */
+ 	if (info->invflags & ~info->flags)
+ 		return false;
+
+ 	if (!(info->flags & XT_SCTP_CHUNK_TYPES))
+ 		return true;
+
+ 	return (info->chunk_match_type &
+ 		(SCTP_CHUNK_MATCH_ALL
+ 		 | SCTP_CHUNK_MATCH_ANY
+ 		 | SCTP_CHUNK_MATCH_ONLY)) != 0;
+ }
+
+ /*
+  * Registration records for the "sctp" match, restricted to IPPROTO_SCTP.
+  * The IPv4 and IPv6 entries differ only in .family.
+  */
+ static struct xt_match sctp_mt_reg[] __read_mostly = {
+ {
+ .name = "sctp",
+ .family = NFPROTO_IPV4,
+ .checkentry = sctp_mt_check,
+ .match = sctp_mt,
+ .matchsize = sizeof(struct xt_sctp_info),
+ .proto = IPPROTO_SCTP,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "sctp",
+ .family = NFPROTO_IPV6,
+ .checkentry = sctp_mt_check,
+ .match = sctp_mt,
+ .matchsize = sizeof(struct xt_sctp_info),
+ .proto = IPPROTO_SCTP,
+ .me = THIS_MODULE
+ },
+ };
+
+ /* Module init: registers every entry of sctp_mt_reg; returns the result of xt_register_matches(). */
+ static int __init sctp_mt_init(void)
+ {
+ return xt_register_matches(sctp_mt_reg, ARRAY_SIZE(sctp_mt_reg));
+ }
+
+ /* Module exit: unregisters every entry of sctp_mt_reg. */
+ static void __exit sctp_mt_exit(void)
+ {
+ xt_unregister_matches(sctp_mt_reg, ARRAY_SIZE(sctp_mt_reg));
+ }
+
+module_init(sctp_mt_init);
+module_exit(sctp_mt_exit);
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
new file mode 100644
index 0000000..1acc089
--- /dev/null
+++ b/net/netfilter/xt_socket.c
@@ -0,0 +1,185 @@
+/*
+ * Transparent proxy support for Linux/iptables
+ *
+ * Copyright (C) 2007-2008 BalaBit IT Ltd.
+ * Author: Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
+#include <net/netfilter/nf_tproxy_core.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#define XT_SOCKET_HAVE_CONNTRACK 1
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+ /*
+  * extract_icmp_fields - pull the addresses and ports of the packet
+  * quoted inside an ICMP error message.
+  * @skb:      packet carrying the ICMP message
+  * @protocol: out, protocol of the quoted packet (TCP or UDP)
+  * @raddr:    out, destination address of the quoted packet
+  * @laddr:    out, source address of the quoted packet
+  * @rport:    out, second port field of the quoted transport header
+  * @lport:    out, first port field of the quoted transport header
+  *
+  * Returns 0 when all fields were extracted.  Returns 1, leaving the
+  * outputs untouched, when a header cannot be read, the ICMP type is not
+  * one of the handled error types, or the quoted packet is neither TCP
+  * nor UDP.
+  */
+ static int
+ extract_icmp_fields(const struct sk_buff *skb,
+ 		    u8 *protocol,
+ 		    __be32 *raddr,
+ 		    __be32 *laddr,
+ 		    __be16 *rport,
+ 		    __be16 *lport)
+ {
+ 	unsigned int off = ip_hdrlen(skb);
+ 	struct iphdr *inner, _inner;
+ 	struct icmphdr *icmph, _icmph;
+ 	__be16 *ports, _ports[2];
+
+ 	icmph = skb_header_pointer(skb, off, sizeof(_icmph), &_icmph);
+ 	if (!icmph)
+ 		return 1;
+
+ 	/* Only these message types are handled. */
+ 	if (icmph->type != ICMP_DEST_UNREACH &&
+ 	    icmph->type != ICMP_SOURCE_QUENCH &&
+ 	    icmph->type != ICMP_REDIRECT &&
+ 	    icmph->type != ICMP_TIME_EXCEEDED &&
+ 	    icmph->type != ICMP_PARAMETERPROB)
+ 		return 1;
+
+ 	off += sizeof(struct icmphdr);
+ 	inner = skb_header_pointer(skb, off, sizeof(_inner), &_inner);
+ 	if (!inner)
+ 		return 1;
+
+ 	if (inner->protocol != IPPROTO_TCP && inner->protocol != IPPROTO_UDP)
+ 		return 1;
+
+ 	/* The two port fields sit right behind the quoted IP header. */
+ 	off += inner->ihl << 2;
+ 	ports = skb_header_pointer(skb, off, sizeof(_ports), &_ports);
+ 	if (!ports)
+ 		return 1;
+
+ 	/* the inside IP packet is the one quoted from our side, thus
+ 	 * its saddr is the local address */
+ 	*protocol = inner->protocol;
+ 	*laddr = inner->saddr;
+ 	*lport = ports[0];
+ 	*raddr = inner->daddr;
+ 	*rport = ports[1];
+
+ 	return 0;
+ }
+
+
+ /*
+  * socket_mt - match callback of the "socket" match.
+  *
+  * Takes the address/port tuple from the TCP/UDP header, or from the
+  * packet quoted in an ICMP error, and asks nf_tproxy_get_sock_v4() for a
+  * socket with it.  Returns true if a socket was found and it is not
+  * treated as a wildcard (see below); false otherwise, including for
+  * other protocols and unreadable headers.
+  */
+ static bool
+ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ {
+ const struct iphdr *iph = ip_hdr(skb);
+ struct udphdr _hdr, *hp = NULL;
+ struct sock *sk;
+ __be32 daddr, saddr;
+ __be16 dport, sport;
+ u8 protocol;
+ #ifdef XT_SOCKET_HAVE_CONNTRACK
+ struct nf_conn const *ct;
+ enum ip_conntrack_info ctinfo;
+ #endif
+
+ /* A struct udphdr is read for TCP as well; only the source and dest fields are used. */
+ if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
+ hp = skb_header_pointer(skb, ip_hdrlen(skb),
+ sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return false;
+
+ protocol = iph->protocol;
+ saddr = iph->saddr;
+ sport = hp->source;
+ daddr = iph->daddr;
+ dport = hp->dest;
+
+ } else if (iph->protocol == IPPROTO_ICMP) {
+ if (extract_icmp_fields(skb, &protocol, &saddr, &daddr,
+ &sport, &dport))
+ return false;
+ } else {
+ return false;
+ }
+
+ #ifdef XT_SOCKET_HAVE_CONNTRACK
+ /* Do the lookup with the original socket address in case this is a
+ * reply packet of an established SNAT-ted connection. */
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct && (ct != &nf_conntrack_untracked) &&
+ ((iph->protocol != IPPROTO_ICMP &&
+ ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) ||
+ (iph->protocol == IPPROTO_ICMP &&
+ ctinfo == IP_CT_IS_REPLY + IP_CT_RELATED)) &&
+ (ct->status & IPS_SRC_NAT_DONE)) {
+
+ daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+ dport = (iph->protocol == IPPROTO_TCP) ?
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
+ }
+ #endif
+
+ sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
+ saddr, daddr, sport, dport, par->in, false);
+ if (sk != NULL) {
+ /* Read the socket fields before the socket is handed back. */
+ bool wildcard = (sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->rcv_saddr == 0);
+
+ nf_tproxy_put_sock(sk);
+ /* From here on sk is only compared with NULL and printed, never dereferenced. */
+ if (wildcard)
+ sk = NULL;
+ }
+
+ pr_debug("socket match: proto %u %08x:%u -> %08x:%u "
+ "(orig %08x:%u) sock %p\n",
+ protocol, ntohl(saddr), ntohs(sport),
+ ntohl(daddr), ntohs(dport),
+ ntohl(iph->daddr), hp ? ntohs(hp->dest) : 0, sk);
+
+ return (sk != NULL);
+ }
+
+ /* Registration record for the "socket" match: AF_INET only, usable in the PRE_ROUTING hook only. */
+ static struct xt_match socket_mt_reg __read_mostly = {
+ .name = "socket",
+ .family = AF_INET,
+ .match = socket_mt,
+ .hooks = 1 << NF_INET_PRE_ROUTING,
+ .me = THIS_MODULE,
+ };
+
+ /* Module init: calls nf_defrag_ipv4_enable(), then registers the match; returns the result of xt_register_match(). */
+ static int __init socket_mt_init(void)
+ {
+ nf_defrag_ipv4_enable();
+ return xt_register_match(&socket_mt_reg);
+ }
+
+ /* Module exit: unregisters the "socket" match. */
+ static void __exit socket_mt_exit(void)
+ {
+ xt_unregister_match(&socket_mt_reg);
+ }
+
+module_init(socket_mt_init);
+module_exit(socket_mt_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
+MODULE_DESCRIPTION("x_tables socket match module");
+MODULE_ALIAS("ipt_socket");
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
new file mode 100644
index 0000000..4c946cb
--- /dev/null
+++ b/net/netfilter/xt_state.c
@@ -0,0 +1,87 @@
+/* Kernel module to match connection tracking information. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_state.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+MODULE_DESCRIPTION("ip[6]_tables connection tracking state match module");
+MODULE_ALIAS("ipt_state");
+MODULE_ALIAS("ip6t_state");
+
+ /*
+  * state_mt - match callback of the "state" match.
+  *
+  * Maps the packet onto one state bit - UNTRACKED, INVALID when no
+  * conntrack entry is attached, otherwise the bit derived from the
+  * conntrack info - and reports whether that bit is set in the rule's
+  * statemask.
+  */
+ static bool
+ state_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ {
+ 	const struct xt_state_info *sinfo = par->matchinfo;
+ 	enum ip_conntrack_info ctinfo;
+
+ 	if (nf_ct_is_untracked(skb))
+ 		return (sinfo->statemask & XT_STATE_UNTRACKED) != 0;
+
+ 	if (!nf_ct_get(skb, &ctinfo))
+ 		return (sinfo->statemask & XT_STATE_INVALID) != 0;
+
+ 	return (sinfo->statemask & XT_STATE_BIT(ctinfo)) != 0;
+ }
+
+ /*
+  * state_mt_check - rule-insertion hook.  Takes a reference on the
+  * conntrack layer-3 support for the match's family; logs a warning and
+  * rejects the rule if that fails.
+  */
+ static bool state_mt_check(const struct xt_mtchk_param *par)
+ {
+ 	if (nf_ct_l3proto_try_module_get(par->match->family) >= 0)
+ 		return true;
+
+ 	printk(KERN_WARNING "can't load conntrack support for "
+ 	       "proto=%u\n", par->match->family);
+ 	return false;
+ }
+
+ /* Rule-removal hook: releases the layer-3 conntrack module reference for the match's family (counterpart of the get in state_mt_check()). */
+ static void state_mt_destroy(const struct xt_mtdtor_param *par)
+ {
+ nf_ct_l3proto_module_put(par->match->family);
+ }
+
+ /* Registration records for the "state" match; the IPv4 and IPv6 entries differ only in .family. */
+ static struct xt_match state_mt_reg[] __read_mostly = {
+ {
+ .name = "state",
+ .family = NFPROTO_IPV4,
+ .checkentry = state_mt_check,
+ .match = state_mt,
+ .destroy = state_mt_destroy,
+ .matchsize = sizeof(struct xt_state_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "state",
+ .family = NFPROTO_IPV6,
+ .checkentry = state_mt_check,
+ .match = state_mt,
+ .destroy = state_mt_destroy,
+ .matchsize = sizeof(struct xt_state_info),
+ .me = THIS_MODULE,
+ },
+ };
+
+ /* Module init: registers every entry of state_mt_reg; returns the result of xt_register_matches(). */
+ static int __init state_mt_init(void)
+ {
+ return xt_register_matches(state_mt_reg, ARRAY_SIZE(state_mt_reg));
+ }
+
+ /* Module exit: unregisters every entry of state_mt_reg. */
+ static void __exit state_mt_exit(void)
+ {
+ xt_unregister_matches(state_mt_reg, ARRAY_SIZE(state_mt_reg));
+ }
+
+module_init(state_mt_init);
+module_exit(state_mt_exit);
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
new file mode 100644
index 0000000..0d75141
--- /dev/null
+++ b/net/netfilter/xt_statistic.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on ipt_random and ipt_nth by Fabrice MARIE <fabrice@netfilter.org>.
+ */
+
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+
+#include <linux/netfilter/xt_statistic.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)");
+MODULE_ALIAS("ipt_statistic");
+MODULE_ALIAS("ip6t_statistic");
+
+static DEFINE_SPINLOCK(nth_lock);
+
+ /*
+  * statistic_mt - match callback of the "statistic" match.
+  *
+  * RANDOM mode: hit when a 31-bit random number is below the configured
+  * probability.  NTH mode: under nth_lock, hit when the counter kept in
+  * the master copy of the rule data equals "every", resetting it to 0;
+  * the counter is incremented on every call.  The result is the hit
+  * state, inverted when XT_STATISTIC_INVERT is set in the rule's flags.
+  */
+ static bool
+ statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ {
+ 	struct xt_statistic_info *info = (void *)par->matchinfo;
+ 	const bool invert = info->flags & XT_STATISTIC_INVERT;
+ 	bool hit = false;
+
+ 	if (info->mode == XT_STATISTIC_MODE_RANDOM) {
+ 		if ((net_random() & 0x7FFFFFFF) < info->u.random.probability)
+ 			hit = true;
+ 	} else if (info->mode == XT_STATISTIC_MODE_NTH) {
+ 		/* The counter is kept in the copy that ->master points to. */
+ 		struct xt_statistic_info *master = info->master;
+
+ 		spin_lock_bh(&nth_lock);
+ 		if (master->u.nth.count++ == master->u.nth.every) {
+ 			master->u.nth.count = 0;
+ 			hit = true;
+ 		}
+ 		spin_unlock_bh(&nth_lock);
+ 	}
+
+ 	return hit != invert;
+ }
+
+ /*
+  * statistic_mt_check - rule-insertion hook.  Rejects a mode above
+  * XT_STATISTIC_MODE_MAX and flag bits outside XT_STATISTIC_MASK, then
+  * makes the rule data its own master.
+  */
+ static bool statistic_mt_check(const struct xt_mtchk_param *par)
+ {
+ 	struct xt_statistic_info *info = par->matchinfo;
+
+ 	if (info->mode > XT_STATISTIC_MODE_MAX)
+ 		return false;
+ 	if (info->flags & ~XT_STATISTIC_MASK)
+ 		return false;
+
+ 	info->master = info;
+ 	return true;
+ }
+
+ /* Registration record for the "statistic" match, revision 0, not tied to a protocol family (NFPROTO_UNSPEC). */
+ static struct xt_match xt_statistic_mt_reg __read_mostly = {
+ .name = "statistic",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .match = statistic_mt,
+ .checkentry = statistic_mt_check,
+ .matchsize = sizeof(struct xt_statistic_info),
+ .me = THIS_MODULE,
+ };
+
+ /* Module init: registers the "statistic" match; returns the result of xt_register_match(). */
+ static int __init statistic_mt_init(void)
+ {
+ return xt_register_match(&xt_statistic_mt_reg);
+ }
+
+ /* Module exit: unregisters the "statistic" match. */
+ static void __exit statistic_mt_exit(void)
+ {
+ xt_unregister_match(&xt_statistic_mt_reg);
+ }
+
+module_init(statistic_mt_init);
+module_exit(statistic_mt_exit);
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
new file mode 100644
index 0000000..b4d7741
--- /dev/null
+++ b/net/netfilter/xt_string.c
@@ -0,0 +1,113 @@
+/* String matching match for iptables
+ *
+ * (C) 2005 Pablo Neira Ayuso <pablo@eurodev.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_string.h>
+#include <linux/textsearch.h>
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>");
+MODULE_DESCRIPTION("Xtables: string-based matching");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_string");
+MODULE_ALIAS("ip6t_string");
+
+ /*
+  * string_mt - match callback of the "string" match.
+  *
+  * Searches the packet between conf->from_offset and conf->to_offset with
+  * the text-search configuration prepared by string_mt_check().  Returns
+  * true when the pattern is found, or the opposite when the rule is
+  * inverted (v0: the invert field, v1: XT_STRING_FLAG_INVERT in flags).
+  */
+ static bool
+ string_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ {
+ 	const struct xt_string_info *conf = par->matchinfo;
+ 	struct ts_state state;
+ 	/*
+ 	 * bool, not int: the value is XORed with a 0/1 comparison result
+ 	 * below, so any other non-zero value would make the match always
+ 	 * succeed instead of inverting it.
+ 	 */
+ 	bool invert;
+
+ 	memset(&state, 0, sizeof(struct ts_state));
+
+ 	if (par->match->revision == 0)
+ 		invert = conf->u.v0.invert;
+ 	else
+ 		invert = conf->u.v1.flags & XT_STRING_FLAG_INVERT;
+
+ 	return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
+ 			      conf->to_offset, conf->config, &state)
+ 		!= UINT_MAX) ^ invert;
+ }
+
+#define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m))
+
+ /*
+  * string_mt_check - rule-insertion hook.
+  *
+  * Rejects a search window whose start lies behind its end, an algorithm
+  * name that is not NUL-terminated within its field, an over-long
+  * pattern and, for revision 1, unknown flag bits.  On success the
+  * prepared text-search configuration is stored in conf->config.
+  */
+ static bool string_mt_check(const struct xt_mtchk_param *par)
+ {
+ 	struct xt_string_info *conf = par->matchinfo;
+ 	struct ts_config *ts_conf;
+ 	int flags = TS_AUTOLOAD;
+
+ 	/* Damn, can't handle this case properly with iptables... */
+ 	if (conf->from_offset > conf->to_offset ||
+ 	    conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0' ||
+ 	    conf->patlen > XT_STRING_MAX_PATTERN_SIZE)
+ 		return false;
+
+ 	if (par->match->revision == 1) {
+ 		if (conf->u.v1.flags &
+ 		    ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT))
+ 			return false;
+ 		if (conf->u.v1.flags & XT_STRING_FLAG_IGNORECASE)
+ 			flags |= TS_IGNORECASE;
+ 	}
+
+ 	ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
+ 				     GFP_KERNEL, flags);
+ 	if (IS_ERR(ts_conf))
+ 		return false;
+
+ 	conf->config = ts_conf;
+ 	return true;
+ }
+
+ /* Rule-removal hook: destroys the text-search configuration stored in the rule data by string_mt_check(). */
+ static void string_mt_destroy(const struct xt_mtdtor_param *par)
+ {
+ textsearch_destroy(STRING_TEXT_PRIV(par->matchinfo)->config);
+ }
+
+ /*
+  * Registration records for the "string" match, revisions 0 and 1.  Both
+  * share the same callbacks and data size; the callbacks branch on
+  * par->match->revision where the two differ.
+  */
+ static struct xt_match xt_string_mt_reg[] __read_mostly = {
+ {
+ .name = "string",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = string_mt_check,
+ .match = string_mt,
+ .destroy = string_mt_destroy,
+ .matchsize = sizeof(struct xt_string_info),
+ .me = THIS_MODULE
+ },
+ {
+ .name = "string",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = string_mt_check,
+ .match = string_mt,
+ .destroy = string_mt_destroy,
+ .matchsize = sizeof(struct xt_string_info),
+ .me = THIS_MODULE
+ },
+ };
+
+ /* Module init: registers every entry of xt_string_mt_reg; returns the result of xt_register_matches(). */
+ static int __init string_mt_init(void)
+ {
+ return xt_register_matches(xt_string_mt_reg,
+ ARRAY_SIZE(xt_string_mt_reg));
+ }
+
+ /* Module exit: unregisters every entry of xt_string_mt_reg. */
+ static void __exit string_mt_exit(void)
+ {
+ xt_unregister_matches(xt_string_mt_reg, ARRAY_SIZE(xt_string_mt_reg));
+ }
+
+module_init(string_mt_init);
+module_exit(string_mt_exit);
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
new file mode 100644
index 0000000..4809b34
--- /dev/null
+++ b/net/netfilter/xt_tcpmss.c
@@ -0,0 +1,110 @@
+/* Kernel module to match TCP MSS values. */
+
+/* Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
+ * Portions (C) 2005 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter/xt_tcpmss.h>
+#include <linux/netfilter/x_tables.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("Xtables: TCP MSS match");
+MODULE_ALIAS("ipt_tcpmss");
+MODULE_ALIAS("ip6t_tcpmss");
+
+ /*
+  * tcpmss_mt - match callback of the "tcpmss" match.
+  *
+  * Walks the TCP options looking for a well-formed MSS option and
+  * compares its value with [info->mss_min, info->mss_max], honouring
+  * info->invert.  Without an MSS option the result is info->invert.
+  * A packet whose TCP header or options cannot be read, or whose data
+  * offset is smaller than the fixed header, is hot-dropped.
+  */
+ static bool
+ tcpmss_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ {
+ 	const struct xt_tcpmss_match_info *info = par->matchinfo;
+ 	const struct tcphdr *th;
+ 	struct tcphdr _tcph;
+ 	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
+ 	const u_int8_t *op;
+ 	u8 _opt[15 * 4 - sizeof(_tcph)];
+ 	unsigned int i, optlen;
+
+ 	/* If we don't have the whole header, drop packet. */
+ 	th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
+ 	if (th == NULL)
+ 		goto dropit;
+
+ 	/* Malformed. */
+ 	if (th->doff*4 < sizeof(*th))
+ 		goto dropit;
+
+ 	optlen = th->doff*4 - sizeof(*th);
+ 	if (!optlen)
+ 		goto out;
+
+ 	/* Truncated options. */
+ 	op = skb_header_pointer(skb, par->thoff + sizeof(*th), optlen, _opt);
+ 	if (op == NULL)
+ 		goto dropit;
+
+ 	for (i = 0; i < optlen; ) {
+ 		if (op[i] == TCPOPT_MSS
+ 		    && (optlen - i) >= TCPOLEN_MSS
+ 		    && op[i+1] == TCPOLEN_MSS) {
+ 			u_int16_t mssval;
+
+ 			mssval = (op[i+2] << 8) | op[i+3];
+
+ 			return (mssval >= info->mss_min &&
+ 				mssval <= info->mss_max) ^ info->invert;
+ 		}
+ 		if (op[i] < 2)
+ 			i++;
+ 		else if (i + 1 >= optlen)
+ 			/*
+ 			 * Option kind in the very last byte: there is no
+ 			 * length byte to read, and reading op[i+1] would
+ 			 * step outside the options area.
+ 			 */
+ 			break;
+ 		else
+ 			/* A zero length byte still advances by one. */
+ 			i += op[i+1] ? : 1;
+ 	}
+ out:
+ 	return info->invert;
+
+ dropit:
+ 	*par->hotdrop = true;
+ 	return false;
+ }
+
+ /* Registration records for the "tcpmss" match, restricted to IPPROTO_TCP; the entries differ only in .family. */
+ static struct xt_match tcpmss_mt_reg[] __read_mostly = {
+ {
+ .name = "tcpmss",
+ .family = NFPROTO_IPV4,
+ .match = tcpmss_mt,
+ .matchsize = sizeof(struct xt_tcpmss_match_info),
+ .proto = IPPROTO_TCP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "tcpmss",
+ .family = NFPROTO_IPV6,
+ .match = tcpmss_mt,
+ .matchsize = sizeof(struct xt_tcpmss_match_info),
+ .proto = IPPROTO_TCP,
+ .me = THIS_MODULE,
+ },
+ };
+
+ /* Module init: registers every entry of tcpmss_mt_reg; returns the result of xt_register_matches(). */
+ static int __init tcpmss_mt_init(void)
+ {
+ return xt_register_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg));
+ }
+
+ /* Module exit: unregisters every entry of tcpmss_mt_reg. */
+ static void __exit tcpmss_mt_exit(void)
+ {
+ xt_unregister_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg));
+ }
+
+module_init(tcpmss_mt_init);
+module_exit(tcpmss_mt_exit);
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
new file mode 100644
index 0000000..1ebdc49
--- /dev/null
+++ b/net/netfilter/xt_tcpudp.c
@@ -0,0 +1,240 @@
+#include <linux/types.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_tcpudp.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_DESCRIPTION("Xtables: TCP, UDP and UDP-Lite match");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("xt_tcp");
+MODULE_ALIAS("xt_udp");
+MODULE_ALIAS("ipt_udp");
+MODULE_ALIAS("ipt_tcp");
+MODULE_ALIAS("ip6t_udp");
+MODULE_ALIAS("ip6t_tcp");
+
+#ifdef DEBUG_IP_FIREWALL_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+
+ /*
+  * True when @port lies within [@min, @max] (bounds included) and
+  * @invert is false, or when it lies outside and @invert is true.
+  */
+ static inline bool
+ port_match(u_int16_t min, u_int16_t max, u_int16_t port, bool invert)
+ {
+ 	const bool in_range = min <= port && port <= max;
+
+ 	return in_range != invert;
+ }
+
+ /*
+  * tcp_find_option - look for an option kind in the TCP options area.
+  * @option:  option kind to look for
+  * @skb:     packet
+  * @protoff: offset of the TCP header within @skb
+  * @optlen:  length of the options area in bytes
+  * @invert:  invert the result
+  * @hotdrop: set to true when the options cannot be read
+  *
+  * Returns !@invert when the option is present, @invert when it is not
+  * (or when @optlen is 0), and false with *@hotdrop set when the options
+  * area is not available.
+  */
+ static bool
+ tcp_find_option(u_int8_t option,
+ 		const struct sk_buff *skb,
+ 		unsigned int protoff,
+ 		unsigned int optlen,
+ 		bool invert,
+ 		bool *hotdrop)
+ {
+ 	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
+ 	const u_int8_t *op;
+ 	u_int8_t _opt[60 - sizeof(struct tcphdr)];
+ 	unsigned int i;
+
+ 	duprintf("tcp_match: finding option\n");
+
+ 	if (!optlen)
+ 		return invert;
+
+ 	/* If we don't have the whole header, drop packet. */
+ 	op = skb_header_pointer(skb, protoff + sizeof(struct tcphdr),
+ 				optlen, _opt);
+ 	if (op == NULL) {
+ 		*hotdrop = true;
+ 		return false;
+ 	}
+
+ 	for (i = 0; i < optlen; ) {
+ 		if (op[i] == option)
+ 			return !invert;
+ 		if (op[i] < 2)
+ 			i++;
+ 		else if (i + 1 >= optlen)
+ 			/*
+ 			 * Option kind in the very last byte: there is no
+ 			 * length byte to read, and reading op[i+1] would
+ 			 * step outside the options area.
+ 			 */
+ 			break;
+ 		else
+ 			/* A zero length byte still advances by one. */
+ 			i += op[i+1] ? : 1;
+ 	}
+
+ 	return invert;
+ }
+
+ /*
+  * tcp_mt - match callback of the "tcp" match.
+  *
+  * Fragments never match; a fragment at offset 1 is additionally
+  * hot-dropped.  On the first fragment the source port, destination port,
+  * TCP flags and (if configured) presence of a TCP option are checked in
+  * that order, each honouring its inversion bit in tcpinfo->invflags.
+  * A packet whose TCP header cannot be read, or whose data offset is
+  * smaller than the fixed header when an option is requested, is
+  * hot-dropped.
+  */
+ static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ {
+ const struct tcphdr *th;
+ struct tcphdr _tcph;
+ const struct xt_tcp *tcpinfo = par->matchinfo;
+
+ if (par->fragoff != 0) {
+ /* To quote Alan:
+
+ Don't allow a fragment of TCP 8 bytes in. Nobody normal
+ causes this. Its a cracker trying to break in by doing a
+ flag overwrite to pass the direction checks.
+ */
+ if (par->fragoff == 1) {
+ duprintf("Dropping evil TCP offset=1 frag.\n");
+ *par->hotdrop = true;
+ }
+ /* Must not be a fragment. */
+ return false;
+ }
+
+ /* FWINVTCP(cond, bit): cond, inverted when `bit` is set in tcpinfo->invflags. */
+ #define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg)))
+
+ th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
+ if (th == NULL) {
+ /* We've been asked to examine this packet, and we
+ can't. Hence, no choice but to drop. */
+ duprintf("Dropping evil TCP offset=0 tinygram.\n");
+ *par->hotdrop = true;
+ return false;
+ }
+
+ if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
+ ntohs(th->source),
+ !!(tcpinfo->invflags & XT_TCP_INV_SRCPT)))
+ return false;
+ if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
+ ntohs(th->dest),
+ !!(tcpinfo->invflags & XT_TCP_INV_DSTPT)))
+ return false;
+ /* Byte 13 of the TCP header is compared against the rule's mask/compare pair. */
+ if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
+ == tcpinfo->flg_cmp,
+ XT_TCP_INV_FLAGS))
+ return false;
+ if (tcpinfo->option) {
+ if (th->doff * 4 < sizeof(_tcph)) {
+ *par->hotdrop = true;
+ return false;
+ }
+ if (!tcp_find_option(tcpinfo->option, skb, par->thoff,
+ th->doff*4 - sizeof(_tcph),
+ tcpinfo->invflags & XT_TCP_INV_OPTION,
+ par->hotdrop))
+ return false;
+ }
+ return true;
+ }
+
+ /* Rule-insertion hook for "tcp": accepts the rule only if invflags has no bits outside XT_TCP_INV_MASK. */
+ static bool tcp_mt_check(const struct xt_mtchk_param *par)
+ {
+ const struct xt_tcp *tcpinfo = par->matchinfo;
+
+ /* Must specify no unknown invflags */
+ return !(tcpinfo->invflags & ~XT_TCP_INV_MASK);
+ }
+
+ /*
+  * udp_mt - match callback shared by the "udp" and "udplite" matches.
+  *
+  * Fragments never match.  A packet whose UDP header cannot be read is
+  * hot-dropped.  Otherwise the source and destination ports must both
+  * satisfy their configured range, each honouring its inversion bit.
+  */
+ static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ {
+ 	const struct xt_udp *udpinfo = par->matchinfo;
+ 	const struct udphdr *uh;
+ 	struct udphdr _udph;
+
+ 	/* Must not be a fragment. */
+ 	if (par->fragoff != 0)
+ 		return false;
+
+ 	uh = skb_header_pointer(skb, par->thoff, sizeof(_udph), &_udph);
+ 	if (uh == NULL) {
+ 		/* We've been asked to examine this packet, and we
+ 		   can't. Hence, no choice but to drop. */
+ 		duprintf("Dropping evil UDP tinygram.\n");
+ 		*par->hotdrop = true;
+ 		return false;
+ 	}
+
+ 	if (!port_match(udpinfo->spts[0], udpinfo->spts[1],
+ 			ntohs(uh->source),
+ 			!!(udpinfo->invflags & XT_UDP_INV_SRCPT)))
+ 		return false;
+
+ 	return port_match(udpinfo->dpts[0], udpinfo->dpts[1],
+ 			  ntohs(uh->dest),
+ 			  !!(udpinfo->invflags & XT_UDP_INV_DSTPT));
+ }
+
+ /* Rule-insertion hook for "udp"/"udplite": accepts the rule only if invflags has no bits outside XT_UDP_INV_MASK. */
+ static bool udp_mt_check(const struct xt_mtchk_param *par)
+ {
+ const struct xt_udp *udpinfo = par->matchinfo;
+
+ /* Must specify no unknown invflags */
+ return !(udpinfo->invflags & ~XT_UDP_INV_MASK);
+ }
+
+ /*
+  * Registration records: "tcp", "udp" and "udplite", each for IPv4 and
+  * IPv6.  "udplite" reuses the UDP callbacks and data layout and differs
+  * from "udp" only in name and .proto.
+  */
+ static struct xt_match tcpudp_mt_reg[] __read_mostly = {
+ {
+ .name = "tcp",
+ .family = NFPROTO_IPV4,
+ .checkentry = tcp_mt_check,
+ .match = tcp_mt,
+ .matchsize = sizeof(struct xt_tcp),
+ .proto = IPPROTO_TCP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "tcp",
+ .family = NFPROTO_IPV6,
+ .checkentry = tcp_mt_check,
+ .match = tcp_mt,
+ .matchsize = sizeof(struct xt_tcp),
+ .proto = IPPROTO_TCP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "udp",
+ .family = NFPROTO_IPV4,
+ .checkentry = udp_mt_check,
+ .match = udp_mt,
+ .matchsize = sizeof(struct xt_udp),
+ .proto = IPPROTO_UDP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "udp",
+ .family = NFPROTO_IPV6,
+ .checkentry = udp_mt_check,
+ .match = udp_mt,
+ .matchsize = sizeof(struct xt_udp),
+ .proto = IPPROTO_UDP,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "udplite",
+ .family = NFPROTO_IPV4,
+ .checkentry = udp_mt_check,
+ .match = udp_mt,
+ .matchsize = sizeof(struct xt_udp),
+ .proto = IPPROTO_UDPLITE,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "udplite",
+ .family = NFPROTO_IPV6,
+ .checkentry = udp_mt_check,
+ .match = udp_mt,
+ .matchsize = sizeof(struct xt_udp),
+ .proto = IPPROTO_UDPLITE,
+ .me = THIS_MODULE,
+ },
+ };
+
+ /* Module init: registers every entry of tcpudp_mt_reg; returns the result of xt_register_matches(). */
+ static int __init tcpudp_mt_init(void)
+ {
+ return xt_register_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg));
+ }
+
+ /* Module exit: unregisters every entry of tcpudp_mt_reg. */
+ static void __exit tcpudp_mt_exit(void)
+ {
+ xt_unregister_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg));
+ }
+
+module_init(tcpudp_mt_init);
+module_exit(tcpudp_mt_exit);
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
new file mode 100644
index 0000000..29375ba
--- /dev/null
+++ b/net/netfilter/xt_time.c
@@ -0,0 +1,260 @@
+/*
+ * xt_time
+ * Copyright © CC Computer Consultants GmbH, 2007
+ * Contact: <jengelh@computergmbh.de>
+ *
+ * based on ipt_time by Fabrice MARIE <fabrice@netfilter.org>
+ * This is a module which is used for time matching
+ * It is using some modified code from dietlibc (localtime() function)
+ * that you can find at http://www.fefe.de/dietlibc/
+ * This file is distributed under the terms of the GNU General Public
+ * License (GPL). Copies of the GPL can be obtained from gnu.org/gpl.
+ */
+#include <linux/ktime.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_time.h>
+
+struct xtm { /* broken-down time, filled in stages by localtime_1/2/3 */
+ u_int8_t month; /* (1-12) */
+ u_int8_t monthday; /* (1-31) */
+ u_int8_t weekday; /* (1-7), Monday = 1 ... Sunday = 7 */
+ u_int8_t hour; /* (0-23) */
+ u_int8_t minute; /* (0-59) */
+ u_int8_t second; /* (0-59) */
+ unsigned int dse; /* whole days since the epoch */
+};
+
+extern struct timezone sys_tz; /* ouch; tz_minuteswest is read for XT_TIME_LOCAL_TZ */
+
+static const u_int16_t days_since_year[] = { /* days before month i+1, non-leap */
+ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
+};
+
+static const u_int16_t days_since_leapyear[] = { /* days before month i+1, leap */
+ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335,
+};
+
+/*
+ * days_since_epoch[i] is the day count at January 1st of year DSE_FIRST - i.
+ * Since time progresses forward, the newest year comes first so that the
+ * linear scan in localtime_3() ends early for current dates.
+ */
+enum {
+ DSE_FIRST = 2039, /* year described by days_since_epoch[0] */
+};
+static const u_int16_t days_since_epoch[] = {
+ /* 2039 - 2030 */
+ 25202, 24837, 24472, 24106, 23741, 23376, 23011, 22645, 22280, 21915,
+ /* 2029 - 2020 */
+ 21550, 21184, 20819, 20454, 20089, 19723, 19358, 18993, 18628, 18262,
+ /* 2019 - 2010 */
+ 17897, 17532, 17167, 16801, 16436, 16071, 15706, 15340, 14975, 14610,
+ /* 2009 - 2000 */
+ 14245, 13879, 13514, 13149, 12784, 12418, 12053, 11688, 11323, 10957,
+ /* 1999 - 1990 */
+ 10592, 10227, 9862, 9496, 9131, 8766, 8401, 8035, 7670, 7305,
+ /* 1989 - 1980 */
+ 6940, 6574, 6209, 5844, 5479, 5113, 4748, 4383, 4018, 3652,
+ /* 1979 - 1970 */
+ 3287, 2922, 2557, 2191, 1826, 1461, 1096, 730, 365, 0,
+};
+
+static inline bool is_leap(unsigned int y)
+{
+	return y % 400 == 0 || (y % 4 == 0 && y % 100 != 0); /* Gregorian rule */
+}
+
+/*
+ * Each network packet has a (nano)seconds-since-the-epoch (SSTE) timestamp.
+ * Since we match against days and daytime, the SSTE value needs to be
+ * computed back into human-readable dates.
+ *
+ * This is done in three separate functions so that the most expensive
+ * calculations are done last, in case a "simple match" can be found earlier.
+ */
+static inline unsigned int localtime_1(struct xtm *r, time_t time)
+{
+	unsigned int daysecs, mins;
+
+	/* Seconds elapsed since midnight; this is also the return value. */
+	daysecs = time % 86400;
+	mins = daysecs / 60;
+	r->hour = mins / 60;
+	r->minute = mins % 60;
+	r->second = daysecs % 60;
+	return daysecs;
+}
+
+static inline void localtime_2(struct xtm *r, time_t time)
+{
+	/*
+	 * Whole days elapsed since the epoch (SSTE / seconds per day).
+	 */
+	const unsigned int days = time / 86400;
+
+	/*
+	 * Day 0 (1970-01-01) was a Thursday (4); "+ 3 ... + 1" keeps the
+	 * result in 1..7 with Sunday landing on 7 rather than 0.
+	 */
+	r->dse = days;
+	r->weekday = (days + 3) % 7 + 1;
+}
+
+static void localtime_3(struct xtm *r, time_t time)
+{
+ unsigned int year, i, w = r->dse;
+
+ /*
+ * In each year, a certain number of days-since-the-epoch have passed.
+ * Find the year that is closest to said days.
+ * r->dse must already be set (localtime_2); 'time' itself is not read.
+ * Consider, for example, w=21612 (2029-03-04). Loop will abort on
+ * dse[i] <= w, which happens when dse[i] == 21550. This implies
+ * year == 2029. w will then be 62.
+ */
+ for (i = 0, year = DSE_FIRST; days_since_epoch[i] > w;
+ ++i, --year)
+ /* just loop */;
+
+ w -= days_since_epoch[i];
+
+ /*
+ * By now we have the current year, and the day of the year.
+ * r->yearday = w;
+ *
+ * On to finding the month (like above). In each month, a certain
+ * number of days-since-New Year have passed, and find the closest
+ * one.
+ *
+ * Consider w=62 (in a non-leap year). Loop will abort on
+ * dsy[i] <= w, which happens when dsy[i] == 31+28 (i == 2).
+ * Concludes i == 2, i.e. 3rd month => March.
+ *
+ * (A different approach to use would be to subtract a monthlength
+ * from w repeatedly while counting.)
+ */
+ if (is_leap(year)) {
+ /* use days_since_leapyear[] in a leap year */
+ for (i = ARRAY_SIZE(days_since_leapyear) - 1;
+ i > 0 && days_since_leapyear[i] > w; --i)
+ /* just loop */;
+ r->monthday = w - days_since_leapyear[i] + 1;
+ } else {
+ for (i = ARRAY_SIZE(days_since_year) - 1;
+ i > 0 && days_since_year[i] > w; --i)
+ /* just loop */;
+ r->monthday = w - days_since_year[i] + 1;
+ }
+
+ r->month = i + 1;
+ return;
+}
+
+static bool
+time_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_time_info *info = par->matchinfo;
+	unsigned int packet_time;
+	struct xtm current_time;
+	s64 stamp;
+
+	/*
+	 * Match callback: true only if the packet's timestamp satisfies
+	 * every constraint in the rule (see the list further down).
+	 *
+	 * The packet's own stamp is used rather than get_seconds():
+	 * given "before 13:00" and "after 13:00" rules, processing time
+	 * could let one packet that arrived just before 13:00 match
+	 * both. A packet without a stamp gets one here first.
+	 */
+	if (skb->tstamp.tv64 == 0)
+		__net_timestamp((struct sk_buff *)skb);
+
+	stamp = ktime_to_ns(skb->tstamp);
+	stamp = div_s64(stamp, NSEC_PER_SEC);
+
+	if (info->flags & XT_TIME_LOCAL_TZ)
+		/* Adjust for local timezone */
+		stamp -= 60 * sys_tz.tz_minuteswest;
+
+	/*
+	 * xt_time will match when _all_ of the following hold:
+	 * - 'now' is in the global time range date_start..date_stop
+	 * - 'now' is in the daytime range daytime_start..daytime_stop
+	 * - 'now' is in the weekday and monthday masks (tested with 1U,
+	 *   since bit 31 for monthday 31 does not fit a signed int)
+	 * (and by default, libxt_time will set these so as to match)
+	 */
+
+	if (stamp < info->date_start || stamp > info->date_stop)
+		return false;
+
+	packet_time = localtime_1(&current_time, stamp);
+
+	if (info->daytime_start < info->daytime_stop) {
+		if (packet_time < info->daytime_start ||
+		    packet_time > info->daytime_stop)
+			return false;
+	} else {	/* range wraps past midnight: reject only the gap */
+		if (packet_time < info->daytime_start &&
+		    packet_time > info->daytime_stop)
+			return false;
+	}
+
+	localtime_2(&current_time, stamp);
+
+	if (!(info->weekdays_match & (1U << current_time.weekday)))
+		return false;
+
+	/* Do not spend time computing monthday if all days match anyway */
+	if (info->monthdays_match != XT_TIME_ALL_MONTHDAYS) {
+		localtime_3(&current_time, stamp);
+		if (!(info->monthdays_match & (1U << current_time.monthday)))
+			return false;
+	}
+
+	return true;
+}
+
+static bool time_mt_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_time_info *cfg = par->matchinfo;
+
+	/* Neither daytime bound may exceed XT_TIME_MAX_DAYTIME. */
+	if (cfg->daytime_start <= XT_TIME_MAX_DAYTIME &&
+	    cfg->daytime_stop <= XT_TIME_MAX_DAYTIME)
+		return true;
+
+	printk(KERN_WARNING "xt_time: invalid argument - start or "
+	       "stop time greater than 23:59:59\n");
+	return false;
+}
+
+static struct xt_match xt_time_mt_reg __read_mostly = {
+ .name = "time",
+ .family = NFPROTO_UNSPEC, /* NOTE(review): presumably any family - confirm */
+ .match = time_mt, /* per-packet verdict */
+ .checkentry = time_mt_check, /* bounds-checks daytime_start/daytime_stop */
+ .matchsize = sizeof(struct xt_time_info),
+ .me = THIS_MODULE,
+};
+
+static int __init time_mt_init(void)
+{ /* module_init hook: returns xt_register_match()'s result unchanged */
+ return xt_register_match(&xt_time_mt_reg);
+}
+
+static void __exit time_mt_exit(void)
+{ /* module_exit hook: unregisters the match registered at init */
+ xt_unregister_match(&xt_time_mt_reg);
+}
+
+module_init(time_mt_init);
+module_exit(time_mt_exit);
+MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_DESCRIPTION("Xtables: time-based matching");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_time");
+MODULE_ALIAS("ip6t_time");
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
new file mode 100644
index 0000000..24a5276
--- /dev/null
+++ b/net/netfilter/xt_u32.c
@@ -0,0 +1,124 @@
+/*
+ * xt_u32 - kernel module to match u32 packet content
+ *
+ * Original author: Don Cohen <don@isis.cs3-inc.com>
+ * (C) CC Computer Consultants GmbH, 2007
+ * Contact: <jengelh@computergmbh.de>
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_u32.h>
+
+static bool u32_match_it(const struct xt_u32 *data,
+			 const struct sk_buff *skb)
+{
+	const struct xt_u32_test *ct;
+	unsigned int testind, nnums, nvals, i;
+	u_int32_t pos, val, at;
+	__be32 n;
+
+	/*
+	 * Small example: "0 >> 28 == 4 && 8 & 0xFF0000 >> 16 = 6, 17"
+	 * (=IPv4 and (TCP or UDP)). Outer loop runs over the "&&" operands,
+	 * all of which must hold. Counts and shift widths are rule-supplied:
+	 * bound the counts first, and treat shifts of 32+ bits as yielding 0.
+	 */
+	if (data->ntests > ARRAY_SIZE(data->tests))
+		return false;
+	for (testind = 0; testind < data->ntests; ++testind) {
+		ct = &data->tests[testind];
+		nnums = ct->nnums;
+		nvals = ct->nvalues;
+		if (nnums > ARRAY_SIZE(ct->location) ||
+		    nvals > ARRAY_SIZE(ct->value))
+			return false;
+
+		at = 0;
+		pos = ct->location[0].number;
+		if (skb->len < 4 || pos > skb->len - 4)
+			return false;
+		if (skb_copy_bits(skb, pos, &n, sizeof(n)) < 0)
+			BUG();
+		val = ntohl(n);
+
+		/* Inner loop runs over "&", "<<", ">>" and "@" operands */
+		for (i = 1; i < nnums; ++i) {
+			u_int32_t number = ct->location[i].number;
+			switch (ct->location[i].nextop) {
+			case XT_U32_AND:
+				val &= number;
+				break;
+			case XT_U32_LEFTSH:
+				val = number < 32 ? val << number : 0;
+				break;
+			case XT_U32_RIGHTSH:
+				val = number < 32 ? val >> number : 0;
+				break;
+			case XT_U32_AT:
+				if (at + val < at)
+					return false;
+				at += val;
+				pos = number;
+				if (at + 4 < at || skb->len < at + 4 ||
+				    pos > skb->len - at - 4)
+					return false;
+				if (skb_copy_bits(skb, at + pos, &n,
+						    sizeof(n)) < 0)
+					BUG();
+				val = ntohl(n);
+				break;
+			}
+		}
+
+		/* Run over the "," and ":" operands */
+		for (i = 0; i < nvals; ++i)
+			if (ct->value[i].min <= val && val <= ct->value[i].max)
+				break;
+
+		if (i >= nvals)
+			return false;
+	}
+
+	return true;
+}
+
+static bool u32_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_u32 *rule = par->matchinfo;
+	bool hit = u32_match_it(rule, skb);
+
+	/* XOR with the rule's invert field flips the verdict when it is set. */
+	return hit ^ rule->invert;
+}
+
+static struct xt_match xt_u32_mt_reg __read_mostly = { /* no .checkentry is set */
+ .name = "u32",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC, /* NOTE(review): presumably any family - confirm */
+ .match = u32_mt, /* per-packet verdict, applies data->invert */
+ .matchsize = sizeof(struct xt_u32),
+ .me = THIS_MODULE,
+};
+
+static int __init u32_mt_init(void)
+{ /* module_init hook: returns xt_register_match()'s result unchanged */
+ return xt_register_match(&xt_u32_mt_reg);
+}
+
+static void __exit u32_mt_exit(void)
+{ /* module_exit hook: unregisters the match registered at init */
+ xt_unregister_match(&xt_u32_mt_reg);
+}
+
+module_init(u32_mt_init);
+module_exit(u32_mt_exit);
+MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_DESCRIPTION("Xtables: arbitrary byte matching");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_u32");
+MODULE_ALIAS("ip6t_u32");
OpenPOWER on IntegriCloud