summary refs log tree commit diff stats
path: root/net/ipv4/netfilter
diff options
context:
space:
mode:
author Timothy Pearson <tpearson@raptorengineering.com> 2017-08-23 14:45:25 -0500
committer Timothy Pearson <tpearson@raptorengineering.com> 2017-08-23 14:45:25 -0500
commit fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204 (patch)
tree 22962a4387943edc841c72a4e636a068c66d58fd /net/ipv4/netfilter
download ast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.zip
ast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.tar.gz
Initial import of modified Linux 2.6.28 tree
Original upstream URL: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git | branch linux-2.6.28.y
Diffstat (limited to 'net/ipv4/netfilter')
-rw-r--r--net/ipv4/netfilter/Kconfig398
-rw-r--r--net/ipv4/netfilter/Makefile75
-rw-r--r--net/ipv4/netfilter/arp_tables.c1892
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c91
-rw-r--r--net/ipv4/netfilter/arptable_filter.c157
-rw-r--r--net/ipv4/netfilter/ip_queue.c645
-rw-r--r--net/ipv4/netfilter/ip_tables.c2285
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c744
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c141
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c492
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c183
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c96
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c110
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c220
-rw-r--r--net/ipv4/netfilter/ipt_TTL.c97
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c448
-rw-r--r--net/ipv4/netfilter/ipt_addrtype.c132
-rw-r--r--net/ipv4/netfilter/ipt_ah.c97
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c129
-rw-r--r--net/ipv4/netfilter/ipt_ttl.c63
-rw-r--r--net/ipv4/netfilter/iptable_filter.c190
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c251
-rw-r--r--net/ipv4/netfilter/iptable_raw.c144
-rw-r--r--net/ipv4/netfilter/iptable_security.c180
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c456
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c425
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c322
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c97
-rw-r--r--net/ipv4/netfilter/nf_nat_amanda.c78
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c773
-rw-r--r--net/ipv4/netfilter/nf_nat_ftp.c165
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c584
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c444
-rw-r--r--net/ipv4/netfilter/nf_nat_irc.c92
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c307
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_common.c124
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_dccp.c108
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c149
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c84
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_sctp.c96
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_tcp.c93
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udp.c84
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udplite.c99
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_unknown.c53
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c262
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c502
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c1340
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c332
-rw-r--r--net/ipv4/netfilter/nf_nat_tftp.c52
49 files changed, 16381 insertions, 0 deletions
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
new file mode 100644
index 0000000..3816e1d
--- /dev/null
+++ b/net/ipv4/netfilter/Kconfig
@@ -0,0 +1,398 @@
+#
+# IP netfilter configuration
+#
+
+menu "IP: Netfilter Configuration"
+ depends on INET && NETFILTER
+
+config NF_DEFRAG_IPV4
+ tristate
+ default n
+
+config NF_CONNTRACK_IPV4
+ tristate "IPv4 connection tracking support (required for NAT)"
+ depends on NF_CONNTRACK
+ default m if NETFILTER_ADVANCED=n
+ select NF_DEFRAG_IPV4
+ ---help---
+ Connection tracking keeps a record of what packets have passed
+ through your machine, in order to figure out how they are related
+ into connections.
+
+ This is IPv4 support on Layer 3 independent connection tracking.
+ Layer 3 independent connection tracking is an experimental scheme
+ which generalizes ip_conntrack to support other layer 3 protocols.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_PROC_COMPAT
+ bool "proc/sysctl compatibility with old connection tracking"
+ depends on NF_CONNTRACK_IPV4
+ default y
+ help
+ This option enables /proc and sysctl compatibility with the old
+ layer 3 dependent connection tracking. This is needed to keep
+ old programs that have not been adapted to the new names working.
+
+ If unsure, say Y.
+
+config IP_NF_QUEUE
+ tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
+ depends on NETFILTER_ADVANCED
+ help
+ Netfilter has the ability to queue packets to user space: the
+ netlink device can be used to access them using this driver.
+
+ This option enables the old IPv4-only "ip_queue" implementation
+ which has been obsoleted by the new "nfnetlink_queue" code (see
+ CONFIG_NETFILTER_NETLINK_QUEUE).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_IPTABLES
+ tristate "IP tables support (required for filtering/masq/NAT)"
+ default m if NETFILTER_ADVANCED=n
+ select NETFILTER_XTABLES
+ help
+ iptables is a general, extensible packet identification framework.
+ The packet filtering and full NAT (masquerading, port forwarding,
+ etc) subsystems now use this: say `Y' or `M' here if you want to use
+ either of those.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+if IP_NF_IPTABLES
+
+# The matches.
+config IP_NF_MATCH_ADDRTYPE
+ tristate '"addrtype" address type match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option allows you to match what routing thinks of an address,
+ eg. UNICAST, LOCAL, BROADCAST, ...
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+config IP_NF_MATCH_AH
+ tristate '"ah" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This match extension allows you to match a range of SPIs
+ inside AH header of IPSec packets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_ECN
+ tristate '"ecn" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds an `ECN' match, which allows you to match against
+ the IPv4 and TCP header ECN fields.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_TTL
+ tristate '"ttl" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This adds the CONFIG_IP_NF_MATCH_TTL option, which enables the user
+ to match packets by their TTL value.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# `filter', generic and specific targets
+config IP_NF_FILTER
+ tristate "Packet filtering"
+ default m if NETFILTER_ADVANCED=n
+ help
+ Packet filtering defines a table `filter', which has a series of
+ rules for simple packet filtering at local input, forwarding and
+ local output. See the man page for iptables(8).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_REJECT
+ tristate "REJECT target support"
+ depends on IP_NF_FILTER
+ default m if NETFILTER_ADVANCED=n
+ help
+ The REJECT target allows a filtering rule to specify that an ICMP
+ error should be issued in response to an incoming packet, rather
+ than silently being dropped.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_LOG
+ tristate "LOG target support"
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option adds a `LOG' target, which allows you to create rules in
+ any iptables table which record the packet header to the syslog.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_ULOG
+ tristate "ULOG target support"
+ default m if NETFILTER_ADVANCED=n
+ ---help---
+
+ This option enables the old IPv4-only "ipt_ULOG" implementation
+ which has been obsoleted by the new "nfnetlink_log" code (see
+ CONFIG_NETFILTER_NETLINK_LOG).
+
+ This option adds a `ULOG' target, which allows you to create rules in
+ any iptables table. The packet is passed to a userspace logging
+ daemon using netlink multicast sockets; unlike the LOG target
+ which can only be viewed through syslog.
+
+ The appropriate userspace logging daemon (ulogd) may be obtained from
+ <http://www.gnumonks.org/projects/ulogd/>
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# NAT + specific targets: nf_conntrack
+config NF_NAT
+ tristate "Full NAT"
+ depends on NF_CONNTRACK_IPV4
+ default m if NETFILTER_ADVANCED=n
+ help
+ The Full NAT option allows masquerading, port forwarding and other
+ forms of full Network Address Port Translation. It is controlled by
+ the `nat' table in iptables: see the man page for iptables(8).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_NAT_NEEDED
+ bool
+ depends on NF_NAT
+ default y
+
+config IP_NF_TARGET_MASQUERADE
+ tristate "MASQUERADE target support"
+ depends on NF_NAT
+ default m if NETFILTER_ADVANCED=n
+ help
+ Masquerading is a special case of NAT: all outgoing connections are
+ changed to seem to come from a particular interface's address, and
+ if the interface goes down, those connections are lost. This is
+ only useful for dialup accounts with dynamic IP address (ie. your IP
+ address will be different on next dialup).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_NETMAP
+ tristate "NETMAP target support"
+ depends on NF_NAT
+ depends on NETFILTER_ADVANCED
+ help
+ NETMAP is an implementation of static 1:1 NAT mapping of network
+ addresses. It maps the network address part, while keeping the host
+ address part intact.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_REDIRECT
+ tristate "REDIRECT target support"
+ depends on NF_NAT
+ depends on NETFILTER_ADVANCED
+ help
+ REDIRECT is a special case of NAT: all incoming connections are
+ mapped onto the incoming interface's address, causing the packets to
+ come to the local machine instead of passing through. This is
+ useful for transparent proxies.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_NAT_SNMP_BASIC
+ tristate "Basic SNMP-ALG support"
+ depends on NF_NAT
+ depends on NETFILTER_ADVANCED
+ ---help---
+
+ This module implements an Application Layer Gateway (ALG) for
+ SNMP payloads. In conjunction with NAT, it allows a network
+ management system to access multiple private networks with
+ conflicting addresses. It works by modifying IP addresses
+ inside SNMP payloads to match IP-layer NAT mapping.
+
+ This is the "basic" form of SNMP-ALG, as described in RFC 2962.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# If they want FTP, set to $CONFIG_IP_NF_NAT (m or y),
+# or $CONFIG_IP_NF_FTP (m or y), whichever is weaker.
+# From kconfig-language.txt:
+#
+# <expr> '&&' <expr> (6)
+#
+# (6) Returns the result of min(/expr/, /expr/).
+config NF_NAT_PROTO_DCCP
+ tristate
+ depends on NF_NAT && NF_CT_PROTO_DCCP
+ default NF_NAT && NF_CT_PROTO_DCCP
+
+config NF_NAT_PROTO_GRE
+ tristate
+ depends on NF_NAT && NF_CT_PROTO_GRE
+
+config NF_NAT_PROTO_UDPLITE
+ tristate
+ depends on NF_NAT && NF_CT_PROTO_UDPLITE
+ default NF_NAT && NF_CT_PROTO_UDPLITE
+
+config NF_NAT_PROTO_SCTP
+ tristate
+ default NF_NAT && NF_CT_PROTO_SCTP
+ depends on NF_NAT && NF_CT_PROTO_SCTP
+ select LIBCRC32C
+
+config NF_NAT_FTP
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_FTP
+
+config NF_NAT_IRC
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_IRC
+
+config NF_NAT_TFTP
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_TFTP
+
+config NF_NAT_AMANDA
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_AMANDA
+
+config NF_NAT_PPTP
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_PPTP
+ select NF_NAT_PROTO_GRE
+
+config NF_NAT_H323
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_H323
+
+config NF_NAT_SIP
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_SIP
+
+# mangle + specific targets
+config IP_NF_MANGLE
+ tristate "Packet mangling"
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option adds a `mangle' table to iptables: see the man page for
+ iptables(8). This table is used for various packet alterations
+ which can affect how the packet is routed.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_CLUSTERIP
+ tristate "CLUSTERIP target support (EXPERIMENTAL)"
+ depends on IP_NF_MANGLE && EXPERIMENTAL
+ depends on NF_CONNTRACK_IPV4
+ depends on NETFILTER_ADVANCED
+ select NF_CONNTRACK_MARK
+ help
+ The CLUSTERIP target allows you to build load-balancing clusters of
+ network servers without having a dedicated load-balancing
+ router/server/switch.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_ECN
+ tristate "ECN target support"
+ depends on IP_NF_MANGLE
+ depends on NETFILTER_ADVANCED
+ ---help---
+ This option adds an `ECN' target, which can be used in the iptables mangle
+ table.
+
+ You can use this target to remove the ECN bits from the IPv4 header of
+ an IP packet. This is particularly useful, if you need to work around
+ existing ECN blackholes on the internet, but don't want to disable
+ ECN support in general.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_TTL
+ tristate 'TTL target support'
+ depends on IP_NF_MANGLE
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `TTL' target, which enables the user to modify
+ the TTL value of the IP header.
+
+ While it is safe to decrement/lower the TTL, this target also enables
+ functionality to increment and set the TTL value of the IP header to
+ arbitrary values. This is EXTREMELY DANGEROUS since you can easily
+ create immortal packets that loop forever on the network.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# raw + specific targets
+config IP_NF_RAW
+ tristate 'raw table support (required for NOTRACK/TRACE)'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `raw' table to iptables. This table is the very
+ first in the netfilter framework and hooks in at the PREROUTING
+ and OUTPUT chains.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+# security table for MAC policy
+config IP_NF_SECURITY
+ tristate "Security table"
+ depends on SECURITY
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `security' table to iptables, for use
+ with Mandatory Access Control (MAC) policy.
+
+ If unsure, say N.
+
+endif # IP_NF_IPTABLES
+
+# ARP tables
+config IP_NF_ARPTABLES
+ tristate "ARP tables support"
+ select NETFILTER_XTABLES
+ depends on NETFILTER_ADVANCED
+ help
+ arptables is a general, extensible packet identification framework.
+ The ARP packet filtering and mangling (manipulation) subsystems
+ use this: say Y or M here if you want to use either of those.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+if IP_NF_ARPTABLES
+
+config IP_NF_ARPFILTER
+ tristate "ARP packet filtering"
+ help
+ ARP packet filtering defines a table `filter', which has a series of
+ rules for simple ARP packet filtering at local input and
+ local output. On a bridge, you can also specify filtering rules
+ for forwarded ARP packets. See the man page for arptables(8).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_ARP_MANGLE
+ tristate "ARP payload mangling"
+ help
+ Allows altering the ARP packet payload: source and destination
+ hardware and network addresses.
+
+endif # IP_NF_ARPTABLES
+
+endmenu
+
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
new file mode 100644
index 0000000..5f9b650
--- /dev/null
+++ b/net/ipv4/netfilter/Makefile
@@ -0,0 +1,75 @@
+#
+# Makefile for the netfilter modules on top of IPv4.
+#
+
+# objects for l3 independent conntrack
+nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
+ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
+ifeq ($(CONFIG_PROC_FS),y)
+nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o
+endif
+endif
+
+nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
+iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o
+
+# connection tracking
+obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
+
+obj-$(CONFIG_NF_NAT) += nf_nat.o
+
+# defrag
+obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
+
+# NAT helpers (nf_conntrack)
+obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
+obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
+obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
+obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o
+obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
+obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
+obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
+obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
+
+# NAT protocols (nf_nat)
+obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
+obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
+obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
+obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
+
+# generic IP tables
+obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
+
+# the instances of ip_tables (filter, mangle, nat, raw, security)
+obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
+obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
+obj-$(CONFIG_NF_NAT) += iptable_nat.o
+obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
+obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
+
+# matches
+obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
+obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
+obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
+obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
+
+# targets
+obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
+obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
+obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
+obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
+obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
+obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
+obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
+obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
+obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
+
+# generic ARP tables
+obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
+obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
+
+# just filtering instance of ARP tables for now
+obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
+
+obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
+
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
new file mode 100644
index 0000000..8d70d29
--- /dev/null
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -0,0 +1,1892 @@
+/*
+ * Packet matching code for ARP packets.
+ *
+ * Based heavily, if not almost entirely, upon ip_tables.c framework.
+ *
+ * Some ARP specific bits are:
+ *
+ * Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/capability.h>
+#include <linux/if_arp.h>
+#include <linux/kmod.h>
+#include <linux/vmalloc.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/err.h>
+#include <net/compat.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_arp/arp_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
+MODULE_DESCRIPTION("arptables core");
+
+/*#define DEBUG_ARP_TABLES*/
+/*#define DEBUG_ARP_TABLES_USER*/
+
+#ifdef DEBUG_ARP_TABLES
+#define dprintf(format, args...) printk(format , ## args)
+#else
+#define dprintf(format, args...)
+#endif
+
+#ifdef DEBUG_ARP_TABLES_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#define ARP_NF_ASSERT(x) \
+do { \
+ if (!(x)) \
+ printk("ARP_NF_ASSERT: %s:%s:%u\n", \
+ __func__, __FILE__, __LINE__); \
+} while(0)
+#else
+#define ARP_NF_ASSERT(x)
+#endif
+
+static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
+ const char *hdr_addr, int len)
+{
+ int i, ret;
+
+ if (len > ARPT_DEV_ADDR_LEN_MAX)
+ len = ARPT_DEV_ADDR_LEN_MAX;
+
+ ret = 0;
+ for (i = 0; i < len; i++)
+ ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i];
+
+ return (ret != 0);
+}
+
+/* Returns 1 if the packet matches the rule, 0 otherwise. */
+static inline int arp_packet_match(const struct arphdr *arphdr,
+ struct net_device *dev,
+ const char *indev,
+ const char *outdev,
+ const struct arpt_arp *arpinfo)
+{
+ const char *arpptr = (char *)(arphdr + 1);
+ const char *src_devaddr, *tgt_devaddr;
+ __be32 src_ipaddr, tgt_ipaddr;
+ int i, ret;
+
+#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
+
+ if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
+ ARPT_INV_ARPOP)) {
+ dprintf("ARP operation field mismatch.\n");
+ dprintf("ar_op: %04x info->arpop: %04x info->arpop_mask: %04x\n",
+ arphdr->ar_op, arpinfo->arpop, arpinfo->arpop_mask);
+ return 0;
+ }
+
+ if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd,
+ ARPT_INV_ARPHRD)) {
+ dprintf("ARP hardware address format mismatch.\n");
+ dprintf("ar_hrd: %04x info->arhrd: %04x info->arhrd_mask: %04x\n",
+ arphdr->ar_hrd, arpinfo->arhrd, arpinfo->arhrd_mask);
+ return 0;
+ }
+
+ if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro,
+ ARPT_INV_ARPPRO)) {
+ dprintf("ARP protocol address format mismatch.\n");
+ dprintf("ar_pro: %04x info->arpro: %04x info->arpro_mask: %04x\n",
+ arphdr->ar_pro, arpinfo->arpro, arpinfo->arpro_mask);
+ return 0;
+ }
+
+ if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln,
+ ARPT_INV_ARPHLN)) {
+ dprintf("ARP hardware address length mismatch.\n");
+ dprintf("ar_hln: %02x info->arhln: %02x info->arhln_mask: %02x\n",
+ arphdr->ar_hln, arpinfo->arhln, arpinfo->arhln_mask);
+ return 0;
+ }
+
+ src_devaddr = arpptr;
+ arpptr += dev->addr_len;
+ memcpy(&src_ipaddr, arpptr, sizeof(u32));
+ arpptr += sizeof(u32);
+ tgt_devaddr = arpptr;
+ arpptr += dev->addr_len;
+ memcpy(&tgt_ipaddr, arpptr, sizeof(u32));
+
+ if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len),
+ ARPT_INV_SRCDEVADDR) ||
+ FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len),
+ ARPT_INV_TGTDEVADDR)) {
+ dprintf("Source or target device address mismatch.\n");
+
+ return 0;
+ }
+
+ if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr,
+ ARPT_INV_SRCIP) ||
+ FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr),
+ ARPT_INV_TGTIP)) {
+ dprintf("Source or target IP address mismatch.\n");
+
+ dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
+ NIPQUAD(src_ipaddr),
+ NIPQUAD(arpinfo->smsk.s_addr),
+ NIPQUAD(arpinfo->src.s_addr),
+ arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : "");
+ dprintf("TGT: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
+ NIPQUAD(tgt_ipaddr),
+ NIPQUAD(arpinfo->tmsk.s_addr),
+ NIPQUAD(arpinfo->tgt.s_addr),
+ arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : "");
+ return 0;
+ }
+
+ /* Look for ifname matches. */
+ for (i = 0, ret = 0; i < IFNAMSIZ; i++) {
+ ret |= (indev[i] ^ arpinfo->iniface[i])
+ & arpinfo->iniface_mask[i];
+ }
+
+ if (FWINV(ret != 0, ARPT_INV_VIA_IN)) {
+ dprintf("VIA in mismatch (%s vs %s).%s\n",
+ indev, arpinfo->iniface,
+ arpinfo->invflags&ARPT_INV_VIA_IN ?" (INV)":"");
+ return 0;
+ }
+
+ for (i = 0, ret = 0; i < IFNAMSIZ; i++) {
+ ret |= (outdev[i] ^ arpinfo->outiface[i])
+ & arpinfo->outiface_mask[i];
+ }
+
+ if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
+ dprintf("VIA out mismatch (%s vs %s).%s\n",
+ outdev, arpinfo->outiface,
+ arpinfo->invflags&ARPT_INV_VIA_OUT ?" (INV)":"");
+ return 0;
+ }
+
+ return 1;
+#undef FWINV
+}
+
+static inline int arp_checkentry(const struct arpt_arp *arp)
+{
+ if (arp->flags & ~ARPT_F_MASK) {
+ duprintf("Unknown flag bits set: %08X\n",
+ arp->flags & ~ARPT_F_MASK);
+ return 0;
+ }
+ if (arp->invflags & ~ARPT_INV_MASK) {
+ duprintf("Unknown invflag bits set: %08X\n",
+ arp->invflags & ~ARPT_INV_MASK);
+ return 0;
+ }
+
+ return 1;
+}
+
+static unsigned int
+arpt_error(struct sk_buff *skb, const struct xt_target_param *par)
+{
+ if (net_ratelimit())
+ printk("arp_tables: error: '%s'\n",
+ (const char *)par->targinfo);
+
+ return NF_DROP;
+}
+
+static inline struct arpt_entry *get_entry(void *base, unsigned int offset)
+{
+ return (struct arpt_entry *)(base + offset);
+}
+
+unsigned int arpt_do_table(struct sk_buff *skb,
+ unsigned int hook,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct xt_table *table)
+{
+ static const char nulldevname[IFNAMSIZ];
+ unsigned int verdict = NF_DROP;
+ const struct arphdr *arp;
+ bool hotdrop = false;
+ struct arpt_entry *e, *back;
+ const char *indev, *outdev;
+ void *table_base;
+ const struct xt_table_info *private;
+ struct xt_target_param tgpar;
+
+ if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
+ return NF_DROP;
+
+ indev = in ? in->name : nulldevname;
+ outdev = out ? out->name : nulldevname;
+
+ read_lock_bh(&table->lock);
+ private = table->private;
+ table_base = (void *)private->entries[smp_processor_id()];
+ e = get_entry(table_base, private->hook_entry[hook]);
+ back = get_entry(table_base, private->underflow[hook]);
+
+ tgpar.in = in;
+ tgpar.out = out;
+ tgpar.hooknum = hook;
+ tgpar.family = NFPROTO_ARP;
+
+ arp = arp_hdr(skb);
+ do {
+ if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
+ struct arpt_entry_target *t;
+ int hdr_len;
+
+ hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
+ (2 * skb->dev->addr_len);
+ ADD_COUNTER(e->counters, hdr_len, 1);
+
+ t = arpt_get_target(e);
+
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct arpt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != ARPT_RETURN) {
+ verdict = (unsigned)(-v) - 1;
+ break;
+ }
+ e = back;
+ back = get_entry(table_base,
+ back->comefrom);
+ continue;
+ }
+ if (table_base + v
+ != (void *)e + e->next_offset) {
+ /* Save old back ptr in next entry */
+ struct arpt_entry *next
+ = (void *)e + e->next_offset;
+ next->comefrom =
+ (void *)back - table_base;
+
+ /* set back pointer to next entry */
+ back = next;
+ }
+
+ e = get_entry(table_base, v);
+ } else {
+ /* Targets which reenter must return
+ * abs. verdicts
+ */
+ tgpar.target = t->u.kernel.target;
+ tgpar.targinfo = t->data;
+ verdict = t->u.kernel.target->target(skb,
+ &tgpar);
+
+ /* Target might have changed stuff. */
+ arp = arp_hdr(skb);
+
+ if (verdict == ARPT_CONTINUE)
+ e = (void *)e + e->next_offset;
+ else
+ /* Verdict */
+ break;
+ }
+ } else {
+ e = (void *)e + e->next_offset;
+ }
+ } while (!hotdrop);
+ read_unlock_bh(&table->lock);
+
+ if (hotdrop)
+ return NF_DROP;
+ else
+ return verdict;
+}
+
+/* All zeroes == unconditional rule. */
+static inline int unconditional(const struct arpt_arp *arp)
+{
+ unsigned int i;
+
+ for (i = 0; i < sizeof(*arp)/sizeof(__u32); i++)
+ if (((__u32 *)arp)[i])
+ return 0;
+
+ return 1;
+}
+
+/* Figures out from what hook each rule can be called: returns 0 if
+ * there are loops. Puts hook bitmask in comefrom.
+ */
+static int mark_source_chains(struct xt_table_info *newinfo,
+ unsigned int valid_hooks, void *entry0)
+{
+ unsigned int hook;
+
+ /* No recursion; use packet counter to save back ptrs (reset
+ * to 0 as we leave), and comefrom to save source hook bitmask.
+ */
+ for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
+ unsigned int pos = newinfo->hook_entry[hook];
+ struct arpt_entry *e
+ = (struct arpt_entry *)(entry0 + pos);
+
+ if (!(valid_hooks & (1 << hook)))
+ continue;
+
+ /* Set initial back pointer. */
+ e->counters.pcnt = pos;
+
+ for (;;) {
+ const struct arpt_standard_target *t
+ = (void *)arpt_get_target(e);
+ int visited = e->comefrom & (1 << hook);
+
+ if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) {
+ printk("arptables: loop hook %u pos %u %08X.\n",
+ hook, pos, e->comefrom);
+ return 0;
+ }
+ e->comefrom
+ |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
+
+ /* Unconditional return/END. */
+ if ((e->target_offset == sizeof(struct arpt_entry)
+ && (strcmp(t->target.u.user.name,
+ ARPT_STANDARD_TARGET) == 0)
+ && t->verdict < 0
+ && unconditional(&e->arp)) || visited) {
+ unsigned int oldpos, size;
+
+ if (t->verdict < -NF_MAX_VERDICT - 1) {
+ duprintf("mark_source_chains: bad "
+ "negative verdict (%i)\n",
+ t->verdict);
+ return 0;
+ }
+
+ /* Return: backtrack through the last
+ * big jump.
+ */
+ do {
+ e->comefrom ^= (1<<NF_ARP_NUMHOOKS);
+ oldpos = pos;
+ pos = e->counters.pcnt;
+ e->counters.pcnt = 0;
+
+ /* We're at the start. */
+ if (pos == oldpos)
+ goto next;
+
+ e = (struct arpt_entry *)
+ (entry0 + pos);
+ } while (oldpos == pos + e->next_offset);
+
+ /* Move along one */
+ size = e->next_offset;
+ e = (struct arpt_entry *)
+ (entry0 + pos + size);
+ e->counters.pcnt = pos;
+ pos += size;
+ } else {
+ int newpos = t->verdict;
+
+ if (strcmp(t->target.u.user.name,
+ ARPT_STANDARD_TARGET) == 0
+ && newpos >= 0) {
+ if (newpos > newinfo->size -
+ sizeof(struct arpt_entry)) {
+ duprintf("mark_source_chains: "
+ "bad verdict (%i)\n",
+ newpos);
+ return 0;
+ }
+
+ /* This is a jump; chase it. */
+ duprintf("Jump rule %u -> %u\n",
+ pos, newpos);
+ } else {
+ /* ... this is a fallthru */
+ newpos = pos + e->next_offset;
+ }
+ e = (struct arpt_entry *)
+ (entry0 + newpos);
+ e->counters.pcnt = pos;
+ pos = newpos;
+ }
+ }
+ next:
+ duprintf("Finished chain %u\n", hook);
+ }
+ return 1;
+}
+
+static inline int check_entry(struct arpt_entry *e, const char *name)
+{
+ const struct arpt_entry_target *t;
+
+ if (!arp_checkentry(&e->arp)) {
+ duprintf("arp_tables: arp check failed %p %s.\n", e, name);
+ return -EINVAL;
+ }
+
+ if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset)
+ return -EINVAL;
+
+ t = arpt_get_target(e);
+ if (e->target_offset + t->u.target_size > e->next_offset)
+ return -EINVAL;
+
+ return 0;
+}
+
+static inline int check_target(struct arpt_entry *e, const char *name)
+{
+ struct arpt_entry_target *t = arpt_get_target(e);
+ int ret;
+ struct xt_tgchk_param par = {
+ .table = name,
+ .entryinfo = e,
+ .target = t->u.kernel.target,
+ .targinfo = t->data,
+ .hook_mask = e->comefrom,
+ .family = NFPROTO_ARP,
+ };
+
+ ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
+ if (ret < 0) {
+ duprintf("arp_tables: check failed for `%s'.\n",
+ t->u.kernel.target->name);
+ return ret;
+ }
+ return 0;
+}
+
+static inline int
+find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
+ unsigned int *i)
+{
+ struct arpt_entry_target *t;
+ struct xt_target *target;
+ int ret;
+
+ ret = check_entry(e, name);
+ if (ret)
+ return ret;
+
+ t = arpt_get_target(e);
+ target = try_then_request_module(xt_find_target(NFPROTO_ARP,
+ t->u.user.name,
+ t->u.user.revision),
+ "arpt_%s", t->u.user.name);
+ if (IS_ERR(target) || !target) {
+ duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
+ ret = target ? PTR_ERR(target) : -ENOENT;
+ goto out;
+ }
+ t->u.kernel.target = target;
+
+ ret = check_target(e, name);
+ if (ret)
+ goto err;
+
+ (*i)++;
+ return 0;
+err:
+ module_put(t->u.kernel.target->me);
+out:
+ return ret;
+}
+
+/* First-pass validation of one entry of a user-supplied rule blob.
+ *
+ * @e:            entry to check
+ * @newinfo:      table info whose hook_entry[]/underflow[] get filled in
+ * @base, @limit: start and end of the blob
+ * @hook_entries, @underflows: per-hook offsets claimed by the caller
+ * @i:            incremented for every accepted entry
+ *
+ * Returns 0 if the entry is acceptable, -EINVAL otherwise.
+ */
+static inline int check_entry_size_and_hooks(struct arpt_entry *e,
+					     struct xt_table_info *newinfo,
+					     unsigned char *base,
+					     unsigned char *limit,
+					     const unsigned int *hook_entries,
+					     const unsigned int *underflows,
+					     unsigned int *i)
+{
+	unsigned int h;
+
+	/* The entry must be aligned, its fixed header must lie inside the
+	 * blob, and the whole entry (next_offset bytes) must not run past
+	 * the end of the blob.  next_offset is only read after the header
+	 * check, because || evaluates left to right.
+	 */
+	if ((unsigned long)e % __alignof__(struct arpt_entry) != 0
+	    || (unsigned char *)e + sizeof(struct arpt_entry) >= limit
+	    || (unsigned char *)e + e->next_offset > limit) {
+		duprintf("Bad offset %p\n", e);
+		return -EINVAL;
+	}
+
+	/* An entry has to be big enough for its header plus a target. */
+	if (e->next_offset
+	    < sizeof(struct arpt_entry) + sizeof(struct arpt_entry_target)) {
+		duprintf("checking: element %p size %u\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
+
+	/* Check hooks & underflows */
+	for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+		if ((unsigned char *)e - base == hook_entries[h])
+			newinfo->hook_entry[h] = hook_entries[h];
+		if ((unsigned char *)e - base == underflows[h])
+			newinfo->underflow[h] = underflows[h];
+	}
+
+	/* FIXME: underflows must be unconditional, standard verdicts
+	   < 0 (not ARPT_RETURN). --RR */
+
+	/* Clear counters and comefrom */
+	e->counters = ((struct xt_counters) { 0, 0 });
+	e->comefrom = 0;
+
+	(*i)++;
+	return 0;
+}
+
+/* Undo the per-entry setup: call the target's destroy hook (if it has
+ * one) and put the target's module.  When @i is non-NULL it acts as a
+ * countdown: it is decremented on every call and the function returns 1
+ * without touching the entry once it had already reached zero.
+ */
+static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
+{
+	struct arpt_entry_target *tgt;
+	struct xt_tgdtor_param par;
+
+	if (i != NULL) {
+		unsigned int left = (*i)--;
+
+		if (left == 0)
+			return 1;
+	}
+
+	tgt = arpt_get_target(e);
+	par.target   = tgt->u.kernel.target;
+	par.targinfo = tgt->data;
+	par.family   = NFPROTO_ARP;
+	if (par.target->destroy)
+		par.target->destroy(&par);
+	module_put(par.target->me);
+	return 0;
+}
+
+/* Checks and translates the user-supplied table segment (held in
+ * newinfo).
+ *
+ * @name, @valid_hooks: table name and bitmask of the hooks it uses
+ * @entry0: start of the rule blob, @size bytes long, which must hold
+ * exactly @number entries
+ * @hook_entries, @underflows: per-hook offsets into the blob as claimed
+ * by the caller
+ *
+ * Returns 0 on success, -EINVAL for a malformed blob, -ELOOP when
+ * mark_source_chains() rejects it, or the error of the first entry that
+ * fails find_check_entry().
+ */
+static int translate_table(const char *name,
+ unsigned int valid_hooks,
+ struct xt_table_info *newinfo,
+ void *entry0,
+ unsigned int size,
+ unsigned int number,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows)
+{
+ unsigned int i;
+ int ret;
+
+ newinfo->size = size;
+ newinfo->number = number;
+
+ /* Init all hooks to impossible value. */
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = 0xFFFFFFFF;
+ newinfo->underflow[i] = 0xFFFFFFFF;
+ }
+
+ duprintf("translate_table: size %u\n", newinfo->size);
+ i = 0;
+
+ /* Walk through entries, checking offsets. */
+ ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
+ check_entry_size_and_hooks,
+ newinfo,
+ entry0,
+ entry0 + size,
+ hook_entries, underflows, &i);
+ duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
+ if (ret != 0)
+ return ret;
+
+ /* i now holds the number of entries the walk accepted. */
+ if (i != number) {
+ duprintf("translate_table: %u not %u entries\n",
+ i, number);
+ return -EINVAL;
+ }
+
+ /* Check hooks all assigned */
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ /* Only hooks which are valid */
+ if (!(valid_hooks & (1 << i)))
+ continue;
+ if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
+ duprintf("Invalid hook entry %u %u\n",
+ i, hook_entries[i]);
+ return -EINVAL;
+ }
+ if (newinfo->underflow[i] == 0xFFFFFFFF) {
+ duprintf("Invalid underflow %u %u\n",
+ i, underflows[i]);
+ return -EINVAL;
+ }
+ }
+
+ if (!mark_source_chains(newinfo, valid_hooks, entry0)) {
+ duprintf("Looping hook\n");
+ return -ELOOP;
+ }
+
+ /* Finally, each sanity check must pass */
+ i = 0;
+ ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
+ find_check_entry, name, size, &i);
+
+ if (ret != 0) {
+ /* i counts the entries that passed; cleanup_entry() uses it as
+ * a countdown so only those entries are torn down again.
+ */
+ ARPT_ENTRY_ITERATE(entry0, newinfo->size,
+ cleanup_entry, &i);
+ return ret;
+ }
+
+ /* And one copy for every other CPU */
+ for_each_possible_cpu(i) {
+ if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+ memcpy(newinfo->entries[i], entry0, newinfo->size);
+ }
+
+ return ret;
+}
+
+/* Iterator callback: add the counters of entry @e onto slot *@i of
+ * @total, then advance *@i to the next slot.
+ */
+static inline int add_entry_to_counter(const struct arpt_entry *e,
+				       struct xt_counters total[],
+				       unsigned int *i)
+{
+	unsigned int slot = (*i)++;
+
+	ADD_COUNTER(total[slot], e->counters.bcnt, e->counters.pcnt);
+	return 0;
+}
+
+/* Iterator callback: overwrite slot *@i of @total with the counters of
+ * entry @e, then advance *@i to the next slot.
+ */
+static inline int set_entry_to_counter(const struct arpt_entry *e,
+				       struct xt_counters total[],
+				       unsigned int *i)
+{
+	unsigned int slot = (*i)++;
+
+	SET_COUNTER(total[slot], e->counters.bcnt, e->counters.pcnt);
+	return 0;
+}
+
+/* Fill @counters with the per-entry totals of table @t, summed over the
+ * entry copies of all possible CPUs.
+ */
+static void get_counters(const struct xt_table_info *t,
+			 struct xt_counters counters[])
+{
+	unsigned int this_cpu, other, slot;
+
+	/* Rather than zeroing the array first and adding every CPU, the
+	 * copy of the CPU we happen to run on initialises the array and
+	 * all remaining copies are added on top.  Preemption does not
+	 * matter here.
+	 */
+	this_cpu = raw_smp_processor_id();
+
+	slot = 0;
+	ARPT_ENTRY_ITERATE(t->entries[this_cpu], t->size,
+			   set_entry_to_counter, counters, &slot);
+
+	for_each_possible_cpu(other) {
+		if (other != this_cpu) {
+			slot = 0;
+			ARPT_ENTRY_ITERATE(t->entries[other], t->size,
+					   add_entry_to_counter,
+					   counters, &slot);
+		}
+	}
+}
+
+/* Allocate an array with one xt_counters slot per table entry and fill
+ * it via get_counters() while holding the table's write lock.
+ * Returns the array (to be released with vfree()) or ERR_PTR(-ENOMEM).
+ */
+static inline struct xt_counters *alloc_counters(struct xt_table *table)
+{
+	const struct xt_table_info *info = table->private;
+	unsigned int bytes;
+	struct xt_counters *snap;
+
+	/* The counters have to be read as one consistent snapshot; nothing
+	 * else changes underneath us (apart from comefrom, which user space
+	 * does not look at).
+	 */
+	bytes = sizeof(struct xt_counters) * info->number;
+	snap = vmalloc_node(bytes, numa_node_id());
+	if (!snap)
+		return ERR_PTR(-ENOMEM);
+
+	/* Sum the counters under the lock. */
+	write_lock_bh(&table->lock);
+	get_counters(info, snap);
+	write_unlock_bh(&table->lock);
+
+	return snap;
+}
+
+/* Copy the rule blob of @table (@total_size bytes) to @userptr.
+ *
+ * The blob is copied verbatim from this CPU's copy first; afterwards,
+ * for every entry, the counters field is overwritten with the summed
+ * snapshot from alloc_counters() and the target name field is
+ * overwritten with the kernel target's name.
+ *
+ * Returns 0, -EFAULT if a copy to user space fails, or the error from
+ * alloc_counters().
+ */
+static int copy_entries_to_user(unsigned int total_size,
+ struct xt_table *table,
+ void __user *userptr)
+{
+ unsigned int off, num;
+ struct arpt_entry *e;
+ struct xt_counters *counters;
+ struct xt_table_info *private = table->private;
+ int ret = 0;
+ void *loc_cpu_entry;
+
+ counters = alloc_counters(table);
+ if (IS_ERR(counters))
+ return PTR_ERR(counters);
+
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ /* ... then copy entire thing ... */
+ if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ /* FIXME: use iterator macros --RR */
+ /* ... then go back and fix counters and names */
+ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
+ struct arpt_entry_target *t;
+
+ e = (struct arpt_entry *)(loc_cpu_entry + off);
+ /* Entry number num gets slot num of the counter snapshot. */
+ if (copy_to_user(userptr + off
+ + offsetof(struct arpt_entry, counters),
+ &counters[num],
+ sizeof(counters[num])) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ t = arpt_get_target(e);
+ /* u.user.name and u.kernel.target are members of the same u, so
+ * the verbatim copy above did not leave a name in that field.
+ */
+ if (copy_to_user(userptr + off + e->target_offset
+ + offsetof(struct arpt_entry_target,
+ u.user.name),
+ t->u.kernel.target->name,
+ strlen(t->u.kernel.target->name)+1) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+ }
+
+ free_counters:
+ vfree(counters);
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+/* Convert a compat standard-target verdict at @src into a native int at
+ * @dst.  Positive values are increased by xt_compat_calc_jump(); all
+ * other values are copied unchanged.
+ */
+static void compat_standard_from_user(void *dst, void *src)
+{
+	int verdict = *(compat_int_t *)src;
+
+	if (verdict > 0)
+		verdict += xt_compat_calc_jump(NFPROTO_ARP, verdict);
+
+	memcpy(dst, &verdict, sizeof(verdict));
+}
+
+/* Convert the native standard-target verdict at @src to its compat
+ * form and copy it to user space at @dst.  Positive values are reduced
+ * by xt_compat_calc_jump().  Returns 0 or -EFAULT.
+ */
+static int compat_standard_to_user(void __user *dst, void *src)
+{
+	compat_int_t cverdict = *(int *)src;
+
+	if (cverdict > 0)
+		cverdict -= xt_compat_calc_jump(NFPROTO_ARP, cverdict);
+
+	if (copy_to_user(dst, &cverdict, sizeof(cverdict)))
+		return -EFAULT;
+	return 0;
+}
+
+/* Iterator callback: account for how much smaller entry @e is in the
+ * compat layout.
+ *
+ * @info:    native table info (read only)
+ * @base:    start of the native rule blob @e lives in
+ * @newinfo: compat view being computed; its size and hook offsets are
+ * reduced
+ *
+ * Returns 0 or the error from xt_compat_add_offset().
+ */
+static int compat_calc_entry(struct arpt_entry *e,
+ const struct xt_table_info *info,
+ void *base, struct xt_table_info *newinfo)
+{
+ struct arpt_entry_target *t;
+ unsigned int entry_offset;
+ int off, i, ret;
+
+ /* off = bytes this entry shrinks by: entry header difference ... */
+ off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
+ entry_offset = (void *)e - base;
+
+ t = arpt_get_target(e);
+ /* ... plus whatever xt_compat_target_offset() reports for the target */
+ off += xt_compat_target_offset(t->u.kernel.target);
+ newinfo->size -= off;
+ ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off);
+ if (ret)
+ return ret;
+
+ /* Hook entry points and underflows located after this entry move
+ * down by off as well; an offset of 0 is skipped.
+ */
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ if (info->hook_entry[i] &&
+ (e < (struct arpt_entry *)(base + info->hook_entry[i])))
+ newinfo->hook_entry[i] -= off;
+ if (info->underflow[i] &&
+ (e < (struct arpt_entry *)(base + info->underflow[i])))
+ newinfo->underflow[i] -= off;
+ }
+ return 0;
+}
+
+/* Derive the compat view of table info @info into @newinfo: copy all
+ * fields in front of entries[], reset initial_entries and let
+ * compat_calc_entry() adjust size and hook offsets entry by entry.
+ * Returns -EINVAL if either pointer is NULL, else the iterator result.
+ */
+static int compat_table_info(const struct xt_table_info *info,
+			     struct xt_table_info *newinfo)
+{
+	void *local;
+
+	if (info == NULL || newinfo == NULL)
+		return -EINVAL;
+
+	/* newinfo->entries[] is deliberately left untouched */
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	newinfo->initial_entries = 0;
+
+	local = info->entries[raw_smp_processor_id()];
+	return ARPT_ENTRY_ITERATE(local, info->size,
+				  compat_calc_entry, info, local, newinfo);
+}
+#endif
+
+/* ARPT_SO_GET_INFO: report hooks, hook offsets, entry count and size of
+ * the table named in the user buffer.
+ *
+ * @user:   in: table name; out: struct arpt_getinfo
+ * @len:    must point to sizeof(struct arpt_getinfo)
+ * @compat: non-zero to report sizes/offsets of the compat layout
+ *
+ * Returns 0, -EINVAL on a wrong length, -EFAULT on a failed user copy,
+ * -ENOENT or the lookup error if the table cannot be found.
+ */
+static int get_info(struct net *net, void __user *user, int *len, int compat)
+{
+	char name[ARPT_TABLE_MAXNAMELEN];
+	struct xt_table *t;
+	int ret;
+
+	if (*len != sizeof(struct arpt_getinfo)) {
+		duprintf("length %u != %Zu\n", *len,
+			 sizeof(struct arpt_getinfo));
+		return -EINVAL;
+	}
+
+	if (copy_from_user(name, user, sizeof(name)) != 0)
+		return -EFAULT;
+
+	name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
+#ifdef CONFIG_COMPAT
+	if (compat)
+		xt_compat_lock(NFPROTO_ARP);
+#endif
+	t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
+				    "arptable_%s", name);
+	if (t && !IS_ERR(t)) {
+		struct arpt_getinfo info;
+		const struct xt_table_info *private = t->private;
+#ifdef CONFIG_COMPAT
+		/* Declared at this scope because 'private' may point at it
+		 * until the end of the block.
+		 */
+		struct xt_table_info tmp;
+
+		if (compat) {
+			/* NOTE(review): a failure here is not reported; ret
+			 * is overwritten below.
+			 */
+			ret = compat_table_info(private, &tmp);
+			xt_compat_flush_offsets(NFPROTO_ARP);
+			private = &tmp;
+		}
+#endif
+		/* info goes to user space as a whole: clear it first so the
+		 * bytes behind the name's NUL and any padding do not carry
+		 * stale kernel stack contents.
+		 */
+		memset(&info, 0, sizeof(info));
+		info.valid_hooks = t->valid_hooks;
+		memcpy(info.hook_entry, private->hook_entry,
+		       sizeof(info.hook_entry));
+		memcpy(info.underflow, private->underflow,
+		       sizeof(info.underflow));
+		info.num_entries = private->number;
+		info.size = private->size;
+		strcpy(info.name, name);
+
+		if (copy_to_user(user, &info, *len) != 0)
+			ret = -EFAULT;
+		else
+			ret = 0;
+		xt_table_unlock(t);
+		module_put(t->me);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+#ifdef CONFIG_COMPAT
+	if (compat)
+		xt_compat_unlock(NFPROTO_ARP);
+#endif
+	return ret;
+}
+
+/* ARPT_SO_GET_ENTRIES: copy the rule blob of the table named in @uptr
+ * back to user space.
+ *
+ * *@len must equal sizeof(struct arpt_get_entries) plus the size field
+ * of the request, and that size must match the table's current size.
+ *
+ * Returns 0, -EINVAL on a bad length, -EFAULT on a failed user copy,
+ * -EAGAIN if the table size differs from the requested size, -ENOENT
+ * or the lookup error if the table cannot be found.
+ */
+static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
+		       int *len)
+{
+	int ret;
+	struct arpt_get_entries get;
+	struct xt_table *t;
+
+	if (*len < sizeof(get)) {
+		duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
+		return -EINVAL;
+	}
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+	if (*len != sizeof(struct arpt_get_entries) + get.size) {
+		duprintf("get_entries: %u != %Zu\n", *len,
+			 sizeof(struct arpt_get_entries) + get.size);
+		return -EINVAL;
+	}
+	/* The name comes from user space and is used as a C string below. */
+	get.name[sizeof(get.name) - 1] = '\0';
+
+	t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
+	if (t && !IS_ERR(t)) {
+		const struct xt_table_info *private = t->private;
+
+		duprintf("t->private->number = %u\n",
+			 private->number);
+		if (get.size == private->size)
+			ret = copy_entries_to_user(private->size,
+						   t, uptr->entrytable);
+		else {
+			duprintf("get_entries: I've got %u not %u!\n",
+				 private->size, get.size);
+			ret = -EAGAIN;
+		}
+		module_put(t->me);
+		xt_table_unlock(t);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+
+	return ret;
+}
+
+/* Install @newinfo as the rule set of table @name and hand the old
+ * rule set's counters to user space.
+ *
+ * @valid_hooks:  hook mask claimed by the caller; must match the table
+ * @num_counters: number of counter slots at @counters_ptr
+ *
+ * Returns 0 on success.  A non-zero return means the table was NOT
+ * replaced and @newinfo still belongs to the caller; the callers rely
+ * on this to clean up and free @newinfo.
+ */
+static int __do_replace(struct net *net, const char *name,
+			unsigned int valid_hooks,
+			struct xt_table_info *newinfo,
+			unsigned int num_counters,
+			void __user *counters_ptr)
+{
+	int ret;
+	struct xt_table *t;
+	struct xt_table_info *oldinfo;
+	struct xt_counters *counters;
+	void *loc_cpu_old_entry;
+
+	ret = 0;
+	counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
+				numa_node_id());
+	if (!counters) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
+				    "arptable_%s", name);
+	if (!t || IS_ERR(t)) {
+		ret = t ? PTR_ERR(t) : -ENOENT;
+		goto free_newinfo_counters_untrans;
+	}
+
+	/* You lied! */
+	if (valid_hooks != t->valid_hooks) {
+		duprintf("Valid hook crap: %08X vs %08X\n",
+			 valid_hooks, t->valid_hooks);
+		ret = -EINVAL;
+		goto put_module;
+	}
+
+	oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
+	if (!oldinfo)
+		goto put_module;
+
+	/* Update module usage count based on number of rules */
+	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
+		 oldinfo->number, oldinfo->initial_entries, newinfo->number);
+	if ((oldinfo->number > oldinfo->initial_entries) ||
+	    (newinfo->number <= oldinfo->initial_entries))
+		module_put(t->me);
+	if ((oldinfo->number > oldinfo->initial_entries) &&
+	    (newinfo->number <= oldinfo->initial_entries))
+		module_put(t->me);
+
+	/* Get the old counters. */
+	get_counters(oldinfo, counters);
+	/* Decrease module usage counts and free resource */
+	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+	ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
+			   NULL);
+
+	xt_free_table_info(oldinfo);
+	/* The new table is in place from here on, so this function must
+	 * not report failure any more: the callers would tear down and
+	 * free newinfo although it is in use.  A failed counter copy is
+	 * therefore only logged.
+	 */
+	if (copy_to_user(counters_ptr, counters,
+			 sizeof(struct xt_counters) * num_counters) != 0)
+		printk(KERN_WARNING "arp_tables: counters copy to user "
+		       "failed while replacing table\n");
+	vfree(counters);
+	xt_table_unlock(t);
+	return ret;
+
+ put_module:
+	module_put(t->me);
+	xt_table_unlock(t);
+ free_newinfo_counters_untrans:
+	vfree(counters);
+ out:
+	return ret;
+}
+
+/* ARPT_SO_SET_REPLACE: read a struct arpt_replace plus rule blob from
+ * @user, validate it with translate_table() and install it through
+ * __do_replace().
+ *
+ * Returns 0, -EFAULT on a failed user copy, -ENOMEM on allocation
+ * failure or an oversized counter count, or the error from
+ * translation/replacement.
+ */
+static int do_replace(struct net *net, void __user *user, unsigned int len)
+{
+	int ret;
+	struct arpt_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* overflow check */
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+	/* The name is user-controlled and is later used as a C string,
+	 * including as "%s" argument of the module request.
+	 */
+	tmp.name[sizeof(tmp.name) - 1] = '\0';
+
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
+
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	ret = translate_table(tmp.name, tmp.valid_hooks,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
+			      tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+
+	duprintf("arp_tables: Translated table\n");
+
+	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+			   tmp.num_counters, tmp.counters);
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	/* Entries were already translated: undo that before freeing. */
+	ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+ free_newinfo:
+	xt_free_table_info(newinfo);
+	return ret;
+}
+
+/* Iterator callback: add slot *@i of @addme to the counters of entry
+ * @e and move on to the next slot.  The original note says the caller
+ * is lazy and adds to the first CPU only, relying on overflow to make
+ * everything come out right.
+ */
+static inline int add_counter_to_entry(struct arpt_entry *e,
+				       const struct xt_counters addme[],
+				       unsigned int *i)
+{
+	const struct xt_counters *delta = &addme[*i];
+
+	ADD_COUNTER(e->counters, delta->bcnt, delta->pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+/* ARPT_SO_SET_ADD_COUNTERS: add user-supplied counter values to the
+ * entries of a table.
+ *
+ * @user:   struct xt_counters_info (or its compat variant) followed by
+ *          num_counters struct xt_counters
+ * @len:    total length of that buffer
+ * @compat: non-zero if the header uses the compat layout
+ *
+ * Returns 0, -EFAULT on a failed user copy, -EINVAL if @len or the
+ * counter count do not match, -ENOMEM, -ENOENT or the lookup error.
+ */
+static int do_add_counters(struct net *net, void __user *user, unsigned int len,
+			   int compat)
+{
+	unsigned int i;
+	struct xt_counters_info tmp;
+	struct xt_counters *paddc;
+	unsigned int num_counters;
+	const char *name;
+	int size;
+	void *ptmp;
+	struct xt_table *t;
+	const struct xt_table_info *private;
+	int ret = 0;
+	void *loc_cpu_entry;
+#ifdef CONFIG_COMPAT
+	struct compat_xt_counters_info compat_tmp;
+
+	if (compat) {
+		ptmp = &compat_tmp;
+		size = sizeof(struct compat_xt_counters_info);
+	} else
+#endif
+	{
+		ptmp = &tmp;
+		size = sizeof(struct xt_counters_info);
+	}
+
+	if (copy_from_user(ptmp, user, size) != 0)
+		return -EFAULT;
+
+	/* The table name comes from user space and is used as a C string
+	 * for the lookup below, so force termination in either layout.
+	 */
+#ifdef CONFIG_COMPAT
+	if (compat) {
+		num_counters = compat_tmp.num_counters;
+		compat_tmp.name[sizeof(compat_tmp.name) - 1] = '\0';
+		name = compat_tmp.name;
+	} else
+#endif
+	{
+		num_counters = tmp.num_counters;
+		tmp.name[sizeof(tmp.name) - 1] = '\0';
+		name = tmp.name;
+	}
+
+	if (len != size + num_counters * sizeof(struct xt_counters))
+		return -EINVAL;
+
+	paddc = vmalloc_node(len - size, numa_node_id());
+	if (!paddc)
+		return -ENOMEM;
+
+	if (copy_from_user(paddc, user + size, len - size) != 0) {
+		ret = -EFAULT;
+		goto free;
+	}
+
+	t = xt_find_table_lock(net, NFPROTO_ARP, name);
+	if (!t || IS_ERR(t)) {
+		ret = t ? PTR_ERR(t) : -ENOENT;
+		goto free;
+	}
+
+	write_lock_bh(&t->lock);
+	private = t->private;
+	/* One counter per table entry, no more and no less. */
+	if (private->number != num_counters) {
+		ret = -EINVAL;
+		goto unlock_up_free;
+	}
+
+	i = 0;
+	/* Choose the copy that is on our node */
+	loc_cpu_entry = private->entries[smp_processor_id()];
+	ARPT_ENTRY_ITERATE(loc_cpu_entry,
+			   private->size,
+			   add_counter_to_entry,
+			   paddc,
+			   &i);
+ unlock_up_free:
+	write_unlock_bh(&t->lock);
+	xt_table_unlock(t);
+	module_put(t->me);
+ free:
+	vfree(paddc);
+
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+/* Iterator callback: put the module of the target attached to compat
+ * entry @e.  A non-NULL @i is a countdown that is decremented on every
+ * call; once it had already reached zero the function returns 1 and
+ * leaves the entry alone.
+ */
+static inline int
+compat_release_entry(struct compat_arpt_entry *e, unsigned int *i)
+{
+	struct arpt_entry_target *tgt;
+
+	if (i != NULL) {
+		unsigned int left = (*i)--;
+
+		if (left == 0)
+			return 1;
+	}
+
+	tgt = compat_arpt_get_target(e);
+	module_put(tgt->u.kernel.target->me);
+	return 0;
+}
+
+/* First-pass validation of one entry of a compat rule blob.
+ *
+ * Checks alignment and bounds, runs check_entry(), looks up the target
+ * (requesting module "arpt_<name>" if needed), records by how much the
+ * entry grows in native layout (added to *@size and registered with
+ * xt_compat_add_offset()), notes hook entry/underflow offsets in
+ * @newinfo and clears counters and comefrom.  *@i counts accepted
+ * entries.
+ *
+ * Returns 0, -EINVAL, -ENOENT, or the error of a failed helper.
+ */
+static inline int
+check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
+				  struct xt_table_info *newinfo,
+				  unsigned int *size,
+				  unsigned char *base,
+				  unsigned char *limit,
+				  unsigned int *hook_entries,
+				  unsigned int *underflows,
+				  unsigned int *i,
+				  const char *name)
+{
+	struct arpt_entry_target *t;
+	struct xt_target *target;
+	unsigned int entry_offset;
+	int ret, off, h;
+
+	duprintf("check_compat_entry_size_and_hooks %p\n", e);
+	/* Aligned, header inside the blob, and the whole entry
+	 * (next_offset bytes) inside the blob.  next_offset is only read
+	 * after the header check because || evaluates left to right.
+	 */
+	if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0
+	    || (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit
+	    || (unsigned char *)e + e->next_offset > limit) {
+		duprintf("Bad offset %p, limit = %p\n", e, limit);
+		return -EINVAL;
+	}
+
+	if (e->next_offset < sizeof(struct compat_arpt_entry) +
+			     sizeof(struct compat_xt_entry_target)) {
+		duprintf("checking: element %p size %u\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
+
+	/* For purposes of check_entry casting the compat entry is fine */
+	ret = check_entry((struct arpt_entry *)e, name);
+	if (ret)
+		return ret;
+
+	off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
+	entry_offset = (void *)e - (void *)base;
+
+	t = compat_arpt_get_target(e);
+	target = try_then_request_module(xt_find_target(NFPROTO_ARP,
+							t->u.user.name,
+							t->u.user.revision),
+					 "arpt_%s", t->u.user.name);
+	if (IS_ERR(target) || !target) {
+		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
+			 t->u.user.name);
+		ret = target ? PTR_ERR(target) : -ENOENT;
+		goto out;
+	}
+	t->u.kernel.target = target;
+
+	off += xt_compat_target_offset(target);
+	*size += off;
+	ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off);
+	if (ret)
+		goto release_target;
+
+	/* Check hooks & underflows */
+	for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+		if ((unsigned char *)e - base == hook_entries[h])
+			newinfo->hook_entry[h] = hook_entries[h];
+		if ((unsigned char *)e - base == underflows[h])
+			newinfo->underflow[h] = underflows[h];
+	}
+
+	/* Clear counters and comefrom */
+	memset(&e->counters, 0, sizeof(e->counters));
+	e->comefrom = 0;
+
+	(*i)++;
+	return 0;
+
+release_target:
+	module_put(t->u.kernel.target->me);
+out:
+	return ret;
+}
+
+/* Iterator callback: expand compat entry @e into native layout at
+ * *@dstptr.
+ *
+ * @dstptr:  write cursor in the native blob; advanced past the entry
+ * @size:    running size, increased by the growth of this entry
+ * @newinfo: its hook_entry[]/underflow[] offsets located after this
+ * entry are moved up by the growth of this entry
+ * @base:    start of the native blob
+ *
+ * Always returns 0.  @name is not used, and the local 'target' is
+ * assigned but never read.
+ */
+static int
+compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
+ unsigned int *size, const char *name,
+ struct xt_table_info *newinfo, unsigned char *base)
+{
+ struct arpt_entry_target *t;
+ struct xt_target *target;
+ struct arpt_entry *de;
+ unsigned int origsize;
+ int ret, h;
+
+ ret = 0;
+ origsize = *size;
+ de = (struct arpt_entry *)*dstptr;
+ memcpy(de, e, sizeof(struct arpt_entry));
+ /* counters is copied again from its position in the compat struct */
+ memcpy(&de->counters, &e->counters, sizeof(e->counters));
+
+ *dstptr += sizeof(struct arpt_entry);
+ *size += sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
+
+ /* origsize - *size is computed in unsigned arithmetic; subtracting it
+ * adds the growth accumulated so far for this entry.
+ */
+ de->target_offset = e->target_offset - (origsize - *size);
+ t = compat_arpt_get_target(e);
+ target = t->u.kernel.target;
+ xt_compat_target_from_user(t, dstptr, size);
+
+ de->next_offset = e->next_offset - (origsize - *size);
+ for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+ if ((unsigned char *)de - base < newinfo->hook_entry[h])
+ newinfo->hook_entry[h] -= origsize - *size;
+ if ((unsigned char *)de - base < newinfo->underflow[h])
+ newinfo->underflow[h] -= origsize - *size;
+ }
+ return ret;
+}
+
+/* Iterator callback for translated compat tables: run check_target()
+ * on entry @e and count it in *@i if the check passes.  Returns the
+ * result of check_target().
+ */
+static inline int compat_check_entry(struct arpt_entry *e, const char *name,
+				     unsigned int *i)
+{
+	int err = check_target(e, name);
+
+	if (!err)
+		(*i)++;
+	return err;
+}
+
+/* Validate a compat rule blob and convert it into a newly allocated
+ * native table.
+ *
+ * @pinfo, @pentry0: in: table info and blob holding the compat entries;
+ * out (success only): the new native table info and its blob.  The
+ * input table info is freed on success.
+ * @total_size, @number: size in bytes and entry count of the compat blob
+ * @hook_entries, @underflows: per-hook offsets claimed by the caller
+ *
+ * Returns 0 on success, otherwise a negative error; on failure *@pinfo
+ * and *@pentry0 are left as passed in.
+ */
+static int translate_compat_table(const char *name,
+ unsigned int valid_hooks,
+ struct xt_table_info **pinfo,
+ void **pentry0,
+ unsigned int total_size,
+ unsigned int number,
+ unsigned int *hook_entries,
+ unsigned int *underflows)
+{
+ unsigned int i, j;
+ struct xt_table_info *newinfo, *info;
+ void *pos, *entry0, *entry1;
+ unsigned int size;
+ int ret;
+
+ info = *pinfo;
+ entry0 = *pentry0;
+ size = total_size;
+ info->number = number;
+
+ /* Init all hooks to impossible value. */
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ info->hook_entry[i] = 0xFFFFFFFF;
+ info->underflow[i] = 0xFFFFFFFF;
+ }
+
+ duprintf("translate_compat_table: size %u\n", info->size);
+ j = 0;
+ xt_compat_lock(NFPROTO_ARP);
+ /* Walk through entries, checking offsets. */
+ /* j counts accepted entries; size grows to the native blob size. */
+ ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size,
+ check_compat_entry_size_and_hooks,
+ info, &size, entry0,
+ entry0 + total_size,
+ hook_entries, underflows, &j, name);
+ if (ret != 0)
+ goto out_unlock;
+
+ ret = -EINVAL;
+ if (j != number) {
+ duprintf("translate_compat_table: %u not %u entries\n",
+ j, number);
+ goto out_unlock;
+ }
+
+ /* Check hooks all assigned */
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ /* Only hooks which are valid */
+ if (!(valid_hooks & (1 << i)))
+ continue;
+ if (info->hook_entry[i] == 0xFFFFFFFF) {
+ duprintf("Invalid hook entry %u %u\n",
+ i, hook_entries[i]);
+ goto out_unlock;
+ }
+ if (info->underflow[i] == 0xFFFFFFFF) {
+ duprintf("Invalid underflow %u %u\n",
+ i, underflows[i]);
+ goto out_unlock;
+ }
+ }
+
+ ret = -ENOMEM;
+ newinfo = xt_alloc_table_info(size);
+ if (!newinfo)
+ goto out_unlock;
+
+ newinfo->number = number;
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = info->hook_entry[i];
+ newinfo->underflow[i] = info->underflow[i];
+ }
+ entry1 = newinfo->entries[raw_smp_processor_id()];
+ pos = entry1;
+ size = total_size;
+ /* Second walk: expand every compat entry into entry1. */
+ ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size,
+ compat_copy_entry_from_user,
+ &pos, &size, name, newinfo, entry1);
+ xt_compat_flush_offsets(NFPROTO_ARP);
+ xt_compat_unlock(NFPROTO_ARP);
+ if (ret)
+ goto free_newinfo;
+
+ ret = -ELOOP;
+ if (!mark_source_chains(newinfo, valid_hooks, entry1))
+ goto free_newinfo;
+
+ i = 0;
+ ret = ARPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
+ name, &i);
+ if (ret) {
+ /* i entries passed compat_check_entry().  The remaining j - i
+ * entries only have their target module put
+ * (compat_release_entry, starting at entry i); the first i get
+ * the full cleanup_entry() treatment.
+ */
+ j -= i;
+ COMPAT_ARPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
+ compat_release_entry, &j);
+ ARPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
+ xt_free_table_info(newinfo);
+ return ret;
+ }
+
+ /* And one copy for every other CPU */
+ for_each_possible_cpu(i)
+ if (newinfo->entries[i] && newinfo->entries[i] != entry1)
+ memcpy(newinfo->entries[i], entry1, newinfo->size);
+
+ *pinfo = newinfo;
+ *pentry0 = entry1;
+ xt_free_table_info(info);
+ return 0;
+
+free_newinfo:
+ xt_free_table_info(newinfo);
+out:
+ /* Put the target modules of the j entries counted in the first walk. */
+ COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(NFPROTO_ARP);
+ xt_compat_unlock(NFPROTO_ARP);
+ goto out;
+}
+
+/* Compat layout of the replace request read from user space by
+ * compat_do_replace(); the rule blob follows the fixed part.
+ */
+struct compat_arpt_replace {
+ char name[ARPT_TABLE_MAXNAMELEN];
+ u32 valid_hooks;
+ u32 num_entries;
+ u32 size;
+ u32 hook_entry[NF_ARP_NUMHOOKS];
+ u32 underflow[NF_ARP_NUMHOOKS];
+ u32 num_counters;
+ /* user pointer, converted with compat_ptr() before use */
+ compat_uptr_t counters;
+ struct compat_arpt_entry entries[0];
+};
+
+/* Compat variant of ARPT_SO_SET_REPLACE: read a struct
+ * compat_arpt_replace plus compat rule blob from @user, convert it with
+ * translate_compat_table() and install it through __do_replace().
+ *
+ * Returns 0, -EFAULT on a failed user copy, -ENOMEM on allocation
+ * failure or oversized size/counter values, or the error from
+ * translation/replacement.
+ */
+static int compat_do_replace(struct net *net, void __user *user,
+			     unsigned int len)
+{
+	int ret;
+	struct compat_arpt_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* overflow check */
+	if (tmp.size >= INT_MAX / num_possible_cpus())
+		return -ENOMEM;
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+	/* The name is user-controlled and is later used as a C string,
+	 * including as "%s" argument of the module request.
+	 */
+	tmp.name[sizeof(tmp.name) - 1] = '\0';
+
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
+
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	/* On success newinfo and loc_cpu_entry refer to the native table. */
+	ret = translate_compat_table(tmp.name, tmp.valid_hooks,
+				     &newinfo, &loc_cpu_entry, tmp.size,
+				     tmp.num_entries, tmp.hook_entry,
+				     tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+
+	duprintf("compat_do_replace: Translated table\n");
+
+	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+			   tmp.num_counters, compat_ptr(tmp.counters));
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+ free_newinfo:
+	xt_free_table_info(newinfo);
+	return ret;
+}
+
+/* Compat setsockopt entry point.  Requires CAP_NET_ADMIN (-EPERM
+ * otherwise) and dispatches on @cmd; unknown commands yield -EINVAL.
+ */
+static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
+				  unsigned int len)
+{
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (cmd == ARPT_SO_SET_REPLACE)
+		return compat_do_replace(sock_net(sk), user, len);
+
+	if (cmd == ARPT_SO_SET_ADD_COUNTERS)
+		return do_add_counters(sock_net(sk), user, len, 1);
+
+	duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
+	return -EINVAL;
+}
+
+/* Iterator callback: write native entry @e to user space in compat
+ * layout.
+ *
+ * @dstptr:   user-space write cursor; advanced past the written entry
+ * @size:     running size, reduced by how much this entry shrinks
+ * @counters: counter snapshot; slot *@i belongs to this entry
+ * @i:        entry index, incremented on success
+ *
+ * Returns 0, -EFAULT on a failed user copy, or the error from
+ * xt_compat_target_to_user().
+ */
+static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
+ compat_uint_t *size,
+ struct xt_counters *counters,
+ unsigned int *i)
+{
+ struct arpt_entry_target *t;
+ struct compat_arpt_entry __user *ce;
+ u_int16_t target_offset, next_offset;
+ compat_uint_t origsize;
+ int ret;
+
+ ret = -EFAULT;
+ origsize = *size;
+ ce = (struct compat_arpt_entry __user *)*dstptr;
+ /* NOTE(review): copies sizeof(struct arpt_entry) bytes, i.e. the
+ * native header size, to the compat destination.
+ */
+ if (copy_to_user(ce, e, sizeof(struct arpt_entry)))
+ goto out;
+
+ if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
+ goto out;
+
+ *dstptr += sizeof(struct compat_arpt_entry);
+ *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
+
+ /* origsize - *size = bytes this entry has shrunk by so far */
+ target_offset = e->target_offset - (origsize - *size);
+
+ t = arpt_get_target(e);
+ ret = xt_compat_target_to_user(t, dstptr, size);
+ if (ret)
+ goto out;
+ ret = -EFAULT;
+ next_offset = e->next_offset - (origsize - *size);
+ /* Patch the adjusted offsets into the already copied header. */
+ if (put_user(target_offset, &ce->target_offset))
+ goto out;
+ if (put_user(next_offset, &ce->next_offset))
+ goto out;
+
+ (*i)++;
+ return 0;
+out:
+ return ret;
+}
+
+/* Write all entries of @table to @userptr in compat layout, using a
+ * counter snapshot from alloc_counters().  @total_size is the size of
+ * the native blob.  Returns the iterator result or the error from
+ * alloc_counters().
+ */
+static int compat_copy_entries_to_user(unsigned int total_size,
+				       struct xt_table *table,
+				       void __user *userptr)
+{
+	const struct xt_table_info *info = table->private;
+	struct xt_counters *snap;
+	void __user *out = userptr;
+	unsigned int remaining = total_size;
+	unsigned int idx = 0;
+	void *local;
+	int err;
+
+	snap = alloc_counters(table);
+	if (IS_ERR(snap))
+		return PTR_ERR(snap);
+
+	/* walk the copy that belongs to our node/cpu */
+	local = info->entries[raw_smp_processor_id()];
+	err = ARPT_ENTRY_ITERATE(local, total_size,
+				 compat_copy_entry_to_user,
+				 &out, &remaining, snap, &idx);
+	vfree(snap);
+	return err;
+}
+
+/* Compat layout of the get-entries request handled by
+ * compat_get_entries(): table name and expected blob size, followed by
+ * room for the entries.
+ */
+struct compat_arpt_get_entries {
+ char name[ARPT_TABLE_MAXNAMELEN];
+ compat_uint_t size;
+ struct compat_arpt_entry entrytable[0];
+};
+
+/* Compat variant of ARPT_SO_GET_ENTRIES: copy the rule blob of the
+ * table named in @uptr to user space in compat layout.
+ *
+ * *@len must equal sizeof(struct compat_arpt_get_entries) plus the size
+ * field of the request, and that size must match the table's size in
+ * compat layout as computed by compat_table_info().
+ *
+ * Returns 0, -EINVAL on a bad length, -EFAULT on a failed user copy,
+ * -EAGAIN on a size mismatch, -ENOENT or the lookup error.
+ */
+static int compat_get_entries(struct net *net,
+			      struct compat_arpt_get_entries __user *uptr,
+			      int *len)
+{
+	int ret;
+	struct compat_arpt_get_entries get;
+	struct xt_table *t;
+
+	if (*len < sizeof(get)) {
+		duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+		return -EINVAL;
+	}
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+	if (*len != sizeof(struct compat_arpt_get_entries) + get.size) {
+		duprintf("compat_get_entries: %u != %zu\n",
+			 *len, sizeof(get) + get.size);
+		return -EINVAL;
+	}
+	/* The name comes from user space and is used as a C string below. */
+	get.name[sizeof(get.name) - 1] = '\0';
+
+	xt_compat_lock(NFPROTO_ARP);
+	t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
+	if (t && !IS_ERR(t)) {
+		const struct xt_table_info *private = t->private;
+		struct xt_table_info info;
+
+		duprintf("t->private->number = %u\n", private->number);
+		ret = compat_table_info(private, &info);
+		if (!ret && get.size == info.size) {
+			ret = compat_copy_entries_to_user(private->size,
+							  t, uptr->entrytable);
+		} else if (!ret) {
+			duprintf("compat_get_entries: I've got %u not %u!\n",
+				 private->size, get.size);
+			ret = -EAGAIN;
+		}
+		xt_compat_flush_offsets(NFPROTO_ARP);
+		module_put(t->me);
+		xt_table_unlock(t);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+
+	xt_compat_unlock(NFPROTO_ARP);
+	return ret;
+}
+
+static int do_arpt_get_ctl(struct sock *, int, void __user *, int *);
+
+/* Compat getsockopt entry point.  Requires CAP_NET_ADMIN (-EPERM
+ * otherwise).  GET_INFO and GET_ENTRIES are handled in compat mode;
+ * every other command is passed to do_arpt_get_ctl().
+ */
+static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
+				  int *len)
+{
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (cmd == ARPT_SO_GET_INFO)
+		return get_info(sock_net(sk), user, len, 1);
+
+	if (cmd == ARPT_SO_GET_ENTRIES)
+		return compat_get_entries(sock_net(sk), user, len);
+
+	return do_arpt_get_ctl(sk, cmd, user, len);
+}
+#endif
+
+/* setsockopt entry point.  Requires CAP_NET_ADMIN (-EPERM otherwise)
+ * and dispatches on @cmd; unknown commands yield -EINVAL.
+ */
+static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (cmd == ARPT_SO_SET_REPLACE)
+		return do_replace(sock_net(sk), user, len);
+
+	if (cmd == ARPT_SO_SET_ADD_COUNTERS)
+		return do_add_counters(sock_net(sk), user, len, 0);
+
+	duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
+	return -EINVAL;
+}
+
+/* getsockopt entry point.  Requires CAP_NET_ADMIN (-EPERM otherwise).
+ *
+ * ARPT_SO_GET_INFO and ARPT_SO_GET_ENTRIES go to get_info() and
+ * get_entries(); ARPT_SO_GET_REVISION_TARGET looks up a target revision
+ * (requesting module "arpt_<name>" if needed), with the result placed
+ * in the return value by xt_find_revision().  Unknown commands yield
+ * -EINVAL.
+ */
+static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case ARPT_SO_GET_INFO:
+		ret = get_info(sock_net(sk), user, len, 0);
+		break;
+
+	case ARPT_SO_GET_ENTRIES:
+		ret = get_entries(sock_net(sk), user, len);
+		break;
+
+	case ARPT_SO_GET_REVISION_TARGET: {
+		struct xt_get_revision rev;
+
+		if (*len != sizeof(rev)) {
+			ret = -EINVAL;
+			break;
+		}
+		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
+			ret = -EFAULT;
+			break;
+		}
+		/* The name is user-controlled and ends up as "%s" argument
+		 * of the module request: make sure it is terminated.
+		 */
+		rev.name[sizeof(rev.name) - 1] = '\0';
+
+		try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name,
+							 rev.revision, 1, &ret),
+					"arpt_%s", rev.name);
+		break;
+	}
+
+	default:
+		duprintf("do_arpt_get_ctl: unknown request %i\n", cmd);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+/* Build the initial rule set of @table from @repl, validate it with
+ * translate_table() and register the table for @net.
+ * Returns the registered table or an ERR_PTR() value.
+ */
+struct xt_table *arpt_register_table(struct net *net, struct xt_table *table,
+				     const struct arpt_replace *repl)
+{
+	struct xt_table_info bootstrap
+		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	struct xt_table_info *info;
+	struct xt_table *registered;
+	void *local;
+	int err;
+
+	info = xt_alloc_table_info(repl->size);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	/* fill the copy that belongs to our node/cpu */
+	local = info->entries[raw_smp_processor_id()];
+	memcpy(local, repl->entries, repl->size);
+
+	err = translate_table(table->name, table->valid_hooks,
+			      info, local, repl->size,
+			      repl->num_entries,
+			      repl->hook_entry,
+			      repl->underflow);
+
+	duprintf("arpt_register_table: translate table gives %d\n", err);
+	if (err != 0)
+		goto free_info;
+
+	registered = xt_register_table(net, table, &bootstrap, info);
+	if (!IS_ERR(registered))
+		return registered;
+	err = PTR_ERR(registered);
+
+free_info:
+	xt_free_table_info(info);
+	return ERR_PTR(err);
+}
+
+/* Unregister @table, run cleanup_entry() on all of its entries and free
+ * the table info.  The owning module is put once more if the table
+ * held more entries than its initial set.
+ */
+void arpt_unregister_table(struct xt_table *table)
+{
+	/* read the owner before the table is unregistered */
+	struct module *owner = table->me;
+	struct xt_table_info *info;
+	void *local;
+
+	info = xt_unregister_table(table);
+
+	/* Decrease module usage counts and free resources */
+	local = info->entries[raw_smp_processor_id()];
+	ARPT_ENTRY_ITERATE(local, info->size, cleanup_entry, NULL);
+
+	if (info->number > info->initial_entries)
+		module_put(owner);
+
+	xt_free_table_info(info);
+}
+
+/* The built-in targets: standard (NULL) and error. */
+/* The standard target has no .target function; its payload is a single
+ * int, converted to/from compat_int_t by the compat helpers.
+ */
+static struct xt_target arpt_standard_target __read_mostly = {
+ .name = ARPT_STANDARD_TARGET,
+ .targetsize = sizeof(int),
+ .family = NFPROTO_ARP,
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(compat_int_t),
+ .compat_from_user = compat_standard_from_user,
+ .compat_to_user = compat_standard_to_user,
+#endif
+};
+
+/* The error target: handled by arpt_error(), with a payload of
+ * ARPT_FUNCTION_MAXNAMELEN bytes.
+ */
+static struct xt_target arpt_error_target __read_mostly = {
+ .name = ARPT_ERROR_TARGET,
+ .target = arpt_error,
+ .targetsize = ARPT_FUNCTION_MAXNAMELEN,
+ .family = NFPROTO_ARP,
+};
+
+/* Socket option registration: routes the ARPT_* set/get option ranges
+ * to the handlers in this file (and to the compat handlers when
+ * CONFIG_COMPAT is set).
+ */
+static struct nf_sockopt_ops arpt_sockopts = {
+ .pf = PF_INET,
+ .set_optmin = ARPT_BASE_CTL,
+ .set_optmax = ARPT_SO_SET_MAX+1,
+ .set = do_arpt_set_ctl,
+#ifdef CONFIG_COMPAT
+ .compat_set = compat_do_arpt_set_ctl,
+#endif
+ .get_optmin = ARPT_BASE_CTL,
+ .get_optmax = ARPT_SO_GET_MAX+1,
+ .get = do_arpt_get_ctl,
+#ifdef CONFIG_COMPAT
+ .compat_get = compat_do_arpt_get_ctl,
+#endif
+ .owner = THIS_MODULE,
+};
+
+/* Per-namespace init: calls xt_proto_init() for NFPROTO_ARP on @net
+ * and returns its result.
+ */
+static int __net_init arp_tables_net_init(struct net *net)
+{
+ return xt_proto_init(net, NFPROTO_ARP);
+}
+
+/* Per-namespace exit: calls xt_proto_fini() for NFPROTO_ARP on @net. */
+static void __net_exit arp_tables_net_exit(struct net *net)
+{
+ xt_proto_fini(net, NFPROTO_ARP);
+}
+
+/* Per-namespace hooks, registered by arp_tables_init(). */
+static struct pernet_operations arp_tables_net_ops = {
+ .init = arp_tables_net_init,
+ .exit = arp_tables_net_exit,
+};
+
+/* Module init: register the per-namespace ops, the two built-in targets
+ * and the socket options, in that order.  If a step fails, the steps
+ * already done are undone in reverse order and the error is returned.
+ */
+static int __init arp_tables_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&arp_tables_net_ops);
+ if (ret < 0)
+ goto err1;
+
+ /* No one else will be downing sem now, so we won't sleep */
+ ret = xt_register_target(&arpt_standard_target);
+ if (ret < 0)
+ goto err2;
+ ret = xt_register_target(&arpt_error_target);
+ if (ret < 0)
+ goto err3;
+
+ /* Register setsockopt */
+ ret = nf_register_sockopt(&arpt_sockopts);
+ if (ret < 0)
+ goto err4;
+
+ printk(KERN_INFO "arp_tables: (C) 2002 David S. Miller\n");
+ return 0;
+
+ /* Unwind: each label undoes the step before the one that failed. */
+err4:
+ xt_unregister_target(&arpt_error_target);
+err3:
+ xt_unregister_target(&arpt_standard_target);
+err2:
+ unregister_pernet_subsys(&arp_tables_net_ops);
+err1:
+ return ret;
+}
+
+/* Module exit: undo arp_tables_init() in reverse order. */
+static void __exit arp_tables_fini(void)
+{
+ nf_unregister_sockopt(&arpt_sockopts);
+ xt_unregister_target(&arpt_error_target);
+ xt_unregister_target(&arpt_standard_target);
+ unregister_pernet_subsys(&arp_tables_net_ops);
+}
+
+/* Symbols exported to other modules. */
+EXPORT_SYMBOL(arpt_register_table);
+EXPORT_SYMBOL(arpt_unregister_table);
+EXPORT_SYMBOL(arpt_do_table);
+
+/* Module entry and exit points. */
+module_init(arp_tables_init);
+module_exit(arp_tables_fini);
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
new file mode 100644
index 0000000..b0d5b1d
--- /dev/null
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -0,0 +1,91 @@
+/* module that allows mangling of the arp payload */
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_arp/arpt_mangle.h>
+#include <net/sock.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
+MODULE_DESCRIPTION("arptables arp payload mangle target");
+
+/*
+ * Rewrite the address fields that follow the fixed ARP header, walking
+ * them in wire order: sender hardware, sender protocol, target hardware,
+ * target protocol.  Only fields whose ARPT_MANGLE_* flag is set are
+ * overwritten.  The packet is dropped if it cannot be made writable, if
+ * a length taken from the ARP header exceeds the corresponding
+ * *_LEN_MAX, or if a field would end past the skb tail.  Otherwise the
+ * verdict configured in the rule is returned.
+ */
+static unsigned int
+target(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct arpt_mangle *mangle = par->targinfo;
+	const struct arphdr *arp;
+	unsigned char *field, *tail;
+	int hw_len, proto_len;
+
+	if (!skb_make_writable(skb, skb->len))
+		return NF_DROP;
+
+	arp = arp_hdr(skb);
+	tail = skb_tail_pointer(skb);
+	field = skb_network_header(skb) + sizeof(*arp);
+	hw_len = arp->ar_hln;
+	proto_len = arp->ar_pln;
+
+	/* We assume that pln and hln were checked in the match */
+
+	/* sender hardware address */
+	if (mangle->flags & ARPT_MANGLE_SDEV) {
+		if (hw_len > ARPT_DEV_ADDR_LEN_MAX || field + hw_len > tail)
+			return NF_DROP;
+		memcpy(field, mangle->src_devaddr, hw_len);
+	}
+	field += hw_len;
+
+	/* sender protocol address */
+	if (mangle->flags & ARPT_MANGLE_SIP) {
+		if (proto_len > ARPT_MANGLE_ADDR_LEN_MAX ||
+		    field + proto_len > tail)
+			return NF_DROP;
+		memcpy(field, &mangle->u_s.src_ip, proto_len);
+	}
+	field += proto_len;
+
+	/* target hardware address */
+	if (mangle->flags & ARPT_MANGLE_TDEV) {
+		if (hw_len > ARPT_DEV_ADDR_LEN_MAX || field + hw_len > tail)
+			return NF_DROP;
+		memcpy(field, mangle->tgt_devaddr, hw_len);
+	}
+	field += hw_len;
+
+	/* target protocol address */
+	if (mangle->flags & ARPT_MANGLE_TIP) {
+		if (proto_len > ARPT_MANGLE_ADDR_LEN_MAX ||
+		    field + proto_len > tail)
+			return NF_DROP;
+		memcpy(field, &mangle->u_t.tgt_ip, proto_len);
+	}
+
+	return mangle->target;
+}
+
+/*
+ * Rule validation: accept only rules that set at least one known
+ * ARPT_MANGLE_* flag, no unknown flag, and whose verdict is NF_DROP,
+ * NF_ACCEPT or ARPT_CONTINUE.
+ */
+static bool checkentry(const struct xt_tgchk_param *par)
+{
+	const struct arpt_mangle *mangle = par->targinfo;
+
+	/* unknown bits set? */
+	if (mangle->flags & ~ARPT_MANGLE_MASK)
+		return false;
+
+	/* nothing to mangle at all? */
+	if (!(mangle->flags & ARPT_MANGLE_MASK))
+		return false;
+
+	return mangle->target == NF_DROP ||
+	       mangle->target == NF_ACCEPT ||
+	       mangle->target == ARPT_CONTINUE;
+}
+
+/* xt_target registration record for the ARP "mangle" target. */
+static struct xt_target arpt_mangle_reg __read_mostly = {
+	.name		= "mangle",
+	.family		= NFPROTO_ARP,
+	.me		= THIS_MODULE,
+	.targetsize	= sizeof(struct arpt_mangle),
+	.checkentry	= checkentry,
+	.target		= target,
+};
+
+/* Module init: register the mangle target; returns xt_register_target()'s result. */
+static int __init arpt_mangle_init(void)
+{
+	int err = xt_register_target(&arpt_mangle_reg);
+
+	return err;
+}
+
+/* Module exit: unregister the mangle target. */
+static void __exit arpt_mangle_fini(void)
+{
+	xt_unregister_target(&arpt_mangle_reg);
+}
+
+module_init(arpt_mangle_init);
+module_exit(arpt_mangle_fini);
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
new file mode 100644
index 0000000..bee3d11
--- /dev/null
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -0,0 +1,157 @@
+/*
+ * Filtering ARP tables module.
+ *
+ * Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter_arp/arp_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
+MODULE_DESCRIPTION("arptables filter table");
+
+/* Bitmask of the ARP hooks used by the filter table: IN, OUT and FORWARD. */
+#define FILTER_VALID_HOOKS \
+	((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD))
+
+static struct /* Initial ruleset handed to arpt_register_table(): one rule per hook plus a terminator. */
+{
+ struct arpt_replace repl;
+ struct arpt_standard entries[3];
+ struct arpt_error term;
+} initial_table __net_initdata = {
+ .repl = {
+ .name = "filter",
+ .valid_hooks = FILTER_VALID_HOOKS,
+ .num_entries = 4, /* 3 standard entries + the error terminator */
+ .size = sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error),
+ .hook_entry = { /* byte offset of each hook's first rule inside the entry blob */
+ [NF_ARP_IN] = 0,
+ [NF_ARP_OUT] = sizeof(struct arpt_standard),
+ [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard),
+ },
+ .underflow = { /* same offsets as hook_entry: each built-in chain consists of exactly one rule */
+ [NF_ARP_IN] = 0,
+ [NF_ARP_OUT] = sizeof(struct arpt_standard),
+ [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard),
+ },
+ },
+ .entries = {
+ ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_IN */
+ ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_OUT */
+ ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_FORWARD */
+ },
+ .term = ARPT_ERROR_INIT, /* entry after the last chain */
+};
+
+/*
+ * Template describing the ARP "filter" table; passed to
+ * arpt_register_table() once per network namespace.
+ */
+static struct xt_table packet_filter = {
+	.name		= "filter",
+	.af		= NFPROTO_ARP,
+	.valid_hooks	= FILTER_VALID_HOOKS,
+	.me		= THIS_MODULE,
+	.private	= NULL,
+	.lock		= __RW_LOCK_UNLOCKED(packet_filter.lock),
+};
+
+/* The work comes in here from netfilter.c */
+
+/*
+ * NF_ARP_IN hook: evaluate the packet against the filter table of the
+ * namespace that owns the input device.
+ */
+static unsigned int arpt_in_hook(unsigned int hook,
+				 struct sk_buff *skb,
+				 const struct net_device *in,
+				 const struct net_device *out,
+				 int (*okfn)(struct sk_buff *))
+{
+	struct net *net = dev_net(in);
+
+	return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter);
+}
+
+/*
+ * NF_ARP_OUT hook: evaluate the packet against the filter table of the
+ * namespace that owns the output device.
+ */
+static unsigned int arpt_out_hook(unsigned int hook,
+				  struct sk_buff *skb,
+				  const struct net_device *in,
+				  const struct net_device *out,
+				  int (*okfn)(struct sk_buff *))
+{
+	struct net *net = dev_net(out);
+
+	return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter);
+}
+
+/*
+ * NF_ARP_FORWARD hook: evaluate the packet against the filter table of
+ * the namespace that owns the input device.
+ */
+static unsigned int arpt_forward_hook(unsigned int hook,
+				      struct sk_buff *skb,
+				      const struct net_device *in,
+				      const struct net_device *out,
+				      int (*okfn)(struct sk_buff *))
+{
+	struct net *net = dev_net(in);
+
+	return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter);
+}
+
+/* Netfilter hook registrations: one entry per ARP hook, all at filter priority. */
+static struct nf_hook_ops arpt_ops[] __read_mostly = {
+	{	/* incoming ARP */
+		.pf		= NFPROTO_ARP,
+		.hooknum	= NF_ARP_IN,
+		.priority	= NF_IP_PRI_FILTER,
+		.hook		= arpt_in_hook,
+		.owner		= THIS_MODULE,
+	},
+	{	/* outgoing ARP */
+		.pf		= NFPROTO_ARP,
+		.hooknum	= NF_ARP_OUT,
+		.priority	= NF_IP_PRI_FILTER,
+		.hook		= arpt_out_hook,
+		.owner		= THIS_MODULE,
+	},
+	{	/* forwarded ARP */
+		.pf		= NFPROTO_ARP,
+		.hooknum	= NF_ARP_FORWARD,
+		.priority	= NF_IP_PRI_FILTER,
+		.hook		= arpt_forward_hook,
+		.owner		= THIS_MODULE,
+	},
+};
+
+/*
+ * Per-namespace init: register the filter table for @net and remember
+ * the result in net->ipv4.arptable_filter.  The field is written even
+ * when registration fails (it then holds the error pointer); in that
+ * case the encoded error is returned, otherwise 0.
+ */
+static int __net_init arptable_filter_net_init(struct net *net)
+{
+	struct xt_table *tbl;
+
+	/* Register table */
+	tbl = arpt_register_table(net, &packet_filter, &initial_table.repl);
+	net->ipv4.arptable_filter = tbl;
+
+	return IS_ERR(tbl) ? PTR_ERR(tbl) : 0;
+}
+
+/* Per-namespace exit: unregister the table stored in net->ipv4.arptable_filter. */
+static void __net_exit arptable_filter_net_exit(struct net *net)
+{
+	arpt_unregister_table(net->ipv4.arptable_filter);
+}
+
+/* Namespace callbacks registered by arptable_filter_init(). */
+static struct pernet_operations arptable_filter_net_ops = {
+	.exit	= arptable_filter_net_exit,
+	.init	= arptable_filter_net_init,
+};
+
+/*
+ * Module init: register the per-netns operations first, then the
+ * netfilter hooks.  If hook registration fails the per-netns
+ * registration is rolled back.  Returns the result of the last
+ * registration attempted.
+ */
+static int __init arptable_filter_init(void)
+{
+	int err;
+
+	err = register_pernet_subsys(&arptable_filter_net_ops);
+	if (err < 0)
+		return err;
+
+	err = nf_register_hooks(arpt_ops, ARRAY_SIZE(arpt_ops));
+	if (err < 0)
+		unregister_pernet_subsys(&arptable_filter_net_ops);
+
+	return err;
+}
+
+/* Module exit: remove the hooks, then the per-netns operations. */
+static void __exit arptable_filter_fini(void)
+{
+	nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops));
+
+	unregister_pernet_subsys(&arptable_filter_net_ops);
+}
+
+module_init(arptable_filter_init);
+module_exit(arptable_filter_fini);
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
new file mode 100644
index 0000000..432ce9d
--- /dev/null
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -0,0 +1,645 @@
+/*
+ * This is a module which is used for queueing IPv4 packets and
+ * communicating with userspace via netlink.
+ *
+ * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
+ * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_queue.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netlink.h>
+#include <linux/spinlock.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/security.h>
+#include <linux/mutex.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/route.h>
+#include <net/netfilter/nf_queue.h>
+#include <net/ip.h>
+
+#define IPQ_QMAX_DEFAULT 1024
+#define IPQ_PROC_FS_NAME "ip_queue"
+#define NET_IPQ_QMAX 2088
+#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
+
+typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
+
+static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; /* current IPQ_COPY_* mode, changed by __ipq_set_mode() */
+static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; /* enqueue refuses packets once queue_total reaches this; exposed through the ip_queue_maxlen sysctl */
+static DEFINE_RWLOCK(queue_lock); /* taken around updates of queue_list, queue_total, peer_pid and the copy mode/range */
+static int peer_pid __read_mostly; /* netlink pid of the bound userspace peer, 0 when there is none */
+static unsigned int copy_range __read_mostly; /* payload byte limit used in IPQ_COPY_PACKET mode (capped at 0xFFFF) */
+static unsigned int queue_total; /* number of entries currently on queue_list */
+static unsigned int queue_dropped = 0; /* incremented when a packet is refused because the queue is full */
+static unsigned int queue_user_dropped = 0; /* incremented when netlink_unicast() to the peer fails */
+static struct sock *ipqnl __read_mostly; /* kernel netlink socket created in ip_queue_init() */
+static LIST_HEAD(queue_list); /* queued packets that have not received a verdict yet */
+static DEFINE_MUTEX(ipqnl_mutex); /* held by ipq_rcv_skb() while a netlink message is processed */
+
+/*
+ * Append @entry to the tail of queue_list and account for it in
+ * queue_total.  No locking is done here.
+ */
+static inline void __ipq_enqueue_entry(struct nf_queue_entry *entry)
+{
+	queue_total++;
+	list_add_tail(&entry->list, &queue_list);
+}
+
+/*
+ * Switch the copy mode.  IPQ_COPY_NONE and IPQ_COPY_META reset the copy
+ * range to 0; IPQ_COPY_PACKET stores @range, capped at 0xFFFF.  Any
+ * other mode leaves the state untouched and yields -EINVAL.  Returns 0
+ * on success.  No locking is done here.
+ */
+static inline int __ipq_set_mode(unsigned char mode, unsigned int range)
+{
+	if (mode == IPQ_COPY_NONE || mode == IPQ_COPY_META) {
+		copy_mode = mode;
+		copy_range = 0;
+		return 0;
+	}
+
+	if (mode == IPQ_COPY_PACKET) {
+		copy_mode = mode;
+		copy_range = range > 0xFFFF ? 0xFFFF : range;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
+
+/*
+ * Forget the current peer: clear peer_pid, drop the timestamp request,
+ * fall back to IPQ_COPY_NONE and flush every queued packet (each one is
+ * reinjected with NF_DROP by __ipq_flush()).  No locking is done here.
+ */
+static inline void __ipq_reset(void)
+{
+	peer_pid = 0;
+	net_disable_timestamp();
+
+	__ipq_set_mode(IPQ_COPY_NONE, 0);
+	__ipq_flush(NULL, 0);
+}
+
+/*
+ * Look up the queued entry whose address equals @id (packet ids handed
+ * to userspace are the entry pointers), unlink it and return it.
+ * Returns NULL when no such entry is queued.  Runs under queue_lock.
+ */
+static struct nf_queue_entry *ipq_find_dequeue_entry(unsigned long id)
+{
+	struct nf_queue_entry *found = NULL;
+	struct nf_queue_entry *pos;
+
+	write_lock_bh(&queue_lock);
+
+	list_for_each_entry(pos, &queue_list, list) {
+		if ((unsigned long)pos != id)
+			continue;
+
+		/* unlink and stop: the iterator is not advanced again */
+		list_del(&pos->list);
+		queue_total--;
+		found = pos;
+		break;
+	}
+
+	write_unlock_bh(&queue_lock);
+	return found;
+}
+
+/*
+ * Drop queued packets.  With @cmpfn == NULL every entry is dropped;
+ * otherwise only entries for which @cmpfn(entry, @data) is non-zero.
+ * A dropped entry is unlinked and reinjected with NF_DROP.  No locking
+ * is done here.
+ */
+static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
+{
+	struct nf_queue_entry *cur, *tmp;
+
+	list_for_each_entry_safe(cur, tmp, &queue_list, list) {
+		if (cmpfn && !cmpfn(cur, data))
+			continue;
+
+		list_del(&cur->list);
+		queue_total--;
+		nf_reinject(cur, NF_DROP);
+	}
+}
+
+/* Locked wrapper: runs __ipq_flush() with queue_lock write-held. */
+static void ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
+{
+	write_lock_bh(&queue_lock);
+
+	__ipq_flush(cmpfn, data);
+
+	write_unlock_bh(&queue_lock);
+}
+
+/*
+ * Build the IPQM_PACKET netlink message that describes @entry.
+ *
+ * The copy mode and range are sampled under queue_lock; the message is
+ * assembled after the lock has been dropped.  In IPQ_COPY_PACKET mode
+ * the metadata is followed by up to copy_range bytes of the packet (the
+ * whole packet when copy_range is 0 or larger than the packet).
+ *
+ * Returns the new skb on success.  On failure returns NULL and stores
+ * the error in *errp.  Note that *errp can also be overwritten with 0
+ * on the success path (result of skb_checksum_help()), so callers must
+ * not rely on it keeping its previous value.
+ */
+static struct sk_buff *
+ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
+{
+	sk_buff_data_t old_tail;
+	size_t size = 0;
+	size_t data_len = 0;
+	struct sk_buff *skb;
+	struct ipq_packet_msg *pmsg;
+	struct nlmsghdr *nlh;
+	struct timeval tv;
+
+	read_lock_bh(&queue_lock);
+
+	switch (copy_mode) {
+	case IPQ_COPY_META:
+	case IPQ_COPY_NONE:
+		size = NLMSG_SPACE(sizeof(*pmsg));
+		break;
+
+	case IPQ_COPY_PACKET:
+		/* the payload is handed to userspace, so finish the checksum */
+		if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
+		     entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
+		    (*errp = skb_checksum_help(entry->skb))) {
+			read_unlock_bh(&queue_lock);
+			return NULL;
+		}
+		if (copy_range == 0 || copy_range > entry->skb->len)
+			data_len = entry->skb->len;
+		else
+			data_len = copy_range;
+
+		size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
+		break;
+
+	default:
+		*errp = -EINVAL;
+		read_unlock_bh(&queue_lock);
+		return NULL;
+	}
+
+	read_unlock_bh(&queue_lock);
+
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (!skb)
+		goto nlmsg_failure;
+
+	old_tail = skb->tail;
+	/* NLMSG_PUT() jumps to nlmsg_failure if the message does not fit */
+	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
+	pmsg = NLMSG_DATA(nlh);
+	memset(pmsg, 0, sizeof(*pmsg));
+
+	/* the entry's address doubles as the packet id seen by userspace */
+	pmsg->packet_id = (unsigned long )entry;
+	pmsg->data_len = data_len;
+	tv = ktime_to_timeval(entry->skb->tstamp);
+	pmsg->timestamp_sec = tv.tv_sec;
+	pmsg->timestamp_usec = tv.tv_usec;
+	pmsg->mark = entry->skb->mark;
+	pmsg->hook = entry->hook;
+	pmsg->hw_protocol = entry->skb->protocol;
+
+	if (entry->indev)
+		strcpy(pmsg->indev_name, entry->indev->name);
+	else
+		pmsg->indev_name[0] = '\0';
+
+	if (entry->outdev)
+		strcpy(pmsg->outdev_name, entry->outdev->name);
+	else
+		pmsg->outdev_name[0] = '\0';
+
+	if (entry->indev && entry->skb->dev) {
+		pmsg->hw_type = entry->skb->dev->type;
+		pmsg->hw_addrlen = dev_parse_header(entry->skb,
+						    pmsg->hw_addr);
+	}
+
+	if (data_len)
+		if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
+			BUG();
+
+	nlh->nlmsg_len = skb->tail - old_tail;
+	return skb;
+
+nlmsg_failure:
+	/*
+	 * Reached both when alloc_skb() failed (skb is NULL) and when
+	 * NLMSG_PUT() bailed out.  In the second case the freshly allocated
+	 * skb has to be released here, otherwise it is leaked; kfree_skb()
+	 * accepts a NULL pointer, so one call covers both cases.
+	 */
+	kfree_skb(skb);
+	*errp = -EINVAL;
+	printk(KERN_ERR "ip_queue: error creating packet message\n");
+	return NULL;
+}
+
+/*
+ * nf_queue output callback: send @entry to the userspace peer and, if
+ * that worked, keep it on queue_list until a verdict arrives.
+ *
+ * Returns a negative errno when the packet was NOT queued:
+ *   -EAGAIN  copy mode is IPQ_COPY_NONE
+ *   -EINVAL  no peer is bound
+ *   -ENOSPC  queue_total has reached queue_maxlen
+ * or whatever ipq_build_packet_message()/netlink_unicast() reported.
+ * A non-negative return means the entry is now on the queue.
+ */
+static int
+ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+{
+	int status = -EINVAL;
+	struct sk_buff *nskb;
+
+	if (copy_mode == IPQ_COPY_NONE)
+		return -EAGAIN;
+
+	nskb = ipq_build_packet_message(entry, &status);
+	if (nskb == NULL)
+		return status;
+
+	write_lock_bh(&queue_lock);
+
+	if (!peer_pid) {
+		/*
+		 * The message builder may have overwritten status with 0
+		 * (successful skb_checksum_help()).  Returning that here
+		 * would report the entry as queued although it is not on
+		 * the list, so set the error explicitly.
+		 */
+		status = -EINVAL;
+		goto err_out_free_nskb;
+	}
+
+	if (queue_total >= queue_maxlen) {
+		queue_dropped++;
+		status = -ENOSPC;
+		/* both counters are unsigned int, hence %u */
+		if (net_ratelimit())
+			printk (KERN_WARNING "ip_queue: full at %u entries, "
+				"dropping packets(s). Dropped: %u\n", queue_total,
+				queue_dropped);
+		goto err_out_free_nskb;
+	}
+
+	/* netlink_unicast will either free the nskb or attach it to a socket */
+	status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
+	if (status < 0) {
+		queue_user_dropped++;
+		goto err_out_unlock;
+	}
+
+	__ipq_enqueue_entry(entry);
+
+	write_unlock_bh(&queue_lock);
+	return status;
+
+err_out_free_nskb:
+	kfree_skb(nskb);
+
+err_out_unlock:
+	write_unlock_bh(&queue_lock);
+	return status;
+}
+
+/*
+ * Replace the contents of the queued packet @e with the payload carried
+ * in verdict message @v.
+ *
+ * A payload shorter than an IPv4 header is ignored (returns 0 without
+ * touching the packet).  Otherwise the skb is trimmed or grown to
+ * v->data_len bytes (reallocating when the tailroom is too small), made
+ * writable, overwritten with the payload and marked CHECKSUM_NONE.
+ *
+ * Returns 0 on success, -EINVAL if the new length exceeds 0xFFFF while
+ * growing, -ENOMEM if trimming, reallocating or making the skb writable
+ * fails.
+ */
+static int
+ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
+{
+	int grow;
+
+	if (v->data_len < sizeof(struct iphdr))
+		return 0;
+
+	grow = v->data_len - e->skb->len;
+
+	if (grow < 0) {
+		/* new payload is shorter */
+		if (pskb_trim(e->skb, v->data_len))
+			return -ENOMEM;
+	} else if (grow > 0) {
+		/* new payload is longer */
+		if (v->data_len > 0xFFFF)
+			return -EINVAL;
+
+		if (grow > skb_tailroom(e->skb)) {
+			struct sk_buff *bigger;
+
+			bigger = skb_copy_expand(e->skb, skb_headroom(e->skb),
+						 grow, GFP_ATOMIC);
+			if (!bigger) {
+				printk(KERN_WARNING "ip_queue: error "
+				       "in mangle, dropping packet\n");
+				return -ENOMEM;
+			}
+			kfree_skb(e->skb);
+			e->skb = bigger;
+		}
+		skb_put(e->skb, grow);
+	}
+
+	if (!skb_make_writable(e->skb, v->data_len))
+		return -ENOMEM;
+
+	skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
+	e->skb->ip_summed = CHECKSUM_NONE;
+
+	return 0;
+}
+
+/*
+ * Apply the verdict in @vmsg to the queued packet it names.  @len is the
+ * number of payload bytes that followed the fixed part of the message.
+ *
+ * If the message carries a replacement payload (data_len non-zero and
+ * equal to @len) the packet is rewritten first; a failed rewrite turns
+ * the verdict into NF_DROP.  The packet is then reinjected.
+ *
+ * Returns 0 on success, -EINVAL for a verdict above NF_MAX_VERDICT and
+ * -ENOENT when the packet id is not on the queue.
+ */
+static int
+ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
+{
+	struct nf_queue_entry *entry;
+	int verdict;
+
+	if (vmsg->value > NF_MAX_VERDICT)
+		return -EINVAL;
+
+	entry = ipq_find_dequeue_entry(vmsg->id);
+	if (!entry)
+		return -ENOENT;
+
+	verdict = vmsg->value;
+	if (vmsg->data_len && vmsg->data_len == len &&
+	    ipq_mangle_ipv4(vmsg, entry) < 0)
+		verdict = NF_DROP;
+
+	nf_reinject(entry, verdict);
+	return 0;
+}
+
+/* Locked wrapper: runs __ipq_set_mode() with queue_lock write-held. */
+static int ipq_set_mode(unsigned char mode, unsigned int range)
+{
+	int err;
+
+	write_lock_bh(&queue_lock);
+	err = __ipq_set_mode(mode, range);
+	write_unlock_bh(&queue_lock);
+
+	return err;
+}
+
+/*
+ * Dispatch one message from the userspace peer.  @len is the size of the
+ * netlink payload starting at @pmsg.
+ *
+ * IPQM_MODE changes the copy mode; IPQM_VERDICT applies a verdict, with
+ * the bytes following the fixed message treated as replacement payload.
+ * Returns the handler's result, or -EINVAL for a short message, an
+ * unknown type or a verdict above NF_MAX_VERDICT.
+ */
+static int
+ipq_receive_peer(struct ipq_peer_msg *pmsg,
+		 unsigned char type, unsigned int len)
+{
+	if (len < sizeof(*pmsg))
+		return -EINVAL;
+
+	switch (type) {
+	case IPQM_MODE:
+		return ipq_set_mode(pmsg->msg.mode.value,
+				    pmsg->msg.mode.range);
+
+	case IPQM_VERDICT:
+		if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
+			return -EINVAL;
+		return ipq_set_verdict(&pmsg->msg.verdict,
+				       len - sizeof(*pmsg));
+
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * ipq_cmpfn used when flushing by device: returns 1 if @entry refers to
+ * interface @ifindex as input or output device (or, with bridge
+ * netfilter, as physical input/output device), 0 otherwise.
+ */
+static int
+dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
+{
+	if (entry->indev && entry->indev->ifindex == ifindex)
+		return 1;
+
+	if (entry->outdev && entry->outdev->ifindex == ifindex)
+		return 1;
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (entry->skb->nf_bridge) {
+		if (entry->skb->nf_bridge->physindev &&
+		    entry->skb->nf_bridge->physindev->ifindex == ifindex)
+			return 1;
+
+		if (entry->skb->nf_bridge->physoutdev &&
+		    entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
+			return 1;
+	}
+#endif
+
+	return 0;
+}
+
+/* Flush every queued packet that dev_cmp() associates with @ifindex. */
+static void ipq_dev_drop(int ifindex)
+{
+	ipq_flush(dev_cmp, ifindex);
+}
+
+#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
+
+static inline void
+__ipq_rcv_skb(struct sk_buff *skb)
+{ /*
+ * Validate one netlink message received from userspace, bind the
+ * sender as peer if no peer is bound yet, and hand the payload to
+ * ipq_receive_peer().  Malformed-length messages are ignored silently;
+ * other failures are reported with netlink_ack() through RCV_SKB_FAIL,
+ * which also returns from this function.
+ */
+ int status, type, pid, flags, nlmsglen, skblen;
+ struct nlmsghdr *nlh;
+
+ skblen = skb->len;
+ if (skblen < sizeof(*nlh)) /* too short to hold a netlink header */
+ return;
+
+ nlh = nlmsg_hdr(skb);
+ nlmsglen = nlh->nlmsg_len;
+ if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) /* header length field inconsistent with the skb */
+ return;
+
+ pid = nlh->nlmsg_pid;
+ flags = nlh->nlmsg_flags;
+
+ if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI) /* must be a single-part request from a non-zero pid */
+ RCV_SKB_FAIL(-EINVAL);
+
+ if (flags & MSG_TRUNC) /* NOTE(review): tests a MSG_* constant against nlmsg_flags - confirm intent */
+ RCV_SKB_FAIL(-ECOMM);
+
+ type = nlh->nlmsg_type;
+ if (type < NLMSG_NOOP || type >= IPQM_MAX)
+ RCV_SKB_FAIL(-EINVAL);
+
+ if (type <= IPQM_BASE) /* not an ip_queue message type: nothing to do */
+ return;
+
+ if (security_netlink_recv(skb, CAP_NET_ADMIN)) /* sender needs CAP_NET_ADMIN */
+ RCV_SKB_FAIL(-EPERM);
+
+ write_lock_bh(&queue_lock);
+
+ if (peer_pid) {
+ if (peer_pid != pid) { /* another process already owns the queue */
+ write_unlock_bh(&queue_lock);
+ RCV_SKB_FAIL(-EBUSY);
+ }
+ } else { /* first message: the sender becomes the peer */
+ net_enable_timestamp();
+ peer_pid = pid;
+ }
+
+ write_unlock_bh(&queue_lock);
+
+ status = ipq_receive_peer(NLMSG_DATA(nlh), type,
+ nlmsglen - NLMSG_LENGTH(0)); /* payload length = total length minus the netlink header */
+ if (status < 0)
+ RCV_SKB_FAIL(status);
+
+ if (flags & NLM_F_ACK) /* positive ack only when the sender asked for one */
+ netlink_ack(skb, nlh, 0);
+ return;
+}
+
+/*
+ * Netlink input callback (passed to netlink_kernel_create()):
+ * processes one message with ipqnl_mutex held.
+ */
+static void ipq_rcv_skb(struct sk_buff *skb)
+{
+	mutex_lock(&ipqnl_mutex);
+
+	__ipq_rcv_skb(skb);
+
+	mutex_unlock(&ipqnl_mutex);
+}
+
+/*
+ * Netdevice notifier: when a device in init_net goes down, drop every
+ * queued packet that refers to it.  Devices in other namespaces and all
+ * other events are ignored.  Always returns NOTIFY_DONE.
+ */
+static int
+ipq_rcv_dev_event(struct notifier_block *this,
+		  unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+
+	/* Drop any packets associated with the downed device */
+	if (net_eq(dev_net(dev), &init_net) && event == NETDEV_DOWN)
+		ipq_dev_drop(dev->ifindex);
+
+	return NOTIFY_DONE;
+}
+
+/* Notifier block registered with register_netdevice_notifier() at init. */
+static struct notifier_block ipq_dev_notifier = {
+	.notifier_call	= ipq_rcv_dev_event,
+};
+
+/*
+ * Netlink notifier: when the NETLINK_FIREWALL socket of the current
+ * peer (same pid, in init_net) is released, reset the queue state.
+ * Always returns NOTIFY_DONE.
+ */
+static int
+ipq_rcv_nl_event(struct notifier_block *this,
+		 unsigned long event, void *ptr)
+{
+	struct netlink_notify *n = ptr;
+
+	if (event != NETLINK_URELEASE ||
+	    n->protocol != NETLINK_FIREWALL || !n->pid)
+		return NOTIFY_DONE;
+
+	write_lock_bh(&queue_lock);
+	if (n->net == &init_net && n->pid == peer_pid)
+		__ipq_reset();
+	write_unlock_bh(&queue_lock);
+
+	return NOTIFY_DONE;
+}
+
+/* Notifier block registered with netlink_register_notifier() at init. */
+static struct notifier_block ipq_nl_notifier = {
+	.notifier_call	= ipq_rcv_nl_event,
+};
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *ipq_sysctl_header;
+
+static ctl_table ipq_table[] = { /* sysctl table registered under net_ipv4_ctl_path by ip_queue_init() */
+ {
+ .ctl_name = NET_IPQ_QMAX,
+ .procname = NET_IPQ_QMAX_NAME, /* "ip_queue_maxlen" */
+ .data = &queue_maxlen, /* the queue length limit checked in ipq_enqueue_packet() */
+ .maxlen = sizeof(queue_maxlen),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ { .ctl_name = 0 } /* table terminator */
+};
+#endif
+
+#ifdef CONFIG_PROC_FS
+/*
+ * seq_file show callback for the ip_queue proc entry: prints the peer
+ * pid, copy mode/range and queue counters, sampled together under
+ * queue_lock.  Always returns 0.
+ */
+static int ip_queue_show(struct seq_file *m, void *v)
+{
+	read_lock_bh(&queue_lock);
+
+	seq_printf(m,
+		   "Peer PID : %d\n"
+		   "Copy mode : %hu\n"
+		   "Copy range : %u\n"
+		   "Queue length : %u\n"
+		   "Queue max. length : %u\n"
+		   "Queue dropped : %u\n"
+		   "Netlink dropped : %u\n",
+		   peer_pid, copy_mode, copy_range,
+		   queue_total, queue_maxlen,
+		   queue_dropped, queue_user_dropped);
+
+	read_unlock_bh(&queue_lock);
+
+	return 0;
+}
+
+/* open() handler for the proc entry: single-record seq_file backed by ip_queue_show(). */
+static int ip_queue_open(struct inode *inode, struct file *file)
+{
+	int err = single_open(file, ip_queue_show, NULL);
+
+	return err;
+}
+
+/* File operations of the ip_queue proc entry (read-only seq_file). */
+static const struct file_operations ip_queue_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ip_queue_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif
+
+/* Queue handler registered for PF_INET in ip_queue_init(). */
+static const struct nf_queue_handler nfqh = {
+	.outfn	= &ipq_enqueue_packet,
+	.name	= "ip_queue",
+};
+
+static int __init ip_queue_init(void)
+{ /*
+ * Module init.  Sets up, in order: the netlink notifier, the kernel
+ * netlink socket, the proc entry, the netdevice notifier, the sysctl
+ * table and finally the PF_INET queue handler.  The error labels at
+ * the bottom undo these steps in reverse order.  Returns 0 or the
+ * handler registration result on success, a negative errno otherwise.
+ */
+ int status = -ENOMEM; /* reported if the netlink socket or the proc entry cannot be created */
+ struct proc_dir_entry *proc __maybe_unused;
+
+ netlink_register_notifier(&ipq_nl_notifier); /* return value is not checked */
+ ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
+ ipq_rcv_skb, NULL, THIS_MODULE);
+ if (ipqnl == NULL) {
+ printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
+ goto cleanup_netlink_notifier;
+ }
+
+#ifdef CONFIG_PROC_FS
+ proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
+ &ip_queue_proc_fops);
+ if (!proc) {
+ printk(KERN_ERR "ip_queue: failed to create proc entry\n");
+ goto cleanup_ipqnl;
+ }
+#endif
+ register_netdevice_notifier(&ipq_dev_notifier); /* return value is not checked */
+#ifdef CONFIG_SYSCTL
+ ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table); /* result is not checked here */
+#endif
+ status = nf_register_queue_handler(PF_INET, &nfqh);
+ if (status < 0) {
+ printk(KERN_ERR "ip_queue: failed to register queue handler\n");
+ goto cleanup_sysctl;
+ }
+ return status;
+
+cleanup_sysctl:
+#ifdef CONFIG_SYSCTL
+ unregister_sysctl_table(ipq_sysctl_header);
+#endif
+ unregister_netdevice_notifier(&ipq_dev_notifier);
+ proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
+cleanup_ipqnl: __maybe_unused
+ netlink_kernel_release(ipqnl);
+ mutex_lock(&ipqnl_mutex); /* NOTE(review): empty lock/unlock pair presumably waits for a running ipq_rcv_skb() - confirm */
+ mutex_unlock(&ipqnl_mutex);
+
+cleanup_netlink_notifier:
+ netlink_unregister_notifier(&ipq_nl_notifier);
+ return status;
+}
+
+static void __exit ip_queue_fini(void)
+{ /*
+ * Module exit.  Unregisters the queue handler first, then flushes the
+ * queue and removes the sysctl table, netdevice notifier, proc entry,
+ * netlink socket and netlink notifier.
+ */
+ nf_unregister_queue_handlers(&nfqh);
+ synchronize_net();
+ ipq_flush(NULL, 0); /* NULL cmpfn: every queued packet is reinjected with NF_DROP */
+
+#ifdef CONFIG_SYSCTL
+ unregister_sysctl_table(ipq_sysctl_header);
+#endif
+ unregister_netdevice_notifier(&ipq_dev_notifier);
+ proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
+
+ netlink_kernel_release(ipqnl);
+ mutex_lock(&ipqnl_mutex); /* NOTE(review): empty lock/unlock pair presumably waits for a running ipq_rcv_skb() - confirm */
+ mutex_unlock(&ipqnl_mutex);
+
+ netlink_unregister_notifier(&ipq_nl_notifier);
+}
+
+MODULE_DESCRIPTION("IPv4 packet queue handler");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_LICENSE("GPL");
+
+module_init(ip_queue_init);
+module_exit(ip_queue_fini);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
new file mode 100644
index 0000000..213fb27
--- /dev/null
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -0,0 +1,2285 @@
+/*
+ * Packet matching code.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/cache.h>
+#include <linux/capability.h>
+#include <linux/skbuff.h>
+#include <linux/kmod.h>
+#include <linux/vmalloc.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/icmp.h>
+#include <net/ip.h>
+#include <net/compat.h>
+#include <asm/uaccess.h>
+#include <linux/mutex.h>
+#include <linux/proc_fs.h>
+#include <linux/err.h>
+#include <linux/cpumask.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/netfilter/nf_log.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("IPv4 packet filter");
+
+/*#define DEBUG_IP_FIREWALL*/
+/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
+/*#define DEBUG_IP_FIREWALL_USER*/
+
+#ifdef DEBUG_IP_FIREWALL
+#define dprintf(format, args...) printk(format , ## args)
+#else
+#define dprintf(format, args...)
+#endif
+
+#ifdef DEBUG_IP_FIREWALL_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#define IP_NF_ASSERT(x) \
+do { \
+ if (!(x)) \
+ printk("IP_NF_ASSERT: %s:%s:%u\n", \
+ __func__, __FILE__, __LINE__); \
+} while(0)
+#else
+#define IP_NF_ASSERT(x)
+#endif
+
+#if 0
+/* All the better to debug you with... */
+#define static
+#define inline
+#endif
+
+/*
+ We keep a set of rules for each CPU, so we can avoid write-locking
+ them in the softirq when updating the counters and therefore
+ only need to read-lock in the softirq; doing a write_lock_bh() in user
+ context stops packets coming through and allows user context to read
+ the counters or update the rules.
+
+ Hence the start of any table is given by get_table() below. */
+
+/* Returns whether matches rule or not. */
+/* Performance critical - called for every packet
+ *
+ * Checks, in this order: masked source and destination address, input
+ * interface name, output interface name, protocol (only when the rule
+ * names one) and the fragment flag.  Each test can be inverted through
+ * the corresponding IPT_INV_* bit in ipinfo->invflags (see FWINV).
+ * Returns false at the first test that fails, true if all pass.
+ *
+ * NOTE(review): the interface comparison reads indev/outdev as arrays
+ * of unsigned long covering IFNAMSIZ bytes, so callers presumably must
+ * pass IFNAMSIZ-sized, long-aligned buffers - confirm.
+ */
+static inline bool
+ip_packet_match(const struct iphdr *ip,
+ const char *indev,
+ const char *outdev,
+ const struct ipt_ip *ipinfo,
+ int isfrag)
+{
+ size_t i;
+ unsigned long ret;
+
+#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
+
+ if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
+ IPT_INV_SRCIP)
+ || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
+ IPT_INV_DSTIP)) {
+ dprintf("Source or dest mismatch.\n");
+
+ dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
+ NIPQUAD(ip->saddr),
+ NIPQUAD(ipinfo->smsk.s_addr),
+ NIPQUAD(ipinfo->src.s_addr),
+ ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
+ dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
+ NIPQUAD(ip->daddr),
+ NIPQUAD(ipinfo->dmsk.s_addr),
+ NIPQUAD(ipinfo->dst.s_addr),
+ ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
+ return false;
+ }
+
+ /* Look for ifname matches; this should unroll nicely. */
+ for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+ ret |= (((const unsigned long *)indev)[i]
+ ^ ((const unsigned long *)ipinfo->iniface)[i])
+ & ((const unsigned long *)ipinfo->iniface_mask)[i]; /* ret stays 0 only if all masked bits agree */
+ }
+
+ if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
+ dprintf("VIA in mismatch (%s vs %s).%s\n",
+ indev, ipinfo->iniface,
+ ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
+ return false;
+ }
+
+ for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+ ret |= (((const unsigned long *)outdev)[i]
+ ^ ((const unsigned long *)ipinfo->outiface)[i])
+ & ((const unsigned long *)ipinfo->outiface_mask)[i]; /* same masked compare for the output interface */
+ }
+
+ if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
+ dprintf("VIA out mismatch (%s vs %s).%s\n",
+ outdev, ipinfo->outiface,
+ ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
+ return false;
+ }
+
+ /* Check specific protocol */
+ if (ipinfo->proto
+ && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
+ dprintf("Packet protocol %hi does not match %hi.%s\n",
+ ip->protocol, ipinfo->proto,
+ ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
+ return false;
+ }
+
+ /* If we have a fragment rule but the packet is not a fragment
+ * then we return false (no match) */
+ if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
+ dprintf("Fragment rule but not fragment.%s\n",
+ ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Sanity-check the IP part of a rule: returns false if any bit outside
+ * IPT_F_MASK is set in flags or any bit outside IPT_INV_MASK is set in
+ * invflags, true otherwise.
+ */
+static bool ip_checkentry(const struct ipt_ip *ip)
+{
+	/* unknown flag bits */
+	if (ip->flags & ~IPT_F_MASK) {
+		duprintf("Unknown flag bits set: %08X\n",
+			 ip->flags & ~IPT_F_MASK);
+		return false;
+	}
+
+	/* unknown inversion bits */
+	if (ip->invflags & ~IPT_INV_MASK) {
+		duprintf("Unknown invflag bits set: %08X\n",
+			 ip->invflags & ~IPT_INV_MASK);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Target function of the error target: logs the string stored in the
+ * target data (rate limited) and drops the packet.
+ */
+static unsigned int
+ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const char *msg = (const char *)par->targinfo;
+
+	if (net_ratelimit())
+		printk("ip_tables: error: `%s'\n", msg);
+
+	return NF_DROP;
+}
+
+/*
+ * Performance critical - called for every packet.
+ *
+ * Run one match extension of a rule against @skb.  The return value is
+ * inverted on purpose: true means "did NOT match", which makes the
+ * caller's iteration over the rule's matches stop.
+ */
+static inline bool
+do_match(struct ipt_entry_match *m, const struct sk_buff *skb,
+	 struct xt_match_param *par)
+{
+	par->match = m->u.kernel.match;
+	par->matchinfo = m->data;
+
+	/* Stop iteration if it doesn't match */
+	return !m->u.kernel.match->match(skb, par);
+}
+
+/*
+ * Performance critical.
+ * Returns the rule located @offset bytes after @base.
+ */
+static inline struct ipt_entry *
+get_entry(void *base, unsigned int offset)
+{
+	char *addr = (char *)base + offset;
+
+	return (struct ipt_entry *)addr;
+}
+
+/*
+ * All zeroes == unconditional rule.
+ * Mildly perf critical (only if packet tracing is on).
+ *
+ * Scans *ip as an array of 32-bit words; returns 1 if every word is
+ * zero, 0 as soon as a non-zero word is found.
+ */
+static inline int
+unconditional(const struct ipt_ip *ip)
+{
+	const __u32 *word = (const __u32 *)ip;
+	const __u32 *end = word + sizeof(*ip) / sizeof(__u32);
+
+	while (word < end)
+		if (*word++)
+			return 0;
+
+	return 1;
+/* FWINV was defined inside ip_packet_match(); it is dropped here */
+#undef FWINV
+}
+
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+static const char *const hooknames[] = { /* built-in chain name for each IPv4 hook, indexed by hook number */
+ [NF_INET_PRE_ROUTING] = "PREROUTING",
+ [NF_INET_LOCAL_IN] = "INPUT",
+ [NF_INET_FORWARD] = "FORWARD",
+ [NF_INET_LOCAL_OUT] = "OUTPUT",
+ [NF_INET_POST_ROUTING] = "POSTROUTING",
+};
+
+enum nf_ip_trace_comments { /* indices into comments[] below */
+ NF_IP_TRACE_COMMENT_RULE,
+ NF_IP_TRACE_COMMENT_RETURN,
+ NF_IP_TRACE_COMMENT_POLICY,
+};
+
+static const char *const comments[] = { /* third field of the "TRACE:" log prefix */
+ [NF_IP_TRACE_COMMENT_RULE] = "rule",
+ [NF_IP_TRACE_COMMENT_RETURN] = "return",
+ [NF_IP_TRACE_COMMENT_POLICY] = "policy",
+};
+
+static struct nf_loginfo trace_loginfo = { /* log parameters passed to nf_log_packet() by trace_packet() */
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = 4, /* NOTE(review): presumably the syslog "warning" level - confirm */
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+/* Mildly perf critical (only if packet tracing is on)
+ *
+ * Iteration callback used by trace_packet(): called for each entry @s
+ * while searching for entry @e.  Keeps *chainname and *rulenum up to
+ * date: an entry whose target is the error target is taken as the head
+ * of a user chain (its target data is the chain name, numbering
+ * restarts at 0); any other entry bumps the rule number.  When @s is
+ * @e, *comment is switched to "policy" or "return" if the rule is an
+ * unconditional standard target with a negative verdict.
+ * Returns 1 when @s == @e (stops the iteration), 0 otherwise.
+ */
+static inline int
+get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
+ char *hookname, char **chainname,
+ char **comment, unsigned int *rulenum)
+{
+ struct ipt_standard_target *t = (void *)ipt_get_target(s);
+
+ if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) {
+ /* Head of user chain: ERROR target with chainname */
+ *chainname = t->target.data;
+ (*rulenum) = 0;
+ } else if (s == e) {
+ (*rulenum)++;
+
+ if (s->target_offset == sizeof(struct ipt_entry) /* the rule has no match extensions */
+ && strcmp(t->target.u.kernel.target->name,
+ IPT_STANDARD_TARGET) == 0
+ && t->verdict < 0
+ && unconditional(&s->ip)) {
+ /* Tail of chains: STANDARD target (return/policy) */
+ *comment = *chainname == hookname /* still in the built-in chain -> policy, else return */
+ ? (char *)comments[NF_IP_TRACE_COMMENT_POLICY]
+ : (char *)comments[NF_IP_TRACE_COMMENT_RETURN];
+ }
+ return 1;
+ } else
+ (*rulenum)++;
+
+ return 0;
+}
+
+static void trace_packet(struct sk_buff *skb,
+ unsigned int hook,
+ const struct net_device *in,
+ const struct net_device *out,
+ const char *tablename,
+ struct xt_table_info *private,
+ struct ipt_entry *e)
+{ /*
+ * Log a "TRACE: table:chain:comment:rulenum" line for rule @e.  The
+ * chain name and rule number are found by walking the entries from
+ * the hook's first rule with get_chainname_rulenum().
+ */
+ void *table_base;
+ const struct ipt_entry *root;
+ char *hookname, *chainname, *comment;
+ unsigned int rulenum = 0;
+
+ table_base = (void *)private->entries[smp_processor_id()]; /* entry blob of the current CPU */
+ root = get_entry(table_base, private->hook_entry[hook]);
+
+ hookname = chainname = (char *)hooknames[hook]; /* start in the built-in chain of this hook */
+ comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE]; /* default; may be changed by the callback */
+
+ IPT_ENTRY_ITERATE(root,
+ private->size - private->hook_entry[hook],
+ get_chainname_rulenum,
+ e, hookname, &chainname, &comment, &rulenum);
+
+ nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
+ "TRACE: %s:%s:%s:%u ",
+ tablename, chainname, comment, rulenum);
+}
+#endif
+
+/* Returns one of the generic firewall policies, like NF_ACCEPT.
+ *
+ * Runs @skb through the rules of @table for @hook.  The walk happens
+ * with table->lock read-held, on the current CPU's copy of the rule
+ * blob, starting at hook_entry[hook].  For every rule whose IP part and
+ * match extensions all match, the counters are updated and the target
+ * is evaluated:
+ *  - standard target, negative verdict: either IPT_RETURN (continue at
+ *    the saved back pointer) or a final verdict;
+ *  - standard target, non-negative verdict: jump to that offset,
+ *    remembering where to return to unless it is a fall-through or a
+ *    GOTO rule;
+ *  - extension target: call it; IPT_CONTINUE moves on to the next rule,
+ *    anything else is the final verdict.
+ * The loop also ends when a match sets hotdrop, in which case NF_DROP
+ * is returned.
+ */
+unsigned int
+ipt_do_table(struct sk_buff *skb,
+ unsigned int hook,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct xt_table *table)
+{
+ static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); /* all-zero name used when a device is absent */
+ const struct iphdr *ip;
+ u_int16_t datalen; /* assigned below but never read in this function */
+ bool hotdrop = false;
+ /* Initializing verdict to NF_DROP keeps gcc happy. */
+ unsigned int verdict = NF_DROP;
+ const char *indev, *outdev;
+ void *table_base;
+ struct ipt_entry *e, *back;
+ struct xt_table_info *private;
+ struct xt_match_param mtpar;
+ struct xt_target_param tgpar;
+
+ /* Initialization */
+ ip = ip_hdr(skb);
+ datalen = skb->len - ip->ihl * 4;
+ indev = in ? in->name : nulldevname;
+ outdev = out ? out->name : nulldevname;
+ /* We handle fragments by dealing with the first fragment as
+ * if it was a normal packet. All other fragments are treated
+ * normally, except that they will NEVER match rules that ask
+ * things we don't know, ie. tcp syn flag or ports). If the
+ * rule is also a fragment-specific rule, non-fragments won't
+ * match it. */
+ mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
+ mtpar.thoff = ip_hdrlen(skb);
+ mtpar.hotdrop = &hotdrop;
+ mtpar.in = tgpar.in = in;
+ mtpar.out = tgpar.out = out;
+ mtpar.family = tgpar.family = NFPROTO_IPV4;
+ tgpar.hooknum = hook;
+
+ read_lock_bh(&table->lock);
+ IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+ private = table->private;
+ table_base = (void *)private->entries[smp_processor_id()]; /* entry blob of the current CPU */
+ e = get_entry(table_base, private->hook_entry[hook]);
+
+ /* For return from builtin chain */
+ back = get_entry(table_base, private->underflow[hook]);
+
+ do {
+ IP_NF_ASSERT(e);
+ IP_NF_ASSERT(back);
+ if (ip_packet_match(ip, indev, outdev,
+ &e->ip, mtpar.fragoff)) {
+ struct ipt_entry_target *t;
+
+ if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) /* do_match() returns true when a match extension fails */
+ goto no_match;
+
+ ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); /* presumably bytes += tot_len, packets += 1 - see ADD_COUNTER */
+
+ t = ipt_get_target(e);
+ IP_NF_ASSERT(t->u.kernel.target);
+
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+ /* The packet is traced: log it */
+ if (unlikely(skb->nf_trace))
+ trace_packet(skb, hook, in, out,
+ table->name, private, e);
+#endif
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct ipt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != IPT_RETURN) {
+ verdict = (unsigned)(-v) - 1; /* final verdicts are stored as -(verdict) - 1 */
+ break;
+ }
+ e = back; /* RETURN: resume at the saved rule ... */
+ back = get_entry(table_base,
+ back->comefrom); /* ... and restore the previous back pointer from it */
+ continue;
+ }
+ if (table_base + v != (void *)e + e->next_offset
+ && !(e->ip.flags & IPT_F_GOTO)) { /* a real jump: not a fall-through, not a GOTO */
+ /* Save old back ptr in next entry */
+ struct ipt_entry *next
+ = (void *)e + e->next_offset;
+ next->comefrom
+ = (void *)back - table_base;
+ /* set back pointer to next entry */
+ back = next;
+ }
+
+ e = get_entry(table_base, v);
+ } else {
+ /* Targets which reenter must return
+ abs. verdicts */
+ tgpar.target = t->u.kernel.target;
+ tgpar.targinfo = t->data;
+#ifdef CONFIG_NETFILTER_DEBUG
+ ((struct ipt_entry *)table_base)->comefrom
+ = 0xeeeeeeec; /* debug marker; checked after the target call */
+#endif
+ verdict = t->u.kernel.target->target(skb,
+ &tgpar);
+#ifdef CONFIG_NETFILTER_DEBUG
+ if (((struct ipt_entry *)table_base)->comefrom
+ != 0xeeeeeeec
+ && verdict == IPT_CONTINUE) { /* marker changed during the call: treated as re-entry */
+ printk("Target %s reentered!\n",
+ t->u.kernel.target->name);
+ verdict = NF_DROP;
+ }
+ ((struct ipt_entry *)table_base)->comefrom
+ = 0x57acc001;
+#endif
+ /* Target might have changed stuff. */
+ ip = ip_hdr(skb);
+ datalen = skb->len - ip->ihl * 4;
+
+ if (verdict == IPT_CONTINUE)
+ e = (void *)e + e->next_offset;
+ else
+ /* Verdict */
+ break;
+ }
+ } else {
+
+ no_match:
+ e = (void *)e + e->next_offset; /* advance to the next rule */
+ }
+ } while (!hotdrop);
+
+ read_unlock_bh(&table->lock);
+
+#ifdef DEBUG_ALLOW_ALL
+ return NF_ACCEPT;
+#else
+ if (hotdrop)
+ return NF_DROP;
+ else return verdict;
+#endif
+}
+
+/* Figures out from what hook each rule can be called: returns 0 if
+ there are loops. Puts hook bitmask in comefrom.
+
+ For every hook enabled in valid_hooks the ruleset is traversed
+ iteratively starting at that hook's entry point. While a rule is on
+ the current traversal path, bit NF_INET_NUMHOOKS of its comefrom is
+ set; meeting a rule that already has that bit set means a loop.
+ counters.pcnt of each rule temporarily holds the offset of the rule
+ we arrived from and is reset to 0 while backtracking. Also returns
+ 0 for a standard verdict that is out of range. */
+static int
+mark_source_chains(struct xt_table_info *newinfo,
+ unsigned int valid_hooks, void *entry0)
+{
+ unsigned int hook;
+
+ /* No recursion; use packet counter to save back ptrs (reset
+ to 0 as we leave), and comefrom to save source hook bitmask */
+ for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
+ unsigned int pos = newinfo->hook_entry[hook];
+ struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);
+
+ if (!(valid_hooks & (1 << hook)))
+ continue;
+
+ /* Set initial back pointer. */
+ e->counters.pcnt = pos;
+
+ for (;;) {
+ struct ipt_standard_target *t
+ = (void *)ipt_get_target(e);
+ /* Already reached from this hook on an earlier path? */
+ int visited = e->comefrom & (1 << hook);
+
+ /* The "on current path" bit is still set: we looped. */
+ if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
+ printk("iptables: loop hook %u pos %u %08X.\n",
+ hook, pos, e->comefrom);
+ return 0;
+ }
+ e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
+
+ /* Unconditional return/END. */
+ if ((e->target_offset == sizeof(struct ipt_entry)
+ && (strcmp(t->target.u.user.name,
+ IPT_STANDARD_TARGET) == 0)
+ && t->verdict < 0
+ && unconditional(&e->ip)) || visited) {
+ unsigned int oldpos, size;
+
+ /* Negative verdicts below -NF_MAX_VERDICT - 1 are
+ * rejected. */
+ if (t->verdict < -NF_MAX_VERDICT - 1) {
+ duprintf("mark_source_chains: bad "
+ "negative verdict (%i)\n",
+ t->verdict);
+ return 0;
+ }
+
+ /* Return: backtrack through the last
+ big jump. */
+ do {
+ /* Leaving this rule: clear its "on path" bit. */
+ e->comefrom ^= (1<<NF_INET_NUMHOOKS);
+#ifdef DEBUG_IP_FIREWALL_USER
+ if (e->comefrom
+ & (1 << NF_INET_NUMHOOKS)) {
+ duprintf("Back unset "
+ "on hook %u "
+ "rule %u\n",
+ hook, pos);
+ }
+#endif
+ oldpos = pos;
+ pos = e->counters.pcnt;
+ e->counters.pcnt = 0;
+
+ /* We're at the start. */
+ if (pos == oldpos)
+ goto next;
+
+ e = (struct ipt_entry *)
+ (entry0 + pos);
+ /* Keep unwinding while the rule we came from is simply
+ * the one directly before us (a fall-through). */
+ } while (oldpos == pos + e->next_offset);
+
+ /* Move along one */
+ size = e->next_offset;
+ e = (struct ipt_entry *)
+ (entry0 + pos + size);
+ e->counters.pcnt = pos;
+ pos += size;
+ } else {
+ int newpos = t->verdict;
+
+ if (strcmp(t->target.u.user.name,
+ IPT_STANDARD_TARGET) == 0
+ && newpos >= 0) {
+ /* Jump target must leave room for at least an
+ * entry header inside the blob. */
+ if (newpos > newinfo->size -
+ sizeof(struct ipt_entry)) {
+ duprintf("mark_source_chains: "
+ "bad verdict (%i)\n",
+ newpos);
+ return 0;
+ }
+ /* This a jump; chase it. */
+ duprintf("Jump rule %u -> %u\n",
+ pos, newpos);
+ } else {
+ /* ... this is a fallthru */
+ newpos = pos + e->next_offset;
+ }
+ e = (struct ipt_entry *)
+ (entry0 + newpos);
+ /* Remember where we came from for backtracking. */
+ e->counters.pcnt = pos;
+ pos = newpos;
+ }
+ }
+ next:
+ duprintf("Finished chain %u\n", hook);
+ }
+ return 1;
+}
+
+/* Release one match of a rule: call the match extension's destroy hook
+ * when it has one, then drop the reference on its module.
+ *
+ * @i, when non-NULL, is a countdown: it is decremented on every call and
+ * the call that finds it at zero returns 1 without releasing anything,
+ * which stops the surrounding iteration.  Returns 0 otherwise. */
+static int
+cleanup_match(struct ipt_entry_match *m, unsigned int *i)
+{
+	struct xt_mtdtor_param dtor;
+
+	if (i != NULL) {
+		unsigned int remaining = *i;
+
+		*i = remaining - 1;
+		if (remaining == 0)
+			return 1;
+	}
+
+	dtor.match = m->u.kernel.match;
+	dtor.matchinfo = m->data;
+	dtor.family = NFPROTO_IPV4;
+	if (dtor.match->destroy)
+		dtor.match->destroy(&dtor);
+	module_put(dtor.match->me);
+	return 0;
+}
+
+/* Basic structural validation of one rule.
+ *
+ * Fails with -EINVAL when ip_checkentry() rejects the IP part, when the
+ * space between target_offset and next_offset cannot hold a target
+ * header, or when the target's declared size runs past next_offset.
+ * Returns 0 when the rule passes. */
+static int
+check_entry(struct ipt_entry *e, const char *name)
+{
+	const struct ipt_entry_target *tgt;
+
+	if (!ip_checkentry(&e->ip)) {
+		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+		return -EINVAL;
+	}
+
+	/* The header must fit before u.target_size may be read. */
+	if (e->next_offset <
+	    e->target_offset + sizeof(struct ipt_entry_target))
+		return -EINVAL;
+
+	tgt = ipt_get_target(e);
+	if (e->next_offset < e->target_offset + tgt->u.target_size)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Run the xtables sanity check on one match of a rule.
+ *
+ * @m:   match whose u.kernel.match has already been resolved
+ * @par: check parameters shared by all matches of the rule; ->match and
+ *       ->matchinfo are filled in here, ->entryinfo must point at the
+ *       rule's struct ipt_ip
+ * @i:   incremented when the match passes, so the caller knows how many
+ *       matches need cleaning up on a later failure
+ *
+ * Returns 0 on success or the negative error from xt_check_match(). */
+static int
+check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
+	    unsigned int *i)
+{
+	const struct ipt_ip *ip = par->entryinfo;
+	int ret;
+
+	par->match     = m->u.kernel.match;
+	par->matchinfo = m->data;
+
+	ret = xt_check_match(par, m->u.match_size - sizeof(*m),
+			     ip->proto, ip->invflags & IPT_INV_PROTO);
+	if (ret < 0) {
+		/* par is a pointer: "par.match" only went unnoticed
+		 * because duprintf() normally compiles to nothing. */
+		duprintf("ip_tables: check failed for `%s'.\n",
+			 par->match->name);
+		return ret;
+	}
+	++*i;
+	return 0;
+}
+
+/* Resolve the match named in @m (requesting module "ipt_<name>" if the
+ * first lookup fails), store it in m->u.kernel.match and validate it with
+ * check_match().
+ *
+ * Returns 0 on success.  Returns -ENOENT when no match is found, the
+ * lookup's error when it yields an error pointer, or check_match()'s
+ * error; in the last case the module reference is dropped again. */
+static int
+find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
+		 unsigned int *i)
+{
+	struct xt_match *found;
+	int err;
+
+	found = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
+						      m->u.user.revision),
+					"ipt_%s", m->u.user.name);
+	if (!found || IS_ERR(found)) {
+		duprintf("find_check_match: `%s' not found\n", m->u.user.name);
+		if (!found)
+			return -ENOENT;
+		return PTR_ERR(found);
+	}
+	m->u.kernel.match = found;
+
+	err = check_match(m, par, i);
+	if (err)
+		module_put(m->u.kernel.match->me);
+	return err;
+}
+
+/* Run the xtables sanity check on the target of rule @e, which belongs
+ * to table @name.  The target must already be resolved in
+ * u.kernel.target.  Returns 0 on success or the negative error from
+ * xt_check_target(). */
+static int check_target(struct ipt_entry *e, const char *name)
+{
+	struct ipt_entry_target *tgt = ipt_get_target(e);
+	struct xt_tgchk_param chk = {
+		.table     = name,
+		.entryinfo = e,
+		.target    = tgt->u.kernel.target,
+		.targinfo  = tgt->data,
+		.hook_mask = e->comefrom,
+		.family    = NFPROTO_IPV4,
+	};
+	int err;
+
+	err = xt_check_target(&chk, tgt->u.target_size - sizeof(*tgt),
+			      e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
+	if (err >= 0)
+		return 0;
+
+	duprintf("ip_tables: check failed for `%s'.\n",
+		 tgt->u.kernel.target->name);
+	return err;
+}
+
+/* Fully validate one rule and bind its extensions.
+ *
+ * Runs check_entry(), resolves and checks every match, then resolves the
+ * target (requesting module "ipt_<name>" if needed) and checks it.  On
+ * success *i is incremented and 0 is returned.  On failure the matches
+ * accepted so far are cleaned up, the target's module reference is
+ * dropped if it had been taken, and a negative error is returned.
+ * @size is not used. */
+static int
+find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
+		 unsigned int *i)
+{
+	struct xt_mtchk_param mtpar;
+	struct ipt_entry_target *tgt;
+	struct xt_target *found;
+	unsigned int nmatches;
+	int err;
+
+	err = check_entry(e, name);
+	if (err)
+		return err;
+
+	nmatches = 0;
+	mtpar.table = name;
+	mtpar.entryinfo = &e->ip;
+	mtpar.hook_mask = e->comefrom;
+	mtpar.family = NFPROTO_IPV4;
+	err = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &nmatches);
+	if (err != 0)
+		goto undo_matches;
+
+	tgt = ipt_get_target(e);
+	found = try_then_request_module(xt_find_target(AF_INET,
+						       tgt->u.user.name,
+						       tgt->u.user.revision),
+					"ipt_%s", tgt->u.user.name);
+	if (!found || IS_ERR(found)) {
+		duprintf("find_check_entry: `%s' not found\n", tgt->u.user.name);
+		err = found ? PTR_ERR(found) : -ENOENT;
+		goto undo_matches;
+	}
+	tgt->u.kernel.target = found;
+
+	err = check_target(e, name);
+	if (err) {
+		/* Target was found but rejected: give its module back. */
+		module_put(tgt->u.kernel.target->me);
+		goto undo_matches;
+	}
+
+	(*i)++;
+	return 0;
+
+ undo_matches:
+	/* Only the first nmatches matches were set up. */
+	IPT_MATCH_ITERATE(e, cleanup_match, &nmatches);
+	return err;
+}
+
+/* First-pass validation of one rule in a user-supplied blob.
+ *
+ * @e:            rule being examined
+ * @newinfo:      table info whose hook_entry[]/underflow[] are filled in
+ *                when @e sits exactly at a user-declared offset
+ * @base, @limit: start and one-past-the-end of the blob
+ * @hook_entries, @underflows: offsets declared by userspace
+ * @i:            running count of accepted rules
+ *
+ * Rejects (-EINVAL) a rule that is misaligned, whose header does not lie
+ * within the blob, whose next_offset would carry past @limit, or whose
+ * next_offset is too small to hold an entry plus a target header.  On
+ * success the rule's counters and comefrom are cleared and *i is bumped. */
+static int
+check_entry_size_and_hooks(struct ipt_entry *e,
+			   struct xt_table_info *newinfo,
+			   unsigned char *base,
+			   unsigned char *limit,
+			   const unsigned int *hook_entries,
+			   const unsigned int *underflows,
+			   unsigned int *i)
+{
+	unsigned int h;
+
+	/* next_offset is only read after the header itself is known to be
+	 * inside the blob (short-circuit evaluation).  Without the last
+	 * test a rule could claim to extend beyond the copied data and
+	 * later passes would read past the end of the blob. */
+	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
+	    || (unsigned char *)e + sizeof(struct ipt_entry) >= limit
+	    || (unsigned char *)e + e->next_offset > limit) {
+		duprintf("Bad offset %p\n", e);
+		return -EINVAL;
+	}
+
+	if (e->next_offset
+	    < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
+		duprintf("checking: element %p size %u\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
+
+	/* Check hooks & underflows */
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+		if ((unsigned char *)e - base == hook_entries[h])
+			newinfo->hook_entry[h] = hook_entries[h];
+		if ((unsigned char *)e - base == underflows[h])
+			newinfo->underflow[h] = underflows[h];
+	}
+
+	/* FIXME: underflows must be unconditional, standard verdicts
+	   < 0 (not IPT_RETURN). --RR */
+
+	/* Clear counters and comefrom */
+	e->counters = ((struct xt_counters) { 0, 0 });
+	e->comefrom = 0;
+
+	(*i)++;
+	return 0;
+}
+
+/* Release everything one rule holds: all of its matches (through
+ * cleanup_match()) and its target (destroy hook if present, then the
+ * module reference).
+ *
+ * @i, when non-NULL, is a countdown: it is decremented on every call and
+ * the call that finds it at zero returns 1 without touching the rule,
+ * which stops the surrounding iteration.  Returns 0 otherwise. */
+static int
+cleanup_entry(struct ipt_entry *e, unsigned int *i)
+{
+	struct ipt_entry_target *tgt;
+	struct xt_tgdtor_param dtor;
+
+	if (i != NULL) {
+		unsigned int remaining = *i;
+
+		*i = remaining - 1;
+		if (remaining == 0)
+			return 1;
+	}
+
+	/* Matches first, then the target. */
+	IPT_MATCH_ITERATE(e, cleanup_match, NULL);
+
+	tgt = ipt_get_target(e);
+	dtor.target = tgt->u.kernel.target;
+	dtor.targinfo = tgt->data;
+	dtor.family = NFPROTO_IPV4;
+	if (dtor.target->destroy)
+		dtor.target->destroy(&dtor);
+	module_put(dtor.target->me);
+	return 0;
+}
+
+/* Checks and translates the user-supplied table segment (held in
+ * newinfo).
+ *
+ * Steps, in order:
+ *  1. size/alignment check of every rule, recording hook entry points
+ *     and underflows (check_entry_size_and_hooks);
+ *  2. the number of rules found must equal @number;
+ *  3. every hook in @valid_hooks must have an entry point and underflow;
+ *  4. mark_source_chains() must find no loop (-ELOOP otherwise);
+ *  5. every rule is fully checked and bound (find_check_entry); on
+ *     failure the rules already bound are cleaned up again;
+ *  6. the finished blob is copied to every other CPU's slot.
+ *
+ * Returns 0 on success or a negative errno. */
+static int
+translate_table(const char *name,
+		unsigned int valid_hooks,
+		struct xt_table_info *newinfo,
+		void *entry0,
+		unsigned int size,
+		unsigned int number,
+		const unsigned int *hook_entries,
+		const unsigned int *underflows)
+{
+	unsigned int count, h, cpu;
+	int err;
+
+	newinfo->size = size;
+	newinfo->number = number;
+
+	/* Init all hooks to impossible value. */
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+		newinfo->hook_entry[h] = 0xFFFFFFFF;
+		newinfo->underflow[h] = 0xFFFFFFFF;
+	}
+
+	duprintf("translate_table: size %u\n", newinfo->size);
+
+	/* Walk through entries, checking offsets. */
+	count = 0;
+	err = IPT_ENTRY_ITERATE(entry0, newinfo->size,
+				check_entry_size_and_hooks,
+				newinfo,
+				entry0,
+				entry0 + size,
+				hook_entries, underflows, &count);
+	if (err != 0)
+		return err;
+
+	if (count != number) {
+		duprintf("translate_table: %u not %u entries\n",
+			 count, number);
+		return -EINVAL;
+	}
+
+	/* Check hooks all assigned; only valid hooks matter. */
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+		if ((valid_hooks & (1 << h)) == 0)
+			continue;
+		if (newinfo->hook_entry[h] == 0xFFFFFFFF) {
+			duprintf("Invalid hook entry %u %u\n",
+				 h, hook_entries[h]);
+			return -EINVAL;
+		}
+		if (newinfo->underflow[h] == 0xFFFFFFFF) {
+			duprintf("Invalid underflow %u %u\n",
+				 h, underflows[h]);
+			return -EINVAL;
+		}
+	}
+
+	if (!mark_source_chains(newinfo, valid_hooks, entry0))
+		return -ELOOP;
+
+	/* Finally, each sanity check must pass */
+	count = 0;
+	err = IPT_ENTRY_ITERATE(entry0, newinfo->size,
+				find_check_entry, name, size, &count);
+	if (err != 0) {
+		/* count says how many rules were bound before the failure. */
+		IPT_ENTRY_ITERATE(entry0, newinfo->size,
+				  cleanup_entry, &count);
+		return err;
+	}
+
+	/* And one copy for every other CPU */
+	for_each_possible_cpu(cpu) {
+		if (newinfo->entries[cpu] && newinfo->entries[cpu] != entry0)
+			memcpy(newinfo->entries[cpu], entry0, newinfo->size);
+	}
+
+	return err;
+}
+
+/* Iterator callback: add rule @e's byte/packet counters onto slot *i of
+ * @total, then advance *i to the next slot.  Always returns 0 so the
+ * iteration continues. */
+static inline int
+add_entry_to_counter(const struct ipt_entry *e,
+		     struct xt_counters total[],
+		     unsigned int *i)
+{
+	unsigned int slot = *i;
+
+	ADD_COUNTER(total[slot], e->counters.bcnt, e->counters.pcnt);
+	*i = slot + 1;
+	return 0;
+}
+
+/* Iterator callback: overwrite slot *i of @total with rule @e's
+ * byte/packet counters, then advance *i to the next slot.  Always returns
+ * 0 so the iteration continues.
+ *
+ * The array is typed struct xt_counters, the type get_counters() passes
+ * in and the one add_entry_to_counter() declares. */
+static inline int
+set_entry_to_counter(const struct ipt_entry *e,
+		     struct xt_counters total[],
+		     unsigned int *i)
+{
+	SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+/* Sum the per-CPU rule counters of @t into @counters, one slot per rule.
+ *
+ * The slots are first overwritten with the values from the copy belonging
+ * to the CPU we are running on, so the caller does not have to clear the
+ * array beforehand; the copies of all other possible CPUs are then added
+ * on top.  Being preempted in between is not a concern here. */
+static void
+get_counters(const struct xt_table_info *t,
+	     struct xt_counters counters[])
+{
+	unsigned int self = raw_smp_processor_id();
+	unsigned int other;
+	unsigned int slot;
+
+	slot = 0;
+	IPT_ENTRY_ITERATE(t->entries[self], t->size,
+			  set_entry_to_counter, counters, &slot);
+
+	for_each_possible_cpu(other) {
+		if (other != self) {
+			slot = 0;
+			IPT_ENTRY_ITERATE(t->entries[other], t->size,
+					  add_entry_to_counter, counters,
+					  &slot);
+		}
+	}
+}
+
+/* Allocate an array with one counter per rule of @table and fill it with
+ * the summed per-CPU counters.  The sum is taken under the table write
+ * lock so that it is an atomic snapshot; the rest of the rule data does
+ * not change (other than comefrom, which userspace doesn't care about).
+ *
+ * Returns the array, to be released with vfree(), or ERR_PTR(-ENOMEM). */
+static struct xt_counters * alloc_counters(struct xt_table *table)
+{
+	const struct xt_table_info *info = table->private;
+	unsigned int bytes = sizeof(struct xt_counters) * info->number;
+	struct xt_counters *snapshot;
+
+	snapshot = vmalloc_node(bytes, numa_node_id());
+	if (!snapshot)
+		return ERR_PTR(-ENOMEM);
+
+	write_lock_bh(&table->lock);
+	get_counters(info, snapshot);
+	write_unlock_bh(&table->lock);
+
+	return snapshot;
+}
+
+/* Copy the ruleset of @table to userspace.
+ *
+ * The raw rule blob (@total_size bytes of this CPU's copy) is copied
+ * first. Then, rule by rule, the parts of that image that differ from
+ * what userspace should see are overwritten in the user buffer: the
+ * counters (with the summed snapshot from alloc_counters()) and the
+ * name field of every match and of the target (with the extension's
+ * name, since in the kernel image that union holds kernel pointers).
+ *
+ * Returns 0, -EFAULT if a copy to userspace fails, or the error from
+ * alloc_counters(). */
+static int
+copy_entries_to_user(unsigned int total_size,
+ struct xt_table *table,
+ void __user *userptr)
+{
+ unsigned int off, num;
+ struct ipt_entry *e;
+ struct xt_counters *counters;
+ const struct xt_table_info *private = table->private;
+ int ret = 0;
+ const void *loc_cpu_entry;
+
+ counters = alloc_counters(table);
+ if (IS_ERR(counters))
+ return PTR_ERR(counters);
+
+ /* choose the copy that is on our node/cpu, ...
+ * This choice is lazy (because current thread is
+ * allowed to migrate to another cpu)
+ */
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ /* FIXME: use iterator macros --RR */
+ /* ... then go back and fix counters and names */
+ /* e is assigned inside the body; the loop increment relies on it. */
+ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
+ unsigned int i;
+ const struct ipt_entry_match *m;
+ const struct ipt_entry_target *t;
+
+ e = (struct ipt_entry *)(loc_cpu_entry + off);
+ /* Replace this rule's counters with the summed snapshot. */
+ if (copy_to_user(userptr + off
+ + offsetof(struct ipt_entry, counters),
+ &counters[num],
+ sizeof(counters[num])) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ /* Matches occupy [sizeof(struct ipt_entry), target_offset);
+ * m is assigned inside the body before the increment uses it. */
+ for (i = sizeof(struct ipt_entry);
+ i < e->target_offset;
+ i += m->u.match_size) {
+ m = (void *)e + i;
+
+ if (copy_to_user(userptr + off + i
+ + offsetof(struct ipt_entry_match,
+ u.user.name),
+ m->u.kernel.match->name,
+ strlen(m->u.kernel.match->name)+1)
+ != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+ }
+
+ /* Same name fix-up for the rule's target. */
+ t = ipt_get_target(e);
+ if (copy_to_user(userptr + off + e->target_offset
+ + offsetof(struct ipt_entry_target,
+ u.user.name),
+ t->u.kernel.target->name,
+ strlen(t->u.kernel.target->name)+1) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+ }
+
+ free_counters:
+ vfree(counters);
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+/* Convert a standard target's verdict from the compat layout at @src to
+ * the native int at @dst.  A positive verdict is adjusted by
+ * xt_compat_calc_jump(); zero and negative values are copied as-is. */
+static void compat_standard_from_user(void *dst, void *src)
+{
+	int verdict = *(compat_int_t *)src;
+
+	if (verdict > 0)
+		verdict = verdict + xt_compat_calc_jump(AF_INET, verdict);
+
+	memcpy(dst, &verdict, sizeof(verdict));
+}
+
+/* Convert a standard target's native verdict at @src to the compat
+ * layout and copy it to userspace at @dst.  A positive verdict is reduced
+ * by xt_compat_calc_jump().  Returns 0, or -EFAULT when the copy fails. */
+static int compat_standard_to_user(void __user *dst, void *src)
+{
+	compat_int_t cv = *(int *)src;
+
+	if (cv > 0)
+		cv = cv - xt_compat_calc_jump(AF_INET, cv);
+
+	if (copy_to_user(dst, &cv, sizeof(cv)))
+		return -EFAULT;
+	return 0;
+}
+
+/* Iterator callback: add the compat size difference of match @m, as
+ * reported by xt_compat_match_offset(), to *size.  Always returns 0. */
+static inline int
+compat_calc_match(struct ipt_entry_match *m, int *size)
+{
+	struct xt_match *match = m->u.kernel.match;
+
+	*size = *size + xt_compat_match_offset(match);
+	return 0;
+}
+
+/* Iterator callback: work out how many bytes rule @e loses when
+ * expressed in the compat layout, and account for that in @newinfo.
+ *
+ * The per-rule difference (entry header + every match + target) is
+ * subtracted from newinfo->size and registered with
+ * xt_compat_add_offset() under the rule's offset from @base.  Every hook
+ * entry point / underflow of @info that is non-zero and lies after @e is
+ * moved down by the same amount in @newinfo.
+ *
+ * Returns 0, or the error from xt_compat_add_offset(). */
+static int compat_calc_entry(struct ipt_entry *e,
+			     const struct xt_table_info *info,
+			     void *base, struct xt_table_info *newinfo)
+{
+	struct ipt_entry_target *tgt;
+	unsigned int entry_offset = (void *)e - base;
+	int delta, h, err;
+
+	delta = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
+	IPT_MATCH_ITERATE(e, compat_calc_match, &delta);
+	tgt = ipt_get_target(e);
+	delta += xt_compat_target_offset(tgt->u.kernel.target);
+
+	newinfo->size -= delta;
+	err = xt_compat_add_offset(AF_INET, entry_offset, delta);
+	if (err)
+		return err;
+
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+		if (info->hook_entry[h]) {
+			if (e < (struct ipt_entry *)(base + info->hook_entry[h]))
+				newinfo->hook_entry[h] -= delta;
+		}
+		if (info->underflow[h]) {
+			if (e < (struct ipt_entry *)(base + info->underflow[h]))
+				newinfo->underflow[h] -= delta;
+		}
+	}
+	return 0;
+}
+
+/* Fill @newinfo with the compat view of @info: the fields that precede
+ * entries[] are copied, initial_entries is reset to 0, and size plus the
+ * hook offsets are then adjusted rule by rule via compat_calc_entry().
+ * newinfo->entries[] is left untouched.
+ *
+ * Returns -EINVAL when either pointer is NULL, otherwise the result of
+ * the iteration (0 on success). */
+static int compat_table_info(const struct xt_table_info *info,
+			     struct xt_table_info *newinfo)
+{
+	void *local_rules;
+
+	if (info == NULL || newinfo == NULL)
+		return -EINVAL;
+
+	/* we dont care about newinfo->entries[] */
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	newinfo->initial_entries = 0;
+
+	local_rules = info->entries[raw_smp_processor_id()];
+	return IPT_ENTRY_ITERATE(local_rules, info->size,
+				 compat_calc_entry, info, local_rules,
+				 newinfo);
+}
+#endif
+
+/* Report a table's hook mask, hook entry points, underflows, number of
+ * entries and size to userspace as a struct ipt_getinfo.
+ *
+ * @net:    network namespace to look the table up in
+ * @user:   user buffer; on input it starts with the table name, on
+ *          output it receives the struct ipt_getinfo
+ * @len:    must point at exactly sizeof(struct ipt_getinfo)
+ * @compat: non-zero for a compat caller; the sizes/offsets are then
+ *          converted with compat_table_info() under the compat lock
+ *
+ * Returns 0, -EINVAL on a wrong length, -EFAULT on a failed user copy,
+ * -ENOENT or the lookup's error when the table cannot be found. */
+static int get_info(struct net *net, void __user *user, int *len, int compat)
+{
+	char name[IPT_TABLE_MAXNAMELEN];
+	struct xt_table *t;
+	int ret;
+
+	if (*len != sizeof(struct ipt_getinfo)) {
+		duprintf("length %u != %zu\n", *len,
+			 sizeof(struct ipt_getinfo));
+		return -EINVAL;
+	}
+
+	if (copy_from_user(name, user, sizeof(name)) != 0)
+		return -EFAULT;
+
+	name[IPT_TABLE_MAXNAMELEN-1] = '\0';
+#ifdef CONFIG_COMPAT
+	if (compat)
+		xt_compat_lock(AF_INET);
+#endif
+	t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
+				    "iptable_%s", name);
+	if (t && !IS_ERR(t)) {
+		struct ipt_getinfo info;
+		const struct xt_table_info *private = t->private;
+#ifdef CONFIG_COMPAT
+		/* Must live as long as 'private' may point at it: it used to
+		 * be declared inside the if-block below and was read after
+		 * that scope had ended. */
+		struct xt_table_info tmp;
+
+		if (compat) {
+			/* NOTE(review): a failure here is overwritten by the
+			 * copy_to_user() result below. */
+			ret = compat_table_info(private, &tmp);
+			xt_compat_flush_offsets(AF_INET);
+			private = &tmp;
+		}
+#endif
+		/* The whole struct goes to userspace; clear it first so
+		 * that padding and the bytes after the name's terminator
+		 * do not carry stale kernel stack contents. */
+		memset(&info, 0, sizeof(info));
+		info.valid_hooks = t->valid_hooks;
+		memcpy(info.hook_entry, private->hook_entry,
+		       sizeof(info.hook_entry));
+		memcpy(info.underflow, private->underflow,
+		       sizeof(info.underflow));
+		info.num_entries = private->number;
+		info.size = private->size;
+		strcpy(info.name, name);
+
+		if (copy_to_user(user, &info, *len) != 0)
+			ret = -EFAULT;
+		else
+			ret = 0;
+
+		xt_table_unlock(t);
+		module_put(t->me);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+#ifdef CONFIG_COMPAT
+	if (compat)
+		xt_compat_unlock(AF_INET);
+#endif
+	return ret;
+}
+
+/* Copy a table's rules to userspace.
+ *
+ * @net:  network namespace to look the table up in
+ * @uptr: user buffer starting with a struct ipt_get_entries header (table
+ *        name and expected size), followed by room for the rules
+ * @len:  total length of the user buffer; must equal the header size plus
+ *        the size given in the header
+ *
+ * Returns 0 on success, -EINVAL on a length mismatch, -EFAULT on a failed
+ * user copy, -EAGAIN when the table's current size differs from the size
+ * userspace asked for, -ENOENT or the lookup's error when the table
+ * cannot be found. */
+static int
+get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
+{
+	int ret;
+	struct ipt_get_entries get;
+	struct xt_table *t;
+
+	if (*len < sizeof(get)) {
+		duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
+		return -EINVAL;
+	}
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+	if (*len != sizeof(struct ipt_get_entries) + get.size) {
+		duprintf("get_entries: %u != %zu\n",
+			 *len, sizeof(get) + get.size);
+		return -EINVAL;
+	}
+	/* The name comes straight from userspace and is used as a C string
+	 * by the table lookup; force termination. */
+	get.name[sizeof(get.name) - 1] = '\0';
+
+	t = xt_find_table_lock(net, AF_INET, get.name);
+	if (t && !IS_ERR(t)) {
+		const struct xt_table_info *private = t->private;
+		duprintf("t->private->number = %u\n", private->number);
+		if (get.size == private->size)
+			ret = copy_entries_to_user(private->size,
+						   t, uptr->entrytable);
+		else {
+			duprintf("get_entries: I've got %u not %u!\n",
+				 private->size, get.size);
+			ret = -EAGAIN;
+		}
+		module_put(t->me);
+		xt_table_unlock(t);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+
+	return ret;
+}
+
+/* Swap the ruleset of table @name for @newinfo and hand the old
+ * ruleset's counters back to userspace.
+ *
+ * @valid_hooks must equal the table's own valid_hooks, otherwise
+ * -EINVAL. @num_counters sizes the temporary counter array that is
+ * copied to @counters_ptr; the caller bounds it before calling.
+ * After a successful xt_replace_table() the old rules are cleaned up
+ * and the old table info is freed here.
+ *
+ * Returns 0 on success; -ENOMEM, -ENOENT, -EINVAL, -EFAULT or the
+ * error reported by the lookup / xt_replace_table() otherwise. On
+ * the error exits taken before the replacement, @newinfo is not
+ * touched. */
+static int
+__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
+ struct xt_table_info *newinfo, unsigned int num_counters,
+ void __user *counters_ptr)
+{
+ int ret;
+ struct xt_table *t;
+ struct xt_table_info *oldinfo;
+ struct xt_counters *counters;
+ void *loc_cpu_old_entry;
+
+ ret = 0;
+ counters = vmalloc(num_counters * sizeof(struct xt_counters));
+ if (!counters) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
+ "iptable_%s", name);
+ if (!t || IS_ERR(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ goto free_newinfo_counters_untrans;
+ }
+
+ /* You lied! */
+ if (valid_hooks != t->valid_hooks) {
+ duprintf("Valid hook crap: %08X vs %08X\n",
+ valid_hooks, t->valid_hooks);
+ ret = -EINVAL;
+ goto put_module;
+ }
+
+ /* On failure xt_replace_table() returns NULL and reports the
+ * reason through ret. */
+ oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
+ if (!oldinfo)
+ goto put_module;
+
+ /* Update module usage count based on number of rules */
+ duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
+ oldinfo->number, oldinfo->initial_entries, newinfo->number);
+ /* Zero, one or two references are dropped here depending on how
+ * the old and new rule counts compare with initial_entries. */
+ if ((oldinfo->number > oldinfo->initial_entries) ||
+ (newinfo->number <= oldinfo->initial_entries))
+ module_put(t->me);
+ if ((oldinfo->number > oldinfo->initial_entries) &&
+ (newinfo->number <= oldinfo->initial_entries))
+ module_put(t->me);
+
+ /* Get the old counters. */
+ /* NOTE(review): this writes oldinfo->number slots into an array
+ * sized by num_counters; presumably xt_replace_table() has verified
+ * the two agree - confirm. */
+ get_counters(oldinfo, counters);
+ /* Decrease module usage counts and free resource */
+ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+ IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
+ NULL);
+ xt_free_table_info(oldinfo);
+ /* The table has already been replaced at this point; a failed copy
+ * only changes the return value. */
+ if (copy_to_user(counters_ptr, counters,
+ sizeof(struct xt_counters) * num_counters) != 0)
+ ret = -EFAULT;
+ vfree(counters);
+ xt_table_unlock(t);
+ return ret;
+
+ put_module:
+ module_put(t->me);
+ xt_table_unlock(t);
+ /* Despite its name this label only frees the counter array. */
+ free_newinfo_counters_untrans:
+ vfree(counters);
+ out:
+ return ret;
+}
+
+/* Handle a table replace request from userspace.
+ *
+ * @net:  network namespace the table lives in
+ * @user: user buffer holding a struct ipt_replace header immediately
+ *        followed by tmp.size bytes of rules
+ * @len:  length of the user buffer (not used here)
+ *
+ * Copies the header and the rule blob in, validates and binds the rules
+ * with translate_table(), then installs them with __do_replace().
+ * Returns 0 on success or a negative errno; on failure the new table
+ * info is freed, and if the rules had already been bound they are
+ * cleaned up first. */
+static int
+do_replace(struct net *net, void __user *user, unsigned int len)
+{
+	int ret;
+	struct ipt_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* overflow check */
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+
+	/* The table name is used as a C string below (module request with
+	 * "%s", table lookup); userspace is not required to have
+	 * terminated it, so do it here. */
+	tmp.name[sizeof(tmp.name) - 1] = '\0';
+
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
+
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	ret = translate_table(tmp.name, tmp.valid_hooks,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
+			      tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+
+	duprintf("ip_tables: Translated table\n");
+
+	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+			   tmp.num_counters, tmp.counters);
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	/* Rules were bound by translate_table(): release them again. */
+	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+ free_newinfo:
+	xt_free_table_info(newinfo);
+	return ret;
+}
+
+/* Iterator callback: add the user-supplied counters in slot *i of @addme
+ * to rule @e, then advance *i.  Always returns 0.
+ *
+ * We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ipt_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	unsigned int slot = *i;
+
+#if 0
+	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
+		 *i,
+		 (long unsigned int)e->counters.pcnt,
+		 (long unsigned int)e->counters.bcnt,
+		 (long unsigned int)addme[*i].pcnt,
+		 (long unsigned int)addme[*i].bcnt);
+#endif
+
+	ADD_COUNTER(e->counters, addme[slot].bcnt, addme[slot].pcnt);
+	*i = slot + 1;
+	return 0;
+}
+
+/* Add user-supplied byte/packet counts to the rules of a table.
+ *
+ * @net:    network namespace the table lives in
+ * @user:   user buffer: a struct xt_counters_info header (or its compat
+ *          variant) followed by num_counters struct xt_counters
+ * @len:    total length of the user buffer; must match header size plus
+ *          num_counters counters exactly
+ * @compat: non-zero when the header uses the compat layout
+ *
+ * The counters are added, under the table write lock, to the rule copy of
+ * the CPU we are running on.  Returns 0 on success; -EFAULT, -EINVAL
+ * (length mismatch, or num_counters differs from the table's rule count),
+ * -ENOMEM, -ENOENT or the lookup's error otherwise. */
+static int
+do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
+{
+	unsigned int i;
+	struct xt_counters_info tmp;
+	struct xt_counters *paddc;
+	unsigned int num_counters;
+	const char *name;
+	int size;
+	void *ptmp;
+	struct xt_table *t;
+	const struct xt_table_info *private;
+	int ret = 0;
+	void *loc_cpu_entry;
+#ifdef CONFIG_COMPAT
+	struct compat_xt_counters_info compat_tmp;
+
+	if (compat) {
+		ptmp = &compat_tmp;
+		size = sizeof(struct compat_xt_counters_info);
+	} else
+#endif
+	{
+		ptmp = &tmp;
+		size = sizeof(struct xt_counters_info);
+	}
+
+	if (copy_from_user(ptmp, user, size) != 0)
+		return -EFAULT;
+
+	/* The table name arrives unchecked from userspace and is used as a
+	 * C string by the lookup below, so terminate it in whichever
+	 * header was filled in. */
+#ifdef CONFIG_COMPAT
+	if (compat) {
+		num_counters = compat_tmp.num_counters;
+		compat_tmp.name[sizeof(compat_tmp.name) - 1] = '\0';
+		name = compat_tmp.name;
+	} else
+#endif
+	{
+		num_counters = tmp.num_counters;
+		tmp.name[sizeof(tmp.name) - 1] = '\0';
+		name = tmp.name;
+	}
+
+	if (len != size + num_counters * sizeof(struct xt_counters))
+		return -EINVAL;
+
+	paddc = vmalloc_node(len - size, numa_node_id());
+	if (!paddc)
+		return -ENOMEM;
+
+	if (copy_from_user(paddc, user + size, len - size) != 0) {
+		ret = -EFAULT;
+		goto free;
+	}
+
+	t = xt_find_table_lock(net, AF_INET, name);
+	if (!t || IS_ERR(t)) {
+		ret = t ? PTR_ERR(t) : -ENOENT;
+		goto free;
+	}
+
+	write_lock_bh(&t->lock);
+	private = t->private;
+	if (private->number != num_counters) {
+		ret = -EINVAL;
+		goto unlock_up_free;
+	}
+
+	i = 0;
+	/* Choose the copy that is on our node */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	IPT_ENTRY_ITERATE(loc_cpu_entry,
+			  private->size,
+			  add_counter_to_entry,
+			  paddc,
+			  &i);
+ unlock_up_free:
+	write_unlock_bh(&t->lock);
+	xt_table_unlock(t);
+	module_put(t->me);
+ free:
+	vfree(paddc);
+
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+/* Compat-layout table replace request header, followed by the compat
+ * rule entries. Presumably the 32-bit userspace counterpart of
+ * struct ipt_replace - confirm against the compat replace handler. */
+struct compat_ipt_replace {
+ char name[IPT_TABLE_MAXNAMELEN];
+ u32 valid_hooks;
+ u32 num_entries;
+ u32 size;
+ u32 hook_entry[NF_INET_NUMHOOKS];
+ u32 underflow[NF_INET_NUMHOOKS];
+ u32 num_counters;
+ compat_uptr_t counters; /* struct ipt_counters * */
+ struct compat_ipt_entry entries[0];
+};
+
+/* Iterator callback: write rule @e to userspace in the compat layout.
+ *
+ * @dstptr:   cursor into the user buffer, advanced past what is written
+ * @size:     running total size, reduced by every byte the compat form
+ *            saves
+ * @counters: summed counters, indexed by rule number
+ * @i:        rule number, incremented on success
+ *
+ * The entry header and counters are copied first, then each match and
+ * the target are converted, and finally target_offset/next_offset in the
+ * user copy are rewritten to reflect the bytes saved so far.
+ * Returns 0, -EFAULT on a failed user access, or the error of a match or
+ * target conversion. */
+static int
+compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
+			  unsigned int *size, struct xt_counters *counters,
+			  unsigned int *i)
+{
+	struct compat_ipt_entry __user *ce;
+	struct ipt_entry_target *tgt;
+	u_int16_t target_offset, next_offset;
+	compat_uint_t origsize = *size;
+	int err;
+
+	ce = (struct compat_ipt_entry __user *)*dstptr;
+	if (copy_to_user(ce, e, sizeof(struct ipt_entry)))
+		return -EFAULT;
+	if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
+		return -EFAULT;
+
+	*dstptr += sizeof(struct compat_ipt_entry);
+	*size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
+
+	err = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
+	if (err)
+		return err;
+	/* Bytes saved by the header and the matches shift the target. */
+	target_offset = e->target_offset - (origsize - *size);
+
+	tgt = ipt_get_target(e);
+	err = xt_compat_target_to_user(tgt, dstptr, size);
+	if (err)
+		return err;
+	/* ... and those saved by the target as well shift the next rule. */
+	next_offset = e->next_offset - (origsize - *size);
+
+	if (put_user(target_offset, &ce->target_offset))
+		return -EFAULT;
+	if (put_user(next_offset, &ce->next_offset))
+		return -EFAULT;
+
+	(*i)++;
+	return 0;
+}
+
+/* Iterator callback for a compat rule: resolve the match named in @m
+ * (requesting module "ipt_<name>" if the first lookup fails), store it in
+ * m->u.kernel.match, add its compat size difference to *size and bump *i.
+ * @name, @ip and @hookmask are not used.
+ *
+ * Returns 0 on success, -ENOENT when no match is found, or the lookup's
+ * error when it yields an error pointer. */
+static int
+compat_find_calc_match(struct ipt_entry_match *m,
+		       const char *name,
+		       const struct ipt_ip *ip,
+		       unsigned int hookmask,
+		       int *size, unsigned int *i)
+{
+	struct xt_match *found;
+
+	found = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
+						      m->u.user.revision),
+					"ipt_%s", m->u.user.name);
+	if (!found || IS_ERR(found)) {
+		duprintf("compat_check_calc_match: `%s' not found\n",
+			 m->u.user.name);
+		if (!found)
+			return -ENOENT;
+		return PTR_ERR(found);
+	}
+
+	m->u.kernel.match = found;
+	*size = *size + xt_compat_match_offset(found);
+
+	(*i)++;
+	return 0;
+}
+
+/* Iterator callback: drop the module reference held for match @m.
+ *
+ * @i, when non-NULL, is a countdown: it is decremented on every call and
+ * the call that finds it at zero returns 1 without releasing anything,
+ * which stops the surrounding iteration.  Returns 0 otherwise. */
+static int
+compat_release_match(struct ipt_entry_match *m, unsigned int *i)
+{
+	if (i != NULL) {
+		unsigned int remaining = *i;
+
+		*i = remaining - 1;
+		if (remaining == 0)
+			return 1;
+	}
+
+	module_put(m->u.kernel.match->me);
+	return 0;
+}
+
+/* Iterator callback: drop the module references held by compat rule @e,
+ * i.e. those of all its matches and of its target.
+ *
+ * @i, when non-NULL, is a countdown: it is decremented on every call and
+ * the call that finds it at zero returns 1 without releasing anything,
+ * which stops the surrounding iteration.  Returns 0 otherwise. */
+static int
+compat_release_entry(struct compat_ipt_entry *e, unsigned int *i)
+{
+	struct ipt_entry_target *tgt;
+
+	if (i != NULL) {
+		unsigned int remaining = *i;
+
+		*i = remaining - 1;
+		if (remaining == 0)
+			return 1;
+	}
+
+	/* Matches first, then the target. */
+	COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL);
+	tgt = compat_ipt_get_target(e);
+	module_put(tgt->u.kernel.target->me);
+	return 0;
+}
+
+/* First-pass validation of one rule in a compat-layout blob.
+ *
+ * @e:            compat rule being examined
+ * @newinfo:      table info whose hook_entry[]/underflow[] are filled in
+ *                when @e sits exactly at a user-declared offset
+ * @size:         running native size; grows by the bytes this rule gains
+ *                when converted to the native layout
+ * @base, @limit: start and one-past-the-end of the compat blob
+ * @hook_entries, @underflows: offsets declared by userspace
+ * @i:            running count of accepted rules
+ * @name:         table name, passed on to check_entry()
+ *
+ * Besides the structural checks, every match and the target are resolved
+ * (taking module references) and the rule's size delta is registered with
+ * xt_compat_add_offset().  On failure the references taken for this rule
+ * are dropped again.  Returns 0 or a negative errno. */
+static int
+check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
+				  struct xt_table_info *newinfo,
+				  unsigned int *size,
+				  unsigned char *base,
+				  unsigned char *limit,
+				  unsigned int *hook_entries,
+				  unsigned int *underflows,
+				  unsigned int *i,
+				  const char *name)
+{
+	struct ipt_entry_target *t;
+	struct xt_target *target;
+	unsigned int entry_offset;
+	unsigned int j;
+	int ret, off, h;
+
+	duprintf("check_compat_entry_size_and_hooks %p\n", e);
+	/* next_offset is only read once the header is known to be inside
+	 * the blob; a rule must not claim to extend past the end of it. */
+	if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
+	    || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit
+	    || (unsigned char *)e + e->next_offset > limit) {
+		duprintf("Bad offset %p, limit = %p\n", e, limit);
+		return -EINVAL;
+	}
+
+	if (e->next_offset < sizeof(struct compat_ipt_entry) +
+			     sizeof(struct compat_xt_entry_target)) {
+		duprintf("checking: element %p size %u\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
+
+	/* For purposes of check_entry casting the compat entry is fine */
+	ret = check_entry((struct ipt_entry *)e, name);
+	if (ret)
+		return ret;
+
+	off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
+	entry_offset = (void *)e - (void *)base;
+	j = 0;
+	ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name,
+				       &e->ip, e->comefrom, &off, &j);
+	if (ret != 0)
+		goto release_matches;
+
+	t = compat_ipt_get_target(e);
+	target = try_then_request_module(xt_find_target(AF_INET,
+							t->u.user.name,
+							t->u.user.revision),
+					 "ipt_%s", t->u.user.name);
+	if (IS_ERR(target) || !target) {
+		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
+			 t->u.user.name);
+		ret = target ? PTR_ERR(target) : -ENOENT;
+		goto release_matches;
+	}
+	t->u.kernel.target = target;
+
+	off += xt_compat_target_offset(target);
+	*size += off;
+	ret = xt_compat_add_offset(AF_INET, entry_offset, off);
+	if (ret)
+		goto out;
+
+	/* Check hooks & underflows */
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+		if ((unsigned char *)e - base == hook_entries[h])
+			newinfo->hook_entry[h] = hook_entries[h];
+		if ((unsigned char *)e - base == underflows[h])
+			newinfo->underflow[h] = underflows[h];
+	}
+
+	/* Clear counters and comefrom */
+	memset(&e->counters, 0, sizeof(e->counters));
+	e->comefrom = 0;
+
+	(*i)++;
+	return 0;
+
+out:
+	module_put(t->u.kernel.target->me);
+release_matches:
+	/* e is a compat entry: walk it with the compat iterator, as the
+	 * lookup pass above and compat_release_entry() do.  Only the first
+	 * j matches hold a reference. */
+	COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, &j);
+	return ret;
+}
+
+/* Iterator callback: expand compat rule @e into the native layout.
+ *
+ * @dstptr:  cursor into the native blob, advanced past what is written
+ * @size:    running size, grown by every byte the native form adds
+ * @name:    table name (not used here)
+ * @newinfo: table info whose hook entry points / underflows are adjusted
+ * @base:    start of the native blob
+ *
+ * The entry header and counters are copied, every match and the target
+ * are converted, and target_offset/next_offset of the native rule are
+ * recomputed from the size change so far.  Hook entry points and
+ * underflows in @newinfo that lie after this rule are moved by this
+ * rule's size change.
+ * Returns 0, or the error of a match conversion. */
+static int
+compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
+			    unsigned int *size, const char *name,
+			    struct xt_table_info *newinfo, unsigned char *base)
+{
+	struct ipt_entry_target *t;
+	struct ipt_entry *de;
+	unsigned int origsize;
+	int ret, h;
+
+	ret = 0;
+	origsize = *size;
+	de = (struct ipt_entry *)*dstptr;
+	memcpy(de, e, sizeof(struct ipt_entry));
+	memcpy(&de->counters, &e->counters, sizeof(e->counters));
+
+	*dstptr += sizeof(struct ipt_entry);
+	*size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
+
+	ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user,
+				       dstptr, size);
+	if (ret)
+		return ret;
+	/* origsize - *size is the (unsigned, wrapping) negative of the
+	 * growth so far, so subtracting it adds the growth. */
+	de->target_offset = e->target_offset - (origsize - *size);
+	t = compat_ipt_get_target(e);
+	xt_compat_target_from_user(t, dstptr, size);
+
+	de->next_offset = e->next_offset - (origsize - *size);
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+		if ((unsigned char *)de - base < newinfo->hook_entry[h])
+			newinfo->hook_entry[h] -= origsize - *size;
+		if ((unsigned char *)de - base < newinfo->underflow[h])
+			newinfo->underflow[h] -= origsize - *size;
+	}
+	return ret;
+}
+
+/* Iterator callback: run the xtables checks on a rule that came in
+ * through the compat path and whose matches and target are already
+ * resolved.  Every match is checked with check_match(), then the target
+ * with check_target().
+ *
+ * On success *i is incremented and 0 is returned.  On failure the matches
+ * that had passed are cleaned up and the error is returned. */
+static int
+compat_check_entry(struct ipt_entry *e, const char *name,
+		   unsigned int *i)
+{
+	struct xt_mtchk_param mtpar;
+	unsigned int nmatches = 0;
+	int err;
+
+	mtpar.table = name;
+	mtpar.entryinfo = &e->ip;
+	mtpar.hook_mask = e->comefrom;
+	mtpar.family = NFPROTO_IPV4;
+
+	err = IPT_MATCH_ITERATE(e, check_match, &mtpar, &nmatches);
+	if (!err)
+		err = check_target(e, name);
+
+	if (err) {
+		/* Only the first nmatches matches passed their check. */
+		IPT_MATCH_ITERATE(e, cleanup_match, &nmatches);
+		return err;
+	}
+
+	(*i)++;
+	return 0;
+}
+
+/*
+ * Convert a table that was copied from a 32-bit (compat) userspace into
+ * a freshly allocated table in native layout.
+ *
+ * @name:         table name, passed through to the per-entry callbacks
+ * @valid_hooks:  bitmask of hooks that must have an entry and underflow
+ * @pinfo:        in: table info holding the compat blob; out (success
+ *                only): the newly allocated native table info
+ * @pentry0:      in: start of the compat entries; out (success only):
+ *                start of the translated entries
+ * @total_size:   size in bytes of the compat entries
+ * @number:       number of entries userspace claims to have supplied
+ * @hook_entries: per-hook entry offsets as given by userspace
+ * @underflows:   per-hook underflow offsets as given by userspace
+ *
+ * Returns 0 on success, in which case the old *pinfo has been freed.
+ * On failure a negative errno is returned and *pinfo / *pentry0 are
+ * left untouched, so the caller still owns (and must free) its table.
+ */
+static int
+translate_compat_table(const char *name,
+ unsigned int valid_hooks,
+ struct xt_table_info **pinfo,
+ void **pentry0,
+ unsigned int total_size,
+ unsigned int number,
+ unsigned int *hook_entries,
+ unsigned int *underflows)
+{
+ unsigned int i, j;
+ struct xt_table_info *newinfo, *info;
+ void *pos, *entry0, *entry1;
+ unsigned int size;
+ int ret;
+
+ info = *pinfo;
+ entry0 = *pentry0;
+ size = total_size;
+ info->number = number;
+
+ /* Init all hooks to impossible value. */
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ info->hook_entry[i] = 0xFFFFFFFF;
+ info->underflow[i] = 0xFFFFFFFF;
+ }
+
+ duprintf("translate_compat_table: size %u\n", info->size);
+ j = 0;
+ xt_compat_lock(AF_INET);
+ /* Walk through entries, checking offsets. */
+ ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
+ check_compat_entry_size_and_hooks,
+ info, &size, entry0,
+ entry0 + total_size,
+ hook_entries, underflows, &j, name);
+ if (ret != 0)
+ goto out_unlock;
+
+ /* j was handed to the walk above as its entry counter. */
+ ret = -EINVAL;
+ if (j != number) {
+ duprintf("translate_compat_table: %u not %u entries\n",
+ j, number);
+ goto out_unlock;
+ }
+
+ /* Check hooks all assigned */
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ /* Only hooks which are valid */
+ if (!(valid_hooks & (1 << i)))
+ continue;
+ if (info->hook_entry[i] == 0xFFFFFFFF) {
+ duprintf("Invalid hook entry %u %u\n",
+ i, hook_entries[i]);
+ goto out_unlock;
+ }
+ if (info->underflow[i] == 0xFFFFFFFF) {
+ duprintf("Invalid underflow %u %u\n",
+ i, underflows[i]);
+ goto out_unlock;
+ }
+ }
+
+ /* size was passed by reference to the walk above; it is the
+  * allocation size used for the translated table. */
+ ret = -ENOMEM;
+ newinfo = xt_alloc_table_info(size);
+ if (!newinfo)
+ goto out_unlock;
+
+ newinfo->number = number;
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = info->hook_entry[i];
+ newinfo->underflow[i] = info->underflow[i];
+ }
+ entry1 = newinfo->entries[raw_smp_processor_id()];
+ pos = entry1;
+ size = total_size;
+ /* Second walk: copy each compat entry into the new table at pos. */
+ ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
+ compat_copy_entry_from_user,
+ &pos, &size, name, newinfo, entry1);
+ /* The compat lock is dropped here on both the success and the
+  * failure path of the copy walk. */
+ xt_compat_flush_offsets(AF_INET);
+ xt_compat_unlock(AF_INET);
+ if (ret)
+ goto free_newinfo;
+
+ ret = -ELOOP;
+ if (!mark_source_chains(newinfo, valid_hooks, entry1))
+ goto free_newinfo;
+
+ i = 0;
+ ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
+ name, &i);
+ if (ret) {
+ /* NOTE(review): i counts entries that passed compat_check_entry;
+  * presumably the first call releases the compat entries past
+  * those i, and the second cleans up the i checked ones in the
+  * native table - confirm against the iterate macros. */
+ j -= i;
+ COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
+ compat_release_entry, &j);
+ IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
+ xt_free_table_info(newinfo);
+ return ret;
+ }
+
+ /* And one copy for every other CPU */
+ for_each_possible_cpu(i)
+ if (newinfo->entries[i] && newinfo->entries[i] != entry1)
+ memcpy(newinfo->entries[i], entry1, newinfo->size);
+
+ /* Success: publish the new table and drop the compat one. */
+ *pinfo = newinfo;
+ *pentry0 = entry1;
+ xt_free_table_info(info);
+ return 0;
+
+free_newinfo:
+ xt_free_table_info(newinfo);
+out:
+ /* Iterate compat_release_entry over the original compat entries. */
+ COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
+ return ret;
+out_unlock:
+ /* Error exits taken while the compat lock is still held. */
+ xt_compat_flush_offsets(AF_INET);
+ xt_compat_unlock(AF_INET);
+ goto out;
+}
+
+/*
+ * Compat (32-bit userspace) handler for IPT_SO_SET_REPLACE.
+ *
+ * Copies a struct compat_ipt_replace header plus tmp.size bytes of
+ * entries from @user, translates them with translate_compat_table()
+ * and installs the result via __do_replace().
+ *
+ * Returns 0 on success, -EFAULT if userspace memory cannot be read,
+ * -ENOMEM on size overflow or allocation failure, or the error from
+ * translation / replacement.
+ */
+static int
+compat_do_replace(struct net *net, void __user *user, unsigned int len)
+{
+	int ret;
+	struct compat_ipt_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* overflow check */
+	if (tmp.size >= INT_MAX / num_possible_cpus())
+		return -ENOMEM;
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+
+	/* The name comes straight from userspace and is later used as a
+	 * C string; force termination so it cannot run past the field. */
+	tmp.name[sizeof(tmp.name) - 1] = 0;
+
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
+
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	/* On success this swaps newinfo/loc_cpu_entry for the translated
+	 * table; on failure both still refer to the buffer allocated above. */
+	ret = translate_compat_table(tmp.name, tmp.valid_hooks,
+				     &newinfo, &loc_cpu_entry, tmp.size,
+				     tmp.num_entries, tmp.hook_entry,
+				     tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+
+	duprintf("compat_do_replace: Translated table\n");
+
+	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+			   tmp.num_counters, compat_ptr(tmp.counters));
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+ free_newinfo:
+	xt_free_table_info(newinfo);
+	return ret;
+}
+
+/*
+ * setsockopt() entry point for compat callers.  Requires CAP_NET_ADMIN
+ * (-EPERM otherwise) and dispatches on @cmd; unknown commands yield
+ * -EINVAL.
+ */
+static int
+compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
+		      unsigned int len)
+{
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (cmd == IPT_SO_SET_REPLACE)
+		return compat_do_replace(sock_net(sk), user, len);
+
+	if (cmd == IPT_SO_SET_ADD_COUNTERS)
+		return do_add_counters(sock_net(sk), user, len, 1);
+
+	duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
+	return -EINVAL;
+}
+
+/*
+ * Argument block copied from userspace by compat_get_entries():
+ * the table name to look up, the size the caller expects the entries
+ * to occupy, and the (variable length) area the entries are written to.
+ */
+struct compat_ipt_get_entries {
+ char name[IPT_TABLE_MAXNAMELEN];
+ compat_uint_t size;
+ struct compat_ipt_entry entrytable[0];
+};
+
+/*
+ * Walk the entries of @table (the copy belonging to the current CPU)
+ * and pass each one to compat_copy_entry_to_user(), which writes to
+ * @userptr.  A counters snapshot from alloc_counters() is supplied to
+ * the callback and freed afterwards.
+ *
+ * Returns 0 on success, the alloc_counters() error, or the first
+ * non-zero value returned by the walk.
+ */
+static int
+compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
+			    void __user *userptr)
+{
+	const struct xt_table_info *private = table->private;
+	struct xt_counters *counters;
+	const void *entries;
+	void __user *dst = userptr;
+	unsigned int remaining = total_size;
+	unsigned int idx = 0;
+	int err;
+
+	counters = alloc_counters(table);
+	if (IS_ERR(counters))
+		return PTR_ERR(counters);
+
+	/* choose the copy that is on our node/cpu, ...
+	 * This choice is lazy (because current thread is
+	 * allowed to migrate to another cpu)
+	 */
+	entries = private->entries[raw_smp_processor_id()];
+	err = IPT_ENTRY_ITERATE(entries, total_size,
+				compat_copy_entry_to_user,
+				&dst, &remaining, counters, &idx);
+
+	vfree(counters);
+	return err;
+}
+
+/*
+ * Compat handler for IPT_SO_GET_ENTRIES.
+ *
+ * Reads a struct compat_ipt_get_entries header from @uptr, looks up the
+ * named table and, if the size userspace announced matches the compat
+ * size of the table, copies the entries out to uptr->entrytable.
+ *
+ * Returns 0 on success, -EINVAL if *@len does not match header + size,
+ * -EFAULT if the header cannot be read, -EAGAIN if the table size does
+ * not match, -ENOENT (or the lookup error) if the table is not found.
+ */
+static int
+compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
+		   int *len)
+{
+	int ret;
+	struct compat_ipt_get_entries get;
+	struct xt_table *t;
+
+	if (*len < sizeof(get)) {
+		duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+
+	if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
+		duprintf("compat_get_entries: %u != %zu\n",
+			 *len, sizeof(get) + get.size);
+		return -EINVAL;
+	}
+
+	/* The name is user-controlled and used as a C string for the
+	 * table lookup below; make sure it is terminated. */
+	get.name[sizeof(get.name) - 1] = '\0';
+
+	xt_compat_lock(AF_INET);
+	t = xt_find_table_lock(net, AF_INET, get.name);
+	if (t && !IS_ERR(t)) {
+		const struct xt_table_info *private = t->private;
+		struct xt_table_info info;
+		duprintf("t->private->number = %u\n", private->number);
+		ret = compat_table_info(private, &info);
+		if (!ret && get.size == info.size) {
+			ret = compat_copy_entries_to_user(private->size,
+							  t, uptr->entrytable);
+		} else if (!ret) {
+			duprintf("compat_get_entries: I've got %u not %u!\n",
+				 private->size, get.size);
+			ret = -EAGAIN;
+		}
+		xt_compat_flush_offsets(AF_INET);
+		module_put(t->me);
+		xt_table_unlock(t);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+
+	xt_compat_unlock(AF_INET);
+	return ret;
+}
+
+static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
+
+/*
+ * getsockopt() entry point for compat callers.  Requires CAP_NET_ADMIN.
+ * IPT_SO_GET_INFO and IPT_SO_GET_ENTRIES get compat handling; every
+ * other command is forwarded to do_ipt_get_ctl().
+ */
+static int
+compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (cmd == IPT_SO_GET_INFO)
+		return get_info(sock_net(sk), user, len, 1);
+
+	if (cmd == IPT_SO_GET_ENTRIES)
+		return compat_get_entries(sock_net(sk), user, len);
+
+	return do_ipt_get_ctl(sk, cmd, user, len);
+}
+#endif
+
+/*
+ * Native setsockopt() entry point.  Requires CAP_NET_ADMIN (-EPERM
+ * otherwise) and dispatches on @cmd; unknown commands yield -EINVAL.
+ */
+static int
+do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (cmd == IPT_SO_SET_REPLACE)
+		return do_replace(sock_net(sk), user, len);
+
+	if (cmd == IPT_SO_SET_ADD_COUNTERS)
+		return do_add_counters(sock_net(sk), user, len, 0);
+
+	duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
+	return -EINVAL;
+}
+
+/*
+ * Native getsockopt() entry point.  Requires CAP_NET_ADMIN.
+ *
+ * Handles IPT_SO_GET_INFO, IPT_SO_GET_ENTRIES and the two revision
+ * queries.  For a revision query *@len must equal
+ * sizeof(struct ipt_get_revision); the result is whatever
+ * xt_find_revision() stores through its last argument.
+ *
+ * Returns -EPERM, -EINVAL (bad length / unknown command), -EFAULT, or
+ * the handler's result.
+ */
+static int
+do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case IPT_SO_GET_INFO:
+		ret = get_info(sock_net(sk), user, len, 0);
+		break;
+
+	case IPT_SO_GET_ENTRIES:
+		ret = get_entries(sock_net(sk), user, len);
+		break;
+
+	case IPT_SO_GET_REVISION_MATCH:
+	case IPT_SO_GET_REVISION_TARGET: {
+		struct ipt_get_revision rev;
+		int target;
+
+		if (*len != sizeof(rev)) {
+			ret = -EINVAL;
+			break;
+		}
+		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
+			ret = -EFAULT;
+			break;
+		}
+		/* rev.name is user-controlled and is used as a C string
+		 * for both the lookup and the "ipt_%s" module request. */
+		rev.name[sizeof(rev.name) - 1] = 0;
+
+		if (cmd == IPT_SO_GET_REVISION_TARGET)
+			target = 1;
+		else
+			target = 0;
+
+		try_then_request_module(xt_find_revision(AF_INET, rev.name,
+							 rev.revision,
+							 target, &ret),
+					"ipt_%s", rev.name);
+		break;
+	}
+
+	default:
+		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+/*
+ * Build a table from the initial ruleset @repl and register it for @net.
+ *
+ * The entries are copied into a newly allocated xt_table_info, run
+ * through translate_table() and handed to xt_register_table().
+ *
+ * Returns the registered table, or an ERR_PTR() carrying -ENOMEM, the
+ * translate_table() error or the xt_register_table() error.  The
+ * allocated table info is freed on every failure path.
+ */
+struct xt_table *ipt_register_table(struct net *net, struct xt_table *table,
+				    const struct ipt_replace *repl)
+{
+	struct xt_table_info bootstrap
+		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	struct xt_table_info *newinfo;
+	struct xt_table *registered;
+	void *local_copy;
+	int err;
+
+	newinfo = xt_alloc_table_info(repl->size);
+	if (!newinfo)
+		return ERR_PTR(-ENOMEM);
+
+	/* choose the copy on our node/cpu, but dont care about preemption */
+	local_copy = newinfo->entries[raw_smp_processor_id()];
+	memcpy(local_copy, repl->entries, repl->size);
+
+	err = translate_table(table->name, table->valid_hooks,
+			      newinfo, local_copy, repl->size,
+			      repl->num_entries,
+			      repl->hook_entry,
+			      repl->underflow);
+	if (err != 0)
+		goto free_info;
+
+	registered = xt_register_table(net, table, &bootstrap, newinfo);
+	if (!IS_ERR(registered))
+		return registered;
+	err = PTR_ERR(registered);
+
+free_info:
+	xt_free_table_info(newinfo);
+	return ERR_PTR(err);
+}
+
+/*
+ * Unregister @table and release everything hanging off it: run
+ * cleanup_entry over its entries, drop one module reference on the
+ * table owner when the table holds more entries than its initial set,
+ * and free the table info.
+ */
+void ipt_unregister_table(struct xt_table *table)
+{
+	/* Read the owner first; @table is not touched again after
+	 * xt_unregister_table(). */
+	struct module *owner = table->me;
+	struct xt_table_info *info = xt_unregister_table(table);
+	void *entries = info->entries[raw_smp_processor_id()];
+
+	/* Decrease module usage counts and free resources */
+	IPT_ENTRY_ITERATE(entries, info->size, cleanup_entry, NULL);
+	if (info->number > info->initial_entries)
+		module_put(owner);
+	xt_free_table_info(info);
+}
+
+/*
+ * Decide whether an ICMP (type, code) pair is covered by a rule.
+ *
+ * A rule type of 0xFF matches every packet; otherwise the packet type
+ * must equal @test_type and its code must lie in [@min_code, @max_code].
+ * The outcome is flipped when @invert is set.
+ */
+static inline bool
+icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
+		     u_int8_t type, u_int8_t code,
+		     bool invert)
+{
+	bool hit;
+
+	if (test_type == 0xFF)
+		hit = true;
+	else
+		hit = type == test_type &&
+		      code >= min_code && code <= max_code;
+
+	return hit != invert;
+}
+
+/*
+ * Match callback of the built-in "icmp" match.
+ *
+ * Fragments (non-zero fragoff) never match.  If the ICMP header cannot
+ * be obtained from the skb the packet is flagged for dropping through
+ * *par->hotdrop and false is returned.  Otherwise the packet's type and
+ * code are tested against the rule's type and code range.
+ */
+static bool
+icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct ipt_icmp *icmpinfo = par->matchinfo;
+	struct icmphdr _icmph;
+	const struct icmphdr *ic;
+
+	/* Must not be a fragment. */
+	if (par->fragoff != 0)
+		return false;
+
+	ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
+	if (ic != NULL)
+		return icmp_type_code_match(icmpinfo->type,
+					    icmpinfo->code[0],
+					    icmpinfo->code[1],
+					    ic->type, ic->code,
+					    !!(icmpinfo->invflags&IPT_ICMP_INV));
+
+	/* We've been asked to examine this packet, and we
+	 * can't.  Hence, no choice but to drop.
+	 */
+	duprintf("Dropping evil ICMP tinygram.\n");
+	*par->hotdrop = true;
+	return false;
+}
+
+/*
+ * checkentry callback of the "icmp" match: accept the rule only when
+ * no inversion flag other than IPT_ICMP_INV is set.
+ */
+static bool icmp_checkentry(const struct xt_mtchk_param *par)
+{
+	const struct ipt_icmp *icmpinfo = par->matchinfo;
+
+	/* Must specify no unknown invflags */
+	if (icmpinfo->invflags & ~IPT_ICMP_INV)
+		return false;
+
+	return true;
+}
+
+/* The built-in targets: standard (NULL) and error. */
+/*
+ * Standard target: has no .target function; its payload is a single
+ * int.  With CONFIG_COMPAT the payload is a compat_int_t converted by
+ * the compat_standard_{from,to}_user helpers.
+ */
+static struct xt_target ipt_standard_target __read_mostly = {
+ .name = IPT_STANDARD_TARGET,
+ .targetsize = sizeof(int),
+ .family = AF_INET,
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(compat_int_t),
+ .compat_from_user = compat_standard_from_user,
+ .compat_to_user = compat_standard_to_user,
+#endif
+};
+
+/*
+ * Error target: handled by ipt_error(); its payload is a name buffer of
+ * IPT_FUNCTION_MAXNAMELEN bytes.
+ */
+static struct xt_target ipt_error_target __read_mostly = {
+ .name = IPT_ERROR_TARGET,
+ .target = ipt_error,
+ .targetsize = IPT_FUNCTION_MAXNAMELEN,
+ .family = AF_INET,
+};
+
+/*
+ * Socket option registration for PF_INET: routes the option range
+ * starting at IPT_BASE_CTL to the set/get handlers of this file, plus
+ * their compat variants when CONFIG_COMPAT is enabled.
+ */
+static struct nf_sockopt_ops ipt_sockopts = {
+ .pf = PF_INET,
+ .set_optmin = IPT_BASE_CTL,
+ .set_optmax = IPT_SO_SET_MAX+1,
+ .set = do_ipt_set_ctl,
+#ifdef CONFIG_COMPAT
+ .compat_set = compat_do_ipt_set_ctl,
+#endif
+ .get_optmin = IPT_BASE_CTL,
+ .get_optmax = IPT_SO_GET_MAX+1,
+ .get = do_ipt_get_ctl,
+#ifdef CONFIG_COMPAT
+ .compat_get = compat_do_ipt_get_ctl,
+#endif
+ .owner = THIS_MODULE,
+};
+
+/*
+ * The built-in "icmp" match, restricted to IPPROTO_ICMP; rules are
+ * validated by icmp_checkentry() and evaluated by icmp_match().
+ */
+static struct xt_match icmp_matchstruct __read_mostly = {
+ .name = "icmp",
+ .match = icmp_match,
+ .matchsize = sizeof(struct ipt_icmp),
+ .checkentry = icmp_checkentry,
+ .proto = IPPROTO_ICMP,
+ .family = AF_INET,
+};
+
+/* Per-namespace init hook: returns the result of xt_proto_init() for AF_INET. */
+static int __net_init ip_tables_net_init(struct net *net)
+{
+ return xt_proto_init(net, AF_INET);
+}
+
+/* Per-namespace exit hook: calls xt_proto_fini() for AF_INET. */
+static void __net_exit ip_tables_net_exit(struct net *net)
+{
+ xt_proto_fini(net, AF_INET);
+}
+
+/* Per-namespace hooks, registered by ip_tables_init(). */
+static struct pernet_operations ip_tables_net_ops = {
+ .init = ip_tables_net_init,
+ .exit = ip_tables_net_exit,
+};
+
+/*
+ * Module init: register the per-namespace ops, the two built-in
+ * targets, the icmp match and finally the sockopt interface.  If any
+ * step fails, the steps already completed are undone in reverse order
+ * and the error is returned.
+ */
+static int __init ip_tables_init(void)
+{
+	int err;
+
+	err = register_pernet_subsys(&ip_tables_net_ops);
+	if (err < 0)
+		return err;
+
+	/* No one else will be downing sem now, so we won't sleep */
+	err = xt_register_target(&ipt_standard_target);
+	if (err < 0)
+		goto undo_pernet;
+
+	err = xt_register_target(&ipt_error_target);
+	if (err < 0)
+		goto undo_standard_target;
+
+	err = xt_register_match(&icmp_matchstruct);
+	if (err < 0)
+		goto undo_error_target;
+
+	/* Register setsockopt */
+	err = nf_register_sockopt(&ipt_sockopts);
+	if (err < 0)
+		goto undo_icmp_match;
+
+	printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n");
+	return 0;
+
+undo_icmp_match:
+	xt_unregister_match(&icmp_matchstruct);
+undo_error_target:
+	xt_unregister_target(&ipt_error_target);
+undo_standard_target:
+	xt_unregister_target(&ipt_standard_target);
+undo_pernet:
+	unregister_pernet_subsys(&ip_tables_net_ops);
+	return err;
+}
+
+/*
+ * Module exit: undo ip_tables_init() in reverse order - sockopts first,
+ * then the match and targets, and the per-namespace ops last.
+ */
+static void __exit ip_tables_fini(void)
+{
+ nf_unregister_sockopt(&ipt_sockopts);
+
+ xt_unregister_match(&icmp_matchstruct);
+ xt_unregister_target(&ipt_error_target);
+ xt_unregister_target(&ipt_standard_target);
+
+ unregister_pernet_subsys(&ip_tables_net_ops);
+}
+
+EXPORT_SYMBOL(ipt_register_table);
+EXPORT_SYMBOL(ipt_unregister_table);
+EXPORT_SYMBOL(ipt_do_table);
+module_init(ip_tables_init);
+module_exit(ip_tables_fini);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
new file mode 100644
index 0000000..7ac1677
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -0,0 +1,744 @@
+/* Cluster IP hashmark target
+ * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
+ * based on ideas of Fabio Olive Leite <olive@unixforge.org>
+ *
+ * Development of this code funded by SuSE Linux AG, http://www.suse.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/jhash.h>
+#include <linux/bitops.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/if_arp.h>
+#include <linux/seq_file.h>
+#include <linux/netfilter_arp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/net_namespace.h>
+#include <net/checksum.h>
+
+#define CLUSTERIP_VERSION "0.8"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
+
+/*
+ * One cluster IP configuration.  Configs are kept on the global
+ * clusterip_configs list and looked up by their clusterip address.
+ * Two counters are kept: refcount controls when the structure is freed,
+ * entries counts the rules that use it.
+ */
+struct clusterip_config {
+ struct list_head list; /* list of all configs */
+ atomic_t refcount; /* reference count */
+ atomic_t entries; /* number of entries/rules
+ * referencing us */
+
+ __be32 clusterip; /* the IP address */
+ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */
+ struct net_device *dev; /* device */
+ u_int16_t num_total_nodes; /* total number of nodes */
+ unsigned long local_nodes; /* node number array */
+
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *pde; /* proc dir entry */
+#endif
+ enum clusterip_hashmode hash_mode; /* which hashing mode */
+ u_int32_t hash_initval; /* hash initialization */
+};
+
+static LIST_HEAD(clusterip_configs);
+
+/* clusterip_lock protects the clusterip_configs list */
+static DEFINE_RWLOCK(clusterip_lock);
+
+#ifdef CONFIG_PROC_FS
+static const struct file_operations clusterip_proc_fops;
+static struct proc_dir_entry *clusterip_procdir;
+#endif
+
+/* Take one reference on @c (increments c->refcount). */
+static inline void
+clusterip_config_get(struct clusterip_config *c)
+{
+ atomic_inc(&c->refcount);
+}
+
+/* Drop one reference on @c; the config is freed when the count hits zero. */
+static inline void
+clusterip_config_put(struct clusterip_config *c)
+{
+ if (atomic_dec_and_test(&c->refcount))
+ kfree(c);
+}
+
+/*
+ * Drop one rule ("entry") reference on @c.  When the last rule goes
+ * away the config is unlinked from the global list, its multicast
+ * address and device reference are released and its proc file is
+ * removed.  The structure itself is not freed here, since proc files
+ * could still be holding references.
+ */
+static inline void
+clusterip_config_entry_put(struct clusterip_config *c)
+{
+	int last;
+
+	write_lock_bh(&clusterip_lock);
+	last = atomic_dec_and_test(&c->entries);
+	if (last)
+		list_del(&c->list);
+	write_unlock_bh(&clusterip_lock);
+
+	if (!last)
+		return;
+
+	dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
+	dev_put(c->dev);
+
+	/* In case anyone still accesses the file, the open/close
+	 * functions are also incrementing the refcount on their own,
+	 * so it's safe to remove the entry even if it's in use. */
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry(c->pde->name, c->pde->parent);
+#endif
+}
+
+/*
+ * Return the first config on clusterip_configs whose address equals
+ * @clusterip, or NULL if there is none.  Takes no lock and no
+ * reference itself.
+ */
+static struct clusterip_config *
+__clusterip_config_find(__be32 clusterip)
+{
+	struct clusterip_config *pos;
+	struct clusterip_config *found = NULL;
+
+	list_for_each_entry(pos, &clusterip_configs, list) {
+		if (pos->clusterip != clusterip)
+			continue;
+		found = pos;
+		break;
+	}
+
+	return found;
+}
+
+/*
+ * Look up the config for @clusterip under the read lock and take a
+ * reference on it.  When @entry is non-zero the rule counter is bumped
+ * as well.  Returns NULL if no config exists.
+ */
+static inline struct clusterip_config *
+clusterip_config_find_get(__be32 clusterip, int entry)
+{
+	struct clusterip_config *c;
+
+	read_lock_bh(&clusterip_lock);
+	c = __clusterip_config_find(clusterip);
+	if (c) {
+		atomic_inc(&c->refcount);
+		if (entry)
+			atomic_inc(&c->entries);
+	}
+	read_unlock_bh(&clusterip_lock);
+
+	return c;
+}
+
+/*
+ * Seed c->local_nodes from the rule: for each of the first
+ * i->num_local_nodes values in i->local_nodes[], set bit (value - 1).
+ */
+static void
+clusterip_config_init_nodelist(struct clusterip_config *c,
+			       const struct ipt_clusterip_tgt_info *i)
+{
+	int n = 0;
+
+	while (n < i->num_local_nodes) {
+		set_bit(i->local_nodes[n] - 1, &c->local_nodes);
+		n++;
+	}
+}
+
+/*
+ * Allocate and publish a new config for address @ip on device @dev,
+ * filled in from the rule data @i.  Both counters start at 1.  With
+ * CONFIG_PROC_FS a proc file named after the dotted-quad address is
+ * created first; only then is the config added to the global list.
+ *
+ * Returns the new config, or NULL if the allocation or the proc entry
+ * creation fails.
+ */
+static struct clusterip_config *
+clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
+		      struct net_device *dev)
+{
+	struct clusterip_config *c = kzalloc(sizeof(*c), GFP_ATOMIC);
+
+	if (c == NULL)
+		return NULL;
+
+	atomic_set(&c->refcount, 1);
+	atomic_set(&c->entries, 1);
+
+	c->dev = dev;
+	c->clusterip = ip;
+	memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
+	c->num_total_nodes = i->num_total_nodes;
+	c->hash_mode = i->hash_mode;
+	c->hash_initval = i->hash_initval;
+	clusterip_config_init_nodelist(c, i);
+
+#ifdef CONFIG_PROC_FS
+	{
+		/* "255.255.255.255" plus terminator fits in 16 bytes */
+		char buffer[16];
+
+		/* create proc dir entry */
+		sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip));
+		c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR,
+					  clusterip_procdir,
+					  &clusterip_proc_fops, c);
+		if (c->pde == NULL) {
+			kfree(c);
+			return NULL;
+		}
+	}
+#endif
+
+	write_lock_bh(&clusterip_lock);
+	list_add(&c->list, &clusterip_configs);
+	write_unlock_bh(&clusterip_lock);
+
+	return c;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * Mark node @nodenum as local.  Returns 0 on success and 1 if the
+ * number is outside 1..num_total_nodes or the node was already set.
+ */
+static int
+clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
+{
+	if (nodenum == 0 || nodenum > c->num_total_nodes)
+		return 1;
+
+	/* check if we already have this number in our bitfield */
+	return test_and_set_bit(nodenum - 1, &c->local_nodes) ? 1 : 0;
+}
+
+/*
+ * Stop treating node @nodenum as local.  Returns false on success and
+ * true if the number is outside 1..num_total_nodes or the node was not
+ * set.
+ */
+static bool
+clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
+{
+	if (nodenum == 0 || nodenum > c->num_total_nodes)
+		return true;
+
+	return !test_and_clear_bit(nodenum - 1, &c->local_nodes);
+}
+#endif
+
+/*
+ * Compute the node number (1..config->num_total_nodes) responsible for
+ * the packet in @skb.
+ *
+ * Depending on config->hash_mode the jhash input is the source address
+ * alone, source address + first 16-bit word after the IP header, or
+ * source address + the first two such words.  For protocols not in the
+ * list below both words are taken as 0 and a rate-limited notice is
+ * logged.  An unknown hash mode triggers BUG().
+ */
+static inline u_int32_t
+clusterip_hashfn(const struct sk_buff *skb,
+ const struct clusterip_config *config)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ unsigned long hashval;
+ u_int16_t sport, dport;
+ const u_int16_t *ports;
+
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_SCTP:
+ case IPPROTO_DCCP:
+ case IPPROTO_ICMP:
+ /* the two 16-bit words directly behind the IP header (ihl*4 bytes in).
+  * NOTE(review): no length check on the skb is visible here - confirm
+  * that callers guarantee these bytes are present. */
+ ports = (const void *)iph+iph->ihl*4;
+ sport = ports[0];
+ dport = ports[1];
+ break;
+ default:
+ if (net_ratelimit())
+ printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n",
+ iph->protocol);
+ sport = dport = 0;
+ }
+
+ switch (config->hash_mode) {
+ case CLUSTERIP_HASHMODE_SIP:
+ hashval = jhash_1word(ntohl(iph->saddr),
+ config->hash_initval);
+ break;
+ case CLUSTERIP_HASHMODE_SIP_SPT:
+ hashval = jhash_2words(ntohl(iph->saddr), sport,
+ config->hash_initval);
+ break;
+ case CLUSTERIP_HASHMODE_SIP_SPT_DPT:
+ hashval = jhash_3words(ntohl(iph->saddr), sport, dport,
+ config->hash_initval);
+ break;
+ default:
+ /* to make gcc happy */
+ hashval = 0;
+ /* This cannot happen, unless the check function wasn't called
+ * at rule load time */
+ printk("CLUSTERIP: unknown mode `%u'\n", config->hash_mode);
+ BUG();
+ break;
+ }
+
+ /* node numbers are 1..n, not 0..n */
+ /* multiply-and-shift: scales the hash by num_total_nodes / 2^32 */
+ return (((u64)hashval * config->num_total_nodes) >> 32) + 1;
+}
+
+/* Non-zero if the bit for node number @hash (1-based) is set in config->local_nodes. */
+static inline int
+clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
+{
+ return test_bit(hash - 1, &config->local_nodes);
+}
+
+/***********************************************************************
+ * IPTABLES TARGET
+ ***********************************************************************/
+
+/*
+ * CLUSTERIP target function.
+ *
+ * Packets without a conntrack entry are dropped.  ICMP packets in a
+ * RELATED state pass through untouched.  For everything else the node
+ * hash is computed (and stored in ct->mark for new connections); the
+ * packet is dropped unless this node is responsible for that hash,
+ * otherwise it is re-labelled PACKET_HOST and processing continues.
+ */
+static unsigned int
+clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	u_int32_t hash;
+	int related;
+
+	/* don't need to clusterip_config_get() here, since refcount
+	 * is only decremented by destroy() - and ip_tables guarantees
+	 * that the ->target() function isn't called after ->destroy() */
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL) {
+		printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
+		/* FIXME: need to drop invalid ones, since replies
+		 * to outgoing connections of other nodes will be
+		 * marked as INVALID */
+		return NF_DROP;
+	}
+
+	/* special case: ICMP error handling. conntrack distinguishes between
+	 * error messages (RELATED) and information requests (see below) */
+	related = ctinfo == IP_CT_RELATED ||
+		  ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY;
+	if (related && ip_hdr(skb)->protocol == IPPROTO_ICMP)
+		return XT_CONTINUE;
+
+	/* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
+	 * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
+	 * on, which all have an ID field [relevant for hashing]. */
+
+	hash = clusterip_hashfn(skb, cipinfo->config);
+
+	/* Only new connections get their mark set.
+	 * FIXME: we don't handle expectations at the
+	 * moment.  they can arrive on a different node than
+	 * the master connection (e.g. FTP passive mode) */
+	if (ctinfo == IP_CT_NEW)
+		ct->mark = hash;
+
+#ifdef DEBUG
+	nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+#endif
+	pr_debug("hash=%u ct_hash=%u ", hash, ct->mark);
+	if (!clusterip_responsible(cipinfo->config, hash)) {
+		pr_debug("not responsible\n");
+		return NF_DROP;
+	}
+	pr_debug("responsible\n");
+
+	/* despite being received via linklayer multicast, this is
+	 * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
+	skb->pkt_type = PACKET_HOST;
+
+	return XT_CONTINUE;
+}
+
+/*
+ * checkentry callback: validate a CLUSTERIP rule and attach it to a
+ * config.
+ *
+ * The rule must use a known hash mode, name an exact destination
+ * address, and list at most CLUSTERIP_MAX_NODES local nodes, each of
+ * which must map to a valid bit of the config's local_nodes bitmap.
+ * An existing config for the destination address is reused; otherwise
+ * a new one is created, which requires CLUSTERIP_FLAG_NEW and an input
+ * interface name.
+ *
+ * On success the config pointer is stored in the rule data and a
+ * reference on the conntrack l3proto module is held.  On failure every
+ * reference taken here is released again.
+ *
+ * Returns true if the rule is acceptable, false otherwise.
+ */
+static bool clusterip_tg_check(const struct xt_tgchk_param *par)
+{
+	struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
+	const struct ipt_entry *e = par->entryinfo;
+	struct clusterip_config *config;
+	unsigned int n;
+
+	if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
+	    cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
+	    cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
+		printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n",
+			cipinfo->hash_mode);
+		return false;
+
+	}
+	if (e->ip.dmsk.s_addr != htonl(0xffffffff)
+	    || e->ip.dst.s_addr == 0) {
+		printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n");
+		return false;
+	}
+
+	/* The node list is user supplied and is fed to set_bit() when a
+	 * config is created: reject counts that overrun local_nodes[] and
+	 * node numbers that fall outside the bitmap (0 wraps around). */
+	if (cipinfo->num_local_nodes > CLUSTERIP_MAX_NODES) {
+		printk(KERN_WARNING "CLUSTERIP: bad num_local_nodes %u\n",
+		       cipinfo->num_local_nodes);
+		return false;
+	}
+	for (n = 0; n < cipinfo->num_local_nodes; n++) {
+		unsigned int node = cipinfo->local_nodes[n];
+
+		if (node - 1 >= sizeof(config->local_nodes) * 8) {
+			printk(KERN_WARNING "CLUSTERIP: bad local_nodes[%u] %u\n",
+			       n, node);
+			return false;
+		}
+	}
+
+	/* FIXME: further sanity checks */
+
+	config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
+	if (!config) {
+		if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
+			printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr));
+			return false;
+		} else {
+			struct net_device *dev;
+
+			if (e->ip.iniface[0] == '\0') {
+				printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n");
+				return false;
+			}
+
+			dev = dev_get_by_name(&init_net, e->ip.iniface);
+			if (!dev) {
+				printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface);
+				return false;
+			}
+
+			config = clusterip_config_init(cipinfo,
+						  e->ip.dst.s_addr, dev);
+			if (!config) {
+				printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n");
+				dev_put(dev);
+				return false;
+			}
+			dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
+		}
+	}
+
+	if (nf_ct_l3proto_try_module_get(par->target->family) < 0) {
+		printk(KERN_WARNING "can't load conntrack support for "
+				    "proto=%u\n", par->target->family);
+		/* Give back the entry count and reference taken above,
+		 * exactly as clusterip_tg_destroy() would. */
+		clusterip_config_entry_put(config);
+		clusterip_config_put(config);
+		return false;
+	}
+
+	/* Publish the config only once nothing can fail any more. */
+	cipinfo->config = config;
+
+	return true;
+}
+
+/*
+ * destroy callback, run when a rule is deleted: drops the rule's entry
+ * count and its reference on the config, then releases the conntrack
+ * l3proto module reference.
+ */
+static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
+	struct clusterip_config *config = cipinfo->config;
+
+	/* if no more entries are referencing the config, remove it
+	 * from the list and destroy the proc entry */
+	clusterip_config_entry_put(config);
+
+	clusterip_config_put(config);
+
+	nf_ct_l3proto_module_put(par->target->family);
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Target data layout for compat callers; only its size is used here
+ * (as .compatsize of the target registration).  The config field is a
+ * compat_uptr_t rather than a native pointer.
+ */
+struct compat_ipt_clusterip_tgt_info
+{
+ u_int32_t flags;
+ u_int8_t clustermac[6];
+ u_int16_t num_total_nodes;
+ u_int16_t num_local_nodes;
+ u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
+ u_int32_t hash_mode;
+ u_int32_t hash_initval;
+ compat_uptr_t config;
+};
+#endif /* CONFIG_COMPAT */
+
+/* Registration record of the IPv4 "CLUSTERIP" target. */
+static struct xt_target clusterip_tg_reg __read_mostly = {
+ .name = "CLUSTERIP",
+ .family = NFPROTO_IPV4,
+ .target = clusterip_tg,
+ .checkentry = clusterip_tg_check,
+ .destroy = clusterip_tg_destroy,
+ .targetsize = sizeof(struct ipt_clusterip_tgt_info),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info),
+#endif /* CONFIG_COMPAT */
+ .me = THIS_MODULE
+};
+
+
+/***********************************************************************
+ * ARP MANGLING CODE
+ ***********************************************************************/
+
+/*
+ * ARP payload as laid out directly behind struct arphdr.
+ * Hardcoded for 48-bit ethernet and 32-bit IPv4 addresses; packed so
+ * the fields sit at their on-wire offsets.
+ */
+struct arp_payload {
+ u_int8_t src_hw[ETH_ALEN];
+ __be32 src_ip;
+ u_int8_t dst_hw[ETH_ALEN];
+ __be32 dst_ip;
+} __attribute__ ((packed));
+
+#ifdef DEBUG
+/*
+ * Debug helper: print source IP, source hardware address (as
+ * colon-separated hex) and destination IP of an ARP payload.
+ */
+static void arp_print(struct arp_payload *payload)
+{
+#define HBUFFERLEN 30
+	char hbuffer[HBUFFERLEN];
+	int byte = 0;
+	int out = 0;
+
+	while (out < HBUFFERLEN-3 && byte < ETH_ALEN) {
+		hbuffer[out++] = hex_asc_hi(payload->src_hw[byte]);
+		hbuffer[out++] = hex_asc_lo(payload->src_hw[byte]);
+		hbuffer[out++] = ':';
+		byte++;
+	}
+	/* overwrite the trailing ':' with the terminator */
+	hbuffer[--out] = '\0';
+
+	printk("src %u.%u.%u.%u@%s, dst %u.%u.%u.%u\n",
+	       NIPQUAD(payload->src_ip), hbuffer,
+	       NIPQUAD(payload->dst_ip));
+}
+#endif
+
+/*
+ * ARP output hook.  For ethernet/IPv4 ARP requests and replies whose
+ * source IP has a CLUSTERIP config bound to the outgoing device, the
+ * source hardware address in the payload is replaced by the cluster
+ * MAC.  Always returns NF_ACCEPT.
+ */
+static unsigned int
+arp_mangle(unsigned int hook,
+	   struct sk_buff *skb,
+	   const struct net_device *in,
+	   const struct net_device *out,
+	   int (*okfn)(struct sk_buff *))
+{
+	struct arphdr *arp = arp_hdr(skb);
+	struct arp_payload *payload;
+	struct clusterip_config *c;
+
+	/* we don't care about non-ethernet and non-ipv4 ARP */
+	if (arp->ar_hrd != htons(ARPHRD_ETHER)
+	    || arp->ar_pro != htons(ETH_P_IP)
+	    || arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN)
+		goto accept;
+
+	/* we only want to mangle arp requests and replies */
+	if (arp->ar_op != htons(ARPOP_REPLY)
+	    && arp->ar_op != htons(ARPOP_REQUEST))
+		goto accept;
+
+	payload = (void *)(arp+1);
+
+	/* if there is no clusterip configuration for the arp reply's
+	 * source ip, we don't want to mangle it */
+	c = clusterip_config_find_get(payload->src_ip, 0);
+	if (!c)
+		goto accept;
+
+	/* normally the linux kernel always replies to arp queries of
+	 * addresses on different interfaces. However, in the CLUSTERIP case
+	 * this wouldn't work, since we didn't subscribe the mcast group on
+	 * other interfaces */
+	if (c->dev != out) {
+		pr_debug("CLUSTERIP: not mangling arp reply on different "
+			 "interface: cip'%s'-skb'%s'\n",
+			 c->dev->name, out->name);
+		goto put;
+	}
+
+	/* mangle reply hardware address */
+	memcpy(payload->src_hw, c->clustermac, arp->ar_hln);
+
+#ifdef DEBUG
+	pr_debug(KERN_DEBUG "CLUSTERIP mangled arp reply: ");
+	arp_print(payload);
+#endif
+
+put:
+	clusterip_config_put(c);
+accept:
+	return NF_ACCEPT;
+}
+
+/* Hooks arp_mangle() into the ARP output path at priority -1. */
+static struct nf_hook_ops cip_arp_ops __read_mostly = {
+ .hook = arp_mangle,
+ .pf = NFPROTO_ARP,
+ .hooknum = NF_ARP_OUT,
+ .priority = -1
+};
+
+/***********************************************************************
+ * PROC DIR HANDLING
+ ***********************************************************************/
+
+#ifdef CONFIG_PROC_FS
+
+/*
+ * Iterator state of the proc seq_file: a private copy of the node
+ * bitmap from which one set bit is removed per step.
+ */
+struct clusterip_seq_position {
+ unsigned int pos; /* position */
+ unsigned int weight; /* number of bits set == size */
+ unsigned int bit; /* current bit */
+ unsigned long val; /* current value */
+};
+
+/*
+ * seq_file start: snapshot the config's node bitmap and position the
+ * iterator on its lowest set bit.  Returns NULL when *@pos is at or
+ * past the number of set bits, ERR_PTR(-ENOMEM) if the iterator cannot
+ * be allocated, and the iterator otherwise.
+ */
+static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
+{
+	const struct proc_dir_entry *pde = s->private;
+	struct clusterip_config *c = pde->data;
+	struct clusterip_seq_position *idx;
+	u_int32_t snapshot;
+	unsigned int nbits;
+
+	/* FIXME: possible race */
+	snapshot = c->local_nodes;
+	nbits = hweight32(snapshot);
+	if (*pos >= nbits)
+		return NULL;
+
+	idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
+	if (idx == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	idx->pos = *pos;
+	idx->weight = nbits;
+	idx->val = snapshot;
+	idx->bit = ffs(snapshot);
+	/* consume the bit just reported */
+	clear_bit(idx->bit - 1, &idx->val);
+
+	return idx;
+}
+
+/*
+ * seq_file next: advance to the next set bit of the snapshot.  When
+ * all bits have been visited the iterator is freed and NULL returned.
+ */
+static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct clusterip_seq_position *idx = v;
+
+	idx->pos++;
+	*pos = idx->pos;
+	if (*pos < idx->weight) {
+		idx->bit = ffs(idx->val);
+		clear_bit(idx->bit - 1, &idx->val);
+		return idx;
+	}
+
+	kfree(v);
+	return NULL;
+}
+
+/* seq_file stop: free the iterator handed in as @v. */
+static void clusterip_seq_stop(struct seq_file *s, void *v)
+{
+ kfree(v);
+}
+
+/*
+ * seq_file show: emit the current node number, preceded by ',' for
+ * every element but the first and followed by a newline after the
+ * last one.
+ */
+static int clusterip_seq_show(struct seq_file *s, void *v)
+{
+	struct clusterip_seq_position *cur = v;
+	int first = (cur->pos == 0);
+	int last = (cur->pos == cur->weight - 1);
+
+	if (!first)
+		seq_putc(s, ',');
+
+	seq_printf(s, "%u", cur->bit);
+
+	if (last)
+		seq_putc(s, '\n');
+
+	return 0;
+}
+
+/* seq_file iterator callbacks used by clusterip_proc_open(). */
+static const struct seq_operations clusterip_seq_ops = {
+ .start = clusterip_seq_start,
+ .next = clusterip_seq_next,
+ .stop = clusterip_seq_stop,
+ .show = clusterip_seq_show,
+};
+
+/*
+ * open() of a config's proc file: set up the seq_file, remember the
+ * proc entry in its private pointer and take a reference on the config
+ * for as long as the file stays open.
+ */
+static int clusterip_proc_open(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *pde;
+	struct seq_file *sf;
+	int err;
+
+	err = seq_open(file, &clusterip_seq_ops);
+	if (err)
+		return err;
+
+	pde = PDE(inode);
+	sf = file->private_data;
+	sf->private = pde;
+
+	clusterip_config_get(pde->data);
+
+	return err;
+}
+
+/*
+ * release() of a config's proc file: tear down the seq_file and, if
+ * that succeeded, drop the reference taken at open time.
+ */
+static int clusterip_proc_release(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *pde = PDE(inode);
+	struct clusterip_config *c = pde->data;
+	int err = seq_release(inode, file);
+
+	if (err)
+		return err;
+
+	clusterip_config_put(c);
+	return err;
+}
+
+/*
+ * write() of a config's proc file.  Accepts "+N" to add node N to the
+ * local node set and "-N" to remove it.
+ *
+ * At most PROC_WRITELEN bytes are accepted.  Exactly @size bytes are
+ * copied from userspace and the buffer is NUL-terminated before it is
+ * parsed, so the parser never sees bytes the caller did not supply.
+ *
+ * Returns @size on success; -EIO for oversized input or an unknown
+ * command, -EFAULT if the user buffer cannot be read, -ENOMEM if the
+ * node cannot be added, -ENOENT if it cannot be removed.
+ */
+static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
+				size_t size, loff_t *ofs)
+{
+#define PROC_WRITELEN	10
+	char buffer[PROC_WRITELEN+1];
+	const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct clusterip_config *c = pde->data;
+	unsigned long nodenum;
+
+	if (size > PROC_WRITELEN)
+		return -EIO;
+	if (copy_from_user(buffer, input, size))
+		return -EFAULT;
+	buffer[size] = 0;
+
+	if (*buffer == '+') {
+		nodenum = simple_strtoul(buffer+1, NULL, 10);
+		if (clusterip_add_node(c, nodenum))
+			return -ENOMEM;
+	} else if (*buffer == '-') {
+		nodenum = simple_strtoul(buffer+1, NULL,10);
+		if (clusterip_del_node(c, nodenum))
+			return -ENOENT;
+	} else
+		return -EIO;
+
+	return size;
+}
+
+/* File operations of the per-config proc file: seq_file reads, custom write. */
+static const struct file_operations clusterip_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = clusterip_proc_open,
+ .read = seq_read,
+ .write = clusterip_proc_write,
+ .llseek = seq_lseek,
+ .release = clusterip_proc_release,
+};
+
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Module init: register the CLUSTERIP target, the ARP output hook and
+ * (with CONFIG_PROC_FS) the "ipt_CLUSTERIP" proc directory.  Each
+ * failure undoes the steps completed before it and returns the error.
+ */
+static int __init clusterip_tg_init(void)
+{
+ int ret;
+
+ ret = xt_register_target(&clusterip_tg_reg);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_register_hook(&cip_arp_ops);
+ if (ret < 0)
+ goto cleanup_target;
+
+#ifdef CONFIG_PROC_FS
+ clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net);
+ if (!clusterip_procdir) {
+ printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n");
+ ret = -ENOMEM;
+ goto cleanup_hook;
+ }
+#endif /* CONFIG_PROC_FS */
+
+ printk(KERN_NOTICE "ClusterIP Version %s loaded successfully\n",
+ CLUSTERIP_VERSION);
+ return 0;
+
+ /* cleanup_hook is only referenced from the CONFIG_PROC_FS code above,
+  * so the label is compiled under the same condition. */
+#ifdef CONFIG_PROC_FS
+cleanup_hook:
+ nf_unregister_hook(&cip_arp_ops);
+#endif /* CONFIG_PROC_FS */
+cleanup_target:
+ xt_unregister_target(&clusterip_tg_reg);
+ return ret;
+}
+
+/*
+ * Module exit: remove the proc directory (if built with CONFIG_PROC_FS),
+ * then unregister the ARP hook and the target.
+ */
+static void __exit clusterip_tg_exit(void)
+{
+ printk(KERN_NOTICE "ClusterIP Version %s unloading\n",
+ CLUSTERIP_VERSION);
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent);
+#endif
+ nf_unregister_hook(&cip_arp_ops);
+ xt_unregister_target(&clusterip_tg_reg);
+}
+
+module_init(clusterip_tg_init);
+module_exit(clusterip_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
new file mode 100644
index 0000000..f7e2fa0
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -0,0 +1,141 @@
+/* iptables module for the IPv4 and TCP ECN bits, Version 1.5
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/in.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <linux/tcp.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_ECN.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag modification");
+
+/*
+ * Write the ECT codepoint from @einfo into the TOS byte of the IPv4 header
+ * and patch the header checksum accordingly.
+ * Returns false only if the header could not be made writable.
+ */
+static inline bool
+set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
+{
+	const __u8 wanted = einfo->ip_ect & IPT_ECN_IP_MASK;
+	struct iphdr *iph;
+	__u8 prev_tos;
+
+	/* Header already carries the requested bits: nothing to rewrite. */
+	if ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == wanted)
+		return true;
+
+	if (!skb_make_writable(skb, sizeof(struct iphdr)))
+		return false;
+
+	/* Fetch the header pointer again after the writability call. */
+	iph = ip_hdr(skb);
+	prev_tos = iph->tos;
+	iph->tos = (prev_tos & ~IPT_ECN_IP_MASK) | wanted;
+	csum_replace2(&iph->check, htons(prev_tos), htons(iph->tos));
+
+	return true;
+}
+
+/*
+ * Apply the requested ECE/CWR values from @einfo to the TCP header and fix
+ * up the TCP checksum.  Returns false if the TCP header is not available or
+ * cannot be made writable, true otherwise (including when no change is
+ * needed).
+ */
+static inline bool
+set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
+{
+	const bool set_ece = einfo->operation & IPT_ECN_OP_SET_ECE;
+	const bool set_cwr = einfo->operation & IPT_ECN_OP_SET_CWR;
+	struct tcphdr hdr_copy, *th;
+	__be16 *flag_word;
+	__be16 before;
+	bool ece_differs, cwr_differs;
+
+	/* Not enough header? */
+	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(hdr_copy), &hdr_copy);
+	if (!th)
+		return false;
+
+	/* Leave the packet untouched when every requested bit already matches. */
+	ece_differs = set_ece && th->ece != einfo->proto.tcp.ece;
+	cwr_differs = set_cwr && th->cwr != einfo->proto.tcp.cwr;
+	if (!ece_differs && !cwr_differs)
+		return true;
+
+	if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*th)))
+		return false;
+	th = (void *)ip_hdr(skb) + ip_hdrlen(skb);
+
+	/* 16-bit word number 6 of the TCP header is the one the checksum
+	 * update below is keyed on; remember its value before editing. */
+	flag_word = &((__be16 *)th)[6];
+	before = *flag_word;
+
+	if (set_ece)
+		th->ece = einfo->proto.tcp.ece;
+	if (set_cwr)
+		th->cwr = einfo->proto.tcp.cwr;
+
+	inet_proto_csum_replace2(&th->check, skb, before, *flag_word, 0);
+	return true;
+}
+
+/*
+ * ECN target entry point: optionally rewrite the IP ECT bits and, for TCP
+ * packets, the ECE/CWR flags.  The packet is dropped if either rewrite
+ * reports an error; otherwise rule traversal continues.
+ */
+static unsigned int
+ecn_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct ipt_ECN_info *einfo = par->targinfo;
+	const unsigned int tcp_ops = IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR;
+
+	if ((einfo->operation & IPT_ECN_OP_SET_IP) && !set_ect_ip(skb, einfo))
+		return NF_DROP;
+
+	if ((einfo->operation & tcp_ops) &&
+	    ip_hdr(skb)->protocol == IPPROTO_TCP &&
+	    !set_ect_tcp(skb, einfo))
+		return NF_DROP;
+
+	return XT_CONTINUE;
+}
+
+/*
+ * Rule-insertion check for the ECN target.  Rejects the rule when
+ *  - any operation bit covered by IPT_ECN_OP_MASK is set,
+ *  - the requested ECT value has bits outside IPT_ECN_IP_MASK, or
+ *  - a TCP flag operation is requested on a rule that does not match
+ *    exactly (non-inverted) protocol TCP.
+ */
+static bool ecn_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct ipt_ECN_info *einfo = par->targinfo;
+	const struct ipt_entry *e = par->entryinfo;
+	bool wants_tcp_ops;
+	bool rule_is_tcp;
+
+	if (einfo->operation & IPT_ECN_OP_MASK) {
+		printk(KERN_WARNING "ECN: unsupported ECN operation %x\n",
+		       einfo->operation);
+		return false;
+	}
+
+	if (einfo->ip_ect & ~IPT_ECN_IP_MASK) {
+		printk(KERN_WARNING "ECN: new ECT codepoint %x out of mask\n",
+		       einfo->ip_ect);
+		return false;
+	}
+
+	wants_tcp_ops = einfo->operation &
+			(IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR);
+	rule_is_tcp = e->ip.proto == IPPROTO_TCP &&
+		      !(e->ip.invflags & XT_INV_PROTO);
+	if (wants_tcp_ops && !rule_is_tcp) {
+		printk(KERN_WARNING "ECN: cannot use TCP operations on a "
+		       "non-tcp rule\n");
+		return false;
+	}
+
+	return true;
+}
+
+/* Registration record for the IPv4 "ECN" target; it is restricted to the
+ * "mangle" table and validated by ecn_tg_check(). */
+static struct xt_target ecn_tg_reg __read_mostly = {
+ .name = "ECN",
+ .family = NFPROTO_IPV4,
+ .target = ecn_tg,
+ .targetsize = sizeof(struct ipt_ECN_info),
+ .table = "mangle",
+ .checkentry = ecn_tg_check,
+ .me = THIS_MODULE,
+};
+
+/* Module init: register the ECN target and return the registration result. */
+static int __init ecn_tg_init(void)
+{
+ return xt_register_target(&ecn_tg_reg);
+}
+
+/* Module exit: unregister the ECN target. */
+static void __exit ecn_tg_exit(void)
+{
+ xt_unregister_target(&ecn_tg_reg);
+}
+
+module_init(ecn_tg_init);
+module_exit(ecn_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
new file mode 100644
index 0000000..fc6ce04
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -0,0 +1,492 @@
+/*
+ * This is a module which is used for logging packets.
+ */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ipt_LOG.h>
+#include <net/netfilter/nf_log.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: IPv4 packet logging to syslog");
+
+/* Use lock to serialize, so printks don't overlap */
+static DEFINE_SPINLOCK(log_lock);
+
+/*
+ * Print a textual description of the IPv4 packet located at byte offset
+ * @iphoff inside @skb, as a sequence of printk() calls without a trailing
+ * newline.  @info selects the optional fields: for NF_LOG_TYPE_LOG its
+ * logflags are used, otherwise every flag in NF_LOG_MASK is enabled.
+ * For the ICMP error types handled below, the function calls itself once on
+ * the packet embedded in the ICMP payload (only when @iphoff is 0).
+ */
+/* One level of recursion won't kill us */
+static void dump_packet(const struct nf_loginfo *info,
+ const struct sk_buff *skb,
+ unsigned int iphoff)
+{
+ struct iphdr _iph;
+ const struct iphdr *ih;
+ unsigned int logflags;
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+ else
+ logflags = NF_LOG_MASK;
+
+ /* Give up early when the skb does not hold a full basic IP header. */
+ ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
+ if (ih == NULL) {
+ printk("TRUNCATED");
+ return;
+ }
+
+ /* Important fields:
+ * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
+ /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
+ printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
+ NIPQUAD(ih->saddr), NIPQUAD(ih->daddr));
+
+ /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
+ printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+ ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
+ ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
+
+ /* Max length: 6 "CE DF MF " */
+ if (ntohs(ih->frag_off) & IP_CE)
+ printk("CE ");
+ if (ntohs(ih->frag_off) & IP_DF)
+ printk("DF ");
+ if (ntohs(ih->frag_off) & IP_MF)
+ printk("MF ");
+
+ /* Max length: 11 "FRAG:65535 " */
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
+
+ /* Hex-dump the IP options when requested and present. */
+ if ((logflags & IPT_LOG_IPOPT)
+ && ih->ihl * 4 > sizeof(struct iphdr)) {
+ const unsigned char *op;
+ unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
+ unsigned int i, optsize;
+
+ optsize = ih->ihl * 4 - sizeof(struct iphdr);
+ op = skb_header_pointer(skb, iphoff+sizeof(_iph),
+ optsize, _opt);
+ if (op == NULL) {
+ printk("TRUNCATED");
+ return;
+ }
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+ printk("OPT (");
+ for (i = 0; i < optsize; i++)
+ printk("%02X", op[i]);
+ printk(") ");
+ }
+
+ /* Per-protocol details; non-first fragments (IP_OFFSET != 0) get no
+ * transport-level fields. */
+ switch (ih->protocol) {
+ case IPPROTO_TCP: {
+ struct tcphdr _tcph;
+ const struct tcphdr *th;
+
+ /* Max length: 10 "PROTO=TCP " */
+ printk("PROTO=TCP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL) {
+ printk("INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Max length: 20 "SPT=65535 DPT=65535 " */
+ printk("SPT=%u DPT=%u ",
+ ntohs(th->source), ntohs(th->dest));
+ /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
+ if (logflags & IPT_LOG_TCPSEQ)
+ printk("SEQ=%u ACK=%u ",
+ ntohl(th->seq), ntohl(th->ack_seq));
+ /* Max length: 13 "WINDOW=65535 " */
+ printk("WINDOW=%u ", ntohs(th->window));
+ /* Max length: 9 "RES=0x3F " */
+ printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
+ /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
+ if (th->cwr)
+ printk("CWR ");
+ if (th->ece)
+ printk("ECE ");
+ if (th->urg)
+ printk("URG ");
+ if (th->ack)
+ printk("ACK ");
+ if (th->psh)
+ printk("PSH ");
+ if (th->rst)
+ printk("RST ");
+ if (th->syn)
+ printk("SYN ");
+ if (th->fin)
+ printk("FIN ");
+ /* Max length: 11 "URGP=65535 " */
+ printk("URGP=%u ", ntohs(th->urg_ptr));
+
+ /* Hex-dump the TCP options when requested and present. */
+ if ((logflags & IPT_LOG_TCPOPT)
+ && th->doff * 4 > sizeof(struct tcphdr)) {
+ unsigned char _opt[4 * 15 - sizeof(struct tcphdr)];
+ const unsigned char *op;
+ unsigned int i, optsize;
+
+ optsize = th->doff * 4 - sizeof(struct tcphdr);
+ op = skb_header_pointer(skb,
+ iphoff+ih->ihl*4+sizeof(_tcph),
+ optsize, _opt);
+ if (op == NULL) {
+ printk("TRUNCATED");
+ return;
+ }
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+ printk("OPT (");
+ for (i = 0; i < optsize; i++)
+ printk("%02X", op[i]);
+ printk(") ");
+ }
+ break;
+ }
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE: {
+ struct udphdr _udph;
+ const struct udphdr *uh;
+
+ if (ih->protocol == IPPROTO_UDP)
+ /* Max length: 10 "PROTO=UDP " */
+ printk("PROTO=UDP " );
+ else /* Max length: 14 "PROTO=UDPLITE " */
+ printk("PROTO=UDPLITE ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_udph), &_udph);
+ if (uh == NULL) {
+ printk("INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Max length: 30 "SPT=65535 DPT=65535 LEN=65535 " */
+ printk("SPT=%u DPT=%u LEN=%u ",
+ ntohs(uh->source), ntohs(uh->dest),
+ ntohs(uh->len));
+ break;
+ }
+ case IPPROTO_ICMP: {
+ struct icmphdr _icmph;
+ const struct icmphdr *ich;
+ /* Minimum ICMP message length per type; entries left at 0 are
+ * not length-checked below. */
+ static const size_t required_len[NR_ICMP_TYPES+1]
+ = { [ICMP_ECHOREPLY] = 4,
+ [ICMP_DEST_UNREACH]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_SOURCE_QUENCH]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_REDIRECT]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_ECHO] = 4,
+ [ICMP_TIME_EXCEEDED]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_PARAMETERPROB]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_TIMESTAMP] = 20,
+ [ICMP_TIMESTAMPREPLY] = 20,
+ [ICMP_ADDRESS] = 12,
+ [ICMP_ADDRESSREPLY] = 12 };
+
+ /* Max length: 11 "PROTO=ICMP " */
+ printk("PROTO=ICMP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+ sizeof(_icmph), &_icmph);
+ if (ich == NULL) {
+ printk("INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Max length: 18 "TYPE=255 CODE=255 " */
+ printk("TYPE=%u CODE=%u ", ich->type, ich->code);
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (ich->type <= NR_ICMP_TYPES
+ && required_len[ich->type]
+ && skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
+ printk("INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ switch (ich->type) {
+ case ICMP_ECHOREPLY:
+ case ICMP_ECHO:
+ /* Max length: 19 "ID=65535 SEQ=65535 " */
+ printk("ID=%u SEQ=%u ",
+ ntohs(ich->un.echo.id),
+ ntohs(ich->un.echo.sequence));
+ break;
+
+ case ICMP_PARAMETERPROB:
+ /* Max length: 14 "PARAMETER=255 " */
+ printk("PARAMETER=%u ",
+ ntohl(ich->un.gateway) >> 24);
+ break;
+ case ICMP_REDIRECT:
+ /* Max length: 24 "GATEWAY=255.255.255.255 " */
+ printk("GATEWAY=%u.%u.%u.%u ",
+ NIPQUAD(ich->un.gateway));
+ /* Fall through */
+ case ICMP_DEST_UNREACH:
+ case ICMP_SOURCE_QUENCH:
+ case ICMP_TIME_EXCEEDED:
+ /* Max length: 3+maxlen */
+ if (!iphoff) { /* Only recurse once. */
+ printk("[");
+ dump_packet(info, skb,
+ iphoff + ih->ihl*4+sizeof(_icmph));
+ printk("] ");
+ }
+
+ /* Max length: 10 "MTU=65535 " */
+ if (ich->type == ICMP_DEST_UNREACH
+ && ich->code == ICMP_FRAG_NEEDED)
+ printk("MTU=%u ", ntohs(ich->un.frag.mtu));
+ }
+ break;
+ }
+ /* Max Length */
+ case IPPROTO_AH: {
+ struct ip_auth_hdr _ahdr;
+ const struct ip_auth_hdr *ah;
+
+ /* NOTE(review): unlike the other protocols, the fragment check
+ * comes before the PROTO= output here, so a non-first AH
+ * fragment is logged without any PROTO field - confirm whether
+ * that is intended. */
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 9 "PROTO=AH " */
+ printk("PROTO=AH ");
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_ahdr), &_ahdr);
+ if (ah == NULL) {
+ printk("INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+ printk("SPI=0x%x ", ntohl(ah->spi));
+ break;
+ }
+ case IPPROTO_ESP: {
+ struct ip_esp_hdr _esph;
+ const struct ip_esp_hdr *eh;
+
+ /* Max length: 10 "PROTO=ESP " */
+ printk("PROTO=ESP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_esph), &_esph);
+ if (eh == NULL) {
+ printk("INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+ printk("SPI=0x%x ", ntohl(eh->spi));
+ break;
+ }
+ /* Max length: 10 "PROTO=255 " */
+ default:
+ printk("PROTO=%u ", ih->protocol);
+ }
+
+ /* Owner information is only printed for the outermost packet and only
+ * while the socket still has a file attached. */
+ /* Max length: 15 "UID=4294967295 " */
+ if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
+ read_lock_bh(&skb->sk->sk_callback_lock);
+ if (skb->sk->sk_socket && skb->sk->sk_socket->file)
+ printk("UID=%u GID=%u ",
+ skb->sk->sk_socket->file->f_uid,
+ skb->sk->sk_socket->file->f_gid);
+ read_unlock_bh(&skb->sk->sk_callback_lock);
+ }
+
+ /* Max length: 16 "MARK=0xFFFFFFFF " */
+ if (!iphoff && skb->mark)
+ printk("MARK=0x%x ", skb->mark);
+
+ /* Proto Max log string length */
+ /* IP: 40+46+6+11+127 = 230 */
+ /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
+ /* UDP: 10+max(25,30) = 40 */
+ /* UDPLITE: 14+max(25,30) = 44 */
+ /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
+ /* ESP: 10+max(25)+15 = 50 */
+ /* AH: 9+max(25)+15 = 49 */
+ /* unknown: 10 */
+
+ /* (ICMP allows recursion one level deep) */
+ /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */
+ /* maxlen = 230+ 91 + 230 + 252 = 803 */
+}
+
+/* Fallback log settings used by ipt_log_packet() when the caller passes no
+ * loginfo: level 0 with every flag in NF_LOG_MASK enabled. */
+static struct nf_loginfo default_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = 0,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+/*
+ * Emit one complete log line for @skb: "<level>prefix IN= OUT=", optional
+ * bridge port names, the link-layer header for input-only packets, then
+ * the packet description from dump_packet() and a newline.
+ * The whole line is printed under log_lock so the individual printk()
+ * calls of concurrent packets do not interleave.
+ * @loginfo may be NULL, in which case default_loginfo is used.
+ * @pf and @hooknum are not used by this function body.
+ */
+static void
+ipt_log_packet(u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ if (!loginfo)
+ loginfo = &default_loginfo;
+
+ spin_lock_bh(&log_lock);
+ /* NOTE(review): u.log.level is read without checking loginfo->type -
+ * confirm callers only pass NF_LOG_TYPE_LOG records here. */
+ printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
+ prefix,
+ in ? in->name : "",
+ out ? out->name : "");
+#ifdef CONFIG_BRIDGE_NETFILTER
+ /* Report the physical bridge ports when they differ from in/out. */
+ if (skb->nf_bridge) {
+ const struct net_device *physindev;
+ const struct net_device *physoutdev;
+
+ physindev = skb->nf_bridge->physindev;
+ if (physindev && in != physindev)
+ printk("PHYSIN=%s ", physindev->name);
+ physoutdev = skb->nf_bridge->physoutdev;
+ if (physoutdev && out != physoutdev)
+ printk("PHYSOUT=%s ", physoutdev->name);
+ }
+#endif
+
+ if (in && !out) {
+ /* MAC logging for input chain only. */
+ printk("MAC=");
+ /* Dump hard_header_len bytes as colon-separated hex, ending with
+ * a space; print just a space when no MAC header is present. */
+ if (skb->dev && skb->dev->hard_header_len
+ && skb->mac_header != skb->network_header) {
+ int i;
+ const unsigned char *p = skb_mac_header(skb);
+ for (i = 0; i < skb->dev->hard_header_len; i++,p++)
+ printk("%02x%c", *p,
+ i==skb->dev->hard_header_len - 1
+ ? ' ':':');
+ } else
+ printk(" ");
+ }
+
+ dump_packet(loginfo, skb, 0);
+ printk("\n");
+ spin_unlock_bh(&log_lock);
+}
+
+/*
+ * LOG target entry point: build an NF_LOG_TYPE_LOG record from the rule's
+ * level and flags, log the packet with the rule's prefix and let rule
+ * traversal continue.
+ */
+static unsigned int
+log_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct ipt_log_info *loginfo = par->targinfo;
+	const struct nf_loginfo li = {
+		.type = NF_LOG_TYPE_LOG,
+		.u = {
+			.log = {
+				.level    = loginfo->level,
+				.logflags = loginfo->logflags,
+			},
+		},
+	};
+
+	ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, par->out,
+		       &li, loginfo->prefix);
+
+	return XT_CONTINUE;
+}
+
+/*
+ * Rule-insertion check for the LOG target: the level must be below 8 and
+ * the last byte of the prefix buffer must be a NUL terminator.
+ */
+static bool log_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct ipt_log_info *loginfo = par->targinfo;
+	const size_t last = sizeof(loginfo->prefix) - 1;
+
+	if (loginfo->level >= 8) {
+		pr_debug("LOG: level %u >= 8\n", loginfo->level);
+		return false;
+	}
+
+	if (loginfo->prefix[last] != '\0') {
+		pr_debug("LOG: prefix term %i\n", loginfo->prefix[last]);
+		return false;
+	}
+
+	return true;
+}
+
+/* Registration record for the IPv4 "LOG" target; no table or hook
+ * restriction is set here. */
+static struct xt_target log_tg_reg __read_mostly = {
+ .name = "LOG",
+ .family = NFPROTO_IPV4,
+ .target = log_tg,
+ .targetsize = sizeof(struct ipt_log_info),
+ .checkentry = log_tg_check,
+ .me = THIS_MODULE,
+};
+
+/* nf_log backend descriptor that routes log requests to ipt_log_packet(). */
+static const struct nf_logger ipt_log_logger ={
+ .name = "ipt_LOG",
+ .logfn = &ipt_log_packet,
+ .me = THIS_MODULE,
+};
+
+/*
+ * Module init: register the LOG target, then offer ipt_log_logger as the
+ * IPv4 nf_log backend.  Only the target registration can fail the load.
+ */
+static int __init log_tg_init(void)
+{
+	int err = xt_register_target(&log_tg_reg);
+
+	if (err < 0)
+		return err;
+
+	/* NOTE(review): the result of nf_log_register() is ignored here;
+	 * presumably another logger already being registered for IPv4 is
+	 * acceptable - confirm. */
+	nf_log_register(NFPROTO_IPV4, &ipt_log_logger);
+
+	return 0;
+}
+
+/* Module exit: withdraw the logger first, then unregister the target. */
+static void __exit log_tg_exit(void)
+{
+ nf_log_unregister(&ipt_log_logger);
+ xt_unregister_target(&log_tg_reg);
+}
+
+module_init(log_tg_init);
+module_exit(log_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
new file mode 100644
index 0000000..f389f60
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -0,0 +1,183 @@
+/* Masquerade. Simple mapping which alters range to a local IP address
+ (depending on route). */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <net/route.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
+
+/* Lock protects masq region inside conntrack */
+static DEFINE_RWLOCK(masq_lock);
+
+/* FIXME: Multiple targets. --RR */
+/*
+ * Rule-insertion check for MASQUERADE: the rule must not request an
+ * explicit IP mapping (the address is chosen at packet time) and must
+ * carry exactly one range.
+ */
+static bool masquerade_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_multi_range_compat *ranges = par->targinfo;
+	const bool maps_ips = ranges->range[0].flags & IP_NAT_RANGE_MAP_IPS;
+
+	if (maps_ips) {
+		pr_debug("masquerade_check: bad MAP_IPS.\n");
+		return false;
+	}
+
+	if (ranges->rangesize != 1) {
+		pr_debug("masquerade_check: bad rangesize %u\n", ranges->rangesize);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * MASQUERADE target: pick a source address for the outgoing interface
+ * with inet_select_addr(), record the interface index in the connection's
+ * NAT data and set up source NAT to that single address, keeping the
+ * flags and min/max values of the rule's range.
+ * Returns NF_ACCEPT untouched for connections whose original source is
+ * 0.0.0.0, NF_DROP when no address can be selected, otherwise the verdict
+ * of nf_nat_setup_info().
+ */
+static unsigned int
+masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+ struct nf_conn *ct;
+ struct nf_conn_nat *nat;
+ enum ip_conntrack_info ctinfo;
+ struct nf_nat_range newrange;
+ const struct nf_nat_multi_range_compat *mr;
+ const struct rtable *rt;
+ __be32 newsrc;
+
+ NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
+
+ ct = nf_ct_get(skb, &ctinfo);
+ /* NOTE(review): nfct_nat(ct) runs before the assertion on ct below,
+ * and nat is later dereferenced without a NULL check - confirm both
+ * are guaranteed non-NULL in the nat table. */
+ nat = nfct_nat(ct);
+
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
+ || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
+
+ /* Source address is 0.0.0.0 - locally generated packet that is
+ * probably not supposed to be masqueraded.
+ */
+ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
+ return NF_ACCEPT;
+
+ mr = par->targinfo;
+ rt = skb->rtable;
+ newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
+ if (!newsrc) {
+ printk("MASQUERADE: %s ate my IP address\n", par->out->name);
+ return NF_DROP;
+ }
+
+ /* Remember the output interface so device_cmp() can later find the
+ * connections that were masqueraded through it. */
+ write_lock_bh(&masq_lock);
+ nat->masq_index = par->out->ifindex;
+ write_unlock_bh(&masq_lock);
+
+ /* Transfer from original range. */
+ newrange = ((struct nf_nat_range)
+ { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+ newsrc, newsrc,
+ mr->range[0].min, mr->range[0].max });
+
+ /* Hand modified range to generic setup. */
+ return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_SRC);
+}
+
+/*
+ * Iterator callback: returns non-zero when connection @i was masqueraded
+ * through the interface whose index is carried in @ifindex (an int cast
+ * into the pointer value), and 0 otherwise or when @i has no NAT data.
+ */
+static int
+device_cmp(struct nf_conn *i, void *ifindex)
+{
+	const struct nf_conn_nat *nat = nfct_nat(i);
+	const int wanted = (int)(long)ifindex;
+	int matched = 0;
+
+	if (nat) {
+		read_lock_bh(&masq_lock);
+		matched = (nat->masq_index == wanted);
+		read_unlock_bh(&masq_lock);
+	}
+
+	return matched;
+}
+
+/*
+ * Netdevice notifier callback.  On NETDEV_DOWN, walk the conntrack table
+ * of the device's namespace and clean up every connection that
+ * device_cmp() associates with the device.  All other events are ignored.
+ */
+static int masq_device_event(struct notifier_block *this,
+			     unsigned long event,
+			     void *ptr)
+{
+	const struct net_device *dev = ptr;
+	struct net *net = dev_net(dev);
+
+	if (event != NETDEV_DOWN)
+		return NOTIFY_DONE;
+
+	/* Device was downed.  Search entire table for
+	   conntracks which were associated with that device,
+	   and forget them. */
+	NF_CT_ASSERT(dev->ifindex != 0);
+	nf_ct_iterate_cleanup(net, device_cmp, (void *)(long)dev->ifindex);
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Inet address notifier callback: resolve the device owning the address
+ * in @ptr and reuse the device-event handler with the same event code.
+ */
+static int masq_inet_event(struct notifier_block *this,
+			   unsigned long event,
+			   void *ptr)
+{
+	const struct in_ifaddr *ifa = ptr;
+	struct net_device *dev = ifa->ifa_dev->dev;
+
+	return masq_device_event(this, event, dev);
+}
+
+/* Notifier block delivering netdevice events to masq_device_event(). */
+static struct notifier_block masq_dev_notifier = {
+ .notifier_call = masq_device_event,
+};
+
+/* Notifier block delivering inet address events to masq_inet_event(). */
+static struct notifier_block masq_inet_notifier = {
+ .notifier_call = masq_inet_event,
+};
+
+/* Registration record for the IPv4 "MASQUERADE" target; it is limited to
+ * the "nat" table and the POST_ROUTING hook. */
+static struct xt_target masquerade_tg_reg __read_mostly = {
+ .name = "MASQUERADE",
+ .family = NFPROTO_IPV4,
+ .target = masquerade_tg,
+ .targetsize = sizeof(struct nf_nat_multi_range_compat),
+ .table = "nat",
+ .hooks = 1 << NF_INET_POST_ROUTING,
+ .checkentry = masquerade_tg_check,
+ .me = THIS_MODULE,
+};
+
+/*
+ * Module init: register the MASQUERADE target and the two notifiers it
+ * relies on to forget connections when a device goes down or an address
+ * is removed.
+ *
+ * The notifier registrations are now checked: previously their return
+ * values were discarded, so a failed registration left the module loaded
+ * without its cleanup hooks and made the exit path unregister notifiers
+ * that had never been registered.  On any failure the earlier steps are
+ * undone and the error code is returned.
+ */
+static int __init masquerade_tg_init(void)
+{
+	int ret;
+
+	ret = xt_register_target(&masquerade_tg_reg);
+	if (ret != 0)
+		return ret;
+
+	/* Register for device down reports */
+	ret = register_netdevice_notifier(&masq_dev_notifier);
+	if (ret != 0)
+		goto err_target;
+
+	/* Register IP address change reports */
+	ret = register_inetaddr_notifier(&masq_inet_notifier);
+	if (ret != 0)
+		goto err_netdev;
+
+	return 0;
+
+err_netdev:
+	unregister_netdevice_notifier(&masq_dev_notifier);
+err_target:
+	xt_unregister_target(&masquerade_tg_reg);
+	return ret;
+}
+
+/* Module exit: unregister the target, then both notifiers. */
+static void __exit masquerade_tg_exit(void)
+{
+ xt_unregister_target(&masquerade_tg_reg);
+ unregister_netdevice_notifier(&masq_dev_notifier);
+ unregister_inetaddr_notifier(&masq_inet_notifier);
+}
+
+module_init(masquerade_tg_init);
+module_exit(masquerade_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
new file mode 100644
index 0000000..7c29582
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -0,0 +1,96 @@
+/* NETMAP - static NAT mapping of IP network addresses (1:1).
+ * The mapping can be applied to source (POSTROUTING),
+ * destination (PREROUTING), or both (with separate rules).
+ */
+
+/* (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat_rule.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
+MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
+
+/*
+ * Rule-insertion check for NETMAP: the rule must carry an explicit IP
+ * mapping (IP_NAT_RANGE_MAP_IPS) and exactly one range.
+ */
+static bool netmap_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_multi_range_compat *ranges = par->targinfo;
+	const bool maps_ips = ranges->range[0].flags & IP_NAT_RANGE_MAP_IPS;
+
+	if (!maps_ips) {
+		pr_debug("NETMAP:check: bad MAP_IPS.\n");
+		return false;
+	}
+
+	if (ranges->rangesize != 1) {
+		pr_debug("NETMAP:check: bad rangesize %u.\n", ranges->rangesize);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * NETMAP target: keep the host part of the packet's address and replace
+ * the network part with the one from the rule's range, then hand the
+ * resulting single-address range to the generic NAT setup.  The
+ * destination address is rewritten on PRE_ROUTING and LOCAL_OUT, the
+ * source address otherwise.
+ */
+static unsigned int
+netmap_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct nf_nat_range newrange;
+	struct nf_conn *ct;
+	__be32 netmask, host_bits, new_ip;
+	bool rewrite_dst;
+
+	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
+		     par->hooknum == NF_INET_POST_ROUTING ||
+		     par->hooknum == NF_INET_LOCAL_OUT);
+	ct = nf_ct_get(skb, &ctinfo);
+
+	/* Bits on which min_ip and max_ip agree form the network mask. */
+	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
+
+	rewrite_dst = par->hooknum == NF_INET_PRE_ROUTING ||
+		      par->hooknum == NF_INET_LOCAL_OUT;
+	if (rewrite_dst)
+		host_bits = ip_hdr(skb)->daddr & ~netmask;
+	else
+		host_bits = ip_hdr(skb)->saddr & ~netmask;
+
+	new_ip = host_bits | (mr->range[0].min_ip & netmask);
+
+	newrange = (struct nf_nat_range) {
+		.flags  = mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+		.min_ip = new_ip,
+		.max_ip = new_ip,
+		.min    = mr->range[0].min,
+		.max    = mr->range[0].max,
+	};
+
+	/* Hand modified range to generic setup. */
+	return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum));
+}
+
+/* Registration record for the IPv4 "NETMAP" target; it is limited to the
+ * "nat" table and the PRE_ROUTING, POST_ROUTING and LOCAL_OUT hooks. */
+static struct xt_target netmap_tg_reg __read_mostly = {
+ .name = "NETMAP",
+ .family = NFPROTO_IPV4,
+ .target = netmap_tg,
+ .targetsize = sizeof(struct nf_nat_multi_range_compat),
+ .table = "nat",
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT),
+ .checkentry = netmap_tg_check,
+ .me = THIS_MODULE
+};
+
+/* Module init: register the NETMAP target and return the result. */
+static int __init netmap_tg_init(void)
+{
+ return xt_register_target(&netmap_tg_reg);
+}
+
+/* Module exit: unregister the NETMAP target. */
+static void __exit netmap_tg_exit(void)
+{
+ xt_unregister_target(&netmap_tg_reg);
+}
+
+module_init(netmap_tg_init);
+module_exit(netmap_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
new file mode 100644
index 0000000..698e5e7
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -0,0 +1,110 @@
+/* Redirect. Simple mapping which alters dst to a local IP address. */
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netdevice.h>
+#include <linux/if.h>
+#include <linux/inetdevice.h>
+#include <net/protocol.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat_rule.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
+
+/* FIXME: Take multiple ranges --RR */
+/*
+ * Rule-insertion check for REDIRECT: the rule must not request an explicit
+ * IP mapping (the address is chosen at packet time) and must carry
+ * exactly one range.
+ */
+static bool redirect_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_multi_range_compat *ranges = par->targinfo;
+	const bool maps_ips = ranges->range[0].flags & IP_NAT_RANGE_MAP_IPS;
+
+	if (maps_ips) {
+		pr_debug("redirect_check: bad MAP_IPS.\n");
+		return false;
+	}
+
+	if (ranges->rangesize != 1) {
+		pr_debug("redirect_check: bad rangesize %u.\n", ranges->rangesize);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * REDIRECT target: set up destination NAT to a local address.  Locally
+ * generated packets are pointed at 127.0.0.1; others at the first address
+ * configured on the receiving device.  The packet is dropped when that
+ * device has no address.
+ */
+static unsigned int
+redirect_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct nf_nat_range newrange;
+	struct nf_conn *ct;
+	__be32 newdst = 0;
+
+	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
+		     par->hooknum == NF_INET_LOCAL_OUT);
+
+	ct = nf_ct_get(skb, &ctinfo);
+	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+
+	if (par->hooknum == NF_INET_LOCAL_OUT) {
+		/* Local packets: make them go to loopback */
+		newdst = htonl(0x7F000001);
+	} else {
+		struct in_device *indev;
+		struct in_ifaddr *ifa;
+
+		rcu_read_lock();
+		indev = __in_dev_get_rcu(skb->dev);
+		if (indev) {
+			ifa = indev->ifa_list;
+			if (ifa)
+				newdst = ifa->ifa_local;
+		}
+		rcu_read_unlock();
+
+		if (!newdst)
+			return NF_DROP;
+	}
+
+	/* Transfer from original range. */
+	newrange = (struct nf_nat_range) {
+		.flags  = mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+		.min_ip = newdst,
+		.max_ip = newdst,
+		.min    = mr->range[0].min,
+		.max    = mr->range[0].max,
+	};
+
+	/* Hand modified range to generic setup. */
+	return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_DST);
+}
+
+/* Registration record for the IPv4 "REDIRECT" target; it is limited to
+ * the "nat" table and the PRE_ROUTING and LOCAL_OUT hooks. */
+static struct xt_target redirect_tg_reg __read_mostly = {
+ .name = "REDIRECT",
+ .family = NFPROTO_IPV4,
+ .target = redirect_tg,
+ .targetsize = sizeof(struct nf_nat_multi_range_compat),
+ .table = "nat",
+ .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
+ .checkentry = redirect_tg_check,
+ .me = THIS_MODULE,
+};
+
+/* Module init: register the REDIRECT target and return the result. */
+static int __init redirect_tg_init(void)
+{
+ return xt_register_target(&redirect_tg_reg);
+}
+
+/* Module exit: unregister the REDIRECT target. */
+static void __exit redirect_tg_exit(void)
+{
+ xt_unregister_target(&redirect_tg_reg);
+}
+
+module_init(redirect_tg_init);
+module_exit(redirect_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
new file mode 100644
index 0000000..0b4b6e0
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -0,0 +1,220 @@
+/*
+ * This is a module which is used for rejecting packets.
+ */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/route.h>
+#include <net/dst.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_REJECT.h>
+#ifdef CONFIG_BRIDGE_NETFILTER
+#include <linux/netfilter_bridge.h>
+#endif
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4");
+
+/*
+ * Build and send a TCP RST in reply to @oldskb, which was seen on
+ * netfilter hook @hook.  Nothing is sent for non-first fragments, for
+ * packets whose TCP header cannot be read, for packets that are RSTs
+ * themselves, or for packets failing the checksum test.  The reply is a
+ * freshly allocated skb holding only an IP and a TCP header, with
+ * addresses and ports swapped relative to the original.
+ */
+/* Send RST reply */
+static void send_reset(struct sk_buff *oldskb, int hook)
+{
+ struct sk_buff *nskb;
+ const struct iphdr *oiph;
+ struct iphdr *niph;
+ const struct tcphdr *oth;
+ struct tcphdr _otcph, *tcph;
+ unsigned int addr_type;
+
+ /* IP header checks: fragment. */
+ if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
+ return;
+
+ oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
+ sizeof(_otcph), &_otcph);
+ if (oth == NULL)
+ return;
+
+ /* No RST for RST. */
+ if (oth->rst)
+ return;
+
+ /* Check checksum */
+ if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
+ return;
+ oiph = ip_hdr(oldskb);
+
+ /* Room for link-layer headroom plus bare IP and TCP headers. */
+ nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
+ LL_MAX_HEADER, GFP_ATOMIC);
+ if (!nskb)
+ return;
+
+ skb_reserve(nskb, LL_MAX_HEADER);
+
+ /* IP header: tot_len and ttl are not filled in at this point; ttl is
+ * set after routing below. */
+ skb_reset_network_header(nskb);
+ niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
+ niph->version = 4;
+ niph->ihl = sizeof(struct iphdr) / 4;
+ niph->tos = 0;
+ niph->id = 0;
+ niph->frag_off = htons(IP_DF);
+ niph->protocol = IPPROTO_TCP;
+ niph->check = 0;
+ niph->saddr = oiph->daddr;
+ niph->daddr = oiph->saddr;
+
+ tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
+ memset(tcph, 0, sizeof(*tcph));
+ tcph->source = oth->dest;
+ tcph->dest = oth->source;
+ tcph->doff = sizeof(struct tcphdr) / 4;
+
+ /* If the original carried an ACK, reuse its ack number as our
+ * sequence number; otherwise acknowledge everything the original
+ * occupied in sequence space (payload plus SYN/FIN). */
+ if (oth->ack)
+ tcph->seq = oth->ack_seq;
+ else {
+ tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
+ oldskb->len - ip_hdrlen(oldskb) -
+ (oth->doff << 2));
+ tcph->ack = 1;
+ }
+
+ tcph->rst = 1;
+ tcph->check = tcp_v4_check(sizeof(struct tcphdr),
+ niph->saddr, niph->daddr,
+ csum_partial(tcph,
+ sizeof(struct tcphdr), 0));
+
+ /* NOTE(review): nskb was allocated just above and nothing visible
+ * here assigns nskb->nf_bridge, so the bridge test below looks like
+ * it can never be true; possibly oldskb was intended - confirm. */
+ addr_type = RTN_UNSPEC;
+ if (hook != NF_INET_FORWARD
+#ifdef CONFIG_BRIDGE_NETFILTER
+ || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED)
+#endif
+ )
+ addr_type = RTN_LOCAL;
+
+ /* ip_route_me_harder expects skb->dst to be set */
+ dst_hold(oldskb->dst);
+ nskb->dst = oldskb->dst;
+
+ if (ip_route_me_harder(nskb, addr_type))
+ goto free_nskb;
+
+ niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
+ nskb->ip_summed = CHECKSUM_NONE;
+
+ /* "Never happens" */
+ if (nskb->len > dst_mtu(nskb->dst))
+ goto free_nskb;
+
+ nf_ct_attach(nskb, oldskb);
+
+ ip_local_out(nskb);
+ return;
+
+ free_nskb:
+ kfree_skb(nskb);
+}
+
+/* Send an ICMP destination-unreachable message with the given @code in
+ * response to @skb_in. */
+static inline void send_unreach(struct sk_buff *skb_in, int code)
+{
+ icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+}
+
+/*
+ * REJECT target: answer the packet with the ICMP destination-unreachable
+ * code or the TCP RST selected by the rule, then drop it.  Values of
+ * reject->with that are not listed produce no reply; the verdict is
+ * always NF_DROP.
+ */
+static unsigned int
+reject_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct ipt_reject_info *reject = par->targinfo;
+	int icmp_code = -1;	/* stays negative when no ICMP error is due */
+
+	/* WARNING: This code causes reentry within iptables.
+	   This means that the iptables jump stack is now crap.  We
+	   must return an absolute verdict. --RR */
+	switch (reject->with) {
+	case IPT_ICMP_NET_UNREACHABLE:
+		icmp_code = ICMP_NET_UNREACH;
+		break;
+	case IPT_ICMP_HOST_UNREACHABLE:
+		icmp_code = ICMP_HOST_UNREACH;
+		break;
+	case IPT_ICMP_PROT_UNREACHABLE:
+		icmp_code = ICMP_PROT_UNREACH;
+		break;
+	case IPT_ICMP_PORT_UNREACHABLE:
+		icmp_code = ICMP_PORT_UNREACH;
+		break;
+	case IPT_ICMP_NET_PROHIBITED:
+		icmp_code = ICMP_NET_ANO;
+		break;
+	case IPT_ICMP_HOST_PROHIBITED:
+		icmp_code = ICMP_HOST_ANO;
+		break;
+	case IPT_ICMP_ADMIN_PROHIBITED:
+		icmp_code = ICMP_PKT_FILTERED;
+		break;
+	case IPT_TCP_RESET:
+		send_reset(skb, par->hooknum);
+		break;
+	case IPT_ICMP_ECHOREPLY:
+		/* Doesn't happen. */
+		break;
+	}
+
+	if (icmp_code >= 0)
+		send_unreach(skb, icmp_code);
+
+	return NF_DROP;
+}
+
+/*
+ * Rule-insertion check for REJECT: ECHOREPLY is refused outright, and
+ * TCP_RESET is only accepted on rules that match exactly (non-inverted)
+ * protocol TCP.  Every other reject type is accepted.
+ */
+static bool reject_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct ipt_reject_info *rejinfo = par->targinfo;
+	const struct ipt_entry *e = par->entryinfo;
+
+	if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
+		printk("ipt_REJECT: ECHOREPLY no longer supported.\n");
+		return false;
+	}
+
+	if (rejinfo->with != IPT_TCP_RESET)
+		return true;
+
+	/* Must specify that it's a TCP packet */
+	if (e->ip.proto == IPPROTO_TCP && !(e->ip.invflags & XT_INV_PROTO))
+		return true;
+
+	printk("ipt_REJECT: TCP_RESET invalid for non-tcp\n");
+	return false;
+}
+
+/* Registration record for the IPv4 "REJECT" target; it is limited to the
+ * "filter" table and the LOCAL_IN, FORWARD and LOCAL_OUT hooks. */
+static struct xt_target reject_tg_reg __read_mostly = {
+ .name = "REJECT",
+ .family = NFPROTO_IPV4,
+ .target = reject_tg,
+ .targetsize = sizeof(struct ipt_reject_info),
+ .table = "filter",
+ .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT),
+ .checkentry = reject_tg_check,
+ .me = THIS_MODULE,
+};
+
+/* Module init: register the REJECT target and return the result. */
+static int __init reject_tg_init(void)
+{
+ return xt_register_target(&reject_tg_reg);
+}
+
+/* Module exit: unregister the REJECT target. */
+static void __exit reject_tg_exit(void)
+{
+ xt_unregister_target(&reject_tg_reg);
+}
+
+module_init(reject_tg_init);
+module_exit(reject_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
new file mode 100644
index 0000000..6d76aae
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -0,0 +1,97 @@
+/* TTL modification target for IP tables
+ * (C) 2000,2005 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ipt_TTL.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target");
+MODULE_LICENSE("GPL");
+
+ /*
+  * TTL target: set, increment or decrement the IPv4 TTL of @skb according to
+  * the rule's ipt_TTL_info, clamping the result to [0, 255] and patching the
+  * header checksum incrementally.
+  *
+  * Returns NF_DROP if the packet cannot be made writable, XT_CONTINUE
+  * otherwise.
+  */
+ static unsigned int
+ ttl_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ {
+ 	const struct ipt_TTL_info *info = par->targinfo;
+ 	struct iphdr *iph;
+ 	int new_ttl;
+
+ 	if (!skb_make_writable(skb, skb->len))
+ 		return NF_DROP;
+
+ 	iph = ip_hdr(skb);
+
+ 	/* Unknown modes fall through with the TTL left as it is. */
+ 	new_ttl = iph->ttl;
+
+ 	switch (info->mode) {
+ 	case IPT_TTL_SET:
+ 		new_ttl = info->ttl;
+ 		break;
+ 	case IPT_TTL_INC:
+ 		new_ttl += info->ttl;
+ 		new_ttl = (new_ttl > 255) ? 255 : new_ttl;
+ 		break;
+ 	case IPT_TTL_DEC:
+ 		new_ttl -= info->ttl;
+ 		new_ttl = (new_ttl < 0) ? 0 : new_ttl;
+ 		break;
+ 	}
+
+ 	if (new_ttl == iph->ttl)
+ 		return XT_CONTINUE;
+
+ 	/* TTL is the high byte of its 16-bit header word, hence the shift. */
+ 	csum_replace2(&iph->check, htons(iph->ttl << 8),
+ 		      htons(new_ttl << 8));
+ 	iph->ttl = new_ttl;
+
+ 	return XT_CONTINUE;
+ }
+
+ /*
+  * Validate a TTL rule: the mode must not exceed IPT_TTL_MAXMODE, and an
+  * increment/decrement of zero is refused.
+  */
+ static bool ttl_tg_check(const struct xt_tgchk_param *par)
+ {
+ 	const struct ipt_TTL_info *cfg = par->targinfo;
+
+ 	if (cfg->mode > IPT_TTL_MAXMODE) {
+ 		printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n",
+ 		       cfg->mode);
+ 		return false;
+ 	}
+
+ 	/* Only IPT_TTL_SET may carry a zero value. */
+ 	return cfg->mode == IPT_TTL_SET || cfg->ttl != 0;
+ }
+
+ /* xtables registration record for the IPv4 "TTL" target; restricted to the "mangle" table. */
+ static struct xt_target ttl_tg_reg __read_mostly = {
+ .name = "TTL",
+ .family = NFPROTO_IPV4,
+ .target = ttl_tg,
+ .targetsize = sizeof(struct ipt_TTL_info),
+ .table = "mangle",
+ .checkentry = ttl_tg_check,
+ .me = THIS_MODULE,
+ };
+
+ /* Module init: register the TTL target; returns xt_register_target()'s result. */
+ static int __init ttl_tg_init(void)
+ {
+ return xt_register_target(&ttl_tg_reg);
+ }
+
+ /* Module exit: unregister the TTL target. */
+ static void __exit ttl_tg_exit(void)
+ {
+ xt_unregister_target(&ttl_tg_reg);
+ }
+
+module_init(ttl_tg_init);
+module_exit(ttl_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
new file mode 100644
index 0000000..18a2826
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -0,0 +1,448 @@
+/*
+ * netfilter module for userspace packet logging daemons
+ *
+ * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This module accepts two parameters:
+ *
+ * nlbufsiz:
+ * The parameter specifies how big the buffer for each netlink multicast
+ * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will
+ * get accumulated in the kernel until they are sent to userspace. It is
+ * NOT possible to allocate more than 128kB, and it is strongly discouraged,
+ * because atomically allocating 128kB inside the network rx softirq is not
+ * reliable. Please also keep in mind that this buffer size is allocated for
+ * each nlgroup you are using, so the total kernel memory usage increases
+ * by that factor.
+ *
+ * Actually you should use nlbufsiz a bit smaller than PAGE_SIZE, since
+ * nlbufsiz is used with alloc_skb, which adds another
+ * sizeof(struct skb_shared_info). Use NLMSG_GOODSIZE instead.
+ *
+ * flushtimeout:
+ * Specify, after how many hundredths of a second the queue should be
+ * flushed even if it is not full yet.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/netlink.h>
+#include <linux/netdevice.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ipt_ULOG.h>
+#include <net/netfilter/nf_log.h>
+#include <net/sock.h>
+#include <linux/bitops.h>
+#include <asm/unaligned.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Xtables: packet logging to netlink using ULOG");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
+
+ /* netlink message type used for every queued packet record */
+ #define ULOG_NL_EVENT 111 /* Harald's favorite number */
+ #define ULOG_MAXNLGROUPS 32 /* number of nlgroups */
+
+ /* printk() that is suppressed whenever net_ratelimit() says no */
+ #define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0)
+
+ /* minimum size requested for each per-group buffer skb; capped at 128kB at module init */
+ static unsigned int nlbufsiz = NLMSG_GOODSIZE;
+ module_param(nlbufsiz, uint, 0400);
+ MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
+
+ /* flush delay in 1/100 s; converted to jiffies when the flush timer is armed */
+ static unsigned int flushtimeout = 10;
+ module_param(flushtimeout, uint, 0600);
+ MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
+
+ /* when non-zero, module init also registers ipt_ulog_logger via nf_log_register() */
+ static int nflog = 1;
+ module_param(nflog, bool, 0400);
+ MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
+
+/* global data structures */
+
+ /*
+ * Per-netlink-group accumulation state. One instance exists for each of the
+ * ULOG_MAXNLGROUPS groups; the code below touches it with ulog_lock held.
+ */
+ typedef struct {
+ unsigned int qlen; /* number of nlmsgs' in the skb */
+ struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */
+ struct sk_buff *skb; /* the pre-allocated skb */
+ struct timer_list timer; /* the timer function */
+ } ulog_buff_t;
+
+static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */
+
+static struct sock *nflognl; /* our socket */
+static DEFINE_SPINLOCK(ulog_lock); /* spinlock */
+
+ /*
+  * Flush the buffer of netlink group index @nlgroupnum (0-based) to
+  * userspace: cancel its flush timer, mark the last queued message as
+  * NLMSG_DONE when more than one is queued, broadcast the skb and reset the
+  * buffer to empty. Does nothing beyond the timer cancel if no skb is queued.
+  * Callers in this file hold ulog_lock.
+  */
+ static void ulog_send(unsigned int nlgroupnum)
+ {
+ 	ulog_buff_t *ub = &ulog_buffers[nlgroupnum];
+ 	const unsigned int group = nlgroupnum + 1;
+
+ 	if (timer_pending(&ub->timer)) {
+ 		pr_debug("ipt_ULOG: ulog_send: timer was pending, deleting\n");
+ 		del_timer(&ub->timer);
+ 	}
+
+ 	if (ub->skb == NULL) {
+ 		pr_debug("ipt_ULOG: ulog_send: nothing to send\n");
+ 		return;
+ 	}
+
+ 	/* last nlmsg needs NLMSG_DONE */
+ 	if (ub->qlen > 1)
+ 		ub->lastnlh->nlmsg_type = NLMSG_DONE;
+
+ 	NETLINK_CB(ub->skb).dst_group = group;
+ 	pr_debug("ipt_ULOG: throwing %d packets to netlink group %u\n",
+ 		 ub->qlen, group);
+ 	netlink_broadcast(nflognl, ub->skb, 0, group, GFP_ATOMIC);
+
+ 	/* The skb was handed to netlink_broadcast(); start over empty. */
+ 	ub->skb = NULL;
+ 	ub->lastnlh = NULL;
+ 	ub->qlen = 0;
+ }
+
+
+ /* timer function to flush queue in flushtimeout time;
+ * @data is the 0-based group index given to setup_timer() at module init */
+ static void ulog_timer(unsigned long data)
+ {
+ pr_debug("ipt_ULOG: timer function called, calling ulog_send\n");
+
+ /* lock to protect against somebody modifying our structure
+ * from ipt_ulog_target at the same time */
+ spin_lock_bh(&ulog_lock);
+ ulog_send(data);
+ spin_unlock_bh(&ulog_lock);
+ }
+
+ /*
+  * Allocate the skb that will accumulate a multipart message.
+  *
+  * First tries max(@size, nlbufsiz) bytes so several packets fit; if that
+  * fails and was larger than @size, retries with just @size. Returns NULL
+  * when both attempts fail. Allocations are GFP_ATOMIC.
+  * WARNING: the size has to be <= 131000 due to slab allocator restrictions.
+  */
+ static struct sk_buff *ulog_alloc_skb(unsigned int size)
+ {
+ 	const unsigned int want = max(size, nlbufsiz);
+ 	struct sk_buff *skb;
+
+ 	skb = alloc_skb(want, GFP_ATOMIC);
+ 	if (skb)
+ 		return skb;
+
+ 	PRINTR("ipt_ULOG: can't alloc whole buffer %ub!\n", want);
+
+ 	/* Nothing smaller to fall back to. */
+ 	if (want <= size)
+ 		return NULL;
+
+ 	/* try to allocate only as much as we need for current packet */
+ 	skb = alloc_skb(size, GFP_ATOMIC);
+ 	if (!skb)
+ 		PRINTR("ipt_ULOG: can't even allocate %ub\n", size);
+
+ 	return skb;
+ }
+
+ /*
+  * Append one packet record to the netlink buffer of the group selected by
+  * @loginfo->nl_group, flushing the buffer to userspace when it is full or
+  * the queue threshold is reached.
+  *
+  * @hooknum: netfilter hook the packet was seen on (copied into the record)
+  * @skb:     packet to log; up to @loginfo->copy_range bytes are copied
+  *           (0 means the whole packet)
+  * @in/@out: input/output devices, either may be NULL
+  * @loginfo: group mask, copy range, queue threshold and default prefix
+  * @prefix:  overrides @loginfo->prefix when non-NULL
+  *
+  * Takes ulog_lock (BH-safe) for the whole buffer manipulation. Failures are
+  * reported through rate-limited printk only; the packet is then not logged.
+  */
+ static void ipt_ulog_packet(unsigned int hooknum,
+ 			    const struct sk_buff *skb,
+ 			    const struct net_device *in,
+ 			    const struct net_device *out,
+ 			    const struct ipt_ulog_info *loginfo,
+ 			    const char *prefix)
+ {
+ 	ulog_buff_t *ub;
+ 	ulog_packet_msg_t *pm;
+ 	size_t size, copy_len;
+ 	struct nlmsghdr *nlh;
+ 	struct timeval tv;
+ 	unsigned int groupnum;
+
+ 	/* ffs() yields 0 for an empty mask, which would become index
+ 	 * (unsigned)-1 below and run far off the end of ulog_buffers[]. */
+ 	if (loginfo->nl_group == 0) {
+ 		PRINTR("ipt_ULOG: no netlink group selected\n");
+ 		return;
+ 	}
+
+ 	/* ffs == find first bit set, necessary because userspace
+ 	 * is already shifting groupnumber, but we need unshifted.
+ 	 * ffs() returns [1..32], we need [0..31] */
+ 	groupnum = ffs(loginfo->nl_group) - 1;
+
+ 	/* calculate the size of the skb needed */
+ 	if (loginfo->copy_range == 0 || loginfo->copy_range > skb->len)
+ 		copy_len = skb->len;
+ 	else
+ 		copy_len = loginfo->copy_range;
+
+ 	size = NLMSG_SPACE(sizeof(*pm) + copy_len);
+
+ 	ub = &ulog_buffers[groupnum];
+
+ 	spin_lock_bh(&ulog_lock);
+
+ 	if (!ub->skb) {
+ 		if (!(ub->skb = ulog_alloc_skb(size)))
+ 			goto alloc_failure;
+ 	} else if (ub->qlen >= loginfo->qthreshold ||
+ 		   size > skb_tailroom(ub->skb)) {
+ 		/* either the queue len is too high or we don't have
+ 		 * enough room in nlskb left. send it to userspace. */
+
+ 		ulog_send(groupnum);
+
+ 		if (!(ub->skb = ulog_alloc_skb(size)))
+ 			goto alloc_failure;
+ 	}
+
+ 	pr_debug("ipt_ULOG: qlen %d, qthreshold %Zu\n", ub->qlen,
+ 		 loginfo->qthreshold);
+
+ 	/* NLMSG_PUT contains a hidden goto nlmsg_failure !!! */
+ 	nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT,
+ 			sizeof(*pm)+copy_len);
+ 	ub->qlen++;
+
+ 	pm = NLMSG_DATA(nlh);
+
+ 	/* The skb data is not cleared on allocation. Zero the fixed header
+ 	 * so struct padding, unused prefix/device-name bytes and the tail of
+ 	 * pm->mac never carry stale kernel memory to userspace. */
+ 	memset(pm, 0, sizeof(*pm));
+
+ 	/* We might not have a timestamp, get one */
+ 	if (skb->tstamp.tv64 == 0)
+ 		__net_timestamp((struct sk_buff *)skb);
+
+ 	/* copy hook, prefix, timestamp, payload, etc. */
+ 	pm->data_len = copy_len;
+ 	tv = ktime_to_timeval(skb->tstamp);
+ 	put_unaligned(tv.tv_sec, &pm->timestamp_sec);
+ 	put_unaligned(tv.tv_usec, &pm->timestamp_usec);
+ 	put_unaligned(skb->mark, &pm->mark);
+ 	pm->hook = hooknum;
+
+ 	/* Fields left out below stay zero/empty thanks to the memset. */
+ 	if (prefix != NULL)
+ 		strncpy(pm->prefix, prefix, sizeof(pm->prefix));
+ 	else if (loginfo->prefix[0] != '\0')
+ 		strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix));
+
+ 	/* Copy the link-layer header only if one exists and fits. */
+ 	if (in && in->hard_header_len > 0
+ 	    && skb->mac_header != skb->network_header
+ 	    && in->hard_header_len <= ULOG_MAC_LEN) {
+ 		memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len);
+ 		pm->mac_len = in->hard_header_len;
+ 	}
+
+ 	if (in)
+ 		strncpy(pm->indev_name, in->name, sizeof(pm->indev_name));
+
+ 	if (out)
+ 		strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
+
+ 	/* copy_len <= skb->len, so can't fail. */
+ 	if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0)
+ 		BUG();
+
+ 	/* check if we are building multi-part messages */
+ 	if (ub->qlen > 1)
+ 		ub->lastnlh->nlmsg_flags |= NLM_F_MULTI;
+
+ 	ub->lastnlh = nlh;
+
+ 	/* if timer isn't already running, start it */
+ 	if (!timer_pending(&ub->timer)) {
+ 		ub->timer.expires = jiffies + flushtimeout * HZ / 100;
+ 		add_timer(&ub->timer);
+ 	}
+
+ 	/* if threshold is reached, send message to userspace */
+ 	if (ub->qlen >= loginfo->qthreshold) {
+ 		if (loginfo->qthreshold > 1)
+ 			nlh->nlmsg_type = NLMSG_DONE;
+ 		ulog_send(groupnum);
+ 	}
+
+ 	spin_unlock_bh(&ulog_lock);
+
+ 	return;
+
+ nlmsg_failure:
+ 	PRINTR("ipt_ULOG: error during NLMSG_PUT\n");
+
+ 	/* falls through: both paths report and then drop the lock */
+ alloc_failure:
+ 	PRINTR("ipt_ULOG: Error building netlink message\n");
+
+ 	spin_unlock_bh(&ulog_lock);
+ }
+
+ /*
+ * ULOG target: queue the packet for userspace using the rule's own
+ * ipt_ulog_info (no prefix override) and let rule traversal continue.
+ */
+ static unsigned int
+ ulog_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ {
+ ipt_ulog_packet(par->hooknum, skb, par->in, par->out,
+ par->targinfo, NULL);
+ return XT_CONTINUE;
+ }
+
+ /*
+  * nf_logger callback: translate the generic nf_loginfo into an
+  * ipt_ulog_info and queue the packet.
+  *
+  * When @li is missing or not of type NF_LOG_TYPE_ULOG the ULOG defaults
+  * (default group and threshold, whole packet, empty prefix) are used.
+  * @pf is not used.
+  */
+ static void ipt_logfn(u_int8_t pf,
+ 		      unsigned int hooknum,
+ 		      const struct sk_buff *skb,
+ 		      const struct net_device *in,
+ 		      const struct net_device *out,
+ 		      const struct nf_loginfo *li,
+ 		      const char *prefix)
+ {
+ 	struct ipt_ulog_info loginfo;
+
+ 	if (li != NULL && li->type == NF_LOG_TYPE_ULOG) {
+ 		loginfo.nl_group = li->u.ulog.group;
+ 		loginfo.copy_range = li->u.ulog.copy_len;
+ 		loginfo.qthreshold = li->u.ulog.qthreshold;
+ 		strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
+ 	} else {
+ 		loginfo.nl_group = ULOG_DEFAULT_NLGROUP;
+ 		loginfo.copy_range = 0;
+ 		loginfo.qthreshold = ULOG_DEFAULT_QTHRESHOLD;
+ 		loginfo.prefix[0] = '\0';
+ 	}
+
+ 	ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
+ }
+
+ /*
+  * Validate a ULOG rule at insertion time.
+  *
+  * Rejects a rule whose netlink group mask is empty, whose prefix is not
+  * NUL-terminated within its buffer, or whose queue threshold exceeds
+  * ULOG_MAX_QLEN.
+  */
+ static bool ulog_tg_check(const struct xt_tgchk_param *par)
+ {
+ 	const struct ipt_ulog_info *loginfo = par->targinfo;
+
+ 	/* The packet path derives the buffer index as ffs(nl_group) - 1;
+ 	 * an empty mask would index far outside ulog_buffers[]. */
+ 	if (loginfo->nl_group == 0) {
+ 		pr_debug("ipt_ULOG: no netlink group selected\n");
+ 		return false;
+ 	}
+ 	if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
+ 		pr_debug("ipt_ULOG: prefix term %i\n",
+ 			 loginfo->prefix[sizeof(loginfo->prefix) - 1]);
+ 		return false;
+ 	}
+ 	if (loginfo->qthreshold > ULOG_MAX_QLEN) {
+ 		pr_debug("ipt_ULOG: queue threshold %Zu > MAX_QLEN\n",
+ 			 loginfo->qthreshold);
+ 		return false;
+ 	}
+ 	return true;
+ }
+
+#ifdef CONFIG_COMPAT
+ /*
+ * Compat-task layout of struct ipt_ulog_info: the same four fields, declared
+ * with compat_* integer types. Converted by the two helpers below.
+ */
+ struct compat_ipt_ulog_info {
+ compat_uint_t nl_group;
+ compat_size_t copy_range;
+ compat_size_t qthreshold;
+ char prefix[ULOG_PREFIX_LEN];
+ };
+
+ /*
+  * Expand a compat_ipt_ulog_info at @src into a native ipt_ulog_info at
+  * @dst, widening the integer fields and copying the prefix verbatim.
+  */
+ static void ulog_tg_compat_from_user(void *dst, void *src)
+ {
+ 	const struct compat_ipt_ulog_info *compat = src;
+ 	struct ipt_ulog_info native = {
+ 		.nl_group	= compat->nl_group,
+ 		.copy_range	= compat->copy_range,
+ 		.qthreshold	= compat->qthreshold,
+ 	};
+
+ 	memcpy(native.prefix, compat->prefix, sizeof(native.prefix));
+ 	memcpy(dst, &native, sizeof(native));
+ }
+
+ /*
+  * Shrink the native ipt_ulog_info at @src into the compat layout and copy
+  * it to the user buffer @dst. Returns 0 on success, -EFAULT if the copy to
+  * userspace fails.
+  */
+ static int ulog_tg_compat_to_user(void __user *dst, void *src)
+ {
+ 	const struct ipt_ulog_info *native = src;
+ 	struct compat_ipt_ulog_info compat = {
+ 		.nl_group	= native->nl_group,
+ 		.copy_range	= native->copy_range,
+ 		.qthreshold	= native->qthreshold,
+ 	};
+
+ 	memcpy(compat.prefix, native->prefix, sizeof(compat.prefix));
+
+ 	if (copy_to_user(dst, &compat, sizeof(compat)))
+ 		return -EFAULT;
+ 	return 0;
+ }
+#endif /* CONFIG_COMPAT */
+
+ /*
+ * xtables registration record for the IPv4 "ULOG" target. The compat
+ * conversion hooks are wired in only when CONFIG_COMPAT is set.
+ */
+ static struct xt_target ulog_tg_reg __read_mostly = {
+ .name = "ULOG",
+ .family = NFPROTO_IPV4,
+ .target = ulog_tg,
+ .targetsize = sizeof(struct ipt_ulog_info),
+ .checkentry = ulog_tg_check,
+ #ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_ipt_ulog_info),
+ .compat_from_user = ulog_tg_compat_from_user,
+ .compat_to_user = ulog_tg_compat_to_user,
+ #endif
+ .me = THIS_MODULE,
+ };
+
+ /* nf_log backend descriptor; registered for NFPROTO_IPV4 at init when the nflog parameter is set. */
+ static struct nf_logger ipt_ulog_logger = {
+ .name = "ipt_ULOG",
+ .logfn = ipt_logfn,
+ .me = THIS_MODULE,
+ };
+
+ /*
+  * Module init: validate nlbufsiz, prepare one flush timer per netlink
+  * group, create the NETLINK_NFLOG kernel socket, register the ULOG target
+  * and, if requested through the nflog parameter, the nf_log backend.
+  *
+  * Returns 0 on success, -EINVAL for an oversized nlbufsiz, -ENOMEM if the
+  * socket cannot be created, or the error from xt_register_target().
+  */
+ static int __init ulog_tg_init(void)
+ {
+ 	int err, grp;
+
+ 	pr_debug("ipt_ULOG: init module\n");
+
+ 	if (nlbufsiz > 128*1024) {
+ 		printk("Netlink buffer has to be <= 128kB\n");
+ 		return -EINVAL;
+ 	}
+
+ 	/* initialize ulog_buffers: each timer carries its group index */
+ 	for (grp = 0; grp < ULOG_MAXNLGROUPS; grp++)
+ 		setup_timer(&ulog_buffers[grp].timer, ulog_timer, grp);
+
+ 	nflognl = netlink_kernel_create(&init_net,
+ 					NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
+ 					NULL, THIS_MODULE);
+ 	if (nflognl == NULL)
+ 		return -ENOMEM;
+
+ 	err = xt_register_target(&ulog_tg_reg);
+ 	if (err < 0)
+ 		goto release_socket;
+
+ 	if (nflog)
+ 		nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
+
+ 	return 0;
+
+ release_socket:
+ 	netlink_kernel_release(nflognl);
+ 	return err;
+ }
+
+ /*
+  * Module exit: unregister the logger (if it was registered) and the
+  * target, release the netlink socket, then cancel any pending flush timer
+  * and free any skb still queued in each group buffer.
+  */
+ static void __exit ulog_tg_exit(void)
+ {
+ 	int grp;
+
+ 	pr_debug("ipt_ULOG: cleanup_module\n");
+
+ 	if (nflog)
+ 		nf_log_unregister(&ipt_ulog_logger);
+ 	xt_unregister_target(&ulog_tg_reg);
+ 	netlink_kernel_release(nflognl);
+
+ 	/* remove pending timers and free allocated skb's */
+ 	for (grp = 0; grp < ULOG_MAXNLGROUPS; grp++) {
+ 		ulog_buff_t *ub = &ulog_buffers[grp];
+
+ 		if (timer_pending(&ub->timer)) {
+ 			pr_debug("timer was pending, deleting\n");
+ 			del_timer(&ub->timer);
+ 		}
+
+ 		if (ub->skb == NULL)
+ 			continue;
+
+ 		kfree_skb(ub->skb);
+ 		ub->skb = NULL;
+ 	}
+ }
+
+module_init(ulog_tg_init);
+module_exit(ulog_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
new file mode 100644
index 0000000..88762f0
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -0,0 +1,132 @@
+/*
+ * iptables module to match inet_addr_type() of an ip.
+ *
+ * Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
+ * (C) 2007 Laszlo Attila Toth <panther@balabit.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <net/route.h>
+
+#include <linux/netfilter_ipv4/ipt_addrtype.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: address type match for IPv4");
+
+ /*
+  * True when the address type that inet_dev_addr_type() reports for @addr
+  * (looked up in init_net, optionally limited to @dev) has its bit set in
+  * @mask.
+  */
+ static inline bool match_type(const struct net_device *dev, __be32 addr,
+ 			      u_int16_t mask)
+ {
+ 	return (mask & (1 << inet_dev_addr_type(&init_net, dev, addr))) != 0;
+ }
+
+ /*
+  * Revision 0 addrtype match: test the source and/or destination address
+  * type of the packet against the rule's masks, each XORed with its invert
+  * field. A zero mask skips that side. No interface restriction is applied.
+  */
+ static bool
+ addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+ {
+ 	const struct ipt_addrtype_info *info = par->matchinfo;
+ 	const struct iphdr *iph = ip_hdr(skb);
+ 	unsigned int verdict = 1;
+
+ 	/* Only bit 0 survives the AND chain, as with a bool accumulator. */
+ 	if (info->source)
+ 		verdict &= match_type(NULL, iph->saddr, info->source) ^
+ 			   info->invert_source;
+ 	if (info->dest)
+ 		verdict &= match_type(NULL, iph->daddr, info->dest) ^
+ 			   info->invert_dest;
+
+ 	return verdict;
+ }
+
+ /*
+  * Revision 1 addrtype match: like revision 0, but the lookup can be
+  * limited to the input or output interface and inversion is carried in
+  * info->flags.
+  *
+  * The destination test is skipped once the source test has failed.
+  */
+ static bool
+ addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
+ {
+ 	const struct ipt_addrtype_info_v1 *info = par->matchinfo;
+ 	const struct iphdr *iph = ip_hdr(skb);
+ 	const struct net_device *dev = NULL;
+ 	bool ret = true;
+
+ 	if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN)
+ 		dev = par->in;
+ 	else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT)
+ 		dev = par->out;
+
+ 	/* Normalise both invert flags to 0/1 before XORing with the bool
+ 	 * match result, so the outcome does not depend on which bit the
+ 	 * flag occupies. */
+ 	if (info->source)
+ 		ret &= match_type(dev, iph->saddr, info->source) ^
+ 		       !!(info->flags & IPT_ADDRTYPE_INVERT_SOURCE);
+ 	if (ret && info->dest)
+ 		ret &= match_type(dev, iph->daddr, info->dest) ^
+ 		       !!(info->flags & IPT_ADDRTYPE_INVERT_DEST);
+ 	return ret;
+ }
+
+ /*
+  * Validate a revision 1 addrtype rule: the two interface limits are
+  * mutually exclusive, an output-interface limit is refused on PRE_ROUTING
+  * and LOCAL_IN, and an input-interface limit is refused on POST_ROUTING and
+  * LOCAL_OUT.
+  */
+ static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
+ {
+ 	const struct ipt_addrtype_info_v1 *info = par->matchinfo;
+ 	const bool limit_in = info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN;
+ 	const bool limit_out = info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT;
+ 	const unsigned int input_hooks = (1 << NF_INET_PRE_ROUTING) |
+ 					 (1 << NF_INET_LOCAL_IN);
+ 	const unsigned int output_hooks = (1 << NF_INET_POST_ROUTING) |
+ 					  (1 << NF_INET_LOCAL_OUT);
+
+ 	if (limit_in && limit_out) {
+ 		printk(KERN_ERR "ipt_addrtype: both incoming and outgoing "
+ 				"interface limitation cannot be selected\n");
+ 		return false;
+ 	}
+
+ 	if (limit_out && (par->hook_mask & input_hooks)) {
+ 		printk(KERN_ERR "ipt_addrtype: output interface limitation "
+ 				"not valid in PRE_ROUTING and INPUT\n");
+ 		return false;
+ 	}
+
+ 	if (limit_in && (par->hook_mask & output_hooks)) {
+ 		printk(KERN_ERR "ipt_addrtype: input interface limitation "
+ 				"not valid in POST_ROUTING and OUTPUT\n");
+ 		return false;
+ 	}
+
+ 	return true;
+ }
+
+ /*
+ * Registration records for both revisions of the IPv4 "addrtype" match.
+ * Only revision 1 has a checkentry hook.
+ */
+ static struct xt_match addrtype_mt_reg[] __read_mostly = {
+ {
+ .name = "addrtype",
+ .family = NFPROTO_IPV4,
+ .match = addrtype_mt_v0,
+ .matchsize = sizeof(struct ipt_addrtype_info),
+ .me = THIS_MODULE
+ },
+ {
+ .name = "addrtype",
+ .family = NFPROTO_IPV4,
+ .revision = 1,
+ .match = addrtype_mt_v1,
+ .checkentry = addrtype_mt_checkentry_v1,
+ .matchsize = sizeof(struct ipt_addrtype_info_v1),
+ .me = THIS_MODULE
+ }
+ };
+
+ /* Module init: register both addrtype revisions; returns xt_register_matches()'s result. */
+ static int __init addrtype_mt_init(void)
+ {
+ return xt_register_matches(addrtype_mt_reg,
+ ARRAY_SIZE(addrtype_mt_reg));
+ }
+
+ /* Module exit: unregister both addrtype revisions. */
+ static void __exit addrtype_mt_exit(void)
+ {
+ xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg));
+ }
+
+module_init(addrtype_mt_init);
+module_exit(addrtype_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
new file mode 100644
index 0000000..0104c0b
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -0,0 +1,97 @@
+/* Kernel module to match AH parameters. */
+/* (C) 1999-2000 Yon Uriarte <yon@astaro.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/in.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+
+#include <linux/netfilter_ipv4/ipt_ah.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
+MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match");
+
+ /* Debug output: duprintf() is printk() when DEBUG_CONNTRACK is defined and expands to nothing otherwise. */
+ #ifdef DEBUG_CONNTRACK
+ #define duprintf(format, args...) printk(format , ## args)
+ #else
+ #define duprintf(format, args...)
+ #endif
+
+ /*
+  * Returns true if @spi lies in the inclusive range [@min, @max], with the
+  * answer flipped when @invert is set.
+  */
+ static inline bool
+ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
+ {
+ 	bool in_range;
+ 	bool verdict;
+
+ 	duprintf("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
+ 		 min,spi,max);
+ 	in_range = (spi >= min && spi <= max);
+ 	verdict = (in_range != invert);
+ 	duprintf(" result %s\n",verdict? "PASS" : "FAILED");
+ 	return verdict;
+ }
+
+ /*
+  * AH match: compare the SPI of the packet's authentication header with the
+  * rule's range.
+  *
+  * Non-first fragments never match. If the AH header cannot be read at
+  * par->thoff the packet is hot-dropped and the match fails.
+  */
+ static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ {
+ 	const struct ipt_ah *ahinfo = par->matchinfo;
+ 	struct ip_auth_hdr hdr_buf;
+ 	const struct ip_auth_hdr *ah;
+ 	bool invert;
+
+ 	/* Must not be a fragment. */
+ 	if (par->fragoff != 0)
+ 		return false;
+
+ 	ah = skb_header_pointer(skb, par->thoff, sizeof(hdr_buf), &hdr_buf);
+ 	if (ah == NULL) {
+ 		/* We've been asked to examine this packet, and we
+ 		 * can't. Hence, no choice but to drop.
+ 		 */
+ 		duprintf("Dropping evil AH tinygram.\n");
+ 		*par->hotdrop = true;
+ 		return false;
+ 	}
+
+ 	invert = !!(ahinfo->invflags & IPT_AH_INV_SPI);
+
+ 	return spi_match(ahinfo->spis[0], ahinfo->spis[1],
+ 			 ntohl(ah->spi), invert);
+ }
+
+ /* Validate an AH rule: no inversion flag outside IPT_AH_INV_MASK may be set. */
+ static bool ah_mt_check(const struct xt_mtchk_param *par)
+ {
+ 	const struct ipt_ah *ahinfo = par->matchinfo;
+
+ 	if ((ahinfo->invflags & ~IPT_AH_INV_MASK) == 0)
+ 		return true;
+
+ 	/* Must specify no unknown invflags */
+ 	duprintf("ipt_ah: unknown flags %X\n", ahinfo->invflags);
+ 	return false;
+ }
+
+ /* xtables registration record for the IPv4 "ah" match; bound to protocol IPPROTO_AH. */
+ static struct xt_match ah_mt_reg __read_mostly = {
+ .name = "ah",
+ .family = NFPROTO_IPV4,
+ .match = ah_mt,
+ .matchsize = sizeof(struct ipt_ah),
+ .proto = IPPROTO_AH,
+ .checkentry = ah_mt_check,
+ .me = THIS_MODULE,
+ };
+
+ /* Module init: register the ah match; returns xt_register_match()'s result. */
+ static int __init ah_mt_init(void)
+ {
+ return xt_register_match(&ah_mt_reg);
+ }
+
+ /* Module exit: unregister the ah match. */
+ static void __exit ah_mt_exit(void)
+ {
+ xt_unregister_match(&ah_mt_reg);
+ }
+
+module_init(ah_mt_init);
+module_exit(ah_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
new file mode 100644
index 0000000..6289b64
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -0,0 +1,129 @@
+/* IP tables module for matching the value of the IPv4 and TCP ECN bits
+ *
+ * (C) 2002 by Harald Welte <laforge@gnumonks.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_ecn.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4");
+MODULE_LICENSE("GPL");
+
+ /* True when the IPv4 TOS byte, masked with IPT_ECN_IP_MASK, equals the rule's ip_ect value. */
+ static inline bool match_ip(const struct sk_buff *skb,
+ const struct ipt_ecn_info *einfo)
+ {
+ return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect;
+ }
+
+ /*
+  * Check the TCP ECE and CWR flags of @skb against the rule.
+  *
+  * For each flag selected in einfo->operation the header bit must be set,
+  * or clear when the same bit is also present in einfo->invert. If the TCP
+  * header cannot be read the match fails and *@hotdrop is written as false.
+  */
+ static inline bool match_tcp(const struct sk_buff *skb,
+ 			     const struct ipt_ecn_info *einfo,
+ 			     bool *hotdrop)
+ {
+ 	struct tcphdr hdr_buf;
+ 	const struct tcphdr *th;
+
+ 	/* In practice, TCP match does this, so can't fail. But let's
+ 	 * be good citizens.
+ 	 */
+ 	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(hdr_buf), &hdr_buf);
+ 	if (th == NULL) {
+ 		*hotdrop = false;
+ 		return false;
+ 	}
+
+ 	if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
+ 		/* wanted bit value: 0 when inverted, 1 otherwise */
+ 		const bool want_ece = !(einfo->invert & IPT_ECN_OP_MATCH_ECE);
+
+ 		if (th->ece != want_ece)
+ 			return false;
+ 	}
+
+ 	if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
+ 		const bool want_cwr = !(einfo->invert & IPT_ECN_OP_MATCH_CWR);
+
+ 		if (th->cwr != want_cwr)
+ 			return false;
+ 	}
+
+ 	return true;
+ }
+
+ /*
+  * ECN match entry point: apply the IP-level test when requested, then the
+  * TCP-level test when ECE or CWR matching is requested. The TCP test fails
+  * outright for packets whose IP protocol is not TCP.
+  */
+ static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ {
+ 	const struct ipt_ecn_info *info = par->matchinfo;
+ 	const unsigned int tcp_ops = IPT_ECN_OP_MATCH_ECE |
+ 				     IPT_ECN_OP_MATCH_CWR;
+
+ 	if ((info->operation & IPT_ECN_OP_MATCH_IP) && !match_ip(skb, info))
+ 		return false;
+
+ 	/* No TCP flag requested: the IP test alone decides. */
+ 	if (!(info->operation & tcp_ops))
+ 		return true;
+
+ 	if (ip_hdr(skb)->protocol != IPPROTO_TCP)
+ 		return false;
+
+ 	return match_tcp(skb, info, par->hotdrop);
+ }
+
+ /*
+  * Validate an ECN rule at insertion time.
+  *
+  * Rejects rules with any IPT_ECN_OP_MATCH_MASK bit set in operation or
+  * invert, and rules that ask for TCP ECE/CWR matching without being a
+  * plain, non-inverted "-p tcp" rule.
+  */
+ static bool ecn_mt_check(const struct xt_mtchk_param *par)
+ {
+ 	const struct ipt_ecn_info *info = par->matchinfo;
+ 	const struct ipt_ip *ip = par->entryinfo;
+
+ 	if (info->operation & IPT_ECN_OP_MATCH_MASK)
+ 		return false;
+
+ 	if (info->invert & IPT_ECN_OP_MATCH_MASK)
+ 		return false;
+
+ 	/* "! -p tcp" stores proto == TCP with the invert flag set; such a
+ 	 * rule selects everything except TCP and must be refused too. */
+ 	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)
+ 	    && (ip->proto != IPPROTO_TCP || (ip->invflags & IPT_INV_PROTO))) {
+ 		printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for"
+ 		       " non-tcp packets\n");
+ 		return false;
+ 	}
+
+ 	return true;
+ }
+
+ /* xtables registration record for the IPv4 "ecn" match. */
+ static struct xt_match ecn_mt_reg __read_mostly = {
+ .name = "ecn",
+ .family = NFPROTO_IPV4,
+ .match = ecn_mt,
+ .matchsize = sizeof(struct ipt_ecn_info),
+ .checkentry = ecn_mt_check,
+ .me = THIS_MODULE,
+ };
+
+ /* Module init: register the ecn match; returns xt_register_match()'s result. */
+ static int __init ecn_mt_init(void)
+ {
+ return xt_register_match(&ecn_mt_reg);
+ }
+
+ /* Module exit: unregister the ecn match. */
+ static void __exit ecn_mt_exit(void)
+ {
+ xt_unregister_match(&ecn_mt_reg);
+ }
+
+module_init(ecn_mt_init);
+module_exit(ecn_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
new file mode 100644
index 0000000..297f1cb
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -0,0 +1,63 @@
+/* IP tables module for matching the value of the TTL
+ *
+ * (C) 2000,2001 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter_ipv4/ipt_ttl.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: IPv4 TTL field match");
+MODULE_LICENSE("GPL");
+
+ /*
+  * TTL match: compare the packet's IPv4 TTL with the rule's value using the
+  * rule's mode (equal, not equal, less than, greater than). An unknown mode
+  * is logged and never matches.
+  */
+ static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ {
+ 	const struct ipt_ttl_info *info = par->matchinfo;
+ 	const u8 ttl = ip_hdr(skb)->ttl;
+ 	bool hit = false;
+
+ 	switch (info->mode) {
+ 	case IPT_TTL_EQ:
+ 		hit = (ttl == info->ttl);
+ 		break;
+ 	case IPT_TTL_NE:
+ 		hit = (ttl != info->ttl);
+ 		break;
+ 	case IPT_TTL_LT:
+ 		hit = (ttl < info->ttl);
+ 		break;
+ 	case IPT_TTL_GT:
+ 		hit = (ttl > info->ttl);
+ 		break;
+ 	default:
+ 		printk(KERN_WARNING "ipt_ttl: unknown mode %d\n",
+ 		       info->mode);
+ 		break;
+ 	}
+
+ 	return hit;
+ }
+
+ /* xtables registration record for the IPv4 "ttl" match; it has no checkentry hook. */
+ static struct xt_match ttl_mt_reg __read_mostly = {
+ .name = "ttl",
+ .family = NFPROTO_IPV4,
+ .match = ttl_mt,
+ .matchsize = sizeof(struct ipt_ttl_info),
+ .me = THIS_MODULE,
+ };
+
+ /* Module init: register the ttl match; returns xt_register_match()'s result. */
+ static int __init ttl_mt_init(void)
+ {
+ return xt_register_match(&ttl_mt_reg);
+ }
+
+ /* Module exit: unregister the ttl match. */
+ static void __exit ttl_mt_exit(void)
+ {
+ xt_unregister_match(&ttl_mt_reg);
+ }
+
+module_init(ttl_mt_init);
+module_exit(ttl_mt_exit);
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
new file mode 100644
index 0000000..c922431
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -0,0 +1,190 @@
+/*
+ * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables filter table");
+
+ /* Bitmask of the hooks served by the filter table: LOCAL_IN, FORWARD and LOCAL_OUT. */
+ #define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
+ (1 << NF_INET_FORWARD) | \
+ (1 << NF_INET_LOCAL_OUT))
+
+ /*
+ * Initial ruleset handed to ipt_register_table(): one standard ACCEPT entry
+ * per valid hook, followed by the terminating error entry. hook_entry and
+ * underflow hold the byte offset of each hook's entry within the blob.
+ * Entry 1 (FORWARD) has its verdict overwritten at module init from the
+ * "forward" parameter.
+ */
+ static struct
+ {
+ struct ipt_replace repl;
+ struct ipt_standard entries[3];
+ struct ipt_error term;
+ } initial_table __net_initdata = {
+ .repl = {
+ .name = "filter",
+ .valid_hooks = FILTER_VALID_HOOKS,
+ .num_entries = 4,
+ .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
+ .hook_entry = {
+ [NF_INET_LOCAL_IN] = 0,
+ [NF_INET_FORWARD] = sizeof(struct ipt_standard),
+ [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
+ },
+ .underflow = {
+ [NF_INET_LOCAL_IN] = 0,
+ [NF_INET_FORWARD] = sizeof(struct ipt_standard),
+ [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
+ },
+ },
+ .entries = {
+ IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */
+ IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */
+ IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
+ },
+ .term = IPT_ERROR_INIT, /* ERROR */
+ };
+
+ /* Table descriptor for "filter"; passed to ipt_register_table() once per network namespace. */
+ static struct xt_table packet_filter = {
+ .name = "filter",
+ .valid_hooks = FILTER_VALID_HOOKS,
+ .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
+ .me = THIS_MODULE,
+ .af = AF_INET,
+ };
+
+ /* The work comes in here from netfilter.c. */
+ /* LOCAL_IN hook: run the packet through the filter table of the input device's namespace. */
+ static unsigned int
+ ipt_local_in_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
+ return ipt_do_table(skb, hook, in, out,
+ dev_net(in)->ipv4.iptable_filter);
+ }
+
+ /* FORWARD hook (see ipt_ops): run the packet through the filter table of the input device's namespace. */
+ static unsigned int
+ ipt_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
+ return ipt_do_table(skb, hook, in, out,
+ dev_net(in)->ipv4.iptable_filter);
+ }
+
+ /*
+  * LOCAL_OUT hook: run locally generated packets through the filter table
+  * of the output device's namespace.
+  *
+  * Packets too short to hold an IPv4 header (raw sockets can produce them)
+  * are accepted without consulting the table.
+  */
+ static unsigned int
+ ipt_local_out_hook(unsigned int hook,
+ 		   struct sk_buff *skb,
+ 		   const struct net_device *in,
+ 		   const struct net_device *out,
+ 		   int (*okfn)(struct sk_buff *))
+ {
+ 	/* root is playing with raw sockets. */
+ 	const bool too_short = skb->len < sizeof(struct iphdr) ||
+ 			       ip_hdrlen(skb) < sizeof(struct iphdr);
+
+ 	if (!too_short)
+ 		return ipt_do_table(skb, hook, in, out,
+ 				    dev_net(out)->ipv4.iptable_filter);
+
+ 	if (net_ratelimit())
+ 		printk("iptable_filter: ignoring short SOCK_RAW "
+ 		       "packet.\n");
+ 	return NF_ACCEPT;
+ }
+
+ /* Hook registrations for the filter table: one handler per valid hook, all at NF_IP_PRI_FILTER priority. */
+ static struct nf_hook_ops ipt_ops[] __read_mostly = {
+ {
+ .hook = ipt_local_in_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_FILTER,
+ },
+ {
+ .hook = ipt_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_FORWARD,
+ .priority = NF_IP_PRI_FILTER,
+ },
+ {
+ .hook = ipt_local_out_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_FILTER,
+ },
+ };
+
+ /* Default to forward because I got too much mail already. */
+ /* Verdict for the built-in FORWARD entry; applied to initial_table at module init. */
+ static int forward = NF_ACCEPT;
+ module_param(forward, bool, 0000);
+
+ /*
+  * Per-namespace init: register the filter table with its initial ruleset
+  * and store the result in net->ipv4.iptable_filter. Returns 0 on success
+  * or the error encoded in the returned pointer.
+  */
+ static int __net_init iptable_filter_net_init(struct net *net)
+ {
+ 	/* Register table */
+ 	net->ipv4.iptable_filter = ipt_register_table(net, &packet_filter,
+ 						      &initial_table.repl);
+
+ 	return IS_ERR(net->ipv4.iptable_filter) ?
+ 	       PTR_ERR(net->ipv4.iptable_filter) : 0;
+ }
+
+ /* Per-namespace exit: unregister this namespace's filter table. */
+ static void __net_exit iptable_filter_net_exit(struct net *net)
+ {
+ ipt_unregister_table(net->ipv4.iptable_filter);
+ }
+
+ /* Per-namespace init/exit callbacks, registered with register_pernet_subsys() at module init. */
+ static struct pernet_operations iptable_filter_net_ops = {
+ .init = iptable_filter_net_init,
+ .exit = iptable_filter_net_exit,
+ };
+
+ /*
+  * Module init: validate the "forward" parameter, patch it into the
+  * built-in FORWARD entry, register the per-namespace table operations and
+  * then the netfilter hooks.
+  *
+  * Returns 0 on success, -EINVAL for an out-of-range "forward", or the
+  * error from either registration step (the pernet registration is undone
+  * if hook registration fails).
+  */
+ static int __init iptable_filter_init(void)
+ {
+ 	int err;
+
+ 	if (forward < 0 || forward > NF_MAX_VERDICT) {
+ 		printk("iptables forward must be 0 or 1\n");
+ 		return -EINVAL;
+ 	}
+
+ 	/* Entry 1 is the FORWARD hook */
+ 	initial_table.entries[1].target.verdict = -forward - 1;
+
+ 	err = register_pernet_subsys(&iptable_filter_net_ops);
+ 	if (err < 0)
+ 		return err;
+
+ 	/* Register hooks */
+ 	err = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+ 	if (err < 0)
+ 		unregister_pernet_subsys(&iptable_filter_net_ops);
+
+ 	return err;
+ }
+
+ /* Module exit: undo init in reverse order, hooks first and then the per-namespace tables. */
+ static void __exit iptable_filter_fini(void)
+ {
+ nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+ unregister_pernet_subsys(&iptable_filter_net_ops);
+ }
+
+module_init(iptable_filter_init);
+module_exit(iptable_filter_fini);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
new file mode 100644
index 0000000..69f2c42
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -0,0 +1,251 @@
+/*
+ * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/route.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables mangle table");
+
+/* Bitmask of the hooks the mangle table is valid for: one bit per hook number. */
+#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
+ (1 << NF_INET_LOCAL_IN) | \
+ (1 << NF_INET_FORWARD) | \
+ (1 << NF_INET_LOCAL_OUT) | \
+ (1 << NF_INET_POST_ROUTING))
+
+/* Ouch - five different hooks? Maybe this should be a config option..... -- BC */
+/*
+ * Initial ruleset for the mangle table: a replace header, one standard
+ * ACCEPT entry per hook, and a terminating error entry.
+ */
+static struct
+{
+ struct ipt_replace repl;
+ struct ipt_standard entries[5];
+ struct ipt_error term;
+} initial_table __net_initdata = {
+ .repl = {
+ .name = "mangle",
+ .valid_hooks = MANGLE_VALID_HOOKS,
+ /* five standard entries plus the error terminator */
+ .num_entries = 6,
+ .size = sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
+ /* byte offset of each hook's entry within the entries that follow */
+ .hook_entry = {
+ [NF_INET_PRE_ROUTING] = 0,
+ [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard),
+ [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2,
+ [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3,
+ [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4,
+ },
+ /* same offsets as hook_entry: each chain holds only its policy entry */
+ .underflow = {
+ [NF_INET_PRE_ROUTING] = 0,
+ [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard),
+ [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2,
+ [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3,
+ [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4,
+ },
+ },
+ .entries = {
+ IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */
+ IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */
+ IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */
+ IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
+ IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */
+ },
+ .term = IPT_ERROR_INIT, /* ERROR */
+};
+
+/* Table descriptor passed to ipt_register_table() for each network namespace. */
+static struct xt_table packet_mangler = {
+ .name = "mangle",
+ .valid_hooks = MANGLE_VALID_HOOKS,
+ .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock),
+ .me = THIS_MODULE,
+ .af = AF_INET,
+};
+
+/*
+ * PRE_ROUTING entry point called by the netfilter core: run the packet
+ * through the mangle table of the namespace owning the input device.
+ */
+static unsigned int
+ipt_pre_routing_hook(unsigned int hook, struct sk_buff *skb,
+		     const struct net_device *in,
+		     const struct net_device *out,
+		     int (*okfn)(struct sk_buff *))
+{
+	struct net *net = dev_net(in);
+
+	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_mangle);
+}
+
+/*
+ * POST_ROUTING hook: run the packet through the mangle table of the
+ * namespace owning the output device.
+ */
+static unsigned int
+ipt_post_routing_hook(unsigned int hook, struct sk_buff *skb,
+		      const struct net_device *in,
+		      const struct net_device *out,
+		      int (*okfn)(struct sk_buff *))
+{
+	struct net *net = dev_net(out);
+
+	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_mangle);
+}
+
+/*
+ * LOCAL_IN hook: run the packet through the mangle table of the namespace
+ * owning the input device.
+ */
+static unsigned int
+ipt_local_in_hook(unsigned int hook, struct sk_buff *skb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  int (*okfn)(struct sk_buff *))
+{
+	struct net *net = dev_net(in);
+
+	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_mangle);
+}
+
+/*
+ * FORWARD hook: run the packet through the mangle table of the namespace
+ * owning the input device.
+ */
+static unsigned int
+ipt_forward_hook(unsigned int hook, struct sk_buff *skb,
+		 const struct net_device *in,
+		 const struct net_device *out,
+		 int (*okfn)(struct sk_buff *))
+{
+	struct net *net = dev_net(in);
+
+	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_mangle);
+}
+
+/*
+ * LOCAL_OUT hook for the mangle table.
+ *
+ * Packets too short to hold an IP header are accepted untouched.  For all
+ * others the fields that influence routing (mark, source and destination
+ * address, TOS) are saved, the table is traversed, and if any of those
+ * fields changed the packet is rerouted with ip_route_me_harder().  A
+ * rerouting failure turns the verdict into NF_DROP.
+ *
+ * Returns the table verdict, NF_ACCEPT for short packets, or NF_DROP when
+ * rerouting fails.
+ */
+static unsigned int
+ipt_local_hook(unsigned int hook,
+	       struct sk_buff *skb,
+	       const struct net_device *in,
+	       const struct net_device *out,
+	       int (*okfn)(struct sk_buff *))
+{
+	unsigned int ret;
+	const struct iphdr *iph;
+	u_int8_t tos;
+	__be32 saddr, daddr;
+	u_int32_t mark;
+
+	/* root is playing with raw sockets. */
+	if (skb->len < sizeof(struct iphdr)
+	    || ip_hdrlen(skb) < sizeof(struct iphdr)) {
+		/* Same log level as the identical check in iptable_security. */
+		if (net_ratelimit())
+			printk(KERN_INFO "iptable_mangle: ignoring short "
+			       "SOCK_RAW packet.\n");
+		return NF_ACCEPT;
+	}
+
+	/* Save things which could affect route */
+	mark = skb->mark;
+	iph = ip_hdr(skb);
+	saddr = iph->saddr;
+	daddr = iph->daddr;
+	tos = iph->tos;
+
+	ret = ipt_do_table(skb, hook, in, out,
+			   dev_net(out)->ipv4.iptable_mangle);
+	/* Reroute for ANY change. */
+	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
+		/* Re-read the header pointer after the table traversal. */
+		iph = ip_hdr(skb);
+
+		if (iph->saddr != saddr ||
+		    iph->daddr != daddr ||
+		    skb->mark != mark ||
+		    iph->tos != tos) {
+			if (ip_route_me_harder(skb, RTN_UNSPEC))
+				ret = NF_DROP;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Hook registrations for the mangle table: one entry per IPv4 hook, all at
+ * priority NF_IP_PRI_MANGLE.  LOCAL_OUT uses ipt_local_hook, which may
+ * reroute the packet after mangling.
+ */
+static struct nf_hook_ops ipt_ops[] __read_mostly = {
+ {
+ .hook = ipt_pre_routing_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_MANGLE,
+ },
+ {
+ .hook = ipt_local_in_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_MANGLE,
+ },
+ {
+ .hook = ipt_forward_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_FORWARD,
+ .priority = NF_IP_PRI_MANGLE,
+ },
+ {
+ .hook = ipt_local_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_MANGLE,
+ },
+ {
+ .hook = ipt_post_routing_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_MANGLE,
+ },
+};
+
+/*
+ * Per-namespace setup: register the mangle table for @net from the initial
+ * ruleset.  Returns 0 on success or the error carried by the returned
+ * pointer.
+ */
+static int __net_init iptable_mangle_net_init(struct net *net)
+{
+	net->ipv4.iptable_mangle = ipt_register_table(net, &packet_mangler,
+						      &initial_table.repl);
+
+	return IS_ERR(net->ipv4.iptable_mangle) ?
+		PTR_ERR(net->ipv4.iptable_mangle) : 0;
+}
+
+/* Per-namespace teardown: unregister the mangle table stored in @net. */
+static void __net_exit iptable_mangle_net_exit(struct net *net)
+{
+ ipt_unregister_table(net->ipv4.iptable_mangle);
+}
+
+/* Init/exit callbacks handed to register_pernet_subsys() by the module init. */
+static struct pernet_operations iptable_mangle_net_ops = {
+ .init = iptable_mangle_net_init,
+ .exit = iptable_mangle_net_exit,
+};
+
+/*
+ * Module entry point: register the per-namespace table operations, then
+ * the hooks.  A hook registration failure rolls back the per-namespace
+ * registration.  Returns a negative error code on failure.
+ */
+static int __init iptable_mangle_init(void)
+{
+	int err = register_pernet_subsys(&iptable_mangle_net_ops);
+
+	if (err < 0)
+		return err;
+
+	/* Hooks go in last so they never run without a table. */
+	err = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+	if (err < 0)
+		unregister_pernet_subsys(&iptable_mangle_net_ops);
+
+	return err;
+}
+
+/*
+ * Module exit: undo iptable_mangle_init() in reverse order - the hooks are
+ * removed first, then the per-namespace table operations.
+ */
+static void __exit iptable_mangle_fini(void)
+{
+ nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+ unregister_pernet_subsys(&iptable_mangle_net_ops);
+}
+
+module_init(iptable_mangle_init);
+module_exit(iptable_mangle_fini);
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
new file mode 100644
index 0000000..8faebfe
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -0,0 +1,144 @@
+/*
+ * 'raw' table, which is the very first table hooked in at PRE_ROUTING and LOCAL_OUT.
+ *
+ * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/ip.h>
+
+/* Bitmask of the hooks the raw table is valid for: PRE_ROUTING and LOCAL_OUT. */
+#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
+
+/*
+ * Initial ruleset for the raw table: a replace header, one standard ACCEPT
+ * entry per hook, and a terminating error entry.
+ */
+static struct
+{
+ struct ipt_replace repl;
+ struct ipt_standard entries[2];
+ struct ipt_error term;
+} initial_table __net_initdata = {
+ .repl = {
+ .name = "raw",
+ .valid_hooks = RAW_VALID_HOOKS,
+ /* two standard entries plus the error terminator */
+ .num_entries = 3,
+ .size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error),
+ /* byte offset of each hook's entry within the entries that follow */
+ .hook_entry = {
+ [NF_INET_PRE_ROUTING] = 0,
+ [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard)
+ },
+ /* same offsets as hook_entry: each chain holds only its policy entry */
+ .underflow = {
+ [NF_INET_PRE_ROUTING] = 0,
+ [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard)
+ },
+ },
+ .entries = {
+ IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */
+ IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
+ },
+ .term = IPT_ERROR_INIT, /* ERROR */
+};
+
+/* Table descriptor passed to ipt_register_table() for each network namespace. */
+static struct xt_table packet_raw = {
+ .name = "raw",
+ .valid_hooks = RAW_VALID_HOOKS,
+ .lock = __RW_LOCK_UNLOCKED(packet_raw.lock),
+ .me = THIS_MODULE,
+ .af = AF_INET,
+};
+
+/*
+ * PRE_ROUTING entry point called by the netfilter core: run the packet
+ * through the raw table of the namespace owning the input device.
+ */
+static unsigned int
+ipt_hook(unsigned int hook, struct sk_buff *skb,
+	 const struct net_device *in,
+	 const struct net_device *out,
+	 int (*okfn)(struct sk_buff *))
+{
+	struct net *net = dev_net(in);
+
+	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw);
+}
+
+/*
+ * LOCAL_OUT hook for the raw table.
+ *
+ * Packets too short to hold an IP header are accepted without consulting
+ * the table (with a rate-limited log message); everything else is run
+ * through the raw table of the namespace owning the output device.
+ */
+static unsigned int
+ipt_local_hook(unsigned int hook,
+	       struct sk_buff *skb,
+	       const struct net_device *in,
+	       const struct net_device *out,
+	       int (*okfn)(struct sk_buff *))
+{
+	/* root is playing with raw sockets. */
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr)) {
+		/* Same log level as the identical check in iptable_security. */
+		if (net_ratelimit())
+			printk(KERN_INFO "iptable_raw: ignoring short "
+			       "SOCK_RAW packet.\n");
+		return NF_ACCEPT;
+	}
+	return ipt_do_table(skb, hook, in, out,
+			    dev_net(out)->ipv4.iptable_raw);
+}
+
+/* 'raw' is the very first table. */
+/* Hook registrations for PRE_ROUTING and LOCAL_OUT, both at NF_IP_PRI_RAW. */
+static struct nf_hook_ops ipt_ops[] __read_mostly = {
+ {
+ .hook = ipt_hook,
+ .pf = PF_INET,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_RAW,
+ .owner = THIS_MODULE,
+ },
+ {
+ .hook = ipt_local_hook,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_RAW,
+ .owner = THIS_MODULE,
+ },
+};
+
+/*
+ * Per-namespace setup: register the raw table for @net from the initial
+ * ruleset.  Returns 0 on success or the error carried by the returned
+ * pointer.
+ */
+static int __net_init iptable_raw_net_init(struct net *net)
+{
+	net->ipv4.iptable_raw = ipt_register_table(net, &packet_raw,
+						   &initial_table.repl);
+	if (!IS_ERR(net->ipv4.iptable_raw))
+		return 0;
+
+	return PTR_ERR(net->ipv4.iptable_raw);
+}
+
+/* Per-namespace teardown: unregister the raw table stored in @net. */
+static void __net_exit iptable_raw_net_exit(struct net *net)
+{
+ ipt_unregister_table(net->ipv4.iptable_raw);
+}
+
+/* Init/exit callbacks handed to register_pernet_subsys() by the module init. */
+static struct pernet_operations iptable_raw_net_ops = {
+ .init = iptable_raw_net_init,
+ .exit = iptable_raw_net_exit,
+};
+
+/*
+ * Module entry point: register the per-namespace table operations, then
+ * the hooks.  A hook registration failure rolls back the per-namespace
+ * registration.  Returns a negative error code on failure.
+ */
+static int __init iptable_raw_init(void)
+{
+	int err = register_pernet_subsys(&iptable_raw_net_ops);
+
+	if (err < 0)
+		return err;
+
+	/* Hooks go in last so they never run without a table. */
+	err = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+	if (err < 0)
+		unregister_pernet_subsys(&iptable_raw_net_ops);
+
+	return err;
+}
+
+/*
+ * Module exit: undo iptable_raw_init() in reverse order - the hooks are
+ * removed first, then the per-namespace table operations.
+ */
+static void __exit iptable_raw_fini(void)
+{
+ nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+ unregister_pernet_subsys(&iptable_raw_net_ops);
+}
+
+module_init(iptable_raw_init);
+module_exit(iptable_raw_fini);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
new file mode 100644
index 0000000..36f3be3
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -0,0 +1,180 @@
+/*
+ * "security" table
+ *
+ * This is for use by Mandatory Access Control (MAC) security models,
+ * which need to be able to manage security policy in separate context
+ * to DAC.
+ *
+ * Based on iptable_mangle.c
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org>
+ * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>");
+MODULE_DESCRIPTION("iptables security table, for MAC rules");
+
+/*
+ * Bitmask of the hooks the security table is valid for.  The expansion is
+ * fully parenthesised so it stays a single operand wherever it is used
+ * (e.g. "SECURITY_VALID_HOOKS & mask").
+ */
+#define SECURITY_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
+				(1 << NF_INET_FORWARD) | \
+				(1 << NF_INET_LOCAL_OUT))
+
+/*
+ * Initial ruleset for the security table: a replace header, one standard
+ * ACCEPT entry per hook, and a terminating error entry.
+ */
+static struct
+{
+ struct ipt_replace repl;
+ struct ipt_standard entries[3];
+ struct ipt_error term;
+} initial_table __net_initdata = {
+ .repl = {
+ .name = "security",
+ .valid_hooks = SECURITY_VALID_HOOKS,
+ /* three standard entries plus the error terminator */
+ .num_entries = 4,
+ .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
+ /* byte offset of each hook's entry within the entries that follow */
+ .hook_entry = {
+ [NF_INET_LOCAL_IN] = 0,
+ [NF_INET_FORWARD] = sizeof(struct ipt_standard),
+ [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
+ },
+ /* same offsets as hook_entry: each chain holds only its policy entry */
+ .underflow = {
+ [NF_INET_LOCAL_IN] = 0,
+ [NF_INET_FORWARD] = sizeof(struct ipt_standard),
+ [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
+ },
+ },
+ .entries = {
+ IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */
+ IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */
+ IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
+ },
+ .term = IPT_ERROR_INIT, /* ERROR */
+};
+
+/* Table descriptor passed to ipt_register_table() for each network namespace. */
+static struct xt_table security_table = {
+ .name = "security",
+ .valid_hooks = SECURITY_VALID_HOOKS,
+ .lock = __RW_LOCK_UNLOCKED(security_table.lock),
+ .me = THIS_MODULE,
+ .af = AF_INET,
+};
+
+/*
+ * LOCAL_IN hook: run the packet through the security table of the
+ * namespace owning the input device.
+ */
+static unsigned int
+ipt_local_in_hook(unsigned int hook, struct sk_buff *skb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  int (*okfn)(struct sk_buff *))
+{
+	struct net *net = dev_net(in);
+
+	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security);
+}
+
+/*
+ * FORWARD hook: run the packet through the security table of the namespace
+ * owning the input device.
+ */
+static unsigned int
+ipt_forward_hook(unsigned int hook, struct sk_buff *skb,
+		 const struct net_device *in,
+		 const struct net_device *out,
+		 int (*okfn)(struct sk_buff *))
+{
+	struct net *net = dev_net(in);
+
+	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security);
+}
+
+/*
+ * LOCAL_OUT hook: packets long enough to carry a full IP header are run
+ * through the security table of the namespace owning the output device;
+ * shorter ones are accepted with a rate-limited log message.
+ */
+static unsigned int
+ipt_local_out_hook(unsigned int hook, struct sk_buff *skb,
+		   const struct net_device *in,
+		   const struct net_device *out,
+		   int (*okfn)(struct sk_buff *))
+{
+	if (skb->len >= sizeof(struct iphdr) &&
+	    ip_hdrlen(skb) >= sizeof(struct iphdr))
+		return ipt_do_table(skb, hook, in, out,
+				    dev_net(out)->ipv4.iptable_security);
+
+	/* Somebody is playing with raw sockets. */
+	if (net_ratelimit())
+		printk(KERN_INFO "iptable_security: ignoring short "
+		       "SOCK_RAW packet.\n");
+
+	return NF_ACCEPT;
+}
+
+/*
+ * Hook registrations for the security table: LOCAL_IN, FORWARD and
+ * LOCAL_OUT, all at priority NF_IP_PRI_SECURITY.
+ */
+static struct nf_hook_ops ipt_ops[] __read_mostly = {
+ {
+ .hook = ipt_local_in_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_SECURITY,
+ },
+ {
+ .hook = ipt_forward_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_FORWARD,
+ .priority = NF_IP_PRI_SECURITY,
+ },
+ {
+ .hook = ipt_local_out_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_SECURITY,
+ },
+};
+
+/*
+ * Per-namespace setup: register the security table for @net from the
+ * initial ruleset.  Returns 0 on success or the error carried by the
+ * returned pointer.
+ */
+static int __net_init iptable_security_net_init(struct net *net)
+{
+	net->ipv4.iptable_security = ipt_register_table(net, &security_table,
+							&initial_table.repl);
+
+	return IS_ERR(net->ipv4.iptable_security) ?
+		PTR_ERR(net->ipv4.iptable_security) : 0;
+}
+
+/* Per-namespace teardown: unregister the security table stored in @net. */
+static void __net_exit iptable_security_net_exit(struct net *net)
+{
+ ipt_unregister_table(net->ipv4.iptable_security);
+}
+
+/* Init/exit callbacks handed to register_pernet_subsys() by the module init. */
+static struct pernet_operations iptable_security_net_ops = {
+ .init = iptable_security_net_init,
+ .exit = iptable_security_net_exit,
+};
+
+/*
+ * Module entry point: register the per-namespace table operations, then
+ * the hooks.  A hook registration failure rolls back the per-namespace
+ * registration.  Returns a negative error code on failure.
+ */
+static int __init iptable_security_init(void)
+{
+	int err = register_pernet_subsys(&iptable_security_net_ops);
+
+	if (err < 0)
+		return err;
+
+	err = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+	if (err < 0)
+		unregister_pernet_subsys(&iptable_security_net_ops);
+
+	return err;
+}
+
+/*
+ * Module exit: undo iptable_security_init() in reverse order - the hooks
+ * are removed first, then the per-namespace table operations.
+ */
+static void __exit iptable_security_fini(void)
+{
+ nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+ unregister_pernet_subsys(&iptable_security_net_ops);
+}
+
+module_init(iptable_security_init);
+module_exit(iptable_security_fini);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
new file mode 100644
index 0000000..4a7c352
--- /dev/null
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -0,0 +1,456 @@
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+
+/*
+ * Optional sequence-adjustment callback.  ipv4_confirm() reads it with
+ * rcu_dereference() and calls it when IPS_SEQ_ADJUST_BIT is set on the
+ * connection; a NULL pointer or a zero return makes it drop the packet.
+ * Exported so another module can install the callback.
+ */
+int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo);
+EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
+
+/*
+ * Fill the layer-3 part of @tuple with the source and destination address
+ * of the IPv4 header located at @nhoff in @skb.  Returns false when the
+ * two address words cannot be read from the packet.
+ */
+static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
+			      struct nf_conntrack_tuple *tuple)
+{
+	__be32 addr_buf[2];
+	const __be32 *addrs;
+
+	/* saddr and daddr are adjacent, so one read fetches both. */
+	addrs = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
+				   sizeof(u_int32_t) * 2, addr_buf);
+	if (!addrs)
+		return false;
+
+	tuple->src.u3.ip = addrs[0];
+	tuple->dst.u3.ip = addrs[1];
+	return true;
+}
+
+/*
+ * Write the reply-direction addresses of @orig into @tuple by swapping
+ * source and destination.  Always returns true.
+ */
+static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
+{
+ tuple->src.u3.ip = orig->dst.u3.ip;
+ tuple->dst.u3.ip = orig->src.u3.ip;
+
+ return true;
+}
+
+/*
+ * Print the tuple's addresses to @s as "src=a.b.c.d dst=a.b.c.d " and
+ * return the result of seq_printf().
+ */
+static int ipv4_print_tuple(struct seq_file *s,
+ const struct nf_conntrack_tuple *tuple)
+{
+ return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
+ NIPQUAD(tuple->src.u3.ip),
+ NIPQUAD(tuple->dst.u3.ip));
+}
+
+/*
+ * Locate the layer-4 header of the IPv4 packet whose header is at @nhoff.
+ *
+ * On success stores the transport header offset in @dataoff and the IP
+ * protocol number in @protonum, and returns NF_ACCEPT.
+ *
+ * Returns -NF_DROP when the IP header cannot be read or the packet is a
+ * non-first fragment, and -NF_ACCEPT when the header length field places
+ * the transport header beyond the end of the skb.
+ * NOTE(review): the caller is outside this file and is presumed to negate
+ * non-positive results into a verdict - confirm.
+ */
+static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+			    unsigned int *dataoff, u_int8_t *protonum)
+{
+	const struct iphdr *iph;
+	struct iphdr _iph;
+
+	iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+	if (iph == NULL)
+		return -NF_DROP;
+
+	/* Conntrack defragments packets, we might still see fragments
+	 * inside ICMP packets though. */
+	if (iph->frag_off & htons(IP_OFFSET))
+		return -NF_DROP;
+
+	*dataoff = nhoff + (iph->ihl << 2);
+	*protonum = iph->protocol;
+
+	/* Check bogus IP headers: without this, an oversized ihl hands the
+	 * layer-4 code an offset past the end of the packet. */
+	if (*dataoff > skb->len) {
+		pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: "
+			 "nhoff %u, ihl %u, skblen %u\n",
+			 nhoff, iph->ihl << 2, skb->len);
+		return -NF_ACCEPT;
+	}
+
+	return NF_ACCEPT;
+}
+
+/*
+ * Final conntrack hook: give the connection's helper (if any) a look at
+ * the packet, apply sequence adjustment when flagged, then confirm the
+ * connection.
+ *
+ * Returns the helper's verdict when it is not NF_ACCEPT, NF_DROP when the
+ * required sequence adjustment is unavailable or fails, and otherwise the
+ * result of nf_conntrack_confirm().
+ */
+static unsigned int ipv4_confirm(unsigned int hooknum,
+				 struct sk_buff *skb,
+				 const struct net_device *in,
+				 const struct net_device *out,
+				 int (*okfn)(struct sk_buff *))
+{
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conntrack_helper *helper;
+	const struct nf_conn_help *help;
+	unsigned int verdict;
+	struct nf_conn *ct;
+
+	/* This is where we call the helper: as the packet goes out. */
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
+		goto confirm;
+
+	help = nfct_help(ct);
+	if (help == NULL)
+		goto confirm;
+
+	/* rcu_read_lock()ed by nf_hook_slow */
+	helper = rcu_dereference(help->helper);
+	if (helper == NULL)
+		goto confirm;
+
+	verdict = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
+			       ct, ctinfo);
+	if (verdict != NF_ACCEPT)
+		return verdict;
+
+	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
+		typeof(nf_nat_seq_adjust_hook) adjust;
+
+		adjust = rcu_dereference(nf_nat_seq_adjust_hook);
+		if (adjust == NULL || !adjust(skb, ct, ctinfo))
+			return NF_DROP;
+	}
+
+confirm:
+	/* We've seen it coming out the other side: confirm it */
+	return nf_conntrack_confirm(skb);
+}
+
+/*
+ * PRE_ROUTING conntrack hook: pass the packet to nf_conntrack_in() for the
+ * namespace owning the input device.
+ */
+static unsigned int ipv4_conntrack_in(unsigned int hooknum,
+				      struct sk_buff *skb,
+				      const struct net_device *in,
+				      const struct net_device *out,
+				      int (*okfn)(struct sk_buff *))
+{
+	struct net *net = dev_net(in);
+
+	return nf_conntrack_in(net, PF_INET, hooknum, skb);
+}
+
+/*
+ * LOCAL_OUT conntrack hook: packets long enough to carry a full IP header
+ * go to nf_conntrack_in() for the namespace owning the output device;
+ * shorter ones are accepted with a rate-limited log message.
+ */
+static unsigned int ipv4_conntrack_local(unsigned int hooknum,
+					 struct sk_buff *skb,
+					 const struct net_device *in,
+					 const struct net_device *out,
+					 int (*okfn)(struct sk_buff *))
+{
+	if (skb->len >= sizeof(struct iphdr) &&
+	    ip_hdrlen(skb) >= sizeof(struct iphdr))
+		return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb);
+
+	/* root is playing with raw sockets. */
+	if (net_ratelimit())
+		printk("ipt_hook: happy cracking.\n");
+
+	return NF_ACCEPT;
+}
+
+/* Connection tracking may drop packets, but never alters them, so
+ make it the first hook. */
+/*
+ * Tracking is attached at PRE_ROUTING and LOCAL_OUT (NF_IP_PRI_CONNTRACK);
+ * confirmation is attached at POST_ROUTING and LOCAL_IN
+ * (NF_IP_PRI_CONNTRACK_CONFIRM).
+ */
+static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
+ {
+ .hook = ipv4_conntrack_in,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv4_conntrack_local,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv4_confirm,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
+ {
+ .hook = ipv4_confirm,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
+};
+
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+/* Bounds enforced on ip_conntrack_log_invalid by proc_dointvec_minmax. */
+static int log_invalid_proto_min = 0;
+static int log_invalid_proto_max = 255;
+
+/*
+ * Compatibility sysctls under the old "ip_conntrack_*" names.  The count,
+ * checksum and log_invalid entries point at init_net's fields; count and
+ * buckets are read-only (mode 0444).
+ */
+static ctl_table ip_ct_sysctl_table[] = {
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_MAX,
+ .procname = "ip_conntrack_max",
+ .data = &nf_conntrack_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT,
+ .procname = "ip_conntrack_count",
+ .data = &init_net.ct.count,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS,
+ .procname = "ip_conntrack_buckets",
+ .data = &nf_conntrack_htable_size,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM,
+ .procname = "ip_conntrack_checksum",
+ .data = &init_net.ct.sysctl_checksum,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
+ .procname = "ip_conntrack_log_invalid",
+ .data = &init_net.ct.sysctl_log_invalid,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &log_invalid_proto_min,
+ .extra2 = &log_invalid_proto_max,
+ },
+ /* zero ctl_name terminates the table */
+ {
+ .ctl_name = 0
+ }
+};
+#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */
+
+/* Fast function for those who don't want to parse /proc (and I don't
+ blame them). */
+/* Reversing the socket's dst/src point of view gives us the reply
+ mapping. */
+/*
+ * SO_ORIGINAL_DST getsockopt handler.
+ *
+ * Builds a TCP tuple from the socket's own addresses and ports, looks up
+ * the matching conntrack entry and copies the original-direction
+ * destination address and port to @user as a struct sockaddr_in.
+ *
+ * Returns 0 on success, -ENOPROTOOPT for non-TCP sockets, -EINVAL when
+ * *@len is smaller than a sockaddr_in, -EFAULT when the copy to user space
+ * fails and -ENOENT when no conntrack entry matches.
+ */
+static int
+getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct nf_conntrack_tuple_hash *h;
+ struct nf_conntrack_tuple tuple;
+
+ memset(&tuple, 0, sizeof(tuple));
+ tuple.src.u3.ip = inet->rcv_saddr;
+ tuple.src.u.tcp.port = inet->sport;
+ tuple.dst.u3.ip = inet->daddr;
+ tuple.dst.u.tcp.port = inet->dport;
+ tuple.src.l3num = PF_INET;
+ tuple.dst.protonum = IPPROTO_TCP;
+
+ /* We only do TCP at the moment: is there a better way? */
+ if (strcmp(sk->sk_prot->name, "TCP")) {
+ pr_debug("SO_ORIGINAL_DST: Not a TCP socket\n");
+ return -ENOPROTOOPT;
+ }
+
+ if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
+ pr_debug("SO_ORIGINAL_DST: len %d not %Zu\n",
+ *len, sizeof(struct sockaddr_in));
+ return -EINVAL;
+ }
+
+ h = nf_conntrack_find_get(sock_net(sk), &tuple);
+ if (h) {
+ struct sockaddr_in sin;
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ sin.sin_family = AF_INET;
+ sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.dst.u.tcp.port;
+ sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.dst.u3.ip;
+ memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
+
+ pr_debug("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
+ NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+ /* sin is a local copy, so ct can be released before the user copy */
+ nf_ct_put(ct);
+ if (copy_to_user(user, &sin, sizeof(sin)) != 0)
+ return -EFAULT;
+ else
+ return 0;
+ }
+ pr_debug("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
+ NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port),
+ NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port));
+ return -ENOENT;
+}
+
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+/*
+ * Append the tuple's source and destination IPv4 addresses to @skb as
+ * CTA_IP_V4_SRC / CTA_IP_V4_DST attributes.  Returns 0 on success, -1 on
+ * failure.
+ */
+static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple)
+{
+ /* presumably NLA_PUT_BE32 jumps to nla_put_failure on error - nothing else here reaches that label */
+ NLA_PUT_BE32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip);
+ NLA_PUT_BE32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip);
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+/* Netlink attribute policy: both IPv4 address attributes are 32-bit values. */
+static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = {
+ [CTA_IP_V4_SRC] = { .type = NLA_U32 },
+ [CTA_IP_V4_DST] = { .type = NLA_U32 },
+};
+
+/*
+ * Fill the layer-3 addresses of @t from the parsed netlink attributes in
+ * @tb.  Returns -EINVAL unless both the source and the destination
+ * attribute are present, 0 otherwise.
+ */
+static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
+				struct nf_conntrack_tuple *t)
+{
+	struct nlattr *src = tb[CTA_IP_V4_SRC];
+	struct nlattr *dst = tb[CTA_IP_V4_DST];
+
+	if (src == NULL || dst == NULL)
+		return -EINVAL;
+
+	t->src.u3.ip = nla_get_be32(src);
+	t->dst.u3.ip = nla_get_be32(dst);
+	return 0;
+}
+#endif
+
+/*
+ * getsockopt registration for SO_ORIGINAL_DST, handled by getorigdst().
+ * NOTE(review): get_optmax looks like an exclusive upper bound, so only
+ * the single option is covered - confirm.
+ */
+static struct nf_sockopt_ops so_getorigdst = {
+ .pf = PF_INET,
+ .get_optmin = SO_ORIGINAL_DST,
+ .get_optmax = SO_ORIGINAL_DST+1,
+ .get = &getorigdst,
+ .owner = THIS_MODULE,
+};
+
+/*
+ * IPv4 layer-3 protocol descriptor registered with the conntrack core by
+ * the module init.  The netlink and sysctl members are only set when the
+ * corresponding configuration options are enabled.
+ */
+struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
+ .l3proto = PF_INET,
+ .name = "ipv4",
+ .pkt_to_tuple = ipv4_pkt_to_tuple,
+ .invert_tuple = ipv4_invert_tuple,
+ .print_tuple = ipv4_print_tuple,
+ .get_l4proto = ipv4_get_l4proto,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .tuple_to_nlattr = ipv4_tuple_to_nlattr,
+ .nlattr_to_tuple = ipv4_nlattr_to_tuple,
+ .nla_policy = ipv4_nla_policy,
+#endif
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ .ctl_table_path = nf_net_ipv4_netfilter_sysctl_path,
+ .ctl_table = ip_ct_sysctl_table,
+#endif
+ .me = THIS_MODULE,
+};
+
+/*
+ * "hashsize" module parameter (mode 0600): writes go through
+ * nf_conntrack_set_hashsize(), reads report nf_conntrack_htable_size.
+ */
+module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
+ &nf_conntrack_htable_size, 0600);
+
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
+MODULE_ALIAS("ip_conntrack");
+MODULE_LICENSE("GPL");
+
+/*
+ * Module entry point.
+ *
+ * Enables IPv4 defragmentation, then registers in order: the
+ * SO_ORIGINAL_DST socket option, the TCP, UDP and ICMP layer-4 trackers,
+ * the IPv4 layer-3 tracker, the netfilter hooks and (when configured) the
+ * /proc compatibility interface.  On any failure everything registered so
+ * far is torn down in reverse order.
+ *
+ * Returns a negative error code on failure, otherwise the non-negative
+ * result of the last registration step.
+ */
+static int __init nf_conntrack_l3proto_ipv4_init(void)
+{
+	int ret = 0;
+
+	need_conntrack();
+	nf_defrag_ipv4_enable();
+
+	ret = nf_register_sockopt(&so_getorigdst);
+	if (ret < 0) {
+		printk(KERN_ERR "Unable to register netfilter socket option\n");
+		return ret;
+	}
+
+	/* Every failure below is logged at KERN_ERR, like the one above. */
+	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4);
+	if (ret < 0) {
+		printk(KERN_ERR "nf_conntrack_ipv4: can't register tcp.\n");
+		goto cleanup_sockopt;
+	}
+
+	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4);
+	if (ret < 0) {
+		printk(KERN_ERR "nf_conntrack_ipv4: can't register udp.\n");
+		goto cleanup_tcp;
+	}
+
+	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp);
+	if (ret < 0) {
+		printk(KERN_ERR "nf_conntrack_ipv4: can't register icmp.\n");
+		goto cleanup_udp;
+	}
+
+	ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
+	if (ret < 0) {
+		printk(KERN_ERR "nf_conntrack_ipv4: can't register ipv4\n");
+		goto cleanup_icmp;
+	}
+
+	ret = nf_register_hooks(ipv4_conntrack_ops,
+				ARRAY_SIZE(ipv4_conntrack_ops));
+	if (ret < 0) {
+		printk(KERN_ERR "nf_conntrack_ipv4: can't register hooks.\n");
+		goto cleanup_ipv4;
+	}
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+	ret = nf_conntrack_ipv4_compat_init();
+	if (ret < 0)
+		goto cleanup_hooks;
+#endif
+	return ret;
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ cleanup_hooks:
+	nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
+#endif
+ cleanup_ipv4:
+	nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
+ cleanup_icmp:
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
+ cleanup_udp:
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
+ cleanup_tcp:
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
+ cleanup_sockopt:
+	nf_unregister_sockopt(&so_getorigdst);
+	return ret;
+}
+
+/*
+ * Module exit: after synchronize_net(), unregister everything the init
+ * routine registered, in reverse order of registration.
+ */
+static void __exit nf_conntrack_l3proto_ipv4_fini(void)
+{
+ synchronize_net();
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ nf_conntrack_ipv4_compat_fini();
+#endif
+ nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
+ nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
+ nf_unregister_sockopt(&so_getorigdst);
+}
+
+module_init(nf_conntrack_l3proto_ipv4_init);
+module_exit(nf_conntrack_l3proto_ipv4_fini);
+
+/*
+ * Intentionally empty exported function.
+ * NOTE(review): presumably other modules reference it only to create a
+ * symbol dependency on this module - confirm against callers.
+ */
+void need_ipv4_conntrack(void)
+{
+}
+EXPORT_SYMBOL_GPL(need_ipv4_conntrack);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
new file mode 100644
index 0000000..313ebf0
--- /dev/null
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -0,0 +1,425 @@
+/* ip_conntrack proc compat - based on ip_conntrack_standalone.c
+ *
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/percpu.h>
+#include <net/net_namespace.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+
+struct ct_iter_state { /* private seq_file state for the ip_conntrack walk; ct_open() passes its size to seq_open_net() */
+ struct seq_net_private p; /* NOTE(review): presumably has to stay the first member for seq_file_net() -- confirm */
+ unsigned int bucket; /* index into net->ct.hash of the bucket the walk is currently in */
+};
+
+/*
+ * Position the iterator on the first entry of the conntrack hash.
+ *
+ * Scans the buckets from 0 upwards and returns the first node of the first
+ * non-empty bucket, leaving st->bucket at that bucket.  Returns NULL when
+ * every bucket is empty.
+ */
+static struct hlist_node *ct_get_first(struct seq_file *seq)
+{
+	struct ct_iter_state *iter = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct hlist_node *node;
+
+	iter->bucket = 0;
+	while (iter->bucket < nf_conntrack_htable_size) {
+		node = rcu_dereference(net->ct.hash[iter->bucket].first);
+		if (node != NULL)
+			return node;
+		iter->bucket++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Advance from @head to the following entry of the conntrack hash.
+ *
+ * Follows the chain of the current bucket; once the chain is exhausted,
+ * moves on to the next buckets until a non-empty one is found.  Returns
+ * NULL after the last bucket.
+ */
+static struct hlist_node *ct_get_next(struct seq_file *seq,
+				      struct hlist_node *head)
+{
+	struct ct_iter_state *iter = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct hlist_node *node = rcu_dereference(head->next);
+
+	for (;;) {
+		if (node != NULL)
+			return node;
+
+		/* chain finished: try the head of the next bucket */
+		if (++iter->bucket >= nf_conntrack_htable_size)
+			return NULL;
+		node = rcu_dereference(net->ct.hash[iter->bucket].first);
+	}
+}
+
+/*
+ * Return the entry @pos steps after the first one, or NULL if the table
+ * holds no such entry.  The walk always restarts from the first bucket.
+ */
+static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct hlist_node *node = ct_get_first(seq);
+
+	while (node != NULL && pos != 0) {
+		node = ct_get_next(seq, node);
+		if (node == NULL)
+			return NULL;
+		pos--;
+	}
+
+	return node;
+}
+
+/* seq_file ->start: take the RCU read lock and seek to entry *pos. */
+static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
+{
+	rcu_read_lock();
+
+	return ct_get_idx(seq, *pos);
+}
+
+/* seq_file ->next: bump the position and step to the following entry. */
+static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	*pos += 1;
+
+	return ct_get_next(s, v);
+}
+
+/* seq_file ->stop: drop the RCU read lock taken by ct_seq_start(). */
+static void ct_seq_stop(struct seq_file *s, void *v)
+	__releases(RCU)
+{
+	rcu_read_unlock();
+}
+
+/*
+ * seq_file ->show: print one conntrack entry.
+ *
+ * @v is the tuple hash node handed out by the iterator.  Output is only
+ * produced for the ORIGINAL-direction hash of AF_INET conntracks; any
+ * other node is skipped by returning 0 without printing.
+ *
+ * Returns 0 on success, or -ENOSPC as soon as one of the formatting
+ * helpers reports failure.
+ */
+static int ct_seq_show(struct seq_file *s, void *v)
+{
+	const struct nf_conntrack_tuple_hash *hash = v;
+	const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
+	const struct nf_conntrack_l3proto *l3proto;
+	const struct nf_conntrack_l4proto *l4proto;
+	long secs_left = 0;
+
+	NF_CT_ASSERT(ct);
+
+	/* we only want to print DIR_ORIGINAL, and only IPv4 conntracks */
+	if (NF_CT_DIRECTION(hash) || nf_ct_l3num(ct) != AF_INET)
+		return 0;
+
+	l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
+	NF_CT_ASSERT(l3proto);
+	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+	NF_CT_ASSERT(l4proto);
+
+	/* remaining lifetime in seconds; 0 when no timer is pending */
+	if (timer_pending(&ct->timeout))
+		secs_left = (long)(ct->timeout.expires - jiffies)/HZ;
+
+	if (seq_printf(s, "%-8s %u %ld ",
+		       l4proto->name, nf_ct_protonum(ct), secs_left) != 0)
+		goto nospace;
+
+	/* optional protocol-specific state */
+	if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
+		goto nospace;
+
+	/* original direction */
+	if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+			l3proto, l4proto))
+		goto nospace;
+
+	if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
+		goto nospace;
+
+	if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status) &&
+	    seq_printf(s, "[UNREPLIED] "))
+		goto nospace;
+
+	/* reply direction */
+	if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+			l3proto, l4proto))
+		goto nospace;
+
+	if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
+		goto nospace;
+
+	if (test_bit(IPS_ASSURED_BIT, &ct->status) &&
+	    seq_printf(s, "[ASSURED] "))
+		goto nospace;
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+	if (seq_printf(s, "mark=%u ", ct->mark))
+		goto nospace;
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+	if (seq_printf(s, "secmark=%u ", ct->secmark))
+		goto nospace;
+#endif
+
+	if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
+		goto nospace;
+
+	return 0;
+
+nospace:
+	return -ENOSPC;
+}
+
+/* seq_file iterator callbacks for the "ip_conntrack" proc entry */
+static const struct seq_operations ct_seq_ops = {
+	.start	= ct_seq_start,
+	.next	= ct_seq_next,
+	.stop	= ct_seq_stop,
+	.show	= ct_seq_show,
+};
+
+/* ->open: per-net seq_file whose private area is a struct ct_iter_state */
+static int ct_open(struct inode *inode, struct file *file)
+{
+	const int state_size = sizeof(struct ct_iter_state);
+
+	return seq_open_net(inode, file, &ct_seq_ops, state_size);
+}
+
+/* file operations of the "ip_conntrack" proc entry */
+static const struct file_operations ct_file_ops = {
+	.owner		= THIS_MODULE,
+	.open		= ct_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_net,
+};
+
+/* expects */
+struct ct_expect_iter_state { /* private seq_file state for the ip_conntrack_expect walk; exp_open() passes its size to seq_open_net() */
+ struct seq_net_private p; /* NOTE(review): presumably has to stay the first member for seq_file_net() -- confirm */
+ unsigned int bucket; /* index into net->ct.expect_hash of the bucket the walk is currently in */
+};
+
+/*
+ * Position the iterator on the first entry of the expectation hash.
+ *
+ * Returns the first node of the first non-empty bucket and leaves
+ * st->bucket there, or NULL when all nf_ct_expect_hsize buckets are empty.
+ */
+static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
+{
+	struct ct_expect_iter_state *iter = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct hlist_node *node;
+
+	iter->bucket = 0;
+	while (iter->bucket < nf_ct_expect_hsize) {
+		node = rcu_dereference(net->ct.expect_hash[iter->bucket].first);
+		if (node != NULL)
+			return node;
+		iter->bucket++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Advance from @head to the following entry of the expectation hash,
+ * crossing into later buckets when the current chain ends.  Returns NULL
+ * after the last bucket.
+ */
+static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
+					     struct hlist_node *head)
+{
+	struct ct_expect_iter_state *iter = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct hlist_node *node = rcu_dereference(head->next);
+
+	for (;;) {
+		if (node != NULL)
+			return node;
+
+		/* chain finished: try the head of the next bucket */
+		if (++iter->bucket >= nf_ct_expect_hsize)
+			return NULL;
+		node = rcu_dereference(net->ct.expect_hash[iter->bucket].first);
+	}
+}
+
+/*
+ * Return the expectation @pos steps after the first one, or NULL if the
+ * table holds no such entry.
+ */
+static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct hlist_node *node = ct_expect_get_first(seq);
+
+	while (node != NULL && pos != 0) {
+		node = ct_expect_get_next(seq, node);
+		if (node == NULL)
+			return NULL;
+		pos--;
+	}
+
+	return node;
+}
+
+/* seq_file ->start: take the RCU read lock and seek to expectation *pos. */
+static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
+{
+	rcu_read_lock();
+
+	return ct_expect_get_idx(seq, *pos);
+}
+
+/* seq_file ->next: bump the position and step to the next expectation. */
+static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	*pos += 1;
+
+	return ct_expect_get_next(seq, v);
+}
+
+/* seq_file ->stop: drop the RCU read lock taken by exp_seq_start(). */
+static void exp_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
+{
+	rcu_read_unlock();
+}
+
+/*
+ * seq_file ->show: print one expectation.
+ *
+ * Non-AF_INET expectations are skipped (returns 0 without output).  The
+ * line consists of the remaining timeout in seconds ("-" when the
+ * expectation has no timer function), the protocol number and the tuple.
+ * The return value is that of the final seq_putc().
+ */
+static int exp_seq_show(struct seq_file *s, void *v)
+{
+	const struct hlist_node *n = v;
+	const struct nf_conntrack_l3proto *l3proto;
+	const struct nf_conntrack_l4proto *l4proto;
+	struct nf_conntrack_expect *exp;
+
+	exp = hlist_entry(n, struct nf_conntrack_expect, hnode);
+
+	if (exp->tuple.src.l3num != AF_INET)
+		return 0;
+
+	if (!exp->timeout.function) {
+		seq_printf(s, "- ");
+	} else {
+		seq_printf(s, "%ld ", timer_pending(&exp->timeout)
+			   ? (long)(exp->timeout.expires - jiffies)/HZ : 0);
+	}
+
+	seq_printf(s, "proto=%u ", exp->tuple.dst.protonum);
+
+	l3proto = __nf_ct_l3proto_find(exp->tuple.src.l3num);
+	l4proto = __nf_ct_l4proto_find(exp->tuple.src.l3num,
+				       exp->tuple.dst.protonum);
+	print_tuple(s, &exp->tuple, l3proto, l4proto);
+
+	return seq_putc(s, '\n');
+}
+
+/* seq_file iterator callbacks for the "ip_conntrack_expect" proc entry */
+static const struct seq_operations exp_seq_ops = {
+	.start	= exp_seq_start,
+	.next	= exp_seq_next,
+	.stop	= exp_seq_stop,
+	.show	= exp_seq_show,
+};
+
+/* ->open: per-net seq_file whose private area is a ct_expect_iter_state */
+static int exp_open(struct inode *inode, struct file *file)
+{
+	const int state_size = sizeof(struct ct_expect_iter_state);
+
+	return seq_open_net(inode, file, &exp_seq_ops, state_size);
+}
+
+/* file operations of the "ip_conntrack_expect" proc entry */
+static const struct file_operations ip_exp_file_ops = {
+	.owner		= THIS_MODULE,
+	.open		= exp_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_net,
+};
+
+/*
+ * seq_file ->start for the per-CPU statistics file.
+ *
+ * Position 0 yields SEQ_START_TOKEN (the header line).  Position p > 0
+ * refers to CPU p - 1: the first possible CPU at or after it is selected,
+ * *pos is updated to that CPU + 1 and its statistics block is returned.
+ * Returns NULL when no possible CPU is left.
+ */
+static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct net *net = seq_file_net(seq);
+	int cpu;
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	cpu = *pos-1;
+	while (cpu < NR_CPUS) {
+		if (cpu_possible(cpu)) {
+			*pos = cpu+1;
+			return per_cpu_ptr(net->ct.stat, cpu);
+		}
+		++cpu;
+	}
+
+	return NULL;
+}
+
+/*
+ * seq_file ->next for the per-CPU statistics file: starting at CPU *pos,
+ * pick the next possible CPU, store that CPU + 1 in *pos and return its
+ * statistics block.  Returns NULL when no possible CPU is left.
+ */
+static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct net *net = seq_file_net(seq);
+	int cpu = *pos;
+
+	while (cpu < NR_CPUS) {
+		if (cpu_possible(cpu)) {
+			*pos = cpu+1;
+			return per_cpu_ptr(net->ct.stat, cpu);
+		}
+		++cpu;
+	}
+
+	return NULL;
+}
+
+/* seq_file ->stop: nothing to release, ->start takes no lock. */
+static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+/*
+ * seq_file ->show for the per-CPU statistics file.
+ *
+ * Prints the column header for SEQ_START_TOKEN; otherwise prints one row
+ * of hexadecimal counters for the per-CPU block @v.  The first column is
+ * the per-net conntrack count, so it is the same in every row.
+ * Always returns 0.
+ */
+static int ct_cpu_seq_show(struct seq_file *seq, void *v)
+{
+	struct net *net = seq_file_net(seq);
+	const struct ip_conntrack_stat *st = v;
+	unsigned int nr_conntracks;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
+		return 0;
+	}
+
+	nr_conntracks = atomic_read(&net->ct.count);
+
+	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
+			"%08x %08x %08x %08x %08x %08x %08x %08x \n",
+		   nr_conntracks,
+		   st->searched,
+		   st->found,
+		   st->new,
+		   st->invalid,
+		   st->ignore,
+		   st->delete,
+		   st->delete_list,
+		   st->insert,
+		   st->insert_failed,
+		   st->drop,
+		   st->early_drop,
+		   st->error,
+		   st->expect_new,
+		   st->expect_create,
+		   st->expect_delete);
+
+	return 0;
+}
+
+/* seq_file iterator callbacks for the per-CPU statistics file */
+static const struct seq_operations ct_cpu_seq_ops = {
+	.start	= ct_cpu_seq_start,
+	.next	= ct_cpu_seq_next,
+	.stop	= ct_cpu_seq_stop,
+	.show	= ct_cpu_seq_show,
+};
+
+/* ->open: per-net seq_file; only the seq_net_private header is needed */
+static int ct_cpu_seq_open(struct inode *inode, struct file *file)
+{
+	const int state_size = sizeof(struct seq_net_private);
+
+	return seq_open_net(inode, file, &ct_cpu_seq_ops, state_size);
+}
+
+/* file operations of the per-CPU statistics file */
+static const struct file_operations ct_cpu_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ct_cpu_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_net,
+};
+
+/*
+ * Per-net init: create the three compat proc entries -- "ip_conntrack" and
+ * "ip_conntrack_expect" via proc_net_fops_create(), plus "ip_conntrack"
+ * under net->proc_net_stat.
+ *
+ * Returns 0 on success.  If any entry cannot be created, the entries
+ * created so far are removed again and -ENOMEM is returned.
+ */
+static int __net_init ip_conntrack_net_init(struct net *net)
+{
+	struct proc_dir_entry *entry;
+
+	entry = proc_net_fops_create(net, "ip_conntrack", 0440, &ct_file_ops);
+	if (entry == NULL)
+		goto fail;
+
+	entry = proc_net_fops_create(net, "ip_conntrack_expect", 0440,
+				     &ip_exp_file_ops);
+	if (entry == NULL)
+		goto undo_conntrack;
+
+	entry = proc_create("ip_conntrack", S_IRUGO,
+			    net->proc_net_stat, &ct_cpu_seq_fops);
+	if (entry == NULL)
+		goto undo_expect;
+
+	return 0;
+
+undo_expect:
+	proc_net_remove(net, "ip_conntrack_expect");
+undo_conntrack:
+	proc_net_remove(net, "ip_conntrack");
+fail:
+	return -ENOMEM;
+}
+
+/* Per-net exit: remove the three proc entries made by ip_conntrack_net_init(). */
+static void __net_exit ip_conntrack_net_exit(struct net *net)
+{
+	remove_proc_entry("ip_conntrack", net->proc_net_stat);
+
+	proc_net_remove(net, "ip_conntrack_expect");
+	proc_net_remove(net, "ip_conntrack");
+}
+
+/* per-network-namespace hooks for the compat proc files */
+static struct pernet_operations ip_conntrack_net_ops = {
+	.init	= ip_conntrack_net_init,
+	.exit	= ip_conntrack_net_exit,
+};
+
+/* Register the per-net operations; returns register_pernet_subsys()'s result. */
+int __init nf_conntrack_ipv4_compat_init(void)
+{
+	int err = register_pernet_subsys(&ip_conntrack_net_ops);
+
+	return err;
+}
+
+/* Unregister the per-net operations again. */
+void __exit nf_conntrack_ipv4_compat_fini(void)
+{
+	unregister_pernet_subsys(&ip_conntrack_net_ops);
+}
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
new file mode 100644
index 0000000..625707a
--- /dev/null
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -0,0 +1,322 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/icmp.h>
+#include <linux/seq_file.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_log.h>
+
+/* conntrack timeout for ICMP flows, in jiffies (default 30 seconds) */
+static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ;
+
+/*
+ * Fill the ICMP part of @tuple (type, code and echo id) from the ICMP
+ * header located @dataoff bytes into @skb.
+ *
+ * Returns false if the header cannot be obtained from the skb.
+ */
+static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+			      struct nf_conntrack_tuple *tuple)
+{
+	struct icmphdr _hdr;
+	const struct icmphdr *hp;
+
+	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+	if (!hp)
+		return false;
+
+	tuple->src.u.icmp.id = hp->un.echo.id;
+	tuple->dst.u.icmp.type = hp->type;
+	tuple->dst.u.icmp.code = hp->code;
+
+	return true;
+}
+
+/*
+ * Maps an ICMP request/reply type to its counterpart.  Each entry stores
+ * the counterpart type plus one, so the zero that fills unlisted slots
+ * means "this type has no counterpart".
+ */
+static const u_int8_t invmap[] = {
+	[ICMP_ECHO]		= ICMP_ECHOREPLY + 1,
+	[ICMP_ECHOREPLY]	= ICMP_ECHO + 1,
+	[ICMP_TIMESTAMP]	= ICMP_TIMESTAMPREPLY + 1,
+	[ICMP_TIMESTAMPREPLY]	= ICMP_TIMESTAMP + 1,
+	[ICMP_INFO_REQUEST]	= ICMP_INFO_REPLY + 1,
+	[ICMP_INFO_REPLY]	= ICMP_INFO_REQUEST + 1,
+	[ICMP_ADDRESS]		= ICMP_ADDRESSREPLY + 1,
+	[ICMP_ADDRESSREPLY]	= ICMP_ADDRESS + 1,
+};
+
+/*
+ * Build in @tuple the ICMP part of the reply-direction tuple for @orig:
+ * same id and code, type replaced by its counterpart from invmap[].
+ *
+ * Returns false (leaving @tuple untouched) when the type of @orig has no
+ * counterpart.
+ */
+static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
+			      const struct nf_conntrack_tuple *orig)
+{
+	const unsigned int type = orig->dst.u.icmp.type;
+
+	if (type >= sizeof(invmap) || invmap[type] == 0)
+		return false;
+
+	tuple->src.u.icmp.id = orig->src.u.icmp.id;
+	tuple->dst.u.icmp.type = invmap[type] - 1;	/* undo the +1 encoding */
+	tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+
+	return true;
+}
+
+/*
+ * Print the ICMP-specific part of @tuple (type, code and id, the id being
+ * converted to host byte order).  Returns seq_printf()'s result.
+ */
+static int icmp_print_tuple(struct seq_file *s,
+			    const struct nf_conntrack_tuple *tuple)
+{
+	const unsigned int type = tuple->dst.u.icmp.type;
+	const unsigned int code = tuple->dst.u.icmp.code;
+	const unsigned int id = ntohs(tuple->src.u.icmp.id);
+
+	return seq_printf(s, "type=%u code=%u id=%u ", type, code, id);
+}
+
+/*
+ * Per-packet handler for tracked ICMP flows.  Always returns NF_ACCEPT.
+ *
+ * Packets in the original direction bump the count of outstanding
+ * requests and refresh the timeout; each reply decrements the count, and
+ * the conntrack is killed once it reaches zero.
+ */
+static int icmp_packet(struct nf_conn *ct,
+		       const struct sk_buff *skb,
+		       unsigned int dataoff,
+		       enum ip_conntrack_info ctinfo,
+		       u_int8_t pf,
+		       unsigned int hooknum)
+{
+	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_REPLY) {
+		atomic_inc(&ct->proto.icmp.count);
+		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
+		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
+		return NF_ACCEPT;
+	}
+
+	/* Try to delete connection immediately after all replies:
+	   won't actually vanish as we still have skb, and del_timer
+	   means this will only run once even if count hits zero twice
+	   (theoretically possible with SMP) */
+	if (atomic_dec_and_test(&ct->proto.icmp.count))
+		nf_ct_kill_acct(ct, ctinfo, skb);
+
+	return NF_ACCEPT;
+}
+
+/*
+ * Called when a new connection for this protocol is found.
+ *
+ * Only the request types listed in valid_new[] may start an ICMP
+ * "connection"; for those the outstanding-request count is reset to 0 and
+ * true is returned.  Any other type is rejected with false.
+ */
+static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
+		     unsigned int dataoff)
+{
+	static const u_int8_t valid_new[] = {
+		[ICMP_ECHO]		= 1,
+		[ICMP_TIMESTAMP]	= 1,
+		[ICMP_INFO_REQUEST]	= 1,
+		[ICMP_ADDRESS]		= 1,
+	};
+	const unsigned int type = ct->tuplehash[0].tuple.dst.u.icmp.type;
+
+	if (type < sizeof(valid_new) && valid_new[type]) {
+		atomic_set(&ct->proto.icmp.count, 0);
+		return true;
+	}
+
+	/* Can't create a new ICMP `conn' with this. */
+	pr_debug("icmp: can't create new conn with type %u\n", type);
+	nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
+	return false;
+}
+
+/*
+ * Try to relate an ICMP error to an existing connection.
+ *
+ * The tuple of the packet embedded after the ICMP header is extracted and
+ * inverted, then looked up in the conntrack table.  On a match, @skb is
+ * attached to that conntrack with IP_CT_RELATED (plus IP_CT_IS_REPLY when
+ * the match is in the reply direction).
+ *
+ * Every path returns -NF_ACCEPT; success is visible only through
+ * skb->nfct, skb->nfctinfo and *ctinfo.
+ */
+static int
+icmp_error_message(struct net *net, struct sk_buff *skb,
+		   enum ip_conntrack_info *ctinfo,
+		   unsigned int hooknum)
+{
+	const unsigned int inner_off = skb_network_offset(skb) +
+				       ip_hdrlen(skb) +
+				       sizeof(struct icmphdr);
+	struct nf_conntrack_tuple innertuple, origtuple;
+	const struct nf_conntrack_l4proto *innerproto;
+	const struct nf_conntrack_tuple_hash *h;
+
+	NF_CT_ASSERT(skb->nfct == NULL);
+
+	/* Are they talking about one of our connections? */
+	if (!nf_ct_get_tuplepr(skb, inner_off, PF_INET, &origtuple)) {
+		pr_debug("icmp_error_message: failed to get tuple\n");
+		return -NF_ACCEPT;
+	}
+
+	/* rcu_read_lock()ed by nf_hook_slow */
+	innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
+
+	/* Ordinarily, we'd expect the inverted tupleproto, but it's
+	   been preserved inside the ICMP. */
+	if (!nf_ct_invert_tuple(&innertuple, &origtuple,
+				&nf_conntrack_l3proto_ipv4, innerproto)) {
+		pr_debug("icmp_error_message: no match\n");
+		return -NF_ACCEPT;
+	}
+
+	*ctinfo = IP_CT_RELATED;
+
+	h = nf_conntrack_find_get(net, &innertuple);
+	if (h == NULL) {
+		pr_debug("icmp_error_message: no match\n");
+		return -NF_ACCEPT;
+	}
+
+	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+		*ctinfo += IP_CT_IS_REPLY;
+
+	/* Update skb to refer to this connection */
+	skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
+	skb->nfctinfo = *ctinfo;
+
+	return -NF_ACCEPT;
+}
+
+/*
+ * Early validation of an ICMP packet (small and modified version of
+ * icmp_rcv).
+ *
+ * Returns -NF_ACCEPT for packets that are too short, fail the checksum
+ * (only checked on PRE_ROUTING when the checksum sysctl is set) or carry a
+ * type above NR_ICMP_TYPES.  ICMP error types are handed to
+ * icmp_error_message(); everything else returns NF_ACCEPT.
+ */
+static int
+icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
+	   enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum)
+{
+	struct icmphdr _ih;
+	const struct icmphdr *icmph;
+
+	/* Not enough header? */
+	icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
+	if (!icmph) {
+		if (LOG_INVALID(net, IPPROTO_ICMP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_icmp: short packet ");
+		return -NF_ACCEPT;
+	}
+
+	/* See ip_conntrack_proto_tcp.c */
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	    nf_ip_checksum(skb, hooknum, dataoff, 0)) {
+		if (LOG_INVALID(net, IPPROTO_ICMP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_icmp: bad HW ICMP checksum ");
+		return -NF_ACCEPT;
+	}
+
+	/*
+	 * 18 is the highest 'known' ICMP type. Anything else is a mystery
+	 *
+	 * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
+	 * discarded.
+	 */
+	if (icmph->type > NR_ICMP_TYPES) {
+		if (LOG_INVALID(net, IPPROTO_ICMP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_icmp: invalid ICMP type ");
+		return -NF_ACCEPT;
+	}
+
+	/* Only ICMP error messages need to be related to a connection. */
+	switch (icmph->type) {
+	case ICMP_DEST_UNREACH:
+	case ICMP_SOURCE_QUENCH:
+	case ICMP_TIME_EXCEEDED:
+	case ICMP_PARAMETERPROB:
+	case ICMP_REDIRECT:
+		return icmp_error_message(net, skb, ctinfo, hooknum);
+	default:
+		return NF_ACCEPT;
+	}
+}
+
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+/*
+ * Add the ICMP id, type and code of @t to @skb as netlink attributes.
+ *
+ * Returns 0 on success, -1 when one of the NLA_PUT_* macros jumps to
+ * nla_put_failure.
+ */
+static int icmp_tuple_to_nlattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *t)
+{
+	NLA_PUT_BE16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id);
+	NLA_PUT_U8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type);
+	NLA_PUT_U8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+/* netlink attribute types of the ICMP part of a tuple */
+static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = {
+	[CTA_PROTO_ICMP_TYPE]	= { .type = NLA_U8 },
+	[CTA_PROTO_ICMP_CODE]	= { .type = NLA_U8 },
+	[CTA_PROTO_ICMP_ID]	= { .type = NLA_U16 },
+};
+
+/*
+ * Fill the ICMP part of @tuple from the netlink attributes in @tb.
+ *
+ * Returns -EINVAL when the type, code or id attribute is missing, or when
+ * the type has no counterpart in invmap[]; in the latter case @tuple has
+ * already been written.  Returns 0 otherwise.
+ */
+static int icmp_nlattr_to_tuple(struct nlattr *tb[],
+				struct nf_conntrack_tuple *tuple)
+{
+	unsigned int type;
+
+	if (tb[CTA_PROTO_ICMP_TYPE] == NULL ||
+	    tb[CTA_PROTO_ICMP_CODE] == NULL ||
+	    tb[CTA_PROTO_ICMP_ID] == NULL)
+		return -EINVAL;
+
+	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
+	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
+	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
+
+	type = tuple->dst.u.icmp.type;
+	if (type < sizeof(invmap) && invmap[type])
+		return 0;
+
+	return -EINVAL;
+}
+#endif
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *icmp_sysctl_header;
+
+/* sysctl "nf_conntrack_icmp_timeout": exposes nf_ct_icmp_timeout */
+static struct ctl_table icmp_sysctl_table[] = {
+	{
+		.procname	= "nf_conntrack_icmp_timeout",
+		.data		= &nf_ct_icmp_timeout,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{ .ctl_name = 0 }	/* terminator */
+};
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+/* compat sysctl "ip_conntrack_icmp_timeout": same variable, legacy name */
+static struct ctl_table icmp_compat_sysctl_table[] = {
+	{
+		.procname	= "ip_conntrack_icmp_timeout",
+		.data		= &nf_ct_icmp_timeout,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{ .ctl_name = 0 }	/* terminator */
+};
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+#endif /* CONFIG_SYSCTL */
+
+/* l4proto descriptor tying the ICMP handlers in this file to PF_INET/ICMP */
+struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
+{
+	.l3proto		= PF_INET,
+	.l4proto		= IPPROTO_ICMP,
+	.name			= "icmp",
+	.pkt_to_tuple		= icmp_pkt_to_tuple,
+	.invert_tuple		= icmp_invert_tuple,
+	.print_tuple		= icmp_print_tuple,
+	.packet			= icmp_packet,
+	.new			= icmp_new,
+	.error			= icmp_error,
+	.destroy		= NULL,
+	.me			= NULL,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nlattr	= icmp_tuple_to_nlattr,
+	.nlattr_to_tuple	= icmp_nlattr_to_tuple,
+	.nla_policy		= icmp_nla_policy,
+#endif
+#ifdef CONFIG_SYSCTL
+	.ctl_table_header	= &icmp_sysctl_header,
+	.ctl_table		= icmp_sysctl_table,
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+	.ctl_compat_table	= icmp_compat_sysctl_table,
+#endif
+#endif
+};
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
new file mode 100644
index 0000000..fa2d6b6
--- /dev/null
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -0,0 +1,97 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+
+/*
+ * Hand @skb to ip_defrag() for the defrag user @user, with bottom halves
+ * disabled around the call.
+ *
+ * Returns ip_defrag()'s result.  When that is 0, the IP header checksum of
+ * @skb is recomputed before returning.
+ */
+static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
+{
+	int err;
+
+	skb_orphan(skb);
+
+	local_bh_disable();
+	err = ip_defrag(skb, user);
+	local_bh_enable();
+
+	if (err)
+		return err;
+
+	ip_send_check(ip_hdr(skb));
+	return 0;
+}
+
+/*
+ * Netfilter hook: gather IPv4 fragments ahead of connection tracking.
+ *
+ * Unfragmented packets pass with NF_ACCEPT.  A fragment is handed to
+ * nf_ct_ipv4_gather_frags(); a non-zero result there yields NF_STOLEN,
+ * a zero result NF_ACCEPT.
+ */
+static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
+					  struct sk_buff *skb,
+					  const struct net_device *in,
+					  const struct net_device *out,
+					  int (*okfn)(struct sk_buff *))
+{
+	u_int32_t user;
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
+	/* Previously seen (loopback)?  Ignore.  Do this before
+	   fragment check. */
+	if (skb->nfct)
+		return NF_ACCEPT;
+#endif
+#endif
+	/* Not a fragment: nothing to gather. */
+	if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
+		return NF_ACCEPT;
+
+	if (hooknum == NF_INET_PRE_ROUTING)
+		user = IP_DEFRAG_CONNTRACK_IN;
+	else
+		user = IP_DEFRAG_CONNTRACK_OUT;
+
+	if (nf_ct_ipv4_gather_frags(skb, user))
+		return NF_STOLEN;
+
+	return NF_ACCEPT;
+}
+
+/* ipv4_conntrack_defrag() is hooked at PRE_ROUTING and at LOCAL_OUT */
+static struct nf_hook_ops ipv4_defrag_ops[] = {
+	{
+		.hook		= ipv4_conntrack_defrag,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_INET_PRE_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
+	},
+	{
+		.hook		= ipv4_conntrack_defrag,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
+	},
+};
+
+/* Module init: register both defrag hooks. */
+static int __init nf_defrag_init(void)
+{
+	const unsigned int nr_hooks = ARRAY_SIZE(ipv4_defrag_ops);
+
+	return nf_register_hooks(ipv4_defrag_ops, nr_hooks);
+}
+
+/* Module exit: unregister both defrag hooks. */
+static void __exit nf_defrag_fini(void)
+{
+	const unsigned int nr_hooks = ARRAY_SIZE(ipv4_defrag_ops);
+
+	nf_unregister_hooks(ipv4_defrag_ops, nr_hooks);
+}
+
+/*
+ * Intentionally empty, GPL-exported function.
+ * NOTE(review): presumably exists only so that other modules can create a
+ * symbol dependency on this one -- confirm against its callers.
+ */
+void nf_defrag_ipv4_enable(void)
+{
+}
+EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable);
+
+module_init(nf_defrag_init);
+module_exit(nf_defrag_fini);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
new file mode 100644
index 0000000..c31b876
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -0,0 +1,78 @@
+/* Amanda extension for TCP NAT alteration.
+ * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
+ * based on a copy of HW's ip_nat_irc.c as well as other modules
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_amanda.h>
+
+MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
+MODULE_DESCRIPTION("Amanda NAT helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_nat_amanda");
+
+/*
+ * Amanda NAT helper.
+ *
+ * Registers the expectation @exp on the first port that is accepted,
+ * starting at the port originally requested and counting upwards, then
+ * rewrites the port number found at @matchoff/@matchlen in the UDP payload
+ * of @skb to the port chosen.
+ *
+ * Returns NF_DROP when no port could be registered, otherwise the verdict
+ * of nf_nat_mangle_udp_packet(); the expectation is withdrawn again when
+ * that verdict is not NF_ACCEPT.
+ */
+static unsigned int help(struct sk_buff *skb,
+			 enum ip_conntrack_info ctinfo,
+			 unsigned int matchoff,
+			 unsigned int matchlen,
+			 struct nf_conntrack_expect *exp)
+{
+	char buffer[sizeof("65535")];
+	unsigned int verdict;
+	u_int16_t port;
+
+	/* Connection comes from client. */
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+	exp->dir = IP_CT_DIR_ORIGINAL;
+
+	/* When you see the packet, we need to NAT it the same as
+	 * this one (ie. same IP: it will be TCP and master is UDP). */
+	exp->expectfn = nf_nat_follow_master;
+
+	/* Try to get same port: if not, try to change it.  The 16-bit
+	 * counter wrapping around to 0 ends the search. */
+	port = ntohs(exp->saved_proto.tcp.port);
+	while (port != 0) {
+		exp->tuple.dst.u.tcp.port = htons(port);
+		if (nf_ct_expect_related(exp) == 0)
+			break;
+		port++;
+	}
+
+	if (port == 0)
+		return NF_DROP;
+
+	sprintf(buffer, "%u", port);
+	verdict = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo,
+					   matchoff, matchlen,
+					   buffer, strlen(buffer));
+	if (verdict != NF_ACCEPT)
+		nf_ct_unexpect_related(exp);
+
+	return verdict;
+}
+
+/* Module exit: clear the hook pointer, then wait for an RCU grace period. */
+static void __exit nf_nat_amanda_fini(void)
+{
+	rcu_assign_pointer(nf_nat_amanda_hook, NULL);
+	synchronize_rcu();
+}
+
+/* Module init: install help() as the Amanda NAT hook; the slot must be free. */
+static int __init nf_nat_amanda_init(void)
+{
+	BUG_ON(nf_nat_amanda_hook != NULL);
+
+	rcu_assign_pointer(nf_nat_amanda_hook, help);
+
+	return 0;
+}
+
+module_init(nf_nat_amanda_init);
+module_exit(nf_nat_amanda_fini);
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
new file mode 100644
index 0000000..a65cf69
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -0,0 +1,773 @@
+/* NAT for netfilter; shared with compatibility layer. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <net/checksum.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/tcp.h> /* For tcp_prot in getorigdst */
+#include <linux/icmp.h>
+#include <linux/udp.h>
+#include <linux/jhash.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+
+static DEFINE_SPINLOCK(nf_nat_lock);
+
+static struct nf_conntrack_l3proto *l3proto __read_mostly;
+
+/* Calculated at init based on memory size */
+static unsigned int nf_nat_htable_size __read_mostly;
+
+#define MAX_IP_NAT_PROTO 256
+static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]
+ __read_mostly;
+
+/* Fetch the NAT protocol handler stored for @protonum.  The table slot
+ * is read with rcu_dereference(), so callers are expected to be inside
+ * an RCU read-side section. */
+static inline const struct nf_nat_protocol *
+__nf_nat_proto_find(u_int8_t protonum)
+{
+	const struct nf_nat_protocol *handler;
+
+	handler = rcu_dereference(nf_nat_protos[protonum]);
+	return handler;
+}
+
+/* Look up the handler for @protonum and take a reference on its owning
+ * module.  If the module reference cannot be taken, the unknown-protocol
+ * handler is returned instead.  Pair with nf_nat_proto_put(). */
+const struct nf_nat_protocol *
+nf_nat_proto_find_get(u_int8_t protonum)
+{
+	const struct nf_nat_protocol *handler;
+
+	rcu_read_lock();
+	handler = __nf_nat_proto_find(protonum);
+	if (try_module_get(handler->me)) {
+		rcu_read_unlock();
+		return handler;
+	}
+	rcu_read_unlock();
+
+	return &nf_nat_unknown_protocol;
+}
+EXPORT_SYMBOL_GPL(nf_nat_proto_find_get);
+
+/* Drop the module reference held on the owner of handler @p. */
+void
+nf_nat_proto_put(const struct nf_nat_protocol *p)
+{
+	module_put(p->me);
+}
+EXPORT_SYMBOL_GPL(nf_nat_proto_put);
+
+/* Bucket index into the by-source hash kept for every conntrack.
+ * Hashes the source address, the source protocol part and the protocol
+ * number of @tuple, then scales the 32-bit hash onto
+ * [0, nf_nat_htable_size) with a multiply and shift. */
+static inline unsigned int
+hash_by_src(const struct nf_conntrack_tuple *tuple)
+{
+	/* Original src, to ensure we map it consistently if poss. */
+	unsigned int mixed = jhash_3words((__force u32)tuple->src.u3.ip,
+					  (__force u32)tuple->src.u.all,
+					  tuple->dst.protonum, 0);
+
+	return ((u64)mixed * nf_nat_htable_size) >> 32;
+}
+
+/* Is this tuple already taken? (not by us)
+ *
+ * Conntrack does not track outgoing tuples, only incoming ones, and NAT
+ * means they have no fixed mapping.  So the tuple is inverted and the
+ * lookup is done on the incoming reply, ignoring @ignored_conntrack.
+ * A separate hash could be kept if this proves too slow. */
+int
+nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
+		  const struct nf_conn *ignored_conntrack)
+{
+	struct nf_conntrack_tuple inverse;
+
+	nf_ct_invert_tuplepr(&inverse, tuple);
+
+	return nf_conntrack_tuple_taken(&inverse, ignored_conntrack);
+}
+EXPORT_SYMBOL(nf_nat_used_tuple);
+
+/* Would source-mapping to @tuple satisfy the constraints of @range?
+ * Returns 1 if the source IP (when IP mapping is requested) and the
+ * per-protocol part (when a protocol range is specified) both fit,
+ * 0 otherwise. */
+static int
+in_range(const struct nf_conntrack_tuple *tuple,
+	 const struct nf_nat_range *range)
+{
+	const struct nf_nat_protocol *proto;
+	int fits = 0;
+
+	/* If we are supposed to map IPs, then we must be in the range
+	 * specified, otherwise let this drag us onto a new src IP. */
+	if (range->flags & IP_NAT_RANGE_MAP_IPS) {
+		if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip))
+			return 0;
+		if (ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
+			return 0;
+	}
+
+	rcu_read_lock();
+	proto = __nf_nat_proto_find(tuple->dst.protonum);
+	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
+		fits = 1;
+	else if (proto->in_range(tuple, IP_NAT_MANIP_SRC,
+				 &range->min, &range->max))
+		fits = 1;
+	rcu_read_unlock();
+
+	return fits;
+}
+
+/* Does the original-direction tuple of @ct have the same protocol,
+ * source address and source protocol part as @tuple? */
+static inline int
+same_src(const struct nf_conn *ct,
+	 const struct nf_conntrack_tuple *tuple)
+{
+	const struct nf_conntrack_tuple *orig =
+		&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+
+	if (orig->dst.protonum != tuple->dst.protonum)
+		return 0;
+	if (orig->src.u3.ip != tuple->src.u3.ip)
+		return 0;
+	return orig->src.u.all == tuple->src.u.all;
+}
+
+/* Only called for SRC manip.
+ *
+ * Walk the by-source bucket of @tuple looking for a conntrack with the
+ * same source whose existing mapping, applied to @tuple, lies within
+ * @range.  On success @result holds the mapped tuple and 1 is returned;
+ * otherwise 0 is returned (and @result may hold a rejected candidate). */
+static int
+find_appropriate_src(struct net *net,
+		     const struct nf_conntrack_tuple *tuple,
+		     struct nf_conntrack_tuple *result,
+		     const struct nf_nat_range *range)
+{
+	unsigned int bucket = hash_by_src(tuple);
+	const struct nf_conn_nat *nat;
+	const struct nf_conn *ct;
+	const struct hlist_node *n;
+	int found = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[bucket],
+				 bysource) {
+		ct = nat->ct;
+		if (!same_src(ct, tuple))
+			continue;
+
+		/* Copy source part from reply tuple. */
+		nf_ct_invert_tuplepr(result,
+				     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+		result->dst = tuple->dst;
+
+		if (in_range(result, range)) {
+			found = 1;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return found;
+}
+
+/* For [FUTURE] fragmentation handling, we want the least-used
+ * src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
+ * if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
+ * 1-65535, we don't do pro-rata allocation based on ports; we choose
+ * the ip with the lowest src-ip/dst-ip/proto usage.
+ *
+ * What the code does today: if @range asks for IP mapping, overwrite
+ * the source (SRC manip) or destination (DST manip) address of @tuple
+ * with an address from [min_ip, max_ip], selected by hashing the
+ * tuple's current source and destination addresses.  @ct is unused.
+ */
+static void
+find_best_ips_proto(struct nf_conntrack_tuple *tuple,
+		    const struct nf_nat_range *range,
+		    const struct nf_conn *ct,
+		    enum nf_nat_manip_type maniptype)
+{
+	__be32 *var_ipp;
+	/* Host order */
+	u_int32_t minip, maxip, j;
+	u64 span;
+
+	/* No IP mapping?  Do nothing. */
+	if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
+		return;
+
+	if (maniptype == IP_NAT_MANIP_SRC)
+		var_ipp = &tuple->src.u3.ip;
+	else
+		var_ipp = &tuple->dst.u3.ip;
+
+	/* Fast path: only one choice. */
+	if (range->min_ip == range->max_ip) {
+		*var_ipp = range->min_ip;
+		return;
+	}
+
+	/* Hashing source and destination IPs gives a fairly even
+	 * spread in practice (if there are a small number of IPs
+	 * involved, there usually aren't that many connections
+	 * anyway).  The consistency means that servers see the same
+	 * client coming from the same IP (some Internet Banking sites
+	 * like this), even across reboots. */
+	minip = ntohl(range->min_ip);
+	maxip = ntohl(range->max_ip);
+	j = jhash_2words((__force u32)tuple->src.u3.ip,
+			 (__force u32)tuple->dst.u3.ip, 0);
+
+	/* Number of addresses in the range, held in 64 bits: a range
+	 * spanning the whole IPv4 space has 2^32 entries, which would wrap
+	 * to 0 in 32-bit arithmetic and pin every flow to minip.  For all
+	 * other ranges the value equals the 32-bit maxip - minip + 1. */
+	span = (u64)(u_int32_t)(maxip - minip) + 1;
+
+	/* Scale the 32-bit hash onto [0, span). */
+	j = ((u64)j * span) >> 32;
+	*var_ipp = htonl(minip + j);
+}
+
+/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
+ * we change the source to map into the range. For NF_INET_PRE_ROUTING
+ * and NF_INET_LOCAL_OUT, we change the destination to map into the
+ * range. It might not be possible to get a unique tuple, but we try.
+ * At worst (or if we race), we will end up with a final duplicate in
+ * __ip_conntrack_confirm and drop the packet.
+ *
+ * @tuple:      output; receives the chosen tuple.
+ * @orig_tuple: the tuple as it looks before this manipulation.
+ * @range:      address/protocol range and flags constraining the result.
+ * @ct:         conntrack being set up; ignored in the "is it used" checks.
+ * @maniptype:  IP_NAT_MANIP_SRC or IP_NAT_MANIP_DST.
+ *
+ * No status is returned: the caller compares @tuple with @orig_tuple. */
+static void
+get_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig_tuple,
+ const struct nf_nat_range *range,
+ struct nf_conn *ct,
+ enum nf_nat_manip_type maniptype)
+{
+ struct net *net = nf_ct_net(ct);
+ const struct nf_nat_protocol *proto;
+
+ /* 1) If this srcip/proto/src-proto-part is currently mapped,
+ and that same mapping gives a unique tuple within the given
+ range, use that.
+
+ This is only required for source (ie. NAT/masq) mappings.
+ So far, we don't do local source mappings, so multiple
+ manips not an issue.
+
+ Skipped when IP_NAT_RANGE_PROTO_RANDOM is set. */
+ if (maniptype == IP_NAT_MANIP_SRC &&
+ !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
+ if (find_appropriate_src(net, orig_tuple, tuple, range)) {
+ pr_debug("get_unique_tuple: Found current src map\n");
+ if (!nf_nat_used_tuple(tuple, ct))
+ return;
+ }
+ }
+
+ /* 2) Select the least-used IP/proto combination in the given
+ range. Start again from the original tuple: step 1 may have
+ written a rejected candidate into *tuple. */
+ *tuple = *orig_tuple;
+ find_best_ips_proto(tuple, range, ct, maniptype);
+
+ /* 3) The per-protocol part of the manip is made to map into
+ the range to make a unique tuple. */
+
+ rcu_read_lock(); /* covers the handler lookup and every use of proto below */
+ proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
+
+ /* Change protocol info to have some randomization */
+ if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
+ proto->unique_tuple(tuple, range, maniptype, ct);
+ goto out;
+ }
+
+ /* Only bother mapping if it's not already in range and unique */
+ if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
+ proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
+ !nf_nat_used_tuple(tuple, ct))
+ goto out;
+
+ /* Last chance: get protocol to try to obtain unique tuple. */
+ proto->unique_tuple(tuple, range, maniptype, ct);
+out:
+ rcu_read_unlock();
+}
+
+/*
+ * Establish the NAT binding of type @maniptype for conntrack @ct,
+ * constrained by @range.
+ *
+ * Picks a tuple via get_unique_tuple(), rewrites the conntrack's reply
+ * tuple if the choice differs from the current one, links the conntrack
+ * into the by-source hash on its first binding, and marks this manip
+ * type as done.  BUGs if the manip type was already initialised.
+ *
+ * Always returns NF_ACCEPT, including when the NAT extension cannot be
+ * allocated (in which case nothing is set up).
+ */
+unsigned int
+nf_nat_setup_info(struct nf_conn *ct,
+		  const struct nf_nat_range *range,
+		  enum nf_nat_manip_type maniptype)
+{
+	struct net *net = nf_ct_net(ct);
+	struct nf_conntrack_tuple before, after;
+	struct nf_conn_nat *nat;
+	/* Sampled before any status change below: no binding done yet
+	 * means the conntrack still has to go into the source hash. */
+	int first_binding = !(ct->status & IPS_NAT_DONE_MASK);
+
+	/* nat helper or nfctnetlink also setup binding */
+	nat = nfct_nat(ct);
+	if (nat == NULL) {
+		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+		if (!nat) {
+			pr_debug("failed to add NAT extension\n");
+			return NF_ACCEPT;
+		}
+	}
+
+	NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC ||
+		     maniptype == IP_NAT_MANIP_DST);
+	BUG_ON(nf_nat_initialized(ct, maniptype));
+
+	/* What we've got will look like inverse of reply.  Normally
+	 * this is what is in the conntrack, except for prior
+	 * manipulations (future optimization: if num_manips == 0,
+	 * orig_tp = conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
+	nf_ct_invert_tuplepr(&before,
+			     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+	get_unique_tuple(&after, &before, range, ct, maniptype);
+
+	if (!nf_ct_tuple_equal(&after, &before)) {
+		struct nf_conntrack_tuple reply;
+
+		/* Alter conntrack table so will recognize replies. */
+		nf_ct_invert_tuplepr(&reply, &after);
+		nf_conntrack_alter_reply(ct, &reply);
+
+		/* Non-atomic: we own this at the moment. */
+		ct->status |= (maniptype == IP_NAT_MANIP_SRC) ?
+			      IPS_SRC_NAT : IPS_DST_NAT;
+	}
+
+	/* Place in source hash if this is the first time. */
+	if (first_binding) {
+		unsigned int bucket =
+			hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+
+		spin_lock_bh(&nf_nat_lock);
+		/* nf_conntrack_alter_reply might re-allocate the extension
+		 * area, so fetch the NAT extension again. */
+		nat = nfct_nat(ct);
+		nat->ct = ct;
+		hlist_add_head_rcu(&nat->bysource,
+				   &net->ipv4.nat_bysource[bucket]);
+		spin_unlock_bh(&nf_nat_lock);
+	}
+
+	/* It's done. */
+	set_bit(maniptype == IP_NAT_MANIP_DST ? IPS_DST_NAT_DONE_BIT
+					      : IPS_SRC_NAT_DONE_BIT,
+		&ct->status);
+
+	return NF_ACCEPT;
+}
+EXPORT_SYMBOL(nf_nat_setup_info);
+
+/* Rewrite the packet at @iphdroff so that it matches @target for the
+ * given manip type: first the per-protocol part (through the handler
+ * for @proto), then the IP source or destination address together with
+ * the IP header checksum.
+ *
+ * Returns true if succeeded, false if the header could not be made
+ * writable or the protocol handler refused. */
+static bool
+manip_pkt(u_int16_t proto,
+	  struct sk_buff *skb,
+	  unsigned int iphdroff,
+	  const struct nf_conntrack_tuple *target,
+	  enum nf_nat_manip_type maniptype)
+{
+	struct iphdr *iph;
+	const struct nf_nat_protocol *p;
+	__be32 *addr;
+	__be32 newaddr;
+
+	if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
+		return false;
+
+	iph = (void *)skb->data + iphdroff;
+
+	/* Manipulate protocol part. */
+
+	/* rcu_read_lock()ed by nf_hook_slow */
+	p = __nf_nat_proto_find(proto);
+	if (!p->manip_pkt(skb, iphdroff, target, maniptype))
+		return false;
+
+	/* Reload the header pointer after the protocol handler ran. */
+	iph = (void *)skb->data + iphdroff;
+
+	if (maniptype == IP_NAT_MANIP_SRC) {
+		addr = &iph->saddr;
+		newaddr = target->src.u3.ip;
+	} else {
+		addr = &iph->daddr;
+		newaddr = target->dst.u3.ip;
+	}
+
+	csum_replace4(&iph->check, *addr, newaddr);
+	*addr = newaddr;
+
+	return true;
+}
+
+/* Do packet manipulations according to nf_nat_setup_info.
+ *
+ * The manip type comes from @hooknum via HOOK2MANIP().  If the
+ * conntrack status says this direction needs that manip, the packet is
+ * rewritten to look like the inverse of the other direction's tuple.
+ * Returns NF_DROP if the rewrite fails, NF_ACCEPT otherwise. */
+unsigned int nf_nat_packet(struct nf_conn *ct,
+			   enum ip_conntrack_info ctinfo,
+			   unsigned int hooknum,
+			   struct sk_buff *skb)
+{
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
+	struct nf_conntrack_tuple target;
+	unsigned long statusbit;
+
+	statusbit = (mtype == IP_NAT_MANIP_SRC) ? IPS_SRC_NAT : IPS_DST_NAT;
+
+	/* Invert if this is reply dir. */
+	if (dir == IP_CT_DIR_REPLY)
+		statusbit ^= IPS_NAT_MASK;
+
+	/* Non-atomic: these bits don't change. */
+	if (!(ct->status & statusbit))
+		return NF_ACCEPT;
+
+	/* We are aiming to look like inverse of other direction. */
+	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+
+	if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype))
+		return NF_DROP;
+
+	return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(nf_nat_packet);
+
+/* Dir is direction ICMP is coming from (opposite to packet it contains)
+ *
+ * Translate an ICMP error that belongs to the NATed connection @ct: the
+ * packet header embedded in the ICMP payload is rewritten first, the
+ * ICMP checksum is recomputed unless the skb is CHECKSUM_PARTIAL, and
+ * then the outer IP header is rewritten if the conntrack status asks
+ * for this manip in this direction.
+ *
+ * Returns 1 on success and 0 on any failure (header not writable, bad
+ * checksum, ICMP redirect on a connection that is or may be NATed,
+ * inner tuple not parseable, or a failed packet manipulation). */
+int nf_nat_icmp_reply_translation(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ struct sk_buff *skb)
+{
+ struct {
+ struct icmphdr icmp;
+ struct iphdr ip;
+ } *inside; /* overlay: ICMP header followed by the embedded IP header */
+ const struct nf_conntrack_l4proto *l4proto;
+ struct nf_conntrack_tuple inner, target;
+ int hdrlen = ip_hdrlen(skb);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ unsigned long statusbit;
+ enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+
+ if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ return 0;
+
+ inside = (void *)skb->data + ip_hdrlen(skb);
+
+ /* We're actually going to mangle it beyond trivial checksum
+ adjustment, so make sure the current checksum is correct. */
+ if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
+ return 0;
+
+ /* Must be RELATED */
+ NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED ||
+ skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
+
+ /* Redirects on non-null nats must be dropped, else they'll
+ start talking to each other without our translation, and be
+ confused... --RR */
+ if (inside->icmp.type == ICMP_REDIRECT) {
+ /* If NAT isn't finished, assume it and drop. */
+ if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+ return 0;
+
+ if (ct->status & IPS_NAT_MASK)
+ return 0;
+ }
+
+ pr_debug("icmp_reply_translation: translating error %p manip %u "
+ "dir %s\n", skb, manip,
+ dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
+
+ /* rcu_read_lock()ed by nf_hook_slow */
+ l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
+
+ /* Build the tuple of the embedded packet; the two offsets are the
+ start of the inner IP header and the start of its payload. */
+ if (!nf_ct_get_tuple(skb,
+ ip_hdrlen(skb) + sizeof(struct icmphdr),
+ (ip_hdrlen(skb) +
+ sizeof(struct icmphdr) + inside->ip.ihl * 4),
+ (u_int16_t)AF_INET,
+ inside->ip.protocol,
+ &inner, l3proto, l4proto))
+ return 0;
+
+ /* Change inner back to look like incoming packet. We do the
+ opposite manip on this hook to normal, because it might not
+ pass all hooks (locally-generated ICMP). Consider incoming
+ packet: PREROUTING (DST manip), routing produces ICMP, goes
+ through POSTROUTING (which must correct the DST manip). */
+ if (!manip_pkt(inside->ip.protocol, skb,
+ ip_hdrlen(skb) + sizeof(inside->icmp),
+ &ct->tuplehash[!dir].tuple,
+ !manip))
+ return 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ /* Reloading "inside" here since manip_pkt may have changed
+ skb->data while rewriting the inner packet. */
+ inside = (void *)skb->data + ip_hdrlen(skb);
+ inside->icmp.checksum = 0;
+ inside->icmp.checksum =
+ csum_fold(skb_checksum(skb, hdrlen,
+ skb->len - hdrlen, 0));
+ }
+
+ /* Change outer to look like the reply to an incoming packet
+ * (proto 0 means don't invert per-proto part). */
+ if (manip == IP_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
+
+ /* Invert if this is reply dir. */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ if (ct->status & statusbit) {
+ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+ if (!manip_pkt(0, skb, 0, &target, manip))
+ return 0;
+ }
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
+
+/* Protocol registration.
+ *
+ * Install @proto in the slot for its protocol number.  Fails with
+ * -EBUSY if the slot holds anything other than the unknown-protocol
+ * handler; returns 0 on success. */
+int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
+{
+	int ret = -EBUSY;
+
+	spin_lock_bh(&nf_nat_lock);
+	if (nf_nat_protos[proto->protonum] == &nf_nat_unknown_protocol) {
+		rcu_assign_pointer(nf_nat_protos[proto->protonum], proto);
+		ret = 0;
+	}
+	spin_unlock_bh(&nf_nat_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(nf_nat_protocol_register);
+
+/* No one stores the protocol anywhere; simply delete it.
+ *
+ * The slot is reset to the unknown-protocol handler under nf_nat_lock,
+ * then an RCU grace period is awaited before returning. */
+void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
+{
+	spin_lock_bh(&nf_nat_lock);
+	rcu_assign_pointer(nf_nat_protos[proto->protonum],
+			   &nf_nat_unknown_protocol);
+	spin_unlock_bh(&nf_nat_lock);
+
+	synchronize_rcu();
+}
+EXPORT_SYMBOL(nf_nat_protocol_unregister);
+
+/* No one is using the conntrack by the time this is called.
+ *
+ * Extension destroy callback: unlink the NAT extension of @ct from the
+ * by-source hash.  Nothing to do if there is no NAT extension or it
+ * was never given a back-pointer to its conntrack. */
+static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
+{
+	struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
+
+	if (!nat)
+		return;
+	if (!nat->ct)
+		return;
+
+	NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK);
+
+	spin_lock_bh(&nf_nat_lock);
+	hlist_del_rcu(&nat->bysource);
+	spin_unlock_bh(&nf_nat_lock);
+}
+
+/* Extension move callback: the NAT extension is moving from @old to
+ * @new.  If the old copy has a conntrack back-pointer and that
+ * conntrack has a NAT binding done, carry the back-pointer over and
+ * swap the by-source hash node in place under nf_nat_lock. */
+static void nf_nat_move_storage(void *new, void *old)
+{
+	struct nf_conn_nat *dst = new;
+	struct nf_conn_nat *src = old;
+	struct nf_conn *ct = src->ct;
+
+	if (ct == NULL)
+		return;
+	if ((ct->status & IPS_NAT_DONE_MASK) == 0)
+		return;
+
+	spin_lock_bh(&nf_nat_lock);
+	dst->ct = ct;
+	hlist_replace_rcu(&src->bysource, &dst->bysource);
+	spin_unlock_bh(&nf_nat_lock);
+}
+
+/* Conntrack extension descriptor for the per-connection NAT state
+ * (struct nf_conn_nat), with its destroy and move callbacks. */
+static struct nf_ct_ext_type nat_extend __read_mostly = {
+	.id		= NF_CT_EXT_NAT,
+	.flags		= NF_CT_EXT_F_PREALLOC,
+	.len		= sizeof(struct nf_conn_nat),
+	.align		= __alignof__(struct nf_conn_nat),
+	.destroy	= nf_nat_cleanup_conntrack,
+	.move		= nf_nat_move_storage,
+};
+
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+/* Netlink policy for the nested CTA_NAT_PROTO attribute: both port
+ * bounds are 16-bit values. */
+static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
+	[CTA_PROTONAT_PORT_MIN]	= { .type = NLA_U16 },
+	[CTA_PROTONAT_PORT_MAX]	= { .type = NLA_U16 },
+};
+
+/* Parse the nested protocol attribute @attr and, if the NAT handler
+ * for @ct's protocol provides nlattr_to_range(), let it fill @range.
+ * Returns a negative error from parsing or from the handler; otherwise
+ * the non-negative parse result when the handler has no such callback,
+ * or the handler's own return value. */
+static int nfnetlink_parse_nat_proto(struct nlattr *attr,
+				     const struct nf_conn *ct,
+				     struct nf_nat_range *range)
+{
+	struct nlattr *tb[CTA_PROTONAT_MAX+1];
+	const struct nf_nat_protocol *handler;
+	int rc;
+
+	rc = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
+	if (rc < 0)
+		return rc;
+
+	handler = nf_nat_proto_find_get(nf_ct_protonum(ct));
+	if (handler->nlattr_to_range != NULL)
+		rc = handler->nlattr_to_range(tb, range);
+	nf_nat_proto_put(handler);
+
+	return rc;
+}
+
+/* Netlink policy for a NAT attribute: both address bounds are 32-bit
+ * values. */
+static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
+	[CTA_NAT_MINIP]	= { .type = NLA_U32 },
+	[CTA_NAT_MAXIP]	= { .type = NLA_U32 },
+};
+
+/* Convert the netlink NAT attribute @nat into @range.
+ *
+ * @range is zeroed first.  max_ip defaults to min_ip when only the
+ * minimum is supplied, and IP mapping is requested whenever min_ip is
+ * non-zero.  An optional nested protocol attribute is handed to
+ * nfnetlink_parse_nat_proto().  Returns 0 on success or a negative
+ * error. */
+static int
+nfnetlink_parse_nat(struct nlattr *nat,
+		    const struct nf_conn *ct, struct nf_nat_range *range)
+{
+	struct nlattr *tb[CTA_NAT_MAX+1];
+	int rc;
+
+	memset(range, 0, sizeof(*range));
+
+	rc = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
+	if (rc < 0)
+		return rc;
+
+	if (tb[CTA_NAT_MINIP])
+		range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
+
+	if (tb[CTA_NAT_MAXIP])
+		range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
+	else
+		range->max_ip = range->min_ip;
+
+	if (range->min_ip)
+		range->flags |= IP_NAT_RANGE_MAP_IPS;
+
+	if (tb[CTA_NAT_PROTO]) {
+		rc = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
+		if (rc < 0)
+			return rc;
+	}
+
+	return 0;
+}
+
+/* ctnetlink entry point: parse @attr into a NAT range and set up the
+ * @manip binding on @ct.  Returns -EINVAL if the attribute cannot be
+ * parsed, -EEXIST if that manip type is already initialised, otherwise
+ * whatever nf_nat_setup_info() returns. */
+static int
+nfnetlink_parse_nat_setup(struct nf_conn *ct,
+			  enum nf_nat_manip_type manip,
+			  struct nlattr *attr)
+{
+	struct nf_nat_range range;
+	int rc;
+
+	rc = nfnetlink_parse_nat(attr, ct, &range);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (nf_nat_initialized(ct, manip))
+		return -EEXIST;
+
+	return nf_nat_setup_info(ct, &range, manip);
+}
+#else
+/* Stub used when ctnetlink is not configured: NAT setup via netlink is
+ * not supported. */
+static int
+nfnetlink_parse_nat_setup(struct nf_conn *ct,
+			  enum nf_nat_manip_type manip,
+			  struct nlattr *attr)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+/* Per-namespace init: allocate the by-source hash table.  Returns 0 on
+ * success or -ENOMEM if the allocation fails. */
+static int __net_init nf_nat_net_init(struct net *net)
+{
+	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
+						       &net->ipv4.nat_vmalloced);
+
+	return net->ipv4.nat_bysource ? 0 : -ENOMEM;
+}
+
+/* Clear NAT section of all conntracks, in case we're loaded again.
+ *
+ * Iterator callback: zero the NAT extension of conntrack @i (if it has
+ * one) and clear its NAT-related status bits.  Always returns 0. */
+static int clean_nat(struct nf_conn *i, void *data)
+{
+	struct nf_conn_nat *nat = nfct_nat(i);
+
+	if (nat != NULL) {
+		memset(nat, 0, sizeof(*nat));
+		i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK |
+			       IPS_SEQ_ADJUST);
+	}
+
+	return 0;
+}
+
+/* Per-namespace exit: run clean_nat() over the namespace's conntracks,
+ * wait for an RCU grace period, then free the by-source hash table. */
+static void __net_exit nf_nat_net_exit(struct net *net)
+{
+	nf_ct_iterate_cleanup(net, &clean_nat, NULL);
+
+	synchronize_rcu();
+
+	nf_ct_free_hashtable(net->ipv4.nat_bysource,
+			     net->ipv4.nat_vmalloced,
+			     nf_nat_htable_size);
+}
+
+/* Per-network-namespace setup and teardown callbacks for NAT. */
+static struct pernet_operations nf_nat_net_ops = {
+	.exit	= nf_nat_net_exit,
+	.init	= nf_nat_net_init,
+};
+
+/* Module init for the NAT core.
+ *
+ * Registers the NAT conntrack extension and the per-namespace
+ * operations, fills the protocol table (unknown handler everywhere,
+ * then TCP, UDP and ICMP), marks the untracked conntrack as NAT-done,
+ * takes a reference on the IPv4 l3proto and publishes the sequence
+ * adjustment and ctnetlink NAT setup hooks.
+ *
+ * Returns 0 on success or the negative error from a failed
+ * registration. */
+static int __init nf_nat_init(void)
+{
+	size_t proto;
+	int err;
+
+	need_ipv4_conntrack();
+
+	err = nf_ct_extend_register(&nat_extend);
+	if (err < 0) {
+		printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
+		return err;
+	}
+
+	/* Leave them the same for the moment. */
+	nf_nat_htable_size = nf_conntrack_htable_size;
+
+	err = register_pernet_subsys(&nf_nat_net_ops);
+	if (err < 0) {
+		/* Undo the extension registration done above. */
+		nf_ct_extend_unregister(&nat_extend);
+		return err;
+	}
+
+	/* Sew in builtin protocols. */
+	spin_lock_bh(&nf_nat_lock);
+	for (proto = 0; proto < MAX_IP_NAT_PROTO; proto++)
+		rcu_assign_pointer(nf_nat_protos[proto],
+				   &nf_nat_unknown_protocol);
+	rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
+	rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
+	rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
+	spin_unlock_bh(&nf_nat_lock);
+
+	/* Initialize fake conntrack so that NAT will skip it */
+	nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
+
+	l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
+
+	BUG_ON(nf_nat_seq_adjust_hook != NULL);
+	rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
+
+	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
+	rcu_assign_pointer(nfnetlink_parse_nat_setup_hook,
+			   nfnetlink_parse_nat_setup);
+
+	return 0;
+}
+
+/* Module exit for the NAT core: unregister the per-namespace
+ * operations, drop the l3proto reference, unregister the conntrack
+ * extension, clear both hooks published at init and finally call
+ * synchronize_net(). */
+static void __exit nf_nat_cleanup(void)
+{
+	unregister_pernet_subsys(&nf_nat_net_ops);
+	nf_ct_l3proto_put(l3proto);
+	nf_ct_extend_unregister(&nat_extend);
+
+	rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
+	rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL);
+
+	synchronize_net();
+}
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf-nat-ipv4");
+
+module_init(nf_nat_init);
+module_exit(nf_nat_cleanup);
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
new file mode 100644
index 0000000..a1d5d58
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -0,0 +1,165 @@
+/* FTP extension for TCP NAT alteration. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_ftp.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+MODULE_DESCRIPTION("ftp NAT helper");
+MODULE_ALIAS("ip_nat_ftp");
+
+/* FIXME: Time out? --RR */
+
+/* Rewrite a PORT/PASV style address, e.g. 132,235,1,2,24,131: the four
+ * bytes of @newip followed by the high and low byte of @port, replacing
+ * @matchlen bytes at @matchoff.  Returns the result of
+ * nf_nat_mangle_tcp_packet(). */
+static int
+mangle_rfc959_packet(struct sk_buff *skb,
+		     __be32 newip,
+		     u_int16_t port,
+		     unsigned int matchoff,
+		     unsigned int matchlen,
+		     struct nf_conn *ct,
+		     enum ip_conntrack_info ctinfo)
+{
+	char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];
+	unsigned int replen;
+
+	sprintf(buffer, "%u,%u,%u,%u,%u,%u",
+		NIPQUAD(newip), port>>8, port&0xFF);
+	replen = strlen(buffer);
+
+	pr_debug("calling nf_nat_mangle_tcp_packet\n");
+
+	return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
+					matchlen, buffer, replen);
+}
+
+/* Rewrite an EPRT style address, e.g. |1|132.235.1.2|6275|, built from
+ * @newip in dotted-quad form and @port in decimal, replacing @matchlen
+ * bytes at @matchoff.  Returns the result of
+ * nf_nat_mangle_tcp_packet(). */
+static int
+mangle_eprt_packet(struct sk_buff *skb,
+		   __be32 newip,
+		   u_int16_t port,
+		   unsigned int matchoff,
+		   unsigned int matchlen,
+		   struct nf_conn *ct,
+		   enum ip_conntrack_info ctinfo)
+{
+	char buffer[sizeof("|1|255.255.255.255|65535|")];
+	unsigned int replen;
+
+	sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port);
+	replen = strlen(buffer);
+
+	pr_debug("calling nf_nat_mangle_tcp_packet\n");
+
+	return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
+					matchlen, buffer, replen);
+}
+
+/* Rewrite an EPSV style reply, e.g. |||6275|: only @port is written,
+ * @newip is not used.  Replaces @matchlen bytes at @matchoff and
+ * returns the result of nf_nat_mangle_tcp_packet(). */
+static int
+mangle_epsv_packet(struct sk_buff *skb,
+		   __be32 newip,
+		   u_int16_t port,
+		   unsigned int matchoff,
+		   unsigned int matchlen,
+		   struct nf_conn *ct,
+		   enum ip_conntrack_info ctinfo)
+{
+	char buffer[sizeof("|||65535|")];
+	unsigned int replen;
+
+	sprintf(buffer, "|||%u|", port);
+	replen = strlen(buffer);
+
+	pr_debug("calling nf_nat_mangle_tcp_packet\n");
+
+	return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
+					matchlen, buffer, replen);
+}
+
+/* Payload rewriters indexed by FTP command type; PORT and PASV share
+ * the RFC 959 comma-separated format. */
+static int (*mangle[])(struct sk_buff *, __be32, u_int16_t,
+		       unsigned int, unsigned int, struct nf_conn *,
+		       enum ip_conntrack_info)
+= {
+	[NF_CT_FTP_PORT]	= mangle_rfc959_packet,
+	[NF_CT_FTP_PASV]	= mangle_rfc959_packet,
+	[NF_CT_FTP_EPRT]	= mangle_eprt_packet,
+	[NF_CT_FTP_EPSV]	= mangle_epsv_packet
+};
+
+/* So, this packet has hit the connection tracking matching code.
+ * Mangle it, and change the expectation to match the new version.
+ *
+ * Registers @exp for the data connection, searching upwards from the
+ * announced port for one that can be registered, then rewrites the
+ * address text at @matchoff/@matchlen using the rewriter for @type.
+ *
+ * Returns NF_ACCEPT on success.  Returns NF_DROP if no port could be
+ * reserved or the payload could not be rewritten; in the latter case
+ * the expectation is withdrawn again. */
+static unsigned int nf_nat_ftp(struct sk_buff *skb,
+			       enum ip_conntrack_info ctinfo,
+			       enum nf_ct_ftp_type type,
+			       unsigned int matchoff,
+			       unsigned int matchlen,
+			       struct nf_conntrack_expect *exp)
+{
+	__be32 newip;
+	u_int16_t port;
+	int dir = CTINFO2DIR(ctinfo);
+	struct nf_conn *ct = exp->master;
+
+	pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
+
+	/* Connection will come from wherever this packet goes, hence !dir */
+	newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+	exp->dir = !dir;
+
+	/* When the expected connection shows up, we need to NAT it the
+	 * same as this one. */
+	exp->expectfn = nf_nat_follow_master;
+
+	/* Try to get same port: if not, try to change it.  port is 16 bits
+	 * wide, so the loop ends when the increment wraps to 0. */
+	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+		int res;
+
+		exp->tuple.dst.u.tcp.port = htons(port);
+		res = nf_ct_expect_related(exp);
+		if (res == 0)
+			break;
+		/* Only a clash with an existing expectation (expected to be
+		 * reported as -EBUSY) can be cured by trying another port;
+		 * any other failure would just repeat for every port. */
+		if (res != -EBUSY) {
+			port = 0;
+			break;
+		}
+	}
+
+	if (port == 0)
+		return NF_DROP;
+
+	if (!mangle[type](skb, newip, port, matchoff, matchlen, ct, ctinfo)) {
+		nf_ct_unexpect_related(exp);
+		return NF_DROP;
+	}
+	return NF_ACCEPT;
+}
+
+/* Module exit: clear the FTP NAT hook, then wait for an RCU grace
+ * period before returning. */
+static void __exit nf_nat_ftp_fini(void)
+{
+	rcu_assign_pointer(nf_nat_ftp_hook, NULL);
+	synchronize_rcu();
+}
+
+/* Module init: publish nf_nat_ftp() as the FTP NAT hook.  The hook
+ * must not already be set (BUG otherwise).  Always returns 0. */
+static int __init nf_nat_ftp_init(void)
+{
+	BUG_ON(nf_nat_ftp_hook != NULL);
+
+	rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp);
+
+	return 0;
+}
+
+/* Prior to 2.6.11, we had a ports param.  No longer, but don't break
+ * users: setting "ports" is accepted, logs a notice and is otherwise
+ * ignored (always returns 0). */
+static int warn_set(const char *val, struct kernel_param *kp)
+{
+	printk(KERN_INFO KBUILD_MODNAME
+	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
+
+	return 0;
+}
+module_param_call(ports, warn_set, NULL, NULL, 0);
+
+module_init(nf_nat_ftp_init);
+module_exit(nf_nat_ftp_fini);
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
new file mode 100644
index 0000000..ee47bf2
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -0,0 +1,584 @@
+/*
+ * H.323 extension for NAT alteration.
+ *
+ * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ *
+ * This source code is licensed under General Public License version 2.
+ *
+ * Based on the 'brute force' H.323 NAT module by
+ * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_h323.h>
+
+/****************************************************************************/
+/*
+ * Overwrite an IPv4 address/port pair inside the payload of @skb.
+ *
+ * The pair is written as a packed 6-byte record (__be32 ip, __be16 port)
+ * at payload offset @dataoff + @addroff, replacing the same number of
+ * bytes.  TCP packets go through nf_nat_mangle_tcp_packet(), everything
+ * else through nf_nat_mangle_udp_packet().
+ *
+ * On success *@data is re-pointed into the skb, because mangling may have
+ * moved the packet data.  Returns 0 on success, -1 on failure.
+ */
+static int set_addr(struct sk_buff *skb,
+		    unsigned char **data, int dataoff,
+		    unsigned int addroff, __be32 ip, __be16 port)
+{
+	struct {
+		__be32 ip;
+		__be16 port;
+	} __attribute__ ((__packed__)) buf;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct tcphdr _tcph;
+	const struct tcphdr *th;
+
+	buf.ip = ip;
+	buf.port = port;
+	addroff += dataoff;
+
+	if (ip_hdr(skb)->protocol != IPPROTO_TCP) {
+		if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
+					      addroff, sizeof(buf),
+					      (char *) &buf, sizeof(buf))) {
+			if (net_ratelimit())
+				printk("nf_nat_h323: nf_nat_mangle_udp_packet"
+				       " error\n");
+			return -1;
+		}
+		/* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
+		 * or pull everything in a linear buffer, so we can safely
+		 * use the skb pointers now */
+		*data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
+		return 0;
+	}
+
+	if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+				      addroff, sizeof(buf),
+				      (char *) &buf, sizeof(buf))) {
+		if (net_ratelimit())
+			printk("nf_nat_h323: nf_nat_mangle_tcp_packet"
+			       " error\n");
+		return -1;
+	}
+
+	/* Relocate data pointer */
+	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
+	if (th == NULL)
+		return -1;
+	*data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff;
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Rewrite the IPv4 address and port of an H.225 TransportAddress.
+ * Thin wrapper around set_addr(); returns whatever set_addr() returns.
+ */
+static int set_h225_addr(struct sk_buff *skb,
+			 unsigned char **data, int dataoff,
+			 TransportAddress *taddr,
+			 union nf_inet_addr *addr, __be16 port)
+{
+	/* handed to set_addr() as the offset of the address in the payload */
+	unsigned int addroff = taddr->ipAddress.ip;
+	__be32 ip = addr->ip;
+
+	return set_addr(skb, data, dataoff, addroff, ip, port);
+}
+
+/****************************************************************************/
+/*
+ * Rewrite the IPv4 address and port of an H.245 unicast TransportAddress.
+ * Thin wrapper around set_addr(); returns whatever set_addr() returns.
+ */
+static int set_h245_addr(struct sk_buff *skb,
+			 unsigned char **data, int dataoff,
+			 H245_TransportAddress *taddr,
+			 union nf_inet_addr *addr, __be16 port)
+{
+	/* handed to set_addr() as the offset of the address in the payload */
+	unsigned int addroff = taddr->unicastAddress.iPAddress.network;
+	__be32 ip = addr->ip;
+
+	return set_addr(skb, data, dataoff, addroff, ip, port);
+}
+
+/****************************************************************************/
+/*
+ * Rewrite the call signalling address in a RAS message.
+ *
+ * Walks the @count entries of @taddr and rewrites the first one whose
+ * address/port equals this direction's source (GW->GK) or destination
+ * (GK->GW) address together with the recorded signalling port.
+ *
+ * Returns the result of set_h225_addr() for the rewritten entry, or 0 if
+ * no entry matched.
+ */
+static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
+			enum ip_conntrack_info ctinfo,
+			unsigned char **data,
+			TransportAddress *taddr, int count)
+{
+	const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+	int dir = CTINFO2DIR(ctinfo);
+	int i;
+	__be16 port;
+	union nf_inet_addr addr;
+
+	for (i = 0; i < count; i++) {
+		if (get_h225_addr(ct, *data, &taddr[i], &addr, &port)) {
+			if (addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
+			    port == info->sig_port[dir]) {
+				/* GW->GK */
+
+				/* Fix for Gnomemeeting: if the first entry
+				 * is a 127.x.x.x address, rewrite that one
+				 * instead of the entry that matched */
+				if (i > 0 &&
+				    get_h225_addr(ct, *data, &taddr[0],
+						  &addr, &port) &&
+				    (ntohl(addr.ip) & 0xff000000) == 0x7f000000)
+					i = 0;
+
+				/* ports are __be16: convert for printing */
+				pr_debug("nf_nat_ras: set signal address "
+					 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+					 NIPQUAD(addr.ip), ntohs(port),
+					 NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip),
+					 ntohs(info->sig_port[!dir]));
+				return set_h225_addr(skb, data, 0, &taddr[i],
+						     &ct->tuplehash[!dir].
+						     tuple.dst.u3,
+						     info->sig_port[!dir]);
+			} else if (addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
+				   port == info->sig_port[dir]) {
+				/* GK->GW */
+				pr_debug("nf_nat_ras: set signal address "
+					 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+					 NIPQUAD(addr.ip), ntohs(port),
+					 NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip),
+					 ntohs(info->sig_port[!dir]));
+				return set_h225_addr(skb, data, 0, &taddr[i],
+						     &ct->tuplehash[!dir].
+						     tuple.src.u3,
+						     info->sig_port[!dir]);
+			}
+		}
+	}
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Rewrite the RAS address in a RAS message.
+ *
+ * The first of the @count entries in @taddr that equals this direction's
+ * source address and UDP source port is replaced by the destination
+ * address/port of the opposite direction's tuple.
+ *
+ * Returns the result of set_h225_addr() for that entry, or 0 if no entry
+ * matched.
+ */
+static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
+			enum ip_conntrack_info ctinfo,
+			unsigned char **data,
+			TransportAddress *taddr, int count)
+{
+	int dir = CTINFO2DIR(ctinfo);
+	union nf_inet_addr addr;
+	__be16 port;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		/* skip entries that cannot be decoded */
+		if (!get_h225_addr(ct, *data, &taddr[i], &addr, &port))
+			continue;
+
+		/* skip entries that are not our own source address/port */
+		if (addr.ip != ct->tuplehash[dir].tuple.src.u3.ip ||
+		    port != ct->tuplehash[dir].tuple.src.u.udp.port)
+			continue;
+
+		pr_debug("nf_nat_ras: set rasAddress "
+			 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+			 NIPQUAD(addr.ip), ntohs(port),
+			 NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip),
+			 ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port));
+		return set_h225_addr(skb, data, 0, &taddr[i],
+				     &ct->tuplehash[!dir].tuple.dst.u3,
+				     ct->tuplehash[!dir].tuple.dst.u.udp.port);
+	}
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Set up NAT for an RTP/RTCP channel pair announced in an H.245 message.
+ *
+ * Registers @rtp_exp and @rtcp_exp on a free pair of consecutive ports
+ * (RTP on nated_port, RTCP on nated_port + 1), rewrites the announced
+ * address in the packet and records the port mapping in the master's
+ * H.323 info.
+ *
+ * Returns 0 on success, and also when no slot or no port pair is
+ * available (the packet is then left untouched).  Returns -1 if the
+ * packet could not be rewritten; both expectations are removed then.
+ */
+static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
+			enum ip_conntrack_info ctinfo,
+			unsigned char **data, int dataoff,
+			H245_TransportAddress *taddr,
+			__be16 port, __be16 rtp_port,
+			struct nf_conntrack_expect *rtp_exp,
+			struct nf_conntrack_expect *rtcp_exp)
+{
+	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+	int dir = CTINFO2DIR(ctinfo);
+	int i;
+	u_int16_t nated_port;
+
+	/* Set expectations for NAT */
+	rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
+	rtp_exp->expectfn = nf_nat_follow_master;
+	rtp_exp->dir = !dir;
+	rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
+	rtcp_exp->expectfn = nf_nat_follow_master;
+	rtcp_exp->dir = !dir;
+
+	/* Lookup existing expects */
+	for (i = 0; i < H323_RTP_CHANNEL_MAX; i++) {
+		if (info->rtp_port[i][dir] == rtp_port) {
+			/* Expected */
+
+			/* Use allocated ports first. This will refresh
+			 * the expects */
+			rtp_exp->tuple.dst.u.udp.port = info->rtp_port[i][dir];
+			rtcp_exp->tuple.dst.u.udp.port =
+			    htons(ntohs(info->rtp_port[i][dir]) + 1);
+			break;
+		} else if (info->rtp_port[i][dir] == 0) {
+			/* Not expected */
+			break;
+		}
+	}
+
+	/* Run out of expectations */
+	if (i >= H323_RTP_CHANNEL_MAX) {
+		if (net_ratelimit())
+			printk("nf_nat_h323: out of expectations\n");
+		return 0;
+	}
+
+	/* Try to get a pair of ports. */
+	for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
+	     nated_port != 0; nated_port += 2) {
+		int ret;
+
+		rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
+		ret = nf_ct_expect_related(rtp_exp);
+		if (ret == 0) {
+			rtcp_exp->tuple.dst.u.udp.port =
+			    htons(nated_port + 1);
+			ret = nf_ct_expect_related(rtcp_exp);
+			if (ret == 0)
+				break;
+			/* RTCP half failed: give the RTP half back */
+			nf_ct_unexpect_related(rtp_exp);
+		}
+		if (ret != -EBUSY) {
+			/* Only -EBUSY means "port taken".  Other errors
+			 * will not go away with a different pair, so stop
+			 * instead of scanning the whole port space. */
+			nated_port = 0;
+			break;
+		}
+	}
+
+	if (nated_port == 0) {	/* No port available */
+		if (net_ratelimit())
+			printk("nf_nat_h323: out of RTP ports\n");
+		return 0;
+	}
+
+	/* Modify signal: announce the RTCP port (nated_port + 1) if the
+	 * original port was odd, the RTP port otherwise */
+	if (set_h245_addr(skb, data, dataoff, taddr,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  htons((port & htons(1)) ? nated_port + 1 :
+						    nated_port)) == 0) {
+		/* Save ports */
+		info->rtp_port[i][dir] = rtp_port;
+		info->rtp_port[i][!dir] = htons(nated_port);
+	} else {
+		nf_ct_unexpect_related(rtp_exp);
+		nf_ct_unexpect_related(rtcp_exp);
+		return -1;
+	}
+
+	/* Success */
+	pr_debug("nf_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+		 NIPQUAD(rtp_exp->tuple.src.u3.ip),
+		 ntohs(rtp_exp->tuple.src.u.udp.port),
+		 NIPQUAD(rtp_exp->tuple.dst.u3.ip),
+		 ntohs(rtp_exp->tuple.dst.u.udp.port));
+	pr_debug("nf_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+		 NIPQUAD(rtcp_exp->tuple.src.u3.ip),
+		 ntohs(rtcp_exp->tuple.src.u.udp.port),
+		 NIPQUAD(rtcp_exp->tuple.dst.u3.ip),
+		 ntohs(rtcp_exp->tuple.dst.u.udp.port));
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Set up NAT for a T.120 channel announced in an H.245 message.
+ *
+ * Registers @exp on @port or the next free port above it and rewrites
+ * the announced address in the packet.
+ *
+ * Returns 0 on success, and also when no port is available (the packet
+ * is then left untouched).  Returns -1 if the packet could not be
+ * rewritten; the expectation is removed then.
+ */
+static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
+		    enum ip_conntrack_info ctinfo,
+		    unsigned char **data, int dataoff,
+		    H245_TransportAddress *taddr, __be16 port,
+		    struct nf_conntrack_expect *exp)
+{
+	int dir = CTINFO2DIR(ctinfo);
+	u_int16_t nated_port = ntohs(port);
+
+	/* Set expectations for NAT */
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+	exp->expectfn = nf_nat_follow_master;
+	exp->dir = !dir;
+
+	/* Try to get same port: if not, try to change it. */
+	for (; nated_port != 0; nated_port++) {
+		int ret;
+
+		exp->tuple.dst.u.tcp.port = htons(nated_port);
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		if (ret != -EBUSY) {
+			/* Only -EBUSY means "port taken"; other errors
+			 * will not go away with a different port. */
+			nated_port = 0;
+			break;
+		}
+	}
+
+	if (nated_port == 0) {	/* No port available */
+		if (net_ratelimit())
+			printk("nf_nat_h323: out of TCP ports\n");
+		return 0;
+	}
+
+	/* Modify signal */
+	if (set_h245_addr(skb, data, dataoff, taddr,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  htons(nated_port)) < 0) {
+		nf_ct_unexpect_related(exp);
+		return -1;
+	}
+
+	pr_debug("nf_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+		 NIPQUAD(exp->tuple.src.u3.ip),
+		 ntohs(exp->tuple.src.u.tcp.port),
+		 NIPQUAD(exp->tuple.dst.u3.ip),
+		 ntohs(exp->tuple.dst.u.tcp.port));
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Set up NAT for an H.245 channel announced in a Q.931 message.
+ *
+ * Registers @exp on a free port, rewrites the announced address in the
+ * packet and records the port mapping in info->sig_port[].  If @port is
+ * already recorded for this direction, the search starts at the port
+ * previously recorded for the other direction.
+ *
+ * Returns 0 on success, and also when no port is available (the packet
+ * is then left untouched).  Returns -1 if the packet could not be
+ * rewritten; the expectation is removed then.
+ */
+static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
+		    enum ip_conntrack_info ctinfo,
+		    unsigned char **data, int dataoff,
+		    TransportAddress *taddr, __be16 port,
+		    struct nf_conntrack_expect *exp)
+{
+	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+	int dir = CTINFO2DIR(ctinfo);
+	u_int16_t nated_port = ntohs(port);
+
+	/* Set expectations for NAT */
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+	exp->expectfn = nf_nat_follow_master;
+	exp->dir = !dir;
+
+	/* Check existing expects */
+	if (info->sig_port[dir] == port)
+		nated_port = ntohs(info->sig_port[!dir]);
+
+	/* Try to get same port: if not, try to change it. */
+	for (; nated_port != 0; nated_port++) {
+		int ret;
+
+		exp->tuple.dst.u.tcp.port = htons(nated_port);
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		if (ret != -EBUSY) {
+			/* Only -EBUSY means "port taken"; other errors
+			 * will not go away with a different port. */
+			nated_port = 0;
+			break;
+		}
+	}
+
+	if (nated_port == 0) {	/* No port available */
+		if (net_ratelimit())
+			printk("nf_nat_q931: out of TCP ports\n");
+		return 0;
+	}
+
+	/* Modify signal */
+	if (set_h225_addr(skb, data, dataoff, taddr,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  htons(nated_port)) == 0) {
+		/* Save ports */
+		info->sig_port[dir] = port;
+		info->sig_port[!dir] = htons(nated_port);
+	} else {
+		nf_ct_unexpect_related(exp);
+		return -1;
+	}
+
+	pr_debug("nf_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+		 NIPQUAD(exp->tuple.src.u3.ip),
+		 ntohs(exp->tuple.src.u.tcp.port),
+		 NIPQUAD(exp->tuple.dst.u3.ip),
+		 ntohs(exp->tuple.dst.u.tcp.port));
+
+	return 0;
+}
+
+/****************************************************************************
+ * This conntrack expect function replaces nf_conntrack_q931_expect()
+ * which was set by nf_conntrack_h323.c.
+ *
+ * If the expectation names a specific source address, the new connection
+ * is simply NATed like its master (nf_nat_follow_master()).  Otherwise the
+ * source and destination mappings are set up by hand below.
+ ****************************************************************************/
+static void ip_nat_q931_expect(struct nf_conn *new,
+ struct nf_conntrack_expect *this)
+{
+ struct nf_nat_range range;
+
+ if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */
+ nf_nat_follow_master(new, this);
+ return;
+ }
+
+ /* This must be a fresh one. */
+ BUG_ON(new->status & IPS_NAT_DONE_MASK);
+
+ /* Change src to where master sends to */
+ /* Note: the address is taken from the NEW connection's own tuple here,
+ * not from the master's as nf_nat_follow_master() does. */
+ range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
+ nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
+
+ /* For DST manip, map port here to where it's expected. */
+ /* saved_proto holds the destination port the expectation had before
+ * the NAT helper replaced it with the NATed port. */
+ range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+ range.min = range.max = this->saved_proto;
+ range.min_ip = range.max_ip =
+ new->master->tuplehash[!this->dir].tuple.src.u3.ip;
+ nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
+}
+
+/****************************************************************************/
+/*
+ * Set up NAT for a Q.931 call signalling channel announced in a RAS
+ * message.
+ *
+ * Registers @exp (with ip_nat_q931_expect() as its expect function) on a
+ * free port, rewrites entry @idx of @taddr in the packet and records the
+ * port mapping in info->sig_port[].
+ *
+ * Returns 0 on success, and also when no port is available (the packet
+ * is then left untouched).  Returns -1 if the packet could not be
+ * rewritten; the expectation is removed then.
+ */
+static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
+		    enum ip_conntrack_info ctinfo,
+		    unsigned char **data, TransportAddress *taddr, int idx,
+		    __be16 port, struct nf_conntrack_expect *exp)
+{
+	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+	int dir = CTINFO2DIR(ctinfo);
+	u_int16_t nated_port = ntohs(port);
+	union nf_inet_addr addr;
+
+	/* Set expectations for NAT */
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+	exp->expectfn = ip_nat_q931_expect;
+	exp->dir = !dir;
+
+	/* Check existing expects */
+	if (info->sig_port[dir] == port)
+		nated_port = ntohs(info->sig_port[!dir]);
+
+	/* Try to get same port: if not, try to change it. */
+	for (; nated_port != 0; nated_port++) {
+		int ret;
+
+		exp->tuple.dst.u.tcp.port = htons(nated_port);
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		if (ret != -EBUSY) {
+			/* Only -EBUSY means "port taken"; other errors
+			 * will not go away with a different port. */
+			nated_port = 0;
+			break;
+		}
+	}
+
+	if (nated_port == 0) {	/* No port available */
+		if (net_ratelimit())
+			printk("nf_nat_ras: out of TCP ports\n");
+		return 0;
+	}
+
+	/* Modify signal */
+	if (set_h225_addr(skb, data, 0, &taddr[idx],
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  htons(nated_port)) == 0) {
+		/* Save ports */
+		info->sig_port[dir] = port;
+		info->sig_port[!dir] = htons(nated_port);
+
+		/* Fix for Gnomemeeting: if the first entry is a 127.x.x.x
+		 * address, rewrite it as well (result is not checked) */
+		if (idx > 0 &&
+		    get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
+		    (ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
+			set_h225_addr(skb, data, 0, &taddr[0],
+				      &ct->tuplehash[!dir].tuple.dst.u3,
+				      info->sig_port[!dir]);
+		}
+	} else {
+		nf_ct_unexpect_related(exp);
+		return -1;
+	}
+
+	/* Success */
+	pr_debug("nf_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+		 NIPQUAD(exp->tuple.src.u3.ip),
+		 ntohs(exp->tuple.src.u.tcp.port),
+		 NIPQUAD(exp->tuple.dst.u3.ip),
+		 ntohs(exp->tuple.dst.u.tcp.port));
+
+	return 0;
+}
+
+/****************************************************************************/
+/* Expect function for forwarded calls: sets up source and destination NAT
+ * for the new connection.  The destination is restored from the address
+ * and port saved in the expectation (saved_ip / saved_proto). */
+static void ip_nat_callforwarding_expect(struct nf_conn *new,
+ struct nf_conntrack_expect *this)
+{
+ struct nf_nat_range range;
+
+ /* This must be a fresh one. */
+ BUG_ON(new->status & IPS_NAT_DONE_MASK);
+
+ /* Change src to where master sends to */
+ range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
+ nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+ range.min = range.max = this->saved_proto;
+ range.min_ip = range.max_ip = this->saved_ip;
+ nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
+}
+
+/****************************************************************************/
+/*
+ * Set up NAT for a forwarded call announced in a Q.931 message.
+ *
+ * The expectation's original destination address and port are saved in
+ * saved_ip / saved_proto, its destination is redirected to the other
+ * direction's destination address, and it is registered on @port or the
+ * next free port above it.  The announced address in the packet is then
+ * rewritten accordingly.
+ *
+ * Returns 0 on success, and also when no port is available (the packet
+ * is then left untouched).  Returns -1 if the packet could not be
+ * rewritten; the expectation is removed then.
+ */
+static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
+			      enum ip_conntrack_info ctinfo,
+			      unsigned char **data, int dataoff,
+			      TransportAddress *taddr, __be16 port,
+			      struct nf_conntrack_expect *exp)
+{
+	int dir = CTINFO2DIR(ctinfo);
+	u_int16_t nated_port;
+
+	/* Set expectations for NAT */
+	exp->saved_ip = exp->tuple.dst.u3.ip;
+	exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+	exp->expectfn = ip_nat_callforwarding_expect;
+	exp->dir = !dir;
+
+	/* Try to get same port: if not, try to change it. */
+	for (nated_port = ntohs(port); nated_port != 0; nated_port++) {
+		int ret;
+
+		exp->tuple.dst.u.tcp.port = htons(nated_port);
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		if (ret != -EBUSY) {
+			/* Only -EBUSY means "port taken"; other errors
+			 * will not go away with a different port. */
+			nated_port = 0;
+			break;
+		}
+	}
+
+	if (nated_port == 0) {	/* No port available */
+		if (net_ratelimit())
+			printk("nf_nat_q931: out of TCP ports\n");
+		return 0;
+	}
+
+	/* Modify signal; any non-zero result is a failure */
+	if (set_h225_addr(skb, data, dataoff, taddr,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  htons(nated_port)) != 0) {
+		nf_ct_unexpect_related(exp);
+		return -1;
+	}
+
+	/* Success */
+	pr_debug("nf_nat_q931: expect Call Forwarding "
+		 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+		 NIPQUAD(exp->tuple.src.u3.ip),
+		 ntohs(exp->tuple.src.u.tcp.port),
+		 NIPQUAD(exp->tuple.dst.u3.ip),
+		 ntohs(exp->tuple.dst.u.tcp.port));
+
+	return 0;
+}
+
+/****************************************************************************/
+/* Module init: publish the H.323 NAT callbacks through their hook
+ * pointers.  Always returns 0. */
+static int __init init(void)
+{
+ /* None of the hooks may already be claimed */
+ BUG_ON(set_h245_addr_hook != NULL);
+ BUG_ON(set_h225_addr_hook != NULL);
+ BUG_ON(set_sig_addr_hook != NULL);
+ BUG_ON(set_ras_addr_hook != NULL);
+ BUG_ON(nat_rtp_rtcp_hook != NULL);
+ BUG_ON(nat_t120_hook != NULL);
+ BUG_ON(nat_h245_hook != NULL);
+ BUG_ON(nat_callforwarding_hook != NULL);
+ BUG_ON(nat_q931_hook != NULL);
+
+ rcu_assign_pointer(set_h245_addr_hook, set_h245_addr);
+ rcu_assign_pointer(set_h225_addr_hook, set_h225_addr);
+ rcu_assign_pointer(set_sig_addr_hook, set_sig_addr);
+ rcu_assign_pointer(set_ras_addr_hook, set_ras_addr);
+ rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp);
+ rcu_assign_pointer(nat_t120_hook, nat_t120);
+ rcu_assign_pointer(nat_h245_hook, nat_h245);
+ rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding);
+ rcu_assign_pointer(nat_q931_hook, nat_q931);
+ return 0;
+}
+
+/****************************************************************************/
+/* Module exit: clear every hook pointer set in init(). */
+static void __exit fini(void)
+{
+ rcu_assign_pointer(set_h245_addr_hook, NULL);
+ rcu_assign_pointer(set_h225_addr_hook, NULL);
+ rcu_assign_pointer(set_sig_addr_hook, NULL);
+ rcu_assign_pointer(set_ras_addr_hook, NULL);
+ rcu_assign_pointer(nat_rtp_rtcp_hook, NULL);
+ rcu_assign_pointer(nat_t120_hook, NULL);
+ rcu_assign_pointer(nat_h245_hook, NULL);
+ rcu_assign_pointer(nat_callforwarding_hook, NULL);
+ rcu_assign_pointer(nat_q931_hook, NULL);
+ /* Wait for an RCU grace period so readers that already fetched an
+ * old hook pointer have finished before the module text goes away. */
+ synchronize_rcu();
+}
+
+/****************************************************************************/
+module_init(init);
+module_exit(fini);
+
+MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
+MODULE_DESCRIPTION("H.323 NAT helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_nat_h323");
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
new file mode 100644
index 0000000..cf7a42b
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -0,0 +1,444 @@
+/* nf_nat_helper.c - generic support functions for NAT helpers
+ *
+ * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
+ * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <net/checksum.h>
+#include <net/tcp.h>
+#include <net/route.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_helper.h>
+
+#define DUMP_OFFSET(x) \
+ pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
+ x->offset_before, x->offset_after, x->correction_pos);
+
+static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
+
+/* Setup TCP sequence correction given this change at this sequence.
+ *
+ * @seq is the (host order) sequence number of the mangled segment and
+ * @sizediff the number of bytes its payload grew (negative: shrank).
+ * The correction for the packet's direction is recorded in the
+ * connection's NAT extension under nf_nat_seqofs_lock. */
+static inline void
+adjust_tcp_sequence(u32 seq,
+		    int sizediff,
+		    struct nf_conn *ct,
+		    enum ip_conntrack_info ctinfo)
+{
+	int dir = CTINFO2DIR(ctinfo);
+	struct nf_conn_nat *nat = nfct_nat(ct);
+	struct nf_nat_seq *this_way = &nat->seq[dir];
+
+	pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
+		 seq, sizediff);
+
+	pr_debug("nf_nat_resize_packet: Seq_offset before: ");
+	DUMP_OFFSET(this_way);
+
+	spin_lock_bh(&nf_nat_seqofs_lock);
+
+	/* SYN adjust. If it's uninitialized, or this is after last
+	 * correction, record it: we don't handle more than one
+	 * adjustment in the window, but do deal with common case of a
+	 * retransmit */
+	if (this_way->offset_before == this_way->offset_after ||
+	    before(this_way->correction_pos, seq)) {
+		this_way->correction_pos = seq;
+		this_way->offset_before = this_way->offset_after;
+		this_way->offset_after += sizediff;
+	}
+	spin_unlock_bh(&nf_nat_seqofs_lock);
+
+	pr_debug("nf_nat_resize_packet: Seq_offset after: ");
+	DUMP_OFFSET(this_way);
+}
+
+/* Frobs data inside this packet, which is linear.
+ *
+ * Replaces the @match_len bytes found @match_offset bytes into the
+ * payload (the payload starts @dataoff bytes after the network header)
+ * with the @rep_len bytes of @rep_buffer.  The rest of the packet is
+ * moved up or down, the skb is grown or trimmed to the new length and
+ * the IP header's total length and checksum are updated.
+ *
+ * The caller must have made sure there is enough tailroom when
+ * @rep_len > @match_len. */
+static void mangle_contents(struct sk_buff *skb,
+			    unsigned int dataoff,
+			    unsigned int match_offset,
+			    unsigned int match_len,
+			    const char *rep_buffer,
+			    unsigned int rep_len)
+{
+	unsigned char *data;
+
+	BUG_ON(skb_is_nonlinear(skb));
+	data = skb_network_header(skb) + dataoff;
+
+	/* move post-replacement */
+	memmove(data + match_offset + rep_len,
+		data + match_offset + match_len,
+		skb->tail - (skb->network_header + dataoff +
+			     match_offset + match_len));
+
+	/* insert data from buffer */
+	memcpy(data + match_offset, rep_buffer, rep_len);
+
+	/* update skb info */
+	if (rep_len > match_len) {
+		pr_debug("nf_nat_mangle_packet: Extending packet by "
+			 "%u from %u bytes\n", rep_len - match_len, skb->len);
+		skb_put(skb, rep_len - match_len);
+	} else {
+		pr_debug("nf_nat_mangle_packet: Shrinking packet by "
+			 "%u from %u bytes\n", match_len - rep_len, skb->len);
+		__skb_trim(skb, skb->len + rep_len - match_len);
+	}
+
+	/* fix IP hdr checksum information */
+	ip_hdr(skb)->tot_len = htons(skb->len);
+	ip_send_check(ip_hdr(skb));
+}
+
+/* Unusual, but possible case: the skb lacks tailroom for the bytes we
+ * want to insert.  Tries to grow it so @extra more bytes fit.
+ * Returns 1 on success, 0 if the packet would become too large or the
+ * reallocation failed. */
+static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
+{
+	int grow_failed;
+
+	/* 65535 is the most the 16-bit IP total length field can express */
+	if (skb->len + extra > 65535)
+		return 0;
+
+	grow_failed = pskb_expand_head(skb, 0, extra - skb_tailroom(skb),
+				       GFP_ATOMIC);
+
+	return grow_failed ? 0 : 1;
+}
+
+/* Generic function for mangling variable-length address changes inside
+ * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
+ * command in FTP).
+ *
+ * Takes care about all the nasty sequence number changes, checksumming,
+ * skb enlargement, ...
+ *
+ * Replaces @match_len bytes at TCP payload offset @match_offset with the
+ * @rep_len bytes of @rep_buffer.
+ *
+ * Returns 1 on success, 0 if the skb could not be made writable or could
+ * not be enlarged.
+ * */
+int
+nf_nat_mangle_tcp_packet(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int match_offset,
+ unsigned int match_len,
+ const char *rep_buffer,
+ unsigned int rep_len)
+{
+ struct rtable *rt = skb->rtable;
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ int oldlen, datalen;
+
+ if (!skb_make_writable(skb, skb->len))
+ return 0;
+
+ /* Only grow the skb when the replacement is longer than the match
+ * and the existing tailroom is not enough */
+ if (rep_len > match_len &&
+ rep_len - match_len > skb_tailroom(skb) &&
+ !enlarge_skb(skb, rep_len - match_len))
+ return 0;
+
+ SKB_LINEAR_ASSERT(skb);
+
+ /* Header pointers are fetched only now, after the calls above that
+ * may have changed the skb's data */
+ iph = ip_hdr(skb);
+ tcph = (void *)iph + iph->ihl*4;
+
+ /* oldlen/datalen: TCP header + payload length before/after mangling */
+ oldlen = skb->len - iph->ihl*4;
+ mangle_contents(skb, iph->ihl*4 + tcph->doff*4,
+ match_offset, match_len, rep_buffer, rep_len);
+
+ datalen = skb->len - iph->ihl*4;
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ /* Route is not local and the device advertises
+ * NETIF_F_V4_CSUM: mark the skb CHECKSUM_PARTIAL and store
+ * only the pseudo-header sum */
+ if (!(rt->rt_flags & RTCF_LOCAL) &&
+ skb->dev->features & NETIF_F_V4_CSUM) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) +
+ skb_network_offset(skb) +
+ iph->ihl * 4;
+ skb->csum_offset = offsetof(struct tcphdr, check);
+ tcph->check = ~tcp_v4_check(datalen,
+ iph->saddr, iph->daddr, 0);
+ } else {
+ /* Otherwise recompute the full TCP checksum here */
+ tcph->check = 0;
+ tcph->check = tcp_v4_check(datalen,
+ iph->saddr, iph->daddr,
+ csum_partial(tcph,
+ datalen, 0));
+ }
+ } else
+ /* Already CHECKSUM_PARTIAL: only the length changed, so patch
+ * the stored sum for oldlen -> datalen */
+ inet_proto_csum_replace2(&tcph->check, skb,
+ htons(oldlen), htons(datalen), 1);
+
+ /* A size change shifts all later sequence numbers: flag the
+ * connection and record the offset */
+ if (rep_len != match_len) {
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+ adjust_tcp_sequence(ntohl(tcph->seq),
+ (int)rep_len - (int)match_len,
+ ct, ctinfo);
+ /* Tell TCP window tracking about seq change */
+ nf_conntrack_tcp_update(skb, ip_hdrlen(skb),
+ ct, CTINFO2DIR(ctinfo));
+
+ nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
+ }
+ return 1;
+}
+EXPORT_SYMBOL(nf_nat_mangle_tcp_packet);
+
+/* Generic function for mangling variable-length address changes inside
+ * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
+ * command in the Amanda protocol)
+ *
+ * Takes care of the UDP length field, checksumming and skb enlargement.
+ *
+ * Replaces @match_len bytes at UDP payload offset @match_offset with the
+ * @rep_len bytes of @rep_buffer.
+ *
+ * Returns 1 on success, 0 if the packet is too short to contain the
+ * match, or the skb could not be made writable or enlarged.
+ *
+ * XXX - This function could be merged with nf_nat_mangle_tcp_packet which
+ * should be fairly easy to do.
+ */
+int
+nf_nat_mangle_udp_packet(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int match_offset,
+ unsigned int match_len,
+ const char *rep_buffer,
+ unsigned int rep_len)
+{
+ struct rtable *rt = skb->rtable;
+ struct iphdr *iph;
+ struct udphdr *udph;
+ int datalen, oldlen;
+
+ /* UDP helpers might accidentally mangle the wrong packet */
+ iph = ip_hdr(skb);
+ if (skb->len < iph->ihl*4 + sizeof(*udph) +
+ match_offset + match_len)
+ return 0;
+
+ if (!skb_make_writable(skb, skb->len))
+ return 0;
+
+ /* Only grow the skb when the replacement is longer than the match
+ * and the existing tailroom is not enough */
+ if (rep_len > match_len &&
+ rep_len - match_len > skb_tailroom(skb) &&
+ !enlarge_skb(skb, rep_len - match_len))
+ return 0;
+
+ /* Re-fetch the header pointers after the calls above that may have
+ * changed the skb's data */
+ iph = ip_hdr(skb);
+ udph = (void *)iph + iph->ihl*4;
+
+ /* oldlen/datalen: UDP header + payload length before/after mangling */
+ oldlen = skb->len - iph->ihl*4;
+ mangle_contents(skb, iph->ihl*4 + sizeof(*udph),
+ match_offset, match_len, rep_buffer, rep_len);
+
+ /* update the length of the UDP packet */
+ datalen = skb->len - iph->ihl*4;
+ udph->len = htons(datalen);
+
+ /* fix udp checksum if udp checksum was previously calculated */
+ /* (a zero checksum field on a non-CHECKSUM_PARTIAL skb is left as is) */
+ if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
+ return 1;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ /* Route is not local and the device advertises
+ * NETIF_F_V4_CSUM: mark the skb CHECKSUM_PARTIAL and store
+ * only the pseudo-header sum */
+ if (!(rt->rt_flags & RTCF_LOCAL) &&
+ skb->dev->features & NETIF_F_V4_CSUM) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) +
+ skb_network_offset(skb) +
+ iph->ihl * 4;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, IPPROTO_UDP,
+ 0);
+ } else {
+ /* Otherwise recompute the full UDP checksum here */
+ udph->check = 0;
+ udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, IPPROTO_UDP,
+ csum_partial(udph,
+ datalen, 0));
+ /* A computed checksum of 0 is replaced by CSUM_MANGLED_0 */
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+ } else
+ /* Already CHECKSUM_PARTIAL: only the length changed, so patch
+ * the stored sum for oldlen -> datalen */
+ inet_proto_csum_replace2(&udph->check, skb,
+ htons(oldlen), htons(datalen), 1);
+
+ return 1;
+}
+EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
+
+/* Adjust one found SACK option including checksum correction.
+ *
+ * @sackoff and @sackend are offsets from skb->data delimiting the SACK
+ * blocks of the option.  Each block edge has the recorded NAT offset
+ * subtracted: offset_after if the edge (less offset_before) lies after
+ * the correction position, offset_before otherwise.  The TCP checksum in
+ * @tcph is patched for every changed word. */
+static void
+sack_adjust(struct sk_buff *skb,
+	    struct tcphdr *tcph,
+	    unsigned int sackoff,
+	    unsigned int sackend,
+	    struct nf_nat_seq *natseq)
+{
+	while (sackoff < sackend) {
+		struct tcp_sack_block_wire *sack;
+		__be32 new_start_seq, new_end_seq;
+
+		sack = (void *)skb->data + sackoff;
+		if (after(ntohl(sack->start_seq) - natseq->offset_before,
+			  natseq->correction_pos))
+			new_start_seq = htonl(ntohl(sack->start_seq)
+					- natseq->offset_after);
+		else
+			new_start_seq = htonl(ntohl(sack->start_seq)
+					- natseq->offset_before);
+
+		if (after(ntohl(sack->end_seq) - natseq->offset_before,
+			  natseq->correction_pos))
+			new_end_seq = htonl(ntohl(sack->end_seq)
+				      - natseq->offset_after);
+		else
+			new_end_seq = htonl(ntohl(sack->end_seq)
+				      - natseq->offset_before);
+
+		/* print old and new values in the same (host) byte order */
+		pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
+			 ntohl(sack->start_seq), ntohl(new_start_seq),
+			 ntohl(sack->end_seq), ntohl(new_end_seq));
+
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->start_seq, new_start_seq, 0);
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->end_seq, new_end_seq, 0);
+		sack->start_seq = new_start_seq;
+		sack->end_seq = new_end_seq;
+		sackoff += sizeof(*sack);
+	}
+}
+
+/* TCP SACK sequence number adjustment.
+ *
+ * Walks the TCP options of the packet and passes every well-formed SACK
+ * option to sack_adjust(), using the offsets recorded for the opposite
+ * direction.  Returns 1 on success, 0 if the option area could not be
+ * made writable or contains a truncated/invalid option. */
+static inline unsigned int
+nf_nat_sack_adjust(struct sk_buff *skb,
+		   struct tcphdr *tcph,
+		   struct nf_conn *ct,
+		   enum ip_conntrack_info ctinfo)
+{
+	unsigned int dir, optoff, optend;
+	struct nf_conn_nat *nat = nfct_nat(ct);
+
+	optoff = ip_hdrlen(skb) + sizeof(struct tcphdr);
+	optend = ip_hdrlen(skb) + tcph->doff * 4;
+
+	if (!skb_make_writable(skb, optend))
+		return 0;
+
+	dir = CTINFO2DIR(ctinfo);
+
+	while (optoff < optend) {
+		/* Usually: option, length. */
+		unsigned char *op = skb->data + optoff;
+
+		/* end of option list: done */
+		if (op[0] == TCPOPT_EOL)
+			return 1;
+
+		/* single-byte padding */
+		if (op[0] == TCPOPT_NOP) {
+			optoff++;
+			continue;
+		}
+
+		/* no partial options */
+		if (optoff + 1 == optend ||
+		    optoff + op[1] > optend ||
+		    op[1] < 2)
+			return 0;
+
+		/* SACK option holding a whole number of blocks */
+		if (op[0] == TCPOPT_SACK &&
+		    op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
+		    ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
+			sack_adjust(skb, tcph, optoff+2,
+				    optoff+op[1], &nat->seq[!dir]);
+
+		optoff += op[1];
+	}
+	return 1;
+}
+
+/* TCP sequence number adjustment. Returns 1 on success, 0 on failure.
+ *
+ * Adds this direction's recorded offset to the sequence number and
+ * subtracts the other direction's offset from the acknowledgement
+ * number, patches the TCP checksum, then fixes up SACK options and tells
+ * TCP window tracking about the change. */
+int
+nf_nat_seq_adjust(struct sk_buff *skb,
+		  struct nf_conn *ct,
+		  enum ip_conntrack_info ctinfo)
+{
+	struct nf_conn_nat *nat = nfct_nat(ct);
+	int dir = CTINFO2DIR(ctinfo);
+	struct nf_nat_seq *this_way = &nat->seq[dir];
+	struct nf_nat_seq *other_way = &nat->seq[!dir];
+	struct tcphdr *tcph;
+	__be32 newseq, newack;
+	u32 seq, ack;
+
+	if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
+		return 0;
+
+	tcph = (void *)skb->data + ip_hdrlen(skb);
+	seq = ntohl(tcph->seq);
+	ack = ntohl(tcph->ack_seq);
+
+	/* data past the correction point gets the newer offset */
+	if (after(seq, this_way->correction_pos))
+		newseq = htonl(seq + this_way->offset_after);
+	else
+		newseq = htonl(seq + this_way->offset_before);
+
+	/* the ack is compared after removing the older offset */
+	if (after(ack - other_way->offset_before, other_way->correction_pos))
+		newack = htonl(ack - other_way->offset_after);
+	else
+		newack = htonl(ack - other_way->offset_before);
+
+	inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
+	inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
+
+	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
+		 seq, ntohl(newseq), ack, ntohl(newack));
+
+	tcph->seq = newseq;
+	tcph->ack_seq = newack;
+
+	if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo))
+		return 0;
+
+	nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir);
+
+	return 1;
+}
+
+/* Setup NAT on this expected conntrack so it follows master. */
+/* If we fail to get a free NAT slot, we'll get dropped on confirm */
+/* Both mappings are derived from the master connection's tuple in the
+ * direction opposite to exp->dir: its destination address becomes the new
+ * source, its source address (with the port saved in exp->saved_proto)
+ * the new destination. */
+void nf_nat_follow_master(struct nf_conn *ct,
+ struct nf_conntrack_expect *exp)
+{
+ struct nf_nat_range range;
+
+ /* This must be a fresh one. */
+ BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+ /* Change src to where master sends to */
+ range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.min_ip = range.max_ip
+ = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+ nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+ range.min = range.max = exp->saved_proto;
+ range.min_ip = range.max_ip
+ = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
+ nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+}
+EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
new file mode 100644
index 0000000..fe6f9ce
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -0,0 +1,92 @@
+/* IRC extension for TCP NAT alteration.
+ *
+ * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
+ * based on a copy of RR's ip_nat_ftp.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/tcp.h>
+#include <linux/kernel.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_irc.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("IRC (DCC) NAT helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_nat_irc");
+
+/*
+ * NAT hook for IRC: registers the expectation (walking upwards from the
+ * original port until one is accepted) and then replaces the matched
+ * payload text with "<ip> <port>" in decimal.
+ *
+ * Returns NF_DROP if no port could be registered, otherwise the result of
+ * nf_nat_mangle_tcp_packet().
+ */
+static unsigned int help(struct sk_buff *skb,
+			 enum ip_conntrack_info ctinfo,
+			 unsigned int matchoff,
+			 unsigned int matchlen,
+			 struct nf_conntrack_expect *exp)
+{
+	char buffer[sizeof("4294967296 65635")];
+	unsigned int verdict;
+	u_int32_t addr;
+	u_int16_t port;
+
+	/* Reply comes from server. */
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+	exp->dir = IP_CT_DIR_REPLY;
+	exp->expectfn = nf_nat_follow_master;
+
+	/*
+	 * Try to get same port: if not, try to change it.  The 16-bit
+	 * counter wrapping around to 0 terminates the search.
+	 */
+	port = ntohs(exp->saved_proto.tcp.port);
+	while (port != 0) {
+		exp->tuple.dst.u.tcp.port = htons(port);
+		if (nf_ct_expect_related(exp) == 0)
+			break;
+		port++;
+	}
+
+	/* Nothing could be registered. */
+	if (!port)
+		return NF_DROP;
+
+	addr = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip);
+	sprintf(buffer, "%u %u", addr, port);
+	pr_debug("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n",
+		 buffer, NIPQUAD(addr), port);
+
+	verdict = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo,
+					   matchoff, matchlen, buffer,
+					   strlen(buffer));
+	/* Mangling did not succeed: withdraw the expectation again. */
+	if (verdict != NF_ACCEPT)
+		nf_ct_unexpect_related(exp);
+
+	return verdict;
+}
+
+/* Module load: publish help() as the IRC NAT hook; it must be unset. */
+static int __init nf_nat_irc_init(void)
+{
+	BUG_ON(nf_nat_irc_hook);
+	rcu_assign_pointer(nf_nat_irc_hook, help);
+
+	return 0;
+}
+
+/* Module unload: clear the hook, then call synchronize_rcu(). */
+static void __exit nf_nat_irc_fini(void)
+{
+	rcu_assign_pointer(nf_nat_irc_hook, NULL);
+	synchronize_rcu();
+}
+
+/* Prior to 2.6.11, we had a ports param. No longer, but don't break users.
+ *
+ * Setter installed for the legacy "ports" module parameter: the supplied
+ * value is ignored, a KERN_INFO notice is printed and 0 is returned. */
+static int warn_set(const char *val, struct kernel_param *kp)
+{
+ printk(KERN_INFO KBUILD_MODNAME
+ ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
+ return 0;
+}
+module_param_call(ports, warn_set, NULL, NULL, 0);
+
+module_init(nf_nat_irc_init);
+module_exit(nf_nat_irc_fini);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
new file mode 100644
index 0000000..9eb1710
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -0,0 +1,307 @@
+/*
+ * nf_nat_pptp.c
+ *
+ * NAT support for PPTP (Point to Point Tunneling Protocol).
+ * PPTP is a protocol for creating virtual private networks.
+ * It is a specification defined by Microsoft and some vendors
+ * working with Microsoft. PPTP is built on top of a modified
+ * version of the Internet Generic Routing Encapsulation Protocol.
+ * GRE is defined in RFC 1701 and RFC 1702. Documentation of
+ * PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * TODO: - NAT to a unique tuple, not to TCP source port
+ * (needs netfilter tuple reservation)
+ */
+
+#include <linux/module.h>
+#include <linux/tcp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_proto_gre.h>
+#include <linux/netfilter/nf_conntrack_pptp.h>
+
+#define NF_NAT_PPTP_VERSION "3.0"
+
+#define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off)))
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
+MODULE_ALIAS("ip_nat_pptp");
+
+/*
+ * Expectation callback for the GRE connection of a PPTP call.
+ *
+ * Looks up the expectation for the opposite direction and, if found,
+ * removes it.  Then installs source and destination NAT mappings on @ct
+ * using the master's tuple in the direction opposite to @exp->dir.
+ */
+static void pptp_nat_expected(struct nf_conn *ct,
+			      struct nf_conntrack_expect *exp)
+{
+	struct net *net = nf_ct_net(ct);
+	const struct nf_conn *master = ct->master;
+	const struct nf_ct_pptp_master *ct_info;
+	const struct nf_nat_pptp *nat_info;
+	const struct nf_conntrack_tuple *rev;
+	struct nf_conntrack_expect *sibling;
+	struct nf_conntrack_tuple t;
+	struct nf_nat_range range;
+
+	ct_info = &nfct_help(master)->help.ct_pptp_info;
+	nat_info = &nfct_nat(master)->help.nat_pptp_info;
+	rev = &master->tuplehash[!exp->dir].tuple;
+
+	/* And here goes the grand finale of corrosion... */
+	t.src.l3num = AF_INET;
+	t.src.u3.ip = rev->src.u3.ip;
+	t.dst.u3.ip = rev->dst.u3.ip;
+	t.dst.protonum = IPPROTO_GRE;
+	if (exp->dir != IP_CT_DIR_ORIGINAL) {
+		pr_debug("we are PAC->PNS\n");
+		/* build tuple for PNS->PAC */
+		t.src.u.gre.key = nat_info->pns_call_id;
+		t.dst.u.gre.key = nat_info->pac_call_id;
+	} else {
+		pr_debug("we are PNS->PAC\n");
+		/* therefore, build tuple for PAC->PNS */
+		t.src.u.gre.key = ct_info->pac_call_id;
+		t.dst.u.gre.key = ct_info->pns_call_id;
+	}
+
+	pr_debug("trying to unexpect other dir: ");
+	nf_ct_dump_tuple_ip(&t);
+	sibling = nf_ct_expect_find_get(net, &t);
+	if (!sibling) {
+		pr_debug("not found!\n");
+	} else {
+		nf_ct_unexpect_related(sibling);
+		nf_ct_expect_put(sibling);
+		pr_debug("success\n");
+	}
+
+	/* This must be a fresh one. */
+	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+	/* Change src to where master sends to */
+	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.min_ip = rev->dst.u3.ip;
+	range.max_ip = range.min_ip;
+	if (exp->dir == IP_CT_DIR_ORIGINAL) {
+		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range.min = exp->saved_proto;
+		range.max = exp->saved_proto;
+	}
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+
+	/* For DST manip, map port here to where it's expected. */
+	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.min_ip = rev->src.u3.ip;
+	range.max_ip = range.min_ip;
+	if (exp->dir == IP_CT_DIR_REPLY) {
+		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range.min = exp->saved_proto;
+		range.max = exp->saved_proto;
+	}
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+}
+
+/* outbound packets == from PNS to PAC
+ *
+ * Rewrites the call ID field of the control messages that reach the mangle
+ * step at the bottom (OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST)
+ * to new_callid.  new_callid starts out as the PNS call ID from the
+ * conntrack info; for OUT_CALL_REQUEST the original value is first saved in
+ * the NAT info and then replaced, in both new_callid and the conntrack
+ * info, by the TCP port taken from the reply tuple.  Every other message
+ * type is accepted without modification.
+ *
+ * Returns NF_ACCEPT, or NF_DROP when nf_nat_mangle_tcp_packet() returns 0. */
+static int
+pptp_outbound_pkt(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq)
+
+{
+ struct nf_ct_pptp_master *ct_pptp_info;
+ struct nf_nat_pptp *nat_pptp_info;
+ u_int16_t msg;
+ __be16 new_callid;
+ unsigned int cid_off;
+
+ ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info;
+ nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
+
+ new_callid = ct_pptp_info->pns_call_id;
+
+ switch (msg = ntohs(ctlh->messageType)) {
+ case PPTP_OUT_CALL_REQUEST:
+ cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
+ /* FIXME: ideally we would want to reserve a call ID
+ * here. current netfilter NAT core is not able to do
+ * this :( For now we use TCP source port. This breaks
+ * multiple calls within one control session */
+
+ /* save original call ID in nat_info */
+ nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
+
+ /* don't use tcph->source since we are at a DSTmanip
+ * hook (e.g. PREROUTING) and pkt is not mangled yet */
+ new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+
+ /* save new call ID in ct info */
+ ct_pptp_info->pns_call_id = new_callid;
+ break;
+ case PPTP_IN_CALL_REPLY:
+ cid_off = offsetof(union pptp_ctrl_union, icack.callID);
+ break;
+ case PPTP_CALL_CLEAR_REQUEST:
+ cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
+ break;
+ default:
+ pr_debug("unknown outbound packet 0x%04x:%s\n", msg,
+ msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] :
+ pptp_msg_name[0]);
+ /* fall through: unknown messages are accepted like the ones below */
+ case PPTP_SET_LINK_INFO:
+ /* only need to NAT in case PAC is behind NAT box */
+ case PPTP_START_SESSION_REQUEST:
+ case PPTP_START_SESSION_REPLY:
+ case PPTP_STOP_SESSION_REQUEST:
+ case PPTP_STOP_SESSION_REPLY:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* no need to alter packet */
+ return NF_ACCEPT;
+ }
+
+ /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
+ * down to here */
+ pr_debug("altering call id from 0x%04x to 0x%04x\n",
+ ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
+
+ /* mangle packet: the match offset is cid_off plus the sizes of the two
+ * headers (pptp_pkt_hdr, PptpControlHeader) that precede the union */
+ if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+ cid_off + sizeof(struct pptp_pkt_hdr) +
+ sizeof(struct PptpControlHeader),
+ sizeof(new_callid), (char *)&new_callid,
+ sizeof(new_callid)) == 0)
+ return NF_DROP;
+ return NF_ACCEPT;
+}
+
+/*
+ * Fill in the GRE keys, saved protocol part and direction of the two
+ * expectations (PNS->PAC and PAC->PNS) from the call IDs stored in the
+ * master's conntrack and NAT info.  The PAC call ID from the conntrack
+ * info is copied into the NAT info first.
+ */
+static void
+pptp_exp_gre(struct nf_conntrack_expect *expect_orig,
+	     struct nf_conntrack_expect *expect_reply)
+{
+	const struct nf_conn *ct = expect_orig->master;
+	struct nf_ct_pptp_master *ct_info = &nfct_help(ct)->help.ct_pptp_info;
+	struct nf_nat_pptp *nat_info = &nfct_nat(ct)->help.nat_pptp_info;
+
+	/* save original PAC call ID in nat_info */
+	nat_info->pac_call_id = ct_info->pac_call_id;
+
+	/* alter expectation for PNS->PAC direction */
+	expect_orig->dir = IP_CT_DIR_ORIGINAL;
+	expect_orig->tuple.src.u.gre.key = nat_info->pns_call_id;
+	expect_orig->tuple.dst.u.gre.key = ct_info->pac_call_id;
+	expect_orig->saved_proto.gre.key = ct_info->pns_call_id;
+
+	/* alter expectation for PAC->PNS direction */
+	expect_reply->dir = IP_CT_DIR_REPLY;
+	expect_reply->tuple.src.u.gre.key = nat_info->pac_call_id;
+	expect_reply->tuple.dst.u.gre.key = ct_info->pns_call_id;
+	expect_reply->saved_proto.gre.key = nat_info->pns_call_id;
+}
+
+/* inbound packets == from PAC to PNS
+ *
+ * Rewrites the (peer's) call ID field of the control messages that reach
+ * the mangle step at the bottom to the PNS call ID stored in the NAT info.
+ * IN_CALL_REQUEST, the session/echo messages and unknown message types are
+ * accepted without modification.
+ *
+ * Returns NF_ACCEPT, or NF_DROP when nf_nat_mangle_tcp_packet() returns 0. */
+static int
+pptp_inbound_pkt(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq)
+{
+ const struct nf_nat_pptp *nat_pptp_info;
+ u_int16_t msg;
+ __be16 new_pcid;
+ unsigned int pcid_off;
+
+ nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
+ new_pcid = nat_pptp_info->pns_call_id;
+
+ switch (msg = ntohs(ctlh->messageType)) {
+ case PPTP_OUT_CALL_REPLY:
+ pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
+ break;
+ case PPTP_IN_CALL_CONNECT:
+ pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
+ break;
+ case PPTP_IN_CALL_REQUEST:
+ /* only need to nat in case PAC is behind NAT box */
+ return NF_ACCEPT;
+ case PPTP_WAN_ERROR_NOTIFY:
+ pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID);
+ break;
+ case PPTP_CALL_DISCONNECT_NOTIFY:
+ pcid_off = offsetof(union pptp_ctrl_union, disc.callID);
+ break;
+ case PPTP_SET_LINK_INFO:
+ pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
+ break;
+ default:
+ pr_debug("unknown inbound packet %s\n",
+ msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] :
+ pptp_msg_name[0]);
+ /* fall through: unknown messages are accepted like the ones below */
+ case PPTP_START_SESSION_REQUEST:
+ case PPTP_START_SESSION_REPLY:
+ case PPTP_STOP_SESSION_REQUEST:
+ case PPTP_STOP_SESSION_REPLY:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* no need to alter packet */
+ return NF_ACCEPT;
+ }
+
+ /* only OUT_CALL_REPLY, IN_CALL_CONNECT, WAN_ERROR_NOTIFY,
+ * CALL_DISCONNECT_NOTIFY, SET_LINK_INFO pass down here */
+
+ /* mangle packet */
+ pr_debug("altering peer call id from 0x%04x to 0x%04x\n",
+ ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
+
+ if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+ pcid_off + sizeof(struct pptp_pkt_hdr) +
+ sizeof(struct PptpControlHeader),
+ sizeof(new_pcid), (char *)&new_pcid,
+ sizeof(new_pcid)) == 0)
+ return NF_DROP;
+ return NF_ACCEPT;
+}
+
+/*
+ * Module load: call nf_nat_need_gre(), then publish the four PPTP NAT
+ * hooks.  Each hook must still be unset when it is assigned.
+ */
+static int __init nf_nat_helper_pptp_init(void)
+{
+	nf_nat_need_gre();
+
+	BUG_ON(nf_nat_pptp_hook_outbound);
+	rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
+
+	BUG_ON(nf_nat_pptp_hook_inbound);
+	rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt);
+
+	BUG_ON(nf_nat_pptp_hook_exp_gre);
+	rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre);
+
+	BUG_ON(nf_nat_pptp_hook_expectfn);
+	rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected);
+
+	return 0;
+}
+
+/*
+ * Module unload: clear the hooks in the reverse order of their
+ * installation, then call synchronize_rcu().
+ */
+static void __exit nf_nat_helper_pptp_fini(void)
+{
+	rcu_assign_pointer(nf_nat_pptp_hook_expectfn, NULL);
+	rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, NULL);
+	rcu_assign_pointer(nf_nat_pptp_hook_inbound, NULL);
+	rcu_assign_pointer(nf_nat_pptp_hook_outbound, NULL);
+	synchronize_rcu();
+}
+
+module_init(nf_nat_helper_pptp_init);
+module_exit(nf_nat_helper_pptp_fini);
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
new file mode 100644
index 0000000..6c4f11f
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -0,0 +1,124 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/ip.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat_protocol.h>
+
+/*
+ * Check whether the protocol part of @tuple selected by @maniptype (source
+ * for IP_NAT_MANIP_SRC, destination otherwise) lies within [@min, @max],
+ * compared in host byte order.
+ */
+bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
+			   enum nf_nat_manip_type maniptype,
+			   const union nf_conntrack_man_proto *min,
+			   const union nf_conntrack_man_proto *max)
+{
+	const __be16 raw = (maniptype == IP_NAT_MANIP_SRC) ? tuple->src.u.all
+							    : tuple->dst.u.all;
+	const u_int16_t port = ntohs(raw);
+
+	if (port < ntohs(min->all))
+		return false;
+
+	return port <= ntohs(max->all);
+}
+EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
+
+/*
+ * Search a port range for a value that makes @tuple unused.
+ *
+ * The range comes from @range when IP_NAT_RANGE_PROTO_SPECIFIED is set;
+ * otherwise it is derived from the current port (destination rewrites
+ * fail in that case).  The search starts at an offset taken from
+ * secure_ipv4_port_ephemeral() when IP_NAT_RANGE_PROTO_RANDOM is set, or
+ * from *@rover otherwise; in the latter case *@rover is updated on success.
+ *
+ * Returns true with the port written into @tuple, false if every
+ * candidate is in use.
+ */
+bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+			       const struct nf_nat_range *range,
+			       enum nf_nat_manip_type maniptype,
+			       const struct nf_conn *ct,
+			       u_int16_t *rover)
+{
+	const bool is_src = (maniptype == IP_NAT_MANIP_SRC);
+	const bool randomize = !!(range->flags & IP_NAT_RANGE_PROTO_RANDOM);
+	__be16 *portptr = is_src ? &tuple->src.u.all : &tuple->dst.u.all;
+	unsigned int first, count, tries;
+	u_int16_t off;
+
+	if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) {
+		first = ntohs(range->min.all);
+		count = ntohs(range->max.all) - first + 1;
+	} else if (maniptype == IP_NAT_MANIP_DST) {
+		/* No range specified and it's dst rewrite: can't change port */
+		return false;
+	} else if (ntohs(*portptr) >= 1024) {
+		first = 1024;
+		count = 65535 - 1024 + 1;
+	} else if (ntohs(*portptr) >= 512) {
+		/* Loose convention: >> 512 is credential passing */
+		first = 600;
+		count = 1023 - first + 1;
+	} else {
+		first = 1;
+		count = 511 - first + 1;
+	}
+
+	if (randomize)
+		off = secure_ipv4_port_ephemeral(tuple->src.u3.ip,
+						 tuple->dst.u3.ip,
+						 is_src ? tuple->dst.u.all
+							: tuple->src.u.all);
+	else
+		off = *rover;
+
+	for (tries = 0; tries < count; tries++, off++) {
+		*portptr = htons(first + off % count);
+		if (!nf_nat_used_tuple(tuple, ct)) {
+			if (!randomize)
+				*rover = off;
+			return true;
+		}
+	}
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
+
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+/*
+ * Add the protocol range of @range to @skb as the attributes
+ * CTA_PROTONAT_PORT_MIN and CTA_PROTONAT_PORT_MAX.
+ *
+ * Returns 0 on success, -1 via the nla_put_failure label otherwise.
+ */
+int nf_nat_proto_range_to_nlattr(struct sk_buff *skb,
+				 const struct nf_nat_range *range)
+{
+	NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.all);
+	NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.all);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_proto_range_to_nlattr);
+
+/*
+ * Fill the protocol range of @range from the attribute table @tb.
+ *
+ * CTA_PROTONAT_PORT_MIN, when present, sets both min and max;
+ * CTA_PROTONAT_PORT_MAX, when present, then overrides max.  Either one
+ * sets IP_NAT_RANGE_PROTO_SPECIFIED.  Always returns 0.
+ */
+int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
+				 struct nf_nat_range *range)
+{
+	if (tb[CTA_PROTONAT_PORT_MIN]) {
+		range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
+		/* use the generic union member, like every other access here */
+		range->max.all = range->min.all;
+		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+	}
+	if (tb[CTA_PROTONAT_PORT_MAX]) {
+		range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
+		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range);
+#endif
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
new file mode 100644
index 0000000..22485ce
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c
@@ -0,0 +1,108 @@
+/*
+ * DCCP NAT protocol helper
+ *
+ * Copyright (c) 2005, 2006. 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/dccp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_protocol.h>
+
+/* Rover passed to the common port search for DCCP. */
+static u_int16_t dccp_port_rover;
+
+/* Pick a unique tuple by delegating to nf_nat_proto_unique_tuple(). */
+static bool
+dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
+		  const struct nf_nat_range *range,
+		  enum nf_nat_manip_type maniptype,
+		  const struct nf_conn *ct)
+{
+	u_int16_t *rover = &dccp_port_rover;
+
+	return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, rover);
+}
+
+/*
+ * Rewrite the source or destination port (per @maniptype) of the DCCP
+ * header found behind the IP header at @iphdroff.  The checksum is only
+ * adjusted, for the address and port change, when a complete
+ * struct dccp_hdr fits into the packet.
+ *
+ * Returns false if the header could not be made writable.
+ */
+static bool
+dccp_manip_pkt(struct sk_buff *skb,
+	       unsigned int iphdroff,
+	       const struct nf_conntrack_tuple *tuple,
+	       enum nf_nat_manip_type maniptype)
+{
+	const struct iphdr *iph = (const void *)(skb->data + iphdroff);
+	const unsigned int hdroff = iphdroff + iph->ihl * 4;
+	const bool src = (maniptype == IP_NAT_MANIP_SRC);
+	struct dccp_hdr *hdr;
+	__be16 *portptr, oldport, newport;
+	__be32 oldip, newip;
+	int hdrsize;
+
+	/* DCCP connection tracking guarantees 8 bytes */
+	if (skb->len >= hdroff + sizeof(struct dccp_hdr))
+		hdrsize = sizeof(struct dccp_hdr);
+	else
+		hdrsize = 8;
+
+	if (!skb_make_writable(skb, hdroff + hdrsize))
+		return false;
+
+	/* header pointers are derived only after skb_make_writable() */
+	iph = (struct iphdr *)(skb->data + iphdroff);
+	hdr = (struct dccp_hdr *)(skb->data + hdroff);
+
+	oldip = src ? iph->saddr : iph->daddr;
+	newip = src ? tuple->src.u3.ip : tuple->dst.u3.ip;
+	newport = src ? tuple->src.u.dccp.port : tuple->dst.u.dccp.port;
+	portptr = src ? &hdr->dccph_sport : &hdr->dccph_dport;
+
+	oldport = *portptr;
+	*portptr = newport;
+
+	if (hdrsize >= sizeof(*hdr)) {
+		inet_proto_csum_replace4(&hdr->dccph_checksum, skb,
+					 oldip, newip, 1);
+		inet_proto_csum_replace2(&hdr->dccph_checksum, skb,
+					 oldport, newport, 0);
+	}
+
+	return true;
+}
+
+/* NAT protocol operations for IPPROTO_DCCP. */
+static const struct nf_nat_protocol nf_nat_protocol_dccp = {
+	.me			= THIS_MODULE,
+	.protonum		= IPPROTO_DCCP,
+	.in_range		= nf_nat_proto_in_range,
+	.unique_tuple		= dccp_unique_tuple,
+	.manip_pkt		= dccp_manip_pkt,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.nlattr_to_range	= nf_nat_proto_nlattr_to_range,
+	.range_to_nlattr	= nf_nat_proto_range_to_nlattr,
+#endif
+};
+
+/* Module load: register the DCCP NAT protocol; passes on the result. */
+static int __init nf_nat_proto_dccp_init(void)
+{
+	int err = nf_nat_protocol_register(&nf_nat_protocol_dccp);
+
+	return err;
+}
+
+/* Module unload: unregister the DCCP NAT protocol. */
+static void __exit nf_nat_proto_dccp_fini(void)
+{
+	nf_nat_protocol_unregister(&nf_nat_protocol_dccp);
+}
+
+module_init(nf_nat_proto_dccp_init);
+module_exit(nf_nat_proto_dccp_fini);
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("DCCP NAT protocol helper");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
new file mode 100644
index 0000000..d7e8920
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -0,0 +1,149 @@
+/*
+ * nf_nat_proto_gre.c
+ *
+ * NAT protocol helper module for GRE.
+ *
+ * GRE is a generic encapsulation protocol, which is generally not very
+ * suited for NAT, as it has no protocol-specific part as port numbers.
+ *
+ * It has an optional key field, which may help us distinguishing two
+ * connections between the same two hosts.
+ *
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
+ *
+ * PPTP is built on top of a modified version of GRE, and has a mandatory
+ * field called "CallID", which serves us for the same purpose as the key
+ * field in plain GRE.
+ *
+ * Documentation about PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat_protocol.h>
+#include <linux/netfilter/nf_conntrack_proto_gre.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
+
+/*
+ * generate unique tuple ...
+ *
+ * Only connections with a master are handled.  Keys are tried starting
+ * from a function-static counter, within the specified range or within
+ * 1..0xffff when none is specified.  Returns true once an unused tuple
+ * has been written into @tuple.
+ */
+static bool
+gre_unique_tuple(struct nf_conntrack_tuple *tuple,
+		 const struct nf_nat_range *range,
+		 enum nf_nat_manip_type maniptype,
+		 const struct nf_conn *ct)
+{
+	static u_int16_t key;
+	unsigned int base = 1, span = 0xffff, tries;
+	__be16 *keyptr;
+
+	/* If there is no master conntrack we are not PPTP,
+	   do not change tuples */
+	if (ct->master == NULL)
+		return false;
+
+	keyptr = (maniptype == IP_NAT_MANIP_SRC) ? &tuple->src.u.gre.key
+						 : &tuple->dst.u.gre.key;
+
+	if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) {
+		base = ntohs(range->min.gre.key);
+		span = ntohs(range->max.gre.key) - base + 1;
+	} else {
+		pr_debug("%p: NATing GRE PPTP\n", ct);
+	}
+
+	pr_debug("min = %u, range_size = %u\n", base, span);
+
+	for (tries = 0; tries < span; tries++) {
+		*keyptr = htons(base + key % span);
+		if (!nf_nat_used_tuple(tuple, ct))
+			return true;
+		/* the counter only advances past keys that are in use */
+		key++;
+	}
+
+	pr_debug("%p: no NAT mapping\n", ct);
+	return false;
+}
+
+/*
+ * manipulate a GRE packet according to maniptype
+ *
+ * Only destination manips change anything: for PPTP GRE the call ID is
+ * set to the destination key of @tuple, version 0 (RFC 1701) packets are
+ * left alone, and any other version is rejected.
+ */
+static bool
+gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
+	      const struct nf_conntrack_tuple *tuple,
+	      enum nf_nat_manip_type maniptype)
+{
+	const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+	const unsigned int hdroff = iphdroff + iph->ihl * 4;
+	const struct gre_hdr *greh;
+	struct gre_hdr_pptp *pgreh;
+
+	/* pgreh includes two optional 32bit fields which are not required
+	 * to be there.  That's where the magic '8' comes from */
+	if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
+		return false;
+
+	/* we only have destination manip of a packet, since 'source key'
+	 * is not present in the packet itself */
+	if (maniptype != IP_NAT_MANIP_DST)
+		return true;
+
+	greh = (void *)skb->data + hdroff;
+	pgreh = (struct gre_hdr_pptp *)greh;
+
+	if (greh->version == GRE_VERSION_PPTP) {
+		pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
+		pgreh->call_id = tuple->dst.u.gre.key;
+	} else if (greh->version != GRE_VERSION_1701) {
+		pr_debug("can't nat unknown GRE version\n");
+		return false;
+	}
+	/* GRE_VERSION_1701: We do not currently NAT any GREv0 packets.
+	 * Try to behave like "nf_nat_proto_unknown" */
+
+	return true;
+}
+
+/* NAT protocol operations for IPPROTO_GRE. */
+static const struct nf_nat_protocol gre = {
+	.me			= THIS_MODULE,
+	.protonum		= IPPROTO_GRE,
+	.in_range		= nf_nat_proto_in_range,
+	.unique_tuple		= gre_unique_tuple,
+	.manip_pkt		= gre_manip_pkt,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.nlattr_to_range	= nf_nat_proto_nlattr_to_range,
+	.range_to_nlattr	= nf_nat_proto_range_to_nlattr,
+#endif
+};
+
+/* Module load: register the GRE NAT protocol; passes on the result. */
+static int __init nf_nat_proto_gre_init(void)
+{
+	int err = nf_nat_protocol_register(&gre);
+
+	return err;
+}
+
+/* Module unload: unregister the GRE NAT protocol. */
+static void __exit nf_nat_proto_gre_fini(void)
+{
+	nf_nat_protocol_unregister(&gre);
+}
+
+module_init(nf_nat_proto_gre_init);
+module_exit(nf_nat_proto_gre_fini);
+
+/*
+ * Exported function with an empty body.
+ * NOTE(review): presumably exists so that callers pick up a symbol
+ * dependency on this module - confirm.
+ */
+void nf_nat_need_gre(void)
+{
+}
+EXPORT_SYMBOL_GPL(nf_nat_need_gre);
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
new file mode 100644
index 0000000..19a8b0b
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -0,0 +1,84 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat_protocol.h>
+
+/*
+ * Check whether the ICMP id of @tuple lies within [@min, @max], compared
+ * in host byte order.  @maniptype is not consulted.
+ */
+static bool
+icmp_in_range(const struct nf_conntrack_tuple *tuple,
+	      enum nf_nat_manip_type maniptype,
+	      const union nf_conntrack_man_proto *min,
+	      const union nf_conntrack_man_proto *max)
+{
+	const u_int16_t id = ntohs(tuple->src.u.icmp.id);
+
+	if (id < ntohs(min->icmp.id))
+		return false;
+
+	return id <= ntohs(max->icmp.id);
+}
+
+/*
+ * Search for an ICMP id that makes @tuple unused, starting from a
+ * function-static counter.  The candidates span the specified range, or
+ * 0xFFFF values above range->min.icmp.id when no range is specified.
+ * Returns true once an unused tuple has been written into @tuple.
+ */
+static bool
+icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
+		  const struct nf_nat_range *range,
+		  enum nf_nat_manip_type maniptype,
+		  const struct nf_conn *ct)
+{
+	static u_int16_t id;
+	const u_int16_t first = ntohs(range->min.icmp.id);
+	unsigned int span, tries;
+
+	if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
+		span = ntohs(range->max.icmp.id) - first + 1;
+	else
+		span = 0xFFFF;	/* no range specified */
+
+	for (tries = 0; tries < span; tries++) {
+		tuple->src.u.icmp.id = htons(first + (id % span));
+		if (!nf_nat_used_tuple(tuple, ct))
+			return true;
+		/* the counter only advances past ids that are in use */
+		id++;
+	}
+
+	return false;
+}
+
+/*
+ * Replace the echo id of the ICMP header behind the IP header at
+ * @iphdroff with the id from @tuple and adjust the ICMP checksum.
+ * Returns false if the header could not be made writable.
+ */
+static bool
+icmp_manip_pkt(struct sk_buff *skb,
+	       unsigned int iphdroff,
+	       const struct nf_conntrack_tuple *tuple,
+	       enum nf_nat_manip_type maniptype)
+{
+	const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+	const unsigned int hdroff = iphdroff + iph->ihl * 4;
+	const __be16 new_id = tuple->src.u.icmp.id;
+	struct icmphdr *hdr;
+
+	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+		return false;
+
+	hdr = (struct icmphdr *)(skb->data + hdroff);
+	inet_proto_csum_replace2(&hdr->checksum, skb,
+				 hdr->un.echo.id, new_id, 0);
+	hdr->un.echo.id = new_id;
+
+	return true;
+}
+
+/* NAT protocol operations for IPPROTO_ICMP. */
+const struct nf_nat_protocol nf_nat_protocol_icmp = {
+	.me			= THIS_MODULE,
+	.protonum		= IPPROTO_ICMP,
+	.in_range		= icmp_in_range,
+	.unique_tuple		= icmp_unique_tuple,
+	.manip_pkt		= icmp_manip_pkt,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.nlattr_to_range	= nf_nat_proto_nlattr_to_range,
+	.range_to_nlattr	= nf_nat_proto_range_to_nlattr,
+#endif
+};
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
new file mode 100644
index 0000000..65e470b
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/sctp.h>
+#include <net/sctp/checksum.h>
+
+#include <net/netfilter/nf_nat_protocol.h>
+
+/* Rover passed to the common port search for SCTP. */
+static u_int16_t nf_sctp_port_rover;
+
+/* Pick a unique tuple by delegating to nf_nat_proto_unique_tuple(). */
+static bool
+sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
+		  const struct nf_nat_range *range,
+		  enum nf_nat_manip_type maniptype,
+		  const struct nf_conn *ct)
+{
+	u_int16_t *rover = &nf_sctp_port_rover;
+
+	return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, rover);
+}
+
+/*
+ * Rewrite the source or destination port (per @maniptype) of the SCTP
+ * header behind the IP header at @iphdroff and recompute the checksum
+ * over the linear data from the SCTP header onwards plus the linear data
+ * of every skb on the frag_list.
+ *
+ * The IP addresses are not needed here: the checksum is recomputed from
+ * the SCTP data only, so no old/new address pair is tracked.
+ *
+ * Returns false if the header could not be made writable.
+ */
+static bool
+sctp_manip_pkt(struct sk_buff *skb,
+	       unsigned int iphdroff,
+	       const struct nf_conntrack_tuple *tuple,
+	       enum nf_nat_manip_type maniptype)
+{
+	const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+	sctp_sctphdr_t *hdr;
+	unsigned int hdroff = iphdroff + iph->ihl*4;
+	__be32 crc32;
+
+	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+		return false;
+
+	hdr = (struct sctphdr *)(skb->data + hdroff);
+
+	if (maniptype == IP_NAT_MANIP_SRC) {
+		/* Get rid of src pt */
+		hdr->source = tuple->src.u.sctp.port;
+	} else {
+		/* Get rid of dst pt */
+		hdr->dest = tuple->dst.u.sctp.port;
+	}
+
+	crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff);
+	for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next)
+		crc32 = sctp_update_cksum((u8 *)skb->data, skb_headlen(skb),
+					  crc32);
+	crc32 = sctp_end_cksum(crc32);
+	hdr->checksum = crc32;
+
+	return true;
+}
+
+/* NAT protocol operations for IPPROTO_SCTP. */
+static const struct nf_nat_protocol nf_nat_protocol_sctp = {
+	.me			= THIS_MODULE,
+	.protonum		= IPPROTO_SCTP,
+	.in_range		= nf_nat_proto_in_range,
+	.unique_tuple		= sctp_unique_tuple,
+	.manip_pkt		= sctp_manip_pkt,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.nlattr_to_range	= nf_nat_proto_nlattr_to_range,
+	.range_to_nlattr	= nf_nat_proto_range_to_nlattr,
+#endif
+};
+
+/* Module load: register the SCTP NAT protocol; passes on the result. */
+static int __init nf_nat_proto_sctp_init(void)
+{
+	int err = nf_nat_protocol_register(&nf_nat_protocol_sctp);
+
+	return err;
+}
+
+/* Module unload: unregister the SCTP NAT protocol. */
+static void __exit nf_nat_proto_sctp_exit(void)
+{
+	nf_nat_protocol_unregister(&nf_nat_protocol_sctp);
+}
+
+module_init(nf_nat_proto_sctp_init);
+module_exit(nf_nat_proto_sctp_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SCTP NAT protocol helper");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
new file mode 100644
index 0000000..399e2cf
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -0,0 +1,93 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_core.h>
+
+/* Rover passed to the common port search for TCP. */
+static u_int16_t tcp_port_rover;
+
+/* Pick a unique tuple by delegating to nf_nat_proto_unique_tuple(). */
+static bool
+tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
+		 const struct nf_nat_range *range,
+		 enum nf_nat_manip_type maniptype,
+		 const struct nf_conn *ct)
+{
+	u_int16_t *rover = &tcp_port_rover;
+
+	return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, rover);
+}
+
+/*
+ * Rewrite the source or destination port (per @maniptype) of the TCP
+ * header behind the IP header at @iphdroff.  The checksum is only
+ * adjusted, for the address and port change, when a complete
+ * struct tcphdr fits into the packet.
+ *
+ * Returns false if the header could not be made writable.
+ */
+static bool
+tcp_manip_pkt(struct sk_buff *skb,
+	      unsigned int iphdroff,
+	      const struct nf_conntrack_tuple *tuple,
+	      enum nf_nat_manip_type maniptype)
+{
+	const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+	const unsigned int hdroff = iphdroff + iph->ihl * 4;
+	const bool src = (maniptype == IP_NAT_MANIP_SRC);
+	struct tcphdr *hdr;
+	__be16 *portptr, newport, oldport;
+	__be32 oldip, newip;
+	int hdrsize;
+
+	/* this could be a inner header returned in icmp packet; in such
+	   cases we cannot update the checksum field since it is outside of
+	   the 8 bytes of transport layer headers we are guaranteed */
+	if (skb->len >= hdroff + sizeof(struct tcphdr))
+		hdrsize = sizeof(struct tcphdr);
+	else
+		hdrsize = 8;	/* TCP connection tracking guarantees this much */
+
+	if (!skb_make_writable(skb, hdroff + hdrsize))
+		return false;
+
+	/* header pointers are derived only after skb_make_writable() */
+	iph = (struct iphdr *)(skb->data + iphdroff);
+	hdr = (struct tcphdr *)(skb->data + hdroff);
+
+	/* Get rid of src ip and src pt, or of dst ip and dst pt */
+	oldip = src ? iph->saddr : iph->daddr;
+	newip = src ? tuple->src.u3.ip : tuple->dst.u3.ip;
+	newport = src ? tuple->src.u.tcp.port : tuple->dst.u.tcp.port;
+	portptr = src ? &hdr->source : &hdr->dest;
+
+	oldport = *portptr;
+	*portptr = newport;
+
+	if (hdrsize >= sizeof(*hdr)) {
+		inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+		inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
+	}
+
+	return true;
+}
+
+/* NAT protocol operations for IPPROTO_TCP. */
+const struct nf_nat_protocol nf_nat_protocol_tcp = {
+	.me			= THIS_MODULE,
+	.protonum		= IPPROTO_TCP,
+	.in_range		= nf_nat_proto_in_range,
+	.unique_tuple		= tcp_unique_tuple,
+	.manip_pkt		= tcp_manip_pkt,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.nlattr_to_range	= nf_nat_proto_nlattr_to_range,
+	.range_to_nlattr	= nf_nat_proto_range_to_nlattr,
+#endif
+};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
new file mode 100644
index 0000000..9e61c79
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -0,0 +1,84 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat_protocol.h>
+
+/* Rover state handed to nf_nat_proto_unique_tuple() on every UDP lookup. */
+static u_int16_t udp_port_rover;
+
+/*
+ * Choose a unique UDP tuple for @ct within @range.
+ *
+ * All of the work is delegated to the generic nf_nat_proto_unique_tuple()
+ * helper, which is given the UDP-specific rover.  The helper's result is
+ * returned unchanged.
+ */
+static bool udp_unique_tuple(struct nf_conntrack_tuple *tuple,
+			     const struct nf_nat_range *range,
+			     enum nf_nat_manip_type maniptype,
+			     const struct nf_conn *ct)
+{
+	u_int16_t *rover = &udp_port_rover;
+
+	return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, rover);
+}
+
+/*
+ * Rewrite the UDP source or destination port of the packet whose IP header
+ * starts at @iphdroff so that it matches @tuple, and adjust the UDP checksum
+ * for both the port change and the accompanying IP address change.
+ *
+ * @maniptype selects which side is rewritten: IP_NAT_MANIP_SRC touches the
+ * source, anything else the destination.
+ *
+ * Returns false only when the UDP header cannot be made writable.
+ */
+static bool
+udp_manip_pkt(struct sk_buff *skb,
+	      unsigned int iphdroff,
+	      const struct nf_conntrack_tuple *tuple,
+	      enum nf_nat_manip_type maniptype)
+{
+	const struct iphdr *ip = (struct iphdr *)(skb->data + iphdroff);
+	unsigned int l4off = iphdroff + ip->ihl*4;
+	struct udphdr *udp;
+	__be32 addr_before, addr_after;
+	__be16 *port_slot, port_after;
+
+	if (!skb_make_writable(skb, l4off + sizeof(*udp)))
+		return false;
+
+	/* Re-derive both header pointers from skb->data after the call above */
+	ip = (struct iphdr *)(skb->data + iphdroff);
+	udp = (struct udphdr *)(skb->data + l4off);
+
+	switch (maniptype) {
+	case IP_NAT_MANIP_SRC:
+		/* Get rid of src ip and src pt */
+		addr_before = ip->saddr;
+		addr_after = tuple->src.u3.ip;
+		port_after = tuple->src.u.udp.port;
+		port_slot = &udp->source;
+		break;
+	default:
+		/* Get rid of dst ip and dst pt */
+		addr_before = ip->daddr;
+		addr_after = tuple->dst.u3.ip;
+		port_after = tuple->dst.u.udp.port;
+		port_slot = &udp->dest;
+		break;
+	}
+
+	/* A zero checksum is only touched when the skb is CHECKSUM_PARTIAL */
+	if (udp->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+		inet_proto_csum_replace4(&udp->check, skb,
+					 addr_before, addr_after, 1);
+		inet_proto_csum_replace2(&udp->check, skb,
+					 *port_slot, port_after, 0);
+		/* Never leave a recomputed checksum at zero */
+		if (!udp->check)
+			udp->check = CSUM_MANGLED_0;
+	}
+
+	*port_slot = port_after;
+	return true;
+}
+
+const struct nf_nat_protocol nf_nat_protocol_udp = { /* NAT operations used for IPPROTO_UDP */
+ .protonum = IPPROTO_UDP,
+ .me = THIS_MODULE,
+ .manip_pkt = udp_manip_pkt, /* rewrites the UDP port and adjusts the UDP checksum */
+ .in_range = nf_nat_proto_in_range, /* shared helper, defined outside this file */
+ .unique_tuple = udp_unique_tuple, /* wraps nf_nat_proto_unique_tuple() with the UDP rover */
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .range_to_nlattr = nf_nat_proto_range_to_nlattr, /* only set when conntrack netlink is configured */
+ .nlattr_to_range = nf_nat_proto_nlattr_to_range,
+#endif
+};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
new file mode 100644
index 0000000..440a229
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c
@@ -0,0 +1,99 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_protocol.h>
+
+/* Rover state handed to nf_nat_proto_unique_tuple() on every UDP-Lite lookup. */
+static u_int16_t udplite_port_rover;
+
+/*
+ * Choose a unique UDP-Lite tuple for @ct within @range.
+ *
+ * The generic nf_nat_proto_unique_tuple() helper does the work; this wrapper
+ * only supplies the UDP-Lite rover and passes the helper's result through.
+ */
+static bool udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
+				 const struct nf_nat_range *range,
+				 enum nf_nat_manip_type maniptype,
+				 const struct nf_conn *ct)
+{
+	u_int16_t *rover = &udplite_port_rover;
+
+	return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, rover);
+}
+
+/*
+ * Rewrite the UDP-Lite source or destination port of the packet whose IP
+ * header starts at @iphdroff so that it matches @tuple.  The checksum is
+ * always adjusted for the address and port change, and a result of zero is
+ * replaced by CSUM_MANGLED_0.
+ *
+ * Returns false only when the header cannot be made writable.
+ */
+static bool
+udplite_manip_pkt(struct sk_buff *skb,
+		  unsigned int iphdroff,
+		  const struct nf_conntrack_tuple *tuple,
+		  enum nf_nat_manip_type maniptype)
+{
+	const struct iphdr *ip = (struct iphdr *)(skb->data + iphdroff);
+	unsigned int l4off = iphdroff + ip->ihl*4;
+	struct udphdr *udpl;
+	__be32 addr_before, addr_after;
+	__be16 *port_slot, port_after;
+
+	if (!skb_make_writable(skb, l4off + sizeof(*udpl)))
+		return false;
+
+	/* Re-derive both header pointers from skb->data after the call above */
+	ip = (struct iphdr *)(skb->data + iphdroff);
+	udpl = (struct udphdr *)(skb->data + l4off);
+
+	if (maniptype != IP_NAT_MANIP_SRC) {
+		/* Get rid of dst ip and dst pt */
+		addr_before = ip->daddr;
+		addr_after = tuple->dst.u3.ip;
+		port_after = tuple->dst.u.udp.port;
+		port_slot = &udpl->dest;
+	} else {
+		/* Get rid of src ip and src pt */
+		addr_before = ip->saddr;
+		addr_after = tuple->src.u3.ip;
+		port_after = tuple->src.u.udp.port;
+		port_slot = &udpl->source;
+	}
+
+	inet_proto_csum_replace4(&udpl->check, skb, addr_before, addr_after, 1);
+	inet_proto_csum_replace2(&udpl->check, skb, *port_slot, port_after, 0);
+	if (!udpl->check)
+		udpl->check = CSUM_MANGLED_0;
+
+	*port_slot = port_after;
+	return true;
+}
+
+static const struct nf_nat_protocol nf_nat_protocol_udplite = { /* registered by the module init below */
+ .protonum = IPPROTO_UDPLITE,
+ .me = THIS_MODULE,
+ .manip_pkt = udplite_manip_pkt, /* rewrites the port and always adjusts the checksum */
+ .in_range = nf_nat_proto_in_range, /* shared helper, defined outside this file */
+ .unique_tuple = udplite_unique_tuple, /* wraps nf_nat_proto_unique_tuple() with the UDP-Lite rover */
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .range_to_nlattr = nf_nat_proto_range_to_nlattr, /* only set when conntrack netlink is configured */
+ .nlattr_to_range = nf_nat_proto_nlattr_to_range,
+#endif
+};
+
+/* Module entry point: register the UDP-Lite NAT protocol operations. */
+static int __init nf_nat_proto_udplite_init(void)
+{
+	int err = nf_nat_protocol_register(&nf_nat_protocol_udplite);
+
+	return err;
+}
+
+/* Module exit point: undo the registration done by the init function. */
+static void __exit
+nf_nat_proto_udplite_fini(void)
+{
+	nf_nat_protocol_unregister(&nf_nat_protocol_udplite);
+}
+
+module_init(nf_nat_proto_udplite_init);
+module_exit(nf_nat_proto_udplite_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("UDP-Lite NAT protocol helper");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
new file mode 100644
index 0000000..14381c6
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -0,0 +1,53 @@
+/* The "unknown" protocol. This is what is used for protocols we
+ * don't understand. It's returned by ip_ct_find_proto().
+ */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat_protocol.h>
+
+/*
+ * Range check for protocols without a dedicated NAT helper: every tuple is
+ * reported as being inside the range, whatever @min and @max are.
+ */
+static bool
+unknown_in_range(const struct nf_conntrack_tuple *tuple,
+		 enum nf_nat_manip_type manip_type,
+		 const union nf_conntrack_man_proto *min,
+		 const union nf_conntrack_man_proto *max)
+{
+	return true;
+}
+
+/*
+ * Unique-tuple search for protocols without a dedicated NAT helper.
+ * Nothing protocol-specific can be altered here, so the search always
+ * reports failure.
+ */
+static bool
+unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
+		     const struct nf_nat_range *range,
+		     enum nf_nat_manip_type maniptype,
+		     const struct nf_conn *ct)
+{
+	/* Sorry: we can't help you; if it's not unique, we can't frob
+	   anything. */
+	return false;
+}
+
+/*
+ * Packet manipulation for protocols without a dedicated NAT helper: the
+ * packet is left untouched and success is reported.
+ */
+static bool unknown_manip_pkt(struct sk_buff *skb,
+			      unsigned int iphdroff,
+			      const struct nf_conntrack_tuple *tuple,
+			      enum nf_nat_manip_type maniptype)
+{
+	return true;
+}
+
+const struct nf_nat_protocol nf_nat_unknown_protocol = {
+ /* .me isn't set: getting a ref to this cannot fail. */
+ .manip_pkt = unknown_manip_pkt, /* no-op, always reports success */
+ .in_range = unknown_in_range, /* always true */
+ .unique_tuple = unknown_unique_tuple, /* always false */
+};
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
new file mode 100644
index 0000000..8d489e7
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -0,0 +1,262 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Everything about the rules for NAT. */
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <net/checksum.h>
+#include <net/route.h>
+#include <linux/bitops.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_rule.h>
+
+#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
+ (1 << NF_INET_POST_ROUTING) | \
+ (1 << NF_INET_LOCAL_OUT))
+
+static struct /* initial ruleset handed to ipt_register_table() for the "nat" table */
+{
+ struct ipt_replace repl;
+ struct ipt_standard entries[3]; /* one per hook in NAT_VALID_HOOKS */
+ struct ipt_error term;
+} nat_initial_table __net_initdata = {
+ .repl = {
+ .name = "nat",
+ .valid_hooks = NAT_VALID_HOOKS,
+ .num_entries = 4, /* the three standard entries plus the error terminator */
+ .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
+ .hook_entry = { /* byte offset of each hook's entry: entries[0], [1] and [2] */
+ [NF_INET_PRE_ROUTING] = 0,
+ [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard),
+ [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
+ },
+ .underflow = { /* same offsets as hook_entry: each hook holds a single entry */
+ [NF_INET_PRE_ROUTING] = 0,
+ [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard),
+ [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
+ },
+ },
+ .entries = {
+ IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */
+ IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */
+ IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
+ },
+ .term = IPT_ERROR_INIT, /* ERROR */
+};
+
+static struct xt_table nat_table = { /* passed to ipt_register_table() from the per-netns init */
+ .name = "nat",
+ .valid_hooks = NAT_VALID_HOOKS, /* PRE_ROUTING, POST_ROUTING and LOCAL_OUT */
+ .lock = __RW_LOCK_UNLOCKED(nat_table.lock),
+ .me = THIS_MODULE,
+ .af = AF_INET,
+};
+
+/*
+ * Source NAT target.
+ *
+ * Sets up source manipulation for the connection the packet belongs to,
+ * using the first range supplied with the rule (the checkentry hook only
+ * accepts rules carrying exactly one).  Returns the verdict produced by
+ * nf_nat_setup_info().
+ */
+static unsigned int
+ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct nf_nat_multi_range_compat *ranges = par->targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *conn;
+
+	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
+
+	conn = nf_ct_get(skb, &ctinfo);
+
+	/* Connection must be valid and new. */
+	NF_CT_ASSERT(conn && (ctinfo == IP_CT_NEW ||
+			      ctinfo == IP_CT_RELATED ||
+			      ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
+	NF_CT_ASSERT(par->out != NULL);
+
+	return nf_nat_setup_info(conn, &ranges->range[0], IP_NAT_MANIP_SRC);
+}
+
+/*
+ * Before 2.6.11 we did implicit source NAT if required. Warn about change.
+ *
+ * Routes towards @dstip and, if the source address the route would use
+ * differs from @srcip, prints a warning.  The warning is printed at most
+ * once (tracked by a function-local static), so once it has fired the route
+ * lookup is skipped altogether.
+ */
+static void warn_if_extra_mangle(struct net *net, __be32 dstip, __be32 srcip)
+{
+	static int warned = 0;
+	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
+	struct rtable *rt;
+
+	/* Nothing left to report: avoid a pointless route lookup per packet */
+	if (warned)
+		return;
+
+	if (ip_route_output_key(net, &rt, &fl) != 0)
+		return;
+
+	if (rt->rt_src != srcip && !warned) {
+		printk("NAT: no longer support implicit source local NAT\n");
+		printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
+		       NIPQUAD(srcip), NIPQUAD(dstip));
+		warned = 1;
+	}
+	/* Drop the reference obtained by the successful lookup */
+	ip_rt_put(rt);
+}
+
+/*
+ * Destination NAT target.
+ *
+ * Sets up destination manipulation for the connection the packet belongs
+ * to, using the first range supplied with the rule.  For locally generated
+ * packets whose rule maps IP addresses, warn_if_extra_mangle() is consulted
+ * first.  Returns the verdict produced by nf_nat_setup_info().
+ */
+static unsigned int
+ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct nf_nat_multi_range_compat *ranges = par->targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *conn;
+
+	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
+		     par->hooknum == NF_INET_LOCAL_OUT);
+
+	conn = nf_ct_get(skb, &ctinfo);
+
+	/* Connection must be valid and new. */
+	NF_CT_ASSERT(conn && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+
+	if (par->hooknum == NF_INET_LOCAL_OUT) {
+		if (ranges->range[0].flags & IP_NAT_RANGE_MAP_IPS)
+			warn_if_extra_mangle(dev_net(par->out),
+					     ip_hdr(skb)->daddr,
+					     ranges->range[0].min_ip);
+	}
+
+	return nf_nat_setup_info(conn, &ranges->range[0], IP_NAT_MANIP_DST);
+}
+
+/*
+ * Rule validation for SNAT: accept the rule only when it carries exactly
+ * one range; otherwise log a message and reject it.
+ */
+static bool ipt_snat_checkentry(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_multi_range_compat *ranges = par->targinfo;
+
+	/* Must be a valid range */
+	if (ranges->rangesize == 1)
+		return true;
+
+	printk("SNAT: multiple ranges no longer supported\n");
+	return false;
+}
+
+/*
+ * Rule validation for DNAT: accept the rule only when it carries exactly
+ * one range; otherwise log a message and reject it.
+ */
+static bool ipt_dnat_checkentry(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_multi_range_compat *ranges = par->targinfo;
+
+	/* Must be a valid range */
+	if (ranges->rangesize == 1)
+		return true;
+
+	printk("DNAT: multiple ranges no longer supported\n");
+	return false;
+}
+
+/*
+ * Set up a "null" NAT binding for @ct on @hooknum: the address range is
+ * pinned to the single IP the connection already uses, taken from the reply
+ * tuple.  Returns the verdict produced by nf_nat_setup_info().
+ */
+unsigned int
+alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+{
+	/* Force range to this IP; let proto decide mapping for
+	   per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+	   Use reply in case it's already been mangled (eg local packet).
+	*/
+	__be32 ip;
+	struct nf_nat_range range;
+
+	if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC)
+		ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip;
+	else
+		ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip;
+
+	range = (struct nf_nat_range)
+		{ IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } };
+
+	pr_debug("Allocating NULL binding for %p (%u.%u.%u.%u)\n",
+		 ct, NIPQUAD(ip));
+	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
+}
+
+/*
+ * Run the packet through the per-netns "nat" table.  When the table accepts
+ * the packet but no rule has initialised NAT for this hook's manipulation
+ * type, a null binding is allocated instead.  Any other verdict from the
+ * table is returned unchanged.
+ */
+int nf_nat_rule_find(struct sk_buff *skb,
+		     unsigned int hooknum,
+		     const struct net_device *in,
+		     const struct net_device *out,
+		     struct nf_conn *ct)
+{
+	struct net *net = nf_ct_net(ct);
+	int verdict = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
+
+	if (verdict != NF_ACCEPT)
+		return verdict;
+
+	if (nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
+		return verdict;
+
+	/* NUL mapping */
+	verdict = alloc_null_binding(ct, hooknum);
+	return verdict;
+}
+
+static struct xt_target ipt_snat_reg __read_mostly = { /* registration record for the SNAT target */
+ .name = "SNAT",
+ .target = ipt_snat_target,
+ .targetsize = sizeof(struct nf_nat_multi_range_compat),
+ .table = "nat",
+ .hooks = 1 << NF_INET_POST_ROUTING, /* the only hook ipt_snat_target() asserts on */
+ .checkentry = ipt_snat_checkentry, /* rejects rules with more than one range */
+ .family = AF_INET,
+};
+
+static struct xt_target ipt_dnat_reg __read_mostly = { /* registration record for the DNAT target */
+ .name = "DNAT",
+ .target = ipt_dnat_target,
+ .targetsize = sizeof(struct nf_nat_multi_range_compat),
+ .table = "nat",
+ .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), /* matches the assertion in ipt_dnat_target() */
+ .checkentry = ipt_dnat_checkentry, /* rejects rules with more than one range */
+ .family = AF_INET,
+};
+
+/*
+ * Per-namespace setup: register the "nat" table with its initial ruleset
+ * and remember it in net->ipv4.nat_table.  Returns 0 on success or the
+ * error encoded in the pointer returned by ipt_register_table().
+ */
+static int __net_init nf_nat_rule_net_init(struct net *net)
+{
+	net->ipv4.nat_table = ipt_register_table(net, &nat_table,
+						 &nat_initial_table.repl);
+
+	if (!IS_ERR(net->ipv4.nat_table))
+		return 0;
+
+	return PTR_ERR(net->ipv4.nat_table);
+}
+
+/* Per-namespace teardown: unregister the table stored by the init hook. */
+static void __net_exit
+nf_nat_rule_net_exit(struct net *net)
+{
+	ipt_unregister_table(net->ipv4.nat_table);
+}
+
+static struct pernet_operations nf_nat_rule_net_ops = { /* registered by nf_nat_rule_init() */
+ .init = nf_nat_rule_net_init, /* registers the "nat" table for a namespace */
+ .exit = nf_nat_rule_net_exit, /* unregisters it again */
+};
+
+/*
+ * Register the per-namespace table operations followed by the SNAT and DNAT
+ * targets.  If a step fails, everything registered before it is undone in
+ * reverse order and the error code of the failing step is returned.
+ * Returns 0 on success.
+ */
+int __init nf_nat_rule_init(void)
+{
+	int err;
+
+	err = register_pernet_subsys(&nf_nat_rule_net_ops);
+	if (err)
+		return err;
+
+	err = xt_register_target(&ipt_snat_reg);
+	if (err)
+		goto err_pernet;
+
+	err = xt_register_target(&ipt_dnat_reg);
+	if (err)
+		goto err_snat;
+
+	return 0;
+
+err_snat:
+	xt_unregister_target(&ipt_snat_reg);
+err_pernet:
+	unregister_pernet_subsys(&nf_nat_rule_net_ops);
+	return err;
+}
+
+/*
+ * Undo nf_nat_rule_init(): unregister the DNAT and SNAT targets, then the
+ * per-namespace operations (the reverse of the registration order).
+ */
+void
+nf_nat_rule_cleanup(void)
+{
+	xt_unregister_target(&ipt_dnat_reg);
+	xt_unregister_target(&ipt_snat_reg);
+	unregister_pernet_subsys(&nf_nat_rule_net_ops);
+}
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
new file mode 100644
index 0000000..1454432
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -0,0 +1,502 @@
+/* SIP extension for UDP NAT alteration.
+ *
+ * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
+ * based on RR's ip_nat_ftp.c and other modules.
+ * (C) 2007 United Security Providers
+ * (C) 2007, 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <linux/udp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_sip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
+MODULE_DESCRIPTION("SIP NAT helper");
+MODULE_ALIAS("ip_nat_sip");
+
+
+/*
+ * Replace @matchlen bytes at @matchoff of the UDP payload with the
+ * @buflen bytes in @buffer.
+ *
+ * On success *@dptr is reloaded from the skb, *@datalen is adjusted by the
+ * size difference and 1 is returned.  Returns 0 when
+ * nf_nat_mangle_udp_packet() fails; the caller's values are then untouched.
+ */
+static unsigned int mangle_packet(struct sk_buff *skb,
+				  const char **dptr, unsigned int *datalen,
+				  unsigned int matchoff, unsigned int matchlen,
+				  const char *buffer, unsigned int buflen)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+	if (nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen,
+				     buffer, buflen)) {
+		/* Reload data pointer and adjust datalen value */
+		*dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
+		*datalen += buflen - matchlen;
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Translate one "address:port" occurrence inside the SIP message.
+ *
+ * If @addr/@port equal the source (or destination) of the packet's own
+ * direction, they are replaced by the destination (or source) of the
+ * opposite direction's tuple.
+ *
+ * Returns 1 when nothing had to change or the replacement succeeded, and 0
+ * when mangle_packet() failed.
+ */
+static int map_addr(struct sk_buff *skb,
+		    const char **dptr, unsigned int *datalen,
+		    unsigned int matchoff, unsigned int matchlen,
+		    union nf_inet_addr *addr, __be16 port)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	const struct nf_conntrack_tuple *fwd = &ct->tuplehash[dir].tuple;
+	const struct nf_conntrack_tuple *rev = &ct->tuplehash[!dir].tuple;
+	char text[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
+	unsigned int textlen;
+	__be32 newaddr;
+	__be16 newport;
+
+	if (fwd->src.u3.ip == addr->ip && fwd->src.u.udp.port == port) {
+		newaddr = rev->dst.u3.ip;
+		newport = rev->dst.u.udp.port;
+	} else if (fwd->dst.u3.ip == addr->ip &&
+		   fwd->dst.u.udp.port == port) {
+		newaddr = rev->src.u3.ip;
+		newport = rev->src.u.udp.port;
+	} else {
+		/* Not an address of this connection: leave it alone */
+		return 1;
+	}
+
+	/* Already the translated value: nothing to rewrite */
+	if (newaddr == addr->ip && newport == port)
+		return 1;
+
+	textlen = sprintf(text, "%u.%u.%u.%u:%u",
+			  NIPQUAD(newaddr), ntohs(newport));
+
+	return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
+			     text, textlen);
+}
+
+/*
+ * Locate the URI of the SIP header selected by @type and translate its
+ * address with map_addr().  A header that is missing or cannot be parsed
+ * counts as success (1); otherwise map_addr()'s result is returned.
+ */
+static int map_sip_addr(struct sk_buff *skb,
+			const char **dptr, unsigned int *datalen,
+			enum sip_header_types type)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	unsigned int uri_off, uri_len;
+	union nf_inet_addr addr;
+	__be16 port;
+	int found;
+
+	found = ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL,
+					&uri_off, &uri_len, &addr, &port);
+	if (found <= 0)
+		return 1;
+
+	return map_addr(skb, dptr, datalen, uri_off, uri_len, &addr, port);
+}
+
+static unsigned int ip_nat_sip(struct sk_buff *skb,
+ const char **dptr, unsigned int *datalen)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ unsigned int dataoff, matchoff, matchlen;
+ union nf_inet_addr addr;
+ __be16 port;
+ int request, in_header; /* request: 1 for a SIP request, 0 for a response */
+
+ /* Basic rules: requests and responses.
+ * A message that does not start with "SIP/2.0" is treated as a
+ * request and the address in its request line is mapped first.
+ * Any failed mangle in this function drops the packet (NF_DROP);
+ * NF_ACCEPT is returned once every translation went through. */
+ if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) {
+ if (ct_sip_parse_request(ct, *dptr, *datalen,
+ &matchoff, &matchlen,
+ &addr, &port) > 0 &&
+ !map_addr(skb, dptr, datalen, matchoff, matchlen,
+ &addr, port))
+ return NF_DROP;
+ request = 1;
+ } else
+ request = 0;
+
+ /* Translate topmost Via header and parameters */
+ if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
+ SIP_HDR_VIA, NULL, &matchoff, &matchlen,
+ &addr, &port) > 0) {
+ unsigned int matchend, poff, plen, buflen, n;
+ char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
+
+ /* We're only interested in headers related to this
+ * connection: the Via address must be this direction's
+ * source for a request, or its destination for a response */
+ if (request) {
+ if (addr.ip != ct->tuplehash[dir].tuple.src.u3.ip ||
+ port != ct->tuplehash[dir].tuple.src.u.udp.port)
+ goto next;
+ } else {
+ if (addr.ip != ct->tuplehash[dir].tuple.dst.u3.ip ||
+ port != ct->tuplehash[dir].tuple.dst.u.udp.port)
+ goto next;
+ }
+
+ if (!map_addr(skb, dptr, datalen, matchoff, matchlen,
+ &addr, port))
+ return NF_DROP;
+
+ matchend = matchoff + matchlen; /* parameter searches below start here */
+
+ /* The maddr= parameter (RFC 3261) specifies where to send
+ * the reply. */
+ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
+ "maddr=", &poff, &plen,
+ &addr) > 0 &&
+ addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
+ addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
+ __be32 ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip));
+ if (!mangle_packet(skb, dptr, datalen, poff, plen,
+ buffer, buflen))
+ return NF_DROP;
+ }
+
+ /* The received= parameter (RFC 3261) contains the address
+ * from which the server received the request. */
+ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
+ "received=", &poff, &plen,
+ &addr) > 0 &&
+ addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
+ addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
+ __be32 ip = ct->tuplehash[!dir].tuple.src.u3.ip;
+ buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip));
+ if (!mangle_packet(skb, dptr, datalen, poff, plen,
+ buffer, buflen))
+ return NF_DROP;
+ }
+
+ /* The rport= parameter (RFC 3581) contains the port number
+ * from which the server received the request. */
+ if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen,
+ "rport=", &poff, &plen,
+ &n) > 0 &&
+ htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port &&
+ htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) {
+ __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port;
+ buflen = sprintf(buffer, "%u", ntohs(p));
+ if (!mangle_packet(skb, dptr, datalen, poff, plen,
+ buffer, buflen))
+ return NF_DROP;
+ }
+ }
+
+next:
+ /* Translate Contact headers: the parser is called repeatedly with
+ * dataoff/in_header as its cursor until it finds no further URI */
+ dataoff = 0;
+ in_header = 0;
+ while (ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen,
+ SIP_HDR_CONTACT, &in_header,
+ &matchoff, &matchlen,
+ &addr, &port) > 0) {
+ if (!map_addr(skb, dptr, datalen, matchoff, matchlen,
+ &addr, port))
+ return NF_DROP;
+ }
+
+ if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM) || /* finally the From and To URIs */
+ !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO))
+ return NF_DROP;
+ return NF_ACCEPT;
+}
+
+/*
+ * Handles expected signalling connections and media streams.
+ *
+ * Installed as the expectfn of the expectations created in this file.  The
+ * new connection's destination is always mapped back to the address and
+ * port saved in the expectation; its source is additionally rewritten when
+ * the connection came from the same source address as the master.
+ */
+static void ip_nat_sip_expected(struct nf_conn *ct,
+				struct nf_conntrack_expect *exp)
+{
+	struct nf_nat_range range;
+
+	/* This must be a fresh one. */
+	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+	/* For DST manip, map port here to where it's expected. */
+	range.flags = IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED;
+	range.min = exp->saved_proto;
+	range.max = exp->saved_proto;
+	range.min_ip = exp->saved_ip;
+	range.max_ip = exp->saved_ip;
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+
+	/* Change src to where master sends to, but only if the connection
+	 * actually came from the same source. */
+	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip !=
+	    ct->master->tuplehash[exp->dir].tuple.src.u3.ip)
+		return;
+
+	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.min_ip = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+	range.max_ip = range.min_ip;
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+}
+
+/*
+ * Register the expectation @exp for a signalling connection announced in
+ * the SIP message, and rewrite the announced "address:port" (found at
+ * @matchoff/@matchlen) when the expectation had to be moved to a different
+ * address or port.
+ *
+ * The original destination is kept in exp->saved_ip/saved_proto so that
+ * ip_nat_sip_expected() can map the expected connection back to it.
+ *
+ * Ports are tried upwards from the preferred one until the expectation can
+ * be registered.  Only -EBUSY from nf_ct_expect_related() (a clash with an
+ * existing expectation) is worth another attempt; any other error cannot be
+ * cured by picking a different port, so the search stops right away.
+ *
+ * Returns NF_ACCEPT on success, NF_DROP when no expectation could be
+ * registered or the packet could not be mangled.
+ */
+static unsigned int ip_nat_sip_expect(struct sk_buff *skb,
+				      const char **dptr, unsigned int *datalen,
+				      struct nf_conntrack_expect *exp,
+				      unsigned int matchoff,
+				      unsigned int matchlen)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	__be32 newip;
+	u_int16_t port;
+	char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
+	unsigned buflen;
+
+	/* Connection will come from reply */
+	if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip)
+		newip = exp->tuple.dst.u3.ip;
+	else
+		newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+
+	/* If the signalling port matches the connection's source port in the
+	 * original direction, try to use the destination port in the opposite
+	 * direction. */
+	if (exp->tuple.dst.u.udp.port ==
+	    ct->tuplehash[dir].tuple.src.u.udp.port)
+		port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port);
+	else
+		port = ntohs(exp->tuple.dst.u.udp.port);
+
+	exp->saved_ip = exp->tuple.dst.u3.ip;
+	exp->tuple.dst.u3.ip = newip;
+	exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
+	exp->dir = !dir;
+	exp->expectfn = ip_nat_sip_expected;
+
+	/* port is 16 bits wide: wrapping round to 0 ends the search */
+	for (; port != 0; port++) {
+		int ret;
+
+		exp->tuple.dst.u.udp.port = htons(port);
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		if (ret != -EBUSY) {
+			/* Not a port clash: retrying cannot help */
+			port = 0;
+			break;
+		}
+	}
+
+	if (port == 0)
+		return NF_DROP;
+
+	if (exp->tuple.dst.u3.ip != exp->saved_ip ||
+	    exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) {
+		buflen = sprintf(buffer, "%u.%u.%u.%u:%u",
+				 NIPQUAD(newip), port);
+		if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen,
+				   buffer, buflen))
+			goto err;
+	}
+	return NF_ACCEPT;
+
+err:
+	/* The message still announces the old address: withdraw the
+	 * expectation registered above */
+	nf_ct_unexpect_related(exp);
+	return NF_DROP;
+}
+
+/*
+ * Rewrite the SIP Content-Length header so that it matches the current
+ * size of the SDP body.
+ *
+ * The body length is derived from the position of the SDP version ("v=")
+ * header: everything from there to the end of the payload.  strlen("v=")
+ * is added back, presumably because the reported match offset points just
+ * past the "v=" prefix -- confirm against ct_sip_get_sdp_header().
+ *
+ * Returns 0 when either header cannot be found or mangling fails, otherwise
+ * the (non-zero) result of mangle_packet().
+ */
+static int mangle_content_len(struct sk_buff *skb,
+			      const char **dptr, unsigned int *datalen)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	unsigned int matchoff, matchlen;
+	char buffer[sizeof("65536")];
+	/* Unsigned to agree with the "%u" conversion and with the unsigned
+	 * operands the length is computed from */
+	unsigned int buflen, c_len;
+
+	/* Get actual SDP length */
+	if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
+				  SDP_HDR_VERSION, SDP_HDR_UNSPEC,
+				  &matchoff, &matchlen) <= 0)
+		return 0;
+	c_len = *datalen - matchoff + strlen("v=");
+
+	/* Now, update SDP length */
+	if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CONTENT_LENGTH,
+			      &matchoff, &matchlen) <= 0)
+		return 0;
+
+	buflen = sprintf(buffer, "%u", c_len);
+	return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
+			     buffer, buflen);
+}
+
+/*
+ * Find the SDP header @type (searching from @dataoff, bounded by @term) and
+ * replace its value with @buffer.
+ *
+ * Returns 0 on success, -ENOENT when the header is not found and -EINVAL
+ * when the packet could not be mangled.
+ */
+static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr,
+			     unsigned int dataoff, unsigned int *datalen,
+			     enum sdp_header_types type,
+			     enum sdp_header_types term,
+			     char *buffer, int buflen)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	unsigned int val_len, val_off;
+
+	if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term,
+				  &val_off, &val_len) <= 0)
+		return -ENOENT;
+
+	if (!mangle_packet(skb, dptr, datalen, val_off, val_len,
+			   buffer, buflen))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Replace the IPv4 address carried by the SDP header @type with @addr and
+ * bring the Content-Length header up to date afterwards.
+ *
+ * Returns 0 when the header is missing or cannot be mangled, otherwise the
+ * result of mangle_content_len().
+ */
+static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr,
+				    unsigned int dataoff,
+				    unsigned int *datalen,
+				    enum sdp_header_types type,
+				    enum sdp_header_types term,
+				    const union nf_inet_addr *addr)
+{
+	char text[sizeof("nnn.nnn.nnn.nnn")];
+	unsigned int textlen = sprintf(text, NIPQUAD_FMT, NIPQUAD(addr->ip));
+
+	if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term,
+			      text, textlen) != 0)
+		return 0;
+
+	return mangle_content_len(skb, dptr, datalen);
+}
+
+/*
+ * Replace the port number found at @matchoff/@matchlen with @port (written
+ * in decimal) and bring the Content-Length header up to date afterwards.
+ *
+ * Returns 0 when the packet cannot be mangled, otherwise the result of
+ * mangle_content_len().
+ */
+static unsigned int ip_nat_sdp_port(struct sk_buff *skb,
+				    const char **dptr,
+				    unsigned int *datalen,
+				    unsigned int matchoff,
+				    unsigned int matchlen,
+				    u_int16_t port)
+{
+	char text[sizeof("nnnnn")];
+	unsigned int textlen = sprintf(text, "%u", port);
+
+	if (mangle_packet(skb, dptr, datalen, matchoff, matchlen,
+			  text, textlen))
+		return mangle_content_len(skb, dptr, datalen);
+
+	return 0;
+}
+
+/*
+ * Rewrite the session-level addresses of an SDP body to @addr: the owner
+ * ("o=") address, which must be present, and the connection ("c=")
+ * address, which may be absent.  Content-Length is updated at the end.
+ *
+ * Returns 0 on failure, otherwise the result of mangle_content_len().
+ */
+static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr,
+				       unsigned int dataoff,
+				       unsigned int *datalen,
+				       const union nf_inet_addr *addr)
+{
+	char text[sizeof("nnn.nnn.nnn.nnn")];
+	unsigned int textlen;
+	int err;
+
+	/* Mangle session description owner and contact addresses */
+	textlen = sprintf(text, "%u.%u.%u.%u", NIPQUAD(addr->ip));
+	if (mangle_sdp_packet(skb, dptr, dataoff, datalen,
+			      SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA,
+			      text, textlen) != 0)
+		return 0;
+
+	err = mangle_sdp_packet(skb, dptr, dataoff, datalen,
+				SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA,
+				text, textlen);
+	/*
+	 * RFC 2327:
+	 *
+	 * Session description
+	 *
+	 * c=* (connection information - not required if included in all media)
+	 *
+	 * Hence a missing header (-ENOENT) is as acceptable as success.
+	 */
+	if (err != 0 && err != -ENOENT)
+		return 0;
+
+	return mangle_content_len(skb, dptr, datalen);
+}
+
+/*
+ * So, this packet has hit the connection tracking matching code.
+ * Mangle it, and change the expectation to match the new version.
+ *
+ * Registers the RTP and RTCP expectations for one media description on a
+ * pair of adjacent ports (RTP on port, RTCP on port + 1) and, when the RTP
+ * port had to change, rewrites the media port found at @mediaoff/@medialen.
+ * The address the peer should send to is stored in @rtp_addr.
+ *
+ * The original destinations are kept in saved_ip/saved_proto of both
+ * expectations so that ip_nat_sip_expected() can map the expected
+ * connections back to them.
+ *
+ * While searching for a free pair, only -EBUSY from nf_ct_expect_related()
+ * (a clash with an existing expectation) leads to another attempt; any
+ * other error cannot be cured by a different port and ends the search.
+ *
+ * Returns NF_ACCEPT on success and NF_DROP otherwise.
+ */
+static unsigned int ip_nat_sdp_media(struct sk_buff *skb,
+				     const char **dptr,
+				     unsigned int *datalen,
+				     struct nf_conntrack_expect *rtp_exp,
+				     struct nf_conntrack_expect *rtcp_exp,
+				     unsigned int mediaoff,
+				     unsigned int medialen,
+				     union nf_inet_addr *rtp_addr)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	u_int16_t port;
+
+	/* Connection will come from reply */
+	if (ct->tuplehash[dir].tuple.src.u3.ip ==
+	    ct->tuplehash[!dir].tuple.dst.u3.ip)
+		rtp_addr->ip = rtp_exp->tuple.dst.u3.ip;
+	else
+		rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+
+	rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip;
+	rtp_exp->tuple.dst.u3.ip = rtp_addr->ip;
+	rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
+	rtp_exp->dir = !dir;
+	rtp_exp->expectfn = ip_nat_sip_expected;
+
+	rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip;
+	rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip;
+	rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
+	rtcp_exp->dir = !dir;
+	rtcp_exp->expectfn = ip_nat_sip_expected;
+
+	/* Try to get same pair of ports: if not, try to change them. */
+	for (port = ntohs(rtp_exp->tuple.dst.u.udp.port);
+	     port != 0; port += 2) {
+		int ret;
+
+		rtp_exp->tuple.dst.u.udp.port = htons(port);
+		ret = nf_ct_expect_related(rtp_exp);
+		if (ret == -EBUSY)
+			continue;
+		if (ret != 0) {
+			/* Not a port clash: retrying cannot help */
+			port = 0;
+			break;
+		}
+
+		rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
+		ret = nf_ct_expect_related(rtcp_exp);
+		if (ret == 0)
+			break;
+
+		/* RTCP failed: the RTP half of the pair must go as well */
+		nf_ct_unexpect_related(rtp_exp);
+		if (ret != -EBUSY) {
+			port = 0;
+			break;
+		}
+	}
+
+	if (port == 0)
+		goto err1;
+
+	/* Update media port. */
+	if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port &&
+	    !ip_nat_sdp_port(skb, dptr, datalen, mediaoff, medialen, port))
+		goto err2;
+
+	return NF_ACCEPT;
+
+err2:
+	nf_ct_unexpect_related(rtp_exp);
+	nf_ct_unexpect_related(rtcp_exp);
+err1:
+	return NF_DROP;
+}
+
+static void __exit nf_nat_sip_fini(void)
+{
+ rcu_assign_pointer(nf_nat_sip_hook, NULL); /* clear every hook that the init function set */
+ rcu_assign_pointer(nf_nat_sip_expect_hook, NULL);
+ rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL);
+ rcu_assign_pointer(nf_nat_sdp_port_hook, NULL);
+ rcu_assign_pointer(nf_nat_sdp_session_hook, NULL);
+ rcu_assign_pointer(nf_nat_sdp_media_hook, NULL);
+ synchronize_rcu(); /* wait for an RCU grace period before the module text can go away */
+}
+
+static int __init nf_nat_sip_init(void)
+{
+ BUG_ON(nf_nat_sip_hook != NULL); /* every hook must still be unclaimed */
+ BUG_ON(nf_nat_sip_expect_hook != NULL);
+ BUG_ON(nf_nat_sdp_addr_hook != NULL);
+ BUG_ON(nf_nat_sdp_port_hook != NULL);
+ BUG_ON(nf_nat_sdp_session_hook != NULL);
+ BUG_ON(nf_nat_sdp_media_hook != NULL);
+ rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); /* publish this file's handlers through the hook pointers */
+ rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect);
+ rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
+ rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port);
+ rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session);
+ rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media);
+ return 0; /* cannot fail */
+}
+
+module_init(nf_nat_sip_init);
+module_exit(nf_nat_sip_fini);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
new file mode 100644
index 0000000..8303e4b
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -0,0 +1,1340 @@
+/*
+ * nf_nat_snmp_basic.c
+ *
+ * Basic SNMP Application Layer Gateway
+ *
+ * This IP NAT module is intended for use with SNMP network
+ * discovery and monitoring applications where target networks use
+ * conflicting private address realms.
+ *
+ * Static NAT is used to remap the networks from the view of the network
+ * management system at the IP layer, and this module remaps some application
+ * layer addresses to match.
+ *
+ * The simplest form of ALG is performed, where only tagged IP addresses
+ * are modified. The module does not need to be MIB aware and only scans
+ * messages at the ASN.1/BER level.
+ *
+ * Currently, only SNMPv1 and SNMPv2 are supported.
+ *
+ * More information on ALG and associated issues can be found in
+ * RFC 2962
+ *
+ * The ASN.1/BER parsing code is derived from the gxsnmp package by Gregory
+ * McLean & Jochen Friedrich, stripped down for use in the kernel.
+ *
+ * Copyright (c) 2000 RP Internet (www.rpi.net.au).
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: James Morris <jmorris@intercode.com.au>
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_nat_helper.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
+MODULE_ALIAS("ip_nat_snmp_basic");
+
+#define SNMP_PORT 161
+#define SNMP_TRAP_PORT 162
+#define NOCT1(n) (*(u8 *)(n))
+
+static int debug;
+static DEFINE_SPINLOCK(snmp_lock);
+
+/*
+ * Application layer address mapping mimics the NAT mapping, but
+ * only for the first octet in this case (a more flexible system
+ * can be implemented if needed).
+ */
+ struct oct1_map
+ { /* first-octet rewrite rule: an address whose first octet equals 'from' gets that octet replaced by 'to' */
+ u_int8_t from; /* first octet to look for */
+ u_int8_t to; /* first octet written in its place */
+ };
+
+
+/*****************************************************************************
+ *
+ * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse)
+ *
+ *****************************************************************************/
+
+/* Class */
+#define ASN1_UNI 0 /* Universal */
+#define ASN1_APL 1 /* Application */
+#define ASN1_CTX 2 /* Context */
+#define ASN1_PRV 3 /* Private */
+
+/* Tag */
+#define ASN1_EOC 0 /* End Of Contents */
+#define ASN1_BOL 1 /* Boolean */
+#define ASN1_INT 2 /* Integer */
+#define ASN1_BTS 3 /* Bit String */
+#define ASN1_OTS 4 /* Octet String */
+#define ASN1_NUL 5 /* Null */
+#define ASN1_OJI 6 /* Object Identifier */
+#define ASN1_OJD 7 /* Object Description */
+#define ASN1_EXT 8 /* External */
+#define ASN1_SEQ 16 /* Sequence */
+#define ASN1_SET 17 /* Set */
+#define ASN1_NUMSTR 18 /* Numerical String */
+#define ASN1_PRNSTR 19 /* Printable String */
+#define ASN1_TEXSTR 20 /* Teletext String */
+#define ASN1_VIDSTR 21 /* Video String */
+#define ASN1_IA5STR 22 /* IA5 String */
+#define ASN1_UNITIM 23 /* Universal Time */
+#define ASN1_GENTIM 24 /* General Time */
+#define ASN1_GRASTR 25 /* Graphical String */
+#define ASN1_VISSTR 26 /* Visible String */
+#define ASN1_GENSTR 27 /* General String */
+
+/* Primitive / Constructed methods*/
+#define ASN1_PRI 0 /* Primitive */
+#define ASN1_CON 1 /* Constructed */
+
+/*
+ * Error codes.
+ */
+#define ASN1_ERR_NOERROR 0
+#define ASN1_ERR_DEC_EMPTY 2
+#define ASN1_ERR_DEC_EOC_MISMATCH 3
+#define ASN1_ERR_DEC_LENGTH_MISMATCH 4
+#define ASN1_ERR_DEC_BADVALUE 5
+
+/*
+ * ASN.1 context.
+ */
+ struct asn1_ctx
+ { /* decoder state: a read cursor moving over the byte range [begin, end) */
+ int error; /* Error condition (one of the ASN1_ERR_* codes) */
+ unsigned char *pointer; /* Octet just to be decoded (the read cursor) */
+ unsigned char *begin; /* First octet */
+ unsigned char *end; /* Octet after last octet */
+ };
+
+/*
+ * Octet string (not null terminated)
+ */
+ struct asn1_octstr
+ {
+ unsigned char *data; /* the octets; not NUL terminated */
+ unsigned int len; /* number of octets in data */
+ };
+
+ /*
+  * Prepare a decoding context for the len octets starting at buf: the
+  * cursor is placed on the first octet and the error state is cleared.
+  */
+ static void asn1_open(struct asn1_ctx *ctx,
+ 		      unsigned char *buf,
+ 		      unsigned int len)
+ {
+ 	ctx->error = ASN1_ERR_NOERROR;
+ 	ctx->begin = buf;
+ 	ctx->pointer = buf;
+ 	ctx->end = buf + len;
+ }
+
+ /*
+  * Fetch the octet under the cursor into *ch and advance the cursor.
+  * Returns 1 on success; returns 0 and records ASN1_ERR_DEC_EMPTY when the
+  * cursor has already reached the end of the buffer.
+  */
+ static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
+ {
+ 	if (ctx->pointer < ctx->end) {
+ 		*ch = *ctx->pointer;
+ 		ctx->pointer++;
+ 		return 1;
+ 	}
+
+ 	ctx->error = ASN1_ERR_DEC_EMPTY;
+ 	return 0;
+ }
+
+ /*
+  * Decode a multi-octet tag number: each octet contributes its low seven
+  * bits, and an octet with the top bit clear ends the number.
+  * Returns 1 on success, 0 if the buffer runs out.
+  */
+ static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
+ {
+ 	unsigned char octet;
+
+ 	*tag = 0;
+
+ 	for (;;) {
+ 		if (!asn1_octet_decode(ctx, &octet))
+ 			return 0;
+
+ 		*tag = (*tag << 7) | (octet & 0x7F);
+
+ 		if (!(octet & 0x80))
+ 			break;
+ 	}
+ 	return 1;
+ }
+
+ /*
+  * Decode an identifier octet into class (top two bits), constructed flag
+  * (bit 5) and tag (low five bits).  A tag value of 0x1F means the real tag
+  * number follows in the multi-octet form.
+  * Returns 1 on success, 0 if the buffer runs out.
+  */
+ static unsigned char asn1_id_decode(struct asn1_ctx *ctx,
+ 				    unsigned int *cls,
+ 				    unsigned int *con,
+ 				    unsigned int *tag)
+ {
+ 	unsigned char octet;
+
+ 	if (!asn1_octet_decode(ctx, &octet))
+ 		return 0;
+
+ 	*cls = (octet >> 6) & 0x03;
+ 	*con = (octet >> 5) & 0x01;
+ 	*tag = octet & 0x1F;
+
+ 	if (*tag != 0x1F)
+ 		return 1;
+
+ 	return asn1_tag_decode(ctx, tag);
+ }
+
+ /*
+  * Decode a BER length field.
+  *
+  * On success returns 1 with:
+  *   *def = 0, *len = 0      for the indefinite form (single octet 0x80);
+  *   *def = 1, *len = value  for the short form (octet < 0x80) and for the
+  *                           long form (low 7 bits give the number of
+  *                           length octets that follow, big endian).
+  *
+  * Returns 0 if the buffer runs out, if a long-form value does not fit in
+  * an unsigned int (ctx->error = ASN1_ERR_DEC_BADVALUE), or if the decoded
+  * length exceeds the octets remaining in the buffer.
+  */
+ static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
+ 					unsigned int *def,
+ 					unsigned int *len)
+ {
+ 	unsigned char ch, cnt;
+
+ 	if (!asn1_octet_decode(ctx, &ch))
+ 		return 0;
+
+ 	if (ch == 0x80) {
+ 		*def = 0;
+ 		/* nothing is encoded; don't leave *len to the caller's luck */
+ 		*len = 0;
+ 	} else {
+ 		*def = 1;
+
+ 		if (ch < 0x80)
+ 			*len = ch;
+ 		else {
+ 			cnt = ch & 0x7F;
+ 			*len = 0;
+
+ 			while (cnt > 0) {
+ 				if (!asn1_octet_decode(ctx, &ch))
+ 					return 0;
+
+ 				/*
+ 				 * Shifting would discard the top octet: the
+ 				 * value is too large, so reject it rather
+ 				 * than continue with a wrapped length.
+ 				 */
+ 				if (*len > (UINT_MAX >> 8)) {
+ 					ctx->error = ASN1_ERR_DEC_BADVALUE;
+ 					return 0;
+ 				}
+
+ 				*len <<= 8;
+ 				*len |= ch;
+ 				cnt--;
+ 			}
+ 		}
+ 	}
+
+ 	/* don't trust len bigger than ctx buffer */
+ 	if (*len > ctx->end - ctx->pointer)
+ 		return 0;
+
+ 	return 1;
+ }
+
+ /*
+  * Decode an identifier + length header.
+  * *cls, *con and *tag receive the identifier fields.  *eoc is set to the
+  * first octet after the contents for a definite length, or to NULL for an
+  * indefinite length.
+  * Returns 1 on success, 0 on a decode failure or when a primitive item
+  * uses the indefinite form.
+  */
+ static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
+ 					unsigned char **eoc,
+ 					unsigned int *cls,
+ 					unsigned int *con,
+ 					unsigned int *tag)
+ {
+ 	unsigned int definite = 0, length = 0;
+
+ 	if (!asn1_id_decode(ctx, cls, con, tag) ||
+ 	    !asn1_length_decode(ctx, &definite, &length))
+ 		return 0;
+
+ 	/* primitive shall be definite, indefinite shall be constructed */
+ 	if (!definite && *con == ASN1_PRI)
+ 		return 0;
+
+ 	*eoc = definite ? ctx->pointer + length : NULL;
+ 	return 1;
+ }
+
+ /*
+  * Check for the end of a constructed item.
+  * With a definite length (eoc != NULL) the cursor must sit exactly on eoc;
+  * otherwise ASN1_ERR_DEC_LENGTH_MISMATCH is recorded.
+  * With an indefinite length (eoc == NULL) two zero octets are consumed; a
+  * non-zero octet records ASN1_ERR_DEC_EOC_MISMATCH.  Note that octets read
+  * while checking are consumed even when the check fails.
+  * Returns 1 at end of contents, 0 otherwise.
+  */
+ static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
+ {
+ 	unsigned char octet;
+ 	int i;
+
+ 	if (eoc != NULL) {
+ 		if (ctx->pointer == eoc)
+ 			return 1;
+
+ 		ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
+ 		return 0;
+ 	}
+
+ 	for (i = 0; i < 2; i++) {
+ 		if (!asn1_octet_decode(ctx, &octet))
+ 			return 0;
+
+ 		if (octet != 0x00) {
+ 			ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
+ 			return 0;
+ 		}
+ 	}
+ 	return 1;
+ }
+
+ static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc)
+ { /* Reads no content octets: just moves the cursor to eoc. Always returns 1. */
+ ctx->pointer = eoc;
+ return 1;
+ }
+
+ /*
+  * Decode a signed big-endian integer whose contents end at eoc.
+  * The first octet is sign-extended; every further octet is shifted in.
+  * Returns 1 on success; 0 if the buffer runs out or the value needs more
+  * than sizeof(long) octets (ASN1_ERR_DEC_BADVALUE).
+  */
+ static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
+ 				      unsigned char *eoc,
+ 				      long *integer)
+ {
+ 	unsigned char octet;
+ 	unsigned int nread;
+
+ 	if (!asn1_octet_decode(ctx, &octet))
+ 		return 0;
+
+ 	*integer = (signed char) octet;
+
+ 	/* nread is the octet count including the one about to be read */
+ 	for (nread = 2; ctx->pointer < eoc; nread++) {
+ 		if (nread > sizeof (long)) {
+ 			ctx->error = ASN1_ERR_DEC_BADVALUE;
+ 			return 0;
+ 		}
+
+ 		if (!asn1_octet_decode(ctx, &octet))
+ 			return 0;
+
+ 		*integer = (*integer << 8) | octet;
+ 	}
+ 	return 1;
+ }
+
+ /*
+  * Decode an unsigned big-endian integer whose contents end at eoc.
+  * A leading zero octet is not counted against the size limit.
+  * Returns 1 on success; 0 if the buffer runs out or the value needs more
+  * than sizeof(unsigned int) counted octets (ASN1_ERR_DEC_BADVALUE).
+  */
+ static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
+ 				      unsigned char *eoc,
+ 				      unsigned int *integer)
+ {
+ 	unsigned char octet;
+ 	unsigned int counted;
+
+ 	if (!asn1_octet_decode(ctx, &octet))
+ 		return 0;
+
+ 	*integer = octet;
+ 	counted = (octet != 0) ? 1 : 0;
+
+ 	while (ctx->pointer < eoc) {
+ 		counted++;
+ 		if (counted > sizeof (unsigned int)) {
+ 			ctx->error = ASN1_ERR_DEC_BADVALUE;
+ 			return 0;
+ 		}
+
+ 		if (!asn1_octet_decode(ctx, &octet))
+ 			return 0;
+
+ 		*integer = (*integer << 8) | octet;
+ 	}
+ 	return 1;
+ }
+
+ /*
+  * Decode an unsigned big-endian integer whose contents end at eoc into an
+  * unsigned long.  A leading zero octet is not counted against the size
+  * limit.
+  * Returns 1 on success; 0 if the buffer runs out or the value needs more
+  * than sizeof(unsigned long) counted octets (ASN1_ERR_DEC_BADVALUE).
+  */
+ static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
+ 				       unsigned char *eoc,
+ 				       unsigned long *integer)
+ {
+ 	unsigned char octet;
+ 	unsigned int counted;
+
+ 	if (!asn1_octet_decode(ctx, &octet))
+ 		return 0;
+
+ 	*integer = octet;
+ 	counted = (octet != 0) ? 1 : 0;
+
+ 	while (ctx->pointer < eoc) {
+ 		counted++;
+ 		if (counted > sizeof (unsigned long)) {
+ 			ctx->error = ASN1_ERR_DEC_BADVALUE;
+ 			return 0;
+ 		}
+
+ 		if (!asn1_octet_decode(ctx, &octet))
+ 			return 0;
+
+ 		*integer = (*integer << 8) | octet;
+ 	}
+ 	return 1;
+ }
+
+ /*
+  * Copy the octets between the cursor and eoc into a freshly kmalloc'ed
+  * buffer.  On success returns 1 with *octets pointing at the buffer (the
+  * caller must kfree it) and *len holding the number of octets copied.
+  * Returns 0 on allocation failure or if the input runs out; *octets is
+  * NULL in both cases.
+  */
+ static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
+ 					unsigned char *eoc,
+ 					unsigned char **octets,
+ 					unsigned int *len)
+ {
+ 	unsigned char *buf, *out;
+
+ 	*len = 0;
+
+ 	buf = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
+ 	*octets = buf;
+ 	if (buf == NULL) {
+ 		if (net_ratelimit())
+ 			printk("OOM in bsalg (%d)\n", __LINE__);
+ 		return 0;
+ 	}
+
+ 	for (out = buf; ctx->pointer < eoc; out++) {
+ 		if (!asn1_octet_decode(ctx, out)) {
+ 			kfree(buf);
+ 			*octets = NULL;
+ 			return 0;
+ 		}
+ 		(*len)++;
+ 	}
+ 	return 1;
+ }
+
+ /*
+  * Decode one object-identifier sub-id: seven value bits per octet, ending
+  * with the first octet whose top bit is clear.
+  * Returns 1 on success, 0 if the buffer runs out.
+  */
+ static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
+ 				       unsigned long *subid)
+ {
+ 	unsigned char octet;
+
+ 	*subid = 0;
+
+ 	for (;;) {
+ 		if (!asn1_octet_decode(ctx, &octet))
+ 			return 0;
+
+ 		*subid = (*subid << 7) | (octet & 0x7F);
+
+ 		if (!(octet & 0x80))
+ 			break;
+ 	}
+ 	return 1;
+ }
+
+ /*
+  * Decode an object identifier whose contents end at eoc.
+  * On success returns 1 with *oid pointing at a kmalloc'ed array of sub-ids
+  * (the caller must kfree it) and *len holding the number of entries.
+  * On any failure returns 0; once the array has been allocated, a failure
+  * frees it and sets *oid to NULL.
+  */
+ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
+ 				     unsigned char *eoc,
+ 				     unsigned long **oid,
+ 				     unsigned int *len)
+ {
+ 	unsigned long first, arc;
+ 	unsigned long *slot;
+ 	size_t capacity;
+
+ 	/* one entry per content octet, plus one (see below) */
+ 	capacity = eoc - ctx->pointer + 1;
+
+ 	/* first subid actually encodes first two subids */
+ 	if (capacity < 2 || capacity > ULONG_MAX/sizeof(unsigned long))
+ 		return 0;
+
+ 	*oid = kmalloc(capacity * sizeof(unsigned long), GFP_ATOMIC);
+ 	if (*oid == NULL) {
+ 		if (net_ratelimit())
+ 			printk("OOM in bsalg (%d)\n", __LINE__);
+ 		return 0;
+ 	}
+
+ 	if (!asn1_subid_decode(ctx, &first))
+ 		goto fail;
+
+ 	/* split the combined value into (arc, remainder) with arc in 0..2 */
+ 	if (first < 40)
+ 		arc = 0;
+ 	else if (first < 80)
+ 		arc = 1;
+ 	else
+ 		arc = 2;
+
+ 	slot = *oid;
+ 	slot[0] = arc;
+ 	slot[1] = first - 40 * arc;
+ 	*len = 2;
+
+ 	for (slot += 2; ctx->pointer < eoc; slot++) {
+ 		if (++(*len) > capacity) {
+ 			ctx->error = ASN1_ERR_DEC_BADVALUE;
+ 			goto fail;
+ 		}
+
+ 		if (!asn1_subid_decode(ctx, slot))
+ 			goto fail;
+ 	}
+ 	return 1;
+
+ fail:
+ 	kfree(*oid);
+ 	*oid = NULL;
+ 	return 0;
+ }
+
+/*****************************************************************************
+ *
+ * SNMP decoding routines (gxsnmp author Dirk Wisse)
+ *
+ *****************************************************************************/
+
+/* SNMP Versions */
+#define SNMP_V1 0
+#define SNMP_V2C 1
+#define SNMP_V2 2
+#define SNMP_V3 3
+
+/* Default Sizes */
+#define SNMP_SIZE_COMM 256
+#define SNMP_SIZE_OBJECTID 128
+#define SNMP_SIZE_BUFCHR 256
+#define SNMP_SIZE_BUFINT 128
+#define SNMP_SIZE_SMALLOBJECTID 16
+
+/* Requests */
+#define SNMP_PDU_GET 0
+#define SNMP_PDU_NEXT 1
+#define SNMP_PDU_RESPONSE 2
+#define SNMP_PDU_SET 3
+#define SNMP_PDU_TRAP1 4
+#define SNMP_PDU_BULK 5
+#define SNMP_PDU_INFORM 6
+#define SNMP_PDU_TRAP2 7
+
+/* Errors */
+#define SNMP_NOERROR 0
+#define SNMP_TOOBIG 1
+#define SNMP_NOSUCHNAME 2
+#define SNMP_BADVALUE 3
+#define SNMP_READONLY 4
+#define SNMP_GENERROR 5
+#define SNMP_NOACCESS 6
+#define SNMP_WRONGTYPE 7
+#define SNMP_WRONGLENGTH 8
+#define SNMP_WRONGENCODING 9
+#define SNMP_WRONGVALUE 10
+#define SNMP_NOCREATION 11
+#define SNMP_INCONSISTENTVALUE 12
+#define SNMP_RESOURCEUNAVAILABLE 13
+#define SNMP_COMMITFAILED 14
+#define SNMP_UNDOFAILED 15
+#define SNMP_AUTHORIZATIONERROR 16
+#define SNMP_NOTWRITABLE 17
+#define SNMP_INCONSISTENTNAME 18
+
+/* General SNMP V1 Traps */
+#define SNMP_TRAP_COLDSTART 0
+#define SNMP_TRAP_WARMSTART 1
+#define SNMP_TRAP_LINKDOWN 2
+#define SNMP_TRAP_LINKUP 3
+#define SNMP_TRAP_AUTFAILURE 4
+#define SNMP_TRAP_EQPNEIGHBORLOSS 5
+#define SNMP_TRAP_ENTSPECIFIC 6
+
+/* SNMPv1 Types */
+#define SNMP_NULL 0
+#define SNMP_INTEGER 1 /* l */
+#define SNMP_OCTETSTR 2 /* c */
+#define SNMP_DISPLAYSTR 2 /* c */
+#define SNMP_OBJECTID 3 /* ul */
+#define SNMP_IPADDR 4 /* uc */
+#define SNMP_COUNTER 5 /* ul */
+#define SNMP_GAUGE 6 /* ul */
+#define SNMP_TIMETICKS 7 /* ul */
+#define SNMP_OPAQUE 8 /* c */
+
+/* Additional SNMPv2 Types */
+#define SNMP_UINTEGER 5 /* ul */
+#define SNMP_BITSTR 9 /* uc */
+#define SNMP_NSAP 10 /* uc */
+#define SNMP_COUNTER64 11 /* ul */
+#define SNMP_NOSUCHOBJECT 12
+#define SNMP_NOSUCHINSTANCE 13
+#define SNMP_ENDOFMIBVIEW 14
+
+ union snmp_syntax
+ { /* zero-length typed views of the value bytes allocated directly after struct snmp_object */
+ unsigned char uc[0]; /* 8 bit unsigned */
+ char c[0]; /* 8 bit plain char (signedness is implementation-defined) */
+ unsigned long ul[0]; /* unsigned long (32 bit only where long is 32 bit) */
+ long l[0]; /* signed long (32 bit only where long is 32 bit) */
+ };
+
+ struct snmp_object
+ { /* one decoded variable binding; kmalloc'ed with syntax_len extra bytes for the value */
+ unsigned long *id; /* object identifier sub-ids (separately kmalloc'ed) */
+ unsigned int id_len; /* number of sub-ids in id */
+ unsigned short type; /* SNMP_* syntax code chosen by snmp_tag_cls2syntax() */
+ unsigned int syntax_len; /* size in bytes of the value stored in syntax */
+ union snmp_syntax syntax; /* value bytes start here; must remain the last member */
+ };
+
+ struct snmp_request
+ { /* the three INTEGER fields read by snmp_request_decode(), in wire order */
+ unsigned long id; /* first integer of the PDU */
+ unsigned int error_status; /* second integer of the PDU */
+ unsigned int error_index; /* third integer of the PDU */
+ };
+
+ struct snmp_v1_trap
+ { /* fields filled in by snmp_trap_decode(); id and ip_address are heap buffers the caller frees */
+ unsigned long *id; /* OID sub-ids (kmalloc'ed) */
+ unsigned int id_len; /* number of sub-ids in id */
+ unsigned long ip_address; /* pointer: holds the address of a kmalloc'ed 4-octet buffer, stored as an integer */
+ unsigned int general; /* first INTEGER after the address */
+ unsigned int specific; /* second INTEGER after the address */
+ unsigned long time; /* final field (TimeTicks or INTEGER on the wire) */
+ };
+
+/* SNMP types */
+#define SNMP_IPA 0
+#define SNMP_CNT 1
+#define SNMP_GGE 2
+#define SNMP_TIT 3
+#define SNMP_OPQ 4
+#define SNMP_C64 6
+
+/* SNMP errors */
+#define SERR_NSO 0
+#define SERR_NSI 1
+#define SERR_EOM 2
+
+static inline void mangle_address(unsigned char *begin,
+ unsigned char *addr,
+ const struct oct1_map *map,
+ __sum16 *check);
+ struct snmp_cnv
+ { /* one row of the (class, tag) -> SNMP syntax translation table */
+ unsigned int class; /* ASN.1 class (ASN1_UNI, ASN1_APL, ASN1_CTX, ...) */
+ unsigned int tag; /* ASN.1 tag number within that class */
+ int syntax; /* SNMP_* syntax code; -1 marks the terminating row */
+ };
+
+ static const struct snmp_cnv snmp_conv[] = { /* scanned top to bottom by snmp_tag_cls2syntax(); the first matching row wins */
+ {ASN1_UNI, ASN1_NUL, SNMP_NULL},
+ {ASN1_UNI, ASN1_INT, SNMP_INTEGER},
+ {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
+ {ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR}, /* same class/tag (and same code) as the row above, so never the first match */
+ {ASN1_UNI, ASN1_OJI, SNMP_OBJECTID},
+ {ASN1_APL, SNMP_IPA, SNMP_IPADDR},
+ {ASN1_APL, SNMP_CNT, SNMP_COUNTER}, /* Counter32 */
+ {ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */
+ {ASN1_APL, SNMP_TIT, SNMP_TIMETICKS},
+ {ASN1_APL, SNMP_OPQ, SNMP_OPAQUE},
+
+ /* SNMPv2 data types and errors */
+ {ASN1_UNI, ASN1_BTS, SNMP_BITSTR},
+ {ASN1_APL, SNMP_C64, SNMP_COUNTER64},
+ {ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT},
+ {ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE},
+ {ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW},
+ {0, 0, -1} /* terminator: syntax == -1 stops the scan */
+ };
+
+ /*
+  * Translate an ASN.1 (tag, class) pair into an SNMP_* syntax code using
+  * the snmp_conv table.  Returns 1 and stores the code in *syntax when a
+  * row matches; returns 0 (leaving *syntax untouched) otherwise.
+  */
+ static unsigned char snmp_tag_cls2syntax(unsigned int tag,
+ 					 unsigned int cls,
+ 					 unsigned short *syntax)
+ {
+ 	const struct snmp_cnv *row;
+
+ 	for (row = snmp_conv; row->syntax != -1; row++) {
+ 		if (row->tag != tag || row->class != cls)
+ 			continue;
+
+ 		*syntax = row->syntax;
+ 		return 1;
+ 	}
+ 	return 0;
+ }
+
+ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
+ struct snmp_object **obj)
+ { /* Decode one variable binding (SEQUENCE of OID + value) into a newly kmalloc'ed *obj. */
+ unsigned int cls, con, tag, len, idlen; /* Returns 1 on success (caller frees (*obj)->id and *obj). */
+ unsigned short type; /* Returns 0 on failure, with everything freed and *obj == NULL. */
+ unsigned char *eoc, *end, *p;
+ unsigned long *lp, *id;
+ unsigned long ul;
+ long l;
+
+ *obj = NULL;
+ id = NULL;
+
+ if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag)) /* header of the binding itself */
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) /* must be a SEQUENCE */
+ return 0;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) /* header of the name */
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI) /* must be an OBJECT IDENTIFIER */
+ return 0;
+
+ if (!asn1_oid_decode(ctx, end, &id, &idlen)) /* id is heap memory from here on */
+ return 0;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) { /* header of the value */
+ kfree(id);
+ return 0;
+ }
+
+ if (con != ASN1_PRI) { /* only primitive values are handled */
+ kfree(id);
+ return 0;
+ }
+
+ type = 0;
+ if (!snmp_tag_cls2syntax(tag, cls, &type)) { /* unknown (class, tag) pair */
+ kfree(id);
+ return 0;
+ }
+
+ l = 0;
+ switch (type) { /* each case decodes the value, then allocates *obj with room for it */
+ case SNMP_INTEGER:
+ len = sizeof(long);
+ if (!asn1_long_decode(ctx, end, &l)) {
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len,
+ GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ if (net_ratelimit())
+ printk("OOM in bsalg (%d)\n", __LINE__);
+ return 0;
+ }
+ (*obj)->syntax.l[0] = l;
+ break;
+ case SNMP_OCTETSTR:
+ case SNMP_OPAQUE:
+ if (!asn1_octets_decode(ctx, end, &p, &len)) { /* p is a temporary heap copy */
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len,
+ GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(p);
+ kfree(id);
+ if (net_ratelimit())
+ printk("OOM in bsalg (%d)\n", __LINE__);
+ return 0;
+ }
+ memcpy((*obj)->syntax.c, p, len);
+ kfree(p); /* the copy now lives inside *obj */
+ break;
+ case SNMP_NULL:
+ case SNMP_NOSUCHOBJECT:
+ case SNMP_NOSUCHINSTANCE:
+ case SNMP_ENDOFMIBVIEW:
+ len = 0; /* these carry no value bytes */
+ *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ if (net_ratelimit())
+ printk("OOM in bsalg (%d)\n", __LINE__);
+ return 0;
+ }
+ if (!asn1_null_decode(ctx, end)) {
+ kfree(id);
+ kfree(*obj);
+ *obj = NULL;
+ return 0;
+ }
+ break;
+ case SNMP_OBJECTID:
+ if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { /* lp is a temporary heap array */
+ kfree(id);
+ return 0;
+ }
+ len *= sizeof(unsigned long); /* convert the entry count into a byte count */
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(lp);
+ kfree(id);
+ if (net_ratelimit())
+ printk("OOM in bsalg (%d)\n", __LINE__);
+ return 0;
+ }
+ memcpy((*obj)->syntax.ul, lp, len);
+ kfree(lp);
+ break;
+ case SNMP_IPADDR:
+ if (!asn1_octets_decode(ctx, end, &p, &len)) {
+ kfree(id);
+ return 0;
+ }
+ if (len != 4) { /* only 4-octet addresses are accepted */
+ kfree(p);
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(p);
+ kfree(id);
+ if (net_ratelimit())
+ printk("OOM in bsalg (%d)\n", __LINE__);
+ return 0;
+ }
+ memcpy((*obj)->syntax.uc, p, len);
+ kfree(p);
+ break;
+ case SNMP_COUNTER:
+ case SNMP_GAUGE:
+ case SNMP_TIMETICKS:
+ len = sizeof(unsigned long);
+ if (!asn1_ulong_decode(ctx, end, &ul)) {
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ if (net_ratelimit())
+ printk("OOM in bsalg (%d)\n", __LINE__);
+ return 0;
+ }
+ (*obj)->syntax.ul[0] = ul;
+ break;
+ default: /* syntax codes in the table but with no case above are rejected here */
+ kfree(id);
+ return 0;
+ }
+
+ (*obj)->syntax_len = len;
+ (*obj)->type = type;
+ (*obj)->id = id; /* ownership of id passes to *obj */
+ (*obj)->id_len = idlen;
+
+ if (!asn1_eoc_decode(ctx, eoc)) { /* the binding must end exactly where its header said */
+ kfree(id);
+ kfree(*obj);
+ *obj = NULL;
+ return 0;
+ }
+ return 1;
+ }
+
+ /*
+  * Decode the three leading INTEGER fields of a request/response PDU into
+  * request->id, request->error_status and request->error_index.
+  * Returns 1 on success, 0 as soon as a field is missing, has the wrong
+  * type, or does not fit.
+  */
+ static unsigned char snmp_request_decode(struct asn1_ctx *ctx,
+ 					 struct snmp_request *request)
+ {
+ 	unsigned int cls, con, tag;
+ 	unsigned char *end;
+
+ 	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag) ||
+ 	    cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT ||
+ 	    !asn1_ulong_decode(ctx, end, &request->id))
+ 		return 0;
+
+ 	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag) ||
+ 	    cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT ||
+ 	    !asn1_uint_decode(ctx, end, &request->error_status))
+ 		return 0;
+
+ 	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag) ||
+ 	    cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT ||
+ 	    !asn1_uint_decode(ctx, end, &request->error_index))
+ 		return 0;
+
+ 	return 1;
+ }
+
+/*
+ * Fast checksum update for possibly oddly-aligned UDP byte, from the
+ * code example in the draft.
+ */
+static void fast_csum(__sum16 *csum,
+ const unsigned char *optr,
+ const unsigned char *nptr,
+ int offset)
+{
+ unsigned char s[4];
+
+ if (offset & 1) {
+ s[0] = s[2] = 0;
+ s[1] = ~*optr;
+ s[3] = *nptr;
+ } else {
+ s[1] = s[3] = 0;
+ s[0] = ~*optr;
+ s[2] = *nptr;
+ }
+
+ *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum)));
+}
+
+/*
+ * Mangle IP address.
+ * - begin points to the start of the snmp messgae
+ * - addr points to the start of the address
+ */
+static inline void mangle_address(unsigned char *begin,
+ unsigned char *addr,
+ const struct oct1_map *map,
+ __sum16 *check)
+{
+ if (map->from == NOCT1(addr)) {
+ u_int32_t old;
+
+ if (debug)
+ memcpy(&old, addr, sizeof(old));
+
+ *addr = map->to;
+
+ /* Update UDP checksum if being used */
+ if (*check) {
+ fast_csum(check,
+ &map->from, &map->to, addr - begin);
+
+ }
+
+ if (debug)
+ printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to "
+ "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr));
+ }
+}
+
+ static unsigned char snmp_trap_decode(struct asn1_ctx *ctx,
+ struct snmp_v1_trap *trap,
+ const struct oct1_map *map,
+ __sum16 *check)
+ { /* Decode a v1 trap header into *trap and mangle its address field in the packet. */
+ unsigned int cls, con, tag, len; /* Returns 1 on success: caller must kfree trap->id and trap->ip_address. */
+ unsigned char *end; /* Returns 0 on failure, after freeing whatever had been allocated. */
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI) /* first field must be an OBJECT IDENTIFIER */
+ return 0;
+
+ if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len)) /* allocates trap->id */
+ return 0;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_id_free;
+
+ if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) || /* address: IpAddress ... */
+ (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS))) /* ... or a plain OCTET STRING */
+ goto err_id_free;
+
+ if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len)) /* buffer pointer is stored in the unsigned long */
+ goto err_id_free;
+
+ /* IPv4 only */
+ if (len != 4)
+ goto err_addr_free;
+
+ mangle_address(ctx->begin, ctx->pointer - 4, map, check); /* the 4 octets just consumed, rewritten in the packet itself */
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_addr_free;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ goto err_addr_free;
+
+ if (!asn1_uint_decode(ctx, end, &trap->general))
+ goto err_addr_free;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_addr_free;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ goto err_addr_free;
+
+ if (!asn1_uint_decode(ctx, end, &trap->specific))
+ goto err_addr_free;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_addr_free;
+
+ if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) || /* time: TimeTicks ... */
+ (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT))) /* ... or a plain INTEGER */
+ goto err_addr_free;
+
+ if (!asn1_ulong_decode(ctx, end, &trap->time))
+ goto err_addr_free;
+
+ return 1;
+
+ err_addr_free:
+ kfree((unsigned long *)trap->ip_address); /* falls through to free the OID as well */
+
+ err_id_free:
+ kfree(trap->id);
+
+ return 0;
+ }
+
+/*****************************************************************************
+ *
+ * Misc. routines
+ *
+ *****************************************************************************/
+
+ /*
+  * Print len bytes of buf via printk as two-digit hex values, sixteen per
+  * line, followed by a final newline.
+  */
+ static void hex_dump(const unsigned char *buf, size_t len)
+ {
+ 	size_t pos;
+
+ 	for (pos = 0; pos < len; pos++) {
+ 		if (pos != 0 && pos % 16 == 0)
+ 			printk("\n");
+ 		printk("%02x ", buf[pos]);
+ 	}
+ 	printk("\n");
+ }
+
+/*
+ * Parse and mangle SNMP message according to mapping.
+ * (And this is the fucking 'basic' method).
+ */
+static int snmp_parse_mangle(unsigned char *msg,
+ u_int16_t len,
+ const struct oct1_map *map,
+ __sum16 *check)
+{
+ unsigned char *eoc, *end;
+ unsigned int cls, con, tag, vers, pdutype;
+ struct asn1_ctx ctx;
+ struct asn1_octstr comm;
+ struct snmp_object **obj;
+
+ if (debug > 1)
+ hex_dump(msg, len);
+
+ asn1_open(&ctx, msg, len);
+
+ /*
+ * Start of SNMP message.
+ */
+ if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
+ return 0;
+ if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
+ return 0;
+
+ /*
+ * Version 1 or 2 handled.
+ */
+ if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
+ return 0;
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ return 0;
+ if (!asn1_uint_decode (&ctx, end, &vers))
+ return 0;
+ if (debug > 1)
+ printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1);
+ if (vers > 1)
+ return 1;
+
+ /*
+ * Community.
+ */
+ if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag))
+ return 0;
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS)
+ return 0;
+ if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len))
+ return 0;
+ if (debug > 1) {
+ unsigned int i;
+
+ printk(KERN_DEBUG "bsalg: community: ");
+ for (i = 0; i < comm.len; i++)
+ printk("%c", comm.data[i]);
+ printk("\n");
+ }
+ kfree(comm.data);
+
+ /*
+ * PDU type
+ */
+ if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype))
+ return 0;
+ if (cls != ASN1_CTX || con != ASN1_CON)
+ return 0;
+ if (debug > 1) {
+ static const unsigned char *const pdus[] = {
+ [SNMP_PDU_GET] = "get",
+ [SNMP_PDU_NEXT] = "get-next",
+ [SNMP_PDU_RESPONSE] = "response",
+ [SNMP_PDU_SET] = "set",
+ [SNMP_PDU_TRAP1] = "trapv1",
+ [SNMP_PDU_BULK] = "bulk",
+ [SNMP_PDU_INFORM] = "inform",
+ [SNMP_PDU_TRAP2] = "trapv2"
+ };
+
+ if (pdutype > SNMP_PDU_TRAP2)
+ printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype);
+ else
+ printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]);
+ }
+ if (pdutype != SNMP_PDU_RESPONSE &&
+ pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
+ return 1;
+
+ /*
+ * Request header or v1 trap
+ */
+ if (pdutype == SNMP_PDU_TRAP1) {
+ struct snmp_v1_trap trap;
+ unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check);
+
+ if (ret) {
+ kfree(trap.id);
+ kfree((unsigned long *)trap.ip_address);
+ } else
+ return ret;
+
+ } else {
+ struct snmp_request req;
+
+ if (!snmp_request_decode(&ctx, &req))
+ return 0;
+
+ if (debug > 1)
+ printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u "
+ "error_index=%u\n", req.id, req.error_status,
+ req.error_index);
+ }
+
+ /*
+ * Loop through objects, look for IP addresses to mangle.
+ */
+ if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
+ return 0;
+
+ obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
+ if (obj == NULL) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__);
+ return 0;
+ }
+
+ while (!asn1_eoc_decode(&ctx, eoc)) {
+ unsigned int i;
+
+ if (!snmp_object_decode(&ctx, obj)) {
+ if (*obj) {
+ kfree((*obj)->id);
+ kfree(*obj);
+ }
+ kfree(obj);
+ return 0;
+ }
+
+ if (debug > 1) {
+ printk(KERN_DEBUG "bsalg: object: ");
+ for (i = 0; i < (*obj)->id_len; i++) {
+ if (i > 0)
+ printk(".");
+ printk("%lu", (*obj)->id[i]);
+ }
+ printk(": type=%u\n", (*obj)->type);
+
+ }
+
+ if ((*obj)->type == SNMP_IPADDR)
+ mangle_address(ctx.begin, ctx.pointer - 4 , map, check);
+
+ kfree((*obj)->id);
+ kfree(*obj);
+ }
+ kfree(obj);
+
+ if (!asn1_eoc_decode(&ctx, eoc))
+ return 0;
+
+ return 1;
+}
+
+/*****************************************************************************
+ *
+ * NAT routines.
+ *
+ *****************************************************************************/
+
+/*
+ * SNMP translation routine.
+ */
+static int snmp_translate(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ struct sk_buff *skb)
+{
+ struct iphdr *iph = ip_hdr(skb);
+ struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
+ u_int16_t udplen = ntohs(udph->len);
+ u_int16_t paylen = udplen - sizeof(struct udphdr);
+ int dir = CTINFO2DIR(ctinfo);
+ struct oct1_map map;
+
+ /*
+ * Determine mappping for application layer addresses based
+ * on NAT manipulations for the packet.
+ */
+ if (dir == IP_CT_DIR_ORIGINAL) {
+ /* SNAT traps */
+ map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip);
+ map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
+ } else {
+ /* DNAT replies */
+ map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip);
+ map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
+ }
+
+ if (map.from == map.to)
+ return NF_ACCEPT;
+
+ if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
+ paylen, &map, &udph->check)) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "bsalg: parser failed\n");
+ return NF_DROP;
+ }
+ return NF_ACCEPT;
+}
+
+ /*
+  * Conntrack helper callback shared by both SNMP helpers.
+  * No expectations are set up here; the payload's embedded IP addresses
+  * are adjusted when the connection is being NATted.  Returns an NF_*
+  * verdict.
+  */
+ static int help(struct sk_buff *skb, unsigned int protoff,
+ 		struct nf_conn *ct,
+ 		enum ip_conntrack_info ctinfo)
+ {
+ 	const int dir = CTINFO2DIR(ctinfo);
+ 	const struct iphdr *iph = ip_hdr(skb);
+ 	const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
+ 	unsigned int verdict;
+
+ 	/* SNMP replies and originating SNMP traps get mangled */
+ 	if ((udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY) ||
+ 	    (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL))
+ 		return NF_ACCEPT;
+
+ 	/* No NAT? */
+ 	if ((ct->status & IPS_NAT_MASK) == 0)
+ 		return NF_ACCEPT;
+
+ 	/*
+ 	 * So far only a valid-length IP header plus 8 bytes (room for a UDP
+ 	 * header) is guaranteed.  Check that the UDP length field agrees
+ 	 * with the packet length before touching the payload.
+ 	 */
+ 	if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) {
+ 		if (net_ratelimit())
+ 			printk(KERN_WARNING "SNMP: dropping malformed packet "
+ 			       "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
+ 			       NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+ 		return NF_DROP;
+ 	}
+
+ 	if (!skb_make_writable(skb, skb->len))
+ 		return NF_DROP;
+
+ 	/* translation runs under snmp_lock with bottom halves disabled */
+ 	spin_lock_bh(&snmp_lock);
+ 	verdict = snmp_translate(ct, ctinfo, skb);
+ 	spin_unlock_bh(&snmp_lock);
+
+ 	return verdict;
+ }
+
+ static const struct nf_conntrack_expect_policy snmp_exp_policy = { /* shared by both helpers below */
+ .max_expected = 0, /* help() never creates expectations */
+ .timeout = 180,
+ };
+
+ static struct nf_conntrack_helper snmp_helper __read_mostly = { /* helper "snmp": tuple template is IPv4 / UDP / port SNMP_PORT */
+ .me = THIS_MODULE,
+ .help = help, /* same callback as snmp_trap_helper */
+ .expect_policy = &snmp_exp_policy,
+ .name = "snmp",
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = __constant_htons(SNMP_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ };
+
+ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { /* helper "snmp_trap": tuple template is IPv4 / UDP / port SNMP_TRAP_PORT */
+ .me = THIS_MODULE,
+ .help = help, /* same callback as snmp_helper */
+ .expect_policy = &snmp_exp_policy,
+ .name = "snmp_trap",
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ };
+
+/*****************************************************************************
+ *
+ * Module stuff.
+ *
+ *****************************************************************************/
+
+ /*
+  * Module init: register the "snmp" helper, then the "snmp_trap" helper.
+  * If the second registration fails the first is undone.  Returns the
+  * result of the last registration attempted.
+  */
+ static int __init nf_nat_snmp_basic_init(void)
+ {
+ 	int err;
+
+ 	err = nf_conntrack_helper_register(&snmp_helper);
+ 	if (err < 0)
+ 		return err;
+
+ 	err = nf_conntrack_helper_register(&snmp_trap_helper);
+ 	if (err < 0)
+ 		nf_conntrack_helper_unregister(&snmp_helper);
+
+ 	return err;
+ }
+
+ static void __exit nf_nat_snmp_basic_fini(void)
+ { /* Module exit: unregister both helpers registered by nf_nat_snmp_basic_init(). */
+ nf_conntrack_helper_unregister(&snmp_helper);
+ nf_conntrack_helper_unregister(&snmp_trap_helper);
+ }
+
+module_init(nf_nat_snmp_basic_init);
+module_exit(nf_nat_snmp_basic_fini);
+
+module_param(debug, int, 0600);
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
new file mode 100644
index 0000000..b7dd695
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -0,0 +1,332 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/types.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/spinlock.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+#ifdef CONFIG_XFRM
+ /*
+  * Overwrite the address/port fields of flow key @fl with the values from
+  * the conntrack tuple for this packet's direction, for each side (dst, src)
+  * whose NAT status bit is set on the connection.  Ports are only copied
+  * for TCP, UDP, UDP-Lite, DCCP and SCTP.  Does nothing when @skb has no
+  * conntrack entry.
+  */
+ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
+ {
+ 	enum ip_conntrack_info ctinfo;
+ 	const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ 	const struct nf_conntrack_tuple *tuple;
+ 	enum ip_conntrack_dir dir;
+ 	unsigned long dst_bit, src_bit;
+ 	int has_ports;
+
+ 	if (ct == NULL)
+ 		return;
+
+ 	dir = CTINFO2DIR(ctinfo);
+ 	tuple = &ct->tuplehash[dir].tuple;
+
+ 	/* Status bit guarding the dst fields; the src bit is its complement
+ 	   within IPS_NAT_MASK. */
+ 	dst_bit = (dir == IP_CT_DIR_ORIGINAL) ? IPS_DST_NAT : IPS_SRC_NAT;
+ 	src_bit = dst_bit ^ IPS_NAT_MASK;
+
+ 	switch (tuple->dst.protonum) {
+ 	case IPPROTO_TCP:
+ 	case IPPROTO_UDP:
+ 	case IPPROTO_UDPLITE:
+ 	case IPPROTO_DCCP:
+ 	case IPPROTO_SCTP:
+ 		has_ports = 1;
+ 		break;
+ 	default:
+ 		has_ports = 0;
+ 		break;
+ 	}
+
+ 	if (ct->status & dst_bit) {
+ 		fl->fl4_dst = tuple->dst.u3.ip;
+ 		if (has_ports)
+ 			fl->fl_ip_dport = tuple->dst.u.tcp.port;
+ 	}
+
+ 	if (ct->status & src_bit) {
+ 		fl->fl4_src = tuple->src.u3.ip;
+ 		if (has_ports)
+ 			fl->fl_ip_sport = tuple->src.u.tcp.port;
+ 	}
+ }
+#endif
+
+ /*
+  * Common NAT hook body shared by all hook entry points in this file.
+  *
+  * @hooknum: netfilter hook being run; selects the manip type via HOOK2MANIP()
+  * @skb:     packet being processed
+  * @in/@out: devices, only forwarded to nf_nat_rule_find()
+  * @okfn:    not used by this function
+  *
+  * Returns NF_ACCEPT without touching the packet when there is no conntrack
+  * entry, the entry is the untracked one, or no NAT extension can be used.
+  * RELATED ICMP packets are handled entirely by
+  * nf_nat_icmp_reply_translation().  For NEW (and non-ICMP RELATED)
+  * connections a binding is set up first if none exists for this manip type;
+  * a non-NF_ACCEPT result from that step is returned as-is.  Otherwise the
+  * verdict is whatever nf_nat_packet() returns.
+  */
+ static unsigned int
+ nf_nat_fn(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_nat *nat;
+ /* maniptype == SRC for postrouting. */
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+
+ /* We never see fragments: conntrack defrags on pre-routing
+ and local-out, and nf_nat_out protects post-routing. */
+ NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
+
+ ct = nf_ct_get(skb, &ctinfo);
+ /* Can't track? It's not due to stress, or conntrack would
+ have dropped it. Hence it's the user's responsibility to
+ packet filter it out, or implement conntrack/NAT for that
+ protocol. 8) --RR */
+ if (!ct)
+ return NF_ACCEPT;
+
+ /* Don't try to NAT if this packet is not conntracked */
+ if (ct == &nf_conntrack_untracked)
+ return NF_ACCEPT;
+
+ nat = nfct_nat(ct);
+ if (!nat) {
+ /* NAT module was loaded late. */
+ if (nf_ct_is_confirmed(ct))
+ return NF_ACCEPT;
+ /* Unconfirmed entry: try to attach the NAT extension now. */
+ nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+ if (nat == NULL) {
+ pr_debug("failed to add NAT extension\n");
+ return NF_ACCEPT;
+ }
+ }
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED+IP_CT_IS_REPLY:
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+ if (!nf_nat_icmp_reply_translation(ct, ctinfo,
+ hooknum, skb))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ case IP_CT_NEW:
+
+ /* Seen it before? This can happen for loopback, retrans,
+ or local packets.. */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ unsigned int ret;
+
+ if (hooknum == NF_INET_LOCAL_IN)
+ /* LOCAL_IN hook doesn't have a chain! */
+ ret = alloc_null_binding(ct, hooknum);
+ else
+ ret = nf_nat_rule_find(skb, hooknum, in, out,
+ ct);
+
+ /* Any verdict other than ACCEPT ends processing here. */
+ if (ret != NF_ACCEPT) {
+ return ret;
+ }
+ } else
+ pr_debug("Already setup manip %s for ct %p\n",
+ maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
+ ct);
+ break;
+
+ default:
+ /* ESTABLISHED */
+ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
+ ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
+ }
+
+ return nf_nat_packet(ct, ctinfo, hooknum, skb);
+ }
+
+ /*
+  * PRE_ROUTING hook: run nf_nat_fn() and, if the packet was neither dropped
+  * nor stolen and its IP destination address changed, release and clear the
+  * cached skb->dst.
+  */
+ static unsigned int
+ nf_nat_in(unsigned int hooknum,
+ 	  struct sk_buff *skb,
+ 	  const struct net_device *in,
+ 	  const struct net_device *out,
+ 	  int (*okfn)(struct sk_buff *))
+ {
+ 	/* Snapshot the destination before NAT may rewrite the header. */
+ 	const __be32 orig_daddr = ip_hdr(skb)->daddr;
+ 	const unsigned int verdict = nf_nat_fn(hooknum, skb, in, out, okfn);
+
+ 	if (verdict == NF_DROP || verdict == NF_STOLEN)
+ 		return verdict;
+
+ 	if (ip_hdr(skb)->daddr != orig_daddr) {
+ 		dst_release(skb->dst);
+ 		skb->dst = NULL;
+ 	}
+
+ 	return verdict;
+ }
+
+ /*
+  * POST_ROUTING hook: accept packets too short to carry an IP header
+  * untouched, otherwise run nf_nat_fn().  With CONFIG_XFRM, when the source
+  * address or source protocol data of this direction's tuple differs from
+  * the destination side of the opposite tuple, ip_xfrm_me_harder() is called
+  * and the packet is dropped if that fails.
+  */
+ static unsigned int
+ nf_nat_out(unsigned int hooknum,
+ 	   struct sk_buff *skb,
+ 	   const struct net_device *in,
+ 	   const struct net_device *out,
+ 	   int (*okfn)(struct sk_buff *))
+ {
+ #ifdef CONFIG_XFRM
+ 	const struct nf_conn *ct;
+ 	const struct nf_conntrack_tuple *fwd, *rev;
+ 	enum ip_conntrack_info ctinfo;
+ 	enum ip_conntrack_dir dir;
+ #endif
+ 	unsigned int verdict;
+
+ 	/* root is playing with raw sockets. */
+ 	if (skb->len < sizeof(struct iphdr) ||
+ 	    ip_hdrlen(skb) < sizeof(struct iphdr))
+ 		return NF_ACCEPT;
+
+ 	verdict = nf_nat_fn(hooknum, skb, in, out, okfn);
+ #ifdef CONFIG_XFRM
+ 	if (verdict == NF_DROP || verdict == NF_STOLEN)
+ 		return verdict;
+
+ 	ct = nf_ct_get(skb, &ctinfo);
+ 	if (ct == NULL)
+ 		return verdict;
+
+ 	dir = CTINFO2DIR(ctinfo);
+ 	fwd = &ct->tuplehash[dir].tuple;
+ 	rev = &ct->tuplehash[!dir].tuple;
+
+ 	if (fwd->src.u3.ip != rev->dst.u3.ip ||
+ 	    fwd->src.u.all != rev->dst.u.all) {
+ 		if (ip_xfrm_me_harder(skb) != 0)
+ 			return NF_DROP;
+ 	}
+ #endif
+ 	return verdict;
+ }
+
+ /*
+  * LOCAL_OUT hook: accept packets too short to carry an IP header untouched,
+  * otherwise run nf_nat_fn().  If the packet survives and the destination
+  * address of this direction's tuple differs from the source of the opposite
+  * tuple, ip_route_me_harder() is called.  Otherwise, with CONFIG_XFRM, a
+  * difference in the protocol data alone calls ip_xfrm_me_harder().  A
+  * failure of either turns the verdict into NF_DROP.
+  */
+ static unsigned int
+ nf_nat_local_fn(unsigned int hooknum,
+ 		struct sk_buff *skb,
+ 		const struct net_device *in,
+ 		const struct net_device *out,
+ 		int (*okfn)(struct sk_buff *))
+ {
+ 	const struct nf_conn *ct;
+ 	const struct nf_conntrack_tuple *fwd, *rev;
+ 	enum ip_conntrack_info ctinfo;
+ 	enum ip_conntrack_dir dir;
+ 	unsigned int verdict;
+
+ 	/* root is playing with raw sockets. */
+ 	if (skb->len < sizeof(struct iphdr) ||
+ 	    ip_hdrlen(skb) < sizeof(struct iphdr))
+ 		return NF_ACCEPT;
+
+ 	verdict = nf_nat_fn(hooknum, skb, in, out, okfn);
+ 	if (verdict == NF_DROP || verdict == NF_STOLEN)
+ 		return verdict;
+
+ 	ct = nf_ct_get(skb, &ctinfo);
+ 	if (ct == NULL)
+ 		return verdict;
+
+ 	dir = CTINFO2DIR(ctinfo);
+ 	fwd = &ct->tuplehash[dir].tuple;
+ 	rev = &ct->tuplehash[!dir].tuple;
+
+ 	if (fwd->dst.u3.ip != rev->src.u3.ip) {
+ 		if (ip_route_me_harder(skb, RTN_UNSPEC))
+ 			verdict = NF_DROP;
+ 	}
+ #ifdef CONFIG_XFRM
+ 	else if (fwd->dst.u.all != rev->src.u.all) {
+ 		if (ip_xfrm_me_harder(skb))
+ 			verdict = NF_DROP;
+ 	}
+ #endif
+ 	return verdict;
+ }
+
+/* We must be after connection tracking and before packet filtering. */
+
+ /*
+  * PF_INET hook table for this module: one entry per hook point
+  * (PRE_ROUTING, POST_ROUTING, LOCAL_OUT, LOCAL_IN).  The whole array is
+  * registered in nf_nat_standalone_init() and unregistered in
+  * nf_nat_standalone_fini().
+  */
+ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_in,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_local_fn,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_fn, /* LOCAL_IN calls the common body directly, no wrapper */
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+ };
+
+ /*
+  * Module init.  With CONFIG_XFRM, publishes nat_decode_session through the
+  * ip_nat_decode_session pointer (which must still be NULL), then sets up
+  * the NAT rules and registers the hook table.  On failure every step
+  * already taken is undone in reverse order and the negative error code is
+  * returned.
+  */
+ static int __init nf_nat_standalone_init(void)
+ {
+ 	int ret;
+
+ 	need_ipv4_conntrack();
+
+ #ifdef CONFIG_XFRM
+ 	BUG_ON(ip_nat_decode_session != NULL);
+ 	rcu_assign_pointer(ip_nat_decode_session, nat_decode_session);
+ #endif
+ 	ret = nf_nat_rule_init();
+ 	if (ret < 0) {
+ 		/* Explicit level: a bare printk() logs at the default level. */
+ 		printk(KERN_ERR "nf_nat_init: can't setup rules.\n");
+ 		goto cleanup_decode_session;
+ 	}
+ 	ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
+ 	if (ret < 0) {
+ 		printk(KERN_ERR "nf_nat_init: can't register hooks.\n");
+ 		goto cleanup_rule_init;
+ 	}
+ 	return ret;
+
+ cleanup_rule_init:
+ 	nf_nat_rule_cleanup();
+ cleanup_decode_session:
+ #ifdef CONFIG_XFRM
+ 	rcu_assign_pointer(ip_nat_decode_session, NULL);
+ 	/* Wait before returning so no reader still uses the old pointer. */
+ 	synchronize_net();
+ #endif
+ 	return ret;
+ }
+
+ /*
+  * Module exit: undo nf_nat_standalone_init() in reverse order - unregister
+  * the hooks, clean up the NAT rules and, with CONFIG_XFRM, clear the
+  * ip_nat_decode_session pointer and call synchronize_net() before returning.
+  */
+ static void __exit nf_nat_standalone_fini(void)
+ {
+ nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
+ nf_nat_rule_cleanup();
+ #ifdef CONFIG_XFRM
+ rcu_assign_pointer(ip_nat_decode_session, NULL);
+ synchronize_net();
+ #endif
+ /* Conntrack caches are unregistered in nf_conntrack_cleanup */
+ }
+
+module_init(nf_nat_standalone_init);
+module_exit(nf_nat_standalone_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_nat");
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
new file mode 100644
index 0000000..b096e81
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -0,0 +1,52 @@
+/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/udp.h>
+
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_tftp.h>
+
+MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
+MODULE_DESCRIPTION("TFTP NAT helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_nat_tftp");
+
+ /*
+  * TFTP NAT hook: prepare expectation @exp and submit it.  Saves the UDP
+  * source port of the master connection's original-direction tuple into the
+  * expectation, marks it for the reply direction and sets
+  * nf_nat_follow_master as its expectfn.  Returns NF_ACCEPT when
+  * nf_ct_expect_related() succeeds, NF_DROP otherwise.  @skb and @ctinfo
+  * are not used.
+  */
+ static unsigned int help(struct sk_buff *skb,
+ 			 enum ip_conntrack_info ctinfo,
+ 			 struct nf_conntrack_expect *exp)
+ {
+ 	const struct nf_conn *master = exp->master;
+ 	const struct nf_conntrack_tuple *orig =
+ 		&master->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+
+ 	exp->saved_proto.udp.port = orig->src.u.udp.port;
+ 	exp->dir = IP_CT_DIR_REPLY;
+ 	exp->expectfn = nf_nat_follow_master;
+
+ 	return nf_ct_expect_related(exp) == 0 ? NF_ACCEPT : NF_DROP;
+ }
+
+ /*
+  * Module exit: clear nf_nat_tftp_hook and call synchronize_rcu() before
+  * returning.
+  */
+ static void __exit nf_nat_tftp_fini(void)
+ {
+ rcu_assign_pointer(nf_nat_tftp_hook, NULL);
+ synchronize_rcu();
+ }
+
+ /*
+  * Module init: publish help() through nf_nat_tftp_hook.  The hook must not
+  * already be set (BUG otherwise).  Always returns 0.
+  */
+ static int __init nf_nat_tftp_init(void)
+ {
+ BUG_ON(nf_nat_tftp_hook != NULL);
+ rcu_assign_pointer(nf_nat_tftp_hook, help);
+ return 0;
+ }
+
+module_init(nf_nat_tftp_init);
+module_exit(nf_nat_tftp_fini);
OpenPOWER on IntegriCloud