From 6ede2463c8d7ea949f8e7ef35243490c415ddc2f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 25 Oct 2005 15:04:59 -0700 Subject: [BRIDGE]: Use ether_compare Use compare_ether_addr in bridge code. Signed-off-by: Stephen Hemminger Signed-off-by: Arnaldo Carvalho de Melo --- net/bridge/br_fdb.c | 12 ++++++------ net/bridge/br_input.c | 2 +- net/bridge/br_stp_if.c | 9 +++++---- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 24396b9..1f08a59 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -86,8 +86,8 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) struct net_bridge_port *op; list_for_each_entry(op, &br->port_list, list) { if (op != p && - !memcmp(op->dev->dev_addr, - f->addr.addr, ETH_ALEN)) { + !compare_ether_addr(op->dev->dev_addr, + f->addr.addr)) { f->dst = op; goto insert; } @@ -151,8 +151,8 @@ void br_fdb_delete_by_port(struct net_bridge *br, struct net_bridge_port *p) struct net_bridge_port *op; list_for_each_entry(op, &br->port_list, list) { if (op != p && - !memcmp(op->dev->dev_addr, - f->addr.addr, ETH_ALEN)) { + !compare_ether_addr(op->dev->dev_addr, + f->addr.addr)) { f->dst = op; goto skip_delete; } @@ -174,7 +174,7 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, struct net_bridge_fdb_entry *fdb; hlist_for_each_entry_rcu(fdb, h, &br->hash[br_mac_hash(addr)], hlist) { - if (!memcmp(fdb->addr.addr, addr, ETH_ALEN)) { + if (!compare_ether_addr(fdb->addr.addr, addr)) { if (unlikely(has_expired(br, fdb))) break; return fdb; @@ -264,7 +264,7 @@ static inline struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, struct net_bridge_fdb_entry *fdb; hlist_for_each_entry_rcu(fdb, h, head, hlist) { - if (!memcmp(fdb->addr.addr, addr, ETH_ALEN)) + if (!compare_ether_addr(fdb->addr.addr, addr)) return fdb; } return NULL; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 9a45e62..b88220a 100644 --- 
a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -128,7 +128,7 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb) dest = eth_hdr(skb)->h_dest; } - if (!memcmp(p->br->dev->dev_addr, dest, ETH_ALEN)) + if (!compare_ether_addr(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 0da11ff..ac09b6a 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -15,6 +15,7 @@ #include #include +#include #include "br_private.h" #include "br_private_stp.h" @@ -133,10 +134,10 @@ static void br_stp_change_bridge_id(struct net_bridge *br, memcpy(br->dev->dev_addr, addr, ETH_ALEN); list_for_each_entry(p, &br->port_list, list) { - if (!memcmp(p->designated_bridge.addr, oldaddr, ETH_ALEN)) + if (!compare_ether_addr(p->designated_bridge.addr, oldaddr)) memcpy(p->designated_bridge.addr, addr, ETH_ALEN); - if (!memcmp(p->designated_root.addr, oldaddr, ETH_ALEN)) + if (!compare_ether_addr(p->designated_root.addr, oldaddr)) memcpy(p->designated_root.addr, addr, ETH_ALEN); } @@ -157,12 +158,12 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br) list_for_each_entry(p, &br->port_list, list) { if (addr == br_mac_zero || - memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0) + compare_ether_addr(p->dev->dev_addr, addr) < 0) addr = p->dev->dev_addr; } - if (memcmp(br->bridge_id.addr, addr, ETH_ALEN)) + if (compare_ether_addr(br->bridge_id.addr, addr)) br_stp_change_bridge_id(br, addr); } -- cgit v1.1 From 6b7d31fcdda5938e5d3f1f8b0922cc25aa200dfc Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Wed, 26 Oct 2005 09:34:24 +0200 Subject: [NETFILTER]: Add "revision" support to arp_tables and ip6_tables As ip_tables has already had for some time, this adds support for having multiple revisions for each match/target. We steal one byte from the name in order to accommodate an 8-bit version number. 
Signed-off-by: Harald Welte Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/netfilter_arp/arp_tables.h | 20 +- include/linux/netfilter_ipv6/ip6_tables.h | 27 ++- net/ipv4/netfilter/arp_tables.c | 201 +++++++++++++------- net/ipv6/netfilter/ip6_tables.c | 298 +++++++++++++++++++----------- net/ipv6/netfilter/ip6t_MARK.c | 8 +- 5 files changed, 362 insertions(+), 192 deletions(-) diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index d759a637..e98a870 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -68,7 +68,8 @@ struct arpt_entry_target u_int16_t target_size; /* Used by userspace */ - char name[ARPT_FUNCTION_MAXNAMELEN]; + char name[ARPT_FUNCTION_MAXNAMELEN-1]; + u_int8_t revision; } user; struct { u_int16_t target_size; @@ -148,7 +149,9 @@ struct arpt_entry #define ARPT_SO_GET_INFO (ARPT_BASE_CTL) #define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1) -#define ARPT_SO_GET_MAX ARPT_SO_GET_ENTRIES +/* #define ARPT_SO_GET_REVISION_MATCH (ARPT_BASE_CTL + 2)*/ +#define ARPT_SO_GET_REVISION_TARGET (ARPT_BASE_CTL + 3) +#define ARPT_SO_GET_MAX ARPT_SO_GET_REVISION_TARGET /* CONTINUE verdict for targets */ #define ARPT_CONTINUE 0xFFFFFFFF @@ -236,6 +239,15 @@ struct arpt_get_entries struct arpt_entry entrytable[0]; }; +/* The argument to ARPT_SO_GET_REVISION_*. Returns highest revision + * kernel supports, if >= revision. */ +struct arpt_get_revision +{ + char name[ARPT_FUNCTION_MAXNAMELEN-1]; + + u_int8_t revision; +}; + /* Standard return verdict, or do jump. */ #define ARPT_STANDARD_TARGET "" /* Error verdict. */ @@ -274,7 +286,9 @@ struct arpt_target { struct list_head list; - const char name[ARPT_FUNCTION_MAXNAMELEN]; + const char name[ARPT_FUNCTION_MAXNAMELEN-1]; + + u_int8_t revision; /* Returns verdict. 
*/ unsigned int (*target)(struct sk_buff **pskb, diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 59f70b3..2efc046 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -57,7 +57,8 @@ struct ip6t_entry_match u_int16_t match_size; /* Used by userspace */ - char name[IP6T_FUNCTION_MAXNAMELEN]; + char name[IP6T_FUNCTION_MAXNAMELEN-1]; + u_int8_t revision; } user; struct { u_int16_t match_size; @@ -80,7 +81,8 @@ struct ip6t_entry_target u_int16_t target_size; /* Used by userspace */ - char name[IP6T_FUNCTION_MAXNAMELEN]; + char name[IP6T_FUNCTION_MAXNAMELEN-1]; + u_int8_t revision; } user; struct { u_int16_t target_size; @@ -161,7 +163,9 @@ struct ip6t_entry #define IP6T_SO_GET_INFO (IP6T_BASE_CTL) #define IP6T_SO_GET_ENTRIES (IP6T_BASE_CTL + 1) -#define IP6T_SO_GET_MAX IP6T_SO_GET_ENTRIES +#define IP6T_SO_GET_REVISION_MATCH (IP6T_BASE_CTL + 2) +#define IP6T_SO_GET_REVISION_TARGET (IP6T_BASE_CTL + 3) +#define IP6T_SO_GET_MAX IP6T_SO_GET_REVISION_TARGET /* CONTINUE verdict for targets */ #define IP6T_CONTINUE 0xFFFFFFFF @@ -291,6 +295,15 @@ struct ip6t_get_entries struct ip6t_entry entrytable[0]; }; +/* The argument to IP6T_SO_GET_REVISION_*. Returns highest revision + * kernel supports, if >= revision. */ +struct ip6t_get_revision +{ + char name[IP6T_FUNCTION_MAXNAMELEN-1]; + + u_int8_t revision; +}; + /* Standard return verdict, or do jump. */ #define IP6T_STANDARD_TARGET "" /* Error verdict. */ @@ -352,7 +365,9 @@ struct ip6t_match { struct list_head list; - const char name[IP6T_FUNCTION_MAXNAMELEN]; + const char name[IP6T_FUNCTION_MAXNAMELEN-1]; + + u_int8_t revision; /* Return true or false: return FALSE and set *hotdrop = 1 to force immediate packet drop. */ @@ -387,7 +402,9 @@ struct ip6t_target { struct list_head list; - const char name[IP6T_FUNCTION_MAXNAMELEN]; + const char name[IP6T_FUNCTION_MAXNAMELEN-1]; + + u_int8_t revision; /* Returns verdict. 
Argument order changed since 2.6.9, as this must now handle non-linear skbs, using skb_copy_bits and diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index a796928..3c2e963 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -347,58 +347,106 @@ unsigned int arpt_do_table(struct sk_buff **pskb, return verdict; } -static inline void *find_inlist_lock_noload(struct list_head *head, - const char *name, - int *error, - struct semaphore *mutex) +/* + * These are weird, but module loading must not be done with mutex + * held (since they will register), and we have to have a single + * function to use try_then_request_module(). + */ + +/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ +static inline struct arpt_table *find_table_lock(const char *name) { - void *ret; + struct arpt_table *t; - *error = down_interruptible(mutex); - if (*error != 0) - return NULL; + if (down_interruptible(&arpt_mutex) != 0) + return ERR_PTR(-EINTR); - ret = list_named_find(head, name); - if (!ret) { - *error = -ENOENT; - up(mutex); - } - return ret; + list_for_each_entry(t, &arpt_tables, list) + if (strcmp(t->name, name) == 0 && try_module_get(t->me)) + return t; + up(&arpt_mutex); + return NULL; } -#ifndef CONFIG_KMOD -#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m)) -#else -static void * -find_inlist_lock(struct list_head *head, - const char *name, - const char *prefix, - int *error, - struct semaphore *mutex) + +/* Find target, grabs ref. Returns ERR_PTR() on error. 
*/ +static inline struct arpt_target *find_target(const char *name, u8 revision) { - void *ret; + struct arpt_target *t; + int err = 0; - ret = find_inlist_lock_noload(head, name, error, mutex); - if (!ret) { - duprintf("find_inlist: loading `%s%s'.\n", prefix, name); - request_module("%s%s", prefix, name); - ret = find_inlist_lock_noload(head, name, error, mutex); + if (down_interruptible(&arpt_mutex) != 0) + return ERR_PTR(-EINTR); + + list_for_each_entry(t, &arpt_target, list) { + if (strcmp(t->name, name) == 0) { + if (t->revision == revision) { + if (try_module_get(t->me)) { + up(&arpt_mutex); + return t; + } + } else + err = -EPROTOTYPE; /* Found something. */ + } } + up(&arpt_mutex); + return ERR_PTR(err); +} - return ret; +struct arpt_target *arpt_find_target(const char *name, u8 revision) +{ + struct arpt_target *target; + + target = try_then_request_module(find_target(name, revision), + "arpt_%s", name); + if (IS_ERR(target) || !target) + return NULL; + return target; } -#endif -static inline struct arpt_table *arpt_find_table_lock(const char *name, int *error, struct semaphore *mutex) +static int target_revfn(const char *name, u8 revision, int *bestp) { - return find_inlist_lock(&arpt_tables, name, "arptable_", error, mutex); + struct arpt_target *t; + int have_rev = 0; + + list_for_each_entry(t, &arpt_target, list) { + if (strcmp(t->name, name) == 0) { + if (t->revision > *bestp) + *bestp = t->revision; + if (t->revision == revision) + have_rev =1; + } + } + return have_rev; } -static struct arpt_target *arpt_find_target_lock(const char *name, int *error, struct semaphore *mutex) +/* Returns true or false (if no such extension at all) */ +static inline int find_revision(const char *name, u8 revision, + int (*revfn)(const char *, u8, int *), + int *err) { - return find_inlist_lock(&arpt_target, name, "arpt_", error, mutex); + int have_rev, best = -1; + + if (down_interruptible(&arpt_mutex) != 0) { + *err = -EINTR; + return 1; + } + have_rev = revfn(name, 
revision, &best); + up(&arpt_mutex); + + /* Nothing at all? Return 0 to try loading module. */ + if (best == -1) { + *err = -ENOENT; + return 0; + } + + *err = best; + if (!have_rev) + *err = -EPROTONOSUPPORT; + return 1; } + /* All zeroes == unconditional rule. */ static inline int unconditional(const struct arpt_arp *arp) { @@ -544,17 +592,15 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i } t = arpt_get_target(e); - target = arpt_find_target_lock(t->u.user.name, &ret, &arpt_mutex); - if (!target) { + target = try_then_request_module(find_target(t->u.user.name, + t->u.user.revision), + "arpt_%s", t->u.user.name); + if (IS_ERR(target) || !target) { duprintf("check_entry: `%s' not found\n", t->u.user.name); + ret = target ? PTR_ERR(target) : -ENOENT; goto out; } - if (!try_module_get((target->me))) { - ret = -ENOENT; - goto out_unlock; - } t->u.kernel.target = target; - up(&arpt_mutex); if (t->u.kernel.target == &arpt_standard_target) { if (!standard_check(t, size)) { @@ -576,8 +622,6 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i (*i)++; return 0; -out_unlock: - up(&arpt_mutex); out: return ret; } @@ -846,8 +890,8 @@ static int get_entries(const struct arpt_get_entries *entries, int ret; struct arpt_table *t; - t = arpt_find_table_lock(entries->name, &ret, &arpt_mutex); - if (t) { + t = find_table_lock(entries->name); + if (t || !IS_ERR(t)) { duprintf("t->private->number = %u\n", t->private->number); if (entries->size == t->private->size) @@ -859,10 +903,10 @@ static int get_entries(const struct arpt_get_entries *entries, entries->size); ret = -EINVAL; } + module_put(t->me); up(&arpt_mutex); } else - duprintf("get_entries: Can't find %s!\n", - entries->name); + ret = t ? 
PTR_ERR(t) : -ENOENT; return ret; } @@ -913,22 +957,19 @@ static int do_replace(void __user *user, unsigned int len) duprintf("arp_tables: Translated table\n"); - t = arpt_find_table_lock(tmp.name, &ret, &arpt_mutex); - if (!t) + t = try_then_request_module(find_table_lock(tmp.name), + "arptable_%s", tmp.name); + if (!t || IS_ERR(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; + } /* You lied! */ if (tmp.valid_hooks != t->valid_hooks) { duprintf("Valid hook crap: %08X vs %08X\n", tmp.valid_hooks, t->valid_hooks); ret = -EINVAL; - goto free_newinfo_counters_untrans_unlock; - } - - /* Get a reference in advance, we're not allowed fail later */ - if (!try_module_get(t->me)) { - ret = -EBUSY; - goto free_newinfo_counters_untrans_unlock; + goto put_module; } oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret); @@ -959,7 +1000,6 @@ static int do_replace(void __user *user, unsigned int len) put_module: module_put(t->me); - free_newinfo_counters_untrans_unlock: up(&arpt_mutex); free_newinfo_counters_untrans: ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL); @@ -989,7 +1029,7 @@ static int do_add_counters(void __user *user, unsigned int len) unsigned int i; struct arpt_counters_info tmp, *paddc; struct arpt_table *t; - int ret; + int ret = 0; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1006,9 +1046,11 @@ static int do_add_counters(void __user *user, unsigned int len) goto free; } - t = arpt_find_table_lock(tmp.name, &ret, &arpt_mutex); - if (!t) + t = find_table_lock(tmp.name); + if (!t || IS_ERR(t)) { + ret = t ? 
PTR_ERR(t) : -ENOENT; goto free; + } write_lock_bh(&t->lock); if (t->private->number != paddc->num_counters) { @@ -1025,6 +1067,7 @@ static int do_add_counters(void __user *user, unsigned int len) unlock_up_free: write_unlock_bh(&t->lock); up(&arpt_mutex); + module_put(t->me); free: vfree(paddc); @@ -1079,8 +1122,10 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len break; } name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; - t = arpt_find_table_lock(name, &ret, &arpt_mutex); - if (t) { + + t = try_then_request_module(find_table_lock(name), + "arptable_%s", name); + if (t && !IS_ERR(t)) { struct arpt_getinfo info; info.valid_hooks = t->valid_hooks; @@ -1096,9 +1141,10 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len ret = -EFAULT; else ret = 0; - up(&arpt_mutex); - } + module_put(t->me); + } else + ret = t ? PTR_ERR(t) : -ENOENT; } break; @@ -1119,6 +1165,24 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len break; } + case ARPT_SO_GET_REVISION_TARGET: { + struct arpt_get_revision rev; + + if (*len != sizeof(rev)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&rev, user, sizeof(rev)) != 0) { + ret = -EFAULT; + break; + } + + try_then_request_module(find_revision(rev.name, rev.revision, + target_revfn, &ret), + "arpt_%s", rev.name); + break; + } + default: duprintf("do_arpt_get_ctl: unknown request %i\n", cmd); ret = -EINVAL; @@ -1136,12 +1200,9 @@ int arpt_register_target(struct arpt_target *target) if (ret != 0) return ret; - if (!list_named_insert(&arpt_target, target)) { - duprintf("arpt_register_target: `%s' already in list!\n", - target->name); - ret = -EINVAL; - } + list_add(&target->list, &arpt_target); up(&arpt_mutex); + return ret; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 21deec2..7d49222 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2,7 +2,7 @@ * Packet matching code. 
* * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling - * Copyright (C) 2000-2002 Netfilter core team + * Copyright (C) 2000-2005 Netfilter Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -80,13 +79,12 @@ static DECLARE_MUTEX(ip6t_mutex); #define inline #endif -/* Locking is simple: we assume at worst case there will be one packet - in user context and one from bottom halves (or soft irq if Alexey's - softnet patch was applied). - +/* We keep a set of rules for each CPU, so we can avoid write-locking - them; doing a readlock_bh() stops packets coming through if we're - in user context. + them in the softirq when updating the counters and therefore + only need to read-lock in the softirq; doing a write_lock_bh() in user + context stops packets coming through and allows user context to read + the counters or update the rules. To be cache friendly on SMP, we arrange them like so: [ n-entries ] @@ -356,7 +354,7 @@ ip6t_do_table(struct sk_buff **pskb, struct ip6t_table *table, void *userdata) { - static const char nulldevname[IFNAMSIZ]; + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); int offset = 0; unsigned int protoff = 0; int hotdrop = 0; @@ -369,7 +367,6 @@ ip6t_do_table(struct sk_buff **pskb, /* Initialization */ indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; - /* We handle fragments by dealing with the first fragment as * if it was a normal packet. 
All other fragments are treated * normally, except that they will NEVER match rules that ask @@ -497,75 +494,145 @@ ip6t_do_table(struct sk_buff **pskb, #endif } -/* If it succeeds, returns element and locks mutex */ -static inline void * -find_inlist_lock_noload(struct list_head *head, - const char *name, - int *error, - struct semaphore *mutex) +/* + * These are weird, but module loading must not be done with mutex + * held (since they will register), and we have to have a single + * function to use try_then_request_module(). + */ + +/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ +static inline struct ip6t_table *find_table_lock(const char *name) { - void *ret; + struct ip6t_table *t; -#if 1 - duprintf("find_inlist: searching for `%s' in %s.\n", - name, head == &ip6t_target ? "ip6t_target" - : head == &ip6t_match ? "ip6t_match" - : head == &ip6t_tables ? "ip6t_tables" : "UNKNOWN"); -#endif + if (down_interruptible(&ip6t_mutex) != 0) + return ERR_PTR(-EINTR); - *error = down_interruptible(mutex); - if (*error != 0) - return NULL; + list_for_each_entry(t, &ip6t_tables, list) + if (strcmp(t->name, name) == 0 && try_module_get(t->me)) + return t; + up(&ip6t_mutex); + return NULL; +} + +/* Find match, grabs ref. Returns ERR_PTR() on error. */ +static inline struct ip6t_match *find_match(const char *name, u8 revision) +{ + struct ip6t_match *m; + int err = 0; - ret = list_named_find(head, name); - if (!ret) { - *error = -ENOENT; - up(mutex); + if (down_interruptible(&ip6t_mutex) != 0) + return ERR_PTR(-EINTR); + + list_for_each_entry(m, &ip6t_match, list) { + if (strcmp(m->name, name) == 0) { + if (m->revision == revision) { + if (try_module_get(m->me)) { + up(&ip6t_mutex); + return m; + } + } else + err = -EPROTOTYPE; /* Found something. 
*/ + } } - return ret; + up(&ip6t_mutex); + return ERR_PTR(err); } -#ifndef CONFIG_KMOD -#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m)) -#else -static void * -find_inlist_lock(struct list_head *head, - const char *name, - const char *prefix, - int *error, - struct semaphore *mutex) +/* Find target, grabs ref. Returns ERR_PTR() on error. */ +static inline struct ip6t_target *find_target(const char *name, u8 revision) { - void *ret; + struct ip6t_target *t; + int err = 0; - ret = find_inlist_lock_noload(head, name, error, mutex); - if (!ret) { - duprintf("find_inlist: loading `%s%s'.\n", prefix, name); - request_module("%s%s", prefix, name); - ret = find_inlist_lock_noload(head, name, error, mutex); + if (down_interruptible(&ip6t_mutex) != 0) + return ERR_PTR(-EINTR); + + list_for_each_entry(t, &ip6t_target, list) { + if (strcmp(t->name, name) == 0) { + if (t->revision == revision) { + if (try_module_get(t->me)) { + up(&ip6t_mutex); + return t; + } + } else + err = -EPROTOTYPE; /* Found something. 
*/ + } } + up(&ip6t_mutex); + return ERR_PTR(err); +} - return ret; +struct ip6t_target *ip6t_find_target(const char *name, u8 revision) +{ + struct ip6t_target *target; + + target = try_then_request_module(find_target(name, revision), + "ip6t_%s", name); + if (IS_ERR(target) || !target) + return NULL; + return target; } -#endif -static inline struct ip6t_table * -ip6t_find_table_lock(const char *name, int *error, struct semaphore *mutex) +static int match_revfn(const char *name, u8 revision, int *bestp) { - return find_inlist_lock(&ip6t_tables, name, "ip6table_", error, mutex); + struct ip6t_match *m; + int have_rev = 0; + + list_for_each_entry(m, &ip6t_match, list) { + if (strcmp(m->name, name) == 0) { + if (m->revision > *bestp) + *bestp = m->revision; + if (m->revision == revision) + have_rev = 1; + } + } + return have_rev; } -static inline struct ip6t_match * -find_match_lock(const char *name, int *error, struct semaphore *mutex) +static int target_revfn(const char *name, u8 revision, int *bestp) { - return find_inlist_lock(&ip6t_match, name, "ip6t_", error, mutex); + struct ip6t_target *t; + int have_rev = 0; + + list_for_each_entry(t, &ip6t_target, list) { + if (strcmp(t->name, name) == 0) { + if (t->revision > *bestp) + *bestp = t->revision; + if (t->revision == revision) + have_rev = 1; + } + } + return have_rev; } -static struct ip6t_target * -ip6t_find_target_lock(const char *name, int *error, struct semaphore *mutex) +/* Returns true or false (if no such extension at all) */ +static inline int find_revision(const char *name, u8 revision, + int (*revfn)(const char *, u8, int *), + int *err) { - return find_inlist_lock(&ip6t_target, name, "ip6t_", error, mutex); + int have_rev, best = -1; + + if (down_interruptible(&ip6t_mutex) != 0) { + *err = -EINTR; + return 1; + } + have_rev = revfn(name, revision, &best); + up(&ip6t_mutex); + + /* Nothing at all? Return 0 to try loading module. 
*/ + if (best == -1) { + *err = -ENOENT; + return 0; + } + + *err = best; + if (!have_rev) + *err = -EPROTONOSUPPORT; + return 1; } + /* All zeroes == unconditional rule. */ static inline int unconditional(const struct ip6t_ip6 *ipv6) @@ -725,20 +792,16 @@ check_match(struct ip6t_entry_match *m, unsigned int hookmask, unsigned int *i) { - int ret; struct ip6t_match *match; - match = find_match_lock(m->u.user.name, &ret, &ip6t_mutex); - if (!match) { - // duprintf("check_match: `%s' not found\n", m->u.name); - return ret; - } - if (!try_module_get(match->me)) { - up(&ip6t_mutex); - return -ENOENT; + match = try_then_request_module(find_match(m->u.user.name, + m->u.user.revision), + "ip6t_%s", m->u.user.name); + if (IS_ERR(match) || !match) { + duprintf("check_match: `%s' not found\n", m->u.user.name); + return match ? PTR_ERR(match) : -ENOENT; } m->u.kernel.match = match; - up(&ip6t_mutex); if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ipv6, m->data, @@ -776,22 +839,16 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size, goto cleanup_matches; t = ip6t_get_target(e); - target = ip6t_find_target_lock(t->u.user.name, &ret, &ip6t_mutex); - if (!target) { + target = try_then_request_module(find_target(t->u.user.name, + t->u.user.revision), + "ip6t_%s", t->u.user.name); + if (IS_ERR(target) || !target) { duprintf("check_entry: `%s' not found\n", t->u.user.name); - goto cleanup_matches; - } - if (!try_module_get(target->me)) { - up(&ip6t_mutex); - ret = -ENOENT; + ret = target ? 
PTR_ERR(target) : -ENOENT; goto cleanup_matches; } t->u.kernel.target = target; - up(&ip6t_mutex); - if (!t->u.kernel.target) { - ret = -EBUSY; - goto cleanup_matches; - } + if (t->u.kernel.target == &ip6t_standard_target) { if (!standard_check(t, size)) { ret = -EINVAL; @@ -1118,8 +1175,8 @@ get_entries(const struct ip6t_get_entries *entries, int ret; struct ip6t_table *t; - t = ip6t_find_table_lock(entries->name, &ret, &ip6t_mutex); - if (t) { + t = find_table_lock(entries->name); + if (t && !IS_ERR(t)) { duprintf("t->private->number = %u\n", t->private->number); if (entries->size == t->private->size) @@ -1131,10 +1188,10 @@ get_entries(const struct ip6t_get_entries *entries, entries->size); ret = -EINVAL; } + module_put(t->me); up(&ip6t_mutex); } else - duprintf("get_entries: Can't find %s!\n", - entries->name); + ret = t ? PTR_ERR(t) : -ENOENT; return ret; } @@ -1182,22 +1239,19 @@ do_replace(void __user *user, unsigned int len) duprintf("ip_tables: Translated table\n"); - t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex); - if (!t) + t = try_then_request_module(find_table_lock(tmp.name), + "ip6table_%s", tmp.name); + if (!t || IS_ERR(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; + } /* You lied! */ if (tmp.valid_hooks != t->valid_hooks) { duprintf("Valid hook crap: %08X vs %08X\n", tmp.valid_hooks, t->valid_hooks); ret = -EINVAL; - goto free_newinfo_counters_untrans_unlock; - } - - /* Get a reference in advance, we're not allowed fail later */ - if (!try_module_get(t->me)) { - ret = -EBUSY; - goto free_newinfo_counters_untrans_unlock; + goto put_module; } oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret); @@ -1219,7 +1273,6 @@ do_replace(void __user *user, unsigned int len) /* Decrease module usage counts and free resource */ IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); vfree(oldinfo); - /* Silent error: too late now. 
*/ if (copy_to_user(tmp.counters, counters, sizeof(struct ip6t_counters) * tmp.num_counters) != 0) ret = -EFAULT; @@ -1229,7 +1282,6 @@ do_replace(void __user *user, unsigned int len) put_module: module_put(t->me); - free_newinfo_counters_untrans_unlock: up(&ip6t_mutex); free_newinfo_counters_untrans: IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); @@ -1268,7 +1320,7 @@ do_add_counters(void __user *user, unsigned int len) unsigned int i; struct ip6t_counters_info tmp, *paddc; struct ip6t_table *t; - int ret; + int ret = 0; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1285,9 +1337,11 @@ do_add_counters(void __user *user, unsigned int len) goto free; } - t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex); - if (!t) + t = find_table_lock(tmp.name); + if (!t || IS_ERR(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; goto free; + } write_lock_bh(&t->lock); if (t->private->number != paddc->num_counters) { @@ -1304,6 +1358,7 @@ do_add_counters(void __user *user, unsigned int len) unlock_up_free: write_unlock_bh(&t->lock); up(&ip6t_mutex); + module_put(t->me); free: vfree(paddc); @@ -1360,8 +1415,10 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) break; } name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; - t = ip6t_find_table_lock(name, &ret, &ip6t_mutex); - if (t) { + + t = try_then_request_module(find_table_lock(name), + "ip6table_%s", name); + if (t && !IS_ERR(t)) { struct ip6t_getinfo info; info.valid_hooks = t->valid_hooks; @@ -1377,9 +1434,10 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EFAULT; else ret = 0; - up(&ip6t_mutex); - } + module_put(t->me); + } else + ret = t ? 
PTR_ERR(t) : -ENOENT; } break; @@ -1400,6 +1458,31 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) break; } + case IP6T_SO_GET_REVISION_MATCH: + case IP6T_SO_GET_REVISION_TARGET: { + struct ip6t_get_revision rev; + int (*revfn)(const char *, u8, int *); + + if (*len != sizeof(rev)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&rev, user, sizeof(rev)) != 0) { + ret = -EFAULT; + break; + } + + if (cmd == IP6T_SO_GET_REVISION_TARGET) + revfn = target_revfn; + else + revfn = match_revfn; + + try_then_request_module(find_revision(rev.name, rev.revision, + revfn, &ret), + "ip6t_%s", rev.name); + break; + } + default: duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd); ret = -EINVAL; @@ -1417,12 +1500,7 @@ ip6t_register_target(struct ip6t_target *target) ret = down_interruptible(&ip6t_mutex); if (ret != 0) return ret; - - if (!list_named_insert(&ip6t_target, target)) { - duprintf("ip6t_register_target: `%s' already in list!\n", - target->name); - ret = -EINVAL; - } + list_add(&target->list, &ip6t_target); up(&ip6t_mutex); return ret; } @@ -1444,11 +1522,7 @@ ip6t_register_match(struct ip6t_match *match) if (ret != 0) return ret; - if (!list_named_insert(&ip6t_match, match)) { - duprintf("ip6t_register_match: `%s' already in list!\n", - match->name); - ret = -EINVAL; - } + list_add(&match->list, &ip6t_match); up(&ip6t_mutex); return ret; diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c index 81924fc..0c7584f 100644 --- a/net/ipv6/netfilter/ip6t_MARK.c +++ b/net/ipv6/netfilter/ip6t_MARK.c @@ -56,8 +56,12 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_target ip6t_mark_reg -= { { NULL, NULL }, "MARK", target, checkentry, NULL, THIS_MODULE }; +static struct ip6t_target ip6t_mark_reg = { + .name = "MARK", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE +}; static int __init init(void) { -- cgit v1.1 From a3d7a9d77533d7516a8cfb8e2b612cd5ead4fa59 Mon Sep 17 00:00:00 2001 
From: Andrew Morton Date: Fri, 28 Oct 2005 15:12:02 -0700 Subject: [ROSE]: rose_heartbeat_expiry() locking fix Missing unlock, as noted by Ted Unangst. Signed-off-by: Andrew Morton Signed-off-by: Arnaldo Carvalho de Melo --- net/rose/rose_timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index 50ae037..b6c8f38 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -138,6 +138,7 @@ static void rose_heartbeat_expiry(unsigned long param) is accepted() it isn't 'dead' so doesn't get removed. */ if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { + bh_unlock_sock(sk); rose_destroy_socket(sk); return; } -- cgit v1.1 From 9d17f218936a0fee43ad9493a841136589c942cd Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Fri, 28 Oct 2005 15:12:00 -0700 Subject: [IPV6]: Fix behavior of ip6_route_input() for link local address I find that Linux will reply to an echo request destined for an address that belongs to an interface other than the one on which the request was received. This behavior doesn't make sense for a link-local address. YOSHIFUJI Hideaki said: Please note that the sender does need to set up a neighbor entry by hand to reproduce this bug. (Link-local address on eth1 is not visible on eth0, from the point of view of neighbor discovery in IPv6.) 
+--------+ +--------+ | sender | | router | +---+----+ +-+----+-+ |eth0 eth0| |eth1 -----+----------------------+- -+-------------- Signed-off-by: Yan Zheng Acked-by: YOSHIFUJI Hideaki Signed-off-by: Andrew Morton (forwarded) Signed-off-by: Arnaldo Carvalho de Melo --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5d5bbb4..227e99e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -483,7 +483,7 @@ restart: goto out; } - rt = rt6_device_match(rt, skb->dev->ifindex, 0); + rt = rt6_device_match(rt, skb->dev->ifindex, strict); BACKTRACK(); if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { -- cgit v1.1 From 48918a4dbd6c599d6af30bd64cb355fadca708eb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 30 Oct 2005 11:20:59 +1100 Subject: [DCCP]: Simplify skb_set_owner_w semantics While we're at it let's reorganise the set_owner_w calls a little so that: 1) dccp_transmit_skb sets the owner for all packets except data packets. 2) Add dccp_skb_entail to set owner for packets queued for retransmission. 3) Make dccp_transmit_skb static. 
Signed-off-by: Herbert Xu Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/dccp.h | 1 - net/dccp/output.c | 23 ++++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 5871c02..f97b85d 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -118,7 +118,6 @@ DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); #define DCCP_ADD_STATS_USER(field, val) \ SNMP_ADD_STATS_USER(dccp_statistics, field, val) -extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb); extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); extern int dccp_send_response(struct sock *sk); diff --git a/net/dccp/output.c b/net/dccp/output.c index d59f86f..c25b042 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -25,13 +26,20 @@ static inline void dccp_event_ack_sent(struct sock *sk) inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } +static inline void dccp_skb_entail(struct sock *sk, struct sk_buff *skb) +{ + skb_set_owner_w(skb, sk); + WARN_ON(sk->sk_send_head); + sk->sk_send_head = skb; +} + /* * All SKB's seen here are completely headerless. It is our * job to build the DCCP header, and pass the packet down to * IP so it can do the same plus pass the packet off to the * device. */ -int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) +static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) { if (likely(skb != NULL)) { const struct inet_sock *inet = inet_sk(sk); @@ -63,6 +71,9 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) skb->h.raw = skb_push(skb, dccp_header_size); dh = dccp_hdr(skb); + /* + * Only data packets should come through with skb->sk set. 
+ */ if (!skb->sk) skb_set_owner_w(skb, sk); @@ -393,10 +404,8 @@ int dccp_connect(struct sock *sk) DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; skb->csum = 0; - skb_set_owner_w(skb, sk); - BUG_TRAP(sk->sk_send_head == NULL); - sk->sk_send_head = skb; + dccp_skb_entail(sk, skb); dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); @@ -425,7 +434,6 @@ void dccp_send_ack(struct sock *sk) skb_reserve(skb, MAX_DCCP_HEADER); skb->csum = 0; DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK; - skb_set_owner_w(skb, sk); dccp_transmit_skb(sk, skb); } } @@ -482,7 +490,6 @@ void dccp_send_sync(struct sock *sk, const u64 seq, DCCP_SKB_CB(skb)->dccpd_type = pkt_type; DCCP_SKB_CB(skb)->dccpd_seq = seq; - skb_set_owner_w(skb, sk); dccp_transmit_skb(sk, skb); } @@ -507,10 +514,8 @@ void dccp_send_close(struct sock *sk, const int active) DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; - skb_set_owner_w(skb, sk); if (active) { - BUG_TRAP(sk->sk_send_head == NULL); - sk->sk_send_head = skb; + dccp_skb_entail(sk, skb); dccp_transmit_skb(sk, skb_clone(skb, prio)); } else dccp_transmit_skb(sk, skb); -- cgit v1.1 From edc9e81917157d1e73bf081d4fbcad7c34d32783 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 30 Oct 2005 11:20:59 +1100 Subject: [DCCP]: Set socket owner iff packet is not data Here is a complimentary insurance policy for those feeling a bit insecure. You don't have to accept this. However, if you do, you can't blame me for it :) > 1) dccp_transmit_skb sets the owner for all packets except data packets. We can actually verify this by looking at pkt_type. 
Signed-off-by: Herbert Xu Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/output.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/dccp/output.c b/net/dccp/output.c index c25b042..74ff870 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -58,10 +58,21 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) switch (dcb->dccpd_type) { case DCCP_PKT_DATA: set_ack = 0; + /* fall through */ + case DCCP_PKT_DATAACK: break; + case DCCP_PKT_SYNC: case DCCP_PKT_SYNCACK: ackno = dcb->dccpd_seq; + /* fall through */ + default: + /* + * Only data packets should come through with skb->sk + * set. + */ + WARN_ON(skb->sk); + skb_set_owner_w(skb, sk); break; } @@ -71,12 +82,6 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) skb->h.raw = skb_push(skb, dccp_header_size); dh = dccp_hdr(skb); - /* - * Only data packets should come through with skb->sk set. - */ - if (!skb->sk) - skb_set_owner_w(skb, sk); - /* Build DCCP header and checksum it. 
*/ memset(dh, 0, dccp_header_size); dh->dccph_type = dcb->dccpd_type; -- cgit v1.1 From 97300b5fdfe28c6edae926926f9467a27cf5889c Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 31 Oct 2005 20:09:45 +0800 Subject: [MCAST] IPv6: Check packet size when process Multicast Signed-off-by: Yan Zheng Signed-off-by: Arnaldo Carvalho de Melo --- net/ipv6/mcast.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index c4f2a0e..966b237 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1087,7 +1087,7 @@ static void mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, int igmp6_event_query(struct sk_buff *skb) { - struct mld2_query *mlh2 = (struct mld2_query *) skb->h.raw; + struct mld2_query *mlh2 = NULL; struct ifmcaddr6 *ma; struct in6_addr *group; unsigned long max_delay; @@ -1140,6 +1140,13 @@ int igmp6_event_query(struct sk_buff *skb) /* clear deleted report items */ mld_clear_delrec(idev); } else if (len >= 28) { + int srcs_offset = sizeof(struct mld2_query) - + sizeof(struct icmp6hdr); + if (!pskb_may_pull(skb, srcs_offset)) { + in6_dev_put(idev); + return -EINVAL; + } + mlh2 = (struct mld2_query *) skb->h.raw; max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000; if (!max_delay) max_delay = 1; @@ -1156,7 +1163,15 @@ int igmp6_event_query(struct sk_buff *skb) return 0; } /* mark sources to include, if group & source-specific */ - mark = mlh2->nsrcs != 0; + if (mlh2->nsrcs != 0) { + if (!pskb_may_pull(skb, srcs_offset + + mlh2->nsrcs * sizeof(struct in6_addr))) { + in6_dev_put(idev); + return -EINVAL; + } + mlh2 = (struct mld2_query *) skb->h.raw; + mark = 1; + } } else { in6_dev_put(idev); return -EINVAL; -- cgit v1.1