diff options
Diffstat (limited to 'net')
60 files changed, 1333 insertions, 971 deletions
diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 17a81eb..526d953 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -105,7 +105,7 @@ extern void mpc_proc_clean(void); struct mpoa_client *mpcs = NULL; /* FIXME */ static struct atm_mpoa_qos *qos_head = NULL; -static struct timer_list mpc_timer = TIMER_INITIALIZER(NULL, 0, 0); +static DEFINE_TIMER(mpc_timer, NULL, 0, 0); static struct mpoa_client *find_mpc_by_itfnum(int itf) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index ed705dd..8e37e71 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1695,16 +1695,12 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) /* These two are safe on a single CPU system as only user tasks fiddle here */ if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) amount = skb->len; - res = put_user(amount, (int __user *)argp); + res = put_user(amount, (int __user *) argp); break; } case SIOCGSTAMP: - if (sk != NULL) { - res = sock_get_timestamp(sk, argp); - break; - } - res = -EINVAL; + res = sock_get_timestamp(sk, argp); break; case SIOCAX25ADDUID: /* Add a uid to the uid/call map table */ @@ -1951,24 +1947,24 @@ static struct net_proto_family ax25_family_ops = { }; static struct proto_ops ax25_proto_ops = { - .family = PF_AX25, - .owner = THIS_MODULE, - .release = ax25_release, - .bind = ax25_bind, - .connect = ax25_connect, - .socketpair = sock_no_socketpair, - .accept = ax25_accept, - .getname = ax25_getname, - .poll = datagram_poll, - .ioctl = ax25_ioctl, - .listen = ax25_listen, - .shutdown = ax25_shutdown, - .setsockopt = ax25_setsockopt, - .getsockopt = ax25_getsockopt, - .sendmsg = ax25_sendmsg, - .recvmsg = ax25_recvmsg, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, + .family = PF_AX25, + .owner = THIS_MODULE, + .release = ax25_release, + .bind = ax25_bind, + .connect = ax25_connect, + .socketpair = sock_no_socketpair, + .accept = ax25_accept, + .getname = ax25_getname, + .poll = datagram_poll, + .ioctl = ax25_ioctl, + 
.listen = ax25_listen, + .shutdown = ax25_shutdown, + .setsockopt = ax25_setsockopt, + .getsockopt = ax25_getsockopt, + .sendmsg = ax25_sendmsg, + .recvmsg = ax25_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, }; /* @@ -1984,7 +1980,7 @@ static struct notifier_block ax25_dev_notifier = { .notifier_call =ax25_device_event, }; -EXPORT_SYMBOL(ax25_encapsulate); +EXPORT_SYMBOL(ax25_hard_header); EXPORT_SYMBOL(ax25_rebuild_header); EXPORT_SYMBOL(ax25_findbyuid); EXPORT_SYMBOL(ax25_find_cb); diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c index dca179d..0164a15 100644 --- a/net/ax25/ax25_addr.c +++ b/net/ax25/ax25_addr.c @@ -67,37 +67,34 @@ char *ax2asc(char *buf, ax25_address *a) /* * ascii -> ax25 conversion */ -ax25_address *asc2ax(char *callsign) +void asc2ax(ax25_address *addr, char *callsign) { - static ax25_address addr; char *s; int n; for (s = callsign, n = 0; n < 6; n++) { if (*s != '\0' && *s != '-') - addr.ax25_call[n] = *s++; + addr->ax25_call[n] = *s++; else - addr.ax25_call[n] = ' '; - addr.ax25_call[n] <<= 1; - addr.ax25_call[n] &= 0xFE; + addr->ax25_call[n] = ' '; + addr->ax25_call[n] <<= 1; + addr->ax25_call[n] &= 0xFE; } if (*s++ == '\0') { - addr.ax25_call[6] = 0x00; - return &addr; + addr->ax25_call[6] = 0x00; + return; } - addr.ax25_call[6] = *s++ - '0'; + addr->ax25_call[6] = *s++ - '0'; if (*s != '\0') { - addr.ax25_call[6] *= 10; - addr.ax25_call[6] += *s++ - '0'; + addr->ax25_call[6] *= 10; + addr->ax25_call[6] += *s++ - '0'; } - addr.ax25_call[6] <<= 1; - addr.ax25_call[6] &= 0x1E; - - return &addr; + addr->ax25_call[6] <<= 1; + addr->ax25_call[6] &= 0x1E; } /* diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index bba0173..d643dac 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -47,7 +47,7 @@ #ifdef CONFIG_INET -int ax25_encapsulate(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) +int ax25_hard_header(struct sk_buff *skb, struct net_device 
*dev, unsigned short type, void *daddr, void *saddr, unsigned len) { unsigned char *buff; @@ -88,7 +88,7 @@ int ax25_encapsulate(struct sk_buff *skb, struct net_device *dev, unsigned short *buff++ = AX25_P_ARP; break; default: - printk(KERN_ERR "AX.25: ax25_encapsulate - wrong protocol type 0x%2.2x\n", type); + printk(KERN_ERR "AX.25: ax25_hard_header - wrong protocol type 0x%2.2x\n", type); *buff++ = 0; break; } @@ -209,7 +209,7 @@ put: #else /* INET */ -int ax25_encapsulate(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) +int ax25_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) { return -AX25_HEADER_LEN; } diff --git a/net/compat.c b/net/compat.c index d99ab96..e593dac 100644 --- a/net/compat.c +++ b/net/compat.c @@ -135,13 +135,14 @@ static inline struct compat_cmsghdr __user *cmsg_compat_nxthdr(struct msghdr *ms * thus placement) of cmsg headers and length are different for * 32-bit apps. -DaveM */ -int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, +int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk, unsigned char *stackbuf, int stackbuf_size) { struct compat_cmsghdr __user *ucmsg; struct cmsghdr *kcmsg, *kcmsg_base; compat_size_t ucmlen; __kernel_size_t kcmlen, tmp; + int err = -EFAULT; kcmlen = 0; kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf; @@ -156,6 +157,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, tmp = ((ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))) + CMSG_ALIGN(sizeof(struct cmsghdr))); + tmp = CMSG_ALIGN(tmp); kcmlen += tmp; ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); } @@ -167,30 +169,34 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, * until we have successfully copied over all of the data * from the user. 
*/ - if(kcmlen > stackbuf_size) - kcmsg_base = kcmsg = kmalloc(kcmlen, GFP_KERNEL); - if(kcmsg == NULL) + if (kcmlen > stackbuf_size) + kcmsg_base = kcmsg = sock_kmalloc(sk, kcmlen, GFP_KERNEL); + if (kcmsg == NULL) return -ENOBUFS; /* Now copy them over neatly. */ memset(kcmsg, 0, kcmlen); ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg); while(ucmsg != NULL) { - __get_user(ucmlen, &ucmsg->cmsg_len); + if (__get_user(ucmlen, &ucmsg->cmsg_len)) + goto Efault; + if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg)) + goto Einval; tmp = ((ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))) + CMSG_ALIGN(sizeof(struct cmsghdr))); + if ((char *)kcmsg_base + kcmlen - (char *)kcmsg < CMSG_ALIGN(tmp)) + goto Einval; kcmsg->cmsg_len = tmp; - __get_user(kcmsg->cmsg_level, &ucmsg->cmsg_level); - __get_user(kcmsg->cmsg_type, &ucmsg->cmsg_type); - - /* Copy over the data. */ - if(copy_from_user(CMSG_DATA(kcmsg), - CMSG_COMPAT_DATA(ucmsg), - (ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))))) - goto out_free_efault; + tmp = CMSG_ALIGN(tmp); + if (__get_user(kcmsg->cmsg_level, &ucmsg->cmsg_level) || + __get_user(kcmsg->cmsg_type, &ucmsg->cmsg_type) || + copy_from_user(CMSG_DATA(kcmsg), + CMSG_COMPAT_DATA(ucmsg), + (ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))))) + goto Efault; /* Advance. 
*/ - kcmsg = (struct cmsghdr *)((char *)kcmsg + CMSG_ALIGN(tmp)); + kcmsg = (struct cmsghdr *)((char *)kcmsg + tmp); ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); } @@ -199,10 +205,12 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, kmsg->msg_controllen = kcmlen; return 0; -out_free_efault: - if(kcmsg_base != (struct cmsghdr *)stackbuf) - kfree(kcmsg_base); - return -EFAULT; +Einval: + err = -EINVAL; +Efault: + if (kcmsg_base != (struct cmsghdr *)stackbuf) + sock_kfree_s(sk, kcmsg_base, kcmlen); + return err; } int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data) diff --git a/net/core/dst.c b/net/core/dst.c index 334790d..470c05b 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -39,8 +39,7 @@ static unsigned long dst_gc_timer_inc = DST_GC_MAX; static void dst_run_gc(unsigned long); static void ___dst_free(struct dst_entry * dst); -static struct timer_list dst_gc_timer = - TIMER_INITIALIZER(dst_run_gc, DST_GC_MIN, 0); +static DEFINE_TIMER(dst_gc_timer, dst_run_gc, DST_GC_MIN, 0); static void dst_run_gc(unsigned long dummy) { diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a1a9a7a..5265dfd 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -645,10 +645,10 @@ int netpoll_setup(struct netpoll *np) npinfo->rx_flags = 0; npinfo->rx_np = NULL; - npinfo->poll_lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&npinfo->poll_lock); npinfo->poll_owner = -1; npinfo->tries = MAX_RETRIES; - npinfo->rx_lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&npinfo->rx_lock); } else npinfo = ndev->npinfo; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8eb083b..ef430b1e 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -503,7 +503,7 @@ static int pg_delay_d = 0; static int pg_clone_skb_d = 0; static int debug = 0; -static spinlock_t _thread_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(_thread_lock); static struct pktgen_thread *pktgen_threads = NULL; static char module_fname[128]; @@ -1452,8 +1452,7 @@ static 
int proc_thread_write(struct file *file, const char __user *user_buffer, thread_lock(); t->control |= T_REMDEV; thread_unlock(); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ/8); /* Propagate thread->control */ + schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ ret = count; sprintf(pg_result, "OK: rem_device_all"); goto out; @@ -1716,10 +1715,9 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) printk(KERN_INFO "sleeping for %d\n", (int)(spin_until_us - now)); while (now < spin_until_us) { /* TODO: optimise sleeping behavior */ - if (spin_until_us - now > (1000000/HZ)+1) { - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); - } else if (spin_until_us - now > 100) { + if (spin_until_us - now > jiffies_to_usecs(1)+1) + schedule_timeout_interruptible(1); + else if (spin_until_us - now > 100) { do_softirq(); if (!pkt_dev->running) return; @@ -2449,8 +2447,7 @@ static void pktgen_run_all_threads(void) } thread_unlock(); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ/8); /* Propagate thread->control */ + schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ pktgen_wait_all_threads_run(); } diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 7bf3b3a..38aa849 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -43,12 +43,22 @@ #include "ccid3.h" /* - * Reason for maths with 10 here is to avoid 32 bit overflow when a is big. + * Reason for maths here is to avoid 32 bit overflow when a is big. + * With this we get close to the limit. */ static inline u32 usecs_div(const u32 a, const u32 b) { - const u32 tmp = a * (USEC_PER_SEC / 10); - return b > 20 ? tmp / (b / 10) : tmp; + const u32 div = a < (UINT_MAX / (USEC_PER_SEC / 10)) ? 10 : + a < (UINT_MAX / (USEC_PER_SEC / 50)) ? 50 : + a < (UINT_MAX / (USEC_PER_SEC / 100)) ? 100 : + a < (UINT_MAX / (USEC_PER_SEC / 500)) ? 500 : + a < (UINT_MAX / (USEC_PER_SEC / 1000)) ? 
1000 : + a < (UINT_MAX / (USEC_PER_SEC / 5000)) ? 5000 : + a < (UINT_MAX / (USEC_PER_SEC / 10000)) ? 10000 : + a < (UINT_MAX / (USEC_PER_SEC / 50000)) ? 50000 : + 100000; + const u32 tmp = a * (USEC_PER_SEC / div); + return (b >= 2 * div) ? tmp / (b / div) : tmp; } static int ccid3_debug; @@ -68,13 +78,11 @@ static struct dccp_li_hist *ccid3_li_hist; static int ccid3_init(struct sock *sk) { - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); return 0; } static void ccid3_exit(struct sock *sk) { - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); } /* TFRC sender states */ @@ -102,8 +110,7 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", @@ -144,8 +151,7 @@ static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) */ static void ccid3_hc_tx_update_x(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); /* To avoid large error in calcX */ if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { @@ -159,7 +165,7 @@ static void ccid3_hc_tx_update_x(struct sock *sk) } else { struct timeval now; - do_gettimeofday(&now); + dccp_timestamp(sk, &now); if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv, @@ -174,9 +180,8 @@ static void ccid3_hc_tx_update_x(struct sock *sk) static void ccid3_hc_tx_no_feedback_timer(unsigned long data) { struct sock *sk = (struct sock *)data; - struct dccp_sock *dp = dccp_sk(sk); unsigned long next_tmout = 0; - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + 
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); bh_lock_sock(sk); if (sock_owned_by_user(sk)) { @@ -274,20 +279,20 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, int len) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct dccp_tx_hist_entry *new_packet; struct timeval now; long delay; int rc = -ENOTCONN; - /* Check if pure ACK or Terminating*/ + BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM); + /* Check if pure ACK or Terminating*/ /* * XXX: We only call this function for DATA and DATAACK, on, these * packets can have zero length, but why the comment about "pure ACK"? */ - if (hctx == NULL || len == 0 || - hctx->ccid3hctx_state == TFRC_SSTATE_TERM) + if (unlikely(len == 0)) goto out; /* See if last packet allocated was not sent */ @@ -297,23 +302,20 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, SLAB_ATOMIC); rc = -ENOBUFS; - if (new_packet == NULL) { - ccid3_pr_debug("%s, sk=%p, not enough mem to add " - "to history, send refused\n", - dccp_role(sk), sk); + if (unlikely(new_packet == NULL)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, not enough " + "mem to add to history, send refused\n", + __FUNCTION__, dccp_role(sk), sk); goto out; } dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); } - do_gettimeofday(&now); + dccp_timestamp(sk, &now); switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_SENT: - ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", - dccp_role(sk), sk, dp->dccps_gss); - hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, @@ -321,7 +323,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, hctx->ccid3hctx_last_win_count = 0; hctx->ccid3hctx_t_last_win_count = now; ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); - 
hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT; + hctx->ccid3hctx_t_ipi = TFRC_INITIAL_IPI; /* Set nominal send time for initial packet */ hctx->ccid3hctx_t_nom = now; @@ -334,7 +336,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, case TFRC_SSTATE_FBACK: delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta); - ccid3_pr_debug("send_packet delay=%ld\n", delay); delay /= -1000; /* divide by -1000 is to convert to ms and get sign right */ rc = delay > 0 ? delay : 0; @@ -348,29 +349,25 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, } /* Can we send? if so add options and add to packet history */ - if (rc == 0) + if (rc == 0) { + dp->dccps_hc_tx_insert_options = 1; new_packet->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + } out: return rc; } static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct timeval now; - BUG_ON(hctx == NULL); - - if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { - ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n", - dccp_role(sk), sk); - return; - } + BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM); - do_gettimeofday(&now); + dccp_timestamp(sk, &now); /* check if we have sent a data packet */ if (len > 0) { @@ -378,14 +375,14 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) struct dccp_tx_hist_entry *packet; packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); - if (packet == NULL) { - printk(KERN_CRIT "%s: packet doesn't exists in " - "history!\n", __FUNCTION__); + if (unlikely(packet == NULL)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: packet doesn't " + "exists in history!\n", __FUNCTION__); return; } - if (packet->dccphtx_sent) { - printk(KERN_CRIT "%s: no unsent packet in history!\n", - __FUNCTION__); 
+ if (unlikely(packet->dccphtx_sent)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: no unsent packet in " + "history!\n", __FUNCTION__); return; } packet->dccphtx_tstamp = now; @@ -445,24 +442,18 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; struct dccp_tx_hist_entry *packet; + struct timeval now; unsigned long next_tmout; u32 t_elapsed; u32 pinv; u32 x_recv; u32 r_sample; - if (hctx == NULL) - return; - - if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { - ccid3_pr_debug("%s, sk=%p, received a packet when " - "terminating!\n", dccp_role(sk), sk); - return; - } + BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM); /* we are only interested in ACKs */ if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || @@ -471,7 +462,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) opt_recv = &hctx->ccid3hctx_options_received; - t_elapsed = dp->dccps_options_received.dccpor_elapsed_time; + t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10; x_recv = opt_recv->ccid3or_receive_rate; pinv = opt_recv->ccid3or_loss_event_rate; @@ -486,19 +477,24 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* get t_recvdata from history */ packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, DCCP_SKB_CB(skb)->dccpd_ack_seq); - if (packet == NULL) { - ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't " - "exist in history!\n", - dccp_role(sk), sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq, - dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); + if (unlikely(packet == NULL)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, seqno " + "%llu(%s) does't exist in history!\n", + __FUNCTION__, 
dccp_role(sk), sk, + (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, + dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); return; } /* Update RTT */ - r_sample = timeval_now_delta(&packet->dccphtx_tstamp); - /* FIXME: */ - // r_sample -= usecs_to_jiffies(t_elapsed * 10); + dccp_timestamp(sk, &now); + r_sample = timeval_delta(&now, &packet->dccphtx_tstamp); + if (unlikely(r_sample <= t_elapsed)) + LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, " + "t_elapsed=%uus\n", + __FUNCTION__, r_sample, t_elapsed); + else + r_sample -= t_elapsed; /* Update RTT estimate by * If (No feedback recv) @@ -591,11 +587,11 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) { - const struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + + BUG_ON(hctx == NULL); - if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || - sk->sk_state == DCCP_PARTOPEN)) + if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) return; DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; @@ -606,12 +602,11 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, unsigned char *value) { int rc = 0; - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; - if (hctx == NULL) - return 0; + BUG_ON(hctx == NULL); opt_recv = &hctx->ccid3hctx_options_received; @@ -625,10 +620,10 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, switch (option) { case TFRC_OPT_LOSS_EVENT_RATE: - if (len != 4) { - ccid3_pr_debug("%s, sk=%p, invalid len for " - "TFRC_OPT_LOSS_EVENT_RATE\n", - dccp_role(sk), sk); + if (unlikely(len != 4)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: 
%s, sk=%p, invalid " + "len for TFRC_OPT_LOSS_EVENT_RATE\n", + __FUNCTION__, dccp_role(sk), sk); rc = -EINVAL; } else { opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value); @@ -646,10 +641,10 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, opt_recv->ccid3or_loss_intervals_len); break; case TFRC_OPT_RECEIVE_RATE: - if (len != 4) { - ccid3_pr_debug("%s, sk=%p, invalid len for " - "TFRC_OPT_RECEIVE_RATE\n", - dccp_role(sk), sk); + if (unlikely(len != 4)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, invalid " + "len for TFRC_OPT_RECEIVE_RATE\n", + __FUNCTION__, dccp_role(sk), sk); rc = -EINVAL; } else { opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value); @@ -668,13 +663,11 @@ static int ccid3_hc_tx_init(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx; - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - - hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), - gfp_any()); - if (hctx == NULL) + dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any()); + if (dp->dccps_hc_tx_ccid_private == NULL) return -ENOMEM; + hctx = ccid3_hc_tx_sk(sk); memset(hctx, 0, sizeof(*hctx)); if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && @@ -696,9 +689,8 @@ static int ccid3_hc_tx_init(struct sock *sk) static void ccid3_hc_tx_exit(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); BUG_ON(hctx == NULL); ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); @@ -738,8 +730,7 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); enum ccid3_hc_rx_states oldstate = 
hcrx->ccid3hcrx_state; ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", @@ -751,14 +742,14 @@ static inline void ccid3_hc_rx_set_state(struct sock *sk, static void ccid3_hc_rx_send_feedback(struct sock *sk) { + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; struct dccp_rx_hist_entry *packet; struct timeval now; ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - do_gettimeofday(&now); + dccp_timestamp(sk, &now); switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: @@ -767,11 +758,8 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) case TFRC_RSTATE_DATA: { const u32 delta = timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_feedback); - - hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * - USEC_PER_SEC); - if (likely(delta > 1)) - hcrx->ccid3hcrx_x_recv /= delta; + hcrx->ccid3hcrx_x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, + delta); } break; default: @@ -782,10 +770,10 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) } packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); - if (packet == NULL) { - printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n", - __FUNCTION__, dccp_role(sk), sk); - dump_stack(); + if (unlikely(packet == NULL)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, no data packet " + "in history!\n", + __FUNCTION__, dccp_role(sk), sk); return; } @@ -801,17 +789,18 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) hcrx->ccid3hcrx_pinv = ~0; else hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; + dp->dccps_hc_rx_insert_options = 1; dccp_send_ack(sk); } static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) { - const struct dccp_sock *dp = dccp_sk(sk); + const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); u32 x_recv, pinv; - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || - sk->sk_state == DCCP_PARTOPEN)) + 
BUG_ON(hcrx == NULL); + + if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) return; DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; @@ -837,8 +826,7 @@ static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 rtt, delta, x_recv, fval, p, tmp2; struct timeval tstamp = { 0, }; @@ -869,17 +857,17 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) } } - if (step == 0) { - printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no " - "data packets!\n", - __FUNCTION__, dccp_role(sk), sk); + if (unlikely(step == 0)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, packet history " + "contains no data packets!\n", + __FUNCTION__, dccp_role(sk), sk); return ~0; } - if (interval == 0) { - ccid3_pr_debug("%s, sk=%p, Could not find a win_count " - "interval > 0. Defaulting to 1\n", - dccp_role(sk), sk); + if (unlikely(interval == 0)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, Could not find a " + "win_count interval > 0. 
Defaulting to 1\n", + __FUNCTION__, dccp_role(sk), sk); interval = 1; } found: @@ -889,10 +877,9 @@ found: if (rtt == 0) rtt = 1; - delta = timeval_now_delta(&hcrx->ccid3hcrx_tstamp_last_feedback); - x_recv = hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC; - if (likely(delta > 1)) - x_recv /= delta; + dccp_timestamp(sk, &tstamp); + delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); + x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta); tmp1 = (u64)x_recv * (u64)rtt; do_div(tmp1,10000000); @@ -911,8 +898,7 @@ found: static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); if (seq_loss != DCCP_MAX_SEQNO + 1 && list_empty(&hcrx->ccid3hcrx_li_hist)) { @@ -924,14 +910,14 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) if (li_tail == NULL) return; li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); - } - /* FIXME: find end of interval */ + } else + LIMIT_NETDEBUG(KERN_WARNING "%s: FIXME: find end of " + "interval\n", __FUNCTION__); } static void ccid3_hc_rx_detect_loss(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); u8 win_loss; const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist, &hcrx->ccid3hcrx_li_hist, @@ -942,22 +928,19 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; struct timeval now; u8 win_count; - u32 p_prev; + u32 p_prev, r_sample, t_elapsed; int ins; - if (hcrx == NULL) - 
return; - - BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || + BUG_ON(hcrx == NULL || + !(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA)); - opt_recv = &dp->dccps_options_received; + opt_recv = &dccp_sk(sk)->dccps_options_received; switch (DCCP_SKB_CB(skb)->dccpd_type) { case DCCP_PKT_ACK: @@ -967,10 +950,24 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (opt_recv->dccpor_timestamp_echo == 0) break; p_prev = hcrx->ccid3hcrx_rtt; - do_gettimeofday(&now); - hcrx->ccid3hcrx_rtt = timeval_usecs(&now) - - (opt_recv->dccpor_timestamp_echo - - opt_recv->dccpor_elapsed_time) * 10; + dccp_timestamp(sk, &now); + timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10); + r_sample = timeval_usecs(&now); + t_elapsed = opt_recv->dccpor_elapsed_time * 10; + + if (unlikely(r_sample <= t_elapsed)) + LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, " + "t_elapsed=%uus\n", + __FUNCTION__, r_sample, t_elapsed); + else + r_sample -= t_elapsed; + + if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) + hcrx->ccid3hcrx_rtt = r_sample; + else + hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 + + r_sample / 10; + if (p_prev != hcrx->ccid3hcrx_rtt) ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n", dccp_role(sk), hcrx->ccid3hcrx_rtt, @@ -978,19 +975,16 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) break; case DCCP_PKT_DATA: break; - default: - ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n", - dccp_role(sk), sk, - dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); + default: /* We're not interested in other packet types, move along */ return; } - packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, + packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp, skb, SLAB_ATOMIC); - if (packet == NULL) { - ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet " - "to history (consider it lost)!", - dccp_role(sk), sk); + if 
(unlikely(packet == NULL)) { + LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, Not enough mem to " + "add rx packet to history, consider it lost!\n", + __FUNCTION__, dccp_role(sk), sk); return; } @@ -1017,7 +1011,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (ins != 0) break; - do_gettimeofday(&now); + dccp_timestamp(sk, &now); if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >= hcrx->ccid3hcrx_rtt) { hcrx->ccid3hcrx_tstamp_last_ack = now; @@ -1056,11 +1050,11 @@ static int ccid3_hc_rx_init(struct sock *sk) ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), - gfp_any()); - if (hcrx == NULL) + dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any()); + if (dp->dccps_hc_rx_ccid_private == NULL) return -ENOMEM; + hcrx = ccid3_hc_rx_sk(sk); memset(hcrx, 0, sizeof(*hcrx)); if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && @@ -1072,23 +1066,18 @@ static int ccid3_hc_rx_init(struct sock *sk) hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); - /* - * XXX this seems to be paranoid, need to think more about this, for - * now start with something different than zero. -acme - */ - hcrx->ccid3hcrx_rtt = USEC_PER_SEC / 5; + dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack); + hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack; + hcrx->ccid3hcrx_rtt = 5000; /* XXX 5ms for now... 
*/ return 0; } static void ccid3_hc_rx_exit(struct sock *sk) { + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - - if (hcrx == NULL) - return; + BUG_ON(hcrx == NULL); ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); @@ -1104,12 +1093,14 @@ static void ccid3_hc_rx_exit(struct sock *sk) static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) { - const struct dccp_sock *dp = dccp_sk(sk); - const struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - if (hcrx == NULL) + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) return; + BUG_ON(hcrx == NULL); + info->tcpi_ca_state = hcrx->ccid3hcrx_state; info->tcpi_options |= TCPI_OPT_TIMESTAMPS; info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; @@ -1117,12 +1108,14 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) { - const struct dccp_sock *dp = dccp_sk(sk); - const struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - if (hctx == NULL) + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) return; + BUG_ON(hctx == NULL); + info->tcpi_rto = hctx->ccid3hctx_t_rto; info->tcpi_rtt = hctx->ccid3hctx_rtt; } diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index ee8cbac..eb24877 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -48,6 +48,8 @@ /* Two seconds as per CCID3 spec */ #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) +#define TFRC_INITIAL_IPI (USEC_PER_SEC / 4) + /* In usecs - half the scheduling granularity as per RFC3448 4.6 */ #define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) @@ -115,7 +117,7 @@ struct 
ccid3_hc_rx_sock { u64 ccid3hcrx_seqno_last_counter:48, ccid3hcrx_state:8, ccid3hcrx_last_counter:4; - unsigned long ccid3hcrx_rtt; + u32 ccid3hcrx_rtt; u32 ccid3hcrx_p; u32 ccid3hcrx_bytes_recv; struct timeval ccid3hcrx_tstamp_last_feedback; @@ -128,10 +130,14 @@ struct ccid3_hc_rx_sock { u32 ccid3hcrx_x_recv; }; -#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \ - ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field) +static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) +{ + return dccp_sk(sk)->dccps_hc_tx_ccid_private; +} -#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \ - ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field) +static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) +{ + return dccp_sk(sk)->dccps_hc_rx_ccid_private; +} #endif /* _DCCP_CCID3_H_ */ diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index fb90a91..b375ebd 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -134,6 +134,7 @@ static inline struct dccp_tx_hist_entry * static inline struct dccp_rx_hist_entry * dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, + const struct sock *sk, const u32 ndp, const struct sk_buff *skb, const unsigned int __nocast prio) @@ -148,7 +149,7 @@ static inline struct dccp_rx_hist_entry * entry->dccphrx_ccval = dh->dccph_ccval; entry->dccphrx_type = dh->dccph_type; entry->dccphrx_ndp = ndp; - do_gettimeofday(&(entry->dccphrx_tstamp)); + dccp_timestamp(sk, &entry->dccphrx_tstamp); } return entry; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 33456c0..95c4630b 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -426,10 +426,13 @@ extern struct dccp_ackpkts * dccp_ackpkts_alloc(unsigned int len, const unsigned int __nocast priority); extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); -extern int dccp_ackpkts_add(struct 
dccp_ackpkts *ap, u64 ackno, u8 state); +extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, + u64 ackno, u8 state); extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, u64 ackno); +extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); + static inline suseconds_t timeval_usecs(const struct timeval *tv) { return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; @@ -468,17 +471,6 @@ static inline void timeval_sub_usecs(struct timeval *tv, } } -/* - * Returns the difference in usecs between timeval - * passed in and current time - */ -static inline suseconds_t timeval_now_delta(const struct timeval *tv) -{ - struct timeval now; - do_gettimeofday(&now); - return timeval_delta(&now, tv); -} - #ifdef CONFIG_IP_DCCP_DEBUG extern void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len); diff --git a/net/dccp/input.c b/net/dccp/input.c index ef29cef..c60bc34 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -170,7 +170,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (dp->dccps_options.dccpo_send_ack_vector) { struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) { LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable " @@ -498,7 +498,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * DCCP_ACKPKTS_STATE_ECN_MARKED */ if (dp->dccps_options.dccpo_send_ack_vector) { - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) goto discard; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3fc75db..2afaa46 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -641,16 +641,12 @@ int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) skb = dccp_make_reset(sk, sk->sk_dst_cache, code); 
if (skb != NULL) { - const struct dccp_sock *dp = dccp_sk(sk); const struct inet_sock *inet = inet_sk(sk); err = ip_build_and_send_pkt(skb, sk, inet->saddr, inet->daddr, NULL); if (err == NET_XMIT_CN) err = 0; - - ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); } return err; @@ -1243,6 +1239,7 @@ static int dccp_v4_init_sock(struct sock *sk) static int dccp_ctl_socket_init = 1; dccp_options_init(&dp->dccps_options); + do_gettimeofday(&dp->dccps_epoch); if (dp->dccps_options.dccpo_send_ack_vector) { dp->dccps_hc_rx_ackpkts = diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index ce5dff4..18461bc 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -96,6 +96,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newdp->dccps_hc_rx_ackpkts = NULL; newdp->dccps_role = DCCP_ROLE_SERVER; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; + do_gettimeofday(&newdp->dccps_epoch); if (newdp->dccps_options.dccpo_send_ack_vector) { newdp->dccps_hc_rx_ackpkts = diff --git a/net/dccp/options.c b/net/dccp/options.c index 382c589..d4c4242 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -72,6 +72,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) struct dccp_options_received *opt_recv = &dp->dccps_options_received; unsigned char opt, len; unsigned char *value; + u32 elapsed_time; memset(opt_recv, 0, sizeof(*opt_recv)); @@ -139,7 +140,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; - do_gettimeofday(&dp->dccps_timestamp_time); + dccp_timestamp(sk, &dp->dccps_timestamp_time); dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", debug_prefix, opt_recv->dccpor_timestamp, @@ -159,18 +160,18 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); - if (len > 4) { - if (len == 6) - opt_recv->dccpor_elapsed_time = - ntohs(*(u16 
*)(value + 4)); - else - opt_recv->dccpor_elapsed_time = - ntohl(*(u32 *)(value + 4)); - dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", - debug_prefix, - opt_recv->dccpor_elapsed_time); - } + if (len == 4) + break; + + if (len == 6) + elapsed_time = ntohs(*(u16 *)(value + 4)); + else + elapsed_time = ntohl(*(u32 *)(value + 4)); + + /* Give precedence to the biggest ELAPSED_TIME */ + if (elapsed_time > opt_recv->dccpor_elapsed_time) + opt_recv->dccpor_elapsed_time = elapsed_time; break; case DCCPO_ELAPSED_TIME: if (len != 2 && len != 4) @@ -180,14 +181,15 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) continue; if (len == 2) - opt_recv->dccpor_elapsed_time = - ntohs(*(u16 *)value); + elapsed_time = ntohs(*(u16 *)value); else - opt_recv->dccpor_elapsed_time = - ntohl(*(u32 *)value); + elapsed_time = ntohl(*(u32 *)value); + + if (elapsed_time > opt_recv->dccpor_elapsed_time) + opt_recv->dccpor_elapsed_time = elapsed_time; dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, - opt_recv->dccpor_elapsed_time); + elapsed_time); break; /* * From draft-ietf-dccp-spec-11.txt: @@ -359,9 +361,13 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) #endif struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; int len = ap->dccpap_buf_vector_len + 2; - const u32 elapsed_time = timeval_now_delta(&ap->dccpap_time) / 10; + struct timeval now; + u32 elapsed_time; unsigned char *to, *from; + dccp_timestamp(sk, &now); + elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10; + if (elapsed_time != 0) dccp_insert_option_elapsed_time(sk, skb, elapsed_time); @@ -426,13 +432,29 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) (unsigned long long) ap->dccpap_ack_ackno); } +void dccp_timestamp(const struct sock *sk, struct timeval *tv) +{ + const struct dccp_sock *dp = dccp_sk(sk); + + do_gettimeofday(tv); + tv->tv_sec -= dp->dccps_epoch.tv_sec; + tv->tv_usec -= dp->dccps_epoch.tv_usec; + + while (tv->tv_usec 
< 0) { + tv->tv_sec--; + tv->tv_usec += USEC_PER_SEC; + } +} + +EXPORT_SYMBOL_GPL(dccp_timestamp); + void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) { struct timeval tv; u32 now; - do_gettimeofday(&tv); - now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10; + dccp_timestamp(sk, &tv); + now = timeval_usecs(&tv) / 10; /* yes this will overflow but that is the point as we want a * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ @@ -450,13 +472,17 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : "server TX opt: "; #endif + struct timeval now; u32 tstamp_echo; - const u32 elapsed_time = - timeval_now_delta(&dp->dccps_timestamp_time) / 10; - const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); - const int len = 6 + elapsed_time_len; + u32 elapsed_time; + int len, elapsed_time_len; unsigned char *to; + dccp_timestamp(sk, &now); + elapsed_time = timeval_delta(&now, &dp->dccps_timestamp_time) / 10; + elapsed_time_len = dccp_elapsed_time_len(elapsed_time); + len = 6 + elapsed_time_len; + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert " "timestamp echo!\n"); @@ -505,13 +531,18 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1)) dccp_insert_option_ack_vector(sk, skb); - if (dp->dccps_timestamp_echo != 0) dccp_insert_option_timestamp_echo(sk, skb); } - ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); - ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); + if (dp->dccps_hc_rx_insert_options) { + ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); + dp->dccps_hc_rx_insert_options = 0; + } + if (dp->dccps_hc_tx_insert_options) { + ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); + dp->dccps_hc_tx_insert_options = 0; + } /* XXX: insert other options 
when appropriate */ @@ -616,7 +647,8 @@ static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, /* * Implements the draft-ietf-dccp-spec-11.txt Appendix A */ -int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) +int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, + u64 ackno, u8 state) { /* * Check at the right places if the buffer is full, if it is, tell the @@ -697,7 +729,7 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) } ap->dccpap_buf_ackno = ackno; - do_gettimeofday(&ap->dccpap_time); + dccp_timestamp(sk, &ap->dccpap_time); out: dccp_pr_debug(""); dccp_ackpkts_print(ap); diff --git a/net/dccp/output.c b/net/dccp/output.c index 28de157..ea6d0e9 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -522,7 +522,4 @@ void dccp_send_close(struct sock *sk, const int active) dccp_transmit_skb(sk, skb_clone(skb, prio)); } else dccp_transmit_skb(sk, skb); - - ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 2c915f3..3407f19 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -117,8 +117,7 @@ static struct dn_rt_hash_bucket *dn_rt_hash_table; static unsigned dn_rt_hash_mask; static struct timer_list dn_route_timer; -static struct timer_list dn_rt_flush_timer = - TIMER_INITIALIZER(dn_run_flush, 0, 0); +static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush, 0, 0); int decnet_dst_gc_interval = 2; static struct dst_ops dn_dst_ops = { diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig index 58ed431..91b16fb 100644 --- a/net/ieee80211/Kconfig +++ b/net/ieee80211/Kconfig @@ -1,6 +1,5 @@ config IEEE80211 tristate "Generic IEEE 802.11 Networking Stack" - select NET_RADIO ---help--- This option enables the hardware independent IEEE 802.11 networking stack. 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index bf147f8..a9d84f9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1248,11 +1248,6 @@ module_init(inet_init); /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS -#ifdef CONFIG_IP_FIB_TRIE -extern int fib_stat_proc_init(void); -extern void fib_stat_proc_exit(void); -#endif - static int __init ipv4_proc_init(void) { int rc = 0; @@ -1265,19 +1260,11 @@ static int __init ipv4_proc_init(void) goto out_udp; if (fib_proc_init()) goto out_fib; -#ifdef CONFIG_IP_FIB_TRIE - if (fib_stat_proc_init()) - goto out_fib_stat; -#endif if (ip_misc_proc_init()) goto out_misc; out: return rc; out_misc: -#ifdef CONFIG_IP_FIB_TRIE - fib_stat_proc_exit(); -out_fib_stat: -#endif fib_proc_exit(); out_fib: udp4_proc_exit(); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index b2dea4e..1b63b48 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -43,7 +43,7 @@ * 2 of the License, or (at your option) any later version. 
*/ -#define VERSION "0.402" +#define VERSION "0.403" #include <linux/config.h> #include <asm/uaccess.h> @@ -164,7 +164,6 @@ static struct node *resize(struct trie *t, struct tnode *tn); static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); static void tnode_free(struct tnode *tn); -static void trie_dump_seq(struct seq_file *seq, struct trie *t); static kmem_cache_t *fn_alias_kmem __read_mostly; static struct trie *trie_local = NULL, *trie_main = NULL; @@ -1971,558 +1970,525 @@ struct fib_table * __init fib_hash_init(int id) return tb; } -/* Trie dump functions */ +#ifdef CONFIG_PROC_FS +/* Depth first Trie walk iterator */ +struct fib_trie_iter { + struct tnode *tnode; + struct trie *trie; + unsigned index; + unsigned depth; +}; -static void putspace_seq(struct seq_file *seq, int n) +static struct node *fib_trie_get_next(struct fib_trie_iter *iter) { - while (n--) - seq_printf(seq, " "); -} + struct tnode *tn = iter->tnode; + unsigned cindex = iter->index; + struct tnode *p; -static void printbin_seq(struct seq_file *seq, unsigned int v, int bits) -{ - while (bits--) - seq_printf(seq, "%s", (v & (1<<bits))?"1":"0"); -} + pr_debug("get_next iter={node=%p index=%d depth=%d}\n", + iter->tnode, iter->index, iter->depth); +rescan: + while (cindex < (1<<tn->bits)) { + struct node *n = tnode_get_child(tn, cindex); -static void printnode_seq(struct seq_file *seq, int indent, struct node *n, - int pend, int cindex, int bits) -{ - putspace_seq(seq, indent); - if (IS_LEAF(n)) - seq_printf(seq, "|"); - else - seq_printf(seq, "+"); - if (bits) { - seq_printf(seq, "%d/", cindex); - printbin_seq(seq, cindex, bits); - seq_printf(seq, ": "); - } else - seq_printf(seq, "<root>: "); - seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n); + if (n) { + if (IS_LEAF(n)) { + iter->tnode = tn; + iter->index = cindex + 1; + } else { + /* push down one level */ + iter->tnode = (struct tnode *) n; + iter->index 
= 0; + ++iter->depth; + } + return n; + } - if (IS_LEAF(n)) { - struct leaf *l = (struct leaf *)n; - struct fib_alias *fa; - int i; + ++cindex; + } - seq_printf(seq, "key=%d.%d.%d.%d\n", - n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256); - - for (i = 32; i >= 0; i--) - if (find_leaf_info(&l->list, i)) { - struct list_head *fa_head = get_fa_head(l, i); - - if (!fa_head) - continue; - - if (list_empty(fa_head)) - continue; - - putspace_seq(seq, indent+2); - seq_printf(seq, "{/%d...dumping}\n", i); - - list_for_each_entry_rcu(fa, fa_head, fa_list) { - putspace_seq(seq, indent+2); - if (fa->fa_info == NULL) { - seq_printf(seq, "Error fa_info=NULL\n"); - continue; - } - if (fa->fa_info->fib_nh == NULL) { - seq_printf(seq, "Error _fib_nh=NULL\n"); - continue; - } - - seq_printf(seq, "{type=%d scope=%d TOS=%d}\n", - fa->fa_type, - fa->fa_scope, - fa->fa_tos); - } - } - } else { - struct tnode *tn = (struct tnode *)n; - int plen = ((struct tnode *)n)->pos; - t_key prf = MASK_PFX(n->key, plen); - - seq_printf(seq, "key=%d.%d.%d.%d/%d\n", - prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen); - - putspace_seq(seq, indent); seq_printf(seq, "| "); - seq_printf(seq, "{key prefix=%08x/", tn->key & TKEY_GET_MASK(0, tn->pos)); - printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos); - seq_printf(seq, "}\n"); - putspace_seq(seq, indent); seq_printf(seq, "| "); - seq_printf(seq, "{pos=%d", tn->pos); - seq_printf(seq, " (skip=%d bits)", tn->pos - pend); - seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits)); - putspace_seq(seq, indent); seq_printf(seq, "| "); - seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children); + /* Current node exhausted, pop back up */ + p = NODE_PARENT(tn); + if (p) { + cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; + tn = p; + --iter->depth; + goto rescan; } + + /* got root? 
*/ + return NULL; } -static void trie_dump_seq(struct seq_file *seq, struct trie *t) +static struct node *fib_trie_get_first(struct fib_trie_iter *iter, + struct trie *t) { - struct node *n; - int cindex = 0; - int indent = 1; - int pend = 0; - int depth = 0; - struct tnode *tn; - - rcu_read_lock(); - n = rcu_dereference(t->trie); - seq_printf(seq, "------ trie_dump of t=%p ------\n", t); + struct node *n = rcu_dereference(t->trie); - if (!n) { - seq_printf(seq, "------ trie is empty\n"); - - rcu_read_unlock(); - return; + if (n && IS_TNODE(n)) { + iter->tnode = (struct tnode *) n; + iter->trie = t; + iter->index = 0; + iter->depth = 0; + return n; } + return NULL; +} - printnode_seq(seq, indent, n, pend, cindex, 0); - - if (!IS_TNODE(n)) { - rcu_read_unlock(); - return; - } - - tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); - indent += 3; - depth++; - - while (tn && cindex < (1 << tn->bits)) { - struct node *child = rcu_dereference(tn->child[cindex]); - if (!child) - cindex++; - else { - /* Got a child */ - printnode_seq(seq, indent, child, pend, - cindex, tn->bits); - - if (IS_LEAF(child)) - cindex++; - - else { - /* - * New tnode. 
Decend one level - */ - - depth++; - n = child; - tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - putspace_seq(seq, indent); - seq_printf(seq, "\\--\n"); - indent += 3; - cindex = 0; - } - } - - /* - * Test if we are done - */ - - while (cindex >= (1 << tn->bits)) { - /* - * Move upwards and test for root - * pop off all traversed nodes - */ +static void trie_collect_stats(struct trie *t, struct trie_stat *s) +{ + struct node *n; + struct fib_trie_iter iter; - if (NODE_PARENT(tn) == NULL) { - tn = NULL; - break; - } + memset(s, 0, sizeof(*s)); - cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); - cindex++; - tn = NODE_PARENT(tn); - pend = tn->pos + tn->bits; - indent -= 3; - depth--; + rcu_read_lock(); + for (n = fib_trie_get_first(&iter, t); n; + n = fib_trie_get_next(&iter)) { + if (IS_LEAF(n)) { + s->leaves++; + s->totdepth += iter.depth; + if (iter.depth > s->maxdepth) + s->maxdepth = iter.depth; + } else { + const struct tnode *tn = (const struct tnode *) n; + int i; + + s->tnodes++; + s->nodesizes[tn->bits]++; + for (i = 0; i < (1<<tn->bits); i++) + if (!tn->child[i]) + s->nullpointers++; } } rcu_read_unlock(); } -static struct trie_stat *trie_stat_new(void) +/* + * This outputs /proc/net/fib_triestats + */ +static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) { - struct trie_stat *s; - int i; + unsigned i, max, pointers, bytes, avdepth; - s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL); - if (!s) - return NULL; + if (stat->leaves) + avdepth = stat->totdepth*100 / stat->leaves; + else + avdepth = 0; - s->totdepth = 0; - s->maxdepth = 0; - s->tnodes = 0; - s->leaves = 0; - s->nullpointers = 0; + seq_printf(seq, "\tAver depth: %d.%02d\n", avdepth / 100, avdepth % 100 ); + seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); - for (i = 0; i < MAX_CHILDS; i++) - s->nodesizes[i] = 0; + seq_printf(seq, "\tLeaves: %u\n", stat->leaves); - return s; -} + bytes = sizeof(struct leaf) * stat->leaves; + 
seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes); + bytes += sizeof(struct tnode) * stat->tnodes; -static struct trie_stat *trie_collect_stats(struct trie *t) -{ - struct node *n; - struct trie_stat *s = trie_stat_new(); - int cindex = 0; - int pend = 0; - int depth = 0; + max = MAX_CHILDS-1; + while (max >= 0 && stat->nodesizes[max] == 0) + max--; - if (!s) - return NULL; + pointers = 0; + for (i = 1; i <= max; i++) + if (stat->nodesizes[i] != 0) { + seq_printf(seq, " %d: %d", i, stat->nodesizes[i]); + pointers += (1<<i) * stat->nodesizes[i]; + } + seq_putc(seq, '\n'); + seq_printf(seq, "\tPointers: %d\n", pointers); - rcu_read_lock(); - n = rcu_dereference(t->trie); + bytes += sizeof(struct node *) * pointers; + seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers); + seq_printf(seq, "Total size: %d kB\n", (bytes + 1023) / 1024); - if (!n) - return s; +#ifdef CONFIG_IP_FIB_TRIE_STATS + seq_printf(seq, "Counters:\n---------\n"); + seq_printf(seq,"gets = %d\n", t->stats.gets); + seq_printf(seq,"backtracks = %d\n", t->stats.backtrack); + seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); + seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); + seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); + seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped); +#ifdef CLEAR_STATS + memset(&(t->stats), 0, sizeof(t->stats)); +#endif +#endif /* CONFIG_IP_FIB_TRIE_STATS */ +} - if (IS_TNODE(n)) { - struct tnode *tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - s->nodesizes[tn->bits]++; - depth++; - - while (tn && cindex < (1 << tn->bits)) { - struct node *ch = rcu_dereference(tn->child[cindex]); - if (ch) { - - /* Got a child */ - - if (IS_LEAF(tn->child[cindex])) { - cindex++; - - /* stats */ - if (depth > s->maxdepth) - s->maxdepth = depth; - s->totdepth += depth; - s->leaves++; - } else { - /* - * New tnode. 
Decend one level - */ - - s->tnodes++; - s->nodesizes[tn->bits]++; - depth++; - - n = ch; - tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - - cindex = 0; - } - } else { - cindex++; - s->nullpointers++; - } +static int fib_triestat_seq_show(struct seq_file *seq, void *v) +{ + struct trie_stat *stat; - /* - * Test if we are done - */ + stat = kmalloc(sizeof(*stat), GFP_KERNEL); + if (!stat) + return -ENOMEM; - while (cindex >= (1 << tn->bits)) { - /* - * Move upwards and test for root - * pop off all traversed nodes - */ + seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", + sizeof(struct leaf), sizeof(struct tnode)); - if (NODE_PARENT(tn) == NULL) { - tn = NULL; - n = NULL; - break; - } + if (trie_local) { + seq_printf(seq, "Local:\n"); + trie_collect_stats(trie_local, stat); + trie_show_stats(seq, stat); + } - cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); - tn = NODE_PARENT(tn); - cindex++; - n = (struct node *)tn; - pend = tn->pos+tn->bits; - depth--; - } - } + if (trie_main) { + seq_printf(seq, "Main:\n"); + trie_collect_stats(trie_main, stat); + trie_show_stats(seq, stat); } + kfree(stat); - rcu_read_unlock(); - return s; + return 0; } -#ifdef CONFIG_PROC_FS - -static struct fib_alias *fib_triestat_get_first(struct seq_file *seq) +static int fib_triestat_seq_open(struct inode *inode, struct file *file) { - return NULL; + return single_open(file, fib_triestat_seq_show, NULL); } -static struct fib_alias *fib_triestat_get_next(struct seq_file *seq) +static struct file_operations fib_triestat_fops = { + .owner = THIS_MODULE, + .open = fib_triestat_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, + loff_t pos) { + loff_t idx = 0; + struct node *n; + + for (n = fib_trie_get_first(iter, trie_local); + n; ++idx, n = fib_trie_get_next(iter)) { + if (pos == idx) + return n; + } + + for (n = 
fib_trie_get_first(iter, trie_main); + n; ++idx, n = fib_trie_get_next(iter)) { + if (pos == idx) + return n; + } return NULL; } -static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos) +static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) { - if (!ip_fib_main_table) - return NULL; - - if (*pos) - return fib_triestat_get_next(seq); - else + rcu_read_lock(); + if (*pos == 0) return SEQ_START_TOKEN; + return fib_trie_get_idx(seq->private, *pos - 1); } -static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct fib_trie_iter *iter = seq->private; + void *l = v; + ++*pos; if (v == SEQ_START_TOKEN) - return fib_triestat_get_first(seq); - else - return fib_triestat_get_next(seq); -} + return fib_trie_get_idx(iter, 0); -static void fib_triestat_seq_stop(struct seq_file *seq, void *v) -{ + v = fib_trie_get_next(iter); + BUG_ON(v == l); + if (v) + return v; -} + /* continue scan in next trie */ + if (iter->trie == trie_local) + return fib_trie_get_first(iter, trie_main); -/* - * This outputs /proc/net/fib_triestats - * - * It always works in backward compatibility mode. - * The format of the file is not supposed to be changed. 
- */ + return NULL; +} -static void collect_and_show(struct trie *t, struct seq_file *seq) +static void fib_trie_seq_stop(struct seq_file *seq, void *v) { - int bytes = 0; /* How many bytes are used, a ref is 4 bytes */ - int i, max, pointers; - struct trie_stat *stat; - int avdepth; - - stat = trie_collect_stats(t); - - bytes = 0; - seq_printf(seq, "trie=%p\n", t); - - if (stat) { - if (stat->leaves) - avdepth = stat->totdepth*100 / stat->leaves; - else - avdepth = 0; - seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100); - seq_printf(seq, "Max depth: %4d\n", stat->maxdepth); + rcu_read_unlock(); +} - seq_printf(seq, "Leaves: %d\n", stat->leaves); - bytes += sizeof(struct leaf) * stat->leaves; - seq_printf(seq, "Internal nodes: %d\n", stat->tnodes); - bytes += sizeof(struct tnode) * stat->tnodes; +static void seq_indent(struct seq_file *seq, int n) +{ + while (n-- > 0) seq_puts(seq, " "); +} - max = MAX_CHILDS-1; +static inline const char *rtn_scope(enum rt_scope_t s) +{ + static char buf[32]; - while (max >= 0 && stat->nodesizes[max] == 0) - max--; - pointers = 0; + switch(s) { + case RT_SCOPE_UNIVERSE: return "universe"; + case RT_SCOPE_SITE: return "site"; + case RT_SCOPE_LINK: return "link"; + case RT_SCOPE_HOST: return "host"; + case RT_SCOPE_NOWHERE: return "nowhere"; + default: + snprintf(buf, sizeof(buf), "scope=%d", s); + return buf; + } +} - for (i = 1; i <= max; i++) - if (stat->nodesizes[i] != 0) { - seq_printf(seq, " %d: %d", i, stat->nodesizes[i]); - pointers += (1<<i) * stat->nodesizes[i]; - } - seq_printf(seq, "\n"); - seq_printf(seq, "Pointers: %d\n", pointers); - bytes += sizeof(struct node *) * pointers; - seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers); - seq_printf(seq, "Total size: %d kB\n", bytes / 1024); +static const char *rtn_type_names[__RTN_MAX] = { + [RTN_UNSPEC] = "UNSPEC", + [RTN_UNICAST] = "UNICAST", + [RTN_LOCAL] = "LOCAL", + [RTN_BROADCAST] = "BROADCAST", + [RTN_ANYCAST] = "ANYCAST", + [RTN_MULTICAST] = 
"MULTICAST", + [RTN_BLACKHOLE] = "BLACKHOLE", + [RTN_UNREACHABLE] = "UNREACHABLE", + [RTN_PROHIBIT] = "PROHIBIT", + [RTN_THROW] = "THROW", + [RTN_NAT] = "NAT", + [RTN_XRESOLVE] = "XRESOLVE", +}; - kfree(stat); - } +static inline const char *rtn_type(unsigned t) +{ + static char buf[32]; -#ifdef CONFIG_IP_FIB_TRIE_STATS - seq_printf(seq, "Counters:\n---------\n"); - seq_printf(seq,"gets = %d\n", t->stats.gets); - seq_printf(seq,"backtracks = %d\n", t->stats.backtrack); - seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); - seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); - seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); - seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped); -#ifdef CLEAR_STATS - memset(&(t->stats), 0, sizeof(t->stats)); -#endif -#endif /* CONFIG_IP_FIB_TRIE_STATS */ + if (t < __RTN_MAX && rtn_type_names[t]) + return rtn_type_names[t]; + snprintf(buf, sizeof(buf), "type %d", t); + return buf; } -static int fib_triestat_seq_show(struct seq_file *seq, void *v) +/* Pretty print the trie */ +static int fib_trie_seq_show(struct seq_file *seq, void *v) { - char bf[128]; + const struct fib_trie_iter *iter = seq->private; + struct node *n = v; - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", - sizeof(struct leaf), sizeof(struct tnode)); - if (trie_local) - collect_and_show(trie_local, seq); + if (v == SEQ_START_TOKEN) + return 0; - if (trie_main) - collect_and_show(trie_main, seq); - } else { - snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400); + if (IS_TNODE(n)) { + struct tnode *tn = (struct tnode *) n; + t_key prf = ntohl(MASK_PFX(tn->key, tn->pos)); - seq_printf(seq, "%-127s\n", bf); + if (!NODE_PARENT(n)) { + if (iter->trie == trie_local) + seq_puts(seq, "<local>:\n"); + else + seq_puts(seq, "<main>:\n"); + } else { + seq_indent(seq, iter->depth-1); + seq_printf(seq, " +-- %d.%d.%d.%d/%d\n", + 
NIPQUAD(prf), tn->pos); + } + } else { + struct leaf *l = (struct leaf *) n; + int i; + u32 val = ntohl(l->key); + + seq_indent(seq, iter->depth); + seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); + for (i = 32; i >= 0; i--) { + struct leaf_info *li = find_leaf_info(&l->list, i); + if (li) { + struct fib_alias *fa; + list_for_each_entry_rcu(fa, &li->falh, fa_list) { + seq_indent(seq, iter->depth+1); + seq_printf(seq, " /%d %s %s", i, + rtn_scope(fa->fa_scope), + rtn_type(fa->fa_type)); + if (fa->fa_tos) + seq_printf(seq, "tos =%d\n", + fa->fa_tos); + seq_putc(seq, '\n'); + } + } + } } + return 0; } -static struct seq_operations fib_triestat_seq_ops = { - .start = fib_triestat_seq_start, - .next = fib_triestat_seq_next, - .stop = fib_triestat_seq_stop, - .show = fib_triestat_seq_show, +static struct seq_operations fib_trie_seq_ops = { + .start = fib_trie_seq_start, + .next = fib_trie_seq_next, + .stop = fib_trie_seq_stop, + .show = fib_trie_seq_show, }; -static int fib_triestat_seq_open(struct inode *inode, struct file *file) +static int fib_trie_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; + struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); - rc = seq_open(file, &fib_triestat_seq_ops); + if (!s) + goto out; + + rc = seq_open(file, &fib_trie_seq_ops); if (rc) goto out_kfree; - seq = file->private_data; + seq = file->private_data; + seq->private = s; + memset(s, 0, sizeof(*s)); out: return rc; out_kfree: + kfree(s); goto out; } -static struct file_operations fib_triestat_seq_fops = { - .owner = THIS_MODULE, - .open = fib_triestat_seq_open, - .read = seq_read, - .llseek = seq_lseek, +static struct file_operations fib_trie_fops = { + .owner = THIS_MODULE, + .open = fib_trie_seq_open, + .read = seq_read, + .llseek = seq_lseek, .release = seq_release_private, }; -int __init fib_stat_proc_init(void) -{ - if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops)) - return -ENOMEM; - return 0; -} - 
-void __init fib_stat_proc_exit(void) +static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi) { - proc_net_remove("fib_triestat"); -} + static unsigned type2flags[RTN_MAX + 1] = { + [7] = RTF_REJECT, [8] = RTF_REJECT, + }; + unsigned flags = type2flags[type]; -static struct fib_alias *fib_trie_get_first(struct seq_file *seq) -{ - return NULL; + if (fi && fi->fib_nh->nh_gw) + flags |= RTF_GATEWAY; + if (mask == 0xFFFFFFFF) + flags |= RTF_HOST; + flags |= RTF_UP; + return flags; } -static struct fib_alias *fib_trie_get_next(struct seq_file *seq) +/* + * This outputs /proc/net/route. + * The format of the file is not supposed to be changed + * and needs to be same as fib_hash output to avoid breaking + * legacy utilities + */ +static int fib_route_seq_show(struct seq_file *seq, void *v) { - return NULL; -} + struct leaf *l = v; + int i; + char bf[128]; -static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) -{ - if (!ip_fib_main_table) - return NULL; + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " + "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" + "\tWindow\tIRTT"); + return 0; + } - if (*pos) - return fib_trie_get_next(seq); - else - return SEQ_START_TOKEN; -} + if (IS_TNODE(l)) + return 0; -static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - if (v == SEQ_START_TOKEN) - return fib_trie_get_first(seq); - else - return fib_trie_get_next(seq); + for (i=32; i>=0; i--) { + struct leaf_info *li = find_leaf_info(&l->list, i); + struct fib_alias *fa; + u32 mask, prefix; -} + if (!li) + continue; -static void fib_trie_seq_stop(struct seq_file *seq, void *v) -{ -} + mask = inet_make_mask(li->plen); + prefix = htonl(l->key); -/* - * This outputs /proc/net/fib_trie. - * - * It always works in backward compatibility mode. - * The format of the file is not supposed to be changed. 
- */ + list_for_each_entry_rcu(fa, &li->falh, fa_list) { + const struct fib_info *fi = rcu_dereference(fa->fa_info); + unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); -static int fib_trie_seq_show(struct seq_file *seq, void *v) -{ - char bf[128]; + if (fa->fa_type == RTN_BROADCAST + || fa->fa_type == RTN_MULTICAST) + continue; - if (v == SEQ_START_TOKEN) { - if (trie_local) - trie_dump_seq(seq, trie_local); + if (fi) + snprintf(bf, sizeof(bf), + "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", + fi->fib_dev ? fi->fib_dev->name : "*", + prefix, + fi->fib_nh->nh_gw, flags, 0, 0, + fi->fib_priority, + mask, + (fi->fib_advmss ? fi->fib_advmss + 40 : 0), + fi->fib_window, + fi->fib_rtt >> 3); + else + snprintf(bf, sizeof(bf), + "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", + prefix, 0, flags, 0, 0, 0, + mask, 0, 0, 0); - if (trie_main) - trie_dump_seq(seq, trie_main); - } else { - snprintf(bf, sizeof(bf), - "*\t%08X\t%08X", 200, 400); - seq_printf(seq, "%-127s\n", bf); + seq_printf(seq, "%-127s\n", bf); + } } return 0; } -static struct seq_operations fib_trie_seq_ops = { - .start = fib_trie_seq_start, - .next = fib_trie_seq_next, - .stop = fib_trie_seq_stop, - .show = fib_trie_seq_show, +static struct seq_operations fib_route_seq_ops = { + .start = fib_trie_seq_start, + .next = fib_trie_seq_next, + .stop = fib_trie_seq_stop, + .show = fib_route_seq_show, }; -static int fib_trie_seq_open(struct inode *inode, struct file *file) +static int fib_route_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; + struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); - rc = seq_open(file, &fib_trie_seq_ops); + if (!s) + goto out; + + rc = seq_open(file, &fib_route_seq_ops); if (rc) goto out_kfree; - seq = file->private_data; + seq = file->private_data; + seq->private = s; + memset(s, 0, sizeof(*s)); out: return rc; out_kfree: + kfree(s); goto out; } -static struct file_operations fib_trie_seq_fops = { - .owner = 
THIS_MODULE, - .open = fib_trie_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release= seq_release_private, +static struct file_operations fib_route_fops = { + .owner = THIS_MODULE, + .open = fib_route_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, }; int __init fib_proc_init(void) { - if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops)) - return -ENOMEM; + if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops)) + goto out1; + + if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops)) + goto out2; + + if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops)) + goto out3; + return 0; + +out3: + proc_net_remove("fib_triestat"); +out2: + proc_net_remove("fib_trie"); +out1: + return -ENOMEM; } void __init fib_proc_exit(void) { proc_net_remove("fib_trie"); + proc_net_remove("fib_triestat"); + proc_net_remove("route"); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index f84ba9c..2fc3fd3 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -100,8 +100,7 @@ DEFINE_SPINLOCK(inet_peer_unused_lock); #define PEER_MAX_CLEANUP_WORK 30 static void peer_check_expire(unsigned long dummy); -static struct timer_list peer_periodic_timer = - TIMER_INITIALIZER(peer_check_expire, 0, 0); +static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); /* Exported for sysctl_net_ipv4. */ int inet_peer_gc_mintime = 10 * HZ, diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 953129d..e8674ba 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1103,10 +1103,8 @@ static int __init ic_dynamic(void) #endif jiff = jiffies + (d->next ? 
CONF_INTER_TIMEOUT : timeout); - while (time_before(jiffies, jiff) && !ic_got_reply) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); - } + while (time_before(jiffies, jiff) && !ic_got_reply) + schedule_timeout_uninterruptible(1); #ifdef IPCONFIG_DHCP /* DHCP isn't done until we get a DHCPACK. */ if ((ic_got_reply & IC_BOOTP) diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index 2b5cf9c..bb72466 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -104,12 +104,28 @@ out: static struct ip_conntrack_helper helper = { .name = "netbios-ns", .tuple = { - .src.u.udp.port = __constant_htons(137), - .dst.protonum = IPPROTO_UDP, + .src = { + .u = { + .udp = { + .port = __constant_htons(137), + } + } + }, + .dst = { + .protonum = IPPROTO_UDP, + }, }, .mask = { - .src.u.udp.port = 0xFFFF, - .dst.protonum = 0xFF, + .src = { + .u = { + .udp = { + .port = 0xFFFF, + } + } + }, + .dst = { + .protonum = 0xFF, + }, }, .max_expected = 1, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index f115a84..f057025 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -92,10 +92,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb, fl.fl_ip_sport = tcph->dest; fl.fl_ip_dport = tcph->source; - if (xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0)) { - dst_release(&rt->u.dst); - rt = NULL; - } + xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); return rt; } diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index c1889f8..0cee286 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/file.h> +#include <linux/rcupdate.h> #include <net/sock.h> #include <linux/netfilter_ipv4/ipt_owner.h> diff --git a/net/ipv4/route.c 
b/net/ipv4/route.c index 8c0b14e..8549f26 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1760,6 +1760,7 @@ static inline int __mkroute_input(struct sk_buff *skb, goto cleanup; } + atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED if (res->fi->fib_nhs > 1) @@ -1820,7 +1821,6 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); if (err) return err; - atomic_set(&rth->u.dst.__refcnt, 1); /* put it into the cache */ hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos); @@ -1834,8 +1834,8 @@ static inline int ip_mkroute_input(struct sk_buff *skb, u32 daddr, u32 saddr, u32 tos) { #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct rtable* rth = NULL; - unsigned char hop, hopcount, lasthop; + struct rtable* rth = NULL, *rtres; + unsigned char hop, hopcount; int err = -EINVAL; unsigned int hash; @@ -1844,8 +1844,6 @@ static inline int ip_mkroute_input(struct sk_buff *skb, else hopcount = 1; - lasthop = hopcount - 1; - /* distinguish between multipath and singlepath */ if (hopcount < 2) return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, @@ -1855,6 +1853,10 @@ static inline int ip_mkroute_input(struct sk_buff *skb, for (hop = 0; hop < hopcount; hop++) { res->nh_sel = hop; + /* put reference to previous result */ + if (hop) + ip_rt_put(rtres); + /* create a routing cache entry */ err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); @@ -1863,7 +1865,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, /* put it into the cache */ hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos); - err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); + err = rt_intern_hash(hash, rth, &rtres); if (err) return err; @@ -1873,13 +1875,8 @@ static inline int ip_mkroute_input(struct sk_buff *skb, FIB_RES_NETMASK(*res), res->prefixlen, &FIB_RES_NH(*res)); - - /* only for the last hop the reference count is handled - * 
outside - */ - if (hop == lasthop) - atomic_set(&(skb->dst->__refcnt), 1); } + skb->dst = &rtres->u.dst; return err; #else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos); @@ -2208,6 +2205,7 @@ static inline int __mkroute_output(struct rtable **result, goto cleanup; } + atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED if (res->fi) { @@ -2290,8 +2288,6 @@ static inline int ip_mkroute_output_def(struct rtable **rp, if (err == 0) { u32 tos = RT_FL_TOS(oldflp); - atomic_set(&rth->u.dst.__refcnt, 1); - hash = rt_hash_code(oldflp->fl4_dst, oldflp->fl4_src ^ (oldflp->oif << 5), tos); err = rt_intern_hash(hash, rth, rp); @@ -2326,6 +2322,10 @@ static inline int ip_mkroute_output(struct rtable** rp, dev2nexthop = FIB_RES_DEV(*res); dev_hold(dev2nexthop); + /* put reference to previous result */ + if (hop) + ip_rt_put(*rp); + err = __mkroute_output(&rth, res, fl, oldflp, dev2nexthop, flags); @@ -2350,7 +2350,6 @@ static inline int ip_mkroute_output(struct rtable** rp, if (err != 0) return err; } - atomic_set(&(*rp)->u.dst.__refcnt, 1); return err; } else { return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6094db5..c10e443 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -485,11 +485,6 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; buff->tstamp = skb->tstamp; - if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { - tp->lost_out -= tcp_skb_pcount(skb); - tp->left_out -= tcp_skb_pcount(skb); - } - old_factor = tcp_skb_pcount(skb); /* Fix up tso_factor for both original and new SKB. */ @@ -499,7 +494,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss /* If this packet has been sent out already, we must * adjust the various packet counters. 
*/ - if (after(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) { + if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) { int diff = old_factor - tcp_skb_pcount(skb) - tcp_skb_pcount(buff); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e5beca7..e0bd101 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1141,7 +1141,7 @@ int udp_rcv(struct sk_buff *skb) if (ulen > len || ulen < sizeof(*uh)) goto short_packet; - if (pskb_trim(skb, ulen)) + if (pskb_trim_rcsum(skb, ulen)) goto short_packet; if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6d6fb74..2fea3f4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -123,8 +123,7 @@ DEFINE_RWLOCK(addrconf_lock); static void addrconf_verify(unsigned long); -static struct timer_list addr_chk_timer = - TIMER_INITIALIZER(addrconf_verify, 0, 0); +static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); static DEFINE_SPINLOCK(addrconf_verify_lock); static void addrconf_join_anycast(struct inet6_ifaddr *ifp); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 01468fa..cc51840 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -175,10 +175,8 @@ ipv4_connected: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - dst_release(dst); + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) goto out; - } /* source address lookup done in ip6_dst_lookup */ @@ -390,32 +388,101 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } + if (np->rxopt.bits.rxtclass) { + int tclass = (ntohl(*(u32 *)skb->nh.ipv6h) >> 20) & 0xff; + put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); + } + if (np->rxopt.bits.rxflow && (*(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) { u32 flowinfo = *(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK; put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); } + + /* HbH is allowed 
only once */ if (np->rxopt.bits.hopopts && opt->hop) { u8 *ptr = skb->nh.raw + opt->hop; put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr); } - if (np->rxopt.bits.dstopts && opt->dst0) { + + if (opt->lastopt && + (np->rxopt.bits.dstopts || np->rxopt.bits.srcrt)) { + /* + * Silly enough, but we need to reparse in order to + * report extension headers (except for HbH) + * in order. + * + * Also note that IPV6_RECVRTHDRDSTOPTS is NOT + * (and WILL NOT be) defined because + * IPV6_RECVDSTOPTS is more generic. --yoshfuji + */ + unsigned int off = sizeof(struct ipv6hdr); + u8 nexthdr = skb->nh.ipv6h->nexthdr; + + while (off <= opt->lastopt) { + unsigned len; + u8 *ptr = skb->nh.raw + off; + + switch(nexthdr) { + case IPPROTO_DSTOPTS: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 3; + if (np->rxopt.bits.dstopts) + put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, len, ptr); + break; + case IPPROTO_ROUTING: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 3; + if (np->rxopt.bits.srcrt) + put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, len, ptr); + break; + case IPPROTO_AH: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 2; + break; + default: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 3; + break; + } + + off += len; + } + } + + /* socket options in old style */ + if (np->rxopt.bits.rxoinfo) { + struct in6_pktinfo src_info; + + src_info.ipi6_ifindex = opt->iif; + ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr); + put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); + } + if (np->rxopt.bits.rxohlim) { + int hlim = skb->nh.ipv6h->hop_limit; + put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); + } + if (np->rxopt.bits.ohopopts && opt->hop) { + u8 *ptr = skb->nh.raw + opt->hop; + put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr); + } + if (np->rxopt.bits.odstopts && opt->dst0) { u8 *ptr = skb->nh.raw + opt->dst0; - put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr); + put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); } - if 
(np->rxopt.bits.srcrt && opt->srcrt) { + if (np->rxopt.bits.osrcrt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt); - put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, (rthdr->hdrlen+1) << 3, rthdr); + put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr); } - if (np->rxopt.bits.dstopts && opt->dst1) { + if (np->rxopt.bits.odstopts && opt->dst1) { u8 *ptr = skb->nh.raw + opt->dst1; - put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr); + put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); } return 0; } int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, - int *hlimit) + int *hlimit, int *tclass) { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; @@ -438,6 +505,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, switch (cmsg->cmsg_type) { case IPV6_PKTINFO: + case IPV6_2292PKTINFO: if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) { err = -EINVAL; goto exit_f; @@ -492,6 +560,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(u32 *)CMSG_DATA(cmsg); break; + case IPV6_2292HOPOPTS: case IPV6_HOPOPTS: if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) { err = -EINVAL; @@ -512,7 +581,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, opt->hopopt = hdr; break; - case IPV6_DSTOPTS: + case IPV6_2292DSTOPTS: if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) { err = -EINVAL; goto exit_f; @@ -536,6 +605,33 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, opt->dst1opt = hdr; break; + case IPV6_DSTOPTS: + case IPV6_RTHDRDSTOPTS: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) { + err = -EINVAL; + goto exit_f; + } + + hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg); + len = ((hdr->hdrlen + 1) << 3); + if (cmsg->cmsg_len < CMSG_LEN(len)) { + err = -EINVAL; + goto exit_f; + } + if (!capable(CAP_NET_RAW)) { + err = -EPERM; + goto 
exit_f; + } + if (cmsg->cmsg_type == IPV6_DSTOPTS) { + opt->opt_flen += len; + opt->dst1opt = hdr; + } else { + opt->opt_nflen += len; + opt->dst0opt = hdr; + } + break; + + case IPV6_2292RTHDR: case IPV6_RTHDR: if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) { err = -EINVAL; @@ -568,7 +664,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, opt->opt_nflen += len; opt->srcrt = rthdr; - if (opt->dst1opt) { + if (cmsg->cmsg_type == IPV6_2292RTHDR && opt->dst1opt) { int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3); opt->opt_nflen += dsthdrlen; @@ -579,6 +675,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, break; + case IPV6_2292HOPLIMIT: case IPV6_HOPLIMIT: if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { err = -EINVAL; @@ -588,6 +685,24 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, *hlimit = *(int *)CMSG_DATA(cmsg); break; + case IPV6_TCLASS: + { + int tc; + + err = -EINVAL; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { + goto exit_f; + } + + tc = *(int *)CMSG_DATA(cmsg); + if (tc < 0 || tc > 0xff) + goto exit_f; + + err = 0; + *tclass = tc; + + break; + } default: LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 5be6da2..9225495 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -164,6 +164,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp) return -1; } + opt->lastopt = skb->h.raw - skb->nh.raw; opt->dst1 = skb->h.raw - skb->nh.raw; if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { @@ -243,6 +244,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp) looped_back: if (hdr->segments_left == 0) { + opt->lastopt = skb->h.raw - skb->nh.raw; opt->srcrt = skb->h.raw - skb->nh.raw; skb->h.raw += (hdr->hdrlen + 1) << 3; opt->dst0 = opt->dst1; @@ -404,8 +406,7 @@ ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr) memcpy(opt->srcrt, hdr, sizeof(*hdr)); irthdr = (struct 
rt0_hdr*)opt->srcrt; - /* Obsolete field, MBZ, when originated by us */ - irthdr->bitmap = 0; + irthdr->reserved = 0; opt->srcrt->segments_left = n; for (i=0; i<n; i++) memcpy(irthdr->addr+i, rthdr->addr+(n-1-i), 16); @@ -459,11 +460,10 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } - if (pkt_len + sizeof(struct ipv6hdr) < skb->len) { - __pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr)); - if (skb->ip_summed == CHECKSUM_HW) - skb->ip_summed = CHECKSUM_NONE; - } + + if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) + goto drop; + return 1; drop: @@ -539,10 +539,15 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto, struct in6_addr **daddr) { - if (opt->srcrt) + if (opt->srcrt) { ipv6_push_rthdr(skb, proto, opt->srcrt, daddr); - if (opt->dst0opt) - ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt); + /* + * IPV6_RTHDRDSTOPTS is ignored + * unless IPV6_RTHDR is set (RFC3542). 
+ */ + if (opt->dst0opt) + ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt); + } if (opt->hopopt) ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt); } @@ -573,3 +578,97 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) } return opt2; } + +static int ipv6_renew_option(void *ohdr, + struct ipv6_opt_hdr __user *newopt, int newoptlen, + int inherit, + struct ipv6_opt_hdr **hdr, + char **p) +{ + if (inherit) { + if (ohdr) { + memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr)); + *hdr = (struct ipv6_opt_hdr *)*p; + *p += CMSG_ALIGN(ipv6_optlen(*(struct ipv6_opt_hdr **)hdr)); + } + } else { + if (newopt) { + if (copy_from_user(*p, newopt, newoptlen)) + return -EFAULT; + *hdr = (struct ipv6_opt_hdr *)*p; + if (ipv6_optlen(*(struct ipv6_opt_hdr **)hdr) > newoptlen) + return -EINVAL; + *p += CMSG_ALIGN(newoptlen); + } + } + return 0; +} + +struct ipv6_txoptions * +ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, + int newtype, + struct ipv6_opt_hdr __user *newopt, int newoptlen) +{ + int tot_len = 0; + char *p; + struct ipv6_txoptions *opt2; + int err; + + if (newtype != IPV6_HOPOPTS && opt->hopopt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->hopopt)); + if (newtype != IPV6_RTHDRDSTOPTS && opt->dst0opt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst0opt)); + if (newtype != IPV6_RTHDR && opt->srcrt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->srcrt)); + if (newtype != IPV6_DSTOPTS && opt->dst1opt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt)); + if (newopt && newoptlen) + tot_len += CMSG_ALIGN(newoptlen); + + if (!tot_len) + return NULL; + + opt2 = sock_kmalloc(sk, tot_len, GFP_ATOMIC); + if (!opt2) + return ERR_PTR(-ENOBUFS); + + memset(opt2, 0, tot_len); + + opt2->tot_len = tot_len; + p = (char *)(opt2 + 1); + + err = ipv6_renew_option(opt->hopopt, newopt, newoptlen, + newtype != IPV6_HOPOPTS, + &opt2->hopopt, &p); + if (err) + goto out; + + err = ipv6_renew_option(opt->dst0opt, newopt, newoptlen, + newtype != 
IPV6_RTHDRDSTOPTS, + &opt2->dst0opt, &p); + if (err) + goto out; + + err = ipv6_renew_option(opt->srcrt, newopt, newoptlen, + newtype != IPV6_RTHDR, + (struct ipv6_opt_hdr **)opt2->srcrt, &p); + if (err) + goto out; + + err = ipv6_renew_option(opt->dst1opt, newopt, newoptlen, + newtype != IPV6_DSTOPTS, + &opt2->dst1opt, &p); + if (err) + goto out; + + opt2->opt_nflen = (opt2->hopopt ? ipv6_optlen(opt2->hopopt) : 0) + + (opt2->dst0opt ? ipv6_optlen(opt2->dst0opt) : 0) + + (opt2->srcrt ? ipv6_optlen(opt2->srcrt) : 0); + opt2->opt_flen = (opt2->dst1opt ? ipv6_optlen(opt2->dst1opt) : 0); + + return opt2; +out: + sock_kfree_s(sk, p, tot_len); + return ERR_PTR(err); +} + diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fa8f1bb..b7185fb 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -287,7 +287,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, int iif = 0; int addr_type = 0; int len; - int hlimit; + int hlimit, tclass; int err = 0; if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail) @@ -374,7 +374,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (err) goto out; if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) - goto out_dst_release; + goto out; if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; @@ -385,6 +385,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); + tclass = np->cork.tclass; + if (tclass < 0) + tclass = 0; + msg.skb = skb; msg.offset = skb->nh.raw - skb->data; @@ -400,7 +404,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, err = ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), - hlimit, NULL, &fl, (struct rt6_info*)dst, + hlimit, tclass, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT); if (err) { ip6_flush_pending_frames(sk); @@ -434,6 +438,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct dst_entry *dst; int err = 0; 
int hlimit; + int tclass; saddr = &skb->nh.ipv6h->daddr; @@ -464,7 +469,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (err) goto out; if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) - goto out_dst_release; + goto out; if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; @@ -475,13 +480,17 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); + tclass = np->cork.tclass; + if (tclass < 0) + tclass = 0; + idev = in6_dev_get(skb->dev); msg.skb = skb; msg.offset = 0; err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), - sizeof(struct icmp6hdr), hlimit, NULL, &fl, + sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT); if (err) { @@ -496,7 +505,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb) out_put: if (likely(idev != NULL)) in6_dev_put(idev); -out_dst_release: dst_release(dst); out: icmpv6_xmit_unlock(); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 16af874..4fcc5a7 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -92,7 +92,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn); static __u32 rt_sernum; -static struct timer_list ip6_fib_timer = TIMER_INITIALIZER(fib6_run_gc, 0, 0); +static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0); struct fib6_walker_t fib6_walker_list = { .prev = &fib6_walker_list, diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b6c73da5..f841bde 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -50,7 +50,7 @@ static atomic_t fl_size = ATOMIC_INIT(0); static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1]; static void ip6_fl_gc(unsigned long dummy); -static struct timer_list ip6_fl_gc_timer = TIMER_INITIALIZER(ip6_fl_gc, 0, 0); +static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0); /* FL hash table lock: it protects only of GC */ @@ -225,16 +225,20 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * 
opt_space, struct ip6_flowlabel * fl, struct ipv6_txoptions * fopt) { - struct ipv6_txoptions * fl_opt = fl->opt; + struct ipv6_txoptions * fl_opt = fl ? fl->opt : NULL; - if (fopt == NULL || fopt->opt_flen == 0) - return fl_opt; + if (fopt == NULL || fopt->opt_flen == 0) { + if (!fl_opt || !fl_opt->dst0opt || fl_opt->srcrt) + return fl_opt; + } if (fl_opt != NULL) { opt_space->hopopt = fl_opt->hopopt; - opt_space->dst0opt = fl_opt->dst0opt; + opt_space->dst0opt = fl_opt->srcrt ? fl_opt->dst0opt : NULL; opt_space->srcrt = fl_opt->srcrt; opt_space->opt_nflen = fl_opt->opt_nflen; + if (fl_opt->dst0opt && !fl_opt->srcrt) + opt_space->opt_nflen -= ipv6_optlen(fl_opt->dst0opt); } else { if (fopt->opt_nflen == 0) return fopt; @@ -310,7 +314,7 @@ fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int * msg.msg_control = (void*)(fl->opt+1); flowi.oif = 0; - err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk); + err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 01ef94f..2f589f2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -166,7 +166,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct ipv6hdr *hdr; u8 proto = fl->proto; int seg_len = skb->len; - int hlimit; + int hlimit, tclass; u32 mtu; if (opt) { @@ -202,7 +202,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, * Fill in the IPv6 header */ - *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel; hlimit = -1; if (np) hlimit = np->hop_limit; @@ -211,6 +210,14 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); + tclass = -1; + if (np) + tclass = np->tclass; + if (tclass < 0) + tclass = 0; + + *(u32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; + hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; hdr->hop_limit = hlimit; @@ 
-762,10 +769,11 @@ out_err_release: return err; } -int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, - int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt, - unsigned int flags) +int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, + int offset, int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, + struct rt6_info *rt, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -803,6 +811,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse np->cork.rt = rt; inet->cork.fl = *fl; np->cork.hop_limit = hlimit; + np->cork.tclass = tclass; inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); if (dst_allfrag(rt->u.dst.path)) inet->cork.flags |= IPCORK_ALLFRAG; @@ -1084,7 +1093,8 @@ int ip6_push_pending_frames(struct sock *sk) skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr)); - *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000); + *(u32*)hdr = fl->fl6_flowlabel | + htonl(0x60000000 | ((int)np->cork.tclass << 20)); if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0961372..cf94372 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -673,11 +673,12 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); - else + else { dst = ip6_route_output(NULL, &fl); - if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0) - goto tx_err_link_failure; + if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0) + goto tx_err_link_failure; + } tdev = dst->dev; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c 
index 76466af..8567873 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -210,39 +210,139 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; break; - case IPV6_PKTINFO: + case IPV6_RECVPKTINFO: np->rxopt.bits.rxinfo = valbool; retv = 0; break; + + case IPV6_2292PKTINFO: + np->rxopt.bits.rxoinfo = valbool; + retv = 0; + break; - case IPV6_HOPLIMIT: + case IPV6_RECVHOPLIMIT: np->rxopt.bits.rxhlim = valbool; retv = 0; break; - case IPV6_RTHDR: + case IPV6_2292HOPLIMIT: + np->rxopt.bits.rxohlim = valbool; + retv = 0; + break; + + case IPV6_RECVRTHDR: if (val < 0 || val > 2) goto e_inval; np->rxopt.bits.srcrt = val; retv = 0; break; - case IPV6_HOPOPTS: + case IPV6_2292RTHDR: + if (val < 0 || val > 2) + goto e_inval; + np->rxopt.bits.osrcrt = val; + retv = 0; + break; + + case IPV6_RECVHOPOPTS: np->rxopt.bits.hopopts = valbool; retv = 0; break; - case IPV6_DSTOPTS: + case IPV6_2292HOPOPTS: + np->rxopt.bits.ohopopts = valbool; + retv = 0; + break; + + case IPV6_RECVDSTOPTS: np->rxopt.bits.dstopts = valbool; retv = 0; break; + case IPV6_2292DSTOPTS: + np->rxopt.bits.odstopts = valbool; + retv = 0; + break; + + case IPV6_TCLASS: + if (val < 0 || val > 0xff) + goto e_inval; + np->tclass = val; + retv = 0; + break; + + case IPV6_RECVTCLASS: + np->rxopt.bits.rxtclass = valbool; + retv = 0; + break; + case IPV6_FLOWINFO: np->rxopt.bits.rxflow = valbool; retv = 0; break; - case IPV6_PKTOPTIONS: + case IPV6_HOPOPTS: + case IPV6_RTHDRDSTOPTS: + case IPV6_RTHDR: + case IPV6_DSTOPTS: + { + struct ipv6_txoptions *opt; + if (optlen == 0) + optval = 0; + + /* hop-by-hop / destination options are privileged option */ + retv = -EPERM; + if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) + break; + + retv = -EINVAL; + if (optlen & 0x7 || optlen > 8 * 255) + break; + + opt = ipv6_renew_options(sk, np->opt, optname, + (struct ipv6_opt_hdr __user *)optval, + optlen); + if (IS_ERR(opt)) { + retv = PTR_ERR(opt); + break; + } + + /* routing header 
option needs extra check */ + if (optname == IPV6_RTHDR && opt->srcrt) { + struct ipv6_rt_hdr *rthdr = opt->srcrt; + if (rthdr->type) + goto sticky_done; + if ((rthdr->hdrlen & 1) || + (rthdr->hdrlen >> 1) != rthdr->segments_left) + goto sticky_done; + } + + retv = 0; + if (sk->sk_type == SOCK_STREAM) { + if (opt) { + struct tcp_sock *tp = tcp_sk(sk); + if (!((1 << sk->sk_state) & + (TCPF_LISTEN | TCPF_CLOSE)) + && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { + tp->ext_header_len = opt->opt_flen + opt->opt_nflen; + tcp_sync_mss(sk, tp->pmtu_cookie); + } + } + opt = xchg(&np->opt, opt); + sk_dst_reset(sk); + } else { + write_lock(&sk->sk_dst_lock); + opt = xchg(&np->opt, opt); + write_unlock(&sk->sk_dst_lock); + sk_dst_reset(sk); + } +sticky_done: + if (opt) + sock_kfree_s(sk, opt, opt->tot_len); + break; + } + + case IPV6_2292PKTOPTIONS: { struct ipv6_txoptions *opt = NULL; struct msghdr msg; @@ -276,7 +376,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(&msg, &fl, opt, &junk); + retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk); if (retv) goto done; update: @@ -529,6 +629,17 @@ e_inval: return -EINVAL; } +int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_opt_hdr *hdr, + char __user *optval, int len) +{ + if (!hdr) + return 0; + len = min_t(int, len, ipv6_optlen(hdr)); + if (copy_to_user(optval, hdr, ipv6_optlen(hdr))) + return -EFAULT; + return len; +} + int ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -567,7 +678,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, return err; } - case IPV6_PKTOPTIONS: + case IPV6_2292PKTOPTIONS: { struct msghdr msg; struct sk_buff *skb; @@ -601,6 +712,16 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, int hlim = np->mcast_hops; put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } + if (np->rxopt.bits.rxoinfo) { + struct 
in6_pktinfo src_info; + src_info.ipi6_ifindex = np->mcast_oif; + ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr); + put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); + } + if (np->rxopt.bits.rxohlim) { + int hlim = np->mcast_hops; + put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); + } } len -= msg.msg_controllen; return put_user(len, optlen); @@ -625,26 +746,67 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->ipv6only; break; - case IPV6_PKTINFO: + case IPV6_RECVPKTINFO: val = np->rxopt.bits.rxinfo; break; - case IPV6_HOPLIMIT: + case IPV6_2292PKTINFO: + val = np->rxopt.bits.rxoinfo; + break; + + case IPV6_RECVHOPLIMIT: val = np->rxopt.bits.rxhlim; break; - case IPV6_RTHDR: + case IPV6_2292HOPLIMIT: + val = np->rxopt.bits.rxohlim; + break; + + case IPV6_RECVRTHDR: val = np->rxopt.bits.srcrt; break; + case IPV6_2292RTHDR: + val = np->rxopt.bits.osrcrt; + break; + case IPV6_HOPOPTS: + case IPV6_RTHDRDSTOPTS: + case IPV6_RTHDR: + case IPV6_DSTOPTS: + { + + lock_sock(sk); + len = ipv6_getsockopt_sticky(sk, np->opt->hopopt, + optval, len); + release_sock(sk); + return put_user(len, optlen); + } + + case IPV6_RECVHOPOPTS: val = np->rxopt.bits.hopopts; break; - case IPV6_DSTOPTS: + case IPV6_2292HOPOPTS: + val = np->rxopt.bits.ohopopts; + break; + + case IPV6_RECVDSTOPTS: val = np->rxopt.bits.dstopts; break; + case IPV6_2292DSTOPTS: + val = np->rxopt.bits.odstopts; + break; + + case IPV6_TCLASS: + val = np->tclass; + break; + + case IPV6_RECVTCLASS: + val = np->rxopt.bits.rxtclass; + break; + case IPV6_FLOWINFO: val = np->rxopt.bits.rxflow; break; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a7eae30..555a313 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -447,10 +447,8 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, return; err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err < 0) { - dst_release(dst); + if (err < 0) return; - } if (inc_opt) { if (dev->addr_len) 
@@ -539,10 +537,8 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, return; err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err < 0) { - dst_release(dst); + if (err < 0) return; - } len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); send_llinfo = dev->addr_len && !ipv6_addr_any(saddr); @@ -616,10 +612,8 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, return; err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err < 0) { - dst_release(dst); + if (err < 0) return; - } len = sizeof(struct icmp6hdr); if (dev->addr_len) @@ -1353,10 +1347,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err) { - dst_release(dst); + if (err) return; - } rt = (struct rt6_info *) dst; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 14316c3..b03e87a 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -100,11 +100,8 @@ static void send_reset(struct sk_buff *oldskb) dst = ip6_route_output(NULL, &fl); if (dst == NULL) return; - if (dst->error || - xfrm_lookup(&dst, &fl, NULL, 0)) { - dst_release(dst); + if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0)) return; - } hh_len = (dst->dev->hard_header_len + 15)&~15; nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 9b91dec..4de4cda 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/file.h> +#include <linux/rcupdate.h> #include <net/sock.h> #include <linux/netfilter_ipv6/ip6t_owner.h> diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index a9526b7..2bb6700 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -161,8 +161,8 @@ match(const struct sk_buff *skb, ((rtinfo->hdrlen == hdrlen) ^ 
!!(rtinfo->invflags & IP6T_RT_INV_LEN)))); DEBUGP("res %02X %02X %02X ", - (rtinfo->flags & IP6T_RT_RES), ((struct rt0_hdr *)rh)->bitmap, - !((rtinfo->flags & IP6T_RT_RES) && (((struct rt0_hdr *)rh)->bitmap))); + (rtinfo->flags & IP6T_RT_RES), ((struct rt0_hdr *)rh)->reserved, + !((rtinfo->flags & IP6T_RT_RES) && (((struct rt0_hdr *)rh)->reserved))); ret = (rh != NULL) && @@ -179,12 +179,12 @@ match(const struct sk_buff *skb, !!(rtinfo->invflags & IP6T_RT_INV_TYP))); if (ret && (rtinfo->flags & IP6T_RT_RES)) { - u_int32_t *bp, _bitmap; - bp = skb_header_pointer(skb, - ptr + offsetof(struct rt0_hdr, bitmap), - sizeof(_bitmap), &_bitmap); + u_int32_t *rp, _reserved; + rp = skb_header_pointer(skb, + ptr + offsetof(struct rt0_hdr, reserved), + sizeof(_reserved), &_reserved); - ret = (*bp == 0); + ret = (*rp == 0); } DEBUGP("#%d ",rtinfo->addrnr); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ed3a76b..5aa3691 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -655,6 +655,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct flowi fl; int addr_len = msg->msg_namelen; int hlimit = -1; + int tclass = -1; u16 proto; int err; @@ -740,7 +741,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(msg, &fl, opt, &hlimit); + err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -755,8 +756,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, } if (opt == NULL) opt = np->opt; - if (flowlabel) - opt = fl6_merge_options(&opt_space, flowlabel, opt); + opt = fl6_merge_options(&opt_space, flowlabel, opt); fl.proto = proto; rawv6_probe_proto_opt(&fl, msg); @@ -782,10 +782,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - 
dst_release(dst); + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) goto out; - } if (hlimit < 0) { if (ipv6_addr_is_multicast(&fl.fl6_dst)) @@ -798,6 +796,12 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, hlimit = ipv6_get_hoplimit(dst->dev); } + if (tclass < 0) { + tclass = np->tclass; /* default to the socket's IPV6_TCLASS value, as udpv6_sendmsg does, rather than the cork copy */ + if (tclass < 0) + tclass = 0; + } + if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; @@ -806,8 +810,9 @@ back_from_confirm: err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags); } else { lock_sock(sk); - err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, - hlimit, opt, &fl, (struct rt6_info*)dst, msg->msg_flags); + err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, + len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, + msg->msg_flags); if (err) ip6_flush_pending_frames(sk); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 9d9e043..e4fe9ee 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -479,12 +479,9 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* Point into the IP datagram 'data' part. */ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) goto err; - if (end-offset < skb->len) { - if (pskb_trim(skb, end - offset)) - goto err; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; - } + + if (pskb_trim_rcsum(skb, end - offset)) + goto err; /* Find out which fragments are in front and at the back of us * in the chain of fragments so far. 
We must know where to put diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 794734f..80643e6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -632,10 +632,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - dst_release(dst); + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) goto failure; - } if (saddr == NULL) { saddr = &fl.fl6_src; @@ -849,7 +847,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, if (dst == NULL) { opt = np->opt; if (opt == NULL && - np->rxopt.bits.srcrt == 2 && + np->rxopt.bits.osrcrt == 2 && treq->pktopts) { struct sk_buff *pktopts = treq->pktopts; struct inet6_skb_parm *rxopt = IP6CB(pktopts); @@ -888,7 +886,6 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, } done: - dst_release(dst); if (opt && opt != np->opt) sock_kfree_s(sk, opt, opt->tot_len); return err; @@ -915,11 +912,10 @@ static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); if (np->rxopt.all) { - if ((opt->hop && np->rxopt.bits.hopopts) || - ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) && - np->rxopt.bits.rxflow) || - (opt->srcrt && np->rxopt.bits.srcrt) || - ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts)) + if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || + ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) || + (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) || + ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) return 1; } return 0; @@ -1001,10 +997,8 @@ static void tcp_v6_send_reset(struct sk_buff *skb) /* sk = NULL, but it is safe for now. RST socket required. 
*/ if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { - if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) { - dst_release(buff->dst); + if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) return; - } ip6_xmit(NULL, buff, &fl, NULL, 0); TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); @@ -1068,10 +1062,8 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 fl.fl_ip_sport = t1->source; if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { - if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) { - dst_release(buff->dst); + if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) return; - } ip6_xmit(NULL, buff, &fl, NULL, 0); TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); return; @@ -1190,8 +1182,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) TCP_ECN_create_request(req, skb->h.th); treq->pktopts = NULL; if (ipv6_opt_accepted(sk, skb) || - np->rxopt.bits.rxinfo || - np->rxopt.bits.rxhlim) { + np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || + np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { atomic_inc(&skb->users); treq->pktopts = skb; } @@ -1288,7 +1280,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (sk_acceptq_is_full(sk)) goto out_overflow; - if (np->rxopt.bits.srcrt == 2 && + if (np->rxopt.bits.osrcrt == 2 && opt == NULL && treq->pktopts) { struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts); if (rxopt->srcrt) @@ -1544,9 +1536,9 @@ ipv6_pktoptions: tp = tcp_sk(sk); if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { - if (np->rxopt.bits.rxinfo) + if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) np->mcast_oif = inet6_iif(opt_skb); - if (np->rxopt.bits.rxhlim) + if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = opt_skb->nh.ipv6h->hop_limit; if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); @@ -1734,7 +1726,6 @@ static int tcp_v6_rebuild_header(struct sock *sk) if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = 
-err; - dst_release(dst); return err; } @@ -1787,7 +1778,6 @@ static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok) if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { sk->sk_route_caps = 0; - dst_release(dst); return err; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 390d750..69b1468 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -483,7 +483,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) } if (ulen < skb->len) { - if (__pskb_trim(skb, ulen)) + if (pskb_trim_rcsum(skb, ulen)) goto discard; saddr = &skb->nh.ipv6h->saddr; daddr = &skb->nh.ipv6h->daddr; @@ -637,6 +637,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, int addr_len = msg->msg_namelen; int ulen = len; int hlimit = -1; + int tclass = -1; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; @@ -758,7 +759,7 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(msg, fl, opt, &hlimit); + err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -773,8 +774,7 @@ do_udp_sendmsg: } if (opt == NULL) opt = np->opt; - if (flowlabel) - opt = fl6_merge_options(&opt_space, flowlabel, opt); + opt = fl6_merge_options(&opt_space, flowlabel, opt); fl->proto = IPPROTO_UDP; ipv6_addr_copy(&fl->fl6_dst, daddr); @@ -799,10 +799,8 @@ do_udp_sendmsg: if (final_p) ipv6_addr_copy(&fl->fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0) { - dst_release(dst); + if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0) goto out; - } if (hlimit < 0) { if (ipv6_addr_is_multicast(&fl->fl6_dst)) @@ -815,6 +813,12 @@ do_udp_sendmsg: hlimit = ipv6_get_hoplimit(dst->dev); } + if (tclass < 0) { + tclass = np->tclass; + if (tclass < 0) + tclass = 0; + } + if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: @@ -834,9 +838,10 @@ back_from_confirm: do_append_data: up->len += ulen; - err = ip6_append_data(sk, ip_generic_getfrag, 
msg->msg_iov, ulen, sizeof(struct udphdr), - hlimit, opt, fl, (struct rt6_info*)dst, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); + err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, + sizeof(struct udphdr), hlimit, tclass, opt, fl, + (struct rt6_info*)dst, + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 5d1e611..6f20b420 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -567,10 +567,8 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) self->tty = NULL; if (self->blocked_open) { - if (self->close_delay) { - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(self->close_delay); - } + if (self->close_delay) + schedule_timeout_interruptible(self->close_delay); wake_up_interruptible(&self->open_wait); } @@ -863,8 +861,7 @@ static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout) spin_lock_irqsave(&self->spinlock, flags); while (self->tx_skb && self->tx_skb->len) { spin_unlock_irqrestore(&self->spinlock, flags); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(poll_time); + schedule_timeout_interruptible(poll_time); spin_lock_irqsave(&self->spinlock, flags); if (signal_pending(current)) break; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index f4578c7..e5d82d7 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -56,6 +56,7 @@ int sysctl_netrom_transport_requested_window_size = NR_DEFAULT_WINDOW; int sysctl_netrom_transport_no_activity_timeout = NR_DEFAULT_IDLE; int sysctl_netrom_routing_control = NR_DEFAULT_ROUTING; int sysctl_netrom_link_fails_count = NR_DEFAULT_FAILS; +int sysctl_netrom_reset_circuit = NR_DEFAULT_RESET; static unsigned short circuit = 0x101; @@ -908,17 +909,17 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) if (frametype != NR_CONNREQ) { /* * Here it 
would be nice to be able to send a reset but - * NET/ROM doesn't have one. The following hack would - * have been a way to extend the protocol but apparently - * it kills BPQ boxes... :-( + * NET/ROM doesn't have one. We've tried to extend the protocol + * by sending NR_CONNACK | NR_CHOKE_FLAGS replies but that + * apparently kills BPQ boxes... :-( + * So now we try to follow the established behaviour of + * G8PZT's Xrouter which is sending packets with command type 7 + * as an extension of the protocol. */ -#if 0 - /* - * Never reply to a CONNACK/CHOKE. - */ - if (frametype != NR_CONNACK || flags != NR_CHOKE_FLAG) - nr_transmit_refusal(skb, 1); -#endif + if (sysctl_netrom_reset_circuit && + (frametype != NR_RESET || flags != 0)) + nr_transmit_reset(skb, 1); + return 0; } @@ -1187,9 +1188,7 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } case SIOCGSTAMP: - ret = -EINVAL; - if (sk != NULL) - ret = sock_get_timestamp(sk, argp); + ret = sock_get_timestamp(sk, argp); release_sock(sk); return ret; @@ -1393,8 +1392,7 @@ static int __init nr_proto_init(void) struct net_device *dev; sprintf(name, "nr%d", i); - dev = alloc_netdev(sizeof(struct net_device_stats), name, - nr_setup); + dev = alloc_netdev(sizeof(struct nr_private), name, nr_setup); if (!dev) { printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n"); goto fail; diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 263da4c..4e66eef 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -47,7 +47,7 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) struct net_device_stats *stats = netdev_priv(dev); if (!netif_running(dev)) { - stats->rx_errors++; + stats->rx_dropped++; return 0; } @@ -71,15 +71,10 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) static int nr_rebuild_header(struct sk_buff *skb) { - struct net_device *dev = skb->dev; - struct net_device_stats *stats = netdev_priv(dev); - struct sk_buff *skbn; unsigned 
char *bp = skb->data; - int len; - if (arp_find(bp + 7, skb)) { + if (arp_find(bp + 7, skb)) return 1; - } bp[6] &= ~AX25_CBIT; bp[6] &= ~AX25_EBIT; @@ -90,27 +85,7 @@ static int nr_rebuild_header(struct sk_buff *skb) bp[6] |= AX25_EBIT; bp[6] |= AX25_SSSID_SPARE; - if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { - kfree_skb(skb); - return 1; - } - - if (skb->sk != NULL) - skb_set_owner_w(skbn, skb->sk); - - kfree_skb(skb); - - len = skbn->len; - - if (!nr_route_frame(skbn, NULL)) { - kfree_skb(skbn); - stats->tx_errors++; - } - - stats->tx_packets++; - stats->tx_bytes += len; - - return 1; + return 0; } #else @@ -185,15 +160,27 @@ static int nr_close(struct net_device *dev) static int nr_xmit(struct sk_buff *skb, struct net_device *dev) { - struct net_device_stats *stats = netdev_priv(dev); - dev_kfree_skb(skb); - stats->tx_errors++; + struct nr_private *nr = netdev_priv(dev); + struct net_device_stats *stats = &nr->stats; + unsigned int len = skb->len; + + if (!nr_route_frame(skb, NULL)) { + kfree_skb(skb); + stats->tx_errors++; + return 0; + } + + stats->tx_packets++; + stats->tx_bytes += len; + return 0; } static struct net_device_stats *nr_get_stats(struct net_device *dev) { - return netdev_priv(dev); + struct nr_private *nr = netdev_priv(dev); + + return &nr->stats; } void nr_setup(struct net_device *dev) @@ -208,12 +195,11 @@ void nr_setup(struct net_device *dev) dev->hard_header_len = NR_NETWORK_LEN + NR_TRANSPORT_LEN; dev->addr_len = AX25_ADDR_LEN; dev->type = ARPHRD_NETROM; - dev->tx_queue_len = 40; dev->rebuild_header = nr_rebuild_header; dev->set_mac_address = nr_set_mac_address; /* New-style flags. 
*/ - dev->flags = 0; + dev->flags = IFF_NOARP; dev->get_stats = nr_get_stats; } diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 64b81a7..004e859 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -98,6 +98,11 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb, nr_disconnect(sk, ECONNREFUSED); break; + case NR_RESET: + if (sysctl_netrom_reset_circuit) /* honour the reset sysctl: disconnect only when resets are enabled */ + nr_disconnect(sk, ECONNRESET); + break; + default: break; } @@ -124,6 +129,11 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb, nr_disconnect(sk, 0); break; + case NR_RESET: + if (sysctl_netrom_reset_circuit) /* honour the reset sysctl: disconnect only when resets are enabled */ + nr_disconnect(sk, ECONNRESET); + break; + default: break; } @@ -254,6 +264,11 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype } break; + case NR_RESET: + if (sysctl_netrom_reset_circuit) /* honour the reset sysctl: disconnect only when resets are enabled */ + nr_disconnect(sk, ECONNRESET); + break; + default: break; } diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c index 165b2ab..e856ae1 100644 --- a/net/netrom/nr_loopback.c +++ b/net/netrom/nr_loopback.c @@ -17,7 +17,7 @@ static void nr_loopback_timer(unsigned long); static struct sk_buff_head loopback_queue; -static struct timer_list loopback_timer = TIMER_INITIALIZER(nr_loopback_timer, 0, 0); +static DEFINE_TIMER(loopback_timer, nr_loopback_timer, 0, 0); void __init nr_loopback_init(void) { diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index 587bed2..bcb9946 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -210,10 +210,9 @@ void nr_write_internal(struct sock *sk, int frametype) } /* - * This routine is called when a Connect Acknowledge with the Choke Flag - * set is needed to refuse a connection. + * This routine is called to send an error reply. 
*/ -void nr_transmit_refusal(struct sk_buff *skb, int mine) +void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags) { struct sk_buff *skbn; unsigned char *dptr; @@ -254,7 +253,7 @@ void nr_transmit_refusal(struct sk_buff *skb, int mine) *dptr++ = 0; } - *dptr++ = NR_CONNACK | NR_CHOKE_FLAG; + *dptr++ = cmdflags; *dptr++ = 0; if (!nr_route_frame(skbn, NULL)) diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index c9ed503..6bb8dda 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -30,6 +30,7 @@ static int min_idle[] = {0 * HZ}; static int max_idle[] = {65535 * HZ}; static int min_route[] = {0}, max_route[] = {1}; static int min_fails[] = {1}, max_fails[] = {10}; +static int min_reset[] = {0}, max_reset[] = {1}; static struct ctl_table_header *nr_table_header; @@ -155,6 +156,17 @@ static ctl_table nr_table[] = { .extra1 = &min_fails, .extra2 = &max_fails }, + { + .ctl_name = NET_NETROM_RESET, + .procname = "reset", + .data = &sysctl_netrom_reset_circuit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_reset, + .extra2 = &max_reset + }, { .ctl_name = 0 } }; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 3077878..5acb168 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1243,7 +1243,7 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); if (amount < 0) amount = 0; - return put_user(amount, (unsigned int __user *)argp); + return put_user(amount, (unsigned int __user *) argp); } case TIOCINQ: { @@ -1252,13 +1252,11 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) /* These two are safe on a single CPU system as only user tasks fiddle here */ if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) amount = skb->len; - return put_user(amount, (unsigned int __user *)argp); 
+ return put_user(amount, (unsigned int __user *) argp); } case SIOCGSTAMP: - if (sk != NULL) - return sock_get_timestamp(sk, (struct timeval __user *)argp); - return -EINVAL; + return sock_get_timestamp(sk, (struct timeval __user *) argp); case SIOCGIFADDR: case SIOCSIFADDR: diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index a8ed9a1..d297af7 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -149,6 +149,6 @@ void rose_setup(struct net_device *dev) dev->set_mac_address = rose_set_mac_address; /* New-style flags. */ - dev->flags = 0; + dev->flags = IFF_NOARP; dev->get_stats = rose_get_stats; } diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 02891ce..36a7794 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -337,13 +337,13 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac memcpy(&facilities->source_addr, p + 7, ROSE_ADDR_LEN); memcpy(callsign, p + 12, l - 10); callsign[l - 10] = '\0'; - facilities->source_call = *asc2ax(callsign); + asc2ax(&facilities->source_call, callsign); } if (*p == FAC_CCITT_SRC_NSAP) { memcpy(&facilities->dest_addr, p + 7, ROSE_ADDR_LEN); memcpy(callsign, p + 12, l - 10); callsign[l - 10] = '\0'; - facilities->dest_call = *asc2ax(callsign); + asc2ax(&facilities->dest_call, callsign); } p += l + 2; n += l + 2; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 737681c..31570b9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1194,7 +1194,7 @@ EXPORT_SYMBOL(psched_time_base); * with 32-bit get_cycles(). Safe up to 4GHz CPU. 
*/ static void psched_tick(unsigned long); -static struct timer_list psched_timer = TIMER_INITIALIZER(psched_tick, 0, 0); +static DEFINE_TIMER(psched_timer, psched_tick, 0, 0); static void psched_tick(unsigned long dummy) { diff --git a/net/socket.c b/net/socket.c index e1bd5d84..c699e93 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1745,10 +1745,11 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) goto out_freeiov; ctl_len = msg_sys.msg_controllen; if ((MSG_CMSG_COMPAT & flags) && ctl_len) { - err = cmsghdr_from_user_compat_to_kern(&msg_sys, ctl, sizeof(ctl)); + err = cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, sizeof(ctl)); if (err) goto out_freeiov; ctl_buf = msg_sys.msg_control; + ctl_len = msg_sys.msg_controllen; } else if (ctl_len) { if (ctl_len > sizeof(ctl)) { diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 05fe2e7..51885b5 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1170,8 +1170,7 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout) while (rqstp->rq_arghi < pages) { struct page *p = alloc_page(GFP_KERNEL); if (!p) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ/2); + schedule_timeout_uninterruptible(msecs_to_jiffies(500)); continue; } rqstp->rq_argpages[rqstp->rq_arghi++] = p; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 83c8135..fda737d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -765,8 +765,8 @@ restart: switch (policy->action) { case XFRM_POLICY_BLOCK: /* Prohibit the flow */ - xfrm_pol_put(policy); - return -EPERM; + err = -EPERM; + goto error; case XFRM_POLICY_ALLOW: if (policy->xfrm_nr == 0) { @@ -782,8 +782,8 @@ restart: */ dst = xfrm_find_bundle(fl, policy, family); if (IS_ERR(dst)) { - xfrm_pol_put(policy); - return PTR_ERR(dst); + err = PTR_ERR(dst); + goto error; } if (dst) |