From 1e9af2c480a638933876e637c1f3b9f1b7bad3e9 Mon Sep 17 00:00:00 2001
From: rrs
Date: Sat, 14 Jul 2007 09:36:28 +0000
Subject: - Modular congestion control, with RFC2581 being the default.
- CMT_PF states added (with a sysctl to turn the PF version on).
- sctp_input.c had a missing refcnt increment in the cookie case when the
  auth was bad. This meant a free was called without an increment of the
  refcnt; added the increment like the rest of the code.
- There was an unlikely case where the scope of the destination changed
  (this is a TSNH case). In that case the alloc'ed asoc was not freed
  (in sctp_input.c).
- When listed addresses found a colliding cookie/INIT, the collided-upon
  tcb was not unlocked in sctp_pcb.c.
- Add error checking on the arguments of sctp_sendx(3) to prevent it from
  dereferencing a NULL pointer.
- Fix an error return of sctp_sendx(3); it was returning ENOMEM, not -1.
- Get-assoc-id was changed to use the sanctified socket API method for
  getting an assoc id (PEER_ADDR_INFO instead of PEER_ADDR_PARAMS).
- Fix it so a peeled-off socket gets a proper error return if it tries to
  send to a different address than the one it is connected to.
- Fix select_a_stream so it avoids an endless loop that could hang a caller.
- time_entered (state set time) was not being set in all cases to the time
  we went established.

Approved by: re (ken smith)
---
 sys/netinet/sctp_input.c | 236 +++++++++++++++++------------------------------
 1 file changed, 87 insertions(+), 149 deletions(-)

(limited to 'sys/netinet/sctp_input.c')

diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c
index 5f29214..eaedc02 100644
--- a/sys/netinet/sctp_input.c
+++ b/sys/netinet/sctp_input.c
@@ -127,6 +127,8 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
         op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
         sctp_abort_association(inp, stcb, m, iphlen, sh, op_err, vrf_id);
+        if (stcb)
+            *abort_no_unlock = 1;
         return;
     }
     if (init->num_inbound_streams == 0) {
@@ -546,6 +548,25 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
             (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, r_net);
         }
     }
+    /*
+     * JRS 5/14/07 - If CMT PF is on and the destination is in PF state,
+     * set the destination to active state and set the cwnd to one or
+     * two MTU's based on whether PF1 or PF2 is being used. If a T3
+     * timer is running, for the destination, stop the timer because a
+     * PF-heartbeat was received.
+     */
+    if (sctp_cmt_pf && (net->dest_state & SCTP_ADDR_PF) ==
+        SCTP_ADDR_PF) {
+        if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+            sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+                stcb, net,
+                SCTP_FROM_SCTP_INPUT + SCTP_LOC_5);
+        }
+        net->dest_state &= ~SCTP_ADDR_PF;
+        net->cwnd = net->mtu * sctp_cmt_pf;
+        SCTPDBG(SCTP_DEBUG_INPUT1, "Destination %p moved from PF to reachable with cwnd %d.\n",
+            net, net->cwnd);
+    }
     /* Now lets do a RTO with this */
     r_net->RTO = sctp_calculate_rto(stcb, &stcb->asoc, r_net, &tv);
 }
@@ -559,7 +580,7 @@ sctp_handle_abort(struct sctp_abort_chunk *cp,
         return;
     /* stop any receive timers */
-    sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_5);
+    sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
     /* notify user of the abort and clean up... */
     sctp_abort_notification(stcb, 0);
     /* free the tcb */
@@ -629,7 +650,7 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
          * stop the shutdown timer, since we WILL move to
          * SHUTDOWN-ACK-SENT.
          */
-        sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_7);
+        sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_8);
     }
     /* Now is there unsent data on a stream somewhere? */
     some_on_streamwheel = sctp_is_there_unsent_data(stcb);
@@ -693,7 +714,7 @@ sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp,
         sctp_report_all_outbound(stcb, 0);
     }
     /* stop the timer */
-    sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_8);
+    sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9);
     /* send SHUTDOWN-COMPLETE */
     sctp_send_shutdown_complete(stcb, net);
     /* notify upper layer protocol */
@@ -708,7 +729,7 @@ sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp,
     SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
     /* free the TCB but first save off the ep */
     sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
-        SCTP_FROM_SCTP_INPUT + SCTP_LOC_9);
+        SCTP_FROM_SCTP_INPUT + SCTP_LOC_10);
 }
 
 /*
@@ -834,7 +855,7 @@ sctp_handle_error(struct sctp_chunkhdr *ch,
         asoc->max_init_times) {
         sctp_abort_notification(stcb, 0);
         /* now free the asoc */
-        sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_10);
+        sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_11);
         return (-1);
     }
     /* blast back to INIT state */
@@ -1156,8 +1177,8 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
         }
         /* we have already processed the INIT so no problem */
         sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb,
-            net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_11);
-        sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_12);
+            net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_12);
+        sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_13);
         /* update current state */
         if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
             SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
@@ -1274,7 +1295,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
         }
         if (how_indx < sizeof(asoc->cookie_how))
             asoc->cookie_how[how_indx] = 8;
-        sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_13);
+        sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_14);
         sctp_stop_all_cookie_timers(stcb);
         /*
          * since we did not send a HB make sure we don't double
@@ -1305,6 +1326,8 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
             TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
                 if (chk->sent < SCTP_DATAGRAM_RESEND) {
                     chk->sent = SCTP_DATAGRAM_RESEND;
+                    sctp_flight_size_decrease(chk);
+                    sctp_total_flight_decrease(stcb, chk);
                     sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
                     spec_flag++;
                 }
@@ -1386,8 +1409,8 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
         /* temp code */
         if (how_indx < sizeof(asoc->cookie_how))
             asoc->cookie_how[how_indx] = 12;
-        sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_14);
-        sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_15);
+        sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_15);
+        sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
 
         *sac_assoc_id = sctp_get_associd(stcb);
         /* notify upper layer */
@@ -1624,6 +1647,8 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
         op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
         sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
             sh, op_err, vrf_id);
+        sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+            SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
         atomic_add_int(&stcb->asoc.refcnt, -1);
         return (NULL);
     }
@@ -1676,7 +1701,9 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
             /* auth HMAC failed, dump the assoc and packet */
             SCTPDBG(SCTP_DEBUG_AUTH1, "COOKIE-ECHO: AUTH failed\n");
+            atomic_add_int(&stcb->asoc.refcnt, 1);
             sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
                 SCTP_FROM_SCTP_INPUT + SCTP_LOC_18);
+            atomic_add_int(&stcb->asoc.refcnt, -1);
             return (NULL);
         } else {
             /* remaining chunks checked... good to go */
@@ -1771,6 +1798,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
     }
     /* respond with a COOKIE-ACK */
     /* calculate the RTT */
+    (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
     if ((netp) && (*netp)) {
         (*netp)->RTO = sctp_calculate_rto(stcb, asoc, *netp,
             &cookie->time_entered);
@@ -2363,20 +2391,11 @@ sctp_handle_ecn_echo(struct sctp_ecne_chunk *cp,
         net = stcb->asoc.primary_destination;
     if (compare_with_wrap(tsn, stcb->asoc.last_cwr_tsn, MAX_TSN)) {
-        int old_cwnd;
-
-        old_cwnd = net->cwnd;
-        SCTP_STAT_INCR(sctps_ecnereducedcwnd);
-        net->ssthresh = net->cwnd / 2;
-        if (net->ssthresh < net->mtu) {
-            net->ssthresh = net->mtu;
-            /* here back off the timer as well, to slow us down */
-            net->RTO <<= 1;
-        }
-        net->cwnd = net->ssthresh;
-        if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
-            sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
-        }
+        /*
+         * JRS - Use the congestion control given in the pluggable
+         * CC module
+         */
+        stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo(stcb, net);
         /*
          * we reduce once every RTT. So we will only lower cwnd at
          * the next sending seq i.e. the resync_tsn.
@@ -2458,10 +2477,10 @@ sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp,
         }
     }
     /* stop the timer */
-    sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_21);
+    sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_22);
     SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
     /* free the TCB */
-    sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_22);
+    sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_23);
     return;
 }
@@ -2570,7 +2589,7 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc,
 
             /* restart the timer */
             sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
-                stcb, tp1->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_23);
+                stcb, tp1->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_24);
             sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
                 stcb, tp1->whoTo);
@@ -2635,7 +2654,7 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc,
              * this, otherwise we let the timer fire.
              */
             sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep,
-                stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_24);
+                stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_25);
             sctp_send_initiate(stcb->sctp_ep, stcb);
         }
         break;
@@ -2645,7 +2664,13 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc,
         break;
     case SCTP_HEARTBEAT_REQUEST:
         /* resend a demand HB */
-        (void)sctp_send_hb(stcb, 1, net);
+        if ((stcb->asoc.overall_error_count + 3) < stcb->asoc.max_send_times) {
+            /*
+             * Only retransmit if we KNOW we wont destroy the
+             * tcb
+             */
+            (void)sctp_send_hb(stcb, 1, net);
+        }
         break;
     case SCTP_SHUTDOWN:
         sctp_send_shutdown(stcb, net);
@@ -2806,7 +2831,7 @@ sctp_clean_up_stream_reset(struct sctp_tcb *stcb)
     }
     asoc = &stcb->asoc;
 
-    sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_25);
+    sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_26);
     TAILQ_REMOVE(&asoc->control_send_queue,
         chk,
         sctp_next);
@@ -3203,7 +3228,6 @@ strres_nochunk:
 
             num_req++;
-            req_in = (struct sctp_stream_reset_in_request *)ph;
             sctp_handle_str_reset_request_in(stcb, chk, req_in, trunc);
@@ -3398,112 +3422,8 @@ sctp_handle_packet_dropped(struct sctp_pktdrop_chunk *cp,
         * Note if a T3 timer has went off, we will prohibit any
         * changes to cwnd until we exit the t3 loss recovery.
         */
-        uint32_t bw_avail;
-        int rtt, incr;
-
-        int old_cwnd = net->cwnd;
-
-        /* need real RTT for this calc */
-        rtt = ((net->lastsa >> 2) + net->lastsv) >> 1;
-        /* get bottle neck bw */
-        bottle_bw = ntohl(cp->bottle_bw);
-        /* and whats on queue */
-        on_queue = ntohl(cp->current_onq);
-        /*
-         * adjust the on-queue if our flight is more it could be
-         * that the router has not yet gotten data "in-flight" to it
-         */
-        if (on_queue < net->flight_size)
-            on_queue = net->flight_size;
-
-        /* calculate the available space */
-        bw_avail = (bottle_bw * rtt) / 1000;
-        if (bw_avail > bottle_bw) {
-            /*
-             * Cap the growth to no more than the bottle neck.
-             * This can happen as RTT slides up due to queues.
-             * It also means if you have more than a 1 second
-             * RTT with a empty queue you will be limited to the
-             * bottle_bw per second no matter if other points
-             * have 1/2 the RTT and you could get more out...
-             */
-            bw_avail = bottle_bw;
-        }
-        if (on_queue > bw_avail) {
-            /*
-             * No room for anything else don't allow anything
-             * else to be "added to the fire".
-             */
-            int seg_inflight, seg_onqueue, my_portion;
-
-            net->partial_bytes_acked = 0;
-
-            /* how much are we over queue size? */
-            incr = on_queue - bw_avail;
-            if (stcb->asoc.seen_a_sack_this_pkt) {
-                /*
-                 * undo any cwnd adjustment that the sack
-                 * might have made
-                 */
-                net->cwnd = net->prev_cwnd;
-            }
-            /* Now how much of that is mine? */
-            seg_inflight = net->flight_size / net->mtu;
-            seg_onqueue = on_queue / net->mtu;
-            my_portion = (incr * seg_inflight) / seg_onqueue;
-
-            /* Have I made an adjustment already */
-            if (net->cwnd > net->flight_size) {
-                /*
-                 * for this flight I made an adjustment we
-                 * need to decrease the portion by a share
-                 * our previous adjustment.
-                 */
-                int diff_adj;
-
-                diff_adj = net->cwnd - net->flight_size;
-                if (diff_adj > my_portion)
-                    my_portion = 0;
-                else
-                    my_portion -= diff_adj;
-            }
-            /*
-             * back down to the previous cwnd (assume we have
-             * had a sack before this packet). minus what ever
-             * portion of the overage is my fault.
-             */
-            net->cwnd -= my_portion;
-
-            /* we will NOT back down more than 1 MTU */
-            if (net->cwnd <= net->mtu) {
-                net->cwnd = net->mtu;
-            }
-            /* force into CA */
-            net->ssthresh = net->cwnd - 1;
-        } else {
-            /*
-             * Take 1/4 of the space left or max burst up ..
-             * whichever is less.
-             */
-            incr = min((bw_avail - on_queue) >> 2,
-                stcb->asoc.max_burst * net->mtu);
-            net->cwnd += incr;
-        }
-        if (net->cwnd > bw_avail) {
-            /* We can't exceed the pipe size */
-            net->cwnd = bw_avail;
-        }
-        if (net->cwnd < net->mtu) {
-            /* We always have 1 MTU */
-            net->cwnd = net->mtu;
-        }
-        if (net->cwnd - old_cwnd != 0) {
-            /* log only changes */
-            if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
-                sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
-                    SCTP_CWND_LOG_FROM_SAT);
-            }
-        }
+        stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped(stcb,
+            net, cp, &bottle_bw, &on_queue);
     }
 }
@@ -3886,7 +3806,7 @@ process_control_chunks:
                 }
                 *offset = length;
                 if (stcb) {
-                    sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_26);
+                    sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
                 }
                 return (NULL);
             }
@@ -4297,7 +4217,7 @@ process_control_chunks:
             *fwd_tsn_seen = 1;
             if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
                 /* We are not interested anymore */
-                sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_28);
+                sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_29);
                 *offset = length;
                 return (NULL);
             }
@@ -4324,7 +4244,7 @@ process_control_chunks:
             }
             if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
                 /* We are not interested anymore */
-                sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_29);
+                sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_30);
                 *offset = length;
                 return (NULL);
             }
@@ -4525,6 +4445,21 @@ sctp_process_ecn_marked_b(struct sctp_tcb *stcb, struct sctp_nets *net,
     }
 }
 
+#ifdef INVARIANTS
+static void
+sctp_validate_no_locks(struct sctp_inpcb *inp)
+{
+    struct sctp_tcb *stcb;
+
+    LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+        if (mtx_owned(&stcb->tcb_mtx)) {
+            panic("Own lock on stcb at return from input");
+        }
+    }
+}
+
+#endif
+
 /*
  * common input chunk processing (v4 and v6)
  */
@@ -4586,19 +4521,19 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset,
             /* "silently" ignore */
             SCTP_STAT_INCR(sctps_recvauthmissing);
             SCTP_TCB_UNLOCK(stcb);
-            return;
+            goto out_now;
         }
         if (stcb == NULL) {
             /* out of the blue DATA chunk */
             sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL,
                 vrf_id);
-            return;
+            goto out_now;
         }
         if (stcb->asoc.my_vtag != ntohl(sh->v_tag)) {
             /* v_tag mismatch! */
             SCTP_STAT_INCR(sctps_badvtag);
             SCTP_TCB_UNLOCK(stcb);
-            return;
+            goto out_now;
         }
     }
 
@@ -4608,7 +4543,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset,
         * packet while processing control, or we're done with this
         * packet (done or skip rest of data), so we drop it...
         */
-        return;
+        goto out_now;
     }
     /*
     * DATA chunk processing
@@ -4654,7 +4589,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset,
             sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL,
                 vrf_id);
             SCTP_TCB_UNLOCK(stcb);
-            return;
+            goto out_now;
             break;
         case SCTP_STATE_EMPTY:    /* should not happen */
         case SCTP_STATE_INUSE:    /* should not happen */
@@ -4662,7 +4597,7 @@
         case SCTP_STATE_SHUTDOWN_ACK_SENT:
         default:
             SCTP_TCB_UNLOCK(stcb);
-            return;
+            goto out_now;
             break;
         case SCTP_STATE_OPEN:
        case SCTP_STATE_SHUTDOWN_SENT:
@@ -4681,7 +4616,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset,
             * The association aborted, NO UNLOCK needed since
             * the association is destroyed.
             */
-            return;
+            goto out_now;
         }
         data_processed = 1;
         if (retval == 0) {
@@ -4708,7 +4643,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset,
         sctp_sack_check(stcb, 1, was_a_gap, &abort_flag);
         if (abort_flag) {
             /* Again, we aborted so NO UNLOCK needed */
-            return;
+            goto out_now;
         }
     }
     /* trigger send of any chunks in queue... */
@@ -4737,6 +4672,10 @@ trigger_send:
     sctp_auditing(2, inp, stcb, net);
 #endif
     SCTP_TCB_UNLOCK(stcb);
+out_now:
+#ifdef INVARIANTS
+    sctp_validate_no_locks(inp);
+#endif
     return;
 }
@@ -4932,7 +4871,6 @@ sctp_skip_csum_4:
     * I very much doubt any of the IPSEC stuff will work but I have no
     * idea, so I will leave it in place.
     */
-
     if (inp && ipsec4_in_reject(m, &inp->ip_inp.inp)) {
         ipsec4stat.in_polvio++;
         SCTP_STAT_INCR(sctps_hdrops);
--
cgit v1.1
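
The "modular congestion control" item at the top of this commit shows up in this file as indirect calls through a per-association function table: the ECN-echo and packet-dropped hunks above now invoke stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo() and stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped() instead of open-coded cwnd arithmetic. As a rough, hedged sketch of what such a hook table can look like: only the two hook names and their call sites come from this patch; the struct name, argument types, and the remark about additional hooks are assumptions for illustration, not the actual FreeBSD definitions.

    /*
     * Illustrative sketch only -- everything except the two hook names
     * visible in this patch is an assumption.
     */
    #include <stdint.h>

    struct sctp_tcb;
    struct sctp_nets;
    struct sctp_pktdrop_chunk;

    struct sctp_cc_functions {
        /* invoked from sctp_handle_ecn_echo() in this patch */
        void (*sctp_cwnd_update_after_ecn_echo)(struct sctp_tcb *stcb,
            struct sctp_nets *net);
        /* invoked from sctp_handle_packet_dropped() in this patch;
         * bottle_bw/on_queue types assumed to be uint32_t */
        void (*sctp_cwnd_update_after_packet_dropped)(struct sctp_tcb *stcb,
            struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
            uint32_t *bottle_bw, uint32_t *on_queue);
        /* a real module would presumably carry further hooks (after SACK,
         * after timeout, on association init), filled in per association */
    };

Making RFC2581 the default then amounts to pointing these members at the RFC2581 routines when the association is set up; the input path stays the same no matter which congestion-control module is configured.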