From e9195d677d6f06730edd5c2a3fe3283564e39c51 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 22 Mar 2007 23:27:01 -0700 Subject: [TCP] vegas: Use type safe netlink interface Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/tcp_vegas.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'net/ipv4/tcp_vegas.c') diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 5c484dceb967..87e72bef6d08 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -341,16 +341,14 @@ static void tcp_vegas_get_info(struct sock *sk, u32 ext, { const struct vegas *ca = inet_csk_ca(sk); if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { - struct tcpvegas_info *info; - - info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO, - sizeof(*info))); - - info->tcpv_enabled = ca->doing_vegas_now; - info->tcpv_rttcnt = ca->cntRTT; - info->tcpv_rtt = ca->baseRTT; - info->tcpv_minrtt = ca->minRTT; - rtattr_failure: ; + struct tcpvegas_info info = { + .tcpv_enabled = ca->doing_vegas_now, + .tcpv_rttcnt = ca->cntRTT, + .tcpv_rtt = ca->baseRTT, + .tcpv_minrtt = ca->minRTT, + }; + + nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); } } -- cgit v1.2.3 From 164891aadf1721fca4dce473bb0e0998181537c6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 23 Apr 2007 22:26:16 -0700 Subject: [TCP]: Congestion control API update. Do some simple changes to make congestion control API faster/cleaner. * use ktime_t rather than timeval * merge rtt sampling into existing ack callback this means one indirect call versus two per ack. * use flags bits to store options/settings Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ include/net/tcp.h | 9 +++++---- net/ipv4/tcp_bic.c | 2 +- net/ipv4/tcp_cong.c | 14 +++++++------- net/ipv4/tcp_cubic.c | 2 +- net/ipv4/tcp_htcp.c | 2 +- net/ipv4/tcp_illinois.c | 16 +++++++--------- net/ipv4/tcp_input.c | 25 ++++++++----------------- net/ipv4/tcp_lp.c | 8 +++++--- net/ipv4/tcp_output.c | 2 +- net/ipv4/tcp_vegas.c | 10 +++++++--- net/ipv4/tcp_veno.c | 10 +++++++--- net/ipv4/tcp_westwood.c | 2 +- net/ipv4/tcp_yeah.c | 6 ++++-- net/ipv4/tcp_yeah.h | 7 +++++-- 15 files changed, 65 insertions(+), 55 deletions(-) (limited to 'net/ipv4/tcp_vegas.c') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 50f6f6a094cf..2694cb3ca763 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1569,6 +1569,11 @@ static inline void __net_timestamp(struct sk_buff *skb) skb->tstamp = ktime_get_real(); } +static inline ktime_t net_timedelta(ktime_t t) +{ + return ktime_sub(ktime_get_real(), t); +} + extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); extern __sum16 __skb_checksum_complete(struct sk_buff *skb); diff --git a/include/net/tcp.h b/include/net/tcp.h index 43910fe3c448..a385797f160a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -629,9 +629,12 @@ enum tcp_ca_event { #define TCP_CA_MAX 128 #define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX) +#define TCP_CONG_NON_RESTRICTED 0x1 +#define TCP_CONG_RTT_STAMP 0x2 + struct tcp_congestion_ops { struct list_head list; - int non_restricted; + unsigned long flags; /* initialize private data (optional) */ void (*init)(struct sock *sk); @@ -645,8 +648,6 @@ struct tcp_congestion_ops { /* do new cwnd calculation (required) */ void (*cong_avoid)(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int good_ack); - /* round trip time sample per acked packet (optional) */ - void (*rtt_sample)(struct sock *sk, u32 usrtt); /* call before changing ca_state (optional) */ void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ @@ -654,7 +655,7 @@ struct tcp_congestion_ops { /* new value of cwnd after loss (optional) */ u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ - void (*pkts_acked)(struct sock *sk, u32 num_acked); + void (*pkts_acked)(struct sock *sk, u32 num_acked, ktime_t last); /* get info for inet_diag (optional) */ void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 5730333cd0ac..281c9f913257 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -206,7 +206,7 @@ static void bictcp_state(struct sock *sk, u8 new_state) /* Track delayed acknowledgment ratio using sliding window * ratio = (15*ratio + sample) / 16 */ -static void bictcp_acked(struct sock *sk, u32 cnt) +static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last) { const struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index ccd88407e0cd..86b26539e54b 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -126,7 +126,7 @@ int tcp_set_default_congestion_control(const char *name) #endif if (ca) { - ca->non_restricted = 1; /* default is always allowed */ + ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */ list_move(&ca->list, &tcp_cong_list); ret = 0; } @@ -181,7 +181,7 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen) *buf = '\0'; rcu_read_lock(); list_for_each_entry_rcu(ca, &tcp_cong_list, list) { - if (!ca->non_restricted) + if (!(ca->flags & TCP_CONG_NON_RESTRICTED)) continue; offs += snprintf(buf + offs, maxlen - offs, "%s%s", @@ -212,16 +212,16 @@ int tcp_set_allowed_congestion_control(char *val) } } - /* pass 2 clear */ + /* pass 2 clear old values */ list_for_each_entry_rcu(ca, &tcp_cong_list, list) - ca->non_restricted = 0; + ca->flags &= ~TCP_CONG_NON_RESTRICTED; /* pass 3 mark as allowed */ while ((name = strsep(&val, " ")) && *name) { ca = tcp_ca_find(name); WARN_ON(!ca); if (ca) - ca->non_restricted = 1; + ca->flags |= TCP_CONG_NON_RESTRICTED; } out: spin_unlock(&tcp_cong_list_lock); @@ -256,7 +256,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) if (!ca) err = -ENOENT; - else if (!(ca->non_restricted || capable(CAP_NET_ADMIN))) + else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN))) err = -EPERM; else if (!try_module_get(ca->owner)) @@ -371,8 +371,8 @@ u32 tcp_reno_min_cwnd(const struct sock *sk) EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); struct tcp_congestion_ops tcp_reno = { + .flags = TCP_CONG_NON_RESTRICTED, .name = "reno", - .non_restricted = 1, .owner = THIS_MODULE, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 296845be912b..14224487b16b 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -334,7 +334,7 @@ static void bictcp_state(struct sock *sk, u8 new_state) /* Track delayed acknowledgment ratio using sliding window * ratio = (15*ratio + sample) / 16 */ -static void bictcp_acked(struct sock *sk, u32 cnt) +static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last) { const struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 1020eb48d8d1..4ba4a7ae0a85 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -98,7 +98,7 @@ static inline void measure_rtt(struct sock *sk) } } -static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked) +static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t last) { const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index ae6298600886..8e3165917f72 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -83,9 +83,14 @@ static void tcp_illinois_init(struct sock *sk) } /* Measure RTT for each ack. */ -static void tcp_illinois_rtt_sample(struct sock *sk, u32 rtt) +static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last) { struct illinois *ca = inet_csk_ca(sk); + u32 rtt; + + ca->acked = pkts_acked; + + rtt = ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC; /* ignore bogus values, this prevents wraparound in alpha math */ if (rtt > RTT_MAX) @@ -103,13 +108,6 @@ static void tcp_illinois_rtt_sample(struct sock *sk, u32 rtt) ca->sum_rtt += rtt; } -/* Capture count of packets covered by ack, to adjust for delayed acks */ -static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked) -{ - struct illinois *ca = inet_csk_ca(sk); - ca->acked = pkts_acked; -} - /* Maximum queuing delay */ static inline u32 max_delay(const struct illinois *ca) { @@ -325,12 +323,12 @@ static void tcp_illinois_info(struct sock *sk, u32 ext, } static struct tcp_congestion_ops tcp_illinois = { + .flags = TCP_CONG_RTT_STAMP, .init = tcp_illinois_init, .ssthresh = tcp_illinois_ssthresh, .min_cwnd = tcp_reno_min_cwnd, .cong_avoid = tcp_illinois_cong_avoid, .set_state = tcp_illinois_state, - .rtt_sample = tcp_illinois_rtt_sample, .get_info = tcp_illinois_info, .pkts_acked = tcp_illinois_acked, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 633389390788..051f0f815f17 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2402,14 +2402,6 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, return acked; } -static u32 tcp_usrtt(struct timeval *tv) -{ - struct timeval now; - - do_gettimeofday(&now); - return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec); -} - /* Remove acknowledged frames from the retransmission queue. */ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) { @@ -2420,9 +2412,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) int acked = 0; __s32 seq_rtt = -1; u32 pkts_acked = 0; - void (*rtt_sample)(struct sock *sk, u32 usrtt) - = icsk->icsk_ca_ops->rtt_sample; - struct timeval tv = { .tv_sec = 0, .tv_usec = 0 }; + ktime_t last_ackt = ktime_set(0,0); while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { @@ -2471,7 +2461,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) seq_rtt = -1; } else if (seq_rtt < 0) { seq_rtt = now - scb->when; - skb_get_timestamp(skb, &tv); + last_ackt = skb->tstamp; } if (sacked & TCPCB_SACKED_ACKED) tp->sacked_out -= tcp_skb_pcount(skb); @@ -2484,7 +2474,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) } } else if (seq_rtt < 0) { seq_rtt = now - scb->when; - skb_get_timestamp(skb, &tv); + last_ackt = skb->tstamp; } tcp_dec_pcount_approx(&tp->fackets_out, skb); tcp_packets_out_dec(tp, skb); @@ -2494,13 +2484,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) } if (acked&FLAG_ACKED) { + const struct tcp_congestion_ops *ca_ops + = inet_csk(sk)->icsk_ca_ops; + tcp_ack_update_rtt(sk, acked, seq_rtt); tcp_ack_packets_out(sk); - if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED)) - (*rtt_sample)(sk, tcp_usrtt(&tv)); - if (icsk->icsk_ca_ops->pkts_acked) - icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked); + if (ca_ops->pkts_acked) + ca_ops->pkts_acked(sk, pkts_acked, last_ackt); } #if FASTRETRANS_DEBUG > 0 diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index f0ebaf0e21cb..b4e062ab24a1 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -218,7 +218,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk) * 3. calc smoothed OWD (SOWD). * Most ideas come from the original TCP-LP implementation. */ -static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt) +static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt) { struct lp *lp = inet_csk_ca(sk); s64 mowd = tcp_lp_owd_calculator(sk); @@ -261,11 +261,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt) * newReno in increase case. * We work it out by following the idea from TCP-LP's paper directly */ -static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked) +static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last) { struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); + tcp_lp_rtt_sample(sk, ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC); + /* calc inference */ if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr); @@ -312,11 +314,11 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked) } static struct tcp_congestion_ops tcp_lp = { + .flags = TCP_CONG_RTT_STAMP, .init = tcp_lp_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_lp_cong_avoid, .min_cwnd = tcp_reno_min_cwnd, - .rtt_sample = tcp_lp_rtt_sample, .pkts_acked = tcp_lp_pkts_acked, .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3a60aea744ae..e70a6840cb64 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -409,7 +409,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, /* If congestion control is doing timestamping, we must * take such a timestamp before we potentially clone/copy. */ - if (icsk->icsk_ca_ops->rtt_sample) + if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP) __net_timestamp(skb); if (likely(clone_it)) { diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 87e72bef6d08..f4104eeb5f26 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -120,10 +120,13 @@ static void tcp_vegas_init(struct sock *sk) * o min-filter RTT samples from a much longer window (forever for now) * to find the propagation delay (baseRTT) */ -static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt) +static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) { struct vegas *vegas = inet_csk_ca(sk); - u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */ + u32 vrtt; + + /* Never allow zero rtt or baseRTT */ + vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1; /* Filter to find propagation delay: */ if (vrtt < vegas->baseRTT) @@ -353,11 +356,12 @@ static void tcp_vegas_get_info(struct sock *sk, u32 ext, } static struct tcp_congestion_ops tcp_vegas = { + .flags = TCP_CONG_RTT_STAMP, .init = tcp_vegas_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_vegas_cong_avoid, .min_cwnd = tcp_reno_min_cwnd, - .rtt_sample = tcp_vegas_rtt_calc, + .pkts_acked = tcp_vegas_pkts_acked, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, .get_info = tcp_vegas_get_info, diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index ce57bf302f6c..0b50d0607a0e 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -69,10 +69,13 @@ static void tcp_veno_init(struct sock *sk) } /* Do rtt sampling needed for Veno. */ -static void tcp_veno_rtt_calc(struct sock *sk, u32 usrtt) +static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) { struct veno *veno = inet_csk_ca(sk); - u32 vrtt = usrtt + 1; /* Never allow zero rtt or basertt */ + u32 vrtt; + + /* Never allow zero rtt or baseRTT */ + vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1; /* Filter to find propagation delay: */ if (vrtt < veno->basertt) @@ -199,10 +202,11 @@ static u32 tcp_veno_ssthresh(struct sock *sk) } static struct tcp_congestion_ops tcp_veno = { + .flags = TCP_CONG_RTT_STAMP, .init = tcp_veno_init, .ssthresh = tcp_veno_ssthresh, .cong_avoid = tcp_veno_cong_avoid, - .rtt_sample = tcp_veno_rtt_calc, + .pkts_acked = tcp_veno_pkts_acked, .set_state = tcp_veno_state, .cwnd_event = tcp_veno_cwnd_event, diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index ae1026a67720..e61e09dd513e 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -100,7 +100,7 @@ static void westwood_filter(struct westwood *w, u32 delta) * Called after processing group of packets. * but all westwood needs is the last sample of srtt. */ -static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt) +static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) { struct westwood *w = inet_csk_ca(sk); if (cnt > 0) diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 46dd1bee583a..81ef02c1649a 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -64,13 +64,15 @@ static void tcp_yeah_init(struct sock *sk) } -static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked) +static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last) { const struct inet_connection_sock *icsk = inet_csk(sk); struct yeah *yeah = inet_csk_ca(sk); if (icsk->icsk_ca_state == TCP_CA_Open) yeah->pkts_acked = pkts_acked; + + tcp_vegas_pkts_acked(sk, pkts_acked, last); } static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, @@ -237,11 +239,11 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { } static struct tcp_congestion_ops tcp_yeah = { + .flags = TCP_CONG_RTT_STAMP, .init = tcp_yeah_init, .ssthresh = tcp_yeah_ssthresh, .cong_avoid = tcp_yeah_cong_avoid, .min_cwnd = tcp_reno_min_cwnd, - .rtt_sample = tcp_vegas_rtt_calc, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, .get_info = tcp_vegas_get_info, diff --git a/net/ipv4/tcp_yeah.h b/net/ipv4/tcp_yeah.h index a62d82038fd0..33ad5385c188 100644 --- a/net/ipv4/tcp_yeah.h +++ b/net/ipv4/tcp_yeah.h @@ -81,10 +81,13 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state) * o min-filter RTT samples from a much longer window (forever for now) * to find the propagation delay (baseRTT) */ -static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt) +static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) { struct vegas *vegas = inet_csk_ca(sk); - u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */ + u32 vrtt; + + /* Never allow zero rtt or baseRTT */ + vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1; /* Filter to find propagation delay: */ if (vrtt < vegas->baseRTT) -- cgit v1.2.3 From 7752237e9f07b316f81aebdc43f0d7c9a4ba0acf Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 23 Apr 2007 22:28:23 -0700 Subject: [TCP] TCP YEAH: Use vegas dont copy it. Rather than using a copy of vegas code, the YEAH code should just have it exported so there is common code. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_vegas.c | 31 +++++------- net/ipv4/tcp_vegas.h | 24 ++++++++++ net/ipv4/tcp_yeah.c | 53 ++++++++++----------- net/ipv4/tcp_yeah.h | 131 --------------------------------------------------- 4 files changed, 61 insertions(+), 178 deletions(-) create mode 100644 net/ipv4/tcp_vegas.h (limited to 'net/ipv4/tcp_vegas.c') diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index f4104eeb5f26..0f0ee7f732c3 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -38,6 +38,8 @@ #include +#include "tcp_vegas.h" + /* Default values of the Vegas variables, in fixed-point representation * with V_PARAM_SHIFT bits to the right of the binary point. */ @@ -54,17 +56,6 @@ module_param(gamma, int, 0644); MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)"); -/* Vegas variables */ -struct vegas { - u32 beg_snd_nxt; /* right edge during last RTT */ - u32 beg_snd_una; /* left edge during last RTT */ - u32 beg_snd_cwnd; /* saves the size of the cwnd */ - u8 doing_vegas_now;/* if true, do vegas for this RTT */ - u16 cntRTT; /* # of RTTs measured within last RTT */ - u32 minRTT; /* min of RTTs measured within last RTT (in usec) */ - u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */ -}; - /* There are several situations when we must "re-start" Vegas: * * o when a connection is established @@ -81,7 +72,7 @@ struct vegas { * Instead we must wait until the completion of an RTT during * which we actually receive ACKs. */ -static inline void vegas_enable(struct sock *sk) +static void vegas_enable(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct vegas *vegas = inet_csk_ca(sk); @@ -104,13 +95,14 @@ static inline void vegas_disable(struct sock *sk) vegas->doing_vegas_now = 0; } -static void tcp_vegas_init(struct sock *sk) +void tcp_vegas_init(struct sock *sk) { struct vegas *vegas = inet_csk_ca(sk); vegas->baseRTT = 0x7fffffff; vegas_enable(sk); } +EXPORT_SYMBOL_GPL(tcp_vegas_init); /* Do RTT sampling needed for Vegas. * Basically we: @@ -120,7 +112,7 @@ static void tcp_vegas_init(struct sock *sk) * o min-filter RTT samples from a much longer window (forever for now) * to find the propagation delay (baseRTT) */ -static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) +void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) { struct vegas *vegas = inet_csk_ca(sk); u32 vrtt; @@ -138,8 +130,9 @@ static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) vegas->minRTT = min(vegas->minRTT, vrtt); vegas->cntRTT++; } +EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked); -static void tcp_vegas_state(struct sock *sk, u8 ca_state) +void tcp_vegas_state(struct sock *sk, u8 ca_state) { if (ca_state == TCP_CA_Open) @@ -147,6 +140,7 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state) else vegas_disable(sk); } +EXPORT_SYMBOL_GPL(tcp_vegas_state); /* * If the connection is idle and we are restarting, @@ -157,12 +151,13 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state) * packets, _then_ we can make Vegas calculations * again. */ -static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) +void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START) tcp_vegas_init(sk); } +EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 seq_rtt, u32 in_flight, int flag) @@ -339,8 +334,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, } /* Extract info for Tcp socket info provided via netlink. */ -static void tcp_vegas_get_info(struct sock *sk, u32 ext, - struct sk_buff *skb) +void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct vegas *ca = inet_csk_ca(sk); if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { @@ -354,6 +348,7 @@ static void tcp_vegas_get_info(struct sock *sk, u32 ext, nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); } } +EXPORT_SYMBOL_GPL(tcp_vegas_get_info); static struct tcp_congestion_ops tcp_vegas = { .flags = TCP_CONG_RTT_STAMP, diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h new file mode 100644 index 000000000000..502fa8183634 --- /dev/null +++ b/net/ipv4/tcp_vegas.h @@ -0,0 +1,24 @@ +/* + * TCP Vegas congestion control interface + */ +#ifndef __TCP_VEGAS_H +#define __TCP_VEGAS_H 1 + +/* Vegas variables */ +struct vegas { + u32 beg_snd_nxt; /* right edge during last RTT */ + u32 beg_snd_una; /* left edge during last RTT */ + u32 beg_snd_cwnd; /* saves the size of the cwnd */ + u8 doing_vegas_now;/* if true, do vegas for this RTT */ + u16 cntRTT; /* # of RTTs measured within last RTT */ + u32 minRTT; /* min of RTTs measured within last RTT (in usec) */ + u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */ +}; + +extern void tcp_vegas_init(struct sock *sk); +extern void tcp_vegas_state(struct sock *sk, u8 ca_state); +extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last); +extern void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); +extern void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb); + +#endif /* __TCP_VEGAS_H */ diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 81ef02c1649a..545ed237ab53 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -6,13 +6,14 @@ * http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf * */ +#include +#include +#include +#include -#include "tcp_yeah.h" +#include -/* Default values of the Vegas variables, in fixed-point representation - * with V_PARAM_SHIFT bits to the right of the binary point. - */ -#define V_PARAM_SHIFT 1 +#include "tcp_vegas.h" #define TCP_YEAH_ALPHA 80 //lin number of packets queued at the bottleneck #define TCP_YEAH_GAMMA 1 //lin fraction of queue to be removed per rtt @@ -26,14 +27,7 @@ /* YeAH variables */ struct yeah { - /* Vegas */ - u32 beg_snd_nxt; /* right edge during last RTT */ - u32 beg_snd_una; /* left edge during last RTT */ - u32 beg_snd_cwnd; /* saves the size of the cwnd */ - u8 doing_vegas_now;/* if true, do vegas for this RTT */ - u16 cntRTT; /* # of RTTs measured within last RTT */ - u32 minRTT; /* min of RTTs measured within last RTT (in usec) */ - u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */ + struct vegas vegas; /* must be first */ /* YeAH */ u32 lastQ; @@ -84,9 +78,10 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, if (!tcp_is_cwnd_limited(sk, in_flight)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) { + if (tp->snd_cwnd <= tp->snd_ssthresh) tcp_slow_start(tp); - } else if (!yeah->doing_reno_now) { + + else if (!yeah->doing_reno_now) { /* Scalable */ tp->snd_cwnd_cnt+=yeah->pkts_acked; @@ -110,19 +105,19 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, } } - /* The key players are v_beg_snd_una and v_beg_snd_nxt. + /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. * * These are so named because they represent the approximate values * of snd_una and snd_nxt at the beginning of the current RTT. More * precisely, they represent the amount of data sent during the RTT. * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt, - * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding + * we will calculate that (v_beg_snd_nxt - v_vegas.beg_snd_una) outstanding * bytes of data have been ACKed during the course of the RTT, giving * an "actual" rate of: * - * (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration) + * (v_beg_snd_nxt - v_vegas.beg_snd_una) / (rtt duration) * - * Unfortunately, v_beg_snd_una is not exactly equal to snd_una, + * Unfortunately, v_vegas.beg_snd_una is not exactly equal to snd_una, * because delayed ACKs can cover more than one segment, so they * don't line up yeahly with the boundaries of RTTs. * @@ -132,7 +127,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, * So we keep track of our cwnd separately, in v_beg_snd_cwnd. */ - if (after(ack, yeah->beg_snd_nxt)) { + if (after(ack, yeah->vegas.beg_snd_nxt)) { /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got @@ -143,7 +138,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, * If we have 3 samples, we should be OK. */ - if (yeah->cntRTT > 2) { + if (yeah->vegas.cntRTT > 2) { u32 rtt, queue; u64 bw; @@ -158,18 +153,18 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, * of delayed ACKs, at the cost of noticing congestion * a bit later. */ - rtt = yeah->minRTT; + rtt = yeah->vegas.minRTT; /* Compute excess number of packets above bandwidth * Avoid doing full 64 bit divide. */ bw = tp->snd_cwnd; - bw *= rtt - yeah->baseRTT; + bw *= rtt - yeah->vegas.baseRTT; do_div(bw, rtt); queue = bw; if (queue > TCP_YEAH_ALPHA || - rtt - yeah->baseRTT > (yeah->baseRTT / TCP_YEAH_PHY)) { + rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) { if (queue > TCP_YEAH_ALPHA && tp->snd_cwnd > yeah->reno_count) { u32 reduction = min(queue / TCP_YEAH_GAMMA , @@ -208,13 +203,13 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, /* Save the extent of the current window so we can use this * at the end of the next RTT. */ - yeah->beg_snd_una = yeah->beg_snd_nxt; - yeah->beg_snd_nxt = tp->snd_nxt; - yeah->beg_snd_cwnd = tp->snd_cwnd; + yeah->vegas.beg_snd_una = yeah->vegas.beg_snd_nxt; + yeah->vegas.beg_snd_nxt = tp->snd_nxt; + yeah->vegas.beg_snd_cwnd = tp->snd_cwnd; /* Wipe the slate clean for the next RTT. */ - yeah->cntRTT = 0; - yeah->minRTT = 0x7fffffff; + yeah->vegas.cntRTT = 0; + yeah->vegas.minRTT = 0x7fffffff; } } diff --git a/net/ipv4/tcp_yeah.h b/net/ipv4/tcp_yeah.h index 33ad5385c188..ed3b7198f23c 100644 --- a/net/ipv4/tcp_yeah.h +++ b/net/ipv4/tcp_yeah.h @@ -5,134 +5,3 @@ #include #include - -/* Vegas variables */ -struct vegas { - u32 beg_snd_nxt; /* right edge during last RTT */ - u32 beg_snd_una; /* left edge during last RTT */ - u32 beg_snd_cwnd; /* saves the size of the cwnd */ - u8 doing_vegas_now;/* if true, do vegas for this RTT */ - u16 cntRTT; /* # of RTTs measured within last RTT */ - u32 minRTT; /* min of RTTs measured within last RTT (in usec) */ - u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */ -}; - -/* There are several situations when we must "re-start" Vegas: - * - * o when a connection is established - * o after an RTO - * o after fast recovery - * o when we send a packet and there is no outstanding - * unacknowledged data (restarting an idle connection) - * - * In these circumstances we cannot do a Vegas calculation at the - * end of the first RTT, because any calculation we do is using - * stale info -- both the saved cwnd and congestion feedback are - * stale. - * - * Instead we must wait until the completion of an RTT during - * which we actually receive ACKs. - */ -static inline void vegas_enable(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct vegas *vegas = inet_csk_ca(sk); - - /* Begin taking Vegas samples next time we send something. */ - vegas->doing_vegas_now = 1; - - /* Set the beginning of the next send window. */ - vegas->beg_snd_nxt = tp->snd_nxt; - - vegas->cntRTT = 0; - vegas->minRTT = 0x7fffffff; -} - -/* Stop taking Vegas samples for now. */ -static inline void vegas_disable(struct sock *sk) -{ - struct vegas *vegas = inet_csk_ca(sk); - - vegas->doing_vegas_now = 0; -} - -static void tcp_vegas_init(struct sock *sk) -{ - struct vegas *vegas = inet_csk_ca(sk); - - vegas->baseRTT = 0x7fffffff; - vegas_enable(sk); -} - -static void tcp_vegas_state(struct sock *sk, u8 ca_state) -{ - - if (ca_state == TCP_CA_Open) - vegas_enable(sk); - else - vegas_disable(sk); -} - -/* Do RTT sampling needed for Vegas. - * Basically we: - * o min-filter RTT samples from within an RTT to get the current - * propagation delay + queuing delay (we are min-filtering to try to - * avoid the effects of delayed ACKs) - * o min-filter RTT samples from a much longer window (forever for now) - * to find the propagation delay (baseRTT) - */ -static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) -{ - struct vegas *vegas = inet_csk_ca(sk); - u32 vrtt; - - /* Never allow zero rtt or baseRTT */ - vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1; - - /* Filter to find propagation delay: */ - if (vrtt < vegas->baseRTT) - vegas->baseRTT = vrtt; - - /* Find the min RTT during the last RTT to find - * the current prop. delay + queuing delay: - */ - vegas->minRTT = min(vegas->minRTT, vrtt); - vegas->cntRTT++; -} - -/* - * If the connection is idle and we are restarting, - * then we don't want to do any Vegas calculations - * until we get fresh RTT samples. So when we - * restart, we reset our Vegas state to a clean - * slate. After we get acks for this flight of - * packets, _then_ we can make Vegas calculations - * again. - */ -static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) -{ - if (event == CA_EVENT_CWND_RESTART || - event == CA_EVENT_TX_START) - tcp_vegas_init(sk); -} - -/* Extract info for Tcp socket info provided via netlink. */ -static void tcp_vegas_get_info(struct sock *sk, u32 ext, - struct sk_buff *skb) -{ - const struct vegas *ca = inet_csk_ca(sk); - if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { - struct tcpvegas_info *info; - - info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO, - sizeof(*info))); - - info->tcpv_enabled = ca->doing_vegas_now; - info->tcpv_rttcnt = ca->cntRTT; - info->tcpv_rtt = ca->baseRTT; - info->tcpv_minrtt = ca->minRTT; - rtattr_failure: ; - } -} - - -- cgit v1.2.3 From 84299b3bc4eaedc0734fcc9052b01291e44445fc Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 24 Apr 2007 16:21:38 -0700 Subject: [TCP]: Fix linkage errors on i386. To avoid raw division, use ktime_to_timeval() to get usec. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/ktime.h | 6 ++++++ net/ipv4/tcp_illinois.c | 2 +- net/ipv4/tcp_lp.c | 2 +- net/ipv4/tcp_vegas.c | 2 +- net/ipv4/tcp_veno.c | 2 +- 5 files changed, 10 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_vegas.c') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 248305bb9a18..81bb9c7a4eb3 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -259,6 +259,12 @@ static inline s64 ktime_to_ns(const ktime_t kt) #endif +static inline s64 ktime_to_us(const ktime_t kt) +{ + struct timeval tv = ktime_to_timeval(kt); + return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec; +} + /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 8e3165917f72..4adc47c55351 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -90,7 +90,7 @@ static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last) ca->acked = pkts_acked; - rtt = ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC; + rtt = ktime_to_us(net_timedelta(last)); /* ignore bogus values, this prevents wraparound in alpha math */ if (rtt > RTT_MAX) diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index b4e062ab24a1..43294ad9f63e 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -266,7 +266,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last) struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); - tcp_lp_rtt_sample(sk, ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC); + tcp_lp_rtt_sample(sk, ktime_to_us(net_timedelta(last))); /* calc inference */ if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 0f0ee7f732c3..73e19cf7df21 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -118,7 +118,7 @@ void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) u32 vrtt; /* Never allow zero rtt or baseRTT */ - vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1; + vrtt = ktime_to_us(net_timedelta(last)) + 1; /* Filter to find propagation delay: */ if (vrtt < vegas->baseRTT) diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 0b50d0607a0e..9edb340f2f95 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -75,7 +75,7 @@ static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) u32 vrtt; /* Never allow zero rtt or baseRTT */ - vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1; + vrtt = ktime_to_us(net_timedelta(last)) + 1; /* Filter to find propagation delay: */ if (vrtt < veno->basertt) -- cgit v1.2.3