/*
* net/dccp/ccids/ccid3.c
*
+ * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
* Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
* Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
*
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include "../ccid.h"
#include "../dccp.h"
-#include "lib/packet_history.h"
-#include "lib/loss_interval.h"
-#include "lib/tfrc.h"
#include "ccid3.h"
#include <asm/unaligned.h>
/*
* Recalculate t_ipi and delta (should be called whenever X changes)
*/
-static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
+static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
{
/* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6,
*
*/
static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
-
{
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
__u64 min_rate = 2 * hctx->ccid3hctx_x_recv;
const __u64 old_x = hctx->ccid3hctx_x;
- ktime_t now = stamp? *stamp : ktime_get_real();
+ ktime_t now = stamp ? *stamp : ktime_get_real();
/*
* Handle IDLE periods: do not reduce below RFC3390 initial sending rate
ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
ccid3_tx_state_name(hctx->ccid3hctx_state));
- switch (hctx->ccid3hctx_state) {
- case TFRC_SSTATE_NO_FBACK:
- /* RFC 3448, 4.4: Halve send rate directly */
+ if (hctx->ccid3hctx_state == TFRC_SSTATE_FBACK)
+ ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
+ else if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
+ goto out;
+
+ /*
+ * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
+ */
+ if (hctx->ccid3hctx_t_rto == 0 || /* no feedback received yet */
+ hctx->ccid3hctx_p == 0) {
+
+ /* halve send rate directly */
hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2,
(((__u64)hctx->ccid3hctx_s) << 6) /
TFRC_T_MBI);
-
- ccid3_pr_debug("%s(%p, state=%s), updated tx rate to %u "
- "bytes/s\n", dccp_role(sk), sk,
- ccid3_tx_state_name(hctx->ccid3hctx_state),
- (unsigned)(hctx->ccid3hctx_x >> 6));
- /* The value of R is still undefined and so we can not recompute
- * the timeout value. Keep initial value as per [RFC 4342, 5]. */
- t_nfb = TFRC_INITIAL_TIMEOUT;
ccid3_update_send_interval(hctx);
- break;
- case TFRC_SSTATE_FBACK:
+ } else {
/*
- * Modify the cached value of X_recv [RFC 3448, 4.4]
+ * Modify the cached value of X_recv
*
- * If (p == 0 || X_calc > 2 * X_recv)
+ * If (X_calc > 2 * X_recv)
* X_recv = max(X_recv / 2, s / (2 * t_mbi));
* Else
* X_recv = X_calc / 4;
*/
BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
- if (hctx->ccid3hctx_p == 0 ||
- (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))) {
-
+ if (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))
hctx->ccid3hctx_x_recv =
max(hctx->ccid3hctx_x_recv / 2,
(((__u64)hctx->ccid3hctx_s) << 6) /
(2 * TFRC_T_MBI));
- } else {
+ else {
hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
hctx->ccid3hctx_x_recv <<= 4;
}
- /* Now recalculate X [RFC 3448, 4.3, step (4)] */
ccid3_hc_tx_update_x(sk, NULL);
- /*
- * Schedule no feedback timer to expire in
- * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
- * See comments in packet_recv() regarding the value of t_RTO.
- */
- t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
- break;
- case TFRC_SSTATE_NO_SENT:
- DCCP_BUG("%s(%p) - Illegal state NO_SENT", dccp_role(sk), sk);
- /* fall through */
- case TFRC_SSTATE_TERM:
- goto out;
}
+ ccid3_pr_debug("Reduced X to %llu/64 bytes/sec\n",
+ (unsigned long long)hctx->ccid3hctx_x);
+
+ /*
+ * Set new timeout for the nofeedback timer.
+ * See comments in packet_recv() regarding the value of t_RTO.
+ */
+ if (unlikely(hctx->ccid3hctx_t_rto == 0)) /* no feedback yet */
+ t_nfb = TFRC_INITIAL_TIMEOUT;
+ else
+ t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
restart_timer:
sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
return;
+ /* ... and only in the established state */
+ if (hctx->ccid3hctx_state != TFRC_SSTATE_FBACK &&
+ hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
+ return;
opt_recv = &hctx->ccid3hctx_options_received;
+ now = ktime_get_real();
- switch (hctx->ccid3hctx_state) {
- case TFRC_SSTATE_NO_FBACK:
- case TFRC_SSTATE_FBACK:
- now = ktime_get_real();
-
- /* estimate RTT from history if ACK number is valid */
- r_sample = tfrc_tx_hist_rtt(hctx->ccid3hctx_hist,
- DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
- if (r_sample == 0) {
- DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
- dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
- (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
- return;
- }
+ /* Estimate RTT from history if ACK number is valid */
+ r_sample = tfrc_tx_hist_rtt(hctx->ccid3hctx_hist,
+ DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
+ if (r_sample == 0) {
+ DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
+ dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
+ (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
+ return;
+ }
- /* Update receive rate in units of 64 * bytes/second */
- hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
- hctx->ccid3hctx_x_recv <<= 6;
+ /* Update receive rate in units of 64 * bytes/second */
+ hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
+ hctx->ccid3hctx_x_recv <<= 6;
- /* Update loss event rate */
- pinv = opt_recv->ccid3or_loss_event_rate;
- if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */
- hctx->ccid3hctx_p = 0;
- else /* can not exceed 100% */
- hctx->ccid3hctx_p = 1000000 / pinv;
- /*
- * Validate new RTT sample and update moving average
- */
- r_sample = dccp_sample_rtt(sk, r_sample);
- hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9);
+ /* Update loss event rate (which is scaled by 1e6) */
+ pinv = opt_recv->ccid3or_loss_event_rate;
+ if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */
+ hctx->ccid3hctx_p = 0;
+ else /* can not exceed 100% */
+ hctx->ccid3hctx_p = scaled_div(1, pinv);
+ /*
+ * Validate new RTT sample and update moving average
+ */
+ r_sample = dccp_sample_rtt(sk, r_sample);
+ hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9);
+ /*
+ * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
+ */
+ if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
+ ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
- if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
+ if (hctx->ccid3hctx_t_rto == 0) {
/*
- * Larger Initial Windows [RFC 4342, sec. 5]
+ * Initial feedback packet: Larger Initial Windows (4.2)
*/
hctx->ccid3hctx_x = rfc3390_initial_rate(sk);
hctx->ccid3hctx_t_ld = now;
ccid3_update_send_interval(hctx);
- ccid3_pr_debug("%s(%p), s=%u, MSS=%u, "
- "R_sample=%uus, X=%u\n", dccp_role(sk),
- sk, hctx->ccid3hctx_s,
- dccp_sk(sk)->dccps_mss_cache, r_sample,
- (unsigned)(hctx->ccid3hctx_x >> 6));
-
- ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
- } else {
-
- /* Update sending rate (step 4 of [RFC 3448, 4.3]) */
- if (hctx->ccid3hctx_p > 0)
- hctx->ccid3hctx_x_calc =
- tfrc_calc_x(hctx->ccid3hctx_s,
- hctx->ccid3hctx_rtt,
- hctx->ccid3hctx_p);
- ccid3_hc_tx_update_x(sk, &now);
-
- ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
- "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
- dccp_role(sk),
- sk, hctx->ccid3hctx_rtt, r_sample,
- hctx->ccid3hctx_s, hctx->ccid3hctx_p,
- hctx->ccid3hctx_x_calc,
- (unsigned)(hctx->ccid3hctx_x_recv >> 6),
- (unsigned)(hctx->ccid3hctx_x >> 6));
+ goto done_computing_x;
+ } else if (hctx->ccid3hctx_p == 0) {
+ /*
+ * First feedback after nofeedback timer expiry (4.3)
+ */
+ goto done_computing_x;
}
+ }
- /* unschedule no feedback timer */
- sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
+ /* Update sending rate (step 4 of [RFC 3448, 4.3]) */
+ if (hctx->ccid3hctx_p > 0)
+ hctx->ccid3hctx_x_calc =
+ tfrc_calc_x(hctx->ccid3hctx_s,
+ hctx->ccid3hctx_rtt,
+ hctx->ccid3hctx_p);
+ ccid3_hc_tx_update_x(sk, &now);
+
+done_computing_x:
+ ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
+ "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
+ dccp_role(sk),
+ sk, hctx->ccid3hctx_rtt, r_sample,
+ hctx->ccid3hctx_s, hctx->ccid3hctx_p,
+ hctx->ccid3hctx_x_calc,
+ (unsigned)(hctx->ccid3hctx_x_recv >> 6),
+ (unsigned)(hctx->ccid3hctx_x >> 6));
- /*
- * As we have calculated new ipi, delta, t_nom it is possible
- * that we now can send a packet, so wake up dccp_wait_for_ccid
- */
- sk->sk_write_space(sk);
+ /* unschedule no feedback timer */
+ sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
- /*
- * Update timeout interval for the nofeedback timer.
- * We use a configuration option to increase the lower bound.
- * This can help avoid triggering the nofeedback timer too
- * often ('spinning') on LANs with small RTTs.
- */
- hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
- CONFIG_IP_DCCP_CCID3_RTO *
- (USEC_PER_SEC/1000));
- /*
- * Schedule no feedback timer to expire in
- * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
- */
- t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
+ /*
+ * As we have calculated new ipi, delta, t_nom it is possible
+ * that we now can send a packet, so wake up dccp_wait_for_ccid
+ */
+ sk->sk_write_space(sk);
- ccid3_pr_debug("%s(%p), Scheduled no feedback timer to "
- "expire in %lu jiffies (%luus)\n",
- dccp_role(sk),
- sk, usecs_to_jiffies(t_nfb), t_nfb);
+ /*
+ * Update timeout interval for the nofeedback timer.
+ * We use a configuration option to increase the lower bound.
+ * This can help avoid triggering the nofeedback timer too
+ * often ('spinning') on LANs with small RTTs.
+ */
+ hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
+ (CONFIG_IP_DCCP_CCID3_RTO *
+ (USEC_PER_SEC / 1000)));
+ /*
+ * Schedule no feedback timer to expire in
+ * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
+ */
+ t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
- sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
- jiffies + usecs_to_jiffies(t_nfb));
- break;
- case TFRC_SSTATE_NO_SENT: /* fall through */
- case TFRC_SSTATE_TERM: /* ignore feedback when closing */
- break;
- }
+ ccid3_pr_debug("%s(%p), Scheduled no feedback timer to "
+ "expire in %lu jiffies (%luus)\n",
+ dccp_role(sk),
+ sk, usecs_to_jiffies(t_nfb), t_nfb);
+
+ sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+ jiffies + usecs_to_jiffies(t_nfb));
}
static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
ktime_t now;
s64 delta = 0;
- ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk);
-
if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM))
return;
return 0;
}
+/**
+ * ccid3_first_li - Implements [RFC 3448, 6.3.1]
+ * @sk: socket whose CCID3 receiver state is read (and possibly updated)
+ *
+ * Determine the length of the first loss interval via inverse lookup.
+ * Assume that X_recv can be computed by the throughput equation
+ *		    s
+ *	X_recv = --------
+ *		 R * fval
+ * Find some p such that f(p) = fval; return 1/p (scaled).
+ *
+ * Side effect: if no RTT estimate is stored yet (ccid3hcrx_rtt == 0), the
+ * stored RTT is set to DCCP_FALLBACK_RTT.
+ *
+ * Returns ~0U if neither the freshly computed nor the stored receive rate is
+ * non-zero, or if the reverse lookup yields p == 0.
+ */
+static u32 ccid3_first_li(struct sock *sk)
+{
+	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+	u32 x_recv, p;
+	s64 delta;
+	u64 fval;
+
+	if (hcrx->ccid3hcrx_rtt == 0) {
+		DCCP_WARN("No RTT estimate available, using fallback RTT\n");
+		hcrx->ccid3hcrx_rtt = DCCP_FALLBACK_RTT;
+	}
+
+	/*
+	 * Time since the last feedback, in microseconds. Keep the full 64-bit
+	 * signed value (a u32 would silently truncate it) and clamp it to at
+	 * least 1, since it is used as the divisor below and must never be
+	 * zero or negative.
+	 */
+	delta = ktime_to_us(net_timedelta(hcrx->ccid3hcrx_tstamp_last_feedback));
+	if (delta <= 0)
+		delta = 1;
+
+	x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
+	if (x_recv == 0) {		/* would also trigger divide-by-zero */
+		DCCP_WARN("X_recv==0\n");
+		/* Fall back to the previously stored receive rate, if any */
+		x_recv = hcrx->ccid3hcrx_x_recv;
+		if (x_recv == 0) {
+			DCCP_BUG("stored value of X_recv is zero");
+			return ~0U;
+		}
+	}
+
+	/* fval = s / (R * X_recv), computed in two scaled division steps */
+	fval = scaled_div(hcrx->ccid3hcrx_s, hcrx->ccid3hcrx_rtt);
+	fval = scaled_div32(fval, x_recv);
+	p = tfrc_calc_x_reverse_lookup(fval);
+
+	ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
+		       "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
+
+	/* p == 0 has no finite inverse; report it as ~0U */
+	return p == 0 ? ~0U : scaled_div(1, p);
+}
+
static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
/*
* Handle pending losses and otherwise check for new loss
*/
+ if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist) &&
+ tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist,
+ &hcrx->ccid3hcrx_li_hist,
+ skb, ndp, ccid3_first_li, sk) ) {
+ do_feedback = CCID3_FBACK_PARAM_CHANGE;
+ goto done_receiving;
+ }
+
if (tfrc_rx_hist_new_loss_indicated(&hcrx->ccid3hcrx_hist, skb, ndp))
goto update_records;
if (unlikely(!is_data_packet))
goto update_records;
- if (list_empty(&hcrx->ccid3hcrx_li_hist)) { /* no loss so far: p = 0 */
+ if (!tfrc_lh_is_initialised(&hcrx->ccid3hcrx_li_hist)) {
const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb);
/*
* Empty loss history: no loss so far, hence p stays 0.
*/
if (sample != 0)
hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9);
+
+ } else if (tfrc_lh_update_i_mean(&hcrx->ccid3hcrx_li_hist, skb)) {
+ /*
+ * Step (3) of [RFC 3448, 6.1]: Recompute I_mean and, if I_mean
+ * has decreased (resp. p has increased), send feedback now.
+ */
+ do_feedback = CCID3_FBACK_PARAM_CHANGE;
}
/*
update_records:
tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp);
+done_receiving:
if (do_feedback)
ccid3_hc_rx_send_feedback(sk, skb, do_feedback);
}
{
struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid);
- ccid3_pr_debug("entry\n");
-
hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
- INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
+ tfrc_lh_init(&hcrx->ccid3hcrx_li_hist);
return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist);
}
ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
- /* Empty packet history */
tfrc_rx_hist_purge(&hcrx->ccid3hcrx_hist);
-
- /* Empty loss interval history */
- dccp_li_hist_purge(&hcrx->ccid3hcrx_li_hist);
+ tfrc_lh_cleanup(&hcrx->ccid3hcrx_li_hist);
}
static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
u32 __user *optval, int __user *optlen)
{
const struct ccid3_hc_rx_sock *hcrx;
+ struct tfrc_rx_info rx_info;
const void *val;
/* Listen socks doesn't have a private CCID block */
hcrx = ccid3_hc_rx_sk(sk);
switch (optname) {
case DCCP_SOCKOPT_CCID_RX_INFO:
- if (len < sizeof(hcrx->ccid3hcrx_tfrc))
+ if (len < sizeof(rx_info))
return -EINVAL;
- len = sizeof(hcrx->ccid3hcrx_tfrc);
- val = &hcrx->ccid3hcrx_tfrc;
+ rx_info.tfrcrx_x_recv = hcrx->ccid3hcrx_x_recv;
+ rx_info.tfrcrx_rtt = hcrx->ccid3hcrx_rtt;
+ rx_info.tfrcrx_p = hcrx->ccid3hcrx_pinv == 0 ? ~0U :
+ scaled_div(1, hcrx->ccid3hcrx_pinv);
+ len = sizeof(rx_info);
+ val = &rx_info;
break;
default:
return -ENOPROTOOPT;
static struct ccid_operations ccid3 = {
.ccid_id = DCCPC_CCID3,
- .ccid_name = "ccid3",
+ .ccid_name = "TCP-Friendly Rate Control",
.ccid_owner = THIS_MODULE,
.ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock),
.ccid_hc_tx_init = ccid3_hc_tx_init,