more aggressively. Let me remind you again: each orphan eats
up to ~64K of unswappable memory.
-tcp_max_ssthresh - INTEGER
- Limited Slow-Start for TCP with large congestion windows (cwnd) defined in
- RFC3742. Limited slow-start is a mechanism to limit growth of the cwnd
- on the region where cwnd is larger than tcp_max_ssthresh. TCP increases cwnd
- by at most tcp_max_ssthresh segments, and by at least tcp_max_ssthresh/2
- segments per RTT when the cwnd is above tcp_max_ssthresh.
- If TCP connection increased cwnd to thousands (or tens of thousands) segments,
- and thousands of packets were being dropped during slow-start, you can set
- tcp_max_ssthresh to improve performance for new TCP connection.
- Default: 0 (off)
-
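For reference, the growth rule the removed text describes can be modelled in a few lines. This is an illustrative user-space sketch of the kernel's (now removed) RFC3742 simplification, not kernel code; the helper name lss_increment is invented and the max_ssthresh value is arbitrary.

#include <stdio.h>

/* Illustrative model of the removed rule: below max_ssthresh the window
 * doubles each RTT; above it, growth is capped at max_ssthresh/2 segments
 * per RTT. Values are in segments; max_ssthresh here is arbitrary.
 */
static unsigned int lss_increment(unsigned int cwnd, unsigned int max_ssthresh)
{
	if (max_ssthresh > 0 && cwnd > max_ssthresh)
		return max_ssthresh / 2;	/* limited slow start */
	return cwnd;				/* classic exponential growth */
}

int main(void)
{
	unsigned int cwnd = 64, max_ssthresh = 100, rtt;

	for (rtt = 0; rtt < 5; rtt++) {
		printf("RTT %u: cwnd = %u\n", rtt, cwnd);
		cwnd += lss_increment(cwnd, max_ssthresh);
	}
	return 0;	/* prints cwnd = 64, 128, 178, 228, 278 */
}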
tcp_max_syn_backlog - INTEGER
	Maximal number of remembered connection requests that have not yet
	received an acknowledgment from the connecting client.
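As a usage illustration only (the value below is arbitrary, not a recommendation), the backlog on a busy accepting host could be raised at runtime with sysctl:

	# hypothetical tuning example; pick a value to match your workload
	sysctl -w net.ipv4.tcp_max_syn_backlog=2048

An equivalent line in /etc/sysctl.conf makes the setting persistent.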
extern int sysctl_tcp_base_mss;
extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
-extern int sysctl_tcp_max_ssthresh;
extern int sysctl_tcp_thin_linear_timeouts;
extern int sysctl_tcp_thin_dupack;
extern int sysctl_tcp_early_retrans;
/* lower bound for congestion window (optional) */
u32 (*min_cwnd)(const struct sock *sk);
/* do new cwnd calculation (required) */
- void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight);
+ void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked, u32 in_flight);
/* call before changing ca_state (optional) */
void (*set_state)(struct sock *sk, u8 new_state);
/* call when cwnd event occurs (optional) */
void tcp_get_allowed_congestion_control(char *buf, size_t len);
int tcp_set_allowed_congestion_control(char *allowed);
int tcp_set_congestion_control(struct sock *sk, const char *name);
-void tcp_slow_start(struct tcp_sock *tp);
+int tcp_slow_start(struct tcp_sock *tp, u32 acked);
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);
extern struct tcp_congestion_ops tcp_init_congestion_ops;
u32 tcp_reno_ssthresh(struct sock *sk);
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight);
+void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight);
u32 tcp_reno_min_cwnd(const struct sock *sk);
extern struct tcp_congestion_ops tcp_reno;
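As an illustration of the updated cong_avoid signature, a minimal congestion-control module might look like the sketch below. The module name "demo" and the callback demo_cong_avoid are invented for this example; the sketch simply reuses the Reno helpers declared above.

#include <linux/module.h>
#include <net/tcp.h>

/* Hypothetical example module ("demo" is an invented name) showing the
 * updated cong_avoid signature, which now carries the count of newly
 * acked packets in "acked".
 */
static void demo_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tcp_is_cwnd_limited(sk, in_flight))
		return;

	if (tp->snd_cwnd <= tp->snd_ssthresh)
		tcp_slow_start(tp, acked);	/* leftover acks ignored here */
	else
		tcp_cong_avoid_ai(tp, tp->snd_cwnd);
}

static struct tcp_congestion_ops tcp_demo = {
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= demo_cong_avoid,
	.min_cwnd	= tcp_reno_min_cwnd,
	.owner		= THIS_MODULE,
	.name		= "demo",
};

static int __init tcp_demo_init(void)
{
	return tcp_register_congestion_control(&tcp_demo);
}

static void __exit tcp_demo_exit(void)
{
	tcp_unregister_congestion_control(&tcp_demo);
}

module_init(tcp_demo_init);
module_exit(tcp_demo_exit);
MODULE_LICENSE("GPL");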
.mode = 0644,
.proc_handler = proc_allowed_congestion_control,
},
- {
- .procname = "tcp_max_ssthresh",
- .data = &sysctl_tcp_max_ssthresh,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
{
.procname = "tcp_thin_linear_timeouts",
.data = &sysctl_tcp_thin_linear_timeouts,
ca->cnt = 1;
}
-static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
+ u32 in_flight)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
return;
if (tp->snd_cwnd <= tp->snd_ssthresh)
- tcp_slow_start(tp);
+ tcp_slow_start(tp, acked);
else {
bictcp_update(ca, tp->snd_cwnd);
tcp_cong_avoid_ai(tp, ca->cnt);
#include <linux/gfp.h>
#include <net/tcp.h>
-int sysctl_tcp_max_ssthresh = 0;
-
static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);
}
EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
-/*
- * Slow start is used when congestion window is less than slow start
- * threshold. This version implements the basic RFC2581 version
- * and optionally supports:
- * RFC3742 Limited Slow Start - growth limited to max_ssthresh
- * RFC3465 Appropriate Byte Counting - growth limited by bytes acknowledged
+/* Slow start is used while the congestion window is no greater than the
+ * slow start threshold. It is based on RFC2581 and also handles stretch
+ * ACKs properly. We do not implement RFC3465 Appropriate Byte Counting
+ * (ABC) per se but something better: a packet is only counted as (s)acked
+ * in its entirety, to defend against the ACK-division attacks described in
+ * the RFC. Slow start processes a stretch ACK of degree N as if N ACKs of
+ * degree 1 were received back to back, except that ABC caps N at 2. Slow
+ * start exits once cwnd grows past ssthresh and returns the leftover ACKs
+ * so cwnd can be adjusted in congestion-avoidance mode.
*/
-void tcp_slow_start(struct tcp_sock *tp)
+int tcp_slow_start(struct tcp_sock *tp, u32 acked)
{
- int cnt; /* increase in packets */
- unsigned int delta = 0;
- u32 snd_cwnd = tp->snd_cwnd;
-
- if (unlikely(!snd_cwnd)) {
- pr_err_once("snd_cwnd is nul, please report this bug.\n");
- snd_cwnd = 1U;
- }
+ u32 cwnd = tp->snd_cwnd + acked;
- if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh)
- cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */
- else
- cnt = snd_cwnd; /* exponential increase */
-
- tp->snd_cwnd_cnt += cnt;
- while (tp->snd_cwnd_cnt >= snd_cwnd) {
- tp->snd_cwnd_cnt -= snd_cwnd;
- delta++;
- }
- tp->snd_cwnd = min(snd_cwnd + delta, tp->snd_cwnd_clamp);
+ if (cwnd > tp->snd_ssthresh)
+ cwnd = tp->snd_ssthresh + 1;
+ acked -= cwnd - tp->snd_cwnd;
+ tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
+ return acked;
}
EXPORT_SYMBOL_GPL(tcp_slow_start);
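To make the stretch-ACK arithmetic concrete, here is a small user-space mirror of the function above with illustrative values (cwnd 10, ssthresh 12, a stretch ACK covering 5 packets); it follows the same logic but is not kernel code.

#include <stdio.h>

/* User-space mirror of tcp_slow_start(), for illustration only. With
 * cwnd 10, ssthresh 12 and a stretch ACK covering 5 packets, cwnd grows
 * to ssthresh + 1 = 13 and 2 leftover acks are returned for the
 * congestion-avoidance phase.
 */
static unsigned int slow_start(unsigned int *cwnd, unsigned int ssthresh,
			       unsigned int clamp, unsigned int acked)
{
	unsigned int next = *cwnd + acked;

	if (next > ssthresh)
		next = ssthresh + 1;	/* exit slow start just past ssthresh */
	acked -= next - *cwnd;		/* acks consumed by the window growth */
	*cwnd = next < clamp ? next : clamp;
	return acked;			/* leftover acks */
}

int main(void)
{
	unsigned int cwnd = 10;
	unsigned int left = slow_start(&cwnd, 12, 0xffff, 5);

	printf("cwnd=%u leftover=%u\n", cwnd, left);	/* cwnd=13 leftover=2 */
	return 0;
}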
/* This is Jacobson's slow start and congestion avoidance.
* SIGCOMM '88, p. 328.
*/
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
+void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
{
struct tcp_sock *tp = tcp_sk(sk);
/* In "safe" area, increase. */
if (tp->snd_cwnd <= tp->snd_ssthresh)
- tcp_slow_start(tp);
+ tcp_slow_start(tp, acked);
/* In dangerous area, increase slowly. */
else
tcp_cong_avoid_ai(tp, tp->snd_cwnd);
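For context, tcp_cong_avoid_ai(tp, w) is the counter-based additive increase: snd_cwnd_cnt advances once per call and cwnd grows by one segment each time the counter reaches w, so passing w == snd_cwnd gives roughly one segment of growth per RTT. A user-space sketch of that behaviour (illustrative, not the kernel source):

#include <stdio.h>

/* Sketch of the counter-based additive increase performed by
 * tcp_cong_avoid_ai(); with w == cwnd, roughly one window's worth of acks
 * is needed to grow cwnd by a single segment, i.e. ~1 segment per RTT.
 */
static void cong_avoid_ai(unsigned int *cwnd, unsigned int *cnt, unsigned int w)
{
	if (*cnt >= w) {
		(*cwnd)++;	/* grow by one segment */
		*cnt = 0;
	} else {
		(*cnt)++;
	}
}

int main(void)
{
	unsigned int cwnd = 10, cnt = 0, ack;

	for (ack = 0; ack < 11; ack++)	/* a window of acks, plus one */
		cong_avoid_ai(&cwnd, &cnt, cwnd);
	printf("cwnd=%u\n", cwnd);	/* 10 -> 11 */
	return 0;
}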
ca->cnt = 1;
}
-static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
+ u32 in_flight)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
if (tp->snd_cwnd <= tp->snd_ssthresh) {
if (hystart && after(ack, ca->end_seq))
bictcp_hystart_reset(sk);
- tcp_slow_start(tp);
+ tcp_slow_start(tp, acked);
} else {
bictcp_update(ca, tp->snd_cwnd);
tcp_cong_avoid_ai(tp, ca->cnt);
tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
}
-static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 in_flight)
+static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
{
struct tcp_sock *tp = tcp_sk(sk);
struct hstcp *ca = inet_csk_ca(sk);
return;
if (tp->snd_cwnd <= tp->snd_ssthresh)
- tcp_slow_start(tp);
+ tcp_slow_start(tp, acked);
else {
/* Update AIMD parameters.
*
return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
}
-static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
+static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
{
struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
return;
if (tp->snd_cwnd <= tp->snd_ssthresh)
- tcp_slow_start(tp);
+ tcp_slow_start(tp, acked);
else {
/* In dangerous area, increase slowly.
* In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
* o Give cwnd a new value based on the model proposed
* o remember increments <1
*/
-static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
+static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
+ u32 in_flight)
{
struct tcp_sock *tp = tcp_sk(sk);
struct hybla *ca = inet_csk_ca(sk);
return;
if (!ca->hybla_en) {
- tcp_reno_cong_avoid(sk, ack, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked, in_flight);
return;
}
/*
* Increase window in response to successful acknowledgment.
*/
-static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
+static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked,
+ u32 in_flight)
{
struct tcp_sock *tp = tcp_sk(sk);
struct illinois *ca = inet_csk_ca(sk);
/* In slow start */
if (tp->snd_cwnd <= tp->snd_ssthresh)
- tcp_slow_start(tp);
+ tcp_slow_start(tp, acked);
else {
u32 delta;
tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
}
-static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
+static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
+ icsk->icsk_ca_ops->cong_avoid(sk, ack, acked, in_flight);
tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
}
/* Advance cwnd if state allows */
if (tcp_may_raise_cwnd(sk, flag))
- tcp_cong_avoid(sk, ack, prior_in_flight);
+ tcp_cong_avoid(sk, ack, acked, prior_in_flight);
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
 * Will only call the newReno CA when outside the inference phase.
 * Per TCP-LP's paper, that case is handled by additive increase.
*/
-static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
+static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
+ u32 in_flight)
{
struct lp *lp = inet_csk_ca(sk);
if (!(lp->flag & LP_WITHIN_INF))
- tcp_reno_cong_avoid(sk, ack, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked, in_flight);
}
/**
#define TCP_SCALABLE_AI_CNT 50U
#define TCP_SCALABLE_MD_SCALE 3
-static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
+static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked,
+ u32 in_flight)
{
struct tcp_sock *tp = tcp_sk(sk);
return;
if (tp->snd_cwnd <= tp->snd_ssthresh)
- tcp_slow_start(tp);
+ tcp_slow_start(tp, acked);
else
tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT));
}
return min(tp->snd_ssthresh, tp->snd_cwnd-1);
}
-static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
+static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked,
+ u32 in_flight)
{
struct tcp_sock *tp = tcp_sk(sk);
struct vegas *vegas = inet_csk_ca(sk);
if (!vegas->doing_vegas_now) {
- tcp_reno_cong_avoid(sk, ack, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked, in_flight);
return;
}
/* We don't have enough RTT samples to do the Vegas
* calculation, so we'll behave like Reno.
*/
- tcp_reno_cong_avoid(sk, ack, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked, in_flight);
} else {
u32 rtt, diff;
u64 target_cwnd;
} else if (tp->snd_cwnd <= tp->snd_ssthresh) {
/* Slow start. */
- tcp_slow_start(tp);
+ tcp_slow_start(tp, acked);
} else {
/* Congestion avoidance. */
}
/* Use normal slow start */
else if (tp->snd_cwnd <= tp->snd_ssthresh)
- tcp_slow_start(tp);
+ tcp_slow_start(tp, acked);
}
tcp_veno_init(sk);
}
-static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
+static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked,
+ u32 in_flight)
{
struct tcp_sock *tp = tcp_sk(sk);
struct veno *veno = inet_csk_ca(sk);
if (!veno->doing_veno_now) {
- tcp_reno_cong_avoid(sk, ack, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked, in_flight);
return;
}
/* We don't have enough rtt samples to do the Veno
* calculation, so we'll behave like Reno.
*/
- tcp_reno_cong_avoid(sk, ack, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked, in_flight);
} else {
u64 target_cwnd;
u32 rtt;
if (tp->snd_cwnd <= tp->snd_ssthresh) {
/* Slow start. */
- tcp_slow_start(tp);
+ tcp_slow_start(tp, acked);
} else {
/* Congestion avoidance. */
if (veno->diff < beta) {
tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us);
}
-static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
+static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked,
+ u32 in_flight)
{
struct tcp_sock *tp = tcp_sk(sk);
struct yeah *yeah = inet_csk_ca(sk);
return;
if (tp->snd_cwnd <= tp->snd_ssthresh)
- tcp_slow_start(tp);
+ tcp_slow_start(tp, acked);
else if (!yeah->doing_reno_now) {
/* Scalable */