struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
+ const struct tcp_congestion_ops __rcu *tcp_congestion_control;
struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
spinlock_t tcp_fastopen_ctx_lock;
unsigned int sysctl_tcp_fastopen_blackhole_timeout;
void tcp_assign_congestion_control(struct sock *sk);
void tcp_init_congestion_control(struct sock *sk);
void tcp_cleanup_congestion_control(struct sock *sk);
-int tcp_set_default_congestion_control(const char *name);
-void tcp_get_default_congestion_control(char *name);
+int tcp_set_default_congestion_control(struct net *net, const char *name);
+void tcp_get_default_congestion_control(struct net *net, char *name);
void tcp_get_available_congestion_control(char *buf, size_t len);
void tcp_get_allowed_congestion_control(char *buf, size_t len);
int tcp_set_allowed_congestion_control(char *allowed);
extern struct tcp_congestion_ops tcp_reno;
struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
-u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca);
+u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca);
#ifdef CONFIG_INET
char *tcp_ca_get_name_by_key(u32 key, char *buffer);
#else
bool ecn_ca = false;
nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+ val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
} else {
val = nla_get_u32(nla);
}
char tmp[TCP_CA_NAME_MAX];
nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+ val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
if (val == TCP_CA_UNSPEC)
return -EINVAL;
} else {
static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
+ struct net *net = container_of(ctl->data, struct net,
+ ipv4.tcp_congestion_control);
char val[TCP_CA_NAME_MAX];
struct ctl_table tbl = {
.data = val,
};
int ret;
- tcp_get_default_congestion_control(val);
+ tcp_get_default_congestion_control(net, val);
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (write && ret == 0)
- ret = tcp_set_default_congestion_control(val);
+ ret = tcp_set_default_congestion_control(net, val);
return ret;
}
.mode = 0644,
.proc_handler = proc_dointvec
},
- {
- .procname = "tcp_congestion_control",
- .mode = 0644,
- .maxlen = TCP_CA_NAME_MAX,
- .proc_handler = proc_tcp_congestion_control,
- },
#ifdef CONFIG_NETLABEL
{
.procname = "cipso_cache_enable",
.extra1 = &one
},
#endif
+ {
+ .procname = "tcp_congestion_control",
+ .data = &init_net.ipv4.tcp_congestion_control,
+ .mode = 0644,
+ .maxlen = TCP_CA_NAME_MAX,
+ .proc_handler = proc_tcp_congestion_control,
+ },
{
.procname = "tcp_keepalive_time",
.data = &init_net.ipv4.sysctl_tcp_keepalive_time,
}
/* Must be called with rcu lock held */
-static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name)
+static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net,
+ const char *name)
{
- const struct tcp_congestion_ops *ca = tcp_ca_find(name);
+ struct tcp_congestion_ops *ca = tcp_ca_find(name);
+
#ifdef CONFIG_MODULES
if (!ca && capable(CAP_NET_ADMIN)) {
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
-u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
+u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
{
const struct tcp_congestion_ops *ca;
u32 key = TCP_CA_UNSPEC;
might_sleep();
rcu_read_lock();
- ca = __tcp_ca_find_autoload(name);
+ ca = tcp_ca_find_autoload(net, name);
if (ca) {
key = ca->key;
*ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
/* Assign choice of congestion control. */
void tcp_assign_congestion_control(struct sock *sk)
{
+ struct net *net = sock_net(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
- struct tcp_congestion_ops *ca;
+ const struct tcp_congestion_ops *ca;
rcu_read_lock();
- list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
- if (likely(try_module_get(ca->owner))) {
- icsk->icsk_ca_ops = ca;
- goto out;
- }
- /* Fallback to next available. The last really
- * guaranteed fallback is Reno from this list.
- */
- }
-out:
+ ca = rcu_dereference(net->ipv4.tcp_congestion_control);
+ if (unlikely(!try_module_get(ca->owner)))
+ ca = &tcp_reno;
+ icsk->icsk_ca_ops = ca;
rcu_read_unlock();
- memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+ memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
if (ca->flags & TCP_CONG_NEEDS_ECN)
INET_ECN_xmit(sk);
else
}
/* Used by sysctl to change default congestion control */
-int tcp_set_default_congestion_control(const char *name)
+int tcp_set_default_congestion_control(struct net *net, const char *name)
{
struct tcp_congestion_ops *ca;
- int ret = -ENOENT;
-
- spin_lock(&tcp_cong_list_lock);
- ca = tcp_ca_find(name);
-#ifdef CONFIG_MODULES
- if (!ca && capable(CAP_NET_ADMIN)) {
- spin_unlock(&tcp_cong_list_lock);
+ const struct tcp_congestion_ops *prev;
+ int ret;
- request_module("tcp_%s", name);
- spin_lock(&tcp_cong_list_lock);
- ca = tcp_ca_find(name);
- }
-#endif
+ rcu_read_lock();
+ ca = tcp_ca_find_autoload(net, name);
+ if (!ca) {
+ ret = -ENOENT;
+ } else if (!try_module_get(ca->owner)) {
+ ret = -EBUSY;
+ } else {
+ prev = xchg(&net->ipv4.tcp_congestion_control, ca);
+ if (prev)
+ module_put(prev->owner);
- if (ca) {
- ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */
- list_move(&ca->list, &tcp_cong_list);
+ ca->flags |= TCP_CONG_NON_RESTRICTED;
ret = 0;
}
- spin_unlock(&tcp_cong_list_lock);
+ rcu_read_unlock();
return ret;
}
/* Set default value from kernel configuration at bootup */
static int __init tcp_congestion_default(void)
{
- return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
+ return tcp_set_default_congestion_control(&init_net,
+ CONFIG_DEFAULT_TCP_CONG);
}
late_initcall(tcp_congestion_default);
}
/* Get current default congestion control */
-void tcp_get_default_congestion_control(char *name)
+void tcp_get_default_congestion_control(struct net *net, char *name)
{
- struct tcp_congestion_ops *ca;
- /* We will always have reno... */
- BUG_ON(list_empty(&tcp_cong_list));
+ const struct tcp_congestion_ops *ca;
rcu_read_lock();
- ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list);
+ ca = rcu_dereference(net->ipv4.tcp_congestion_control);
strncpy(name, ca->name, TCP_CA_NAME_MAX);
rcu_read_unlock();
}
if (!load)
ca = tcp_ca_find(name);
else
- ca = __tcp_ca_find_autoload(name);
+ ca = tcp_ca_find_autoload(sock_net(sk), name);
+
/* No change asking for existing value */
if (ca == icsk->icsk_ca_ops) {
icsk->icsk_ca_setsockopt = 1;
goto out;
}
+
if (!ca) {
err = -ENOENT;
} else if (!load) {
{
int cpu;
+ module_put(net->ipv4.tcp_congestion_control->owner);
+
for_each_possible_cpu(cpu)
inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
free_percpu(net->ipv4.tcp_sk);
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
atomic_set(&net->ipv4.tfo_active_disable_times, 0);
+ /* Reno is always built in */
+ if (!net_eq(net, &init_net) &&
+ try_module_get(init_net.ipv4.tcp_congestion_control->owner))
+ net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
+ else
+ net->ipv4.tcp_congestion_control = &tcp_reno;
+
return 0;
fail:
tcp_sk_exit(net);
static int ip6_convert_metrics(struct mx6_config *mxc,
const struct fib6_config *cfg)
{
+ struct net *net = cfg->fc_nlinfo.nl_net;
bool ecn_ca = false;
struct nlattr *nla;
int remaining;
char tmp[TCP_CA_NAME_MAX];
nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+ val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
if (val == TCP_CA_UNSPEC)
goto err;
} else {