There is a reference counter to ensure that masquerade modules register
notifiers only once. However, the existing reference counter approach is
not safe, test commands are:
while :
do
modprobe ip6t_MASQUERADE &
modprobe nft_masq_ipv6 &
modprobe -rv ip6t_MASQUERADE &
modprobe -rv nft_masq_ipv6 &
done
numbers below represent the reference counter.
--------------------------------------------------------
CPU0 CPU1 CPU2 CPU3 CPU4
[insmod] [insmod] [rmmod] [rmmod] [insmod]
--------------------------------------------------------
0->1
register 1->2
returns 2->1
returns 1->0
0->1
register <--
unregister
--------------------------------------------------------
The unregistation of CPU3 should be processed before the
registration of CPU4.
In order to fix this, use a mutex instead of reference counter.
splat looks like:
[ 323.869557] watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [modprobe:1381]
[ 323.869574] Modules linked in: nf_tables(+) nf_nat_ipv6(-) nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 n]
[ 323.869574] irq event stamp: 194074
[ 323.898930] hardirqs last enabled at (194073): [<
ffffffff90004a0d>] trace_hardirqs_on_thunk+0x1a/0x1c
[ 323.898930] hardirqs last disabled at (194074): [<
ffffffff90004a29>] trace_hardirqs_off_thunk+0x1a/0x1c
[ 323.898930] softirqs last enabled at (182132): [<
ffffffff922006ec>] __do_softirq+0x6ec/0xa3b
[ 323.898930] softirqs last disabled at (182109): [<
ffffffff90193426>] irq_exit+0x1a6/0x1e0
[ 323.898930] CPU: 0 PID: 1381 Comm: modprobe Not tainted 4.20.0-rc2+ #27
[ 323.898930] RIP: 0010:raw_notifier_chain_register+0xea/0x240
[ 323.898930] Code: 3c 03 0f 8e f2 00 00 00 44 3b 6b 10 7f 4d 49 bc 00 00 00 00 00 fc ff df eb 22 48 8d 7b 10 488
[ 323.898930] RSP: 0018:
ffff888101597218 EFLAGS:
00000206 ORIG_RAX:
ffffffffffffff13
[ 323.898930] RAX:
0000000000000000 RBX:
ffffffffc04361c0 RCX:
0000000000000000
[ 323.898930] RDX:
1ffffffff26132ae RSI:
ffffffffc04aa3c0 RDI:
ffffffffc04361d0
[ 323.898930] RBP:
ffffffffc04361c8 R08:
0000000000000000 R09:
0000000000000001
[ 323.898930] R10:
ffff8881015972b0 R11:
fffffbfff26132c4 R12:
dffffc0000000000
[ 323.898930] R13:
0000000000000000 R14:
1ffff110202b2e44 R15:
ffffffffc04aa3c0
[ 323.898930] FS:
00007f813ed41540(0000) GS:
ffff88811ae00000(0000) knlGS:
0000000000000000
[ 323.898930] CS: 0010 DS: 0000 ES: 0000 CR0:
0000000080050033
[ 323.898930] CR2:
0000559bf2c9f120 CR3:
000000010bc80000 CR4:
00000000001006f0
[ 323.898930] Call Trace:
[ 323.898930] ? atomic_notifier_chain_register+0x2d0/0x2d0
[ 323.898930] ? down_read+0x150/0x150
[ 323.898930] ? sched_clock_cpu+0x126/0x170
[ 323.898930] ? nf_tables_core_module_init+0xe4/0xe4 [nf_tables]
[ 323.898930] ? nf_tables_core_module_init+0xe4/0xe4 [nf_tables]
[ 323.898930] register_netdevice_notifier+0xbb/0x790
[ 323.898930] ? __dev_close_many+0x2d0/0x2d0
[ 323.898930] ? __mutex_unlock_slowpath+0x17f/0x740
[ 323.898930] ? wait_for_completion+0x710/0x710
[ 323.898930] ? nf_tables_core_module_init+0xe4/0xe4 [nf_tables]
[ 323.898930] ? up_write+0x6c/0x210
[ 323.898930] ? nf_tables_core_module_init+0xe4/0xe4 [nf_tables]
[ 324.127073] ? nf_tables_core_module_init+0xe4/0xe4 [nf_tables]
[ 324.127073] nft_chain_filter_init+0x1e/0xe8a [nf_tables]
[ 324.127073] nf_tables_module_init+0x37/0x92 [nf_tables]
[ ... ]
Fixes: 8dd33cc93ec9 ("netfilter: nf_nat: generalize IPv4 masquerading support for nf_tables")
Fixes: be6b635cd674 ("netfilter: nf_nat: generalize IPv6 masquerading support for nf_tables")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
.notifier_call = masq_inet_event,
};
-static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+static int masq_refcnt;
+static DEFINE_MUTEX(masq_mutex);
int nf_nat_masquerade_ipv4_register_notifier(void)
{
- int ret;
+ int ret = 0;
+ mutex_lock(&masq_mutex);
/* check if the notifier was already set */
- if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
- return 0;
+ if (++masq_refcnt > 1)
+ goto out_unlock;
/* Register for device down reports */
ret = register_netdevice_notifier(&masq_dev_notifier);
if (ret)
goto err_unregister;
+ mutex_unlock(&masq_mutex);
return ret;
+
err_unregister:
unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
- atomic_dec(&masquerade_notifier_refcount);
+ masq_refcnt--;
+out_unlock:
+ mutex_unlock(&masq_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
void nf_nat_masquerade_ipv4_unregister_notifier(void)
{
+ mutex_lock(&masq_mutex);
/* check if the notifier still has clients */
- if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
- return;
+ if (--masq_refcnt > 0)
+ goto out_unlock;
unregister_netdevice_notifier(&masq_dev_notifier);
unregister_inetaddr_notifier(&masq_inet_notifier);
+out_unlock:
+ mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
.notifier_call = masq_inet6_event,
};
-static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+static int masq_refcnt;
+static DEFINE_MUTEX(masq_mutex);
int nf_nat_masquerade_ipv6_register_notifier(void)
{
- int ret;
+ int ret = 0;
+ mutex_lock(&masq_mutex);
/* check if the notifier is already set */
- if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
- return 0;
+ if (++masq_refcnt > 1)
+ goto out_unlock;
ret = register_netdevice_notifier(&masq_dev_notifier);
if (ret)
if (ret)
goto err_unregister;
+ mutex_unlock(&masq_mutex);
return ret;
+
err_unregister:
unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
- atomic_dec(&masquerade_notifier_refcount);
+ masq_refcnt--;
+out_unlock:
+ mutex_unlock(&masq_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
void nf_nat_masquerade_ipv6_unregister_notifier(void)
{
+ mutex_lock(&masq_mutex);
/* check if the notifier still has clients */
- if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
- return;
+ if (--masq_refcnt > 0)
+ goto out_unlock;
unregister_inet6addr_notifier(&masq_inet6_notifier);
unregister_netdevice_notifier(&masq_dev_notifier);
+out_unlock:
+ mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);