1 From 9d4c0f8cd4cca2c65c7927f839469d6c1bef088f Mon Sep 17 00:00:00 2001
2 From: "Jason A. Donenfeld" <Jason@zx2c4.com>
3 Date: Wed, 9 Sep 2020 13:58:14 +0200
4 Subject: [PATCH 115/124] wireguard: noise: take lock when removing handshake
7 commit 9179ba31367bcf481c3c79b5f028c94faad9f30a upstream.
9 Eric reported that syzkaller found a race of this variety:
12 -------------------------------------------|---------------------------------------
13 wg_index_hashtable_replace(old, ...) |
14 if (hlist_unhashed(&old->index_hash)) |
15 | wg_index_hashtable_remove(old)
16 | hlist_del_init_rcu(&old->index_hash)
17 | old->index_hash.pprev = NULL
18 hlist_replace_rcu(&old->index_hash, ...) |
19 *old->index_hash.pprev |
21 Syzbot wasn't actually able to reproduce this more than once or create a
22 reproducer, because the race window between checking "hlist_unhashed" and
23 calling "hlist_replace_rcu" is just so small. Adding an mdelay(5) or
24 similar there helps make this demonstrable using this simple script:
28 trap 'kill $pid1; kill $pid2; ip link del wg0; ip link del wg1' EXIT
29 ip link add wg0 type wireguard
30 ip link add wg1 type wireguard
31 wg set wg0 private-key <(wg genkey) listen-port 9999
32 wg set wg1 private-key <(wg genkey) peer $(wg show wg0 public-key) endpoint 127.0.0.1:9999 persistent-keepalive 1
33 wg set wg0 peer $(wg show wg1 public-key)
35 yes link set wg1 up | ip -force -batch - &
37 yes link set wg1 down | ip -force -batch - &
41 The fundumental underlying problem is that we permit calls to wg_index_
42 hashtable_remove(handshake.entry) without requiring the caller to take
43 the handshake mutex that is intended to protect members of handshake
44 during mutations. This is consistently the case with calls to wg_index_
45 hashtable_insert(handshake.entry) and wg_index_hashtable_replace(
46 handshake.entry), but it's missing from a pertinent callsite of wg_
47 index_hashtable_remove(handshake.entry). So, this patch makes sure that
50 The original code was a little bit funky though, in the form of:
52 remove(handshake.entry)
53 lock(), memzero(handshake.some_members), unlock()
54 remove(handshake.entry)
56 The original intention of that double removal pattern outside the lock
57 appears to be some attempt to prevent insertions that might happen while
58 locks are dropped during expensive crypto operations, but actually, all
59 callers of wg_index_hashtable_insert(handshake.entry) take the write
60 lock and then explicitly check handshake.state, as they should, which
61 the aforementioned memzero clears, which means an insertion should
62 already be impossible. And regardless, the original intention was
63 necessarily racy, since it wasn't guaranteed that something else would
64 run after the unlock() instead of after the remove(). So, from a
65 soundness perspective, it seems positive to remove what looks like a
68 The crash from both syzbot and from the script above is as follows:
70 general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN
71 KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
72 CPU: 0 PID: 7395 Comm: kworker/0:3 Not tainted 5.9.0-rc4-syzkaller #0
73 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
74 Workqueue: wg-kex-wg1 wg_packet_handshake_receive_worker
75 RIP: 0010:hlist_replace_rcu include/linux/rculist.h:505 [inline]
76 RIP: 0010:wg_index_hashtable_replace+0x176/0x330 drivers/net/wireguard/peerlookup.c:174
77 Code: 00 fc ff df 48 89 f9 48 c1 e9 03 80 3c 01 00 0f 85 44 01 00 00 48 b9 00 00 00 00 00 fc ff df 48 8b 45 10 48 89 c6 48 c1 ee 03 <80> 3c 0e 00 0f 85 06 01 00 00 48 85 d2 4c 89 28 74 47 e8 a3 4f b5
78 RSP: 0018:ffffc90006a97bf8 EFLAGS: 00010246
79 RAX: 0000000000000000 RBX: ffff888050ffc4f8 RCX: dffffc0000000000
80 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88808e04e010
81 RBP: ffff88808e04e000 R08: 0000000000000001 R09: ffff8880543d0000
82 R10: ffffed100a87a000 R11: 000000000000016e R12: ffff8880543d0000
83 R13: ffff88808e04e008 R14: ffff888050ffc508 R15: ffff888050ffc500
84 FS: 0000000000000000(0000) GS:ffff8880ae600000(0000) knlGS:0000000000000000
85 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
86 CR2: 00000000f5505db0 CR3: 0000000097cf7000 CR4: 00000000001526f0
87 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
88 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
90 wg_noise_handshake_begin_session+0x752/0xc9a drivers/net/wireguard/noise.c:820
91 wg_receive_handshake_packet drivers/net/wireguard/receive.c:183 [inline]
92 wg_packet_handshake_receive_worker+0x33b/0x730 drivers/net/wireguard/receive.c:220
93 process_one_work+0x94c/0x1670 kernel/workqueue.c:2269
94 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
95 kthread+0x3b5/0x4a0 kernel/kthread.c:292
96 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294
98 Reported-by: syzbot <syzkaller@googlegroups.com>
99 Reported-by: Eric Dumazet <edumazet@google.com>
100 Link: https://lore.kernel.org/wireguard/20200908145911.4090480-1-edumazet@google.com/
101 Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
102 Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
103 Signed-off-by: David S. Miller <davem@davemloft.net>
104 Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
106 drivers/net/wireguard/noise.c | 5 +----
107 1 file changed, 1 insertion(+), 4 deletions(-)
109 --- a/drivers/net/wireguard/noise.c
110 +++ b/drivers/net/wireguard/noise.c
111 @@ -87,15 +87,12 @@ static void handshake_zero(struct noise_
113 void wg_noise_handshake_clear(struct noise_handshake *handshake)
115 + down_write(&handshake->lock);
116 wg_index_hashtable_remove(
117 handshake->entry.peer->device->index_hashtable,
119 - down_write(&handshake->lock);
120 handshake_zero(handshake);
121 up_write(&handshake->lock);
122 - wg_index_hashtable_remove(
123 - handshake->entry.peer->device->index_hashtable,
124 - &handshake->entry);
127 static struct noise_keypair *keypair_create(struct wg_peer *peer)