mm/swapfile: fix data races in try_to_unuse()
author	Qian Cai <cai@lca.pw>	Thu, 2 Apr 2020 04:06:13 +0000 (21:06 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>	Thu, 2 Apr 2020 16:35:27 +0000 (09:35 -0700)
si->inuse_pages could be accessed concurrently as noticed by KCSAN,

 write to 0xffff98b00ebd04dc of 4 bytes by task 82262 on cpu 92:
  swap_range_free+0xbe/0x230
  swap_range_free at mm/swapfile.c:719
  swapcache_free_entries+0x1be/0x250
  free_swap_slot+0x1c8/0x220
  __swap_entry_free.constprop.19+0xa3/0xb0
  free_swap_and_cache+0x53/0xa0
  unmap_page_range+0x7e0/0x1ce0
  unmap_single_vma+0xcd/0x170
  unmap_vmas+0x18b/0x220
  exit_mmap+0xee/0x220
  mmput+0xe7/0x240
  do_exit+0x598/0xfd0
  do_group_exit+0x8b/0x180
  get_signal+0x293/0x13d0
  do_signal+0x37/0x5d0
  prepare_exit_to_usermode+0x1b7/0x2c0
  ret_from_intr+0x32/0x42

 read to 0xffff98b00ebd04dc of 4 bytes by task 82499 on cpu 46:
  try_to_unuse+0x86b/0xc80
  try_to_unuse at mm/swapfile.c:2185
  __x64_sys_swapoff+0x372/0xd40
  do_syscall_64+0x91/0xb05
  entry_SYSCALL_64_after_hwframe+0x49/0xbe

The plain reads in try_to_unuse() happen outside the si->lock critical
section, which results in data races; a stale value could be dangerous
when it is used as a loop condition.  Fix them by adding READ_ONCE().
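
[Not part of the original commit message: a minimal userspace sketch of
the pattern being fixed.  The simplified READ_ONCE() below mimics the
kernel macro with a volatile cast, and inuse_pages/drain_one_page() are
hypothetical stand-ins; without READ_ONCE() the compiler is free to
hoist the load out of the loop and spin on a stale value.]

 #include <pthread.h>
 #include <stdio.h>

 /* Simplified stand-in for the kernel's READ_ONCE(): force a fresh load. */
 #define READ_ONCE(x) (*(const volatile typeof(x) *)&(x))

 static unsigned int inuse_pages = 4;	/* updated by another thread */

 static void *writer(void *arg)
 {
 	/* Writer side: in the kernel this update happens under si->lock. */
 	while (__atomic_load_n(&inuse_pages, __ATOMIC_RELAXED))
 		__atomic_fetch_sub(&inuse_pages, 1, __ATOMIC_RELAXED);
 	return NULL;
 }

 int main(void)
 {
 	pthread_t t;

 	pthread_create(&t, NULL, writer, NULL);

 	/*
 	 * Reader side: a plain "while (inuse_pages)" may be compiled into a
 	 * single load before the loop; READ_ONCE() reloads every iteration,
 	 * so the loop actually observes the writer's progress.
 	 */
 	while (READ_ONCE(inuse_pages))
 		;	/* keep retrying, as try_to_unuse() does */

 	pthread_join(t, NULL);
 	printf("all pages drained\n");
 	return 0;
 }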

Signed-off-by: Qian Cai <cai@lca.pw>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Marco Elver <elver@google.com>
Cc: Hugh Dickins <hughd@google.com>
Link: http://lkml.kernel.org/r/1582578903-29294-1-git-send-email-cai@lca.pw
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/swapfile.c

index 6d63bd9b7da264f6fba2224122ab097791f44802..273a923c275c964adc37481ba3e918ee3ab0a80f 100644 (file)
@@ -2132,7 +2132,7 @@ int try_to_unuse(unsigned int type, bool frontswap,
        swp_entry_t entry;
        unsigned int i;
 
-       if (!si->inuse_pages)
+       if (!READ_ONCE(si->inuse_pages))
                return 0;
 
        if (!frontswap)
@@ -2148,7 +2148,7 @@ retry:
 
        spin_lock(&mmlist_lock);
        p = &init_mm.mmlist;
-       while (si->inuse_pages &&
+       while (READ_ONCE(si->inuse_pages) &&
               !signal_pending(current) &&
               (p = p->next) != &init_mm.mmlist) {
 
@@ -2177,7 +2177,7 @@ retry:
        mmput(prev_mm);
 
        i = 0;
-       while (si->inuse_pages &&
+       while (READ_ONCE(si->inuse_pages) &&
               !signal_pending(current) &&
               (i = find_next_to_unuse(si, i, frontswap)) != 0) {
 
@@ -2219,7 +2219,7 @@ retry:
         * been preempted after get_swap_page(), temporarily hiding that swap.
         * It's easy and robust (though cpu-intensive) just to keep retrying.
         */
-       if (si->inuse_pages) {
+       if (READ_ONCE(si->inuse_pages)) {
                if (!signal_pending(current))
                        goto retry;
                retval = -EINTR;