bcache: add separate workqueue for journal_write to avoid deadlock
authorGuoju Fang <fangguoju@gmail.com>
Thu, 27 Sep 2018 15:41:46 +0000 (23:41 +0800)
committerJens Axboe <axboe@kernel.dk>
Thu, 27 Sep 2018 15:47:01 +0000 (09:47 -0600)
After write SSD completed, bcache schedules journal_write work to
system_wq, which is a public workqueue in system, without WQ_MEM_RECLAIM
flag. system_wq is also a bound wq, and there may be no idle kworker on
current processor. Creating a new kworker may unfortunately need to
reclaim memory first, by shrinking cache and slab used by vfs, which
depends on bcache device. That's a deadlock.

This patch create a new workqueue for journal_write with WQ_MEM_RECLAIM
flag. It's rescuer thread will work to avoid the deadlock.

Signed-off-by: Guoju Fang <fangguoju@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Coly Li <colyli@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/md/bcache/bcache.h
drivers/md/bcache/journal.c
drivers/md/bcache/super.c

index 83504dd8100ab2a80d7f0e737e50266de41add32..954dad29e6e8fca910b0ebd24171591f2acd0831 100644 (file)
@@ -965,6 +965,7 @@ void bch_prio_write(struct cache *ca);
 void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
 
 extern struct workqueue_struct *bcache_wq;
+extern struct workqueue_struct *bch_journal_wq;
 extern struct mutex bch_register_lock;
 extern struct list_head bch_cache_sets;
 
index 6116bbf870d8ef9004717bf5d04803895d085bab..522c7426f3a05cee10df0e984521472849b9b707 100644 (file)
@@ -485,7 +485,7 @@ static void do_journal_discard(struct cache *ca)
 
                closure_get(&ca->set->cl);
                INIT_WORK(&ja->discard_work, journal_discard_work);
-               schedule_work(&ja->discard_work);
+               queue_work(bch_journal_wq, &ja->discard_work);
        }
 }
 
@@ -592,7 +592,7 @@ static void journal_write_done(struct closure *cl)
                : &j->w[0];
 
        __closure_wake_up(&w->wait);
-       continue_at_nobarrier(cl, journal_write, system_wq);
+       continue_at_nobarrier(cl, journal_write, bch_journal_wq);
 }
 
 static void journal_write_unlock(struct closure *cl)
@@ -627,7 +627,7 @@ static void journal_write_unlocked(struct closure *cl)
                spin_unlock(&c->journal.lock);
 
                btree_flush_write(c);
-               continue_at(cl, journal_write, system_wq);
+               continue_at(cl, journal_write, bch_journal_wq);
                return;
        }
 
index 94c756c66bd7216a6d83b67eaef891269f965467..30ba9aeb5ee8345ac192e34e51e67beae2127950 100644 (file)
@@ -47,6 +47,7 @@ static int bcache_major;
 static DEFINE_IDA(bcache_device_idx);
 static wait_queue_head_t unregister_wait;
 struct workqueue_struct *bcache_wq;
+struct workqueue_struct *bch_journal_wq;
 
 #define BTREE_MAX_PAGES                (256 * 1024 / PAGE_SIZE)
 /* limitation of partitions number on single bcache device */
@@ -2341,6 +2342,9 @@ static void bcache_exit(void)
                kobject_put(bcache_kobj);
        if (bcache_wq)
                destroy_workqueue(bcache_wq);
+       if (bch_journal_wq)
+               destroy_workqueue(bch_journal_wq);
+
        if (bcache_major)
                unregister_blkdev(bcache_major, "bcache");
        unregister_reboot_notifier(&reboot);
@@ -2370,6 +2374,10 @@ static int __init bcache_init(void)
        if (!bcache_wq)
                goto err;
 
+       bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0);
+       if (!bch_journal_wq)
+               goto err;
+
        bcache_kobj = kobject_create_and_add("bcache", fs_kobj);
        if (!bcache_kobj)
                goto err;