From 247e743cbe6e655768c3679f84821e03c1577902 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:53:51 -0400 Subject: [PATCH] Btrfs: Use async helpers to deal with pages that have been improperly dirtied Higher layers sometimes call set_page_dirty without asking the filesystem to help. This causes many problems for the data=ordered and cow code. This commit detects pages that haven't been properly setup for IO and kicks off an async helper to deal with them. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 6 +++ fs/btrfs/disk-io.c | 4 ++ fs/btrfs/extent_io.c | 10 +++++ fs/btrfs/extent_io.h | 1 + fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 93 +++++++++++++++++++++++++++++++++++++++----- 6 files changed, 106 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ceebc052ddcb..4ddc8a8f82cd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -546,6 +546,12 @@ struct btrfs_fs_info { struct btrfs_workers endio_workers; struct btrfs_workers endio_write_workers; struct btrfs_workers submit_workers; + /* + * fixup workers take dirty pages that didn't properly go through + * the cow mechanism and make them safe to write. It happens + * for the sys_munmap function call path + */ + struct btrfs_workers fixup_workers; struct task_struct *transaction_kthread; struct task_struct *cleaner_kthread; int thread_pool_size; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4a5ebafb935a..66466d125c05 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1329,11 +1329,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, */ btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->fixup_workers, 1); btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_write_workers, fs_info->thread_pool_size); btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); + btrfs_start_workers(&fs_info->fixup_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); btrfs_start_workers(&fs_info->endio_write_workers, fs_info->thread_pool_size); @@ -1454,6 +1456,7 @@ fail_tree_root: fail_sys_array: fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); + btrfs_stop_workers(&fs_info->fixup_workers); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_write_workers); @@ -1710,6 +1713,7 @@ int close_ctree(struct btrfs_root *root) truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + btrfs_stop_workers(&fs_info->fixup_workers); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_write_workers); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3f82a6e9ca4f..feff16cb9b40 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2050,6 +2050,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, lock_extent(tree, start, page_end, GFP_NOFS); unlock_start = start; + if (tree->ops && tree->ops->writepage_start_hook) { + ret = tree->ops->writepage_start_hook(page, start, page_end); + if (ret == -EAGAIN) { + unlock_extent(tree, start, page_end, GFP_NOFS); + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } + } + end = page_end; if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { printk("found delalloc bits after lock_extent\n"); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2268a7995896..23affd27af5e 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -30,6 +30,7 @@ typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, struct bio *bio, int mirror_num); struct extent_io_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); + int (*writepage_start_hook)(struct page *page, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); extent_submit_bio_hook_t *submit_bio_hook; int (*merge_bio_hook)(struct page *page, unsigned long offset, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 12e765f7e0d4..20928639d173 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -313,6 +313,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); + ClearPageChecked(p); set_page_dirty(p); } } else { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c5a62f0b9595..47a008c19308 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -403,6 +403,87 @@ static int add_pending_csums(struct btrfs_trans_handle *trans, return 0; } +struct btrfs_writepage_fixup { + struct page *page; + struct btrfs_work work; +}; + +/* see btrfs_writepage_start_hook for details on why this is required */ +void btrfs_writepage_fixup_worker(struct btrfs_work *work) +{ + struct btrfs_writepage_fixup *fixup; + struct btrfs_ordered_extent *ordered; + struct page *page; + struct inode *inode; + u64 page_start; + u64 page_end; + + fixup = container_of(work, struct btrfs_writepage_fixup, work); + page = fixup->page; + + lock_page(page); + if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { + ClearPageChecked(page); + goto out_page; + } + + inode = page->mapping->host; + page_start = page_offset(page); + page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; + + lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) + goto out; + + set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, + GFP_NOFS); + ClearPageChecked(page); +out: + unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); +out_page: + unlock_page(page); + page_cache_release(page); +} + +/* + * There are a few paths in the higher layers of the kernel that directly + * set the page dirty bit without asking the filesystem if it is a + * good idea. This causes problems because we want to make sure COW + * properly happens and the data=ordered rules are followed. + * + * In our case any range that doesn't have the EXTENT_ORDERED bit set + * hasn't been properly setup for IO. We kick off an async process + * to fix it up. The async helper will wait for ordered extents, set + * the delalloc bit and make it safe to write the page. + */ +int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) +{ + struct inode *inode = page->mapping->host; + struct btrfs_writepage_fixup *fixup; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + + ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end, + EXTENT_ORDERED, 0); + if (ret) + return 0; + + if (PageChecked(page)) + return -EAGAIN; + + fixup = kzalloc(sizeof(*fixup), GFP_NOFS); + if (!fixup) + return -EAGAIN; +printk("queueing worker to fixup page %lu %Lu\n", inode->i_ino, page_offset(page)); + SetPageChecked(page); + page_cache_get(page); + fixup->work.func = btrfs_writepage_fixup_worker; + fixup->page = page; + btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work); + return -EAGAIN; +} + int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate) { @@ -1263,6 +1344,7 @@ again: flush_dcache_page(page); kunmap(page); } + ClearPageChecked(page); set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); @@ -2658,6 +2740,7 @@ again: flush_dcache_page(page); kunmap(page); } + ClearPageChecked(page); set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); @@ -3039,15 +3122,6 @@ out_fail: static int btrfs_set_page_dirty(struct page *page) { - struct inode *inode = page->mapping->host; - u64 page_start = page_offset(page); - u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - - if (!test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, - EXTENT_DELALLOC, 0)) { -printk("inode %lu page %Lu not delalloc\n", inode->i_ino, page_offset(page)); -WARN_ON(1); - } return __set_page_dirty_nobuffers(page); } @@ -3098,6 +3172,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, .writepage_end_io_hook = btrfs_writepage_end_io_hook, + .writepage_start_hook = btrfs_writepage_start_hook, .readpage_io_failed_hook = btrfs_io_failed_hook, .set_bit_hook = btrfs_set_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook, -- 2.30.2