btrfs: qgroup: Use separate meta reservation type for delalloc
author: Qu Wenruo <wqu@suse.com>
Tue, 12 Dec 2017 07:34:32 +0000 (15:34 +0800)
committer: David Sterba <dsterba@suse.com>
Fri, 30 Mar 2018 23:41:14 +0000 (01:41 +0200)
Before this patch, btrfs qgroup was mixing per-transaction meta rsv with
preallocated meta rsv, making it quite easy to underflow the qgroup meta
reservation.

Since we have the new qgroup meta rsv types, apply them to the delalloc
reservation.

Now for delalloc, most of its reserved space will use META_PREALLOC qgroup
rsv type.

And for callers reducing outstanding extent like btrfs_finish_ordered_io(),
they will convert corresponding META_PREALLOC reservation to
META_PERTRANS.

This is mainly due to the fact that current qgroup numbers will only be
updated in btrfs_commit_transaction(); that is to say, if we don't keep
such a placeholder reservation, we can exceed the qgroup limit.

And for callers freeing outstanding extent in error handler, we will
just free META_PREALLOC bytes.

This behavior requires callers of btrfs_qgroup_release_meta() or
btrfs_qgroup_convert_meta() to be aware of which type they are handling.
So in this patch, btrfs_delalloc_release_metadata() and its callers get
an extra parameter to inform qgroup to do the correct meta convert/release.

The good news is, even if we use the wrong type (convert or free), it won't
cause an obvious bug, as the prealloc type is always in good shape, and the
type only affects whether per-trans meta is increased or not.

So in the worst case, the metadata limit can sometimes be exceeded
(no convert at all) or the metadata limit is reached too soon
(no free at all).

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode-map.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ordered-data.c
fs/btrfs/relocation.c

index df0463e2ab7fada44cf82585271489a0e30ae1da..7924e50cc528af3c8e1962f2525f292480dd6094 100644 (file)
@@ -2742,7 +2742,8 @@ int btrfs_check_data_free_space(struct inode *inode,
 void btrfs_free_reserved_data_space(struct inode *inode,
                        struct extent_changeset *reserved, u64 start, u64 len);
 void btrfs_delalloc_release_space(struct inode *inode,
-                       struct extent_changeset *reserved, u64 start, u64 len);
+                                 struct extent_changeset *reserved,
+                                 u64 start, u64 len, bool qgroup_free);
 void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
                                            u64 len);
 void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
@@ -2755,10 +2756,12 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
                                     u64 *qgroup_reserved, bool use_global_rsv);
 void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
                                      struct btrfs_block_rsv *rsv);
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
+                                   bool qgroup_free);
 
 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
-void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes);
+void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
+                                    bool qgroup_free);
 int btrfs_delalloc_reserve_space(struct inode *inode,
                        struct extent_changeset **reserved, u64 start, u64 len);
 void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
index 020c1a1a65263e5f5b3cd36dcc4119959b907fd3..6b07202385d3b620eac2ff223dfd1665a0fca6a4 100644 (file)
@@ -5760,6 +5760,9 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
        if (num_bytes == 0)
                return 0;
 
+       ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+       if (ret)
+               return ret;
        ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
        if (!ret) {
                block_rsv_add_bytes(block_rsv, num_bytes, 0);
@@ -5772,11 +5775,15 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
 /**
  * btrfs_inode_rsv_release - release any excessive reservation.
  * @inode - the inode we need to release from.
+ * @qgroup_free - free or convert qgroup meta.
+ *   Unlike normal operation, qgroup meta reservation needs to know if we are
+ *   freeing qgroup reservation or just converting it into per-trans.  Normally
+ *   @qgroup_free is true for error handling, and false for normal release.
  *
  * This is the same as btrfs_block_rsv_release, except that it handles the
  * tracepoint for the reservation.
  */
-static void btrfs_inode_rsv_release(struct btrfs_inode *inode)
+static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
@@ -5792,6 +5799,10 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode)
        if (released > 0)
                trace_btrfs_space_reservation(fs_info, "delalloc",
                                              btrfs_ino(inode), released, 0);
+       if (qgroup_free)
+               btrfs_qgroup_free_meta_prealloc(inode->root, released);
+       else
+               btrfs_qgroup_convert_reserved_meta(inode->root, released);
 }
 
 void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
@@ -6033,7 +6044,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
-       struct btrfs_root *root = inode->root;
        unsigned nr_extents;
        enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
        int ret = 0;
@@ -6071,19 +6081,9 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
        btrfs_calculate_inode_block_rsv_size(fs_info, inode);
        spin_unlock(&inode->lock);
 
-       if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
-               ret = btrfs_qgroup_reserve_meta_prealloc(root,
-                               nr_extents * fs_info->nodesize, true);
-               if (ret)
-                       goto out_fail;
-       }
-
        ret = btrfs_inode_rsv_refill(inode, flush);
-       if (unlikely(ret)) {
-               btrfs_qgroup_free_meta_prealloc(root,
-                                      nr_extents * fs_info->nodesize);
+       if (unlikely(ret))
                goto out_fail;
-       }
 
        if (delalloc_lock)
                mutex_unlock(&inode->delalloc_mutex);
@@ -6097,7 +6097,7 @@ out_fail:
        btrfs_calculate_inode_block_rsv_size(fs_info, inode);
        spin_unlock(&inode->lock);
 
-       btrfs_inode_rsv_release(inode);
+       btrfs_inode_rsv_release(inode, true);
        if (delalloc_lock)
                mutex_unlock(&inode->delalloc_mutex);
        return ret;
@@ -6107,12 +6107,14 @@ out_fail:
  * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
  * @inode: the inode to release the reservation for.
  * @num_bytes: the number of bytes we are releasing.
+ * @qgroup_free: free qgroup reservation or convert it to per-trans reservation
  *
  * This will release the metadata reservation for an inode.  This can be called
  * once we complete IO for a given set of bytes to release their metadata
  * reservations, or on error for the same reason.
  */
-void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
+void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
+                                    bool qgroup_free)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
 
@@ -6125,13 +6127,14 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
        if (btrfs_is_testing(fs_info))
                return;
 
-       btrfs_inode_rsv_release(inode);
+       btrfs_inode_rsv_release(inode, qgroup_free);
 }
 
 /**
  * btrfs_delalloc_release_extents - release our outstanding_extents
  * @inode: the inode to balance the reservation for.
  * @num_bytes: the number of bytes we originally reserved with
+ * @qgroup_free: do we need to free qgroup meta reservation or convert them.
  *
  * When we reserve space we increase outstanding_extents for the extents we may
  * add.  Once we've set the range as delalloc or created our ordered extents we
@@ -6139,7 +6142,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
  * temporarily tracked outstanding_extents.  This _must_ be used in conjunction
  * with btrfs_delalloc_reserve_metadata.
  */
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
+                                   bool qgroup_free)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
        unsigned num_extents;
@@ -6153,7 +6157,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
        if (btrfs_is_testing(fs_info))
                return;
 
-       btrfs_inode_rsv_release(inode);
+       btrfs_inode_rsv_release(inode, qgroup_free);
 }
 
 /**
@@ -6209,9 +6213,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
  */
 void btrfs_delalloc_release_space(struct inode *inode,
                                  struct extent_changeset *reserved,
-                                 u64 start, u64 len)
+                                 u64 start, u64 len, bool qgroup_free)
 {
-       btrfs_delalloc_release_metadata(BTRFS_I(inode), len);
+       btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
        btrfs_free_reserved_data_space(inode, reserved, start, len);
 }
 
index 6d878f1d1082ae5a9c88473d6b133194a483ac9d..f247300170e59cad9783a7a2dbaf1a6ca8068cf9 100644 (file)
@@ -1691,7 +1691,7 @@ again:
                                    force_page_uptodate);
                if (ret) {
                        btrfs_delalloc_release_extents(BTRFS_I(inode),
-                                                      reserve_bytes);
+                                                      reserve_bytes, true);
                        break;
                }
 
@@ -1703,7 +1703,7 @@ again:
                        if (extents_locked == -EAGAIN)
                                goto again;
                        btrfs_delalloc_release_extents(BTRFS_I(inode),
-                                                      reserve_bytes);
+                                                      reserve_bytes, true);
                        ret = extents_locked;
                        break;
                }
@@ -1738,7 +1738,7 @@ again:
                                                fs_info->sb->s_blocksize_bits;
                        if (only_release_metadata) {
                                btrfs_delalloc_release_metadata(BTRFS_I(inode),
-                                                               release_bytes);
+                                                       release_bytes, true);
                        } else {
                                u64 __pos;
 
@@ -1747,7 +1747,7 @@ again:
                                        (dirty_pages << PAGE_SHIFT);
                                btrfs_delalloc_release_space(inode,
                                                data_reserved, __pos,
-                                               release_bytes);
+                                               release_bytes, true);
                        }
                }
 
@@ -1760,7 +1760,8 @@ again:
                if (extents_locked)
                        unlock_extent_cached(&BTRFS_I(inode)->io_tree,
                                             lockstart, lockend, &cached_state);
-               btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
+               btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes,
+                                              (ret != 0));
                if (ret) {
                        btrfs_drop_pages(pages, num_pages);
                        break;
@@ -1800,11 +1801,11 @@ again:
                if (only_release_metadata) {
                        btrfs_end_write_no_snapshotting(root);
                        btrfs_delalloc_release_metadata(BTRFS_I(inode),
-                                       release_bytes);
+                                       release_bytes, true);
                } else {
                        btrfs_delalloc_release_space(inode, data_reserved,
                                        round_down(pos, fs_info->sectorsize),
-                                       release_bytes);
+                                       release_bytes, true);
                }
        }
 
index a9f22ac50d6a92095dd43d35e613091b5dc6fc87..d0dde9e6afd76916ab491fdbb2b42eb75e433afc 100644 (file)
@@ -3547,7 +3547,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
        if (ret) {
                if (release_metadata)
                        btrfs_delalloc_release_metadata(BTRFS_I(inode),
-                                       inode->i_size);
+                                       inode->i_size, true);
 #ifdef DEBUG
                btrfs_err(fs_info,
                          "failed to write free ino cache for root %llu",
index 022b19336feeadd7c9fb686fcc94f1266b7b382f..9409dcc7020d78455e5eafc65f1331b6c37f8e85 100644 (file)
@@ -500,12 +500,12 @@ again:
        ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
                                              prealloc, prealloc, &alloc_hint);
        if (ret) {
-               btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
+               btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, true);
                goto out_put;
        }
 
        ret = btrfs_write_out_ino_cache(root, trans, path, inode);
-       btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
+       btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, false);
 out_put:
        iput(inode);
 out_release:
index a9a47387e53f485be7f5b2403bc38b2a1ff39d50..1f091c2358a408b340b2172848246021b07bb307 100644 (file)
@@ -1867,7 +1867,7 @@ static void btrfs_clear_bit_hook(void *private_data,
                 */
                if (*bits & EXTENT_CLEAR_META_RESV &&
                    root != fs_info->tree_root)
-                       btrfs_delalloc_release_metadata(inode, len);
+                       btrfs_delalloc_release_metadata(inode, len, false);
 
                /* For sanity tests. */
                if (btrfs_is_testing(fs_info))
@@ -2152,7 +2152,7 @@ again:
 
        ClearPageChecked(page);
        set_page_dirty(page);
-       btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+       btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false);
 out:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
                             &cached_state);
@@ -4802,8 +4802,8 @@ again:
        page = find_or_create_page(mapping, index, mask);
        if (!page) {
                btrfs_delalloc_release_space(inode, data_reserved,
-                                            block_start, blocksize);
-               btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
+                                            block_start, blocksize, true);
+               btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true);
                ret = -ENOMEM;
                goto out;
        }
@@ -4870,8 +4870,8 @@ again:
 out_unlock:
        if (ret)
                btrfs_delalloc_release_space(inode, data_reserved, block_start,
-                                            blocksize);
-       btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
+                                            blocksize, true);
+       btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0));
        unlock_page(page);
        put_page(page);
 out:
@@ -8636,7 +8636,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
                if (ret < 0 && ret != -EIOCBQUEUED) {
                        if (dio_data.reserve)
                                btrfs_delalloc_release_space(inode, data_reserved,
-                                       offset, dio_data.reserve);
+                                       offset, dio_data.reserve, true);
                        /*
                         * On error we might have left some ordered extents
                         * without submitting corresponding bios for them, so
@@ -8652,8 +8652,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
                                        false);
                } else if (ret >= 0 && (size_t)ret < count)
                        btrfs_delalloc_release_space(inode, data_reserved,
-                                       offset, count - (size_t)ret);
-               btrfs_delalloc_release_extents(BTRFS_I(inode), count);
+                                       offset, count - (size_t)ret, true);
+               btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
        }
 out:
        if (wakeup)
@@ -8968,7 +8968,8 @@ again:
                if (reserved_space < PAGE_SIZE) {
                        end = page_start + reserved_space - 1;
                        btrfs_delalloc_release_space(inode, data_reserved,
-                                       page_start, PAGE_SIZE - reserved_space);
+                                       page_start, PAGE_SIZE - reserved_space,
+                                       true);
                }
        }
 
@@ -9018,16 +9019,16 @@ again:
 
 out_unlock:
        if (!ret) {
-               btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+               btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true);
                sb_end_pagefault(inode->i_sb);
                extent_changeset_free(data_reserved);
                return VM_FAULT_LOCKED;
        }
        unlock_page(page);
 out:
-       btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+       btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0));
        btrfs_delalloc_release_space(inode, data_reserved, page_start,
-                                    reserved_space);
+                                    reserved_space, (ret != 0));
 out_noreserve:
        sb_end_pagefault(inode->i_sb);
        extent_changeset_free(data_reserved);
index 94bcc1bf71caba5307c169b060fab0401c1fd46a..8c3ff75cbdd4b4f3521dd5da545bcd5456f2aa24 100644 (file)
@@ -1197,7 +1197,7 @@ again:
                spin_unlock(&BTRFS_I(inode)->lock);
                btrfs_delalloc_release_space(inode, data_reserved,
                                start_index << PAGE_SHIFT,
-                               (page_cnt - i_done) << PAGE_SHIFT);
+                               (page_cnt - i_done) << PAGE_SHIFT, true);
        }
 
 
@@ -1215,7 +1215,8 @@ again:
                unlock_page(pages[i]);
                put_page(pages[i]);
        }
-       btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
+       btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
+                                      false);
        extent_changeset_free(data_reserved);
        return i_done;
 out:
@@ -1225,8 +1226,9 @@ out:
        }
        btrfs_delalloc_release_space(inode, data_reserved,
                        start_index << PAGE_SHIFT,
-                       page_cnt << PAGE_SHIFT);
-       btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
+                       page_cnt << PAGE_SHIFT, true);
+       btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
+                                      true);
        extent_changeset_free(data_reserved);
        return ret;
 
index 9be98e42cfb68175cf28ac4d91ea1574b103c553..661cc3db0c7c023610f0bf57ee92e236e5b493b1 100644 (file)
@@ -610,7 +610,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
        btrfs_mod_outstanding_extents(btrfs_inode, -1);
        spin_unlock(&btrfs_inode->lock);
        if (root != fs_info->tree_root)
-               btrfs_delalloc_release_metadata(btrfs_inode, entry->len);
+               btrfs_delalloc_release_metadata(btrfs_inode, entry->len, false);
 
        tree = &btrfs_inode->ordered_tree;
        spin_lock_irq(&tree->lock);
index cd2298d185dd121bd1412e571a07952c343b0ab5..e61e1ee9af9adaea20750aab29e4c83c6f019ecf 100644 (file)
@@ -3226,7 +3226,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
                                                   mask);
                        if (!page) {
                                btrfs_delalloc_release_metadata(BTRFS_I(inode),
-                                                       PAGE_SIZE);
+                                                       PAGE_SIZE, true);
                                ret = -ENOMEM;
                                goto out;
                        }
@@ -3245,9 +3245,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
                                unlock_page(page);
                                put_page(page);
                                btrfs_delalloc_release_metadata(BTRFS_I(inode),
-                                                       PAGE_SIZE);
+                                                       PAGE_SIZE, true);
                                btrfs_delalloc_release_extents(BTRFS_I(inode),
-                                                              PAGE_SIZE);
+                                                              PAGE_SIZE, true);
                                ret = -EIO;
                                goto out;
                        }
@@ -3274,9 +3274,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
                        unlock_page(page);
                        put_page(page);
                        btrfs_delalloc_release_metadata(BTRFS_I(inode),
-                                                        PAGE_SIZE);
+                                                        PAGE_SIZE, true);
                        btrfs_delalloc_release_extents(BTRFS_I(inode),
-                                                      PAGE_SIZE);
+                                                      PAGE_SIZE, true);
 
                        clear_extent_bits(&BTRFS_I(inode)->io_tree,
                                          page_start, page_end,
@@ -3292,7 +3292,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
                put_page(page);
 
                index++;
-               btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+               btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE,
+                                              false);
                balance_dirty_pages_ratelimited(inode->i_mapping);
                btrfs_throttle(fs_info);
        }