xfs: account finobt blocks properly in perag reservation
authorBrian Foster <bfoster@redhat.com>
Fri, 12 Jan 2018 22:07:21 +0000 (14:07 -0800)
committerDarrick J. Wong <darrick.wong@oracle.com>
Fri, 12 Jan 2018 22:09:08 +0000 (14:09 -0800)
XFS started using the perag metadata reservation pool for free inode
btree blocks in commit 76d771b4cbe33 ("xfs: use per-AG reservations
for the finobt"). To handle backwards compatibility, finobt blocks
are accounted against the pool so long as the full reservation is
available at mount time. Otherwise the ->m_inotbt_nores flag is set
and the filesystem falls back to the traditional per-transaction
finobt reservation.

This commit has two problems:

- finobt blocks are always accounted against the metadata
  reservation on allocation, regardless of ->m_inotbt_nores state
- finobt blocks are never returned to the reservation pool on free

The first problem affects reflink+finobt filesystems where the full
finobt reservation is not available at mount time. finobt blocks are
essentially stolen from the reflink reservation, putting refcountbt
management at risk of allocation failure. The second problem is an
unconditional leak of metadata reservation whenever finobt is
enabled.

Update the finobt block allocation callouts to consider
->m_inotbt_nores and account blocks appropriately. Blocks should be
consistently accounted against the metadata pool when
->m_inotbt_nores is false and otherwise tagged as RESV_NONE.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
fs/xfs/libxfs/xfs_ialloc_btree.c

index 47f44d624cb1332e461284c7811586fbdf20d1c1..af197a5f3a82e622c2dea38a9a2b33aaaf676b66 100644 (file)
@@ -141,21 +141,42 @@ xfs_finobt_alloc_block(
        union xfs_btree_ptr     *new,
        int                     *stat)
 {
+       if (cur->bc_mp->m_inotbt_nores)
+               return xfs_inobt_alloc_block(cur, start, new, stat);
        return __xfs_inobt_alloc_block(cur, start, new, stat,
                        XFS_AG_RESV_METADATA);
 }
 
 STATIC int
-xfs_inobt_free_block(
+__xfs_inobt_free_block(
        struct xfs_btree_cur    *cur,
-       struct xfs_buf          *bp)
+       struct xfs_buf          *bp,
+       enum xfs_ag_resv_type   resv)
 {
        struct xfs_owner_info   oinfo;
 
        xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
        return xfs_free_extent(cur->bc_tp,
                        XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1,
-                       &oinfo, XFS_AG_RESV_NONE);
+                       &oinfo, resv);
+}
+
+STATIC int
+xfs_inobt_free_block(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp)
+{
+       return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_NONE);
+}
+
+STATIC int
+xfs_finobt_free_block(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp)
+{
+       if (cur->bc_mp->m_inotbt_nores)
+               return xfs_inobt_free_block(cur, bp);
+       return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_METADATA);
 }
 
 STATIC int
@@ -380,7 +401,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
        .dup_cursor             = xfs_inobt_dup_cursor,
        .set_root               = xfs_finobt_set_root,
        .alloc_block            = xfs_finobt_alloc_block,
-       .free_block             = xfs_inobt_free_block,
+       .free_block             = xfs_finobt_free_block,
        .get_minrecs            = xfs_inobt_get_minrecs,
        .get_maxrecs            = xfs_inobt_get_maxrecs,
        .init_key_from_rec      = xfs_inobt_init_key_from_rec,