[XFS] Prevent ENOSPC from aborting transactions that need to succeed
author David Chinner <dgc@sgi.com>
Mon, 18 Jun 2007 06:50:27 +0000 (16:50 +1000)
committer Tim Shimmin <tes@chook.melbourne.sgi.com>
Sat, 14 Jul 2007 05:35:19 +0000 (15:35 +1000)
During delayed allocation extent conversion or unwritten extent
conversion, we need to reserve some blocks for transaction reservations.
We need to reserve these blocks in case a btree split occurs and we need
to allocate some blocks.

Unfortunately, we've only ever reserved the number of data blocks we are
allocating, so in both the unwritten and delalloc case we can get ENOSPC
from the transaction reservation. This is bad because in both cases we
cannot report the failure to the writing application.

The fix is two-fold:

1 - leverage the reserved block infrastructure XFS already
has to reserve a small pool of blocks by default to allow
specially marked transactions to dip into when we are at
ENOSPC.
Default setting is min(5%, 1024 blocks).

2 - convert critical transaction reservations to be allowed
to dip into this pool. Spots changed are delalloc
conversion, unwritten extent conversion and growing a
filesystem at ENOSPC.
This also allows growing the filesystem to succeed at ENOSPC.

SGI-PV: 964468
SGI-Modid: xfs-linux-melb:xfs-kern:28865a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
fs/xfs/xfs_fsops.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_mount.c

index ddd45e5b9383d1163930b110cb7b9e926f5da3ae..2251a49f3e17c84e47f1488762a926ce168bb4a6 100644 (file)
@@ -177,6 +177,7 @@ xfs_growfs_data_private(
                up_write(&mp->m_peraglock);
        }
        tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
+       tp->t_flags |= XFS_TRANS_RESERVE;
        if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
                        XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) {
                xfs_trans_cancel(tp, 0);
@@ -499,8 +500,9 @@ xfs_reserve_blocks(
        unsigned long           s;
 
        /* If inval is null, report current values and return */
-
        if (inval == (__uint64_t *)NULL) {
+               if (!outval)
+                       return EINVAL;
                outval->resblks = mp->m_resblks;
                outval->resblks_avail = mp->m_resblks_avail;
                return 0;
@@ -563,8 +565,10 @@ retry:
                }
        }
 out:
-       outval->resblks = mp->m_resblks;
-       outval->resblks_avail = mp->m_resblks_avail;
+       if (outval) {
+               outval->resblks = mp->m_resblks;
+               outval->resblks_avail = mp->m_resblks_avail;
+       }
        XFS_SB_UNLOCK(mp, s);
 
        if (fdblks_delta) {
index 3f2b9f2a7b949d01e0f01f74b21e9b42fd76ae35..ab5062199f55bf33bf6853f771d3677472724584 100644 (file)
@@ -489,13 +489,13 @@ xfs_iomap_write_direct(
        if (unlikely(rt)) {
                resrtextents = qblocks = resaligned;
                resrtextents /= mp->m_sb.sb_rextsize;
-               resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
-               quota_flag = XFS_QMOPT_RES_RTBLKS;
-       } else {
-               resrtextents = 0;
+               resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+               quota_flag = XFS_QMOPT_RES_RTBLKS;
+       } else {
+               resrtextents = 0;
                resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
-               quota_flag = XFS_QMOPT_RES_REGBLKS;
-       }
+               quota_flag = XFS_QMOPT_RES_REGBLKS;
+       }
 
        /*
         * Allocate and setup the transaction
@@ -788,18 +788,12 @@ xfs_iomap_write_allocate(
                nimaps = 0;
                while (nimaps == 0) {
                        tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+                       tp->t_flags |= XFS_TRANS_RESERVE;
                        nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
                        error = xfs_trans_reserve(tp, nres,
                                        XFS_WRITE_LOG_RES(mp),
                                        0, XFS_TRANS_PERM_LOG_RES,
                                        XFS_WRITE_LOG_COUNT);
-                       if (error == ENOSPC) {
-                               error = xfs_trans_reserve(tp, 0,
-                                               XFS_WRITE_LOG_RES(mp),
-                                               0,
-                                               XFS_TRANS_PERM_LOG_RES,
-                                               XFS_WRITE_LOG_COUNT);
-                       }
                        if (error) {
                                xfs_trans_cancel(tp, 0);
                                return XFS_ERROR(error);
@@ -917,8 +911,8 @@ xfs_iomap_write_unwritten(
                 * from unwritten to real. Do allocations in a loop until
                 * we have covered the range passed in.
                 */
-
                tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+               tp->t_flags |= XFS_TRANS_RESERVE;
                error = xfs_trans_reserve(tp, resblks,
                                XFS_WRITE_LOG_RES(mp), 0,
                                XFS_TRANS_PERM_LOG_RES,
index 39cf6f3267c398e656c382626c12e4cb6f5e3521..31453ca0f3dd77827cd9456919bd83d4bd5fbba2 100644 (file)
@@ -725,7 +725,7 @@ xfs_mountfs(
        bhv_vnode_t     *rvp = NULL;
        int             readio_log, writeio_log;
        xfs_daddr_t     d;
-       __uint64_t      ret64;
+       __uint64_t      resblks;
        __int64_t       update_flags;
        uint            quotamount, quotaflags;
        int             agno;
@@ -842,6 +842,7 @@ xfs_mountfs(
         */
        if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
            (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
+               __uint64_t      ret64;
                if (xfs_uuid_mount(mp)) {
                        error = XFS_ERROR(EINVAL);
                        goto error1;
@@ -1135,13 +1136,27 @@ xfs_mountfs(
                goto error4;
        }
 
-
        /*
         * Complete the quota initialisation, post-log-replay component.
         */
        if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
                goto error4;
 
+       /*
+        * Now we are mounted, reserve a small amount of unused space for
+        * privileged transactions. This is needed so that transaction
+        * space required for critical operations can dip into this pool
+        * when at ENOSPC. This is needed for operations like create with
+        * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
+        * are not allowed to use this reserved space.
+        *
+        * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
+        * This may drive us straight to ENOSPC on mount, but that implies
+        * we were already there on the last unmount.
+        */
+       resblks = min_t(__uint64_t, mp->m_sb.sb_dblocks / 20, 1024);
+       xfs_reserve_blocks(mp, &resblks, NULL);
+
        return 0;
 
  error4:
@@ -1181,6 +1196,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 #if defined(DEBUG) || defined(INDUCE_IO_ERROR)
        int64_t         fsid;
 #endif
+       __uint64_t      resblks;
 
        /*
         * We can potentially deadlock here if we have an inode cluster
@@ -1209,6 +1225,23 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
                xfs_binval(mp->m_rtdev_targp);
        }
 
+       /*
+        * Unreserve any blocks we have so that when we unmount we don't account
+        * the reserved free space as used. This is really only necessary for
+        * lazy superblock counting because it trusts the incore superblock
+        * counters to be absolutely correct on clean unmount.
+        *
+        * We don't bother correcting this elsewhere for lazy superblock
+        * counting because on mount of an unclean filesystem we reconstruct the
+        * correct counter value and this is irrelevant.
+        *
+        * For non-lazy counter filesystems, this doesn't matter at all because
+        * we only ever apply deltas to the superblock and hence the incore
+        * value does not matter....
+        */
+       resblks = 0;
+       xfs_reserve_blocks(mp, &resblks, NULL);
+
        xfs_log_sbcount(mp, 1);
        xfs_unmountfs_writesb(mp);
        xfs_unmountfs_wait(mp);                 /* wait for async bufs */