xfs: Properly retry failed inode items in case of error during buffer writeback
authorCarlos Maiolino <cmaiolino@redhat.com>
Wed, 9 Aug 2017 01:21:50 +0000 (18:21 -0700)
committerDarrick J. Wong <darrick.wong@oracle.com>
Tue, 22 Aug 2017 16:22:23 +0000 (09:22 -0700)
When a buffer has been failed during writeback, the inode items into it
are kept flush locked, and are never resubmitted due the flush lock, so,
if any buffer fails to be written, the items in AIL are never written to
disk and never unlocked.

This causes unmount operation to hang due these items flush locked in AIL,
but this also causes the items in AIL to never be written back, even when
the IO device comes back to normal.

I've been testing this patch with a DM-thin device, creating a
filesystem larger than the real device.

When writing enough data to fill the DM-thin device, XFS receives ENOSPC
errors from the device, and keep spinning on xfsaild (when 'retry
forever' configuration is set).

At this point, the filesystem can not be unmounted because of the flush locked
items in AIL, but worse, the items in AIL are never retried at all
(once xfs_inode_item_push() will skip the items that are flush locked),
even if the underlying DM-thin device is expanded to the proper size.

This patch fixes both cases, retrying any item that has been failed
previously, using the infra-structure provided by the previous patch.

Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_buf_item.h
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_trans.h
fs/xfs/xfs_trans_ail.c
fs/xfs/xfs_trans_priv.h

index 7573a1f0bc9a8cac94d8c7f13ca298307808fe11..573fc72c3f23b545d641d0ecb8d98eb2578ff712 100644 (file)
@@ -1234,3 +1234,31 @@ xfs_buf_iodone(
        xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
        xfs_buf_item_free(BUF_ITEM(lip));
 }
+
+/*
+ * Requeue a failed buffer for writeback
+ *
+ * Return true if the buffer has been re-queued properly, false otherwise
+ */
+bool
+xfs_buf_resubmit_failed_buffers(
+       struct xfs_buf          *bp,
+       struct xfs_log_item     *lip,
+       struct list_head        *buffer_list)
+{
+       struct xfs_log_item     *next;
+
+       /*
+        * Clear XFS_LI_FAILED flag from all items before resubmit
+        *
+        * XFS_LI_FAILED set/clear is protected by xa_lock, caller  this
+        * function already have it acquired
+        */
+       for (; lip; lip = next) {
+               next = lip->li_bio_list;
+               xfs_clear_li_failed(lip);
+       }
+
+       /* Add this buffer back to the delayed write list */
+       return xfs_buf_delwri_queue(bp, buffer_list);
+}
index f7eba99d19dde9404c1517633fd8f34f16a36599..530686e1afb94307f7a250aeacc9e31e5b8a49f7 100644 (file)
@@ -70,6 +70,9 @@ void  xfs_buf_attach_iodone(struct xfs_buf *,
                              xfs_log_item_t *);
 void   xfs_buf_iodone_callbacks(struct xfs_buf *);
 void   xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
+bool   xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
+                                       struct xfs_log_item *,
+                                       struct list_head *);
 
 extern kmem_zone_t     *xfs_buf_item_zone;
 
index 013cc78d7daf46ce69427dd05830be65182d71da..6d0f74ec31e89c35769b55f3402003c2548ae980 100644 (file)
@@ -27,6 +27,7 @@
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_trans_priv.h"
+#include "xfs_buf_item.h"
 #include "xfs_log.h"
 
 
@@ -475,6 +476,23 @@ xfs_inode_item_unpin(
                wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
 }
 
+/*
+ * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer
+ * have been failed during writeback
+ *
+ * This informs the AIL that the inode is already flush locked on the next push,
+ * and acquires a hold on the buffer to ensure that it isn't reclaimed before
+ * dirty data makes it to disk.
+ */
+STATIC void
+xfs_inode_item_error(
+       struct xfs_log_item     *lip,
+       struct xfs_buf          *bp)
+{
+       ASSERT(xfs_isiflocked(INODE_ITEM(lip)->ili_inode));
+       xfs_set_li_failed(lip, bp);
+}
+
 STATIC uint
 xfs_inode_item_push(
        struct xfs_log_item     *lip,
@@ -484,13 +502,28 @@ xfs_inode_item_push(
 {
        struct xfs_inode_log_item *iip = INODE_ITEM(lip);
        struct xfs_inode        *ip = iip->ili_inode;
-       struct xfs_buf          *bp = NULL;
+       struct xfs_buf          *bp = lip->li_buf;
        uint                    rval = XFS_ITEM_SUCCESS;
        int                     error;
 
        if (xfs_ipincount(ip) > 0)
                return XFS_ITEM_PINNED;
 
+       /*
+        * The buffer containing this item failed to be written back
+        * previously. Resubmit the buffer for IO.
+        */
+       if (lip->li_flags & XFS_LI_FAILED) {
+               if (!xfs_buf_trylock(bp))
+                       return XFS_ITEM_LOCKED;
+
+               if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list))
+                       rval = XFS_ITEM_FLUSHING;
+
+               xfs_buf_unlock(bp);
+               return rval;
+       }
+
        if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
                return XFS_ITEM_LOCKED;
 
@@ -622,7 +655,8 @@ static const struct xfs_item_ops xfs_inode_item_ops = {
        .iop_unlock     = xfs_inode_item_unlock,
        .iop_committed  = xfs_inode_item_committed,
        .iop_push       = xfs_inode_item_push,
-       .iop_committing = xfs_inode_item_committing
+       .iop_committing = xfs_inode_item_committing,
+       .iop_error      = xfs_inode_item_error
 };
 
 
@@ -710,7 +744,8 @@ xfs_iflush_done(
                 * the AIL lock.
                 */
                iip = INODE_ITEM(blip);
-               if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn)
+               if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
+                   lip->li_flags & XFS_LI_FAILED)
                        need_ail++;
 
                blip = next;
@@ -718,7 +753,8 @@ xfs_iflush_done(
 
        /* make sure we capture the state of the initial inode. */
        iip = INODE_ITEM(lip);
-       if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn)
+       if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) ||
+           lip->li_flags & XFS_LI_FAILED)
                need_ail++;
 
        /*
@@ -739,6 +775,9 @@ xfs_iflush_done(
                        if (INODE_ITEM(blip)->ili_logged &&
                            blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
                                mlip_changed |= xfs_ail_delete_one(ailp, blip);
+                       else {
+                               xfs_clear_li_failed(blip);
+                       }
                }
 
                if (mlip_changed) {
index 442d679210a18198236a969026beeef43027e8c6..7d627721e4b31758d82fe5df68705733926052fb 100644 (file)
@@ -49,6 +49,7 @@ typedef struct xfs_log_item {
        struct xfs_ail                  *li_ailp;       /* ptr to AIL */
        uint                            li_type;        /* item type */
        uint                            li_flags;       /* misc flags */
+       struct xfs_buf                  *li_buf;        /* real buffer pointer */
        struct xfs_log_item             *li_bio_list;   /* buffer item list */
        void                            (*li_cb)(struct xfs_buf *,
                                                 struct xfs_log_item *);
index 9056c0f34a3c42d40a6acaba8a189a55bbf0cc97..70f5ab01732360450a3f1c95e23b74c829db2672 100644 (file)
@@ -687,12 +687,13 @@ xfs_trans_ail_update_bulk(
 bool
 xfs_ail_delete_one(
        struct xfs_ail          *ailp,
-       struct xfs_log_item     *lip)
+       struct xfs_log_item     *lip)
 {
        struct xfs_log_item     *mlip = xfs_ail_min(ailp);
 
        trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
        xfs_ail_delete(ailp, lip);
+       xfs_clear_li_failed(lip);
        lip->li_flags &= ~XFS_LI_IN_AIL;
        lip->li_lsn = 0;
 
index d91706c56c633a1eb4ed243cf182d2891200e24d..b317a3644c006817d7c6a2ccc3b0e08cc59e2a3f 100644 (file)
@@ -164,4 +164,35 @@ xfs_trans_ail_copy_lsn(
        *dst = *src;
 }
 #endif
+
+static inline void
+xfs_clear_li_failed(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_buf  *bp = lip->li_buf;
+
+       ASSERT(lip->li_flags & XFS_LI_IN_AIL);
+       lockdep_assert_held(&lip->li_ailp->xa_lock);
+
+       if (lip->li_flags & XFS_LI_FAILED) {
+               lip->li_flags &= ~XFS_LI_FAILED;
+               lip->li_buf = NULL;
+               xfs_buf_rele(bp);
+       }
+}
+
+static inline void
+xfs_set_li_failed(
+       struct xfs_log_item     *lip,
+       struct xfs_buf          *bp)
+{
+       lockdep_assert_held(&lip->li_ailp->xa_lock);
+
+       if (!(lip->li_flags & XFS_LI_FAILED)) {
+               xfs_buf_hold(bp);
+               lip->li_flags |= XFS_LI_FAILED;
+               lip->li_buf = bp;
+       }
+}
+
 #endif /* __XFS_TRANS_PRIV_H__ */