xfs: make inode flush at ENOSPC synchronous
author Dave Chinner <david@fromorbit.com>
Mon, 6 Apr 2009 16:45:44 +0000 (18:45 +0200)
committer Christoph Hellwig <hch@brick.lst.de>
Mon, 6 Apr 2009 16:45:44 +0000 (18:45 +0200)
When we are writing to a single file and hit ENOSPC, we trigger a background
flush of the inode and try again.  Because we hold page locks and the iolock,
the flush cannot proceed until after we release those locks, which only
happens once we have given up and ENOSPC has been reported. Hence, if this
inode is the only dirty inode in the system, we report ENOSPC prematurely.
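
As a rough userspace analogy (this is not the kernel code; the thread, lock
and helper names below are invented for illustration), the pre-patch
behaviour is a waiter that queues work to a background thread and then
sleeps for a bounded time while still holding the lock that the background
thread needs, so the wait expires without any progress being made:

/*
 * Illustrative analogy only -- not the XFS code paths. It models why a
 * background flush queued while the writer still holds the locks the
 * flusher needs cannot make progress before the writer's bounded wait
 * (the delay(msecs_to_jiffies(500)) removed below) expires.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

/* stands in for the iolock/page locks held across the write */
static pthread_mutex_t iolock = PTHREAD_MUTEX_INITIALIZER;
static int flushed;	/* did the "background flush" ever run? */

static void *background_flush(void *arg)
{
	/* the flusher needs the same lock the writer is still holding */
	pthread_mutex_lock(&iolock);
	flushed = 1;
	pthread_mutex_unlock(&iolock);
	return NULL;
}

int main(void)
{
	pthread_t flusher;

	pthread_mutex_lock(&iolock);	/* writer takes its locks */
	pthread_create(&flusher, NULL, background_flush, NULL);

	usleep(500 * 1000);		/* the bounded 500ms wait */
	printf("after 500ms: flushed = %d\n", flushed);	/* prints 0 */

	pthread_mutex_unlock(&iolock);	/* only after giving up do we drop the locks */
	pthread_join(flusher, NULL);
	printf("after unlock: flushed = %d\n", flushed);	/* prints 1 */
	return 0;
}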

To fix this, remove the async flush from the allocation routines and move
it to the top of the write path, where we can do a synchronous flush and
retry the write. Only retry once, as a second ENOSPC indicates that we
really are out of space.
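
In isolation, the pattern is the one in the xfs_lrw.c hunk below: attempt
the write, and on the first ENOSPC do one synchronous flush and retry,
treating a second ENOSPC as real. A minimal userspace sketch of the same
retry-once idiom (write(2) and fsync(2) stand in for
generic_file_buffered_write() and xfs_flush_pages(); the helper name is
invented for illustration):

/*
 * Userspace sketch of the retry-once-on-ENOSPC idiom; not XFS code.
 */
#include <errno.h>
#include <unistd.h>

static ssize_t write_with_enospc_retry(int fd, const void *buf, size_t len)
{
	int enospc = 0;		/* have we already retried once? */
	ssize_t ret;

write_retry:
	ret = write(fd, buf, len);
	if (ret < 0 && errno == ENOSPC && !enospc) {
		/*
		 * First ENOSPC: flush dirty data synchronously, then
		 * retry exactly once. A second ENOSPC means we really
		 * are out of space.
		 */
		if (fsync(fd) < 0)
			return -1;
		enospc = 1;
		goto write_retry;
	}
	return ret;
}

In the kernel hunk, enospc and ret2 play the same roles, with
xfs_flush_pages() doing the synchronous flush once the page locks have
been dropped.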

This avoids the page cache deadlock, identified by Mikulas Patocka, that
occurs when the flush is done synchronously from within the allocation layer.

Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
fs/xfs/linux-2.6/xfs_lrw.c
fs/xfs/linux-2.6/xfs_sync.c
fs/xfs/linux-2.6/xfs_sync.h
fs/xfs/xfs_iomap.c

index 7e90daa0d1d1bfbc6435b3d161fbd73baf36bc85..9142192ccbe692a678793ba3cb3d69058d1ed717 100644 (file)
@@ -751,10 +751,26 @@ start:
                        goto relock;
                }
        } else {
+               int enospc = 0;
+               ssize_t ret2 = 0;
+
+write_retry:
                xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs,
                                *offset, ioflags);
-               ret = generic_file_buffered_write(iocb, iovp, segs,
+               ret2 = generic_file_buffered_write(iocb, iovp, segs,
                                pos, offset, count, ret);
+               /*
+                * if we just got an ENOSPC, flush the inode now that we
+                * aren't holding any page locks and retry *once*
+                */
+               if (ret2 == -ENOSPC && !enospc) {
+                       error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE);
+                       if (error)
+                               goto out_unlock_internal;
+                       enospc = 1;
+                       goto write_retry;
+               }
+               ret = ret2;
        }
 
        current->backing_dev_info = NULL;
index 88caafc8ef1b35e9eb471387d3c263c4db12eaac..73cf8dc197389b75f19c68af1375fd5a16709c96 100644 (file)
@@ -426,31 +426,6 @@ xfs_syncd_queue_work(
  * heads, looking about for more room...
  */
 STATIC void
-xfs_flush_inode_work(
-       struct xfs_mount *mp,
-       void            *arg)
-{
-       struct inode    *inode = arg;
-       filemap_flush(inode->i_mapping);
-       iput(inode);
-}
-
-void
-xfs_flush_inode(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = VFS_I(ip);
-
-       igrab(inode);
-       xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
-       delay(msecs_to_jiffies(500));
-}
-
-/*
- * This is the "bigger hammer" version of xfs_flush_inode_work...
- * (IOW, "If at first you don't succeed, use a Bigger Hammer").
- */
-STATIC void
 xfs_flush_inodes_work(
        struct xfs_mount *mp,
        void            *arg)
index ec95e264805bf25f733e3ae4b9e59c866aa083c8..6e83a35626ed3e86656a2483ca61c8e49502f159 100644 (file)
@@ -44,7 +44,6 @@ int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
 int xfs_quiesce_data(struct xfs_mount *mp);
 void xfs_quiesce_attr(struct xfs_mount *mp);
 
-void xfs_flush_inode(struct xfs_inode *ip);
 void xfs_flush_inodes(struct xfs_inode *ip);
 
 int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
index 8b97d82d7a883d0f1e942705e35fa3a82063acb6..7b8b17071030701e15138de26fa8b81cf6cdd59b 100644 (file)
@@ -347,7 +347,7 @@ xfs_flush_space(
        case 0:
                if (ip->i_delayed_blks) {
                        xfs_iunlock(ip, XFS_ILOCK_EXCL);
-                       xfs_flush_inode(ip);
+                       delay(1);
                        xfs_ilock(ip, XFS_ILOCK_EXCL);
                        *fsynced = 1;
                } else {