xfs: remove "no-allocation" reservations for file creations
authorChristoph Hellwig <hch@lst.de>
Tue, 5 Dec 2017 01:32:55 +0000 (17:32 -0800)
committerDarrick J. Wong <darrick.wong@oracle.com>
Sat, 9 Dec 2017 01:51:05 +0000 (17:51 -0800)
If we create a new file we will need an inode, and usually some metadata
in the parent direction.  Aiming for everything to go well despite the
lack of a reservation leads to dirty transactions cancelled under a heavy
create/delete load.  This patch removes those nospace transactions, which
will lead to slightly earlier ENOSPC on some workloads, but instead
prevent file system shutdowns due to cancelling dirty transactions for
others.

A customer could observe assertations failures and shutdowns due to
cancelation of dirty transactions during heavy NFS workloads as shown
below:

2017-05-30 21:17:06 kernel: WARNING: [ 2670.728125] XFS: Assertion failed: error != -ENOSPC, file: fs/xfs/xfs_inode.c, line: 1262

2017-05-30 21:17:06 kernel: WARNING: [ 2670.728222] Call Trace:
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728246]  [<ffffffff81795daf>] dump_stack+0x63/0x81
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728262]  [<ffffffff810a1a5a>] warn_slowpath_common+0x8a/0xc0
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728264]  [<ffffffff810a1b8a>] warn_slowpath_null+0x1a/0x20
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728285]  [<ffffffffa01bf403>] asswarn+0x33/0x40 [xfs]
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728308]  [<ffffffffa01bb07e>] xfs_create+0x7be/0x7d0 [xfs]
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728329]  [<ffffffffa01b6ffb>] xfs_generic_create+0x1fb/0x2e0 [xfs]
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728348]  [<ffffffffa01b7114>] xfs_vn_mknod+0x14/0x20 [xfs]
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728366]  [<ffffffffa01b7153>] xfs_vn_create+0x13/0x20 [xfs]
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728380]  [<ffffffff81231de5>] vfs_create+0xd5/0x140
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728390]  [<ffffffffa045ddb9>] do_nfsd_create+0x499/0x610 [nfsd]
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728396]  [<ffffffffa0465fa5>] nfsd3_proc_create+0x135/0x210 [nfsd]
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728401]  [<ffffffffa04561e3>] nfsd_dispatch+0xc3/0x210 [nfsd]
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728416]  [<ffffffffa03bfa43>] svc_process_common+0x453/0x6f0 [sunrpc]
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728423]  [<ffffffffa03bfdf3>] svc_process+0x113/0x1f0 [sunrpc]
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728427]  [<ffffffffa0455bcf>] nfsd+0x10f/0x180 [nfsd]
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728432]  [<ffffffffa0455ac0>] ? nfsd_destroy+0x80/0x80 [nfsd]
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728438]  [<ffffffff810c0d58>] kthread+0xd8/0xf0
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728441]  [<ffffffff810c0c80>] ? kthread_create_on_node+0x1b0/0x1b0
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728451]  [<ffffffff8179d962>] ret_from_fork+0x42/0x70
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728453]  [<ffffffff810c0c80>] ? kthread_create_on_node+0x1b0/0x1b0
2017-05-30 21:17:06 kernel: WARNING: [ 2670.728454] ---[ end trace f9822c842fec81d4 ]---

2017-05-30 21:17:06 kernel: ALERT: [ 2670.728477] XFS (sdb): Internal error xfs_trans_cancel at line 983 of file fs/xfs/xfs_trans.c.  Caller xfs_create+0x4ee/0x7d0 [xfs]

2017-05-30 21:17:06 kernel: ALERT: [ 2670.728684] XFS (sdb): Corruption of in-memory data detected. Shutting down filesystem
2017-05-30 21:17:06 kernel: ALERT: [ 2670.728685] XFS (sdb): Please umount the filesystem and rectify the problem(s)

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_ialloc.h
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_qm.c
fs/xfs/xfs_symlink.c

index de3f04a986565d7265fc694b80962000de8d7dc8..3b57ef0f2f76c758e6a9c8b89b7a0c470cdd09a9 100644 (file)
@@ -920,8 +920,7 @@ STATIC xfs_agnumber_t
 xfs_ialloc_ag_select(
        xfs_trans_t     *tp,            /* transaction pointer */
        xfs_ino_t       parent,         /* parent directory inode number */
-       umode_t         mode,           /* bits set to indicate file type */
-       int             okalloc)        /* ok to allocate more space */
+       umode_t         mode)           /* bits set to indicate file type */
 {
        xfs_agnumber_t  agcount;        /* number of ag's in the filesystem */
        xfs_agnumber_t  agno;           /* current ag number */
@@ -978,9 +977,6 @@ xfs_ialloc_ag_select(
                        return agno;
                }
 
-               if (!okalloc)
-                       goto nextag;
-
                if (!pag->pagf_init) {
                        error = xfs_alloc_pagf_init(mp, tp, agno, flags);
                        if (error)
@@ -1680,7 +1676,6 @@ xfs_dialloc(
        struct xfs_trans        *tp,
        xfs_ino_t               parent,
        umode_t                 mode,
-       int                     okalloc,
        struct xfs_buf          **IO_agbp,
        xfs_ino_t               *inop)
 {
@@ -1692,6 +1687,7 @@ xfs_dialloc(
        int                     noroom = 0;
        xfs_agnumber_t          start_agno;
        struct xfs_perag        *pag;
+       int                     okalloc = 1;
 
        if (*IO_agbp) {
                /*
@@ -1707,7 +1703,7 @@ xfs_dialloc(
         * We do not have an agbp, so select an initial allocation
         * group for inode allocation.
         */
-       start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+       start_agno = xfs_ialloc_ag_select(tp, parent, mode);
        if (start_agno == NULLAGNUMBER) {
                *inop = NULLFSINO;
                return 0;
index d2bdcd5e7312e499deb91b29bc62cb91cc35e881..66a8de0b1caaad8d1ba9d5ed94fccc813780dca3 100644 (file)
@@ -81,7 +81,6 @@ xfs_dialloc(
        struct xfs_trans *tp,           /* transaction pointer */
        xfs_ino_t       parent,         /* parent inode (directory) */
        umode_t         mode,           /* mode bits for new inode */
-       int             okalloc,        /* ok to allocate more space */
        struct xfs_buf  **agbp,         /* buf for a.g. inode header */
        xfs_ino_t       *inop);         /* inode number allocated */
 
index 8012741266488ab4e0724b68aadb2742c1d29c2c..b41952a4ddd851fe63a475be4f2b6bc8d7e47beb 100644 (file)
@@ -749,7 +749,6 @@ xfs_ialloc(
        xfs_nlink_t     nlink,
        dev_t           rdev,
        prid_t          prid,
-       int             okalloc,
        xfs_buf_t       **ialloc_context,
        xfs_inode_t     **ipp)
 {
@@ -765,7 +764,7 @@ xfs_ialloc(
         * Call the space management code to pick
         * the on-disk inode to be allocated.
         */
-       error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
+       error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode,
                            ialloc_context, &ino);
        if (error)
                return error;
@@ -957,7 +956,6 @@ xfs_dir_ialloc(
        xfs_nlink_t     nlink,
        dev_t           rdev,
        prid_t          prid,           /* project id */
-       int             okalloc,        /* ok to allocate new space */
        xfs_inode_t     **ipp,          /* pointer to inode; it will be
                                           locked. */
        int             *committed)
@@ -988,8 +986,8 @@ xfs_dir_ialloc(
         * transaction commit so that no other process can steal
         * the inode(s) that we've just allocated.
         */
-       code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
-                         &ialloc_context, &ip);
+       code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context,
+                       &ip);
 
        /*
         * Return an error if we were unable to allocate a new inode.
@@ -1061,7 +1059,7 @@ xfs_dir_ialloc(
                 * this call should always succeed.
                 */
                code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
-                                 okalloc, &ialloc_context, &ip);
+                                 &ialloc_context, &ip);
 
                /*
                 * If we get an error at this point, return to the caller
@@ -1182,11 +1180,6 @@ xfs_create(
                xfs_flush_inodes(mp);
                error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
        }
-       if (error == -ENOSPC) {
-               /* No space at all so try a "no-allocation" reservation */
-               resblks = 0;
-               error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
-       }
        if (error)
                goto out_release_inode;
 
@@ -1203,19 +1196,13 @@ xfs_create(
        if (error)
                goto out_trans_cancel;
 
-       if (!resblks) {
-               error = xfs_dir_canenter(tp, dp, name);
-               if (error)
-                       goto out_trans_cancel;
-       }
-
        /*
         * A newly created regular or special file just has one directory
         * entry pointing to them, but a directory also the "." entry
         * pointing to itself.
         */
-       error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
-                              prid, resblks > 0, &ip, NULL);
+       error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip,
+                       NULL);
        if (error)
                goto out_trans_cancel;
 
@@ -1340,11 +1327,6 @@ xfs_create_tmpfile(
        tres = &M_RES(mp)->tr_create_tmpfile;
 
        error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
-       if (error == -ENOSPC) {
-               /* No space at all so try a "no-allocation" reservation */
-               resblks = 0;
-               error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
-       }
        if (error)
                goto out_release_inode;
 
@@ -1353,8 +1335,7 @@ xfs_create_tmpfile(
        if (error)
                goto out_trans_cancel;
 
-       error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
-                               prid, resblks > 0, &ip, NULL);
+       error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL);
        if (error)
                goto out_trans_cancel;
 
index cc13c37637217e74e4c9425b34a710a5fd55e090..b2136af9289f3d854f88a549ec32efa455cfc78b 100644 (file)
@@ -428,7 +428,7 @@ xfs_extlen_t        xfs_get_extsz_hint(struct xfs_inode *ip);
 xfs_extlen_t   xfs_get_cowextsz_hint(struct xfs_inode *ip);
 
 int            xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
-                              xfs_nlink_t, dev_t, prid_t, int,
+                              xfs_nlink_t, dev_t, prid_t,
                               struct xfs_inode **, int *);
 
 /* from xfs_file.c */
index 010a13a201aad78382ae69fdcc8e3b8d5c451c1d..ec952dfad359f6ad08d33d234f0cd75200c5933b 100644 (file)
@@ -793,8 +793,8 @@ xfs_qm_qino_alloc(
                return error;
 
        if (need_alloc) {
-               error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
-                                                               &committed);
+               error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip,
+                               &committed);
                if (error) {
                        xfs_trans_cancel(tp);
                        return error;
index 68d3ca2c4968054646345dab2bd3a76f97490fb8..2e9e793a8f9dfa18e87078bce5133860d4de6d4d 100644 (file)
@@ -232,11 +232,6 @@ xfs_symlink(
        resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
 
        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp);
-       if (error == -ENOSPC && fs_blocks == 0) {
-               resblks = 0;
-               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, 0, 0, 0,
-                               &tp);
-       }
        if (error)
                goto out_release_inode;
 
@@ -259,14 +254,6 @@ xfs_symlink(
        if (error)
                goto out_trans_cancel;
 
-       /*
-        * Check for ability to enter directory entry, if no space reserved.
-        */
-       if (!resblks) {
-               error = xfs_dir_canenter(tp, dp, link_name);
-               if (error)
-                       goto out_trans_cancel;
-       }
        /*
         * Initialize the bmap freelist prior to calling either
         * bmapi or the directory create code.
@@ -277,7 +264,7 @@ xfs_symlink(
         * Allocate an inode for the symlink.
         */
        error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
-                              prid, resblks > 0, &ip, NULL);
+                              prid, &ip, NULL);
        if (error)
                goto out_trans_cancel;