[PATCH] ext3: remove unnecessary race then retry in ext3_get_block
authorMingming Cao <cmm@us.ibm.com>
Sun, 1 May 2005 15:59:20 +0000 (08:59 -0700)
committerLinus Torvalds <torvalds@ppc970.osdl.org>
Sun, 1 May 2005 15:59:20 +0000 (08:59 -0700)
The extra race-with-truncate-then-retry logic around
ext3_get_block_handle(), which was inherited from ext2, becomes unecessary
for ext3, since we have already obtained the ei->truncate_sem in
ext3_get_block_handle() before calling ext3_alloc_branch().  The
ei->truncate_sem is already there to block concurrent truncate and block
allocation on the same inode.  So the inode's indirect addressing tree
won't be changed after we grab that semaphore.

We could, after get the semaphore, re-verify the branch is up-to-date or
not.  If it has been changed, then get the updated branch.  If we still
need block allocation, we will have a safe version of the branch to work
with in the ext3_find_goal()/ext3_splice_branch().

The code becomes more readable after remove those retry logic.  The patch
also clean up some gotos in ext3_get_block_handle() to make it more
readable.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
fs/ext3/inode.c

index 040eb288bb1cb7d6965ac4cdf029ca7ad57a99db..ea5888688f9471735395537eeab09c31472fc21f 100644 (file)
@@ -455,12 +455,11 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
  *     @goal:  place to store the result.
  *
  *     Normally this function find the prefered place for block allocation,
- *     stores it in *@goal and returns zero. If the branch had been changed
- *     under us we return -EAGAIN.
+ *     stores it in *@goal and returns zero.
  */
 
-static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4],
-                         Indirect *partial, unsigned long *goal)
+static unsigned long ext3_find_goal(struct inode *inode, long block,
+               Indirect chain[4], Indirect *partial)
 {
        struct ext3_block_alloc_info *block_i =  EXT3_I(inode)->i_block_alloc_info;
 
@@ -470,15 +469,10 @@ static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4],
         */
        if (block_i && (block == block_i->last_alloc_logical_block + 1)
                && (block_i->last_alloc_physical_block != 0)) {
-               *goal = block_i->last_alloc_physical_block + 1;
-               return 0;
+               return block_i->last_alloc_physical_block + 1;
        }
 
-       if (verify_chain(chain, partial)) {
-               *goal = ext3_find_near(inode, partial);
-               return 0;
-       }
-       return -EAGAIN;
+       return ext3_find_near(inode, partial);
 }
 
 /**
@@ -582,12 +576,9 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
  *     @where: location of missing link
  *     @num:   number of blocks we are adding
  *
- *     This function verifies that chain (up to the missing link) had not
- *     changed, fills the missing link and does all housekeeping needed in
+ *     This function fills the missing link and does all housekeeping needed in
  *     inode (->i_blocks, etc.). In case of success we end up with the full
- *     chain to new block and return 0. Otherwise (== chain had been changed)
- *     we free the new blocks (forgetting their buffer_heads, indeed) and
- *     return -EAGAIN.
+ *     chain to new block and return 0.
  */
 
 static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
@@ -608,12 +599,6 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
                if (err)
                        goto err_out;
        }
-       /* Verify that place we are splicing to is still there and vacant */
-
-       if (!verify_chain(chain, where-1) || *where->p)
-               /* Writer: end */
-               goto changed;
-
        /* That's it */
 
        *where->p = where->key;
@@ -657,26 +642,11 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
        }
        return err;
 
-changed:
-       /*
-        * AKPM: if where[i].bh isn't part of the current updating
-        * transaction then we explode nastily.  Test this code path.
-        */
-       jbd_debug(1, "the chain changed: try again\n");
-       err = -EAGAIN;
-
 err_out:
        for (i = 1; i < num; i++) {
                BUFFER_TRACE(where[i].bh, "call journal_forget");
                ext3_journal_forget(handle, where[i].bh);
        }
-       /* For the normal collision cleanup case, we free up the blocks.
-        * On genuine filesystem errors we don't even think about doing
-        * that. */
-       if (err == -EAGAIN)
-               for (i = 0; i < num; i++)
-                       ext3_free_blocks(handle, inode, 
-                                        le32_to_cpu(where[i].key), 1);
        return err;
 }
 
@@ -708,7 +678,7 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
        unsigned long goal;
        int left;
        int boundary = 0;
-       int depth = ext3_block_to_path(inode, iblock, offsets, &boundary);
+       const int depth = ext3_block_to_path(inode, iblock, offsets, &boundary);
        struct ext3_inode_info *ei = EXT3_I(inode);
 
        J_ASSERT(handle != NULL || create == 0);
@@ -716,54 +686,55 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
        if (depth == 0)
                goto out;
 
-reread:
        partial = ext3_get_branch(inode, depth, offsets, chain, &err);
 
        /* Simplest case - block found, no allocation needed */
        if (!partial) {
                clear_buffer_new(bh_result);
-got_it:
-               map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
-               if (boundary)
-                       set_buffer_boundary(bh_result);
-               /* Clean up and exit */
-               partial = chain+depth-1; /* the whole chain */
-               goto cleanup;
+               goto got_it;
        }
 
        /* Next simple case - plain lookup or failed read of indirect block */
-       if (!create || err == -EIO) {
-cleanup:
+       if (!create || err == -EIO)
+               goto cleanup;
+
+       down(&ei->truncate_sem);
+
+       /*
+        * If the indirect block is missing while we are reading
+        * the chain(ext3_get_branch() returns -EAGAIN err), or
+        * if the chain has been changed after we grab the semaphore,
+        * (either because another process truncated this branch, or
+        * another get_block allocated this branch) re-grab the chain to see if
+        * the request block has been allocated or not.
+        *
+        * Since we already block the truncate/other get_block
+        * at this point, we will have the current copy of the chain when we
+        * splice the branch into the tree.
+        */
+       if (err == -EAGAIN || !verify_chain(chain, partial)) {
                while (partial > chain) {
-                       BUFFER_TRACE(partial->bh, "call brelse");
                        brelse(partial->bh);
                        partial--;
                }
-               BUFFER_TRACE(bh_result, "returned");
-out:
-               return err;
+               partial = ext3_get_branch(inode, depth, offsets, chain, &err);
+               if (!partial) {
+                       up(&ei->truncate_sem);
+                       if (err)
+                               goto cleanup;
+                       clear_buffer_new(bh_result);
+                       goto got_it;
+               }
        }
 
        /*
-        * Indirect block might be removed by truncate while we were
-        * reading it. Handling of that case (forget what we've got and
-        * reread) is taken out of the main path.
-        */
-       if (err == -EAGAIN)
-               goto changed;
-
-       goal = 0;
-       down(&ei->truncate_sem);
-
-       /* lazy initialize the block allocation info here if necessary */
-       if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) {
+        * Okay, we need to do block allocation.  Lazily initialize the block
+        * allocation info here if necessary
+       */
+       if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
                ext3_init_block_alloc_info(inode);
-       }
 
-       if (ext3_find_goal(inode, iblock, chain, partial, &goal) < 0) {
-               up(&ei->truncate_sem);
-               goto changed;
-       }
+       goal = ext3_find_goal(inode, iblock, chain, partial);
 
        left = (chain + depth) - partial;
 
@@ -771,38 +742,45 @@ out:
         * Block out ext3_truncate while we alter the tree
         */
        err = ext3_alloc_branch(handle, inode, left, goal,
-                                       offsets+(partial-chain), partial);
+                               offsets + (partial - chain), partial);
 
-       /* The ext3_splice_branch call will free and forget any buffers
+       /*
+        * The ext3_splice_branch call will free and forget any buffers
         * on the new chain if there is a failure, but that risks using
         * up transaction credits, especially for bitmaps where the
         * credits cannot be returned.  Can we handle this somehow?  We
-        * may need to return -EAGAIN upwards in the worst case.  --sct */
+        * may need to return -EAGAIN upwards in the worst case.  --sct
+        */
        if (!err)
                err = ext3_splice_branch(handle, inode, iblock, chain,
                                         partial, left);
-       /* i_disksize growing is protected by truncate_sem
-        * don't forget to protect it if you're about to implement
-        * concurrent ext3_get_block() -bzzz */
+       /*
+        * i_disksize growing is protected by truncate_sem.  Don't forget to
+        * protect it if you're about to implement concurrent
+        * ext3_get_block() -bzzz
+       */
        if (!err && extend_disksize && inode->i_size > ei->i_disksize)
                ei->i_disksize = inode->i_size;
        up(&ei->truncate_sem);
-       if (err == -EAGAIN)
-               goto changed;
        if (err)
                goto cleanup;
 
        set_buffer_new(bh_result);
-       goto got_it;
-
-changed:
+got_it:
+       map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+       if (boundary)
+               set_buffer_boundary(bh_result);
+       /* Clean up and exit */
+       partial = chain + depth - 1;    /* the whole chain */
+cleanup:
        while (partial > chain) {
-               jbd_debug(1, "buffer chain changed, retrying\n");
-               BUFFER_TRACE(partial->bh, "brelsing");
+               BUFFER_TRACE(partial->bh, "call brelse");
                brelse(partial->bh);
                partial--;
        }
-       goto reread;
+       BUFFER_TRACE(bh_result, "returned");
+out:
+       return err;
 }
 
 static int ext3_get_block(struct inode *inode, sector_t iblock,