locks: break delegations on unlink
author J. Bruce Fields <bfields@redhat.com>
Tue, 20 Sep 2011 13:14:34 +0000 (09:14 -0400)
committer Al Viro <viro@zeniv.linux.org.uk>
Sat, 9 Nov 2013 05:16:42 +0000 (00:16 -0500)
We need to break delegations on any operation that changes the set of
links pointing to an inode.  Start with unlink.

Such operations also hold the i_mutex on a parent directory.  Breaking a
delegation may require waiting for a timeout (by default 90 seconds) in
the case of an unresponsive NFS client.  To avoid blocking all directory
operations, we therefore drop locks before waiting for the delegation.
The logic then looks like:

        acquire locks
        ...
        test for delegation; if found:
                take reference on inode
                release locks
                wait for delegation break
                drop reference on inode
                retry

It is possible this could never terminate.  (Even if we take precautions
to prevent another delegation being acquired on the same inode, we could
get a different inode on each retry.)  But this seems very unlikely.

The initial test for a delegation happens after the lock on the target
inode is acquired, but the directory inode may have been acquired
further up the call stack.  We therefore add a "struct inode **"
argument to any intervening functions, which we use to pass the inode
back up to the caller in the case it needs a delegation synchronously
broken.

Cc: David Howells <dhowells@redhat.com>
Cc: Tyler Hicks <tyhicks@canonical.com>
Cc: Dustin Kirkland <dustin.kirkland@gazzang.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
drivers/base/devtmpfs.c
fs/cachefiles/namei.c
fs/ecryptfs/inode.c
fs/namei.c
fs/nfsd/vfs.c
include/linux/fs.h
ipc/mqueue.c

index 7413d065906bf828f8afb36aed45a9d94efe4f98..1b8490e2fbdeae0c62799a30cfd2c1848874d169 100644 (file)
@@ -324,7 +324,7 @@ static int handle_remove(const char *nodename, struct device *dev)
                        mutex_lock(&dentry->d_inode->i_mutex);
                        notify_change(dentry, &newattrs);
                        mutex_unlock(&dentry->d_inode->i_mutex);
-                       err = vfs_unlink(parent.dentry->d_inode, dentry);
+                       err = vfs_unlink(parent.dentry->d_inode, dentry, NULL);
                        if (!err || err == -ENOENT)
                                deleted = 1;
                }
index f4a08d7fa2f70a58a8513110988cc4928a674bdb..31d480c0e046ac6673ffff6dcafbf5f9574425db 100644 (file)
@@ -294,7 +294,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
                if (ret < 0) {
                        cachefiles_io_error(cache, "Unlink security error");
                } else {
-                       ret = vfs_unlink(dir->d_inode, rep);
+                       ret = vfs_unlink(dir->d_inode, rep, NULL);
 
                        if (preemptive)
                                cachefiles_mark_object_buried(cache, rep);
index 0f9b66eaa7677ce920d8488e5afd49fc684c4ac7..dc60b8bd09ecd6c782a4c924ddae02b2488579e2 100644 (file)
@@ -153,7 +153,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
 
        dget(lower_dentry);
        lower_dir_dentry = lock_parent(lower_dentry);
-       rc = vfs_unlink(lower_dir_inode, lower_dentry);
+       rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
        if (rc) {
                printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
                goto out_unlock;
@@ -208,7 +208,7 @@ ecryptfs_do_create(struct inode *directory_inode,
        inode = __ecryptfs_get_inode(lower_dentry->d_inode,
                                     directory_inode->i_sb);
        if (IS_ERR(inode)) {
-               vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
+               vfs_unlink(lower_dir_dentry->d_inode, lower_dentry, NULL);
                goto out_lock;
        }
        fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
index e633a58d4222afab29a4a0cb422a3dd33c9fbb6f..67ce331a3ed875f64408e78750341c6243aaeb3e 100644 (file)
@@ -3615,7 +3615,25 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
        return do_rmdir(AT_FDCWD, pathname);
 }
 
-int vfs_unlink(struct inode *dir, struct dentry *dentry)
+/**
+ * vfs_unlink - unlink a filesystem object
+ * @dir:       parent directory
+ * @dentry:    victim
+ * @delegated_inode: returns victim inode, if the inode is delegated.
+ *
+ * The caller must hold dir->i_mutex.
+ *
+ * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
+ * return a reference to the inode in delegated_inode.  The caller
+ * should then break the delegation on that inode and retry.  Because
+ * breaking a delegation may take a long time, the caller should drop
+ * dir->i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
+int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
 {
        struct inode *target = dentry->d_inode;
        int error = may_delete(dir, dentry, 0);
@@ -3632,11 +3650,20 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
        else {
                error = security_inode_unlink(dir, dentry);
                if (!error) {
+                       error = break_deleg(target, O_WRONLY|O_NONBLOCK);
+                       if (error) {
+                               if (error == -EWOULDBLOCK && delegated_inode) {
+                                       *delegated_inode = target;
+                                       ihold(target);
+                               }
+                               goto out;
+                       }
                        error = dir->i_op->unlink(dir, dentry);
                        if (!error)
                                dont_mount(dentry);
                }
        }
+out:
        mutex_unlock(&target->i_mutex);
 
        /* We don't d_delete() NFS sillyrenamed files--they still exist. */
@@ -3661,6 +3688,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
        struct dentry *dentry;
        struct nameidata nd;
        struct inode *inode = NULL;
+       struct inode *delegated_inode = NULL;
        unsigned int lookup_flags = 0;
 retry:
        name = user_path_parent(dfd, pathname, &nd, lookup_flags);
@@ -3675,7 +3703,7 @@ retry:
        error = mnt_want_write(nd.path.mnt);
        if (error)
                goto exit1;
-
+retry_deleg:
        mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
        dentry = lookup_hash(&nd);
        error = PTR_ERR(dentry);
@@ -3690,13 +3718,21 @@ retry:
                error = security_path_unlink(&nd.path, dentry);
                if (error)
                        goto exit2;
-               error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+               error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode);
 exit2:
                dput(dentry);
        }
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
        if (inode)
                iput(inode);    /* truncate the inode here */
+       inode = NULL;
+       if (delegated_inode) {
+               error = break_deleg(delegated_inode, O_WRONLY);
+               iput(delegated_inode);
+               delegated_inode = NULL;
+               if (!error)
+                       goto retry_deleg;
+       }
        mnt_drop_write(nd.path.mnt);
 exit1:
        path_put(&nd.path);
index 13886f7f40d5e8a868182936d3cda837492b7b61..7a810235d5994a69d3c395e9299a26a405bc3840 100644 (file)
@@ -1910,7 +1910,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        if (host_err)
                goto out_put;
        if (type != S_IFDIR)
-               host_err = vfs_unlink(dirp, rdentry);
+               host_err = vfs_unlink(dirp, rdentry, NULL);
        else
                host_err = vfs_rmdir(dirp, rdentry);
        if (!host_err)
index 8e4be1be1a6257238f99c4ebde0ff8ce26c29c87..a5799233142aafc67cbf03486d593d0bd6c0a0b5 100644 (file)
@@ -1455,7 +1455,7 @@ extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
 extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
 extern int vfs_rmdir(struct inode *, struct dentry *);
-extern int vfs_unlink(struct inode *, struct dentry *);
+extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
 
 /*
index ae1996d3c5395ceef121c85704cce2a29297d322..95827ce2f3c78e76adfdef64278a227402356587 100644 (file)
@@ -886,7 +886,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
                err = -ENOENT;
        } else {
                ihold(inode);
-               err = vfs_unlink(dentry->d_parent->d_inode, dentry);
+               err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL);
        }
        dput(dentry);