staging: lustre: fid: do open-by-fid by default
author Lai Siyao <lai.siyao@intel.com>
Tue, 16 Aug 2016 20:19:16 +0000 (16:19 -0400)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 21 Aug 2016 13:57:38 +0000 (15:57 +0200)
Currently client open-by-fid often packs the name into the request,
but the name may be invalid (e.g. for an NFS export), and even if it
is valid, it may cause inconsistency, because the operation is done
on the fid, which is globally unique, whereas the name is not.

Since open-by-fid doesn't pack name, for striped dir we can't know
parent stripe fid on client, so we set parent fid the same as
child fid, and MDT has to find its parent fid from linkea (this is
already supported by MDT).

M_CHECK_STALE becomes obsolete.

Unset MDS_OPEN_FL_INTERNAL from open syscall flags, because these
flags are internally used, and should not be set from user space.

It's not necessary to store parent fid in lli_pfid, because MDT
can get its parent fid from linkea, and now that DNE stripe
directory stores master inode fid in lli_pfid, stop storing parent
fid to avoid conflict.

Signed-off-by: Lai Siyao <lai.siyao@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3544
Reviewed-on: http://review.whamcloud.com/7476
Reviewed-on: http://review.whamcloud.com/10692
Reviewed-by: Fan Yong <fan.yong@intel.com>
Reviewed-by: Nathaniel Clark <nathaniel.l.clark@intel.com>
Reviewed-by: wangdi <di.wang@intel.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
13 files changed:
drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
drivers/staging/lustre/lustre/include/lustre_lite.h
drivers/staging/lustre/lustre/include/lustre_mds.h
drivers/staging/lustre/lustre/llite/file.c
drivers/staging/lustre/lustre/llite/llite_internal.h
drivers/staging/lustre/lustre/llite/llite_lib.c
drivers/staging/lustre/lustre/llite/llite_nfs.c
drivers/staging/lustre/lustre/llite/namei.c
drivers/staging/lustre/lustre/lmv/lmv_intent.c
drivers/staging/lustre/lustre/mdc/mdc_internal.h
drivers/staging/lustre/lustre/mdc/mdc_lib.c
drivers/staging/lustre/lustre/mdc/mdc_locks.c
drivers/staging/lustre/lustre/obdclass/lprocfs_status.c

index 400ab3cd5877e46107d19bc0f8f872dc4f5538d7..a9661c0c6aa1a8c8aead05537ffef64054e7cb84 100644 (file)
@@ -2252,6 +2252,11 @@ void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa);
                                              */
 #define MDS_OPEN_RELEASE   02000000000000ULL /* Open the file for HSM release */
 
+#define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS |    \
+                             MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK |  \
+                             MDS_OPEN_BY_FID | MDS_OPEN_LEASE |        \
+                             MDS_OPEN_RELEASE)
+
 enum mds_op_bias {
        MDS_CHECK_SPLIT         = 1 << 0,
        MDS_CROSS_REF           = 1 << 1,
index b16897702559ad006cc1b82891adf3a006a06b67..a3d757348ec661bcd6b2934584825cfe55bf46b2 100644 (file)
@@ -42,7 +42,6 @@
 
 #include "obd_class.h"
 #include "lustre_net.h"
-#include "lustre_mds.h"
 #include "lustre_ha.h"
 
 /* 4UL * 1024 * 1024 */
index 4104bd9bd5c4dcb0f8d2d140a80ead0e42ed8986..23a7e4f78e9ab29eff0b51f91e497bcc85e5b33a 100644 (file)
@@ -58,9 +58,6 @@ struct mds_group_info {
 #define MDD_OBD_NAME     "mdd_obd"
 #define MDD_OBD_UUID     "mdd_obd_uuid"
 
-/* these are local flags, used only on the client, private */
-#define M_CHECK_STALE     0200000000
-
 /** @} mds */
 
 #endif
index 563cdf65324e5c4da81c7f1d7eecfc537927a13d..015b0ab23bbd01909662a866111f815e9a2edc23 100644 (file)
@@ -379,53 +379,35 @@ int ll_file_release(struct inode *inode, struct file *file)
        return rc;
 }
 
-static int ll_intent_file_open(struct dentry *dentry, void *lmm,
-                              int lmmsize, struct lookup_intent *itp)
+static int ll_intent_file_open(struct dentry *de, void *lmm, int lmmsize,
+                              struct lookup_intent *itp)
 {
-       struct inode *inode = d_inode(dentry);
+       struct inode *inode = d_inode(de);
        struct ll_sb_info *sbi = ll_i2sbi(inode);
-       struct dentry *parent = dentry->d_parent;
-       const char *name = dentry->d_name.name;
-       const int len = dentry->d_name.len;
+       struct dentry *parent = de->d_parent;
+       const char *name = NULL;
        struct md_op_data *op_data;
        struct ptlrpc_request *req;
-       __u32 opc = LUSTRE_OPC_ANY;
-       int rc;
+       int len = 0, rc;
 
-       /* Usually we come here only for NFSD, and we want open lock. */
-       /* We can also get here if there was cached open handle in revalidate_it
-        * but it disappeared while we were getting from there to ll_file_open.
-        * But this means this file was closed and immediately opened which
-        * makes a good candidate for using OPEN lock
-        */
-       /* If lmmsize & lmm are not 0, we are just setting stripe info
-        * parameters. No need for the open lock
+       LASSERT(parent);
+       LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
+
+       /*
+        * if server supports open-by-fid, or file name is invalid, don't pack
+        * name in open request
         */
-       if (!lmm && lmmsize == 0) {
-               struct ll_dentry_data *ldd = ll_d2d(dentry);
-               /*
-                * If we came via ll_iget_for_nfs, then we need to request
-                * struct ll_dentry_data *ldd = ll_d2d(file->f_dentry);
-                *
-                * NB: when ldd is NULL, it must have come via normal
-                * lookup path only, since ll_iget_for_nfs always calls
-                * ll_d_init().
-                */
-               if (ldd && ldd->lld_nfs_dentry) {
-                       ldd->lld_nfs_dentry = 0;
-                       itp->it_flags |= MDS_OPEN_LOCK;
-               }
-               if (itp->it_flags & FMODE_WRITE)
-                       opc = LUSTRE_OPC_CREATE;
+       if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
+           lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
+               name = de->d_name.name;
+               len = de->d_name.len;
        }
 
-       op_data  = ll_prep_md_op_data(NULL, d_inode(parent),
-                                     inode, name, len,
-                                     O_RDWR, opc, NULL);
+       op_data  = ll_prep_md_op_data(NULL, d_inode(parent), inode, name, len,
+                                     O_RDWR, LUSTRE_OPC_ANY, NULL);
        if (IS_ERR(op_data))
                return PTR_ERR(op_data);
 
-       itp->it_flags |= MDS_OPEN_BY_FID;
        rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
                            0 /*unused */, &req, ll_md_blocking_ast, 0);
        ll_finish_md_op_data(op_data);
@@ -655,9 +637,19 @@ restart:
                         * result in a deadlock
                         */
                        mutex_unlock(&lli->lli_och_mutex);
-                       it->it_create_mode |= M_CHECK_STALE;
+                       /*
+                        * Normally called under two situations:
+                        * 1. NFS export.
+                        * 2. revalidate with IT_OPEN (revalidate doesn't
+                        *    execute this intent any more).
+                        *
+                        * Always fetch MDS_OPEN_LOCK if this is not setstripe.
+                        *
+                        * Always specify MDS_OPEN_BY_FID because we don't want
+                        * to get file with different fid.
+                        */
+                       it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
                        rc = ll_intent_file_open(file->f_path.dentry, NULL, 0, it);
-                       it->it_create_mode &= ~M_CHECK_STALE;
                        if (rc)
                                goto out_openerr;
 
@@ -1399,6 +1391,7 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
        }
 
        ll_inode_size_lock(inode);
+       oit.it_flags |= MDS_OPEN_BY_FID;
        rc = ll_intent_file_open(dentry, lum, lum_size, &oit);
        if (rc)
                goto out_unlock;
@@ -3066,7 +3059,6 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
                if (IS_ERR(op_data))
                        return PTR_ERR(op_data);
 
-               oit.it_create_mode |= M_CHECK_STALE;
                rc = md_intent_lock(exp, op_data, NULL, 0,
                                    /* we are not interested in name
                                     * based lookup
@@ -3074,7 +3066,6 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
                                    &oit, 0, &req,
                                    ll_md_blocking_ast, 0);
                ll_finish_md_op_data(op_data);
-               oit.it_create_mode &= ~M_CHECK_STALE;
                if (rc < 0) {
                        rc = ll_inode_revalidate_fini(inode, rc);
                        goto out;
index 43269aaa85fb61d695e8fbceba9acf338a4a1aad..b4e843a941b1ffc75ba1f8eabfdf9992e245712b 100644 (file)
@@ -118,9 +118,7 @@ struct ll_inode_info {
 
        /* identifying fields for both metadata and data stacks. */
        struct lu_fid              lli_fid;
-       /* Parent fid for accessing default stripe data on parent directory
-        * for allocating OST objects after a mknod() and later open-by-FID.
-        */
+       /* master inode fid for stripe directory */
        struct lu_fid              lli_pfid;
 
        struct list_head              lli_close_list;
index 5f6343acc2270a246e18e7aed6c1554b8f97a449..da00fbd30721b7653568e9bd03bb4940c802b747 100644 (file)
@@ -189,7 +189,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                                  OBD_CONNECT_PINGLESS |
                                  OBD_CONNECT_MAX_EASIZE |
                                  OBD_CONNECT_FLOCK_DEAD |
-                                 OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK;
+                                 OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK |
+                                 OBD_CONNECT_OPEN_BY_FID;
 
        if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
                data->ocd_connect_flags |= OBD_CONNECT_SOM;
@@ -2364,20 +2365,6 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
        op_data->op_mds = 0;
        op_data->op_data = data;
 
-       /* If the file is being opened after mknod() (normally due to NFS)
-        * try to use the default stripe data from parent directory for
-        * allocating OST objects.  Try to pass the parent FID to MDS.
-        */
-       if (opc == LUSTRE_OPC_CREATE && i1 == i2 && S_ISREG(i2->i_mode) &&
-           !ll_i2info(i2)->lli_has_smd) {
-               struct ll_inode_info *lli = ll_i2info(i2);
-
-               spin_lock(&lli->lli_lock);
-               if (likely(!lli->lli_has_smd && !fid_is_zero(&lli->lli_pfid)))
-                       op_data->op_fid1 = lli->lli_pfid;
-               spin_unlock(&lli->lli_lock);
-       }
-
        /* When called by ll_setattr_raw, file is i1. */
        if (ll_i2info(i1)->lli_flags & LLIF_DATA_MODIFIED)
                op_data->op_bias |= MDS_DATA_MODIFIED;
index ac96d897ab50560303a5d9619dccf5eaacf92e1a..2b652407c239a510aec7199d24bcc86ee60629a7 100644 (file)
@@ -148,12 +148,18 @@ ll_iget_for_nfs(struct super_block *sb, struct lu_fid *fid, struct lu_fid *paren
                return ERR_PTR(-ESTALE);
        }
 
+       result = d_obtain_alias(inode);
+       if (IS_ERR(result)) {
+               iput(inode);
+               return result;
+       }
+
        /**
-        * It is an anonymous dentry without OST objects created yet.
-        * We have to find the parent to tell MDS how to init lov objects.
+        * In case d_obtain_alias() found a disconnected dentry, always update
+        * lli_pfid to allow later operation (normally open) have parent fid,
+        * which may be used by MDS to create data.
         */
-       if (S_ISREG(inode->i_mode) && !ll_i2info(inode)->lli_has_smd &&
-           parent && !fid_is_zero(parent)) {
+       if (parent) {
                struct ll_inode_info *lli = ll_i2info(inode);
 
                spin_lock(&lli->lli_lock);
index ac0f44229a7b3fc6a3917b4a8b5dbabb454a52f5..ee5a42e5e95df0fce89a45defcd6b61619737732 100644 (file)
@@ -650,6 +650,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
        }
        it->it_create_mode = (mode & S_IALLUGO) | S_IFREG;
        it->it_flags = (open_flags & ~O_ACCMODE) | OPEN_FMODE(open_flags);
+       it->it_flags &= ~MDS_OPEN_FL_INTERNAL;
 
        /* Dentry added to dcache tree in ll_lookup_it */
        de = ll_lookup_it(dir, dentry, it, lookup_flags);
index 761ab248f855ae11f2e1d0b841601b0ce74e727a..cde1d7b6bfc218c89dab66942fda128a6ff4436d 100644 (file)
@@ -111,10 +111,6 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
                 */
                LASSERT(it->it_op & IT_OPEN);
                op_data->op_fid2 = *parent_fid;
-               /* Add object FID to op_fid3, in case it needs to check stale
-                * (M_CHECK_STALE), see mdc_finish_intent_lock
-                */
-               op_data->op_fid3 = body->mbo_fid1;
        }
 
        op_data->op_bias = MDS_CROSS_REF;
@@ -313,17 +309,16 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
        struct mdt_body         *body;
        int                     rc;
 
-       if (it->it_flags & MDS_OPEN_BY_FID && fid_is_sane(&op_data->op_fid2)) {
-               if (op_data->op_mea1) {
-                       struct lmv_stripe_md *lsm = op_data->op_mea1;
-                       const struct lmv_oinfo *oinfo;
+       if (it->it_flags & MDS_OPEN_BY_FID) {
+               LASSERT(fid_is_sane(&op_data->op_fid2));
 
-                       oinfo = lsm_name_to_stripe_info(lsm, op_data->op_name,
-                                                       op_data->op_namelen);
-                       if (IS_ERR(oinfo))
-                               return PTR_ERR(oinfo);
-                       op_data->op_fid1 = oinfo->lmo_fid;
-               }
+               /*
+                * for striped directory, we can't know parent stripe fid
+                * without name, but we can set it to child fid, and MDT
+                * will obtain it from linkea in open in such case.
+                */
+               if (op_data->op_mea1)
+                       op_data->op_fid1 = op_data->op_fid2;
 
                tgt = lmv_find_target(lmv, &op_data->op_fid2);
                if (IS_ERR(tgt))
@@ -331,6 +326,10 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
 
                op_data->op_mds = tgt->ltd_idx;
        } else {
+               LASSERT(fid_is_sane(&op_data->op_fid1));
+               LASSERT(fid_is_zero(&op_data->op_fid2));
+               LASSERT(op_data->op_name);
+
                tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
                if (IS_ERR(tgt))
                        return PTR_ERR(tgt);
@@ -339,13 +338,11 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
        /* If it is ready to open the file by FID, do not need
         * allocate FID at all, otherwise it will confuse MDT
         */
-       if ((it->it_op & IT_CREAT) &&
-           !(it->it_flags & MDS_OPEN_BY_FID)) {
+       if ((it->it_op & IT_CREAT) && !(it->it_flags & MDS_OPEN_BY_FID)) {
                /*
-                * For open with IT_CREATE and for IT_CREATE cases allocate new
-                * fid and setup FLD for it.
+                * For lookup(IT_CREATE) cases allocate new fid and setup FLD
+                * for it.
                 */
-               op_data->op_fid3 = op_data->op_fid2;
                rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
                if (rc != 0)
                        return rc;
@@ -494,9 +491,9 @@ int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
 
        LASSERT(fid_is_sane(&op_data->op_fid1));
 
-       CDEBUG(D_INODE, "INTENT LOCK '%s' for '%*s' on "DFID"\n",
-              LL_IT2STR(it), op_data->op_namelen, op_data->op_name,
-              PFID(&op_data->op_fid1));
+       CDEBUG(D_INODE, "INTENT LOCK '%s' for "DFID" '%*s' on "DFID"\n",
+              LL_IT2STR(it), PFID(&op_data->op_fid2), op_data->op_namelen,
+              op_data->op_name, PFID(&op_data->op_fid1));
 
        rc = lmv_check_connect(obd);
        if (rc)
index 00e8435b938f1e83deefba1b9d57e433512df0ee..1901b933952694f6dbe669108f4c6ccaeb0de8c5 100644 (file)
@@ -34,7 +34,6 @@
 #define _MDC_INTERNAL_H
 
 #include "../include/lustre_mdc.h"
-#include "../include/lustre_mds.h"
 
 void lprocfs_mdc_init_vars(struct lprocfs_static_vars *lvars);
 
index 813f923b578593be1f9d67c527f95271adae1257..aa496f3381db4f5a3948659d1e65cde66b1ac541 100644 (file)
@@ -171,10 +171,7 @@ void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
 static __u64 mds_pack_open_flags(__u64 flags, __u32 mode)
 {
        __u64 cr_flags = (flags & (FMODE_READ | FMODE_WRITE |
-                                  MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS |
-                                  MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK |
-                                  MDS_OPEN_BY_FID | MDS_OPEN_LEASE |
-                                  MDS_OPEN_RELEASE));
+                                  MDS_OPEN_FL_INTERNAL));
        if (flags & O_CREAT)
                cr_flags |= MDS_OPEN_CREAT;
        if (flags & O_EXCL)
index fab83ddeef650b8d50a2acba609fbfe9ffe52ebd..1c3b78d4dd4c065153c3b547c11f0add26bc5a3e 100644 (file)
@@ -922,27 +922,6 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
        mdt_body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
        LASSERT(mdt_body);      /* mdc_enqueue checked */
 
-       /* If we were revalidating a fid/name pair, mark the intent in
-        * case we fail and get called again from lookup
-        */
-       if (fid_is_sane(&op_data->op_fid2) &&
-           it->it_create_mode & M_CHECK_STALE &&
-           it->it_op != IT_GETATTR) {
-               /* Also: did we find the same inode? */
-               /* sever can return one of two fids:
-                * op_fid2 - new allocated fid - if file is created.
-                * op_fid3 - existent fid - if file only open.
-                * op_fid3 is saved in lmv_intent_open
-                */
-               if ((!lu_fid_eq(&op_data->op_fid2, &mdt_body->mbo_fid1)) &&
-                   (!lu_fid_eq(&op_data->op_fid3, &mdt_body->mbo_fid1))) {
-                       CDEBUG(D_DENTRY, "Found stale data "DFID"("DFID")/"DFID
-                              "\n", PFID(&op_data->op_fid2),
-                              PFID(&op_data->op_fid2), PFID(&mdt_body->mbo_fid1));
-                       return -ESTALE;
-               }
-       }
-
        rc = it_open_error(DISP_LOOKUP_EXECD, it);
        if (rc)
                return rc;
index f42ed17996d736add2ddfbe3cb67efeb2679b0d4..fbb08517d57a02c36b74c043a051399fcd32a893 100644 (file)
@@ -96,7 +96,7 @@ static const char * const obd_connect_names[] = {
        "pingless",
        "flock_deadlock",
        "disp_stripe",
-       "unknown",
+       "open_by_fid",
        "lfsck",
        "unknown",
        NULL