staging/lustre/ptlrpc: lost bulk leads to a hang
authorVitaly Fertman <vitaly.fertman@seagate.com>
Mon, 20 Jun 2016 20:55:34 +0000 (16:55 -0400)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 20 Jun 2016 21:28:39 +0000 (14:28 -0700)
The reverse order of request_out_callback() and reply_in_callback()
puts the RPC into the UNREGISTERING state, which waits for both the
RPC and bulk MD unlinks, whereas only the RPC MD unlink has been
called so far. If the bulk is lost, even expired_set does not check
for the UNREGISTERING state.

The same happens for a write if the server returns an error.

This phase is ambiguous; split it into UNREG_RPC and UNREG_BULK.

Signed-off-by: Vitaly Fertman <vitaly.fertman@seagate.com>
Seagate-bug-id:  MRP-2953, MRP-3206
Reviewed-by: Andriy Skulysh <andriy.skulysh@seagate.com>
Reviewed-by: Alexey Leonidovich Lyashkov <alexey.lyashkov@seagate.com>
Tested-by: Elena V. Gryaznova <elena.gryaznova@seagate.com>
Reviewed-on: http://review.whamcloud.com/19953
Reviewed-by: Chris Horn <hornc@cray.com>
Reviewed-by: Ann Koehler <amk@cray.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: Oleg Drokin <green@linuxhacker.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/staging/lustre/lustre/include/lustre_net.h
drivers/staging/lustre/lustre/include/obd_support.h
drivers/staging/lustre/lustre/ptlrpc/client.c
drivers/staging/lustre/lustre/ptlrpc/import.c
drivers/staging/lustre/lustre/ptlrpc/niobuf.c

index 0fedef97b80bc70c524393f5ae4e888c444297a6..d5debd615fdf1c198c7b40b1091776577bfb28fb 100644 (file)
@@ -480,8 +480,9 @@ enum rq_phase {
        RQ_PHASE_BULK      = 0xebc0de02,
        RQ_PHASE_INTERPRET      = 0xebc0de03,
        RQ_PHASE_COMPLETE       = 0xebc0de04,
-       RQ_PHASE_UNREGISTERING  = 0xebc0de05,
-       RQ_PHASE_UNDEFINED      = 0xebc0de06
+       RQ_PHASE_UNREG_RPC      = 0xebc0de05,
+       RQ_PHASE_UNREG_BULK     = 0xebc0de06,
+       RQ_PHASE_UNDEFINED      = 0xebc0de07
 };
 
 /** Type of request interpreter call-back */
@@ -1263,6 +1264,8 @@ struct ptlrpc_cli_req {
        time_t                           cr_reply_deadline;
        /** when req bulk unlink must finish. */
        time_t                           cr_bulk_deadline;
+       /** when req unlink must finish. */
+       time_t                           cr_req_deadline;
        /** Portal to which this request would be sent */
        short                            cr_req_ptl;
        /** Portal where to wait for reply and where reply would be sent */
@@ -1318,6 +1321,7 @@ struct ptlrpc_cli_req {
 #define rq_real_sent           rq_cli.cr_sent_out
 #define rq_reply_deadline      rq_cli.cr_reply_deadline
 #define rq_bulk_deadline       rq_cli.cr_bulk_deadline
+#define rq_req_deadline                rq_cli.cr_req_deadline
 #define rq_nr_resend           rq_cli.cr_resend_nr
 #define rq_request_portal      rq_cli.cr_req_ptl
 #define rq_reply_portal                rq_cli.cr_rep_ptl
@@ -1692,8 +1696,10 @@ ptlrpc_phase2str(enum rq_phase phase)
                return "Interpret";
        case RQ_PHASE_COMPLETE:
                return "Complete";
-       case RQ_PHASE_UNREGISTERING:
-               return "Unregistering";
+       case RQ_PHASE_UNREG_RPC:
+               return "UnregRPC";
+       case RQ_PHASE_UNREG_BULK:
+               return "UnregBULK";
        default:
                return "?Phase?";
        }
@@ -1720,7 +1726,7 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req)
 #define DEBUG_REQ_FLAGS(req)                                               \
        ptlrpc_rqphase2str(req),                                                \
        FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"),                \
-       FLAG(req->rq_err, "E"),                                          \
+       FLAG(req->rq_err, "E"), FLAG(req->rq_net_err, "e"),                 \
        FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"),   \
        FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"),            \
        FLAG(req->rq_no_resend, "N"),                                      \
@@ -1728,7 +1734,7 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req)
        FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"),                  \
        FLAG(req->rq_committed, "M")
 
-#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s"
+#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s%s"
 
 void _debug_req(struct ptlrpc_request *req,
                struct libcfs_debug_msg_data *data, const char *fmt, ...)
@@ -2379,8 +2385,7 @@ static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req)
 
        desc = req->rq_bulk;
 
-       if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
-           req->rq_bulk_deadline > ktime_get_real_seconds())
+       if (req->rq_bulk_deadline > ktime_get_real_seconds())
                return 1;
 
        if (!desc)
@@ -2727,13 +2732,20 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
        if (req->rq_phase == new_phase)
                return;
 
-       if (new_phase == RQ_PHASE_UNREGISTERING) {
+       if (new_phase == RQ_PHASE_UNREG_RPC ||
+           new_phase == RQ_PHASE_UNREG_BULK) {
+               /* No embedded unregistering phases */
+               if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
+                   req->rq_phase == RQ_PHASE_UNREG_BULK)
+                       return;
+
                req->rq_next_phase = req->rq_phase;
                if (req->rq_import)
                        atomic_inc(&req->rq_import->imp_unregistering);
        }
 
-       if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+       if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
+           req->rq_phase == RQ_PHASE_UNREG_BULK) {
                if (req->rq_import)
                        atomic_dec(&req->rq_import->imp_unregistering);
        }
@@ -2750,9 +2762,6 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
 static inline int
 ptlrpc_client_early(struct ptlrpc_request *req)
 {
-       if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-           req->rq_reply_deadline > ktime_get_real_seconds())
-               return 0;
        return req->rq_early;
 }
 
@@ -2762,8 +2771,7 @@ ptlrpc_client_early(struct ptlrpc_request *req)
 static inline int
 ptlrpc_client_replied(struct ptlrpc_request *req)
 {
-       if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-           req->rq_reply_deadline > ktime_get_real_seconds())
+       if (req->rq_reply_deadline > ktime_get_real_seconds())
                return 0;
        return req->rq_replied;
 }
@@ -2772,8 +2780,7 @@ ptlrpc_client_replied(struct ptlrpc_request *req)
 static inline int
 ptlrpc_client_recv(struct ptlrpc_request *req)
 {
-       if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-           req->rq_reply_deadline > ktime_get_real_seconds())
+       if (req->rq_reply_deadline > ktime_get_real_seconds())
                return 1;
        return req->rq_receiving_reply;
 }
@@ -2784,8 +2791,11 @@ ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req)
        int rc;
 
        spin_lock(&req->rq_lock);
-       if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-           req->rq_reply_deadline > ktime_get_real_seconds()) {
+       if (req->rq_reply_deadline > ktime_get_real_seconds()) {
+               spin_unlock(&req->rq_lock);
+               return 1;
+       }
+       if (req->rq_req_deadline > ktime_get_real_seconds()) {
                spin_unlock(&req->rq_lock);
                return 1;
        }
index cdf20d6470d74cd7a42e25e0ac771b5adb4fb119..845e64a56c21b9a7532e9d5158a3f3a37b7913b8 100644 (file)
@@ -364,6 +364,9 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2  0x515
 #define OBD_FAIL_PTLRPC_DELAY_IMP_FULL   0x516
 #define OBD_FAIL_PTLRPC_CANCEL_RESEND    0x517
+#define OBD_FAIL_PTLRPC_DROP_BULK       0x51a
+#define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK         0x51b
+#define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
 
 #define OBD_FAIL_OBD_PING_NET      0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
index 5d832eb540d31704bd8941b3dc37f797ce674a8c..d4463d7c81d2938cf0ed1361cc129aafc0e386c8 100644 (file)
@@ -621,6 +621,8 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
        request->rq_reply_cbid.cbid_arg = request;
 
        request->rq_reply_deadline = 0;
+       request->rq_bulk_deadline = 0;
+       request->rq_req_deadline = 0;
        request->rq_phase = RQ_PHASE_NEW;
        request->rq_next_phase = RQ_PHASE_UNDEFINED;
 
@@ -632,6 +634,37 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
        request->rq_xid = ptlrpc_next_xid();
        lustre_msg_set_opc(request->rq_reqmsg, opcode);
 
+       /* Let's setup deadline for req/reply/bulk unlink for opcode. */
+       if (cfs_fail_val == opcode) {
+               time_t *fail_t = NULL, *fail2_t = NULL;
+
+               if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) {
+                       fail_t = &request->rq_bulk_deadline;
+               } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) {
+                       fail_t = &request->rq_reply_deadline;
+               } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK)) {
+                       fail_t = &request->rq_req_deadline;
+               } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK)) {
+                       fail_t = &request->rq_reply_deadline;
+                       fail2_t = &request->rq_bulk_deadline;
+               }
+
+               if (fail_t) {
+                       *fail_t = ktime_get_real_seconds() + LONG_UNLINK;
+
+                       if (fail2_t)
+                               *fail2_t = ktime_get_real_seconds() +
+                                                LONG_UNLINK;
+
+                       /* The RPC is infected, let the test change the
+                        * fail_loc
+                        */
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+                       schedule_timeout(cfs_time_seconds(2));
+                       set_current_state(TASK_RUNNING);
+               }
+       }
+
        return 0;
 
 out_ctx:
@@ -1481,16 +1514,28 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                if (!(req->rq_phase == RQ_PHASE_RPC ||
                      req->rq_phase == RQ_PHASE_BULK ||
                      req->rq_phase == RQ_PHASE_INTERPRET ||
-                     req->rq_phase == RQ_PHASE_UNREGISTERING ||
+                     req->rq_phase == RQ_PHASE_UNREG_RPC ||
+                     req->rq_phase == RQ_PHASE_UNREG_BULK ||
                      req->rq_phase == RQ_PHASE_COMPLETE)) {
                        DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase);
                        LBUG();
                }
 
-               if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+               if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
+                   req->rq_phase == RQ_PHASE_UNREG_BULK) {
                        LASSERT(req->rq_next_phase != req->rq_phase);
                        LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED);
 
+                       if (req->rq_req_deadline &&
+                           !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK))
+                               req->rq_req_deadline = 0;
+                       if (req->rq_reply_deadline &&
+                           !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK))
+                               req->rq_reply_deadline = 0;
+                       if (req->rq_bulk_deadline &&
+                           !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK))
+                               req->rq_bulk_deadline = 0;
+
                        /*
                         * Skip processing until reply is unlinked. We
                         * can't return to pool before that and we can't
@@ -1498,7 +1543,10 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                         * sure that all rdma transfers finished and will
                         * not corrupt any data.
                         */
-                       if (ptlrpc_client_recv_or_unlink(req) ||
+                       if (req->rq_phase == RQ_PHASE_UNREG_RPC &&
+                           ptlrpc_client_recv_or_unlink(req))
+                               continue;
+                       if (req->rq_phase == RQ_PHASE_UNREG_BULK &&
                            ptlrpc_client_bulk_active(req))
                                continue;
 
@@ -1976,7 +2024,7 @@ void ptlrpc_interrupted_set(void *data)
                        list_entry(tmp, struct ptlrpc_request, rq_set_chain);
 
                if (req->rq_phase != RQ_PHASE_RPC &&
-                   req->rq_phase != RQ_PHASE_UNREGISTERING)
+                   req->rq_phase != RQ_PHASE_UNREG_RPC)
                        continue;
 
                ptlrpc_mark_interrupted(req);
@@ -2288,8 +2336,9 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
 
        /* Let's setup deadline for reply unlink. */
        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-           async && request->rq_reply_deadline == 0)
-               request->rq_reply_deadline = ktime_get_real_seconds()+LONG_UNLINK;
+           async && request->rq_reply_deadline == 0 && cfs_fail_val == 0)
+               request->rq_reply_deadline =
+                       ktime_get_real_seconds() + LONG_UNLINK;
 
        /* Nothing left to do. */
        if (!ptlrpc_client_recv_or_unlink(request))
@@ -2302,7 +2351,7 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
                return 1;
 
        /* Move to "Unregistering" phase as reply was not unlinked yet. */
-       ptlrpc_rqphase_move(request, RQ_PHASE_UNREGISTERING);
+       ptlrpc_rqphase_move(request, RQ_PHASE_UNREG_RPC);
 
        /* Do not wait for unlink to finish. */
        if (async)
@@ -2932,7 +2981,6 @@ static void ptlrpcd_add_work_req(struct ptlrpc_request *req)
        req->rq_timeout         = obd_timeout;
        req->rq_sent            = ktime_get_real_seconds();
        req->rq_deadline        = req->rq_sent + req->rq_timeout;
-       req->rq_reply_deadline  = req->rq_deadline;
        req->rq_phase           = RQ_PHASE_INTERPRET;
        req->rq_next_phase      = RQ_PHASE_COMPLETE;
        req->rq_xid             = ptlrpc_next_xid();
index 914bbd2956fba01e3d5ed3fee944a049f430b0da..3292e6ea0102aa5ad9d28e842abe642fe3c0b377 100644 (file)
@@ -356,9 +356,8 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
                                                  "still on delayed list");
                                }
 
-                               CERROR("%s: RPCs in \"%s\" phase found (%d). Network is sluggish? Waiting them to error out.\n",
+                               CERROR("%s: Unregistering RPCs found (%d). Network is sluggish? Waiting them to error out.\n",
                                       cli_tgt,
-                                      ptlrpc_phase2str(RQ_PHASE_UNREGISTERING),
                                       atomic_read(&imp->
                                                   imp_unregistering));
                        }
index 8c49f5ed2fca9c23b25c05212b0d703d25f7c013..11ec8254534741bd09026fb2f25b2e67198416ef 100644 (file)
@@ -247,7 +247,7 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
 
        /* Let's setup deadline for reply unlink. */
        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
-           async && req->rq_bulk_deadline == 0)
+           async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0)
                req->rq_bulk_deadline = ktime_get_real_seconds() + LONG_UNLINK;
 
        if (ptlrpc_client_bulk_active(req) == 0)        /* completed or */
@@ -266,7 +266,7 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
                return 1;                               /* never registered */
 
        /* Move to "Unregistering" phase as bulk was not unlinked yet. */
-       ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING);
+       ptlrpc_rqphase_move(req, RQ_PHASE_UNREG_BULK);
 
        /* Do not wait for unlink to finish. */
        if (async)