drbd: Removed a race that could cause unexpected execution of w_make_resync_request()
authorPhilipp Reisner <philipp.reisner@linbit.com>
Wed, 1 Sep 2010 13:47:15 +0000 (15:47 +0200)
committerPhilipp Reisner <philipp.reisner@linbit.com>
Thu, 14 Oct 2010 16:38:29 +0000 (18:38 +0200)
The actual race happened int the drbd_start_resync() function. Where
drbd_resync_finished() -> __drbd_set_state() set STOP_SYNC_TIMER and
armed the timer.

If the timer fired before execution reaches the mod_timer statement
at the end of drbd_start_resync() the latter would cause an
unexpected call to w_make_resync_request().

Removed the STOP_SYNC_TIMER bit, and base it on the connection state.

The STOP_SYNC_TIMER bit probably originates probably the time before
the state engine.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_worker.c

index bb3a488b6fd6a5deb6a7b8d478ee0ed6cb2a7418..d5e38de83a199e9d2d283bb3bd8730c49cc8a875 100644 (file)
@@ -827,7 +827,6 @@ enum {
        SIGNAL_ASENDER,         /* whether asender wants to be interrupted */
        SEND_PING,              /* whether asender should send a ping asap */
 
-       STOP_SYNC_TIMER,        /* tell timer to cancel itself */
        UNPLUG_QUEUED,          /* only relevant with kernel 2.4 */
        UNPLUG_REMOTE,          /* sending a "UnplugRemote" could help */
        MD_DIRTY,               /* current uuids and flags not yet on disk */
index 73c905d0ef181bd95e0298ccf0d5aaa2dc73209d..5dd071e5c921ba1a03f967c6145d749a55d9ece8 100644 (file)
@@ -1052,12 +1052,6 @@ int __drbd_set_state(struct drbd_conf *mdev,
        wake_up(&mdev->misc_wait);
        wake_up(&mdev->state_wait);
 
-       /*   post-state-change actions   */
-       if (os.conn >= C_SYNC_SOURCE   && ns.conn <= C_CONNECTED) {
-               set_bit(STOP_SYNC_TIMER, &mdev->flags);
-               mod_timer(&mdev->resync_timer, jiffies);
-       }
-
        /* aborted verify run. log the last position */
        if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
            ns.conn < C_CONNECTED) {
@@ -1072,22 +1066,14 @@ int __drbd_set_state(struct drbd_conf *mdev,
                dev_info(DEV, "Syncer continues.\n");
                mdev->rs_paused += (long)jiffies
                                  -(long)mdev->rs_mark_time[mdev->rs_last_mark];
-               if (ns.conn == C_SYNC_TARGET) {
-                       if (!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags))
-                               mod_timer(&mdev->resync_timer, jiffies);
-                       /* This if (!test_bit) is only needed for the case
-                          that a device that has ceased to used its timer,
-                          i.e. it is already in drbd_resync_finished() gets
-                          paused and resumed. */
-               }
+               if (ns.conn == C_SYNC_TARGET)
+                       mod_timer(&mdev->resync_timer, jiffies);
        }
 
        if ((os.conn == C_SYNC_TARGET  || os.conn == C_SYNC_SOURCE) &&
            (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
                dev_info(DEV, "Resync suspended\n");
                mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
-               if (ns.conn == C_PAUSED_SYNC_T)
-                       set_bit(STOP_SYNC_TIMER, &mdev->flags);
        }
 
        if (os.conn == C_CONNECTED &&
index 4249117f1f676083cc17bf6ba0d68c8e0e3ead76..885471ded2fbcd103de2c57abd99ff081c0a43f8 100644 (file)
@@ -3803,7 +3803,6 @@ static void drbd_disconnect(struct drbd_conf *mdev)
 
        /* make sure syncer is stopped and w_resume_next_sg queued */
        del_timer_sync(&mdev->resync_timer);
-       set_bit(STOP_SYNC_TIMER, &mdev->flags);
        resync_timer_fn((unsigned long)mdev);
 
        /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
index 8be9832633745d04d419d6e6bcb603be55e5994c..0e5bf8c9829368a5bad98097fca68a07fb099c7a 100644 (file)
@@ -395,25 +395,22 @@ defer:
 
 void resync_timer_fn(unsigned long data)
 {
-       unsigned long flags;
        struct drbd_conf *mdev = (struct drbd_conf *) data;
        int queue;
 
-       spin_lock_irqsave(&mdev->req_lock, flags);
-
-       if (likely(!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags))) {
-               queue = 1;
-               if (mdev->state.conn == C_VERIFY_S)
-                       mdev->resync_work.cb = w_make_ov_request;
-               else
-                       mdev->resync_work.cb = w_make_resync_request;
-       } else {
+       queue = 1;
+       switch (mdev->state.conn) {
+       case C_VERIFY_S:
+               mdev->resync_work.cb = w_make_ov_request;
+               break;
+       case C_SYNC_TARGET:
+               mdev->resync_work.cb = w_make_resync_request;
+               break;
+       default:
                queue = 0;
                mdev->resync_work.cb = w_resync_inactive;
        }
 
-       spin_unlock_irqrestore(&mdev->req_lock, flags);
-
        /* harmless race: list_empty outside data.work.q_lock */
        if (list_empty(&mdev->resync_work.list) && queue)
                drbd_queue_work(&mdev->data.work, &mdev->resync_work);