drbd: Disable activity log updates when the whole device is out of sync
author    Philipp Reisner <philipp.reisner@linbit.com>
Tue, 31 Aug 2010 10:00:50 +0000 (12:00 +0200)
committer Philipp Reisner <philipp.reisner@linbit.com>
Thu, 14 Oct 2010 16:38:26 +0000 (18:38 +0200)
When the complete device is marked as out of sync, we can disable
updates of the on disk AL. Currently AL updates are only disabled
if one uses the "invalidate-remote" command on an unconnected,
primary device, or when at attach time all bits in the bitmap are
set.

As of now, AL updates do not get disabled when all bits become
set due to application writes to an unconnected DRBD device.
While this is a missing feature, it is not considered important,
and might get added later.

BTW, after initializing a "one legged" DRBD device with
    drbdadm create-md resX
    drbdadm -- --force primary resX
AL updates also get disabled, until the first connect.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_bitmap.c
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_nl.c
drivers/block/drbd/drbd_proc.c
drivers/block/drbd/drbd_req.c
drivers/block/drbd/drbd_req.h
drivers/block/drbd/drbd_worker.c

index e3f88d6e141243336a80adc2247c6692b555aa07..fd42832f785b86a6056e3c4efceccaeab37f04bd 100644 (file)
@@ -569,7 +569,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
  *
  * maybe bm_set should be atomic_t ?
  */
-static unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
+unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
 {
        struct drbd_bitmap *b = mdev->bitmap;
        unsigned long s;
index 58dc02bd16c219a8b05b97ce3be320cd0941ecf3..bb3a488b6fd6a5deb6a7b8d478ee0ed6cb2a7418 100644 (file)
@@ -863,6 +863,7 @@ enum {
        CONN_DRY_RUN,           /* Expect disconnect after resync handshake. */
        GOT_PING_ACK,           /* set when we receive a ping_ack packet, misc wait gets woken */
        NEW_CUR_UUID,           /* Create new current UUID when thawing IO */
+       AL_SUSPENDED,           /* Activity logging is currently suspended. */
 };
 
 struct drbd_bitmap; /* opaque for drbd_conf */
@@ -1425,6 +1426,7 @@ extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_
 /* bm_find_next variants for use while you hold drbd_bm_lock() */
 extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
 extern unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo);
+extern unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev);
 extern unsigned long drbd_bm_total_weight(struct drbd_conf *mdev);
 extern int drbd_bm_rs_done(struct drbd_conf *mdev);
 /* for receive_bitmap */
index 71c4c261573e9c40fe441326798d715d74a51bad..23878ffc43c8ac588ea4060e83d38724eff02a7d 100644 (file)
@@ -963,6 +963,12 @@ static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
        }
 }
 
+static void drbd_resume_al(struct drbd_conf *mdev)
+{
+       if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags))
+               dev_info(DEV, "Resumed AL updates\n");
+}
+
 /**
  * __drbd_set_state() - Set a new DRBD state
  * @mdev:      DRBD device.
@@ -1160,6 +1166,10 @@ int __drbd_set_state(struct drbd_conf *mdev,
            ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
                drbd_thread_restart_nowait(&mdev->receiver);
 
+       /* Resume AL writing if we get a connection */
+       if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+               drbd_resume_al(mdev);
+
        ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
        if (ascw) {
                ascw->os = os;
@@ -2851,6 +2861,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
        }
 
        drbd_free_resources(mdev);
+       clear_bit(AL_SUSPENDED, &mdev->flags);
 
        /*
         * currently we drbd_init_ee only on module load, so
@@ -3652,6 +3663,7 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
 {
        int rv = -EIO;
 
+       drbd_resume_al(mdev);
        if (get_ldev_if_state(mdev, D_ATTACHING)) {
                drbd_bm_clear_all(mdev);
                rv = drbd_bm_write(mdev);
index 97fb2c2a7a57635ae293da9977e2130d08beea0e..6742652c8abc7f8b65fe0b211cd4d11bc62961e9 100644 (file)
@@ -777,6 +777,29 @@ static void drbd_reconfig_done(struct drbd_conf *mdev)
        wake_up(&mdev->state_wait);
 }
 
+/* Make sure IO is suspended before calling this function(). */
+static void drbd_suspend_al(struct drbd_conf *mdev)
+{
+       int s = 0;
+
+       if (lc_try_lock(mdev->act_log)) {
+               drbd_al_shrink(mdev);
+               lc_unlock(mdev->act_log);
+       } else {
+               dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n");
+               return;
+       }
+
+       spin_lock_irq(&mdev->req_lock);
+       if (mdev->state.conn < C_CONNECTED)
+               s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags);
+
+       spin_unlock_irq(&mdev->req_lock);
+
+       if (s)
+               dev_info(DEV, "Suspended AL updates\n");
+}
+
 /* does always return 0;
  * interesting return code is in reply->ret_code */
 static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
@@ -1113,6 +1136,9 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
                drbd_al_to_on_disk_bm(mdev);
        }
 
+       if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
+               drbd_suspend_al(mdev); /* IO is still suspended here... */
+
        spin_lock_irq(&mdev->req_lock);
        os = mdev->state;
        ns.i = os.i;
@@ -1792,12 +1818,38 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
        return 0;
 }
 
+static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
+{
+       int rv;
+
+       rv = drbd_bmio_set_n_write(mdev);
+       drbd_suspend_al(mdev);
+       return rv;
+}
+
 static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
                                   struct drbd_nl_cfg_reply *reply)
 {
+       int retcode;
 
-       reply->ret_code = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
+       retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
+
+       if (retcode < SS_SUCCESS) {
+               if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) {
+                       /* The peer will get a resync upon connect anyways. Just make that
+                          into a full resync. */
+                       retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
+                       if (retcode >= SS_SUCCESS) {
+                               /* open coded drbd_bitmap_io() */
+                               if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
+                                                  "set_n_write from invalidate_peer"))
+                                       retcode = ERR_IO_MD_DISK;
+                       }
+               } else
+                       retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
+       }
 
+       reply->ret_code = retcode;
        return 0;
 }
 
index c159692c3b56ed64eab78d56f7613259f3c82592..a4a4a06908c5eb3fcce7d2235b512d76362ca99c 100644 (file)
@@ -203,7 +203,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
                        seq_printf(seq, "%2d: cs:Unconfigured\n", i);
                } else {
                        seq_printf(seq,
-                          "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c\n"
+                          "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
                           "    ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
                           "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
                           i, sn,
@@ -218,6 +218,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
                           mdev->state.peer_isp ? 'p' : '-',
                           mdev->state.user_isp ? 'u' : '-',
                           mdev->congestion_reason ?: '-',
+                          test_bit(AL_SUSPENDED, &mdev->flags) ? 's' : '-',
                           mdev->send_cnt/2,
                           mdev->recv_cnt/2,
                           mdev->writ_cnt/2,
index 3b61d767d9c43ed7eae36b2a03fb091e54c29a2d..af608b39c4e05013f8d6ecc4bc216891d72f4f57 100644 (file)
@@ -94,7 +94,8 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
                 */
                if (s & RQ_LOCAL_MASK) {
                        if (get_ldev_if_state(mdev, D_FAILED)) {
-                               drbd_al_complete_io(mdev, req->sector);
+                               if (s & RQ_IN_ACT_LOG)
+                                       drbd_al_complete_io(mdev, req->sector);
                                put_ldev(mdev);
                        } else if (__ratelimit(&drbd_ratelimit_state)) {
                                dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), "
@@ -802,8 +803,10 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
         * resync extent to finish, and, if necessary, pulls in the target
         * extent into the activity log, which involves further disk io because
         * of transactional on-disk meta data updates. */
-       if (rw == WRITE && local)
+       if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) {
+               req->rq_state |= RQ_IN_ACT_LOG;
                drbd_al_begin_io(mdev, sector);
+       }
 
        remote = remote && (mdev->state.pdsk == D_UP_TO_DATE ||
                            (mdev->state.pdsk == D_INCONSISTENT &&
index f2e45aaa2cd5279bafc224a4ef6b836905fd4adb..181ea0364822d4f6d559ff1101d95d7d675eff7f 100644 (file)
@@ -189,6 +189,9 @@ enum drbd_req_state_bits {
 
        /* Set when this is a write, clear for a read */
        __RQ_WRITE,
+
+       /* Should call drbd_al_complete_io() for this request... */
+       __RQ_IN_ACT_LOG,
 };
 
 #define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)
@@ -208,6 +211,7 @@ enum drbd_req_state_bits {
 #define RQ_NET_MASK        (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)
 
 #define RQ_WRITE           (1UL << __RQ_WRITE)
+#define RQ_IN_ACT_LOG      (1UL << __RQ_IN_ACT_LOG)
 
 /* For waking up the frozen transfer log mod_req() has to return if the request
    should be counted in the epoch object*/
index 3d0e14e3ade37def6323422635f8f0239d478fb9..8be9832633745d04d419d6e6bcb603be55e5994c 100644 (file)
@@ -1273,7 +1273,7 @@ int w_restart_disk_io(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
        struct drbd_request *req = container_of(w, struct drbd_request, w);
 
-       if (bio_data_dir(req->master_bio) == WRITE)
+       if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
                drbd_al_begin_io(mdev, req->sector);
        /* Calling drbd_al_begin_io() out of the worker might deadlocks
           theoretically. Practically it can not deadlock, since this is