reiserfs: kill-the-BKL

author Frederic Weisbecker <fweisbec@gmail.com>

Tue, 7 Apr 2009 02:19:49 +0000 (04:19 +0200)

committer Frederic Weisbecker <fweisbec@gmail.com>

Mon, 14 Sep 2009 05:17:59 +0000 (07:17 +0200)
author Frederic Weisbecker <fweisbec@gmail.com>
Tue, 7 Apr 2009 02:19:49 +0000 (04:19 +0200)
committer Frederic Weisbecker <fweisbec@gmail.com>
Mon, 14 Sep 2009 05:17:59 +0000 (07:17 +0200)
diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile

index 7c5ab6330dd6bbe4edbd1f6b62a3a1d5a3f6b645..6a9e30c041dda2a52a44bbe14d9aa664dae14bb7 100644 (file)
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_REISERFS_FS) += reiserfs.o
  reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \
                  super.o prints.o objectid.o lbalance.o ibalance.o stree.o \
                  hashes.o tail_conversion.o journal.o resize.o \
-                item_ops.o ioctl.o procfs.o xattr.o
+                item_ops.o ioctl.o procfs.o xattr.o lock.o
  
  ifeq ($(CONFIG_REISERFS_FS_XATTR),y)
  reiserfs-objs += xattr_user.o xattr_trusted.o
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c

index e716161ab325c8f246b33c11110a60fff351100f..147033461b870313019162560b7dd781ebbe4711 100644 (file)
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1256,7 +1256,9 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
         else {
                 if (buffer_locked(bh)) {
                         PROC_INFO_INC(sb, scan_bitmap.wait);
+                       reiserfs_write_unlock(sb);
                         __wait_on_buffer(bh);
+                       reiserfs_write_lock(sb);
                 }
                 BUG_ON(!buffer_uptodate(bh));
                 BUG_ON(atomic_read(&bh->b_count) == 0);
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c

index 6d2668fdc3848eb5b2be29d027c5634c727148f6..17f31ad379c813147984aacfad5b0c0a990373e2 100644 (file)
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -174,14 +174,22 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
                                 // user space buffer is swapped out. At that time
                                 // entry can move to somewhere else
                                 memcpy(local_buf, d_name, d_reclen);
+
+                               /*
+                                * Since filldir might sleep, we can release
+                                * the write lock here for other waiters
+                                */
+                               reiserfs_write_unlock(inode->i_sb);
                                 if (filldir
                                     (dirent, local_buf, d_reclen, d_off, d_ino,
                                      DT_UNKNOWN) < 0) {
+                                       reiserfs_write_lock(inode->i_sb);
                                         if (local_buf != small_buf) {
                                                 kfree(local_buf);
                                         }
                                         goto end;
                                 }
+                               reiserfs_write_lock(inode->i_sb);
                                 if (local_buf != small_buf) {
                                         kfree(local_buf);
                                 }
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c

index 5e5a4e6fbaf8290d2bf91799d7e116390256ccf0..bf5f2cbdb0638cb69fd1d7010bb21740130d2611 100644 (file)
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -1022,7 +1022,11 @@ static int get_far_parent(struct tree_balance *tb,
         /* Check whether the common parent is locked. */
  
         if (buffer_locked(*pcom_father)) {
+
+               /* Release the write lock while the buffer is busy */
+               reiserfs_write_unlock(tb->tb_sb);
                 __wait_on_buffer(*pcom_father);
+               reiserfs_write_lock(tb->tb_sb);
                 if (FILESYSTEM_CHANGED_TB(tb)) {
                         brelse(*pcom_father);
                         return REPEAT_SEARCH;
@@ -1927,7 +1931,9 @@ static int get_direct_parent(struct tree_balance *tb, int h)
                 return REPEAT_SEARCH;
  
         if (buffer_locked(bh)) {
+               reiserfs_write_unlock(tb->tb_sb);
                 __wait_on_buffer(bh);
+               reiserfs_write_lock(tb->tb_sb);
                 if (FILESYSTEM_CHANGED_TB(tb))
                         return REPEAT_SEARCH;
         }
@@ -2278,7 +2284,9 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
                                     REPEAT_SEARCH : CARRY_ON;
                         }
  #endif
+                       reiserfs_write_unlock(tb->tb_sb);
                         __wait_on_buffer(locked);
+                       reiserfs_write_lock(tb->tb_sb);
                         if (FILESYSTEM_CHANGED_TB(tb))
                                 return REPEAT_SEARCH;
                 }
@@ -2349,7 +2357,9 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
  
         /* if it possible in indirect_to_direct conversion */
         if (buffer_locked(tbS0)) {
+               reiserfs_write_unlock(tb->tb_sb);
                 __wait_on_buffer(tbS0);
+               reiserfs_write_lock(tb->tb_sb);
                 if (FILESYSTEM_CHANGED_TB(tb))
                         return REPEAT_SEARCH;
         }
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c

index a14d6cd9eeda2670251c0ec3377518912cb465a2..1893c8198439efd4c80314c0f2fa307d91186fc1 100644 (file)
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -489,10 +489,14 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
            disappeared */
         if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
                 int err;
-               lock_kernel();
+
+               reiserfs_write_lock(inode->i_sb);
+
                 err = reiserfs_commit_for_inode(inode);
                 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
-               unlock_kernel();
+
+               reiserfs_write_unlock(inode->i_sb);
+
                 if (err < 0)
                         ret = err;
         }
@@ -616,7 +620,6 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
         loff_t new_offset =
             (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;
  
-       /* bad.... */
         reiserfs_write_lock(inode->i_sb);
         version = get_inode_item_key_version(inode);
  
@@ -997,10 +1000,14 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
                         if (retval)
                                 goto failure;
                 }
-               /* inserting indirect pointers for a hole can take a
-                ** long time.  reschedule if needed
+               /*
+                * inserting indirect pointers for a hole can take a
+                * long time.  reschedule if needed and also release the write
+                * lock for others.
                  */
+               reiserfs_write_unlock(inode->i_sb);
                 cond_resched();
+               reiserfs_write_lock(inode->i_sb);
  
                 retval = search_for_position_by_key(inode->i_sb, &key, &path);
                 if (retval == IO_ERROR) {
@@ -2608,7 +2615,10 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
         int ret;
         int old_ref = 0;
  
+       reiserfs_write_unlock(inode->i_sb);
         reiserfs_wait_on_write_block(inode->i_sb);
+       reiserfs_write_lock(inode->i_sb);
+
         fix_tail_page_for_writing(page);
         if (reiserfs_transaction_running(inode->i_sb)) {
                 struct reiserfs_transaction_handle *th;
@@ -2758,7 +2768,10 @@ int reiserfs_commit_write(struct file *f, struct page *page,
         int update_sd = 0;
         struct reiserfs_transaction_handle *th = NULL;
  
+       reiserfs_write_unlock(inode->i_sb);
         reiserfs_wait_on_write_block(inode->i_sb);
+       reiserfs_write_lock(inode->i_sb);
+
         if (reiserfs_transaction_running(inode->i_sb)) {
                 th = current->journal_info;
         }
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c

index 0ccc3fdda7bfb7d5d00e59e8b26e74a0331e6d3c..5e40b0cd4c3d703f57fc96061f305d6e84491a7c 100644 (file)
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -141,9 +141,11 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
         default:
                 return -ENOIOCTLCMD;
         }
-       lock_kernel();
+
+       reiserfs_write_lock(inode->i_sb);
         ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
-       unlock_kernel();
+       reiserfs_write_unlock(inode->i_sb);
+
         return ret;
  }
  #endif
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c

index 90622200b39c0622e0f159d423c929a036d76257..438c71f0bc91640bf0daed68fe17229b1bec88bc 100644 (file)
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -429,21 +429,6 @@ static void clear_prepared_bits(struct buffer_head *bh)
         clear_buffer_journal_restore_dirty(bh);
  }
  
-/* utility function to force a BUG if it is called without the big
-** kernel lock held.  caller is the string printed just before calling BUG()
-*/
-void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
-{
-#ifdef CONFIG_SMP
-       if (current->lock_depth < 0) {
-               reiserfs_panic(sb, "journal-1", "%s called without kernel "
-                              "lock held", caller);
-       }
-#else
-       ;
-#endif
-}
-
  /* return a cnode with same dev, block number and size in table, or null if not found */
  static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
                                                                   super_block
@@ -552,11 +537,48 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
         journal_hash(table, cn->sb, cn->blocknr) = cn;
  }
  
+/*
+ * Acquire mutex @m without deadlocking against the write lock of @s.
+ *
+ * Several mutexes depend on the write lock.
+ * However, we sometimes want to relax the write lock while holding
+ * these mutexes, mirroring the release/reacquire-on-schedule()
+ * behaviour of the BKL that was used before.
+ * Reiserfs performance and locking were built around this scheme.
+ * Now that the write lock is a mutex and no longer the BKL, doing so
+ * may result in a deadlock:
+ *
+ * A acquires write_lock
+ * A acquires j_commit_mutex
+ * A releases write_lock and waits for something
+ * B acquires write_lock
+ * B can't acquire j_commit_mutex and sleeps
+ * A can't reacquire write_lock
+ * deadlock
+ *
+ * We avoid this deadlock by playing the same game as the BKL did:
+ * if we can't acquire a mutex that depends on the write lock,
+ * we release the write lock, call schedule() and then retry.
+ *
+ * The mutexes concerned by this hack are the commit mutex of a
+ * journal list, the flush mutex and the journal lock.
+ */
+static inline void reiserfs_mutex_lock_safe(struct mutex *m,
+                              struct super_block *s)
+{
+       while (!mutex_trylock(m)) {
+               reiserfs_write_unlock(s);
+               schedule();
+               reiserfs_write_lock(s);
+       }
+}
+
  /* lock the current transaction */
  static inline void lock_journal(struct super_block *sb)
  {
         PROC_INFO_INC(sb, journal.lock_journal);
-       mutex_lock(&SB_JOURNAL(sb)->j_mutex);
+
+       reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
  }
  
  /* unlock the current transaction */
@@ -708,7 +730,9 @@ static void check_barrier_completion(struct super_block *s,
                 disable_barrier(s);
                 set_buffer_uptodate(bh);
                 set_buffer_dirty(bh);
+               reiserfs_write_unlock(s);
                 sync_dirty_buffer(bh);
+               reiserfs_write_lock(s);
         }
  }
  
@@ -996,8 +1020,13 @@ static int reiserfs_async_progress_wait(struct super_block *s)
  {
         DEFINE_WAIT(wait);
         struct reiserfs_journal *j = SB_JOURNAL(s);
-       if (atomic_read(&j->j_async_throttle))
+
+       if (atomic_read(&j->j_async_throttle)) {
+               reiserfs_write_unlock(s);
                 congestion_wait(BLK_RW_ASYNC, HZ / 10);
+               reiserfs_write_lock(s);
+       }
+
         return 0;
  }
  
@@ -1043,7 +1072,8 @@ static int flush_commit_list(struct super_block *s,
         }
  
         /* make sure nobody is trying to flush this one at the same time */
-       mutex_lock(&jl->j_commit_mutex);
+       reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);
+
         if (!journal_list_still_alive(s, trans_id)) {
                 mutex_unlock(&jl->j_commit_mutex);
                 goto put_jl;
@@ -1061,12 +1091,17 @@ static int flush_commit_list(struct super_block *s,
  
         if (!list_empty(&jl->j_bh_list)) {
                 int ret;
-               unlock_kernel();
+
+               /*
+                * We might sleep in numerous places inside
+                * write_ordered_buffers. Relax the write lock.
+                */
+               reiserfs_write_unlock(s);
                 ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
                                             journal, jl, &jl->j_bh_list);
                 if (ret < 0 && retval == 0)
                         retval = ret;
-               lock_kernel();
+               reiserfs_write_lock(s);
         }
         BUG_ON(!list_empty(&jl->j_bh_list));
         /*
@@ -1114,12 +1149,19 @@ static int flush_commit_list(struct super_block *s,
                 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
                     (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
                 tbh = journal_find_get_block(s, bn);
+
+               reiserfs_write_unlock(s);
                 wait_on_buffer(tbh);
+               reiserfs_write_lock(s);
                 // since we're using ll_rw_blk above, it might have skipped over
                 // a locked buffer.  Double check here
                 //
-               if (buffer_dirty(tbh))  /* redundant, sync_dirty_buffer() checks */
+               /* redundant, sync_dirty_buffer() checks */
+               if (buffer_dirty(tbh)) {
+                       reiserfs_write_unlock(s);
                         sync_dirty_buffer(tbh);
+                       reiserfs_write_lock(s);
+               }
                 if (unlikely(!buffer_uptodate(tbh))) {
  #ifdef CONFIG_REISERFS_CHECK
                         reiserfs_warning(s, "journal-601",
@@ -1143,10 +1185,15 @@ static int flush_commit_list(struct super_block *s,
                         if (buffer_dirty(jl->j_commit_bh))
                                 BUG();
                         mark_buffer_dirty(jl->j_commit_bh) ;
+                       reiserfs_write_unlock(s);
                         sync_dirty_buffer(jl->j_commit_bh) ;
+                       reiserfs_write_lock(s);
                 }
-       } else
+       } else {
+               reiserfs_write_unlock(s);
                 wait_on_buffer(jl->j_commit_bh);
+               reiserfs_write_lock(s);
+       }
  
         check_barrier_completion(s, jl->j_commit_bh);
  
@@ -1286,7 +1333,9 @@ static int _update_journal_header_block(struct super_block *sb,
  
         if (trans_id >= journal->j_last_flush_trans_id) {
                 if (buffer_locked((journal->j_header_bh))) {
+                       reiserfs_write_unlock(sb);
                         wait_on_buffer((journal->j_header_bh));
+                       reiserfs_write_lock(sb);
                         if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
  #ifdef CONFIG_REISERFS_CHECK
                                 reiserfs_warning(sb, "journal-699",
@@ -1312,12 +1361,16 @@ static int _update_journal_header_block(struct super_block *sb,
                                 disable_barrier(sb);
                                 goto sync;
                         }
+                       reiserfs_write_unlock(sb);
                         wait_on_buffer(journal->j_header_bh);
+                       reiserfs_write_lock(sb);
                         check_barrier_completion(sb, journal->j_header_bh);
                 } else {
                       sync:
                         set_buffer_dirty(journal->j_header_bh);
+                       reiserfs_write_unlock(sb);
                         sync_dirty_buffer(journal->j_header_bh);
+                       reiserfs_write_lock(sb);
                 }
                 if (!buffer_uptodate(journal->j_header_bh)) {
                         reiserfs_warning(sb, "journal-837",
@@ -1409,7 +1462,7 @@ static int flush_journal_list(struct super_block *s,
  
         /* if flushall == 0, the lock is already held */
         if (flushall) {
-               mutex_lock(&journal->j_flush_mutex);
+               reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
         } else if (mutex_trylock(&journal->j_flush_mutex)) {
                 BUG();
         }
@@ -1553,7 +1606,11 @@ static int flush_journal_list(struct super_block *s,
                                         reiserfs_panic(s, "journal-1011",
                                                        "cn->bh is NULL");
                                 }
+
+                               reiserfs_write_unlock(s);
                                 wait_on_buffer(cn->bh);
+                               reiserfs_write_lock(s);
+
                                 if (!cn->bh) {
                                         reiserfs_panic(s, "journal-1012",
                                                        "cn->bh is NULL");
@@ -1973,11 +2030,19 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
         reiserfs_mounted_fs_count--;
         /* wait for all commits to finish */
         cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
+
+       /*
+        * We must release the write lock here because
+        * the workqueue job (flush_async_commit) needs this lock
+        */
+       reiserfs_write_unlock(sb);
         flush_workqueue(commit_wq);
+
         if (!reiserfs_mounted_fs_count) {
                 destroy_workqueue(commit_wq);
                 commit_wq = NULL;
         }
+       reiserfs_write_lock(sb);
  
         free_journal_ram(sb);
  
@@ -2243,7 +2308,11 @@ static int journal_read_transaction(struct super_block *sb,
         /* read in the log blocks, memcpy to the corresponding real block */
         ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
         for (i = 0; i < get_desc_trans_len(desc); i++) {
+
+               reiserfs_write_unlock(sb);
                 wait_on_buffer(log_blocks[i]);
+               reiserfs_write_lock(sb);
+
                 if (!buffer_uptodate(log_blocks[i])) {
                         reiserfs_warning(sb, "journal-1212",
                                          "REPLAY FAILURE fsck required! "
@@ -2964,8 +3033,11 @@ static void queue_log_writer(struct super_block *s)
         init_waitqueue_entry(&wait, current);
         add_wait_queue(&journal->j_join_wait, &wait);
         set_current_state(TASK_UNINTERRUPTIBLE);
-       if (test_bit(J_WRITERS_QUEUED, &journal->j_state))
+       if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
+               reiserfs_write_unlock(s);
                 schedule();
+               reiserfs_write_lock(s);
+       }
         __set_current_state(TASK_RUNNING);
         remove_wait_queue(&journal->j_join_wait, &wait);
  }
@@ -2982,7 +3054,9 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
         struct reiserfs_journal *journal = SB_JOURNAL(sb);
         unsigned long bcount = journal->j_bcount;
         while (1) {
+               reiserfs_write_unlock(sb);
                 schedule_timeout_uninterruptible(1);
+               reiserfs_write_lock(sb);
                 journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
                 while ((atomic_read(&journal->j_wcount) > 0 ||
                         atomic_read(&journal->j_jlock)) &&
@@ -3033,7 +3107,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
  
         if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
                 unlock_journal(sb);
+               reiserfs_write_unlock(sb);
                 reiserfs_wait_on_write_block(sb);
+               reiserfs_write_lock(sb);
                 PROC_INFO_INC(sb, journal.journal_relock_writers);
                 goto relock;
         }
@@ -3506,14 +3582,14 @@ static void flush_async_commits(struct work_struct *work)
         struct reiserfs_journal_list *jl;
         struct list_head *entry;
  
-       lock_kernel();
+       reiserfs_write_lock(sb);
         if (!list_empty(&journal->j_journal_list)) {
                 /* last entry is the youngest, commit it and you get everything */
                 entry = journal->j_journal_list.prev;
                 jl = JOURNAL_LIST_ENTRY(entry);
                 flush_commit_list(sb, jl, 1);
         }
-       unlock_kernel();
+       reiserfs_write_unlock(sb);
  }
  
  /*
@@ -4041,7 +4117,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
          * the new transaction is fully setup, and we've already flushed the
          * ordered bh list
          */
-       mutex_lock(&jl->j_commit_mutex);
+       reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb);
  
         /* save the transaction id in case we need to commit it later */
         commit_trans_id = jl->j_trans_id;
@@ -4203,10 +4279,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
          * is lost.
          */
         if (!list_empty(&jl->j_tail_bh_list)) {
-               unlock_kernel();
+               reiserfs_write_unlock(sb);
                 write_ordered_buffers(&journal->j_dirty_buffers_lock,
                                       journal, jl, &jl->j_tail_bh_list);
-               lock_kernel();
+               reiserfs_write_lock(sb);
         }
         BUG_ON(!list_empty(&jl->j_tail_bh_list));
         mutex_unlock(&jl->j_commit_mutex);
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c

new file mode 100644 (file)

index 0000000..cdd8d9e
--- /dev/null
+++ b/fs/reiserfs/lock.c
@@ -0,0 +1,63 @@
+#include <linux/reiserfs_fs.h>
+#include <linux/mutex.h>
+
+/*
+ * Take the per-superblock write lock, recursively if already the owner.
+ *
+ * The previous reiserfs locking scheme was heavily based on
+ * the tricky properties of the BKL:
+ * - it could be acquired recursively by the same task
+ * - performance relied on the release-while-schedule() property
+ *
+ * Now that we replace it with a mutex, we still want to keep the same
+ * recursive property to avoid big changes in the code structure.
+ * We use our own lock_owner here because the owner field of a mutex
+ * is only available with SMP or mutex debugging; also we only need this
+ * field for this mutex, so there is no need for a system-wide facility.
+ * Also this lock is often released before a call that could block because
+ * reiserfs performance was partially based on the release-while-schedule()
+ * property of the BKL.
+ */
+void reiserfs_write_lock(struct super_block *s)
+{
+       struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+
+       if (sb_i->lock_owner != current) {
+               mutex_lock(&sb_i->lock);
+               sb_i->lock_owner = current;
+       }
+
+       /* Count the nesting; unprotected, only the lock holder touches it */
+       sb_i->lock_depth++;
+}
+
+void reiserfs_write_unlock(struct super_block *s)
+{
+       struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+
+       /*
+        * Drop one nesting level; the mutex itself is released only when
+        * the depth returns to -1.  Unlocking without being the owner is a
+        * lock imbalance: warn once (this could arguably be a BUG()).
+        */
+       WARN_ONCE(sb_i->lock_owner != current,
+                 "Superblock write lock imbalance");
+
+       if (--sb_i->lock_depth == -1) {
+               sb_i->lock_owner = NULL;
+               mutex_unlock(&sb_i->lock);
+       }
+}
+
+/*
+ * Panic (via reiserfs_panic) if the superblock write lock depth is
+ * negative, i.e. the lock is not held.  @caller is put in the message.
+ */
+void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
+{
+       struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
+
+       if (sb_i->lock_depth < 0)
+               reiserfs_panic(sb, "journal-1", "%s called without kernel "
+                              "lock held", caller);
+}
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c

index 18b315d3d104ea0cebfbb5a1991356038dc8c4fd..b3a94d20f0fcd37b044596369687a7d1cebfa2f5 100644 (file)
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -141,7 +141,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
  
                         set_buffer_uptodate(bh);
                         mark_buffer_dirty(bh);
+                       reiserfs_write_unlock(s);
                         sync_dirty_buffer(bh);
+                       reiserfs_write_lock(s);
                         // update bitmap_info stuff
                         bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
                         brelse(bh);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c

index d036ee5b1c81a8bd43d8f8836fbd78b68c462acb..6bd99a99a6528b0da53b803cc08a962b72fe91c1 100644 (file)
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -629,7 +629,9 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key,        /* Key to s
                                 search_by_key_reada(sb, reada_bh,
                                                     reada_blocks, reada_count);
                         ll_rw_block(READ, 1, &bh);
+                       reiserfs_write_unlock(sb);
                         wait_on_buffer(bh);
+                       reiserfs_write_lock(sb);
                         if (!buffer_uptodate(bh))
                                 goto io_error;
                 } else {
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c

index 7adea74d6a8ac829d3c1efb877b9feec5027ccaa..e1cfb80d0bf3313ec2768b001b7340407c3af884 100644 (file)
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -465,7 +465,7 @@ static void reiserfs_put_super(struct super_block *s)
         struct reiserfs_transaction_handle th;
         th.t_trans_id = 0;
  
-       lock_kernel();
+       reiserfs_write_lock(s);
  
         if (s->s_dirt)
                 reiserfs_write_super(s);
@@ -499,10 +499,10 @@ static void reiserfs_put_super(struct super_block *s)
  
         reiserfs_proc_info_done(s);
  
+       reiserfs_write_unlock(s);
+       mutex_destroy(&REISERFS_SB(s)->lock);
         kfree(s->s_fs_info);
         s->s_fs_info = NULL;
-
-       unlock_kernel();
  }
  
  static struct kmem_cache *reiserfs_inode_cachep;
@@ -1168,11 +1168,14 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
         unsigned int qfmt = 0;
  #ifdef CONFIG_QUOTA
         int i;
+#endif
+
+       reiserfs_write_lock(s);
  
+#ifdef CONFIG_QUOTA
         memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
  #endif
  
-       lock_kernel();
         rs = SB_DISK_SUPER_BLOCK(s);
  
         if (!reiserfs_parse_options
@@ -1295,12 +1298,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
  
  out_ok:
         replace_mount_options(s, new_opts);
-       unlock_kernel();
+       reiserfs_write_unlock(s);
         return 0;
  
  out_err:
         kfree(new_opts);
-       unlock_kernel();
+       reiserfs_write_unlock(s);
         return err;
  }
  
@@ -1404,7 +1407,9 @@ static int read_super_block(struct super_block *s, int offset)
  static int reread_meta_blocks(struct super_block *s)
  {
         ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
+       reiserfs_write_unlock(s);
         wait_on_buffer(SB_BUFFER_WITH_SB(s));
+       reiserfs_write_lock(s);
         if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
                 reiserfs_warning(s, "reiserfs-2504", "error reading the super");
                 return 1;
@@ -1613,7 +1618,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
         sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
         if (!sbi) {
                 errval = -ENOMEM;
-               goto error;
+               goto error_alloc;
         }
         s->s_fs_info = sbi;
         /* Set default values for options: non-aggressive tails, RO on errors */
@@ -1627,6 +1632,20 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
         /* setup default block allocator options */
         reiserfs_init_alloc_options(s);
  
+       mutex_init(&REISERFS_SB(s)->lock);
+       REISERFS_SB(s)->lock_depth = -1;
+
+       /*
+        * This function is called with the BKL held, which was also the
+        * old locking used here.
+        * do_journal_begin() will soon check that we hold the write lock
+        * (formerly the BKL). That check is likely there for the sake of
+        * do_journal_begin()'s other callers since, at this point, there
+        * doesn't seem to be anything to protect against.
+        * Anyway, let's be conservative and lock for now.
+        */
+       reiserfs_write_lock(s);
+
         jdev_name = NULL;
         if (reiserfs_parse_options
             (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
@@ -1852,9 +1871,13 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
         init_waitqueue_head(&(sbi->s_wait));
         spin_lock_init(&sbi->bitmap_lock);
  
+       reiserfs_write_unlock(s);
+
         return (0);
  
  error:
+       reiserfs_write_unlock(s);
+error_alloc:
         if (jinit_done) {       /* kill the commit thread, free journal ram */
                 journal_release_error(NULL, s);
         }
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h

index dd31e7bae35cd606943ca4f52bc585c42a9a65f7..e47328f51801e74913e8314eb8f1648476391b57 100644 (file)
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -52,11 +52,13 @@
  #define REISERFS_IOC32_GETVERSION      FS_IOC32_GETVERSION
  #define REISERFS_IOC32_SETVERSION      FS_IOC32_SETVERSION
  
-/* Locking primitives */
-/* Right now we are still falling back to (un)lock_kernel, but eventually that
-   would evolve into real per-fs locks */
-#define reiserfs_write_lock( sb ) lock_kernel()
-#define reiserfs_write_unlock( sb ) unlock_kernel()
+/*
+ * Locking primitives. The write lock is a per superblock
+ * special mutex that has properties close to the Big Kernel Lock
+ * which was used in the previous locking scheme.
+ */
+void reiserfs_write_lock(struct super_block *s);
+void reiserfs_write_unlock(struct super_block *s);
  
  struct fid;
  
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h

index dab68bbed6757d950afa3987135b5e34cea405ae..045c37213675c853b014ab809ae19879b6a9abf3 100644 (file)
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -7,6 +7,8 @@
  #ifdef __KERNEL__
  #include <linux/workqueue.h>
  #include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
  #endif
  
  typedef enum {
@@ -355,6 +357,13 @@ struct reiserfs_sb_info {
         struct reiserfs_journal *s_journal;     /* pointer to journal information */
         unsigned short s_mount_state;   /* reiserfs state (valid, invalid) */
  
+       /* Serialize writers access, replace the old bkl */
+       struct mutex lock;
+       /* Owner of the lock (can be recursive) */
+       struct task_struct *lock_owner;
+       /* Depth of the lock, start from -1 like the bkl */
+       int lock_depth;
+
         /* Comment? -Hans */
         void (*end_io_handler) (struct buffer_head *, int);
         hashf_t s_hash_function;        /* pointer to function which is used
author	Frederic Weisbecker <fweisbec@gmail.com>
	Tue, 7 Apr 2009 02:19:49 +0000 (04:19 +0200)
committer	Frederic Weisbecker <fweisbec@gmail.com>
	Mon, 14 Sep 2009 05:17:59 +0000 (07:17 +0200)
fs/reiserfs/Makefile		patch \| blob \| history
fs/reiserfs/bitmap.c		patch \| blob \| history
fs/reiserfs/dir.c		patch \| blob \| history
fs/reiserfs/fix_node.c		patch \| blob \| history
fs/reiserfs/inode.c		patch \| blob \| history
fs/reiserfs/ioctl.c		patch \| blob \| history
fs/reiserfs/journal.c		patch \| blob \| history
fs/reiserfs/lock.c	[new file with mode: 0644]	patch \| blob
fs/reiserfs/resize.c		patch \| blob \| history
fs/reiserfs/stree.c		patch \| blob \| history
fs/reiserfs/super.c		patch \| blob \| history
include/linux/reiserfs_fs.h		patch \| blob \| history
include/linux/reiserfs_fs_sb.h		patch \| blob \| history