ext4: Save error information to the superblock for analysis
author: Theodore Ts'o <tytso@mit.edu>
Tue, 27 Jul 2010 15:56:03 +0000 (11:56 -0400)
committer: Theodore Ts'o <tytso@mit.edu>
Tue, 27 Jul 2010 15:56:03 +0000 (11:56 -0400)
Save the number of file system errors, and the time, function name, line
number, block number, and inode number of the first and most recent
errors reported on the file system, in the superblock.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
fs/ext4/block_validity.c
fs/ext4/ext4.h
fs/ext4/ext4_jbd2.c
fs/ext4/inode.c
fs/ext4/super.c

index 5b6973fbf1bdde32bb1c2a4b6ff73a9d2df9d692..3db5084db9bd06c2d492cd76dd2a014e7bb07bf6 100644 (file)
@@ -229,16 +229,20 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
 
        if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
            (start_blk + count < start_blk) ||
-           (start_blk + count > ext4_blocks_count(sbi->s_es)))
+           (start_blk + count > ext4_blocks_count(sbi->s_es))) {
+               sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
                return 0;
+       }
        while (n) {
                entry = rb_entry(n, struct ext4_system_zone, node);
                if (start_blk + count - 1 < entry->start_blk)
                        n = n->rb_left;
                else if (start_blk >= (entry->start_blk + entry->count))
                        n = n->rb_right;
-               else
+               else {
+                       sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
                        return 0;
+               }
        }
        return 1;
 }
index 088938148f5c194c2adcb58bdfc38da73fec8589..6b96125e7255f51b7cd70e18ca7d4ce061fcf45e 100644 (file)
@@ -1011,9 +1011,24 @@ struct ext4_super_block {
                                              snapshot's future use */
        __le32  s_snapshot_list;        /* inode number of the head of the
                                           on-disk snapshot list */
-       __u32   s_reserved[155];        /* Padding to the end of the block */
+#define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count)
+       __le32  s_error_count;          /* number of fs errors */
+       __le32  s_first_error_time;     /* first time an error happened */
+       __le32  s_first_error_ino;      /* inode involved in first error */
+       __le64  s_first_error_block;    /* block involved of first error */
+       __u8    s_first_error_func[32]; /* function where the error happened */
+       __le32  s_first_error_line;     /* line number where error happened */
+       __le32  s_last_error_time;      /* most recent time of an error */
+       __le32  s_last_error_ino;       /* inode involved in last error */
+       __le32  s_last_error_line;      /* line number where error happened */
+       __le64  s_last_error_block;     /* block involved of last error */
+       __u8    s_last_error_func[32];  /* function where the error happened */
+#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_reserved)
+       __le32   s_reserved[128];        /* Padding to the end of the block */
 };
 
+#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
+
 #ifdef __KERNEL__
 
 /*
index 23425cd68daa4f840e1faabef8f3e6357b7b2b25..6e272ef6ba96c4938cc357ef498c760bde67d39e 100644 (file)
@@ -134,6 +134,11 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
                if (inode && inode_needs_sync(inode)) {
                        sync_dirty_buffer(bh);
                        if (buffer_req(bh) && !buffer_uptodate(bh)) {
+                               struct ext4_super_block *es;
+
+                               es = EXT4_SB(inode->i_sb)->s_es;
+                               es->s_last_error_block =
+                                       cpu_to_le64(bh->b_blocknr);
                                ext4_error_inode(inode, where, line,
                                                 bh->b_blocknr,
                                        "IO error syncing itable block");
index 69ea663ef03e917a2ceae6554b2bc601c4a435ef..755ba8682233197baa0475872665a794c9e4b12c 100644 (file)
@@ -341,6 +341,7 @@ static int __ext4_check_blockref(const char *function, unsigned int line,
                                 struct inode *inode,
                                 __le32 *p, unsigned int max)
 {
+       struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
        __le32 *bref = p;
        unsigned int blk;
 
@@ -349,6 +350,7 @@ static int __ext4_check_blockref(const char *function, unsigned int line,
                if (blk &&
                    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                    blk, 1))) {
+                       es->s_last_error_block = cpu_to_le64(blk);
                        ext4_error_inode(inode, function, line, blk,
                                         "invalid block");
                        return -EIO;
index bcf74b31d01478ed13ad90bcb6d00cc5573e7c2b..a94d3f56898fca3a47f44fbe009170b0acd203b4 100644 (file)
@@ -307,6 +307,35 @@ void ext4_journal_abort_handle(const char *caller, unsigned int line,
        jbd2_journal_abort_handle(handle);
 }
 
+static void __save_error_info(struct super_block *sb, const char *func,
+                           unsigned int line) /* record error details in es */
+{
+       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+       /* Does not call ext4_commit_super(); save_error_info() adds that. */
+       EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; /* flag in sb_info */
+       es->s_state |= cpu_to_le16(EXT4_ERROR_FS); /* and in the superblock */
+       es->s_last_error_time = cpu_to_le32(get_seconds());
+       strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); /* no NUL if func fills the field */
+       es->s_last_error_line = cpu_to_le32(line);
+       if (!es->s_first_error_time) { /* zero: no first error recorded yet */
+               es->s_first_error_time = es->s_last_error_time;
+               strncpy(es->s_first_error_func, func,
+                       sizeof(es->s_first_error_func)); /* same: may be unterminated */
+               es->s_first_error_line = cpu_to_le32(line);
+               es->s_first_error_ino = es->s_last_error_ino; /* not set here; copied as-is */
+               es->s_first_error_block = es->s_last_error_block; /* likewise copied as-is */
+       }
+       es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1); /* stored little-endian */
+}
+
+static void save_error_info(struct super_block *sb, const char *func,
+                           unsigned int line) /* record the error, then commit sb */
+{
+       __save_error_info(sb, func, line); /* update the error fields in s_es */
+       ext4_commit_super(sb, 1); /* NOTE(review): 1 presumably means sync write - confirm */
+}
+
+
 /* Deal with the reporting of failure conditions on a filesystem such as
  * inconsistencies detected or read IO failures.
  *
@@ -324,11 +353,6 @@ void ext4_journal_abort_handle(const char *caller, unsigned int line,
 
 static void ext4_handle_error(struct super_block *sb)
 {
-       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-
-       EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
-       es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-
        if (sb->s_flags & MS_RDONLY)
                return;
 
@@ -343,7 +367,6 @@ static void ext4_handle_error(struct super_block *sb)
                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
                sb->s_flags |= MS_RDONLY;
        }
-       ext4_commit_super(sb, 1);
        if (test_opt(sb, ERRORS_PANIC))
                panic("EXT4-fs (device %s): panic forced after error\n",
                        sb->s_id);
@@ -369,7 +392,11 @@ void ext4_error_inode(struct inode *inode, const char *function,
                      const char *fmt, ...)
 {
        va_list args;
+       struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 
+       es->s_last_error_ino = cpu_to_le32(inode->i_ino);
+       es->s_last_error_block = cpu_to_le64(block);
+       save_error_info(inode->i_sb, function, line);
        va_start(args, fmt);
        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
               inode->i_sb->s_id, function, line, inode->i_ino);
@@ -387,9 +414,13 @@ void ext4_error_file(struct file *file, const char *function,
                     unsigned int line, const char *fmt, ...)
 {
        va_list args;
+       struct ext4_super_block *es;
        struct inode *inode = file->f_dentry->d_inode;
        char pathname[80], *path;
 
+       es = EXT4_SB(inode->i_sb)->s_es;
+       es->s_last_error_ino = cpu_to_le32(inode->i_ino);
+       save_error_info(inode->i_sb, function, line);
        va_start(args, fmt);
        path = d_path(&(file->f_path), pathname, sizeof(pathname));
        if (!path)
@@ -459,6 +490,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
        errstr = ext4_decode_error(sb, errno, nbuf);
        printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
               sb->s_id, function, line, errstr);
+       save_error_info(sb, function, line);
 
        ext4_handle_error(sb);
 }
@@ -478,6 +510,7 @@ void __ext4_abort(struct super_block *sb, const char *function,
 {
        va_list args;
 
+       save_error_info(sb, function, line);
        va_start(args, fmt);
        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
               function, line);
@@ -485,18 +518,16 @@ void __ext4_abort(struct super_block *sb, const char *function,
        printk("\n");
        va_end(args);
 
+       if ((sb->s_flags & MS_RDONLY) == 0) {
+               ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
+               sb->s_flags |= MS_RDONLY;
+               EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+               if (EXT4_SB(sb)->s_journal)
+                       jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
+               save_error_info(sb, function, line);
+       }
        if (test_opt(sb, ERRORS_PANIC))
                panic("EXT4-fs panic from previous error\n");
-
-       if (sb->s_flags & MS_RDONLY)
-               return;
-
-       ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
-       EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
-       sb->s_flags |= MS_RDONLY;
-       EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
-       if (EXT4_SB(sb)->s_journal)
-               jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 }
 
 void ext4_msg (struct super_block * sb, const char *prefix,
@@ -534,6 +565,9 @@ __acquires(bitlock)
        va_list args;
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 
+       es->s_last_error_ino = cpu_to_le32(ino);
+       es->s_last_error_block = cpu_to_le64(block);
+       __save_error_info(sb, function, line);
        va_start(args, fmt);
        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
               sb->s_id, function, line, grp);
@@ -546,11 +580,10 @@ __acquires(bitlock)
        va_end(args);
 
        if (test_opt(sb, ERRORS_CONT)) {
-               EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
-               es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
                ext4_commit_super(sb, 0);
                return;
        }
+
        ext4_unlock_group(sb, grp);
        ext4_handle_error(sb);
        /*
@@ -3332,8 +3365,17 @@ static int ext4_load_journal(struct super_block *sb,
 
        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
                err = jbd2_journal_wipe(journal, !really_read_only);
-       if (!err)
+       if (!err) {
+               char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
+               if (save)
+                       memcpy(save, ((char *) es) +
+                              EXT4_S_ERR_START, EXT4_S_ERR_LEN);
                err = jbd2_journal_load(journal);
+               if (save)
+                       memcpy(((char *) es) + EXT4_S_ERR_START,
+                              save, EXT4_S_ERR_LEN);
+               kfree(save);
+       }
 
        if (err) {
                ext4_msg(sb, KERN_ERR, "error loading journal");