UBI: preserve corrupted PEBs
authorArtem Bityutskiy <Artem.Bityutskiy@nokia.com>
Fri, 3 Sep 2010 20:08:15 +0000 (23:08 +0300)
committerArtem Bityutskiy <Artem.Bityutskiy@nokia.com>
Tue, 19 Oct 2010 14:19:57 +0000 (17:19 +0300)
Currently UBI erases all corrupted eraseblocks, irrespectively of the nature
of corruption: corruption due to power cuts and non-power cut corruption.
The former case is OK, but the latter is not, because UBI may destroy
potentially important data.

With this patch, during scanning, when UBI hits a PEB with corrupted VID
header, it checks whether this PEB contains only 0xFF data. If yes, it is
safe to erase this PEB and it is put to the 'erase' list. If not, this may
be important data and it is better to avoid erasing this PEB. Instead,
UBI puts it to the corr list and moves out of the pool of available PEB.
IOW, UBI preserves this PEB.

Such corrupted PEB lessen the amount of available PEBs. So the more of them
we accumulate, the less PEBs are available. The maximum amount of non-power
cut corrupted PEBs is 8.

This patch is a response to UBIFS problem where reporter
(Matthew L. Creech <mlcreech@gmail.com>) observes that UBIFS index points
to an unmapped LEB. The theory is that corresponding PEB somehow got
corrupted and UBI wiped it. This patch (actually a series of patches)
tries to make sure such PEBs are preserved - this would make it is easier
to analyze the corruption.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
drivers/mtd/ubi/build.c
drivers/mtd/ubi/eba.c
drivers/mtd/ubi/scan.c
drivers/mtd/ubi/ubi.h
drivers/mtd/ubi/vmt.c
drivers/mtd/ubi/vtbl.c
drivers/mtd/ubi/wl.c

index f247c4e7b40de9df7cbf08850f2e96432347db8f..5ebe280225d60a69f28bc81c5c4593a47e017365 100644 (file)
@@ -591,6 +591,7 @@ static int attach_by_scanning(struct ubi_device *ubi)
 
        ubi->bad_peb_count = si->bad_peb_count;
        ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count;
+       ubi->corr_peb_count = si->corr_peb_count;
        ubi->max_ec = si->max_ec;
        ubi->mean_ec = si->mean_ec;
        ubi_msg("max. sequence number:       %llu", si->max_sqnum);
@@ -972,6 +973,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
        ubi_msg("MTD device size:            %llu MiB", ubi->flash_size >> 20);
        ubi_msg("number of good PEBs:        %d", ubi->good_peb_count);
        ubi_msg("number of bad PEBs:         %d", ubi->bad_peb_count);
+       ubi_msg("number of corrupted PEBs:   %d", ubi->corr_peb_count);
        ubi_msg("max. allowed volumes:       %d", ubi->vtbl_slots);
        ubi_msg("wear-leveling threshold:    %d", CONFIG_MTD_UBI_WL_THRESHOLD);
        ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT);
index 334865ef5220c4310ec2762771258c89bdc4dc0e..4be671815014ee3b054f2ba8f704781dfbc1f032 100644 (file)
@@ -1201,6 +1201,9 @@ static void print_rsvd_warning(struct ubi_device *ubi,
 
        ubi_warn("cannot reserve enough PEBs for bad PEB handling, reserved %d,"
                 " need %d", ubi->beb_rsvd_pebs, ubi->beb_rsvd_level);
+       if (ubi->corr_peb_count)
+               ubi_warn("%d PEBs are corrupted and not used",
+                       ubi->corr_peb_count);
 }
 
 /**
@@ -1263,6 +1266,9 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
        if (ubi->avail_pebs < EBA_RESERVED_PEBS) {
                ubi_err("no enough physical eraseblocks (%d, need %d)",
                        ubi->avail_pebs, EBA_RESERVED_PEBS);
+               if (ubi->corr_peb_count)
+                       ubi_err("%d PEBs are corrupted and not used",
+                               ubi->corr_peb_count);
                err = -ENOSPC;
                goto out_free;
        }
index def0bf03d7fe2dc97373b94529a3ba197913aea3..30b710216f264cf2c43095ea65f48876b18ef741 100644 (file)
@@ -706,8 +706,8 @@ out_free:
 struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
                                           struct ubi_scan_info *si)
 {
-       int err = 0, i;
-       struct ubi_scan_leb *seb;
+       int err = 0;
+       struct ubi_scan_leb *seb, *tmp_seb;
 
        if (!list_empty(&si->free)) {
                seb = list_entry(si->free.next, struct ubi_scan_leb, u.list);
@@ -716,37 +716,27 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
                return seb;
        }
 
-       for (i = 0; i < 2; i++) {
-               struct list_head *head;
-               struct ubi_scan_leb *tmp_seb;
-
-               if (i == 0)
-                       head = &si->erase;
-               else
-                       head = &si->corr;
-
-               /*
-                * We try to erase the first physical eraseblock from the @head
-                * list and pick it if we succeed, or try to erase the
-                * next one if not. And so forth. We don't want to take care
-                * about bad eraseblocks here - they'll be handled later.
-                */
-               list_for_each_entry_safe(seb, tmp_seb, head, u.list) {
-                       if (seb->ec == UBI_SCAN_UNKNOWN_EC)
-                               seb->ec = si->mean_ec;
+       /*
+        * We try to erase the first physical eraseblock from the erase list
+        * and pick it if we succeed, or try to erase the next one if not. And
+        * so forth. We don't want to take care about bad eraseblocks here -
+        * they'll be handled later.
+        */
+       list_for_each_entry_safe(seb, tmp_seb, &si->erase, u.list) {
+               if (seb->ec == UBI_SCAN_UNKNOWN_EC)
+                       seb->ec = si->mean_ec;
 
-                       err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1);
-                       if (err)
-                               continue;
+               err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1);
+               if (err)
+                       continue;
 
-                       seb->ec += 1;
-                       list_del(&seb->u.list);
-                       dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec);
-                       return seb;
-               }
+               seb->ec += 1;
+               list_del(&seb->u.list);
+               dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec);
+               return seb;
        }
 
-       ubi_err("no eraseblocks found");
+       ubi_err("no free eraseblocks");
        return ERR_PTR(-ENOSPC);
 }
 
index 8831d7ba9f215cf63673a66a39fb2ed63cbcab01..0b0149c41fe3c7da9fd0b393b9a76c3cb90996c3 100644 (file)
@@ -361,6 +361,8 @@ struct ubi_wl_entry;
  * @peb_size: physical eraseblock size
  * @bad_peb_count: count of bad physical eraseblocks
  * @good_peb_count: count of good physical eraseblocks
+ * @corr_peb_count: count of corrupted physical eraseblocks (preserved and not
+ *                  used by UBI)
  * @erroneous_peb_count: count of erroneous physical eraseblocks in @erroneous
  * @max_erroneous: maximum allowed amount of erroneous physical eraseblocks
  * @min_io_size: minimal input/output unit size of the underlying MTD device
@@ -447,6 +449,7 @@ struct ubi_device {
        int peb_size;
        int bad_peb_count;
        int good_peb_count;
+       int corr_peb_count;
        int erroneous_peb_count;
        int max_erroneous;
        int min_io_size;
index e42afab9a9fe2c290585a0f3111ca2543581a7ee..c47620dfc722c33417b33d8f8a1608f1478a0241 100644 (file)
@@ -261,6 +261,9 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
        /* Reserve physical eraseblocks */
        if (vol->reserved_pebs > ubi->avail_pebs) {
                dbg_err("not enough PEBs, only %d available", ubi->avail_pebs);
+               if (ubi->corr_peb_count)
+                       dbg_err("%d PEBs are corrupted and not used",
+                               ubi->corr_peb_count);
                err = -ENOSPC;
                goto out_unlock;
        }
@@ -527,6 +530,9 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
                if (pebs > ubi->avail_pebs) {
                        dbg_err("not enough PEBs: requested %d, available %d",
                                pebs, ubi->avail_pebs);
+                       if (ubi->corr_peb_count)
+                               dbg_err("%d PEBs are corrupted and not used",
+                                       ubi->corr_peb_count);
                        spin_unlock(&ubi->volumes_lock);
                        err = -ENOSPC;
                        goto out_free;
index 3bfe00a9466757170399f2778d5afdb14412f37e..fcdb7f65fe0beb3388b60daeab433733d4f1ecc7 100644 (file)
@@ -662,9 +662,13 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si,
        ubi->vol_count += 1;
        vol->ubi = ubi;
 
-       if (reserved_pebs > ubi->avail_pebs)
+       if (reserved_pebs > ubi->avail_pebs) {
                ubi_err("not enough PEBs, required %d, available %d",
                        reserved_pebs, ubi->avail_pebs);
+               if (ubi->corr_peb_count)
+                       ubi_err("%d PEBs are corrupted and not used",
+                               ubi->corr_peb_count);
+       }
        ubi->rsvd_pebs += reserved_pebs;
        ubi->avail_pebs -= reserved_pebs;
 
@@ -837,7 +841,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si)
                        return PTR_ERR(ubi->vtbl);
        }
 
-       ubi->avail_pebs = ubi->good_peb_count;
+       ubi->avail_pebs = ubi->good_peb_count - ubi->corr_peb_count;
 
        /*
         * The layout volume is OK, initialize the corresponding in-RAM data
index 605ecb1e22bb575340dce7338bd288a07c0d4353..655bbbe415d99b89074e1344953f8e60f10ac0be 100644 (file)
@@ -1478,22 +1478,6 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
                ubi->lookuptbl[e->pnum] = e;
        }
 
-       list_for_each_entry(seb, &si->corr, u.list) {
-               cond_resched();
-
-               e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
-               if (!e)
-                       goto out_free;
-
-               e->pnum = seb->pnum;
-               e->ec = seb->ec;
-               ubi->lookuptbl[e->pnum] = e;
-               if (schedule_erase(ubi, e, 0)) {
-                       kmem_cache_free(ubi_wl_entry_slab, e);
-                       goto out_free;
-               }
-       }
-
        ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {
                ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) {
                        cond_resched();
@@ -1520,6 +1504,9 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
        if (ubi->avail_pebs < WL_RESERVED_PEBS) {
                ubi_err("no enough physical eraseblocks (%d, need %d)",
                        ubi->avail_pebs, WL_RESERVED_PEBS);
+               if (ubi->corr_peb_count)
+                       ubi_err("%d PEBs are corrupted and not used",
+                               ubi->corr_peb_count);
                goto out_free;
        }
        ubi->avail_pebs -= WL_RESERVED_PEBS;