From: Artem Bityutskiy Date: Fri, 5 Dec 2008 11:37:02 +0000 (+0200) Subject: UBI: handle write errors in WL worker X-Git-Tag: v2.6.29-rc1~534^2~5 X-Git-Url: http://www.pilppa.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=6fa6f5bbc3a2ad833a3d4b798140602004f70f5a;p=linux-2.6-omap-h63xx.git UBI: handle write errors in WL worker When a PEB is moved and a write error happens, UBI switches to R/O mode, which is wrong, because we just copy the data and may select a different PEB and re-try this. This patch fixes WL worker's behavior. Signed-off-by: Artem Bityutskiy --- diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 2e4d6bf9458..048a606cebd 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -949,10 +949,14 @@ write_error: * This function copies logical eraseblock from physical eraseblock @from to * physical eraseblock @to. The @vid_hdr buffer may be changed by this * function. Returns: - * o %0 in case of success; - * o %1 if the operation was canceled and should be tried later (e.g., - * because a bit-flip was detected at the target PEB); - * o %2 if the volume is being deleted and this LEB should not be moved. + * o %0 in case of success; + * o %1 if the operation was canceled because the volume is being deleted + * or because the PEB was put meanwhile; + * o %2 if the operation was canceled because there was a write error to the + * target PEB; + * o %-EAGAIN if the operation was canceled because a bit-flip was detected + * in the target PEB; + * o a negative error code in case of failure. */ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, struct ubi_vid_hdr *vid_hdr) @@ -978,7 +982,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, /* * Note, we may race with volume deletion, which means that the volume * this logical eraseblock belongs to might be being deleted. Since the - * volume deletion unmaps all the volume's logical eraseblocks, it will + * volume deletion un-maps all the volume's logical eraseblocks, it will * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish. */ vol = ubi->volumes[idx]; @@ -986,7 +990,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, /* No need to do further work, cancel */ dbg_eba("volume %d is being removed, cancel", vol_id); spin_unlock(&ubi->volumes_lock); - return 2; + return 1; } spin_unlock(&ubi->volumes_lock); @@ -1023,7 +1027,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, /* * OK, now the LEB is locked and we can safely start moving it. Since - * this function utilizes thie @ubi->peb1_buf buffer which is shared + * this function utilizes the @ubi->peb1_buf buffer which is shared * with some other functions, so lock the buffer by taking the * @ubi->buf_mutex. */ @@ -1068,8 +1072,11 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); err = ubi_io_write_vid_hdr(ubi, to, vid_hdr); - if (err) + if (err) { + if (err == -EIO) + err = 2; goto out_unlock_buf; + } cond_resched(); @@ -1079,14 +1086,17 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, if (err != UBI_IO_BITFLIPS) ubi_warn("cannot read VID header back from PEB %d", to); else - err = 1; + err = -EAGAIN; goto out_unlock_buf; } if (data_size > 0) { err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size); - if (err) + if (err) { + if (err == -EIO) + err = 2; goto out_unlock_buf; + } cond_resched(); @@ -1101,15 +1111,16 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, ubi_warn("cannot read data back from PEB %d", to); else - err = 1; + err = -EAGAIN; goto out_unlock_buf; } cond_resched(); if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) { - ubi_warn("read data back from PEB %d - it is different", - to); + ubi_warn("read data back from PEB %d and it is " + "different", to); + err = -EINVAL; goto out_unlock_buf; } } diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 442099d76ec..abf65ea414e 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -738,7 +738,7 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, int cancel) { - int err, scrubbing = 0; + int err, scrubbing = 0, torture = 0; struct ubi_wl_prot_entry *uninitialized_var(pe); struct ubi_wl_entry *e1, *e2; struct ubi_vid_hdr *vid_hdr; @@ -842,20 +842,26 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); if (err) { - + if (err == -EAGAIN) + goto out_not_moved; if (err < 0) goto out_error; - if (err == 1) + if (err == 2) { + /* Target PEB write error, torture it */ + torture = 1; goto out_not_moved; + } /* - * For some reason the LEB was not moved - it might be because - * the volume is being deleted. We should prevent this PEB from - * being selected for wear-levelling movement for some "time", - * so put it to the protection tree. + * The LEB has not been moved because the volume is being + * deleted or the PEB has been put meanwhile. We should prevent + * this PEB from being selected for wear-leveling movement + * again, so put it to the protection tree. */ - dbg_wl("cancelled moving PEB %d", e1->pnum); + dbg_wl("canceled moving PEB %d", e1->pnum); + ubi_assert(err == 1); + pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS); if (!pe) { err = -ENOMEM; @@ -920,9 +926,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, /* * For some reasons the LEB was not moved, might be an error, might be * something else. @e1 was not changed, so return it back. @e2 might - * be changed, schedule it for erasure. + * have been changed, schedule it for erasure. */ out_not_moved: + dbg_wl("canceled moving PEB %d", e1->pnum); ubi_free_vid_hdr(ubi, vid_hdr); vid_hdr = NULL; spin_lock(&ubi->wl_lock); @@ -930,12 +937,13 @@ out_not_moved: wl_tree_add(e1, &ubi->scrub); else wl_tree_add(e1, &ubi->used); + ubi_assert(!ubi->move_to_put); ubi->move_from = ubi->move_to = NULL; - ubi->move_to_put = ubi->wl_scheduled = 0; + ubi->wl_scheduled = 0; spin_unlock(&ubi->wl_lock); e1 = NULL; - err = schedule_erase(ubi, e2, 0); + err = schedule_erase(ubi, e2, torture); if (err) goto out_error; @@ -1324,7 +1332,7 @@ int ubi_wl_flush(struct ubi_device *ubi) up_write(&ubi->work_sem); /* - * And in case last was the WL worker and it cancelled the LEB + * And in case last was the WL worker and it canceled the LEB * movement, flush again. */ while (ubi->works_count) {