www.pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - drivers/ata/libata-eh.c
libata: improve EH retry delay handling
[linux-2.6-omap-h63xx.git] / drivers / ata / libata-eh.c
index d94359a24d419cbe36ee09bb155d9a32ef8d81fa..5b5ae631ed037ed28f0bc1699285d2a5df3dfd84 100644 (file)
@@ -66,15 +66,17 @@ enum {
        ATA_ECAT_DUBIOUS_TOUT_HSM       = 6,
        ATA_ECAT_DUBIOUS_UNK_DEV        = 7,
        ATA_ECAT_NR                     = 8,
-};
 
-/* Waiting in ->prereset can never be reliable.  It's sometimes nice
- * to wait there but it can't be depended upon; otherwise, we wouldn't
- * be resetting.  Just give it enough time for most drives to spin up.
- */
-enum {
-       ATA_EH_PRERESET_TIMEOUT         = 10 * HZ,
-       ATA_EH_FASTDRAIN_INTERVAL       = 3 * HZ,
+       /* always put at least this amount of time between resets */
+       ATA_EH_RESET_COOL_DOWN          =  5000,
+
+       /* Waiting in ->prereset can never be reliable.  It's
+        * sometimes nice to wait there but it can't be depended upon;
+        * otherwise, we wouldn't be resetting.  Just give it enough
+        * time for most drives to spin up.
+        */
+       ATA_EH_PRERESET_TIMEOUT         = 10000,
+       ATA_EH_FASTDRAIN_INTERVAL       =  3000,
 };
 
 /* The following table determines how we sequence resets.  Each entry
@@ -84,10 +86,10 @@ enum {
  * are mostly for error handling, hotplug and retarded devices.
  */
 static const unsigned long ata_eh_reset_timeouts[] = {
-       10 * HZ,        /* most drives spin up by 10sec */
-       10 * HZ,        /* > 99% working drives spin up before 20sec */
-       35 * HZ,        /* give > 30 secs of idleness for retarded devices */
-       5 * HZ,         /* and sweet one last chance */
+       10000,  /* most drives spin up by 10sec */
+       10000,  /* > 99% working drives spin up before 20sec */
+       35000,  /* give > 30 secs of idleness for retarded devices */
+        5000,  /* and sweet one last chance */
        /* > 1 min has elapsed, give up */
 };
 
@@ -486,6 +488,9 @@ void ata_scsi_error(struct Scsi_Host *host)
                                if (ata_ncq_enabled(dev))
                                        ehc->saved_ncq_enabled |= 1 << devno;
                        }
+
+                       /* set last reset timestamp to some time in the past */
+                       ehc->last_reset = jiffies - 60 * HZ;
                }
 
                ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
@@ -641,7 +646,7 @@ void ata_eh_fastdrain_timerfn(unsigned long arg)
                /* some qcs have finished, give it another chance */
                ap->fastdrain_cnt = cnt;
                ap->fastdrain_timer.expires =
-                       jiffies + ATA_EH_FASTDRAIN_INTERVAL;
+                       ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
                add_timer(&ap->fastdrain_timer);
        }
 
@@ -681,7 +686,8 @@ static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
 
        /* activate fast drain */
        ap->fastdrain_cnt = cnt;
-       ap->fastdrain_timer.expires = jiffies + ATA_EH_FASTDRAIN_INTERVAL;
+       ap->fastdrain_timer.expires =
+               ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
        add_timer(&ap->fastdrain_timer);
 }
 
@@ -1308,12 +1314,7 @@ static void ata_eh_analyze_serror(struct ata_link *link)
        unsigned int err_mask = 0, action = 0;
        u32 hotplug_mask;
 
-       if (serror & SERR_PERSISTENT) {
-               err_mask |= AC_ERR_ATA_BUS;
-               action |= ATA_EH_RESET;
-       }
-       if (serror &
-           (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) {
+       if (serror & (SERR_PERSISTENT | SERR_DATA)) {
                err_mask |= AC_ERR_ATA_BUS;
                action |= ATA_EH_RESET;
        }
@@ -1357,7 +1358,7 @@ static void ata_eh_analyze_serror(struct ata_link *link)
  *     LOCKING:
  *     Kernel thread context (may sleep).
  */
-static void ata_eh_analyze_ncq_error(struct ata_link *link)
+void ata_eh_analyze_ncq_error(struct ata_link *link)
 {
        struct ata_port *ap = link->ap;
        struct ata_eh_context *ehc = &link->eh_context;
@@ -1402,6 +1403,7 @@ static void ata_eh_analyze_ncq_error(struct ata_link *link)
        /* we've got the perpetrator, condemn it */
        qc = __ata_qc_from_tag(ap, tag);
        memcpy(&qc->result_tf, &tf, sizeof(tf));
+       qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
        qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
        ehc->i.err_mask &= ~AC_ERR_DEV;
 }
@@ -2046,19 +2048,11 @@ static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset,
                        unsigned int *classes, unsigned long deadline)
 {
        struct ata_device *dev;
-       int rc;
 
        ata_link_for_each_dev(dev, link)
                classes[dev->devno] = ATA_DEV_UNKNOWN;
 
-       rc = reset(link, classes, deadline);
-
-       /* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */
-       ata_link_for_each_dev(dev, link)
-               if (classes[dev->devno] == ATA_DEV_UNKNOWN)
-                       classes[dev->devno] = ATA_DEV_NONE;
-
-       return rc;
+       return reset(link, classes, deadline);
 }
 
 static int ata_eh_followup_srst_needed(struct ata_link *link,
@@ -2095,14 +2089,22 @@ int ata_eh_reset(struct ata_link *link, int classify,
        ata_reset_fn_t reset;
        unsigned long flags;
        u32 sstatus;
-       int rc;
+       int nr_known, rc;
+
+       /*
+        * Prepare to reset
+        */
+       now = jiffies;
+       deadline = ata_deadline(ehc->last_reset, ATA_EH_RESET_COOL_DOWN);
+       if (time_before(now, deadline))
+               schedule_timeout_uninterruptible(deadline - now);
 
-       /* about to reset */
        spin_lock_irqsave(ap->lock, flags);
        ap->pflags |= ATA_PFLAG_RESETTING;
        spin_unlock_irqrestore(ap->lock, flags);
 
        ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
+       ehc->last_reset = jiffies;
 
        ata_link_for_each_dev(dev, link) {
                /* If we issue an SRST then an ATA drive (not ATAPI)
@@ -2123,16 +2125,8 @@ int ata_eh_reset(struct ata_link *link, int classify,
                        ap->ops->set_piomode(ap, dev);
        }
 
-       if (!softreset && !hardreset) {
-               if (verbose)
-                       ata_link_printk(link, KERN_INFO, "no reset method "
-                                       "available, skipping reset\n");
-               if (!(lflags & ATA_LFLAG_ASSUME_CLASS))
-                       lflags |= ATA_LFLAG_ASSUME_ATA;
-               goto done;
-       }
-
        /* prefer hardreset */
+       reset = NULL;
        ehc->i.action &= ~ATA_EH_RESET;
        if (hardreset) {
                reset = hardreset;
@@ -2140,15 +2134,11 @@ int ata_eh_reset(struct ata_link *link, int classify,
        } else if (softreset) {
                reset = softreset;
                ehc->i.action = ATA_EH_SOFTRESET;
-       } else {
-               ata_link_printk(link, KERN_ERR, "BUG: no reset method, "
-                               "please report to linux-ide@vger.kernel.org\n");
-               dump_stack();
-               return -EINVAL;
        }
 
        if (prereset) {
-               rc = prereset(link, jiffies + ATA_EH_PRERESET_TIMEOUT);
+               rc = prereset(link,
+                             ata_deadline(jiffies, ATA_EH_PRERESET_TIMEOUT));
                if (rc) {
                        if (rc == -ENOENT) {
                                ata_link_printk(link, KERN_DEBUG,
@@ -2164,55 +2154,72 @@ int ata_eh_reset(struct ata_link *link, int classify,
                                        "prereset failed (errno=%d)\n", rc);
                        goto out;
                }
-       }
 
-       /* prereset() might have cleared ATA_EH_RESET */
-       if (!(ehc->i.action & ATA_EH_RESET)) {
-               /* prereset told us not to reset, bang classes and return */
-               ata_link_for_each_dev(dev, link)
-                       classes[dev->devno] = ATA_DEV_NONE;
-               rc = 0;
-               goto out;
+               /* prereset() might have cleared ATA_EH_RESET.  If so,
+                * bang classes and return.
+                */
+               if (reset && !(ehc->i.action & ATA_EH_RESET)) {
+                       ata_link_for_each_dev(dev, link)
+                               classes[dev->devno] = ATA_DEV_NONE;
+                       rc = 0;
+                       goto out;
+               }
        }
 
  retry:
-       deadline = jiffies + ata_eh_reset_timeouts[try++];
+       /*
+        * Perform reset
+        */
+       ehc->last_reset = jiffies;
+       if (ata_is_host_link(link))
+               ata_eh_freeze_port(ap);
 
-       /* shut up during boot probing */
-       if (verbose)
-               ata_link_printk(link, KERN_INFO, "%s resetting link\n",
-                               reset == softreset ? "soft" : "hard");
+       deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]);
 
-       /* mark that this EH session started with reset */
-       if (reset == hardreset)
-               ehc->i.flags |= ATA_EHI_DID_HARDRESET;
-       else
-               ehc->i.flags |= ATA_EHI_DID_SOFTRESET;
+       if (reset) {
+               if (verbose)
+                       ata_link_printk(link, KERN_INFO, "%s resetting link\n",
+                                       reset == softreset ? "soft" : "hard");
+
+               /* mark that this EH session started with reset */
+               if (reset == hardreset)
+                       ehc->i.flags |= ATA_EHI_DID_HARDRESET;
+               else
+                       ehc->i.flags |= ATA_EHI_DID_SOFTRESET;
 
-       rc = ata_do_reset(link, reset, classes, deadline);
+               rc = ata_do_reset(link, reset, classes, deadline);
 
-       if (reset == hardreset &&
-           ata_eh_followup_srst_needed(link, rc, classify, classes)) {
-               /* okay, let's do follow-up softreset */
-               reset = softreset;
+               if (reset == hardreset &&
+                   ata_eh_followup_srst_needed(link, rc, classify, classes)) {
+                       /* okay, let's do follow-up softreset */
+                       reset = softreset;
 
-               if (!reset) {
-                       ata_link_printk(link, KERN_ERR,
-                                       "follow-up softreset required "
-                                       "but no softreset avaliable\n");
-                       rc = -EINVAL;
-                       goto fail;
+                       if (!reset) {
+                               ata_link_printk(link, KERN_ERR,
+                                               "follow-up softreset required "
+                                               "but no softreset avaliable\n");
+                               rc = -EINVAL;
+                               goto fail;
+                       }
+
+                       ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
+                       rc = ata_do_reset(link, reset, classes, deadline);
                }
 
-               ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
-               rc = ata_do_reset(link, reset, classes, deadline);
+               /* -EAGAIN can happen if we skipped followup SRST */
+               if (rc && rc != -EAGAIN)
+                       goto fail;
+       } else {
+               if (verbose)
+                       ata_link_printk(link, KERN_INFO, "no reset method "
+                                       "available, skipping reset\n");
+               if (!(lflags & ATA_LFLAG_ASSUME_CLASS))
+                       lflags |= ATA_LFLAG_ASSUME_ATA;
        }
 
-       /* -EAGAIN can happen if we skipped followup SRST */
-       if (rc && rc != -EAGAIN)
-               goto fail;
-
- done:
+       /*
+        * Post-reset processing
+        */
        ata_link_for_each_dev(dev, link) {
                /* After the reset, the device state is PIO 0 and the
                 * controller state is undefined.  Reset also wakes up
@@ -2235,11 +2242,56 @@ int ata_eh_reset(struct ata_link *link, int classify,
        if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0)
                link->sata_spd = (sstatus >> 4) & 0xf;
 
+       /* thaw the port */
+       if (ata_is_host_link(link))
+               ata_eh_thaw_port(ap);
+
+       /* postreset() should clear hardware SError.  Although SError
+        * is cleared during link resume, clearing SError here is
+        * necessary as some PHYs raise hotplug events after SRST.
+        * This introduces race condition where hotplug occurs between
+        * reset and here.  This race is mediated by cross checking
+        * link onlineness and classification result later.
+        */
        if (postreset)
                postreset(link, classes);
 
+       /* clear cached SError */
+       spin_lock_irqsave(link->ap->lock, flags);
+       link->eh_info.serror = 0;
+       spin_unlock_irqrestore(link->ap->lock, flags);
+
+       /* Make sure onlineness and classification result correspond.
+        * Hotplug could have happened during reset and some
+        * controllers fail to wait while a drive is spinning up after
+        * being hotplugged causing misdetection.  By cross checking
+        * link onlineness and classification result, those conditions
+        * can be reliably detected and retried.
+        */
+       nr_known = 0;
+       ata_link_for_each_dev(dev, link) {
+               /* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */
+               if (classes[dev->devno] == ATA_DEV_UNKNOWN)
+                       classes[dev->devno] = ATA_DEV_NONE;
+               else
+                       nr_known++;
+       }
+
+       if (classify && !nr_known && ata_link_online(link)) {
+               if (try < max_tries) {
+                       ata_link_printk(link, KERN_WARNING, "link online but "
+                                      "device misclassified, retrying\n");
+                       rc = -EAGAIN;
+                       goto fail;
+               }
+               ata_link_printk(link, KERN_WARNING,
+                              "link online but device misclassified, "
+                              "device detection might fail\n");
+       }
+
        /* reset successful, schedule revalidation */
        ata_eh_done(link, NULL, ATA_EH_RESET);
+       ehc->last_reset = jiffies;
        ehc->i.action |= ATA_EH_REVALIDATE;
 
        rc = 0;
@@ -2266,9 +2318,9 @@ int ata_eh_reset(struct ata_link *link, int classify,
        if (time_before(now, deadline)) {
                unsigned long delta = deadline - now;
 
-               ata_link_printk(link, KERN_WARNING, "reset failed "
-                               "(errno=%d), retrying in %u secs\n",
-                               rc, (jiffies_to_msecs(delta) + 999) / 1000);
+               ata_link_printk(link, KERN_WARNING,
+                       "reset failed (errno=%d), retrying in %u secs\n",
+                       rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));
 
                while (delta)
                        delta = schedule_timeout_uninterruptible(delta);
@@ -2585,8 +2637,8 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 {
        struct ata_link *link;
        struct ata_device *dev;
-       int nr_failed_devs, nr_disabled_devs;
-       int reset, rc;
+       int nr_failed_devs;
+       int rc;
        unsigned long flags;
 
        DPRINTK("ENTER\n");
@@ -2628,8 +2680,6 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
  retry:
        rc = 0;
        nr_failed_devs = 0;
-       nr_disabled_devs = 0;
-       reset = 0;
 
        /* if UNLOADING, finish immediately */
        if (ap->pflags & ATA_PFLAG_UNLOADING)
@@ -2643,40 +2693,24 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
                if (ata_eh_skip_recovery(link))
                        ehc->i.action = 0;
 
-               /* do we need to reset? */
-               if (ehc->i.action & ATA_EH_RESET)
-                       reset = 1;
-
                ata_link_for_each_dev(dev, link)
                        ehc->classes[dev->devno] = ATA_DEV_UNKNOWN;
        }
 
        /* reset */
-       if (reset) {
-               /* if PMP is attached, this function only deals with
-                * downstream links, port should stay thawed.
-                */
-               if (!sata_pmp_attached(ap))
-                       ata_eh_freeze_port(ap);
-
-               ata_port_for_each_link(link, ap) {
-                       struct ata_eh_context *ehc = &link->eh_context;
+       ata_port_for_each_link(link, ap) {
+               struct ata_eh_context *ehc = &link->eh_context;
 
-                       if (!(ehc->i.action & ATA_EH_RESET))
-                               continue;
+               if (!(ehc->i.action & ATA_EH_RESET))
+                       continue;
 
-                       rc = ata_eh_reset(link, ata_link_nr_vacant(link),
-                                         prereset, softreset, hardreset,
-                                         postreset);
-                       if (rc) {
-                               ata_link_printk(link, KERN_ERR,
-                                               "reset failed, giving up\n");
-                               goto out;
-                       }
+               rc = ata_eh_reset(link, ata_link_nr_vacant(link),
+                                 prereset, softreset, hardreset, postreset);
+               if (rc) {
+                       ata_link_printk(link, KERN_ERR,
+                                       "reset failed, giving up\n");
+                       goto out;
                }
-
-               if (!sata_pmp_attached(ap))
-                       ata_eh_thaw_port(ap);
        }
 
        /* the rest */
@@ -2712,8 +2746,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 
 dev_fail:
                nr_failed_devs++;
-               if (ata_eh_handle_dev_fail(dev, rc))
-                       nr_disabled_devs++;
+               ata_eh_handle_dev_fail(dev, rc);
 
                if (ap->pflags & ATA_PFLAG_FROZEN) {
                        /* PMP reset requires working host port.
@@ -2725,18 +2758,8 @@ dev_fail:
                }
        }
 
-       if (nr_failed_devs) {
-               if (nr_failed_devs != nr_disabled_devs) {
-                       ata_port_printk(ap, KERN_WARNING, "failed to recover "
-                                       "some devices, retrying in 5 secs\n");
-                       ssleep(5);
-               } else {
-                       /* no device left to recover, repeat fast */
-                       msleep(500);
-               }
-
+       if (nr_failed_devs)
                goto retry;
-       }
 
  out:
        if (rc && r_failed_link)