jbd2: fix error handling for checkpoint io

Author:    Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Date:      Sat, 11 Oct 2008 00:29:13 +0000 (20:29 -0400)
Committer: Theodore Ts'o <tytso@mit.edu>
Date:      Sat, 11 Oct 2008 00:29:13 +0000 (20:29 -0400)

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c

index 42895d3694581885de894790f505557fda891c24..9203c3332f170887a2aff921c81946826ddd1cf9 100644 (file)
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -94,7 +94,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
         int ret = 0;
         struct buffer_head *bh = jh2bh(jh);
  
-       if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
+       if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
+           !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
                 JBUFFER_TRACE(jh, "remove from checkpoint list");
                 ret = __jbd2_journal_remove_checkpoint(jh) + 1;
                 jbd_unlock_bh_state(bh);
@@ -176,21 +177,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
   * buffers. Note that we take the buffers in the opposite ordering
   * from the one in which they were submitted for IO.
   *
+ * Return 0 on success, and return <0 if some buffers have failed
+ * to be written out.
+ *
   * Called with j_list_lock held.
   */
-static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
+static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
  {
         struct journal_head *jh;
         struct buffer_head *bh;
         tid_t this_tid;
         int released = 0;
+       int ret = 0;
  
         this_tid = transaction->t_tid;
  restart:
         /* Did somebody clean up the transaction in the meanwhile? */
         if (journal->j_checkpoint_transactions != transaction ||
                         transaction->t_tid != this_tid)
-               return;
+               return ret;
         while (!released && transaction->t_checkpoint_io_list) {
                 jh = transaction->t_checkpoint_io_list;
                 bh = jh2bh(jh);
@@ -210,6 +215,9 @@ restart:
                         spin_lock(&journal->j_list_lock);
                         goto restart;
                 }
+               if (unlikely(buffer_write_io_error(bh)))
+                       ret = -EIO;
+
                 /*
                  * Now in whatever state the buffer currently is, we know that
                  * it has been written out and so we can drop it from the list
@@ -219,6 +227,8 @@ restart:
                 jbd2_journal_remove_journal_head(bh);
                 __brelse(bh);
         }
+
+       return ret;
  }
  
  #define NR_BATCH       64
@@ -242,7 +252,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
   * Try to flush one buffer from the checkpoint list to disk.
   *
   * Return 1 if something happened which requires us to abort the current
- * scan of the checkpoint list.
+ * scan of the checkpoint list.  Return <0 if the buffer has failed to
+ * be written out.
   *
   * Called with j_list_lock held and drops it if 1 is returned
   * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@@ -274,6 +285,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
                 jbd2_log_wait_commit(journal, tid);
                 ret = 1;
         } else if (!buffer_dirty(bh)) {
+               ret = 1;
+               if (unlikely(buffer_write_io_error(bh)))
+                       ret = -EIO;
                 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
                 BUFFER_TRACE(bh, "remove from checkpoint");
                 __jbd2_journal_remove_checkpoint(jh);
@@ -281,7 +295,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
                 jbd_unlock_bh_state(bh);
                 jbd2_journal_remove_journal_head(bh);
                 __brelse(bh);
-               ret = 1;
         } else {
                 /*
                  * Important: we are about to write the buffer, and
@@ -314,6 +327,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
   * to disk. We submit larger chunks of data at once.
   *
   * The journal should be locked before calling this function.
+ * Called with j_checkpoint_mutex held.
   */
  int jbd2_log_do_checkpoint(journal_t *journal)
  {
@@ -339,6 +353,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
          * OK, we need to start writing disk blocks.  Take one transaction
          * and write it.
          */
+       result = 0;
         spin_lock(&journal->j_list_lock);
         if (!journal->j_checkpoint_transactions)
                 goto out;
@@ -357,7 +372,7 @@ restart:
                 int batch_count = 0;
                 struct buffer_head *bhs[NR_BATCH];
                 struct journal_head *jh;
-               int retry = 0;
+               int retry = 0, err;
  
                 while (!retry && transaction->t_checkpoint_list) {
                         struct buffer_head *bh;
@@ -371,6 +386,8 @@ restart:
                         }
                         retry = __process_buffer(journal, jh, bhs, &batch_count,
                                                  transaction);
+                       if (retry < 0 && !result)
+                               result = retry;
                         if (!retry && (need_resched() ||
                                 spin_needbreak(&journal->j_list_lock))) {
                                 spin_unlock(&journal->j_list_lock);
@@ -395,14 +412,18 @@ restart:
                  * Now we have cleaned up the first transaction's checkpoint
                  * list. Let's clean up the second one
                  */
-               __wait_cp_io(journal, transaction);
+               err = __wait_cp_io(journal, transaction);
+               if (!result)
+                       result = err;
         }
  out:
         spin_unlock(&journal->j_list_lock);
-       result = jbd2_cleanup_journal_tail(journal);
         if (result < 0)
-               return result;
-       return 0;
+               jbd2_journal_abort(journal, result);
+       else
+               result = jbd2_cleanup_journal_tail(journal);
+
+       return (result < 0) ? result : 0;
  }
  
  /*
@@ -418,8 +439,9 @@ out:
   * This is the only part of the journaling code which really needs to be
   * aware of transaction aborts.  Checkpointing involves writing to the
   * main filesystem area rather than to the journal, so it can proceed
- * even in abort state, but we must not update the journal superblock if
- * we have an abort error outstanding.
+ * even in abort state, but we must not update the super block if
+ * checkpointing may have failed.  Otherwise, we would lose some metadata
+ * buffers which should be written-back to the filesystem.
   */
  
  int jbd2_cleanup_journal_tail(journal_t *journal)
@@ -428,6 +450,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
         tid_t           first_tid;
         unsigned long   blocknr, freed;
  
+       if (is_journal_aborted(journal))
+               return 1;
+
         /* OK, work out the oldest transaction remaining in the log, and
          * the log block it starts at.
          *
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c

index 01c3901c3a07599e592e9bd09282a3d6cd42cf24..783de118de9235583bda263db9c9ce665333c4c3 100644 (file)
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1451,9 +1451,12 @@ recovery_error:
   *
   * Release a journal_t structure once it is no longer in use by the
   * journaled object.
+ * Return <0 if we couldn't clean up the journal.
   */
-void jbd2_journal_destroy(journal_t *journal)
+int jbd2_journal_destroy(journal_t *journal)
  {
+       int err = 0;
+
         /* Wait for the commit thread to wake up and die. */
         journal_kill_thread(journal);
  
@@ -1476,11 +1479,16 @@ void jbd2_journal_destroy(journal_t *journal)
         J_ASSERT(journal->j_checkpoint_transactions == NULL);
         spin_unlock(&journal->j_list_lock);
  
-       /* We can now mark the journal as empty. */
-       journal->j_tail = 0;
-       journal->j_tail_sequence = ++journal->j_transaction_sequence;
         if (journal->j_sb_buffer) {
-               jbd2_journal_update_superblock(journal, 1);
+               if (!is_journal_aborted(journal)) {
+                       /* We can now mark the journal as empty. */
+                       journal->j_tail = 0;
+                       journal->j_tail_sequence =
+                               ++journal->j_transaction_sequence;
+                       jbd2_journal_update_superblock(journal, 1);
+               } else {
+                       err = -EIO;
+               }
                 brelse(journal->j_sb_buffer);
         }
  
@@ -1492,6 +1500,8 @@ void jbd2_journal_destroy(journal_t *journal)
                 jbd2_journal_destroy_revoke(journal);
         kfree(journal->j_wbuf);
         kfree(journal);
+
+       return err;
  }
  
  
@@ -1717,10 +1727,16 @@ int jbd2_journal_flush(journal_t *journal)
         spin_lock(&journal->j_list_lock);
         while (!err && journal->j_checkpoint_transactions != NULL) {
                 spin_unlock(&journal->j_list_lock);
+               mutex_lock(&journal->j_checkpoint_mutex);
                 err = jbd2_log_do_checkpoint(journal);
+               mutex_unlock(&journal->j_checkpoint_mutex);
                 spin_lock(&journal->j_list_lock);
         }
         spin_unlock(&journal->j_list_lock);
+
+       if (is_journal_aborted(journal))
+               return -EIO;
+
         jbd2_cleanup_journal_tail(journal);
  
         /* Finally, mark the journal as really needing no recovery.
@@ -1742,7 +1758,7 @@ int jbd2_journal_flush(journal_t *journal)
         J_ASSERT(journal->j_head == journal->j_tail);
         J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
         spin_unlock(&journal->j_state_lock);
-       return err;
+       return 0;
  }
  
  /**
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c

index 058f50f65b766605eb7c88cb56e80f84156c05d8..73063285b13f7c7d868fb8553690bb2d0cbe5a03 100644 (file)
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -225,7 +225,7 @@ do {                                                                        \
   */
  int jbd2_journal_recover(journal_t *journal)
  {
-       int                     err;
+       int                     err, err2;
         journal_superblock_t *  sb;
  
         struct recovery_info    info;
@@ -263,7 +263,10 @@ int jbd2_journal_recover(journal_t *journal)
         journal->j_transaction_sequence = ++info.end_transaction;
  
         jbd2_journal_clear_revoke(journal);
-       sync_blockdev(journal->j_fs_dev);
+       err2 = sync_blockdev(journal->j_fs_dev);
+       if (!err)
+               err = err2;
+
         return err;
  }
  
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h

index 66c3499478b57579c8b4a719bdee3b8a2c7c42bb..c9e7d781db3196e593ec92273894c9ef0d5b0eee 100644 (file)
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1060,7 +1060,7 @@ extern void          jbd2_journal_clear_features
                    (journal_t *, unsigned long, unsigned long, unsigned long);
  extern int        jbd2_journal_create     (journal_t *);
  extern int        jbd2_journal_load       (journal_t *journal);
-extern void       jbd2_journal_destroy    (journal_t *);
+extern int        jbd2_journal_destroy    (journal_t *);
  extern int        jbd2_journal_recover    (journal_t *journal);
  extern int        jbd2_journal_wipe       (journal_t *, int);
  extern int        jbd2_journal_skip_recovery   (journal_t *);
author	Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
	Sat, 11 Oct 2008 00:29:13 +0000 (20:29 -0400)
committer	Theodore Ts'o <tytso@mit.edu>
	Sat, 11 Oct 2008 00:29:13 +0000 (20:29 -0400)
fs/jbd2/checkpoint.c		patch | blob | history
fs/jbd2/journal.c		patch | blob | history
fs/jbd2/recovery.c		patch | blob | history
include/linux/jbd2.h		patch | blob | history