Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab...

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 17 Oct 2008 22:08:47 +0000 (15:08 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 17 Oct 2008 22:08:47 +0000 (15:08 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 17 Oct 2008 22:08:47 +0000 (15:08 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 17 Oct 2008 22:08:47 +0000 (15:08 -0700)
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt

index eb154ef36c2a4b708004ff08660f2a7ebe971fa7..174eaff7ded9f1e7f9ef71882b446d87cea8c359 100644 (file)
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -2,19 +2,24 @@
  Ext4 Filesystem
  ===============
  
-This is a development version of the ext4 filesystem, an advanced level
-of the ext3 filesystem which incorporates scalability and reliability
-enhancements for supporting large filesystems (64 bit) in keeping with
-increasing disk capacities and state-of-the-art feature requirements.
+Ext4 is an advanced level of the ext3 filesystem which incorporates
+scalability and reliability enhancements for supporting large filesystems
+(64 bit) in keeping with increasing disk capacities and state-of-the-art
+feature requirements.
  
-Mailing list: linux-ext4@vger.kernel.org
+Mailing list:  linux-ext4@vger.kernel.org
+Web site:      http://ext4.wiki.kernel.org
  
  
  1. Quick usage instructions:
  ===========================
  
+Note: More extensive information for getting started with ext4 can be
+      found at the ext4 wiki site at the URL:
+      http://ext4.wiki.kernel.org/index.php/Ext4_Howto
+
    - Compile and install the latest version of e2fsprogs (as of this
-    writing version 1.41) from:
+    writing version 1.41.3) from:
  
      http://sourceforge.net/project/showfiles.php?group_id=2406
         
@@ -36,11 +41,9 @@ Mailing list: linux-ext4@vger.kernel.org
  
         # mke2fs -t ext4 /dev/hda1
  
-    Or configure an existing ext3 filesystem to support extents and set
-    the test_fs flag to indicate that it's ok for an in-development
-    filesystem to touch this filesystem:
+    Or to configure an existing ext3 filesystem to support extents: 
  
-       # tune2fs -O extents -E test_fs /dev/hda1
+       # tune2fs -O extents /dev/hda1
  
      If the filesystem was created with 128 byte inodes, it can be
      converted to use 256 byte for greater efficiency via:
@@ -104,8 +107,8 @@ exist yet so I'm not sure they're in the near-term roadmap.
  The big performance win will come with mballoc, delalloc and flex_bg
  grouping of bitmaps and inode tables.  Some test results available here:
  
- - http://www.bullopensource.org/ext4/20080530/ffsb-write-2.6.26-rc2.html
- - http://www.bullopensource.org/ext4/20080530/ffsb-readwrite-2.6.26-rc2.html
+ - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-write-2.6.27-rc1.html
+ - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-readwrite-2.6.27-rc1.html
  
  3. Options
  ==========
@@ -214,9 +217,6 @@ noreservation
  bsddf          (*)     Make 'df' act like BSD.
  minixdf                        Make 'df' act like Minix.
  
-check=none             Don't do extra checking of bitmaps on mount.
-nocheck
-
  debug                  Extra debugging information is sent to syslog.
  
  errors=remount-ro(*)   Remount the filesystem read-only on an error.
@@ -253,8 +253,6 @@ nobh                        (a) cache disk block mapping information
                         "nobh" option tries to avoid associating buffer
                         heads (supported only for "writeback" mode).
  
-mballoc                (*)     Use the multiple block allocator for block allocation
-nomballoc              disabled multiple block allocator for block allocation.
  stripe=n               Number of filesystem blocks that mballoc will try
                         to use for allocation size and alignment. For RAID5/6
                         systems this should be the number of data
diff --git a/fs/Kconfig b/fs/Kconfig

index 9e9d70c02a07c9a8876f71c3445ac5092c563810..d0a1174fb516c14f63862e42919d74c044df7882 100644 (file)
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -160,7 +160,7 @@ config EXT4_FS
           filesystem initially.
  
           To compile this file system support as a module, choose M here. The
-         module will be called ext4dev.
+         module will be called ext4.
  
           If unsure, say N.
  
diff --git a/fs/Makefile b/fs/Makefile

index d0c69f57e5bfdebd65a668443428c44373360ab0..2168c902d5ca61027236bb75812591d16575ec1d 100644 (file)
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -71,7 +71,7 @@ obj-$(CONFIG_DLM)             += dlm/
  # Do not add any filesystems before this line
  obj-$(CONFIG_REISERFS_FS)      += reiserfs/
  obj-$(CONFIG_EXT3_FS)          += ext3/ # Before ext2 so root fs can be ext3
-obj-$(CONFIG_EXT4_FS)          += ext4/ # Before ext2 so root fs can be ext4dev
+obj-$(CONFIG_EXT4_FS)          += ext4/ # Before ext2 so root fs can be ext4
  obj-$(CONFIG_JBD)              += jbd/
  obj-$(CONFIG_JBD2)             += jbd2/
  obj-$(CONFIG_EXT2_FS)          += ext2/
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c

index bd2ece22882755b02599563a1bee89c5b10d2b87..b9821be709bddb000077b9a4b15efe34e9392bd0 100644 (file)
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -568,8 +568,16 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
  
         /* this isn't the right place to decide whether block is metadata
          * inode.c/extents.c knows better, but for safety ... */
-       if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
-                       ext4_should_journal_data(inode))
+       if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+               metadata = 1;
+
+       /* We need to make sure we don't reuse a
+        * released block until the transaction commits.
+        * Writeback mode has weak data consistency, so
+        * don't force data as metadata when freeing blocks
+        * in writeback mode.
+        */
+       if (metadata == 0 && !ext4_should_writeback_data(inode))
                 metadata = 1;
  
         sb = inode->i_sb;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h

index 6690a41cdd9fc8ca6536d3a6077222e65830a834..4880cc3e672778d54944326d265fa9b50b8d7275 100644 (file)
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -511,7 +511,6 @@ do {                                                                               \
  /*
   * Mount flags
   */
-#define EXT4_MOUNT_CHECK               0x00001 /* Do mount-time checks */
  #define EXT4_MOUNT_OLDALLOC            0x00002  /* Don't use the new Orlov allocator */
  #define EXT4_MOUNT_GRPID               0x00004 /* Create files with directory's group */
  #define EXT4_MOUNT_DEBUG               0x00008 /* Some debugging messages */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h

index 6a0b40d43264b232a806f0e265150343c6375077..445fde603df800cb4a1bf8144f34eb9aeb17b52a 100644 (file)
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -99,9 +99,6 @@ struct ext4_sb_info {
         struct inode *s_buddy_cache;
         long s_blocks_reserved;
         spinlock_t s_reserve_lock;
-       struct list_head s_active_transaction;
-       struct list_head s_closed_transaction;
-       struct list_head s_committed_transaction;
         spinlock_t s_md_lock;
         tid_t s_last_transaction;
         unsigned short *s_mb_offsets, *s_mb_maxs;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index 9b4ec9decfd1b6020c13bbd57d86af7eab46006b..8dbf6953845ba61097703006553c34d1d83bd81f 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1648,6 +1648,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
         int ret = 0, err, nr_pages, i;
         unsigned long index, end;
         struct pagevec pvec;
+       long pages_skipped;
  
         BUG_ON(mpd->next_page <= mpd->first_page);
         pagevec_init(&pvec, 0);
@@ -1655,20 +1656,30 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
         end = mpd->next_page - 1;
  
         while (index <= end) {
-               /* XXX: optimize tail */
-               nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+               /*
+                * We can use PAGECACHE_TAG_DIRTY lookup here because
+                * even though we have cleared the dirty flag on the page
+                * We still keep the page in the radix tree with tag
+                * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
+                * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
+                * which is called via the below writepage callback.
+                */
+               nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+                                       PAGECACHE_TAG_DIRTY,
+                                       min(end - index,
+                                       (pgoff_t)PAGEVEC_SIZE-1) + 1);
                 if (nr_pages == 0)
                         break;
                 for (i = 0; i < nr_pages; i++) {
                         struct page *page = pvec.pages[i];
  
-                       index = page->index;
-                       if (index > end)
-                               break;
-                       index++;
-
+                       pages_skipped = mpd->wbc->pages_skipped;
                         err = mapping->a_ops->writepage(page, mpd->wbc);
-                       if (!err)
+                       if (!err && (pages_skipped == mpd->wbc->pages_skipped))
+                               /*
+                                * The page was written successfully
+                                * and was not skipped
+                                */
                                 mpd->pages_written++;
                         /*
                          * In error case, we have to continue because
@@ -2104,7 +2115,6 @@ static int mpage_da_writepages(struct address_space *mapping,
                                struct writeback_control *wbc,
                                struct mpage_da_data *mpd)
  {
-       long to_write;
         int ret;
  
         if (!mpd->get_block)
@@ -2119,19 +2129,18 @@ static int mpage_da_writepages(struct address_space *mapping,
         mpd->pages_written = 0;
         mpd->retval = 0;
  
-       to_write = wbc->nr_to_write;
-
         ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
-
         /*
          * Handle last extent of pages
          */
         if (!mpd->io_done && mpd->next_page != mpd->first_page) {
                 if (mpage_da_map_blocks(mpd) == 0)
                         mpage_da_submit_io(mpd);
-       }
  
-       wbc->nr_to_write = to_write - mpd->pages_written;
+               mpd->io_done = 1;
+               ret = MPAGE_DA_EXTENT_TAIL;
+       }
+       wbc->nr_to_write -= mpd->pages_written;
         return ret;
  }
  
@@ -2360,12 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
  static int ext4_da_writepages(struct address_space *mapping,
                               struct writeback_control *wbc)
  {
+       pgoff_t index;
+       int range_whole = 0;
         handle_t *handle = NULL;
-       loff_t range_start = 0;
         struct mpage_da_data mpd;
         struct inode *inode = mapping->host;
+       int no_nrwrite_index_update;
+       long pages_written = 0, pages_skipped;
         int needed_blocks, ret = 0, nr_to_writebump = 0;
-       long to_write, pages_skipped = 0;
         struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
  
         /*
@@ -2385,23 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping,
                 nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
                 wbc->nr_to_write = sbi->s_mb_stream_request;
         }
+       if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+               range_whole = 1;
  
-       if (!wbc->range_cyclic)
-               /*
-                * If range_cyclic is not set force range_cont
-                * and save the old writeback_index
-                */
-               wbc->range_cont = 1;
-
-       range_start =  wbc->range_start;
-       pages_skipped = wbc->pages_skipped;
+       if (wbc->range_cyclic)
+               index = mapping->writeback_index;
+       else
+               index = wbc->range_start >> PAGE_CACHE_SHIFT;
  
         mpd.wbc = wbc;
         mpd.inode = mapping->host;
  
-restart_loop:
-       to_write = wbc->nr_to_write;
-       while (!ret && to_write > 0) {
+       /*
+        * we don't want write_cache_pages to update
+        * nr_to_write and writeback_index
+        */
+       no_nrwrite_index_update = wbc->no_nrwrite_index_update;
+       wbc->no_nrwrite_index_update = 1;
+       pages_skipped = wbc->pages_skipped;
+
+       while (!ret && wbc->nr_to_write > 0) {
  
                 /*
                  * we  insert one extent at a time. So we need
@@ -2422,48 +2436,53 @@ restart_loop:
                         dump_stack();
                         goto out_writepages;
                 }
-               to_write -= wbc->nr_to_write;
-
                 mpd.get_block = ext4_da_get_block_write;
                 ret = mpage_da_writepages(mapping, wbc, &mpd);
  
                 ext4_journal_stop(handle);
  
-               if (mpd.retval == -ENOSPC)
+               if (mpd.retval == -ENOSPC) {
+                       /* commit the transaction which would
+                        * free blocks released in the transaction
+                        * and try again
+                        */
                         jbd2_journal_force_commit_nested(sbi->s_journal);
-
-               /* reset the retry count */
-               if (ret == MPAGE_DA_EXTENT_TAIL) {
+                       wbc->pages_skipped = pages_skipped;
+                       ret = 0;
+               } else if (ret == MPAGE_DA_EXTENT_TAIL) {
                         /*
                          * got one extent now try with
                          * rest of the pages
                          */
-                       to_write += wbc->nr_to_write;
+                       pages_written += mpd.pages_written;
+                       wbc->pages_skipped = pages_skipped;
                         ret = 0;
-               } else if (wbc->nr_to_write) {
+               } else if (wbc->nr_to_write)
                         /*
                          * There is no more writeout needed
                          * or we requested for a noblocking writeout
                          * and we found the device congested
                          */
-                       to_write += wbc->nr_to_write;
                         break;
-               }
-               wbc->nr_to_write = to_write;
-       }
-
-       if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
-               /* We skipped pages in this loop */
-               wbc->range_start = range_start;
-               wbc->nr_to_write = to_write +
-                               wbc->pages_skipped - pages_skipped;
-               wbc->pages_skipped = pages_skipped;
-               goto restart_loop;
         }
+       if (pages_skipped != wbc->pages_skipped)
+               printk(KERN_EMERG "This should not happen leaving %s "
+                               "with nr_to_write = %ld ret = %d\n",
+                               __func__, wbc->nr_to_write, ret);
+
+       /* Update index */
+       index += pages_written;
+       if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+               /*
+                * set the writeback_index so that range_cyclic
+                * mode will write it back later
+                */
+               mapping->writeback_index = index;
  
  out_writepages:
-       wbc->nr_to_write = to_write - nr_to_writebump;
-       wbc->range_start = range_start;
+       if (!no_nrwrite_index_update)
+               wbc->no_nrwrite_index_update = 0;
+       wbc->nr_to_write -= nr_to_writebump;
         return ret;
  }
  
@@ -4175,7 +4194,6 @@ static int ext4_inode_blocks_set(handle_t *handle,
         struct inode *inode = &(ei->vfs_inode);
         u64 i_blocks = inode->i_blocks;
         struct super_block *sb = inode->i_sb;
-       int err = 0;
  
         if (i_blocks <= ~0U) {
                 /*
@@ -4185,36 +4203,27 @@ static int ext4_inode_blocks_set(handle_t *handle,
                 raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
                 raw_inode->i_blocks_high = 0;
                 ei->i_flags &= ~EXT4_HUGE_FILE_FL;
-       } else if (i_blocks <= 0xffffffffffffULL) {
+               return 0;
+       }
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
+               return -EFBIG;
+
+       if (i_blocks <= 0xffffffffffffULL) {
                 /*
                  * i_blocks can be represented in a 48 bit variable
                  * as multiple of 512 bytes
                  */
-               err = ext4_update_rocompat_feature(handle, sb,
-                                           EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
-               if (err)
-                       goto  err_out;
-               /* i_block is stored in the split  48 bit fields */
                 raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
                 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
                 ei->i_flags &= ~EXT4_HUGE_FILE_FL;
         } else {
-               /*
-                * i_blocks should be represented in a 48 bit variable
-                * as multiple of  file system block size
-                */
-               err = ext4_update_rocompat_feature(handle, sb,
-                                           EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
-               if (err)
-                       goto  err_out;
                 ei->i_flags |= EXT4_HUGE_FILE_FL;
                 /* i_block is stored in file system block size */
                 i_blocks = i_blocks >> (inode->i_blkbits - 9);
                 raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
                 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
         }
-err_out:
-       return err;
+       return 0;
  }
  
  /*
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c

index b580714f0d859c107e94a3f6c61286141fb0cdab..dfe17a1340523c9c10d3c1f14a1077b4fd6939c9 100644 (file)
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2300,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
         }
  
         INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
+       meta_group_info[i]->bb_free_root.rb_node = NULL;;
  
  #ifdef DOUBLE_CHECK
         {
@@ -2522,9 +2523,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
         }
  
         spin_lock_init(&sbi->s_md_lock);
-       INIT_LIST_HEAD(&sbi->s_active_transaction);
-       INIT_LIST_HEAD(&sbi->s_closed_transaction);
-       INIT_LIST_HEAD(&sbi->s_committed_transaction);
         spin_lock_init(&sbi->s_bal_lock);
  
         sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
@@ -2553,6 +2551,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
         ext4_mb_init_per_dev_proc(sb);
         ext4_mb_history_init(sb);
  
+       sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+
         printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
         return 0;
  }
@@ -2568,7 +2568,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
                 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
                 list_del(&pa->pa_group_list);
                 count++;
-               kfree(pa);
+               kmem_cache_free(ext4_pspace_cachep, pa);
         }
         if (count)
                 mb_debug("mballoc: %u PAs left\n", count);
@@ -2582,15 +2582,6 @@ int ext4_mb_release(struct super_block *sb)
         struct ext4_group_info *grinfo;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
  
-       /* release freed, non-committed blocks */
-       spin_lock(&sbi->s_md_lock);
-       list_splice_init(&sbi->s_closed_transaction,
-                       &sbi->s_committed_transaction);
-       list_splice_init(&sbi->s_active_transaction,
-                       &sbi->s_committed_transaction);
-       spin_unlock(&sbi->s_md_lock);
-       ext4_mb_free_committed_blocks(sb);
-
         if (sbi->s_group_info) {
                 for (i = 0; i < sbi->s_groups_count; i++) {
                         grinfo = ext4_get_group_info(sb, i);
@@ -2644,61 +2635,57 @@ int ext4_mb_release(struct super_block *sb)
         return 0;
  }
  
-static noinline_for_stack void
-ext4_mb_free_committed_blocks(struct super_block *sb)
+/*
+ * This function is called by the jbd2 layer once the commit has finished,
+ * so we know we can free the blocks that were released with that commit.
+ */
+static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
  {
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-       int err;
-       int i;
-       int count = 0;
-       int count2 = 0;
-       struct ext4_free_metadata *md;
+       struct super_block *sb = journal->j_private;
         struct ext4_buddy e4b;
+       struct ext4_group_info *db;
+       int err, count = 0, count2 = 0;
+       struct ext4_free_data *entry;
+       ext4_fsblk_t discard_block;
+       struct list_head *l, *ltmp;
  
-       if (list_empty(&sbi->s_committed_transaction))
-               return;
-
-       /* there is committed blocks to be freed yet */
-       do {
-               /* get next array of blocks */
-               md = NULL;
-               spin_lock(&sbi->s_md_lock);
-               if (!list_empty(&sbi->s_committed_transaction)) {
-                       md = list_entry(sbi->s_committed_transaction.next,
-                                       struct ext4_free_metadata, list);
-                       list_del(&md->list);
-               }
-               spin_unlock(&sbi->s_md_lock);
-
-               if (md == NULL)
-                       break;
+       list_for_each_safe(l, ltmp, &txn->t_private_list) {
+               entry = list_entry(l, struct ext4_free_data, list);
  
                 mb_debug("gonna free %u blocks in group %lu (0x%p):",
-                               md->num, md->group, md);
+                        entry->count, entry->group, entry);
  
-               err = ext4_mb_load_buddy(sb, md->group, &e4b);
+               err = ext4_mb_load_buddy(sb, entry->group, &e4b);
                 /* we expect to find existing buddy because it's pinned */
                 BUG_ON(err != 0);
  
+               db = e4b.bd_info;
                 /* there are blocks to put in buddy to make them really free */
-               count += md->num;
+               count += entry->count;
                 count2++;
-               ext4_lock_group(sb, md->group);
-               for (i = 0; i < md->num; i++) {
-                       mb_debug(" %u", md->blocks[i]);
-                       mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
+               ext4_lock_group(sb, entry->group);
+               /* Take it out of per group rb tree */
+               rb_erase(&entry->node, &(db->bb_free_root));
+               mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
+
+               if (!db->bb_free_root.rb_node) {
+                       /* No more items in the per group rb tree
+                        * balance refcounts from ext4_mb_free_metadata()
+                        */
+                       page_cache_release(e4b.bd_buddy_page);
+                       page_cache_release(e4b.bd_bitmap_page);
                 }
-               mb_debug("\n");
-               ext4_unlock_group(sb, md->group);
-
-               /* balance refcounts from ext4_mb_free_metadata() */
-               page_cache_release(e4b.bd_buddy_page);
-               page_cache_release(e4b.bd_bitmap_page);
-
-               kfree(md);
+               ext4_unlock_group(sb, entry->group);
+               discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
+                       + entry->start_blk
+                       + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+               trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id,
+                          (unsigned long long) discard_block, entry->count);
+               sb_issue_discard(sb, discard_block, entry->count);
+
+               kmem_cache_free(ext4_free_ext_cachep, entry);
                 ext4_mb_release_desc(&e4b);
-
-       } while (md);
+       }
  
         mb_debug("freed %u blocks in %u structures\n", count, count2);
  }
@@ -2712,6 +2699,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
  
  static int ext4_mb_init_per_dev_proc(struct super_block *sb)
  {
+#ifdef CONFIG_PROC_FS
         mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
         struct proc_dir_entry *proc;
@@ -2735,10 +2723,14 @@ err_out:
         remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
         remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
         return -ENOMEM;
+#else
+       return 0;
+#endif
  }
  
  static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
  {
+#ifdef CONFIG_PROC_FS
         struct ext4_sb_info *sbi = EXT4_SB(sb);
  
         if (sbi->s_proc == NULL)
@@ -2750,7 +2742,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
         remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
         remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
         remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
-
+#endif
         return 0;
  }
  
@@ -2771,6 +2763,16 @@ int __init init_ext4_mballoc(void)
                 kmem_cache_destroy(ext4_pspace_cachep);
                 return -ENOMEM;
         }
+
+       ext4_free_ext_cachep =
+               kmem_cache_create("ext4_free_block_extents",
+                                    sizeof(struct ext4_free_data),
+                                    0, SLAB_RECLAIM_ACCOUNT, NULL);
+       if (ext4_free_ext_cachep == NULL) {
+               kmem_cache_destroy(ext4_pspace_cachep);
+               kmem_cache_destroy(ext4_ac_cachep);
+               return -ENOMEM;
+       }
         return 0;
  }
  
@@ -2779,6 +2781,7 @@ void exit_ext4_mballoc(void)
         /* XXX: synchronize_rcu(); */
         kmem_cache_destroy(ext4_pspace_cachep);
         kmem_cache_destroy(ext4_ac_cachep);
+       kmem_cache_destroy(ext4_free_ext_cachep);
  }
  
  
@@ -4324,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
                 goto out1;
         }
  
-       ext4_mb_poll_new_transaction(sb, handle);
-
         *errp = ext4_mb_initialize_context(ac, ar);
         if (*errp) {
                 ar->len = 0;
@@ -4384,35 +4385,20 @@ out1:
  
         return block;
  }
-static void ext4_mb_poll_new_transaction(struct super_block *sb,
-                                               handle_t *handle)
-{
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-
-       if (sbi->s_last_transaction == handle->h_transaction->t_tid)
-               return;
-
-       /* new transaction! time to close last one and free blocks for
-        * committed transaction. we know that only transaction can be
-        * active, so previos transaction can be being logged and we
-        * know that transaction before previous is known to be already
-        * logged. this means that now we may free blocks freed in all
-        * transactions before previous one. hope I'm clear enough ... */
  
-       spin_lock(&sbi->s_md_lock);
-       if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
-               mb_debug("new transaction %lu, old %lu\n",
-                               (unsigned long) handle->h_transaction->t_tid,
-                               (unsigned long) sbi->s_last_transaction);
-               list_splice_init(&sbi->s_closed_transaction,
-                               &sbi->s_committed_transaction);
-               list_splice_init(&sbi->s_active_transaction,
-                               &sbi->s_closed_transaction);
-               sbi->s_last_transaction = handle->h_transaction->t_tid;
-       }
-       spin_unlock(&sbi->s_md_lock);
-
-       ext4_mb_free_committed_blocks(sb);
+/*
+ * We can merge two free data extents only if the physical blocks
+ * are contiguous, AND the extents were freed by the same transaction,
+ * AND the blocks are associated with the same group.
+ */
+static int can_merge(struct ext4_free_data *entry1,
+                       struct ext4_free_data *entry2)
+{
+       if ((entry1->t_tid == entry2->t_tid) &&
+           (entry1->group == entry2->group) &&
+           ((entry1->start_blk + entry1->count) == entry2->start_blk))
+               return 1;
+       return 0;
  }
  
  static noinline_for_stack int
@@ -4422,57 +4408,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
         struct ext4_group_info *db = e4b->bd_info;
         struct super_block *sb = e4b->bd_sb;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct ext4_free_metadata *md;
-       int i;
+       struct ext4_free_data *entry, *new_entry;
+       struct rb_node **n = &db->bb_free_root.rb_node, *node;
+       struct rb_node *parent = NULL, *new_node;
+
  
         BUG_ON(e4b->bd_bitmap_page == NULL);
         BUG_ON(e4b->bd_buddy_page == NULL);
  
+       new_entry  = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+       new_entry->start_blk = block;
+       new_entry->group  = group;
+       new_entry->count = count;
+       new_entry->t_tid = handle->h_transaction->t_tid;
+       new_node = &new_entry->node;
+
         ext4_lock_group(sb, group);
-       for (i = 0; i < count; i++) {
-               md = db->bb_md_cur;
-               if (md && db->bb_tid != handle->h_transaction->t_tid) {
-                       db->bb_md_cur = NULL;
-                       md = NULL;
+       if (!*n) {
+               /* first free block extent. We need to
+                  protect buddy cache from being freed,
+                * otherwise we'll refresh it from
+                * on-disk bitmap and lose not-yet-available
+                * blocks */
+               page_cache_get(e4b->bd_buddy_page);
+               page_cache_get(e4b->bd_bitmap_page);
+       }
+       while (*n) {
+               parent = *n;
+               entry = rb_entry(parent, struct ext4_free_data, node);
+               if (block < entry->start_blk)
+                       n = &(*n)->rb_left;
+               else if (block >= (entry->start_blk + entry->count))
+                       n = &(*n)->rb_right;
+               else {
+                       ext4_error(sb, __func__,
+                           "Double free of blocks %d (%d %d)\n",
+                           block, entry->start_blk, entry->count);
+                       return 0;
                 }
+       }
  
-               if (md == NULL) {
-                       ext4_unlock_group(sb, group);
-                       md = kmalloc(sizeof(*md), GFP_NOFS);
-                       if (md == NULL)
-                               return -ENOMEM;
-                       md->num = 0;
-                       md->group = group;
-
-                       ext4_lock_group(sb, group);
-                       if (db->bb_md_cur == NULL) {
-                               spin_lock(&sbi->s_md_lock);
-                               list_add(&md->list, &sbi->s_active_transaction);
-                               spin_unlock(&sbi->s_md_lock);
-                               /* protect buddy cache from being freed,
-                                * otherwise we'll refresh it from
-                                * on-disk bitmap and lose not-yet-available
-                                * blocks */
-                               page_cache_get(e4b->bd_buddy_page);
-                               page_cache_get(e4b->bd_bitmap_page);
-                               db->bb_md_cur = md;
-                               db->bb_tid = handle->h_transaction->t_tid;
-                               mb_debug("new md 0x%p for group %lu\n",
-                                               md, md->group);
-                       } else {
-                               kfree(md);
-                               md = db->bb_md_cur;
-                       }
+       rb_link_node(new_node, parent, n);
+       rb_insert_color(new_node, &db->bb_free_root);
+
+       /* Now check whether the extent can be merged to the left and right */
+       node = rb_prev(new_node);
+       if (node) {
+               entry = rb_entry(node, struct ext4_free_data, node);
+               if (can_merge(entry, new_entry)) {
+                       new_entry->start_blk = entry->start_blk;
+                       new_entry->count += entry->count;
+                       rb_erase(node, &(db->bb_free_root));
+                       spin_lock(&sbi->s_md_lock);
+                       list_del(&entry->list);
+                       spin_unlock(&sbi->s_md_lock);
+                       kmem_cache_free(ext4_free_ext_cachep, entry);
                 }
+       }
  
-               BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS);
-               md->blocks[md->num] = block + i;
-               md->num++;
-               if (md->num == EXT4_BB_MAX_BLOCKS) {
-                       /* no more space, put full container on a sb's list */
-                       db->bb_md_cur = NULL;
+       node = rb_next(new_node);
+       if (node) {
+               entry = rb_entry(node, struct ext4_free_data, node);
+               if (can_merge(new_entry, entry)) {
+                       new_entry->count += entry->count;
+                       rb_erase(node, &(db->bb_free_root));
+                       spin_lock(&sbi->s_md_lock);
+                       list_del(&entry->list);
+                       spin_unlock(&sbi->s_md_lock);
+                       kmem_cache_free(ext4_free_ext_cachep, entry);
                 }
         }
+       /* Add the extent to transaction's private list */
+       spin_lock(&sbi->s_md_lock);
+       list_add(&new_entry->list, &handle->h_transaction->t_private_list);
+       spin_unlock(&sbi->s_md_lock);
         ext4_unlock_group(sb, group);
         return 0;
  }
@@ -4500,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
  
         *freed = 0;
  
-       ext4_mb_poll_new_transaction(sb, handle);
-
         sbi = EXT4_SB(sb);
         es = EXT4_SB(sb)->s_es;
         if (block < le32_to_cpu(es->s_first_data_block) ||
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h

index b3b4828f8b894c3cda416e0af366f2ecc890a284..b5dff1fff1e5b33af1c583f026b883fae74eca82 100644 (file)
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -18,6 +18,8 @@
  #include <linux/pagemap.h>
  #include <linux/seq_file.h>
  #include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/marker.h>
  #include "ext4_jbd2.h"
  #include "ext4.h"
  #include "group.h"
@@ -98,23 +100,29 @@
  
  static struct kmem_cache *ext4_pspace_cachep;
  static struct kmem_cache *ext4_ac_cachep;
+static struct kmem_cache *ext4_free_ext_cachep;
  
-#ifdef EXT4_BB_MAX_BLOCKS
-#undef EXT4_BB_MAX_BLOCKS
-#endif
-#define EXT4_BB_MAX_BLOCKS     30
+struct ext4_free_data {
+       /* this links the free block information from group_info */
+       struct rb_node node;
  
-struct ext4_free_metadata {
-       ext4_group_t group;
-       unsigned short num;
-       ext4_grpblk_t  blocks[EXT4_BB_MAX_BLOCKS];
+       /* this links the free block information from ext4_sb_info */
         struct list_head list;
+
+       /* group to which the free block extent belongs */
+       ext4_group_t group;
+
+       /* free block extent */
+       ext4_grpblk_t start_blk;
+       ext4_grpblk_t count;
+
+       /* transaction which freed this extent */
+       tid_t   t_tid;
  };
  
  struct ext4_group_info {
         unsigned long   bb_state;
-       unsigned long   bb_tid;
-       struct ext4_free_metadata *bb_md_cur;
+       struct rb_root  bb_free_root;
         unsigned short  bb_first_free;
         unsigned short  bb_free;
         unsigned short  bb_fragments;
@@ -261,8 +269,6 @@ struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
  
  static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
                                         ext4_group_t group);
-static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
-static void ext4_mb_free_committed_blocks(struct super_block *);
  static void ext4_mb_return_to_preallocation(struct inode *inode,
                                         struct ext4_buddy *e4b, sector_t block,
                                         int count);
@@ -270,6 +276,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *,
                         struct super_block *, struct ext4_prealloc_space *pa);
  static int ext4_mb_init_per_dev_proc(struct super_block *sb);
  static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
+static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
  
  
  static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c

index dea8f13c2fd98f3457c50faefd84be2c78399adf..9b2b2bc4ec175e2ac43bf43113866e1c5dde38e5 100644 (file)
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -374,66 +374,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
          */
  }
  
-int ext4_update_compat_feature(handle_t *handle,
-                                       struct super_block *sb, __u32 compat)
-{
-       int err = 0;
-       if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) {
-               err = ext4_journal_get_write_access(handle,
-                               EXT4_SB(sb)->s_sbh);
-               if (err)
-                       return err;
-               EXT4_SET_COMPAT_FEATURE(sb, compat);
-               sb->s_dirt = 1;
-               handle->h_sync = 1;
-               BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
-                                       "call ext4_journal_dirty_met adata");
-               err = ext4_journal_dirty_metadata(handle,
-                               EXT4_SB(sb)->s_sbh);
-       }
-       return err;
-}
-
-int ext4_update_rocompat_feature(handle_t *handle,
-                                       struct super_block *sb, __u32 rocompat)
-{
-       int err = 0;
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) {
-               err = ext4_journal_get_write_access(handle,
-                               EXT4_SB(sb)->s_sbh);
-               if (err)
-                       return err;
-               EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat);
-               sb->s_dirt = 1;
-               handle->h_sync = 1;
-               BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
-                                       "call ext4_journal_dirty_met adata");
-               err = ext4_journal_dirty_metadata(handle,
-                               EXT4_SB(sb)->s_sbh);
-       }
-       return err;
-}
-
-int ext4_update_incompat_feature(handle_t *handle,
-                                       struct super_block *sb, __u32 incompat)
-{
-       int err = 0;
-       if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) {
-               err = ext4_journal_get_write_access(handle,
-                               EXT4_SB(sb)->s_sbh);
-               if (err)
-                       return err;
-               EXT4_SET_INCOMPAT_FEATURE(sb, incompat);
-               sb->s_dirt = 1;
-               handle->h_sync = 1;
-               BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
-                                       "call ext4_journal_dirty_met adata");
-               err = ext4_journal_dirty_metadata(handle,
-                               EXT4_SB(sb)->s_sbh);
-       }
-       return err;
-}
-
  /*
   * Open the external journal device
   */
@@ -904,7 +844,7 @@ static const struct export_operations ext4_export_ops = {
  enum {
         Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
         Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
-       Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
+       Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
         Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
         Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
         Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
@@ -915,7 +855,7 @@ enum {
         Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
         Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
         Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
-       Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+       Opt_stripe, Opt_delalloc, Opt_nodelalloc,
         Opt_inode_readahead_blks
  };
  
@@ -933,8 +873,6 @@ static const match_table_t tokens = {
         {Opt_err_panic, "errors=panic"},
         {Opt_err_ro, "errors=remount-ro"},
         {Opt_nouid32, "nouid32"},
-       {Opt_nocheck, "nocheck"},
-       {Opt_nocheck, "check=none"},
         {Opt_debug, "debug"},
         {Opt_oldalloc, "oldalloc"},
         {Opt_orlov, "orlov"},
@@ -973,8 +911,6 @@ static const match_table_t tokens = {
         {Opt_extents, "extents"},
         {Opt_noextents, "noextents"},
         {Opt_i_version, "i_version"},
-       {Opt_mballoc, "mballoc"},
-       {Opt_nomballoc, "nomballoc"},
         {Opt_stripe, "stripe=%u"},
         {Opt_resize, "resize"},
         {Opt_delalloc, "delalloc"},
@@ -1073,9 +1009,6 @@ static int parse_options(char *options, struct super_block *sb,
                 case Opt_nouid32:
                         set_opt(sbi->s_mount_opt, NO_UID32);
                         break;
-               case Opt_nocheck:
-                       clear_opt(sbi->s_mount_opt, CHECK);
-                       break;
                 case Opt_debug:
                         set_opt(sbi->s_mount_opt, DEBUG);
                         break;
@@ -1618,14 +1551,14 @@ static int ext4_check_descriptors(struct super_block *sb)
                 if (block_bitmap < first_block || block_bitmap > last_block) {
                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
                                "Block bitmap for group %lu not in group "
-                              "(block %llu)!", i, block_bitmap);
+                              "(block %llu)!\n", i, block_bitmap);
                         return 0;
                 }
                 inode_bitmap = ext4_inode_bitmap(sb, gdp);
                 if (inode_bitmap < first_block || inode_bitmap > last_block) {
                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
                                "Inode bitmap for group %lu not in group "
-                              "(block %llu)!", i, inode_bitmap);
+                              "(block %llu)!\n", i, inode_bitmap);
                         return 0;
                 }
                 inode_table = ext4_inode_table(sb, gdp);
@@ -1633,7 +1566,7 @@ static int ext4_check_descriptors(struct super_block *sb)
                     inode_table + sbi->s_itb_per_group - 1 > last_block) {
                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
                                "Inode table for group %lu not in group "
-                              "(block %llu)!", i, inode_table);
+                              "(block %llu)!\n", i, inode_table);
                         return 0;
                 }
                 spin_lock(sb_bgl_lock(sbi, i));
@@ -1778,13 +1711,13 @@ static void ext4_orphan_cleanup(struct super_block *sb,
   *
   * Note, this does *not* consider any metadata overhead for vfs i_blocks.
   */
-static loff_t ext4_max_size(int blkbits)
+static loff_t ext4_max_size(int blkbits, int has_huge_files)
  {
         loff_t res;
         loff_t upper_limit = MAX_LFS_FILESIZE;
  
         /* small i_blocks in vfs inode? */
-       if (sizeof(blkcnt_t) < sizeof(u64)) {
+       if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
                 /*
                  * CONFIG_LSF is not enabled implies the inode
                  * i_block represent total blocks in 512 bytes
@@ -1814,7 +1747,7 @@ static loff_t ext4_max_size(int blkbits)
   * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
   * We need to be 1 filesystem block less than the 2^48 sector limit.
   */
-static loff_t ext4_max_bitmap_size(int bits)
+static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
  {
         loff_t res = EXT4_NDIR_BLOCKS;
         int meta_blocks;
@@ -1827,11 +1760,11 @@ static loff_t ext4_max_bitmap_size(int bits)
          * total number of  512 bytes blocks of the file
          */
  
-       if (sizeof(blkcnt_t) < sizeof(u64)) {
+       if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
                 /*
-                * CONFIG_LSF is not enabled implies the inode
-                * i_block represent total blocks in 512 bytes
-                * 32 == size of vfs inode i_blocks * 8
+                * !has_huge_files or CONFIG_LSF not enabled implies
+                * that the inode's i_blocks holds the block count in
+                * 512-byte units; 32 == size of vfs inode i_blocks * 8
                  */
                 upper_limit = (1LL << 32) - 1;
  
@@ -1940,7 +1873,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         int blocksize;
         int db_count;
         int i;
-       int needs_recovery;
+       int needs_recovery, has_huge_files;
         __le32 features;
         __u64 blocks_count;
         int err;
@@ -2081,7 +2014,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        sb->s_id, le32_to_cpu(features));
                 goto failed_mount;
         }
-       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
+       has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                   EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
+       if (has_huge_files) {
                 /*
                  * Large file size enabled file system can only be
                  * mount if kernel is build with CONFIG_LSF
@@ -2131,8 +2066,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                 }
         }
  
-       sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits);
-       sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits);
+       sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
+                                                     has_huge_files);
+       sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
  
         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
                 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
@@ -2456,6 +2392,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                         "available.\n");
         }
  
+       if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+               printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
+                               "requested data journaling mode\n");
+               clear_opt(sbi->s_mount_opt, DELALLOC);
+       } else if (test_opt(sb, DELALLOC))
+               printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
+
+       ext4_ext_init(sb);
+       err = ext4_mb_init(sb, needs_recovery);
+       if (err) {
+               printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
+                      err);
+               goto failed_mount4;
+       }
+
         /*
          * akpm: core read_super() calls in here with the superblock locked.
          * That deadlocks, because orphan cleanup needs to lock the superblock
@@ -2475,21 +2426,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
                "writeback");
  
-       if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
-               printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
-                               "requested data journaling mode\n");
-               clear_opt(sbi->s_mount_opt, DELALLOC);
-       } else if (test_opt(sb, DELALLOC))
-               printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
-
-       ext4_ext_init(sb);
-       err = ext4_mb_init(sb, needs_recovery);
-       if (err) {
-               printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
-                      err);
-               goto failed_mount4;
-       }
-
         lock_kernel();
         return 0;
  
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c

index 0abe02c4242aa82d28aeef435a8f459a4ec729f5..8b119e16aa36d9970c881a5c41b0f98ff06962fa 100644 (file)
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -995,6 +995,9 @@ restart_loop:
         }
         spin_unlock(&journal->j_list_lock);
  
+       if (journal->j_commit_callback)
+               journal->j_commit_callback(journal, commit_transaction);
+
         trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
                    journal->j_devname, commit_transaction->t_tid,
                    journal->j_tail_sequence);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c

index e5d540588fa9609296f446c232278202bd1f2d6b..39b7805a599a79fd80c0150073cbf0f473fe8bcf 100644 (file)
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -52,6 +52,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
         transaction->t_expires = jiffies + journal->j_commit_interval;
         spin_lock_init(&transaction->t_handle_lock);
         INIT_LIST_HEAD(&transaction->t_inode_list);
+       INIT_LIST_HEAD(&transaction->t_private_list);
  
         /* Set up the commit timer for the new transaction. */
         journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h

index 463d6f10b64f7ffaea2f4fc13e57bc2ca4a95c8e..c7d106ef22e2f535f2c78f470a300b5fb7e8b930 100644 (file)
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -641,6 +641,11 @@ struct transaction_s
          */
         int t_handle_count;
  
+       /*
+        * For use by the filesystem to store fs-specific data
+        * structures associated with the transaction
+        */
+       struct list_head        t_private_list;
  };
  
  struct transaction_run_stats_s {
@@ -935,6 +940,10 @@ struct journal_s
  
         pid_t                   j_last_sync_writer;
  
+       /* This function is called when a transaction is closed */
+       void                    (*j_commit_callback)(journal_t *,
+                                                    transaction_t *);
+
         /*
          * Journal statistics
          */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h

index 12b15c561a1f1ca3dcfe91c9f64e439857fc6bd6..e585657e9831afe4c5d36e5f52fabd1246b03ab1 100644 (file)
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -63,7 +63,15 @@ struct writeback_control {
         unsigned for_writepages:1;      /* This is a writepages() call */
         unsigned range_cyclic:1;        /* range_start is cyclic */
         unsigned more_io:1;             /* more io to be dispatched */
-       unsigned range_cont:1;
+       /*
+        * write_cache_pages() won't update wbc->nr_to_write and
+        * mapping->writeback_index if no_nrwrite_index_update
+        * is set.  write_cache_pages() may write more than we
+        * requested and we want to make sure nr_to_write and
+        * writeback_index are updated in a consistent manner
+        * so we use a single control to update them
+        */
+       unsigned no_nrwrite_index_update:1;
  };
  
  /*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c

index c130a137c12938cb0d7baa3ac6a9b8f333beb53c..b40f6d5f8fe9bc24750fb829742ef655f0f4c42c 100644 (file)
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -876,6 +876,7 @@ int write_cache_pages(struct address_space *mapping,
         pgoff_t end;            /* Inclusive */
         int scanned = 0;
         int range_whole = 0;
+       long nr_to_write = wbc->nr_to_write;
  
         if (wbc->nonblocking && bdi_write_congested(bdi)) {
                 wbc->encountered_congestion = 1;
@@ -939,7 +940,7 @@ retry:
                                 unlock_page(page);
                                 ret = 0;
                         }
-                       if (ret || (--(wbc->nr_to_write) <= 0))
+                       if (ret || (--nr_to_write <= 0))
                                 done = 1;
                         if (wbc->nonblocking && bdi_write_congested(bdi)) {
                                 wbc->encountered_congestion = 1;
@@ -958,11 +959,12 @@ retry:
                 index = 0;
                 goto retry;
         }
-       if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
-               mapping->writeback_index = index;
+       if (!wbc->no_nrwrite_index_update) {
+               if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
+                       mapping->writeback_index = index;
+               wbc->nr_to_write = nr_to_write;
+       }
  
-       if (wbc->range_cont)
-               wbc->range_start = index << PAGE_CACHE_SHIFT;
         return ret;
  }
  EXPORT_SYMBOL(write_cache_pages);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 17 Oct 2008 22:08:47 +0000 (15:08 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 17 Oct 2008 22:08:47 +0000 (15:08 -0700)
Documentation/filesystems/ext4.txt		patch \| blob \| history
fs/Kconfig		patch \| blob \| history
fs/Makefile		patch \| blob \| history
fs/ext4/balloc.c		patch \| blob \| history
fs/ext4/ext4.h		patch \| blob \| history
fs/ext4/ext4_sb.h		patch \| blob \| history
fs/ext4/inode.c		patch \| blob \| history
fs/ext4/mballoc.c		patch \| blob \| history
fs/ext4/mballoc.h		patch \| blob \| history
fs/ext4/super.c		patch \| blob \| history
fs/jbd2/commit.c		patch \| blob \| history
fs/jbd2/transaction.c		patch \| blob \| history
include/linux/jbd2.h		patch \| blob \| history
include/linux/writeback.h		patch \| blob \| history
mm/page-writeback.c		patch \| blob \| history