www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab...
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 17 Oct 2008 22:08:47 +0000 (15:08 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 17 Oct 2008 22:08:47 +0000 (15:08 -0700)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6: (95 commits)
  V4L/DVB (9296): Patch to remove warning message during cx88-dvb compilation
  V4L/DVB (9294): gspca: Add a stop sequence in t613.
  V4L/DVB (9293): gspca: Separate and fix the sensor dependant sequences in t613.
  V4L/DVB (9292): gspca: Call the control setting functions at init time in t613.
  V4L/DVB (9291): gspca: Do not set the white balance temperature by default in t613.
  V4L/DVB (9290): gspca: Adjust the sensor init sequences in t613.
  V4L/DVB (9289): gspca: Other sensor identified as om6802 in t613.
  V4L/DVB (9288): gspca: Write to the USB device and not USB interface in t613.
  V4L/DVB (9287): gspca: Change the name of the multi bytes write function in t613.
  V4L/DVB (9286): gspca: Compilation problem of gspca.c and the kernel version.
  V4L/DVB (9283): Correct typo and enable setting the gain on the mt9m111 sensor
  V4L/DVB (9282): Properly iterate the urbs when destroying them.
  V4L/DVB (9281): gspca: Add hflip and vflip to the po1030 sensor
  V4L/DVB (9280): gspca: Use the gspca debug macros
  V4L/DVB (9279): gspca: Correct some copyright headers
  V4L/DVB (9278): gspca: Remove the m5602_debug variable
  V4L/DVB (9277): gspca: propagate an error in m5602_start_transfer()
  V4L/DVB (9276): videobuf-dvb: two functions are now static
  V4L/DVB (9275): dvb: input data pointer of cx24116_writeregN() should be const
  V4L/DVB (9274): Remove spurious messages and turn into debug.
  ...

15 files changed:
Documentation/filesystems/ext4.txt
fs/Kconfig
fs/Makefile
fs/ext4/balloc.c
fs/ext4/ext4.h
fs/ext4/ext4_sb.h
fs/ext4/inode.c
fs/ext4/mballoc.c
fs/ext4/mballoc.h
fs/ext4/super.c
fs/jbd2/commit.c
fs/jbd2/transaction.c
include/linux/jbd2.h
include/linux/writeback.h
mm/page-writeback.c

index eb154ef36c2a4b708004ff08660f2a7ebe971fa7..174eaff7ded9f1e7f9ef71882b446d87cea8c359 100644 (file)
@@ -2,19 +2,24 @@
 Ext4 Filesystem
 ===============
 
-This is a development version of the ext4 filesystem, an advanced level
-of the ext3 filesystem which incorporates scalability and reliability
-enhancements for supporting large filesystems (64 bit) in keeping with
-increasing disk capacities and state-of-the-art feature requirements.
+Ext4 is an advanced level of the ext3 filesystem which incorporates
+scalability and reliability enhancements for supporting large filesystems
+(64 bit) in keeping with increasing disk capacities and state-of-the-art
+feature requirements.
 
-Mailing list: linux-ext4@vger.kernel.org
+Mailing list:  linux-ext4@vger.kernel.org
+Web site:      http://ext4.wiki.kernel.org
 
 
 1. Quick usage instructions:
 ===========================
 
+Note: More extensive information for getting started with ext4 can be
+      found at the ext4 wiki site at the URL:
+      http://ext4.wiki.kernel.org/index.php/Ext4_Howto
+
   - Compile and install the latest version of e2fsprogs (as of this
-    writing version 1.41) from:
+    writing version 1.41.3) from:
 
     http://sourceforge.net/project/showfiles.php?group_id=2406
        
@@ -36,11 +41,9 @@ Mailing list: linux-ext4@vger.kernel.org
 
        # mke2fs -t ext4 /dev/hda1
 
-    Or configure an existing ext3 filesystem to support extents and set
-    the test_fs flag to indicate that it's ok for an in-development
-    filesystem to touch this filesystem:
+    Or to configure an existing ext3 filesystem to support extents: 
 
-       # tune2fs -O extents -E test_fs /dev/hda1
+       # tune2fs -O extents /dev/hda1
 
     If the filesystem was created with 128 byte inodes, it can be
     converted to use 256 byte for greater efficiency via:
@@ -104,8 +107,8 @@ exist yet so I'm not sure they're in the near-term roadmap.
 The big performance win will come with mballoc, delalloc and flex_bg
 grouping of bitmaps and inode tables.  Some test results available here:
 
- - http://www.bullopensource.org/ext4/20080530/ffsb-write-2.6.26-rc2.html
- - http://www.bullopensource.org/ext4/20080530/ffsb-readwrite-2.6.26-rc2.html
+ - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-write-2.6.27-rc1.html
+ - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-readwrite-2.6.27-rc1.html
 
 3. Options
 ==========
@@ -214,9 +217,6 @@ noreservation
 bsddf          (*)     Make 'df' act like BSD.
 minixdf                        Make 'df' act like Minix.
 
-check=none             Don't do extra checking of bitmaps on mount.
-nocheck
-
 debug                  Extra debugging information is sent to syslog.
 
 errors=remount-ro(*)   Remount the filesystem read-only on an error.
@@ -253,8 +253,6 @@ nobh                        (a) cache disk block mapping information
                        "nobh" option tries to avoid associating buffer
                        heads (supported only for "writeback" mode).
 
-mballoc                (*)     Use the multiple block allocator for block allocation
-nomballoc              disabled multiple block allocator for block allocation.
 stripe=n               Number of filesystem blocks that mballoc will try
                        to use for allocation size and alignment. For RAID5/6
                        systems this should be the number of data
index 9e9d70c02a07c9a8876f71c3445ac5092c563810..d0a1174fb516c14f63862e42919d74c044df7882 100644 (file)
@@ -160,7 +160,7 @@ config EXT4_FS
          filesystem initially.
 
          To compile this file system support as a module, choose M here. The
-         module will be called ext4dev.
+         module will be called ext4.
 
          If unsure, say N.
 
index d0c69f57e5bfdebd65a668443428c44373360ab0..2168c902d5ca61027236bb75812591d16575ec1d 100644 (file)
@@ -71,7 +71,7 @@ obj-$(CONFIG_DLM)             += dlm/
 # Do not add any filesystems before this line
 obj-$(CONFIG_REISERFS_FS)      += reiserfs/
 obj-$(CONFIG_EXT3_FS)          += ext3/ # Before ext2 so root fs can be ext3
-obj-$(CONFIG_EXT4_FS)          += ext4/ # Before ext2 so root fs can be ext4dev
+obj-$(CONFIG_EXT4_FS)          += ext4/ # Before ext2 so root fs can be ext4
 obj-$(CONFIG_JBD)              += jbd/
 obj-$(CONFIG_JBD2)             += jbd2/
 obj-$(CONFIG_EXT2_FS)          += ext2/
index bd2ece22882755b02599563a1bee89c5b10d2b87..b9821be709bddb000077b9a4b15efe34e9392bd0 100644 (file)
@@ -568,8 +568,16 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 
        /* this isn't the right place to decide whether block is metadata
         * inode.c/extents.c knows better, but for safety ... */
-       if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
-                       ext4_should_journal_data(inode))
+       if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+               metadata = 1;
+
+       /* We need to make sure we don't reuse
+        * a released block until the transaction commits.
+        * Writeback mode has weak data consistency, so
+        * don't force data as metadata when freeing a block
+        * in writeback mode.
+        */
+       if (metadata == 0 && !ext4_should_writeback_data(inode))
                metadata = 1;
 
        sb = inode->i_sb;
index 6690a41cdd9fc8ca6536d3a6077222e65830a834..4880cc3e672778d54944326d265fa9b50b8d7275 100644 (file)
@@ -511,7 +511,6 @@ do {                                                                               \
 /*
  * Mount flags
  */
-#define EXT4_MOUNT_CHECK               0x00001 /* Do mount-time checks */
 #define EXT4_MOUNT_OLDALLOC            0x00002  /* Don't use the new Orlov allocator */
 #define EXT4_MOUNT_GRPID               0x00004 /* Create files with directory's group */
 #define EXT4_MOUNT_DEBUG               0x00008 /* Some debugging messages */
index 6a0b40d43264b232a806f0e265150343c6375077..445fde603df800cb4a1bf8144f34eb9aeb17b52a 100644 (file)
@@ -99,9 +99,6 @@ struct ext4_sb_info {
        struct inode *s_buddy_cache;
        long s_blocks_reserved;
        spinlock_t s_reserve_lock;
-       struct list_head s_active_transaction;
-       struct list_head s_closed_transaction;
-       struct list_head s_committed_transaction;
        spinlock_t s_md_lock;
        tid_t s_last_transaction;
        unsigned short *s_mb_offsets, *s_mb_maxs;
index 9b4ec9decfd1b6020c13bbd57d86af7eab46006b..8dbf6953845ba61097703006553c34d1d83bd81f 100644 (file)
@@ -1648,6 +1648,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
        int ret = 0, err, nr_pages, i;
        unsigned long index, end;
        struct pagevec pvec;
+       long pages_skipped;
 
        BUG_ON(mpd->next_page <= mpd->first_page);
        pagevec_init(&pvec, 0);
@@ -1655,20 +1656,30 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
        end = mpd->next_page - 1;
 
        while (index <= end) {
-               /* XXX: optimize tail */
-               nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+               /*
+                * We can use PAGECACHE_TAG_DIRTY lookup here because
+                * even though we have cleared the dirty flag on the page
+                * We still keep the page in the radix tree with tag
+                * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
+                * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
+                * which is called via the below writepage callback.
+                */
+               nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+                                       PAGECACHE_TAG_DIRTY,
+                                       min(end - index,
+                                       (pgoff_t)PAGEVEC_SIZE-1) + 1);
                if (nr_pages == 0)
                        break;
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
 
-                       index = page->index;
-                       if (index > end)
-                               break;
-                       index++;
-
+                       pages_skipped = mpd->wbc->pages_skipped;
                        err = mapping->a_ops->writepage(page, mpd->wbc);
-                       if (!err)
+                       if (!err && (pages_skipped == mpd->wbc->pages_skipped))
+                               /*
+                                * have successfully written the page
+                                * without skipping the same
+                                */
                                mpd->pages_written++;
                        /*
                         * In error case, we have to continue because
@@ -2104,7 +2115,6 @@ static int mpage_da_writepages(struct address_space *mapping,
                               struct writeback_control *wbc,
                               struct mpage_da_data *mpd)
 {
-       long to_write;
        int ret;
 
        if (!mpd->get_block)
@@ -2119,19 +2129,18 @@ static int mpage_da_writepages(struct address_space *mapping,
        mpd->pages_written = 0;
        mpd->retval = 0;
 
-       to_write = wbc->nr_to_write;
-
        ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
-
        /*
         * Handle last extent of pages
         */
        if (!mpd->io_done && mpd->next_page != mpd->first_page) {
                if (mpage_da_map_blocks(mpd) == 0)
                        mpage_da_submit_io(mpd);
-       }
 
-       wbc->nr_to_write = to_write - mpd->pages_written;
+               mpd->io_done = 1;
+               ret = MPAGE_DA_EXTENT_TAIL;
+       }
+       wbc->nr_to_write -= mpd->pages_written;
        return ret;
 }
 
@@ -2360,12 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 static int ext4_da_writepages(struct address_space *mapping,
                              struct writeback_control *wbc)
 {
+       pgoff_t index;
+       int range_whole = 0;
        handle_t *handle = NULL;
-       loff_t range_start = 0;
        struct mpage_da_data mpd;
        struct inode *inode = mapping->host;
+       int no_nrwrite_index_update;
+       long pages_written = 0, pages_skipped;
        int needed_blocks, ret = 0, nr_to_writebump = 0;
-       long to_write, pages_skipped = 0;
        struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
 
        /*
@@ -2385,23 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping,
                nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
                wbc->nr_to_write = sbi->s_mb_stream_request;
        }
+       if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+               range_whole = 1;
 
-       if (!wbc->range_cyclic)
-               /*
-                * If range_cyclic is not set force range_cont
-                * and save the old writeback_index
-                */
-               wbc->range_cont = 1;
-
-       range_start =  wbc->range_start;
-       pages_skipped = wbc->pages_skipped;
+       if (wbc->range_cyclic)
+               index = mapping->writeback_index;
+       else
+               index = wbc->range_start >> PAGE_CACHE_SHIFT;
 
        mpd.wbc = wbc;
        mpd.inode = mapping->host;
 
-restart_loop:
-       to_write = wbc->nr_to_write;
-       while (!ret && to_write > 0) {
+       /*
+        * we don't want write_cache_pages to update
+        * nr_to_write and writeback_index
+        */
+       no_nrwrite_index_update = wbc->no_nrwrite_index_update;
+       wbc->no_nrwrite_index_update = 1;
+       pages_skipped = wbc->pages_skipped;
+
+       while (!ret && wbc->nr_to_write > 0) {
 
                /*
                 * we  insert one extent at a time. So we need
@@ -2422,48 +2436,53 @@ restart_loop:
                        dump_stack();
                        goto out_writepages;
                }
-               to_write -= wbc->nr_to_write;
-
                mpd.get_block = ext4_da_get_block_write;
                ret = mpage_da_writepages(mapping, wbc, &mpd);
 
                ext4_journal_stop(handle);
 
-               if (mpd.retval == -ENOSPC)
+               if (mpd.retval == -ENOSPC) {
+                       /* commit the transaction which would
+                        * free blocks released in the transaction
+                        * and try again
+                        */
                        jbd2_journal_force_commit_nested(sbi->s_journal);
-
-               /* reset the retry count */
-               if (ret == MPAGE_DA_EXTENT_TAIL) {
+                       wbc->pages_skipped = pages_skipped;
+                       ret = 0;
+               } else if (ret == MPAGE_DA_EXTENT_TAIL) {
                        /*
                         * got one extent now try with
                         * rest of the pages
                         */
-                       to_write += wbc->nr_to_write;
+                       pages_written += mpd.pages_written;
+                       wbc->pages_skipped = pages_skipped;
                        ret = 0;
-               } else if (wbc->nr_to_write) {
+               } else if (wbc->nr_to_write)
                        /*
                         * There is no more writeout needed
                         * or we requested for a noblocking writeout
                         * and we found the device congested
                         */
-                       to_write += wbc->nr_to_write;
                        break;
-               }
-               wbc->nr_to_write = to_write;
-       }
-
-       if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
-               /* We skipped pages in this loop */
-               wbc->range_start = range_start;
-               wbc->nr_to_write = to_write +
-                               wbc->pages_skipped - pages_skipped;
-               wbc->pages_skipped = pages_skipped;
-               goto restart_loop;
        }
+       if (pages_skipped != wbc->pages_skipped)
+               printk(KERN_EMERG "This should not happen leaving %s "
+                               "with nr_to_write = %ld ret = %d\n",
+                               __func__, wbc->nr_to_write, ret);
+
+       /* Update index */
+       index += pages_written;
+       if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+               /*
+                * set the writeback_index so that range_cyclic
+                * mode will write it back later
+                */
+               mapping->writeback_index = index;
 
 out_writepages:
-       wbc->nr_to_write = to_write - nr_to_writebump;
-       wbc->range_start = range_start;
+       if (!no_nrwrite_index_update)
+               wbc->no_nrwrite_index_update = 0;
+       wbc->nr_to_write -= nr_to_writebump;
        return ret;
 }
 
@@ -4175,7 +4194,6 @@ static int ext4_inode_blocks_set(handle_t *handle,
        struct inode *inode = &(ei->vfs_inode);
        u64 i_blocks = inode->i_blocks;
        struct super_block *sb = inode->i_sb;
-       int err = 0;
 
        if (i_blocks <= ~0U) {
                /*
@@ -4185,36 +4203,27 @@ static int ext4_inode_blocks_set(handle_t *handle,
                raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
                raw_inode->i_blocks_high = 0;
                ei->i_flags &= ~EXT4_HUGE_FILE_FL;
-       } else if (i_blocks <= 0xffffffffffffULL) {
+               return 0;
+       }
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
+               return -EFBIG;
+
+       if (i_blocks <= 0xffffffffffffULL) {
                /*
                 * i_blocks can be represented in a 48 bit variable
                 * as multiple of 512 bytes
                 */
-               err = ext4_update_rocompat_feature(handle, sb,
-                                           EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
-               if (err)
-                       goto  err_out;
-               /* i_block is stored in the split  48 bit fields */
                raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
                raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
                ei->i_flags &= ~EXT4_HUGE_FILE_FL;
        } else {
-               /*
-                * i_blocks should be represented in a 48 bit variable
-                * as multiple of  file system block size
-                */
-               err = ext4_update_rocompat_feature(handle, sb,
-                                           EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
-               if (err)
-                       goto  err_out;
                ei->i_flags |= EXT4_HUGE_FILE_FL;
                /* i_block is stored in file system block size */
                i_blocks = i_blocks >> (inode->i_blkbits - 9);
                raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
                raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
        }
-err_out:
-       return err;
+       return 0;
 }
 
 /*
index b580714f0d859c107e94a3f6c61286141fb0cdab..dfe17a1340523c9c10d3c1f14a1077b4fd6939c9 100644 (file)
@@ -2300,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
        }
 
        INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
+       meta_group_info[i]->bb_free_root.rb_node = NULL;;
 
 #ifdef DOUBLE_CHECK
        {
@@ -2522,9 +2523,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
        }
 
        spin_lock_init(&sbi->s_md_lock);
-       INIT_LIST_HEAD(&sbi->s_active_transaction);
-       INIT_LIST_HEAD(&sbi->s_closed_transaction);
-       INIT_LIST_HEAD(&sbi->s_committed_transaction);
        spin_lock_init(&sbi->s_bal_lock);
 
        sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
@@ -2553,6 +2551,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
        ext4_mb_init_per_dev_proc(sb);
        ext4_mb_history_init(sb);
 
+       sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+
        printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
        return 0;
 }
@@ -2568,7 +2568,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
                pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
                list_del(&pa->pa_group_list);
                count++;
-               kfree(pa);
+               kmem_cache_free(ext4_pspace_cachep, pa);
        }
        if (count)
                mb_debug("mballoc: %u PAs left\n", count);
@@ -2582,15 +2582,6 @@ int ext4_mb_release(struct super_block *sb)
        struct ext4_group_info *grinfo;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-       /* release freed, non-committed blocks */
-       spin_lock(&sbi->s_md_lock);
-       list_splice_init(&sbi->s_closed_transaction,
-                       &sbi->s_committed_transaction);
-       list_splice_init(&sbi->s_active_transaction,
-                       &sbi->s_committed_transaction);
-       spin_unlock(&sbi->s_md_lock);
-       ext4_mb_free_committed_blocks(sb);
-
        if (sbi->s_group_info) {
                for (i = 0; i < sbi->s_groups_count; i++) {
                        grinfo = ext4_get_group_info(sb, i);
@@ -2644,61 +2635,57 @@ int ext4_mb_release(struct super_block *sb)
        return 0;
 }
 
-static noinline_for_stack void
-ext4_mb_free_committed_blocks(struct super_block *sb)
+/*
+ * This function is called by the jbd2 layer once the commit has finished,
+ * so we know we can free the blocks that were released with that commit.
+ */
+static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 {
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-       int err;
-       int i;
-       int count = 0;
-       int count2 = 0;
-       struct ext4_free_metadata *md;
+       struct super_block *sb = journal->j_private;
        struct ext4_buddy e4b;
+       struct ext4_group_info *db;
+       int err, count = 0, count2 = 0;
+       struct ext4_free_data *entry;
+       ext4_fsblk_t discard_block;
+       struct list_head *l, *ltmp;
 
-       if (list_empty(&sbi->s_committed_transaction))
-               return;
-
-       /* there is committed blocks to be freed yet */
-       do {
-               /* get next array of blocks */
-               md = NULL;
-               spin_lock(&sbi->s_md_lock);
-               if (!list_empty(&sbi->s_committed_transaction)) {
-                       md = list_entry(sbi->s_committed_transaction.next,
-                                       struct ext4_free_metadata, list);
-                       list_del(&md->list);
-               }
-               spin_unlock(&sbi->s_md_lock);
-
-               if (md == NULL)
-                       break;
+       list_for_each_safe(l, ltmp, &txn->t_private_list) {
+               entry = list_entry(l, struct ext4_free_data, list);
 
                mb_debug("gonna free %u blocks in group %lu (0x%p):",
-                               md->num, md->group, md);
+                        entry->count, entry->group, entry);
 
-               err = ext4_mb_load_buddy(sb, md->group, &e4b);
+               err = ext4_mb_load_buddy(sb, entry->group, &e4b);
                /* we expect to find existing buddy because it's pinned */
                BUG_ON(err != 0);
 
+               db = e4b.bd_info;
                /* there are blocks to put in buddy to make them really free */
-               count += md->num;
+               count += entry->count;
                count2++;
-               ext4_lock_group(sb, md->group);
-               for (i = 0; i < md->num; i++) {
-                       mb_debug(" %u", md->blocks[i]);
-                       mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
+               ext4_lock_group(sb, entry->group);
+               /* Take it out of per group rb tree */
+               rb_erase(&entry->node, &(db->bb_free_root));
+               mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
+
+               if (!db->bb_free_root.rb_node) {
+                       /* No more items in the per group rb tree
+                        * balance refcounts from ext4_mb_free_metadata()
+                        */
+                       page_cache_release(e4b.bd_buddy_page);
+                       page_cache_release(e4b.bd_bitmap_page);
                }
-               mb_debug("\n");
-               ext4_unlock_group(sb, md->group);
-
-               /* balance refcounts from ext4_mb_free_metadata() */
-               page_cache_release(e4b.bd_buddy_page);
-               page_cache_release(e4b.bd_bitmap_page);
-
-               kfree(md);
+               ext4_unlock_group(sb, entry->group);
+               discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
+                       + entry->start_blk
+                       + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+               trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id,
+                          (unsigned long long) discard_block, entry->count);
+               sb_issue_discard(sb, discard_block, entry->count);
+
+               kmem_cache_free(ext4_free_ext_cachep, entry);
                ext4_mb_release_desc(&e4b);
-
-       } while (md);
+       }
 
        mb_debug("freed %u blocks in %u structures\n", count, count2);
 }
@@ -2712,6 +2699,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
 
 static int ext4_mb_init_per_dev_proc(struct super_block *sb)
 {
+#ifdef CONFIG_PROC_FS
        mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct proc_dir_entry *proc;
@@ -2735,10 +2723,14 @@ err_out:
        remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
        remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
        return -ENOMEM;
+#else
+       return 0;
+#endif
 }
 
 static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
 {
+#ifdef CONFIG_PROC_FS
        struct ext4_sb_info *sbi = EXT4_SB(sb);
 
        if (sbi->s_proc == NULL)
@@ -2750,7 +2742,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
        remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
        remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
        remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
-
+#endif
        return 0;
 }
 
@@ -2771,6 +2763,16 @@ int __init init_ext4_mballoc(void)
                kmem_cache_destroy(ext4_pspace_cachep);
                return -ENOMEM;
        }
+
+       ext4_free_ext_cachep =
+               kmem_cache_create("ext4_free_block_extents",
+                                    sizeof(struct ext4_free_data),
+                                    0, SLAB_RECLAIM_ACCOUNT, NULL);
+       if (ext4_free_ext_cachep == NULL) {
+               kmem_cache_destroy(ext4_pspace_cachep);
+               kmem_cache_destroy(ext4_ac_cachep);
+               return -ENOMEM;
+       }
        return 0;
 }
 
@@ -2779,6 +2781,7 @@ void exit_ext4_mballoc(void)
        /* XXX: synchronize_rcu(); */
        kmem_cache_destroy(ext4_pspace_cachep);
        kmem_cache_destroy(ext4_ac_cachep);
+       kmem_cache_destroy(ext4_free_ext_cachep);
 }
 
 
@@ -4324,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
                goto out1;
        }
 
-       ext4_mb_poll_new_transaction(sb, handle);
-
        *errp = ext4_mb_initialize_context(ac, ar);
        if (*errp) {
                ar->len = 0;
@@ -4384,35 +4385,20 @@ out1:
 
        return block;
 }
-static void ext4_mb_poll_new_transaction(struct super_block *sb,
-                                               handle_t *handle)
-{
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-
-       if (sbi->s_last_transaction == handle->h_transaction->t_tid)
-               return;
-
-       /* new transaction! time to close last one and free blocks for
-        * committed transaction. we know that only transaction can be
-        * active, so previos transaction can be being logged and we
-        * know that transaction before previous is known to be already
-        * logged. this means that now we may free blocks freed in all
-        * transactions before previous one. hope I'm clear enough ... */
 
-       spin_lock(&sbi->s_md_lock);
-       if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
-               mb_debug("new transaction %lu, old %lu\n",
-                               (unsigned long) handle->h_transaction->t_tid,
-                               (unsigned long) sbi->s_last_transaction);
-               list_splice_init(&sbi->s_closed_transaction,
-                               &sbi->s_committed_transaction);
-               list_splice_init(&sbi->s_active_transaction,
-                               &sbi->s_closed_transaction);
-               sbi->s_last_transaction = handle->h_transaction->t_tid;
-       }
-       spin_unlock(&sbi->s_md_lock);
-
-       ext4_mb_free_committed_blocks(sb);
+/*
+ * We can merge two free data extents only if the physical blocks
+ * are contiguous, AND the extents were freed by the same transaction,
+ * AND the blocks are associated with the same group.
+ */
+static int can_merge(struct ext4_free_data *entry1,
+                       struct ext4_free_data *entry2)
+{
+       if ((entry1->t_tid == entry2->t_tid) &&
+           (entry1->group == entry2->group) &&
+           ((entry1->start_blk + entry1->count) == entry2->start_blk))
+               return 1;
+       return 0;
 }
 
 static noinline_for_stack int
@@ -4422,57 +4408,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
        struct ext4_group_info *db = e4b->bd_info;
        struct super_block *sb = e4b->bd_sb;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct ext4_free_metadata *md;
-       int i;
+       struct ext4_free_data *entry, *new_entry;
+       struct rb_node **n = &db->bb_free_root.rb_node, *node;
+       struct rb_node *parent = NULL, *new_node;
+
 
        BUG_ON(e4b->bd_bitmap_page == NULL);
        BUG_ON(e4b->bd_buddy_page == NULL);
 
+       new_entry  = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+       new_entry->start_blk = block;
+       new_entry->group  = group;
+       new_entry->count = count;
+       new_entry->t_tid = handle->h_transaction->t_tid;
+       new_node = &new_entry->node;
+
        ext4_lock_group(sb, group);
-       for (i = 0; i < count; i++) {
-               md = db->bb_md_cur;
-               if (md && db->bb_tid != handle->h_transaction->t_tid) {
-                       db->bb_md_cur = NULL;
-                       md = NULL;
+       if (!*n) {
+               /* first free block extent. We need to
+                * protect buddy cache from being freed,
+                * otherwise we'll refresh it from
+                * on-disk bitmap and lose not-yet-available
+                * blocks */
+               page_cache_get(e4b->bd_buddy_page);
+               page_cache_get(e4b->bd_bitmap_page);
+       }
+       while (*n) {
+               parent = *n;
+               entry = rb_entry(parent, struct ext4_free_data, node);
+               if (block < entry->start_blk)
+                       n = &(*n)->rb_left;
+               else if (block >= (entry->start_blk + entry->count))
+                       n = &(*n)->rb_right;
+               else {
+                       ext4_error(sb, __func__,
+                           "Double free of blocks %d (%d %d)\n",
+                           block, entry->start_blk, entry->count);
+                       return 0;
                }
+       }
 
-               if (md == NULL) {
-                       ext4_unlock_group(sb, group);
-                       md = kmalloc(sizeof(*md), GFP_NOFS);
-                       if (md == NULL)
-                               return -ENOMEM;
-                       md->num = 0;
-                       md->group = group;
-
-                       ext4_lock_group(sb, group);
-                       if (db->bb_md_cur == NULL) {
-                               spin_lock(&sbi->s_md_lock);
-                               list_add(&md->list, &sbi->s_active_transaction);
-                               spin_unlock(&sbi->s_md_lock);
-                               /* protect buddy cache from being freed,
-                                * otherwise we'll refresh it from
-                                * on-disk bitmap and lose not-yet-available
-                                * blocks */
-                               page_cache_get(e4b->bd_buddy_page);
-                               page_cache_get(e4b->bd_bitmap_page);
-                               db->bb_md_cur = md;
-                               db->bb_tid = handle->h_transaction->t_tid;
-                               mb_debug("new md 0x%p for group %lu\n",
-                                               md, md->group);
-                       } else {
-                               kfree(md);
-                               md = db->bb_md_cur;
-                       }
+       rb_link_node(new_node, parent, n);
+       rb_insert_color(new_node, &db->bb_free_root);
+
+       /* Now try to see if the extent can be merged to the left and right */
+       node = rb_prev(new_node);
+       if (node) {
+               entry = rb_entry(node, struct ext4_free_data, node);
+               if (can_merge(entry, new_entry)) {
+                       new_entry->start_blk = entry->start_blk;
+                       new_entry->count += entry->count;
+                       rb_erase(node, &(db->bb_free_root));
+                       spin_lock(&sbi->s_md_lock);
+                       list_del(&entry->list);
+                       spin_unlock(&sbi->s_md_lock);
+                       kmem_cache_free(ext4_free_ext_cachep, entry);
                }
+       }
 
-               BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS);
-               md->blocks[md->num] = block + i;
-               md->num++;
-               if (md->num == EXT4_BB_MAX_BLOCKS) {
-                       /* no more space, put full container on a sb's list */
-                       db->bb_md_cur = NULL;
+       node = rb_next(new_node);
+       if (node) {
+               entry = rb_entry(node, struct ext4_free_data, node);
+               if (can_merge(new_entry, entry)) {
+                       new_entry->count += entry->count;
+                       rb_erase(node, &(db->bb_free_root));
+                       spin_lock(&sbi->s_md_lock);
+                       list_del(&entry->list);
+                       spin_unlock(&sbi->s_md_lock);
+                       kmem_cache_free(ext4_free_ext_cachep, entry);
                }
        }
+       /* Add the extent to transaction's private list */
+       spin_lock(&sbi->s_md_lock);
+       list_add(&new_entry->list, &handle->h_transaction->t_private_list);
+       spin_unlock(&sbi->s_md_lock);
        ext4_unlock_group(sb, group);
        return 0;
 }
@@ -4500,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
 
        *freed = 0;
 
-       ext4_mb_poll_new_transaction(sb, handle);
-
        sbi = EXT4_SB(sb);
        es = EXT4_SB(sb)->s_es;
        if (block < le32_to_cpu(es->s_first_data_block) ||
index b3b4828f8b894c3cda416e0af366f2ecc890a284..b5dff1fff1e5b33af1c583f026b883fae74eca82 100644 (file)
@@ -18,6 +18,8 @@
 #include <linux/pagemap.h>
 #include <linux/seq_file.h>
 #include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/marker.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
 #include "group.h"
 
 static struct kmem_cache *ext4_pspace_cachep;
 static struct kmem_cache *ext4_ac_cachep;
+static struct kmem_cache *ext4_free_ext_cachep;
 
-#ifdef EXT4_BB_MAX_BLOCKS
-#undef EXT4_BB_MAX_BLOCKS
-#endif
-#define EXT4_BB_MAX_BLOCKS     30
+struct ext4_free_data {
+       /* this links the free block information from group_info */
+       struct rb_node node;
 
-struct ext4_free_metadata {
-       ext4_group_t group;
-       unsigned short num;
-       ext4_grpblk_t  blocks[EXT4_BB_MAX_BLOCKS];
+       /* this links the free block information from ext4_sb_info */
        struct list_head list;
+
+       /* group to which the free block extent belongs */
+       ext4_group_t group;
+
+       /* free block extent */
+       ext4_grpblk_t start_blk;
+       ext4_grpblk_t count;
+
+       /* transaction which freed this extent */
+       tid_t   t_tid;
 };
 
 struct ext4_group_info {
        unsigned long   bb_state;
-       unsigned long   bb_tid;
-       struct ext4_free_metadata *bb_md_cur;
+       struct rb_root  bb_free_root;
        unsigned short  bb_first_free;
        unsigned short  bb_free;
        unsigned short  bb_fragments;
@@ -261,8 +269,6 @@ struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
 
 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
                                        ext4_group_t group);
-static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
-static void ext4_mb_free_committed_blocks(struct super_block *);
 static void ext4_mb_return_to_preallocation(struct inode *inode,
                                        struct ext4_buddy *e4b, sector_t block,
                                        int count);
@@ -270,6 +276,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *,
                        struct super_block *, struct ext4_prealloc_space *pa);
 static int ext4_mb_init_per_dev_proc(struct super_block *sb);
 static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
+static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
 
 
 static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
index dea8f13c2fd98f3457c50faefd84be2c78399adf..9b2b2bc4ec175e2ac43bf43113866e1c5dde38e5 100644 (file)
@@ -374,66 +374,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
         */
 }
 
-int ext4_update_compat_feature(handle_t *handle,
-                                       struct super_block *sb, __u32 compat)
-{
-       int err = 0;
-       if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) {
-               err = ext4_journal_get_write_access(handle,
-                               EXT4_SB(sb)->s_sbh);
-               if (err)
-                       return err;
-               EXT4_SET_COMPAT_FEATURE(sb, compat);
-               sb->s_dirt = 1;
-               handle->h_sync = 1;
-               BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
-                                       "call ext4_journal_dirty_met adata");
-               err = ext4_journal_dirty_metadata(handle,
-                               EXT4_SB(sb)->s_sbh);
-       }
-       return err;
-}
-
-int ext4_update_rocompat_feature(handle_t *handle,
-                                       struct super_block *sb, __u32 rocompat)
-{
-       int err = 0;
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) {
-               err = ext4_journal_get_write_access(handle,
-                               EXT4_SB(sb)->s_sbh);
-               if (err)
-                       return err;
-               EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat);
-               sb->s_dirt = 1;
-               handle->h_sync = 1;
-               BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
-                                       "call ext4_journal_dirty_met adata");
-               err = ext4_journal_dirty_metadata(handle,
-                               EXT4_SB(sb)->s_sbh);
-       }
-       return err;
-}
-
-int ext4_update_incompat_feature(handle_t *handle,
-                                       struct super_block *sb, __u32 incompat)
-{
-       int err = 0;
-       if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) {
-               err = ext4_journal_get_write_access(handle,
-                               EXT4_SB(sb)->s_sbh);
-               if (err)
-                       return err;
-               EXT4_SET_INCOMPAT_FEATURE(sb, incompat);
-               sb->s_dirt = 1;
-               handle->h_sync = 1;
-               BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
-                                       "call ext4_journal_dirty_met adata");
-               err = ext4_journal_dirty_metadata(handle,
-                               EXT4_SB(sb)->s_sbh);
-       }
-       return err;
-}
-
 /*
  * Open the external journal device
  */
@@ -904,7 +844,7 @@ static const struct export_operations ext4_export_ops = {
 enum {
        Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
        Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
-       Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
+       Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
        Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
        Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
@@ -915,7 +855,7 @@ enum {
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
        Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
-       Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+       Opt_stripe, Opt_delalloc, Opt_nodelalloc,
        Opt_inode_readahead_blks
 };
 
@@ -933,8 +873,6 @@ static const match_table_t tokens = {
        {Opt_err_panic, "errors=panic"},
        {Opt_err_ro, "errors=remount-ro"},
        {Opt_nouid32, "nouid32"},
-       {Opt_nocheck, "nocheck"},
-       {Opt_nocheck, "check=none"},
        {Opt_debug, "debug"},
        {Opt_oldalloc, "oldalloc"},
        {Opt_orlov, "orlov"},
@@ -973,8 +911,6 @@ static const match_table_t tokens = {
        {Opt_extents, "extents"},
        {Opt_noextents, "noextents"},
        {Opt_i_version, "i_version"},
-       {Opt_mballoc, "mballoc"},
-       {Opt_nomballoc, "nomballoc"},
        {Opt_stripe, "stripe=%u"},
        {Opt_resize, "resize"},
        {Opt_delalloc, "delalloc"},
@@ -1073,9 +1009,6 @@ static int parse_options(char *options, struct super_block *sb,
                case Opt_nouid32:
                        set_opt(sbi->s_mount_opt, NO_UID32);
                        break;
-               case Opt_nocheck:
-                       clear_opt(sbi->s_mount_opt, CHECK);
-                       break;
                case Opt_debug:
                        set_opt(sbi->s_mount_opt, DEBUG);
                        break;
@@ -1618,14 +1551,14 @@ static int ext4_check_descriptors(struct super_block *sb)
                if (block_bitmap < first_block || block_bitmap > last_block) {
                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
                               "Block bitmap for group %lu not in group "
-                              "(block %llu)!", i, block_bitmap);
+                              "(block %llu)!\n", i, block_bitmap);
                        return 0;
                }
                inode_bitmap = ext4_inode_bitmap(sb, gdp);
                if (inode_bitmap < first_block || inode_bitmap > last_block) {
                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
                               "Inode bitmap for group %lu not in group "
-                              "(block %llu)!", i, inode_bitmap);
+                              "(block %llu)!\n", i, inode_bitmap);
                        return 0;
                }
                inode_table = ext4_inode_table(sb, gdp);
@@ -1633,7 +1566,7 @@ static int ext4_check_descriptors(struct super_block *sb)
                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
                               "Inode table for group %lu not in group "
-                              "(block %llu)!", i, inode_table);
+                              "(block %llu)!\n", i, inode_table);
                        return 0;
                }
                spin_lock(sb_bgl_lock(sbi, i));
@@ -1778,13 +1711,13 @@ static void ext4_orphan_cleanup(struct super_block *sb,
  *
  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
  */
-static loff_t ext4_max_size(int blkbits)
+static loff_t ext4_max_size(int blkbits, int has_huge_files)
 {
        loff_t res;
        loff_t upper_limit = MAX_LFS_FILESIZE;
 
        /* small i_blocks in vfs inode? */
-       if (sizeof(blkcnt_t) < sizeof(u64)) {
+       if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
                /*
                 * CONFIG_LSF is not enabled implies the inode
                 * i_block represent total blocks in 512 bytes
@@ -1814,7 +1747,7 @@ static loff_t ext4_max_size(int blkbits)
  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
  * We need to be 1 filesystem block less than the 2^48 sector limit.
  */
-static loff_t ext4_max_bitmap_size(int bits)
+static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
 {
        loff_t res = EXT4_NDIR_BLOCKS;
        int meta_blocks;
@@ -1827,11 +1760,11 @@ static loff_t ext4_max_bitmap_size(int bits)
         * total number of  512 bytes blocks of the file
         */
 
-       if (sizeof(blkcnt_t) < sizeof(u64)) {
+       if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
                /*
-                * CONFIG_LSF is not enabled implies the inode
-                * i_block represent total blocks in 512 bytes
-                * 32 == size of vfs inode i_blocks * 8
+                * !has_huge_files, or CONFIG_LSF not being enabled,
+                * implies the inode i_blocks represents total blocks in
+                * 512-byte units; 32 == size of vfs inode i_blocks * 8
                 */
                upper_limit = (1LL << 32) - 1;
 
@@ -1940,7 +1873,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        int blocksize;
        int db_count;
        int i;
-       int needs_recovery;
+       int needs_recovery, has_huge_files;
        __le32 features;
        __u64 blocks_count;
        int err;
@@ -2081,7 +2014,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                       sb->s_id, le32_to_cpu(features));
                goto failed_mount;
        }
-       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
+       has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                   EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
+       if (has_huge_files) {
                /*
                 * Large file size enabled file system can only be
                 * mount if kernel is build with CONFIG_LSF
@@ -2131,8 +2066,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                }
        }
 
-       sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits);
-       sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits);
+       sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
+                                                     has_huge_files);
+       sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
 
        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
                sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
@@ -2456,6 +2392,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        "available.\n");
        }
 
+       if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+               printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
+                               "requested data journaling mode\n");
+               clear_opt(sbi->s_mount_opt, DELALLOC);
+       } else if (test_opt(sb, DELALLOC))
+               printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
+
+       ext4_ext_init(sb);
+       err = ext4_mb_init(sb, needs_recovery);
+       if (err) {
+               printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
+                      err);
+               goto failed_mount4;
+       }
+
        /*
         * akpm: core read_super() calls in here with the superblock locked.
         * That deadlocks, because orphan cleanup needs to lock the superblock
@@ -2475,21 +2426,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
               test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
               "writeback");
 
-       if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
-               printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
-                               "requested data journaling mode\n");
-               clear_opt(sbi->s_mount_opt, DELALLOC);
-       } else if (test_opt(sb, DELALLOC))
-               printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
-
-       ext4_ext_init(sb);
-       err = ext4_mb_init(sb, needs_recovery);
-       if (err) {
-               printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
-                      err);
-               goto failed_mount4;
-       }
-
        lock_kernel();
        return 0;
 
index 0abe02c4242aa82d28aeef435a8f459a4ec729f5..8b119e16aa36d9970c881a5c41b0f98ff06962fa 100644 (file)
@@ -995,6 +995,9 @@ restart_loop:
        }
        spin_unlock(&journal->j_list_lock);
 
+       if (journal->j_commit_callback)
+               journal->j_commit_callback(journal, commit_transaction);
+
        trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
                   journal->j_devname, commit_transaction->t_tid,
                   journal->j_tail_sequence);
index e5d540588fa9609296f446c232278202bd1f2d6b..39b7805a599a79fd80c0150073cbf0f473fe8bcf 100644 (file)
@@ -52,6 +52,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
        transaction->t_expires = jiffies + journal->j_commit_interval;
        spin_lock_init(&transaction->t_handle_lock);
        INIT_LIST_HEAD(&transaction->t_inode_list);
+       INIT_LIST_HEAD(&transaction->t_private_list);
 
        /* Set up the commit timer for the new transaction. */
        journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
index 463d6f10b64f7ffaea2f4fc13e57bc2ca4a95c8e..c7d106ef22e2f535f2c78f470a300b5fb7e8b930 100644 (file)
@@ -641,6 +641,11 @@ struct transaction_s
         */
        int t_handle_count;
 
+       /*
+        * For use by the filesystem to store fs-specific data
+        * structures associated with the transaction
+        */
+       struct list_head        t_private_list;
 };
 
 struct transaction_run_stats_s {
@@ -935,6 +940,10 @@ struct journal_s
 
        pid_t                   j_last_sync_writer;
 
+       /* This function is called when a transaction is closed */
+       void                    (*j_commit_callback)(journal_t *,
+                                                    transaction_t *);
+
        /*
         * Journal statistics
         */
index 12b15c561a1f1ca3dcfe91c9f64e439857fc6bd6..e585657e9831afe4c5d36e5f52fabd1246b03ab1 100644 (file)
@@ -63,7 +63,15 @@ struct writeback_control {
        unsigned for_writepages:1;      /* This is a writepages() call */
        unsigned range_cyclic:1;        /* range_start is cyclic */
        unsigned more_io:1;             /* more io to be dispatched */
-       unsigned range_cont:1;
+       /*
+        * write_cache_pages() won't update wbc->nr_to_write and
+        * mapping->writeback_index if no_nrwrite_index_update
+        * is set.  write_cache_pages() may write more than we
+        * requested and we want to make sure nr_to_write and
+        * writeback_index are updated in a consistent manner
+        * so we use a single control to update them
+        */
+       unsigned no_nrwrite_index_update:1;
 };
 
 /*
index c130a137c12938cb0d7baa3ac6a9b8f333beb53c..b40f6d5f8fe9bc24750fb829742ef655f0f4c42c 100644 (file)
@@ -876,6 +876,7 @@ int write_cache_pages(struct address_space *mapping,
        pgoff_t end;            /* Inclusive */
        int scanned = 0;
        int range_whole = 0;
+       long nr_to_write = wbc->nr_to_write;
 
        if (wbc->nonblocking && bdi_write_congested(bdi)) {
                wbc->encountered_congestion = 1;
@@ -939,7 +940,7 @@ retry:
                                unlock_page(page);
                                ret = 0;
                        }
-                       if (ret || (--(wbc->nr_to_write) <= 0))
+                       if (ret || (--nr_to_write <= 0))
                                done = 1;
                        if (wbc->nonblocking && bdi_write_congested(bdi)) {
                                wbc->encountered_congestion = 1;
@@ -958,11 +959,12 @@ retry:
                index = 0;
                goto retry;
        }
-       if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
-               mapping->writeback_index = index;
+       if (!wbc->no_nrwrite_index_update) {
+               if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
+                       mapping->writeback_index = index;
+               wbc->nr_to_write = nr_to_write;
+       }
 
-       if (wbc->range_cont)
-               wbc->range_start = index << PAGE_CACHE_SHIFT;
        return ret;
 }
 EXPORT_SYMBOL(write_cache_pages);