www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
ext4: journal credit fix for the delayed allocation's writepages() function
author Mingming Cao <cmm@us.ibm.com>
Wed, 20 Aug 2008 02:15:58 +0000 (22:15 -0400)
committer Theodore Ts'o <tytso@mit.edu>
Wed, 20 Aug 2008 02:15:58 +0000 (22:15 -0400)
Previous delalloc writepages implementation started a new transaction
outside of a loop which called get_block() to do the block allocation.
Since we didn't know exactly how many blocks would need to be allocated,
the estimated journal credits required was very conservative and caused
many issues.

With the reworked delayed allocation, a new transaction is created for
each get_block(), thus we don't need to guess how many credits are needed
for multiple chunks of allocation.  We start every transaction with enough
credits for inserting a single extent.  When estimating the credits for
indirect blocks to allocate a chunk of blocks, we need to know the
number of data blocks to allocate.  We use the total number of reserved
delalloc datablocks; if that is too big, for non-extent files, we need
to limit the number of blocks to EXT4_MAX_TRANS_DATA.

Code cleanup from Aneesh.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Reviewed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
fs/ext4/extents.c
fs/ext4/inode.c

index 5596b70efa20f53dcf2b8547fb8ffedce187d884..b24d3c53f20cd20407d5d5373c79b725d308cc45 100644 (file)
@@ -1753,7 +1753,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
  * When pass the actual path, the caller should calculate credits
  * under i_data_sem.
  */
-int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num,
+int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
                                                struct ext4_ext_path *path)
 {
        if (path) {
@@ -1772,12 +1772,12 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num,
                         *  and other metadat blocks still need to be
                         *  accounted.
                         */
-                       /* 1 one bitmap, 1 block group descriptor */
+                       /* 1 bitmap, 1 block group descriptor */
                        ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
                }
        }
 
-       return ext4_chunk_trans_blocks(inode, num);
+       return ext4_chunk_trans_blocks(inode, nrblocks);
 }
 
 /*
@@ -1791,7 +1791,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num,
  * If the nrblocks are discontiguous, they could cause
  * the whole tree split more than once, but this is really rare.
  */
-int ext4_ext_index_trans_blocks(struct inode *inode, int num, int chunk)
+int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
        int index;
        int depth = ext_depth(inode);
index 8dd22eade42cd356e197887f6fc53a2404e1943d..d1906d9a22de0b0f7c04deeecc4d7f10ebb72c0a 100644 (file)
@@ -1848,29 +1848,53 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
 static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
                                   sector_t logical, struct buffer_head *bh)
 {
-       struct buffer_head *lbh = &mpd->lbh;
        sector_t next;
+       size_t b_size = bh->b_size;
+       struct buffer_head *lbh = &mpd->lbh;
+       int nrblocks = lbh->b_size >> mpd->inode->i_blkbits;
 
-       next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits);
-
+       /* check if the reserved journal credits might overflow */
+       if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
+               if (nrblocks >= EXT4_MAX_TRANS_DATA) {
+                       /*
+                        * With non-extent format we are limited by the journal
+                        * credit available.  Total credit needed to insert
+                        * nrblocks contiguous blocks is dependent on the
+                        * nrblocks.  So limit nrblocks.
+                        */
+                       goto flush_it;
+               } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
+                               EXT4_MAX_TRANS_DATA) {
+                       /*
+                        * Adding the new buffer_head would make it cross the
+                        * allowed limit for which we have journal credit
+                        * reserved. So limit the new bh->b_size
+                        */
+                       b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
+                                               mpd->inode->i_blkbits;
+                       /* we will do mpage_da_submit_io in the next loop */
+               }
+       }
        /*
         * First block in the extent
         */
        if (lbh->b_size == 0) {
                lbh->b_blocknr = logical;
-               lbh->b_size = bh->b_size;
+               lbh->b_size = b_size;
                lbh->b_state = bh->b_state & BH_FLAGS;
                return;
        }
 
+       next = lbh->b_blocknr + nrblocks;
        /*
         * Can we merge the block to our big extent?
         */
        if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
-               lbh->b_size += bh->b_size;
+               lbh->b_size += b_size;
                return;
        }
 
+flush_it:
        /*
         * We couldn't merge the block to our extent, so we
         * need to flush current  extent and start new one
@@ -2231,17 +2255,29 @@ static int ext4_da_writepage(struct page *page,
 }
 
 /*
- * For now just follow the DIO way to estimate the max credits
- * needed to write out EXT4_MAX_WRITEBACK_PAGES.
- * todo: need to calculate the max credits need for
- * extent based files, currently the DIO credits is based on
- * indirect-blocks mapping way.
- *
- * Probably should have a generic way to calculate credits
- * for DIO, writepages, and truncate
+ * This is called via ext4_da_writepages() to
+ * calculate the total number of credits to reserve to fit
+ * a single extent allocation into a single transaction,
+ * ext4_da_writepages() will loop calling this before
+ * the block allocation.
  */
-#define EXT4_MAX_WRITEBACK_PAGES      DIO_MAX_BLOCKS
-#define EXT4_MAX_WRITEBACK_CREDITS    25
+
+static int ext4_da_writepages_trans_blocks(struct inode *inode)
+{
+       int max_blocks = EXT4_I(inode)->i_reserved_data_blocks;
+
+       /*
+        * With non-extent format the journal credit needed to insert
+        * nrblocks contiguous blocks depends on nrblocks, so cap it.
+        * Test EXT4_EXTENTS_FL on EXT4_I(inode)->i_flags, exactly as
+        * mpage_add_bh_to_extent() does, not on the VFS inode->i_flags.
+        */
+       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
+           (max_blocks > EXT4_MAX_TRANS_DATA))
+               max_blocks = EXT4_MAX_TRANS_DATA;
+
+       return ext4_chunk_trans_blocks(inode, max_blocks);
+}
 
 static int ext4_da_writepages(struct address_space *mapping,
                              struct writeback_control *wbc)
@@ -2283,7 +2319,7 @@ restart_loop:
                 * by delalloc
                 */
                BUG_ON(ext4_should_journal_data(inode));
-               needed_blocks = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
+               needed_blocks = ext4_da_writepages_trans_blocks(inode);
 
                /* start a new transaction*/
                handle = ext4_journal_start(inode, needed_blocks);
@@ -4461,11 +4497,9 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
  * the modification of a single pages into a single transaction,
  * which may include multiple chunks of block allocations.
  *
- * This could be called via ext4_write_begin() or later
- * ext4_da_writepages() in delalyed allocation case.
+ * This could be called via ext4_write_begin()
  *
- * In both case it's possible that we could allocating multiple
- * chunks of blocks. We need to consider the worse case, when
+ * We need to consider the worse case, when
  * one new block per extent.
  */
 int ext4_writepage_trans_blocks(struct inode *inode)