www.pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - fs/ext4/extents.c
ext4: cleanup block allocator
[linux-2.6-omap-h63xx.git] / fs / ext4 / extents.c
index 668d82e494dd081ea36c5cbfd381b83474ca707c..555155d239df54a08edbd15690ed04c581ed4ad7 100644 (file)
@@ -32,7 +32,6 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/time.h>
-#include <linux/ext4_jbd2.h>
 #include <linux/jbd2.h>
 #include <linux/highuid.h>
 #include <linux/pagemap.h>
@@ -40,8 +39,9 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/falloc.h>
-#include <linux/ext4_fs_extents.h>
 #include <asm/uaccess.h>
+#include "ext4_jbd2.h"
+#include "ext4_extents.h"
 
 
 /*
@@ -92,17 +92,16 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
        ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
 }
 
-static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed)
+static int ext4_ext_journal_restart(handle_t *handle, int needed)
 {
        int err;
 
        if (handle->h_buffer_credits > needed)
-               return handle;
-       if (!ext4_journal_extend(handle, needed))
-               return handle;
-       err = ext4_journal_restart(handle, needed);
-
-       return handle;
+               return 0;
+       err = ext4_journal_extend(handle, needed);
+       if (err)
+               return err;
+       return ext4_journal_restart(handle, needed);
 }
 
 /*
@@ -180,15 +179,18 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
        return bg_start + colour + block;
 }
 
+/*
+ * Allocation for a meta data block
+ */
 static ext4_fsblk_t
-ext4_ext_new_block(handle_t *handle, struct inode *inode,
+ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
                        struct ext4_ext_path *path,
                        struct ext4_extent *ex, int *err)
 {
        ext4_fsblk_t goal, newblock;
 
        goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
-       newblock = ext4_new_block(handle, inode, goal, err);
+       newblock = ext4_new_meta_block(handle, inode, goal, err);
        return newblock;
 }
 
@@ -308,7 +310,7 @@ corrupted:
 }
 
 #define ext4_ext_check_header(inode, eh, depth)        \
-       __ext4_ext_check_header(__FUNCTION__, inode, eh, depth)
+       __ext4_ext_check_header(__func__, inode, eh, depth)
 
 #ifdef EXT_DEBUG
 static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -524,6 +526,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
                alloc = 1;
        }
        path[0].p_hdr = eh;
+       path[0].p_bh = NULL;
 
        i = depth;
        /* walk through the tree */
@@ -552,12 +555,14 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
        }
 
        path[ppos].p_depth = i;
-       path[ppos].p_hdr = eh;
        path[ppos].p_ext = NULL;
        path[ppos].p_idx = NULL;
 
        /* find extent */
        ext4_ext_binsearch(inode, path + ppos, block);
+       /* if not an empty leaf */
+       if (path[ppos].p_ext)
+               path[ppos].p_block = ext_pblock(path[ppos].p_ext);
 
        ext4_ext_show_path(inode, path);
 
@@ -614,7 +619,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 
        ix->ei_block = cpu_to_le32(logical);
        ext4_idx_store_pblock(ix, ptr);
-       curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1);
+       le16_add_cpu(&curp->p_hdr->eh_entries, 1);
 
        BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
                             > le16_to_cpu(curp->p_hdr->eh_max));
@@ -688,7 +693,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
        /* allocate all needed blocks */
        ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
        for (a = 0; a < depth - at; a++) {
-               newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+               newblock = ext4_ext_new_meta_block(handle, inode, path,
+                                                  newext, &err);
                if (newblock == 0)
                        goto cleanup;
                ablocks[a] = newblock;
@@ -736,7 +742,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
        }
        if (m) {
                memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
-               neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m);
+               le16_add_cpu(&neh->eh_entries, m);
        }
 
        set_buffer_uptodate(bh);
@@ -753,8 +759,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                err = ext4_ext_get_access(handle, inode, path + depth);
                if (err)
                        goto cleanup;
-               path[depth].p_hdr->eh_entries =
-                    cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
+               le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
                err = ext4_ext_dirty(handle, inode, path + depth);
                if (err)
                        goto cleanup;
@@ -817,8 +822,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                if (m) {
                        memmove(++fidx, path[i].p_idx - m,
                                sizeof(struct ext4_extent_idx) * m);
-                       neh->eh_entries =
-                               cpu_to_le16(le16_to_cpu(neh->eh_entries) + m);
+                       le16_add_cpu(&neh->eh_entries, m);
                }
                set_buffer_uptodate(bh);
                unlock_buffer(bh);
@@ -834,7 +838,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                        err = ext4_ext_get_access(handle, inode, path + i);
                        if (err)
                                goto cleanup;
-                       path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m);
+                       le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
                        err = ext4_ext_dirty(handle, inode, path + i);
                        if (err)
                                goto cleanup;
@@ -886,7 +890,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
        ext4_fsblk_t newblock;
        int err = 0;
 
-       newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+       newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err);
        if (newblock == 0)
                return err;
 
@@ -983,6 +987,8 @@ repeat:
                /* if we found index with free entry, then use that
                 * entry: create all needed subtree and add new leaf */
                err = ext4_ext_split(handle, inode, path, newext, i);
+               if (err)
+                       goto out;
 
                /* refill path */
                ext4_ext_drop_refs(path);
@@ -1369,7 +1375,7 @@ int ext4_ext_try_to_merge(struct inode *inode,
                                * sizeof(struct ext4_extent);
                        memmove(ex + 1, ex + 2, len);
                }
-               eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1);
+               le16_add_cpu(&eh->eh_entries, -1);
                merge_done = 1;
                WARN_ON(eh->eh_entries == 0);
                if (!eh->eh_entries)
@@ -1560,7 +1566,7 @@ has_space:
                path[depth].p_ext = nearex;
        }
 
-       eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1);
+       le16_add_cpu(&eh->eh_entries, 1);
        nearex = path[depth].p_ext;
        nearex->ee_block = newext->ee_block;
        ext4_ext_store_pblock(nearex, ext_pblock(newext));
@@ -1699,7 +1705,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
        err = ext4_ext_get_access(handle, inode, path);
        if (err)
                return err;
-       path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1);
+       le16_add_cpu(&path->p_hdr->eh_entries, -1);
        err = ext4_ext_dirty(handle, inode, path);
        if (err)
                return err;
@@ -1885,11 +1891,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
 #endif
 
-               handle = ext4_ext_journal_restart(handle, credits);
-               if (IS_ERR(handle)) {
-                       err = PTR_ERR(handle);
+               err = ext4_ext_journal_restart(handle, credits);
+               if (err)
                        goto out;
-               }
 
                err = ext4_ext_get_access(handle, inode, path + depth);
                if (err)
@@ -1902,7 +1906,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                if (num == 0) {
                        /* this extent is removed; mark slot entirely unused */
                        ext4_ext_store_pblock(ex, 0);
-                       eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
+                       le16_add_cpu(&eh->eh_entries, -1);
                }
 
                ex->ee_block = cpu_to_le32(block);
@@ -1979,7 +1983,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
         * We start scanning from right side, freeing all the blocks
         * after i_size and walking into the tree depth-wise.
         */
-       path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
+       path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
        if (path == NULL) {
                ext4_journal_stop(handle);
                return -ENOMEM;
@@ -2266,7 +2270,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                ex->ee_len   = orig_ex.ee_len;
                ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                ext4_ext_dirty(handle, inode, path + depth);
-               return le16_to_cpu(ex->ee_len);
+               /* zeroed the full extent */
+               return allocated;
        }
 
        /* ex1: ee_block to iblock - 1 : uninitialized */
@@ -2311,11 +2316,45 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                ex->ee_len   = orig_ex.ee_len;
                                ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                                ext4_ext_dirty(handle, inode, path + depth);
-                               return le16_to_cpu(ex->ee_len);
+                               /* zeroed the full extent */
+                               return allocated;
 
                        } else if (err)
                                goto fix_extent_len;
 
+                       /*
+                        * We need to zero out the second half because
+                        * a fallocate request can update the file size, and
+                        * converting the second half to an initialized
+                        * extent without zeroing it would leak stale
+                        * on-disk data to user space.
+                        */
+                       err =  ext4_ext_zeroout(inode, ex3);
+                       if (err) {
+                               /*
+                                * Zeroing failed: we should mark the second half
+                                * as uninitialized and return the error.  The insert
+                                * would have changed the extent, so it is looked up again.
+                                */
+                               depth = ext_depth(inode);
+                               ext4_ext_drop_refs(path);
+                               path = ext4_ext_find_extent(inode,
+                                                               iblock, path);
+                               if (IS_ERR(path)) {
+                                       err = PTR_ERR(path);
+                                       return err;
+                               }
+                               ex = path[depth].p_ext;
+                               err = ext4_ext_get_access(handle, inode,
+                                                               path + depth);
+                               if (err)
+                                       return err;
+                               ext4_ext_mark_uninitialized(ex);
+                               ext4_ext_dirty(handle, inode, path + depth);
+                               return err;
+                       }
+
+                       /* zeroed the second half */
                        return allocated;
                }
                ex3 = &newex;
@@ -2333,7 +2372,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                        ex->ee_len   = orig_ex.ee_len;
                        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                        ext4_ext_dirty(handle, inode, path + depth);
-                       return le16_to_cpu(ex->ee_len);
+                       /* zeroed the full extent */
+                       return allocated;
 
                } else if (err)
                        goto fix_extent_len;
@@ -2381,7 +2421,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                        ex->ee_len   = orig_ex.ee_len;
                        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                        ext4_ext_dirty(handle, inode, path + depth);
-                       return le16_to_cpu(ex->ee_len);
+                       /* zero out the first half */
+                       return allocated;
                }
        }
        /*
@@ -2448,7 +2489,8 @@ insert:
                ex->ee_len   = orig_ex.ee_len;
                ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                ext4_ext_dirty(handle, inode, path + depth);
-               return le16_to_cpu(ex->ee_len);
+               /* zero out the first half */
+               return allocated;
        } else if (err)
                goto fix_extent_len;
 out:
@@ -2570,8 +2612,20 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                        }
                        if (create == EXT4_CREATE_UNINITIALIZED_EXT)
                                goto out;
-                       if (!create)
+                       if (!create) {
+                               /*
+                                * We have blocks reserved already.  We
+                                * return allocated blocks so that delalloc
+                                * won't do block reservation for us.  But
+                                * the buffer head will be unmapped so that
+                                * a read from the block returns 0s.
+                                */
+                               if (allocated > max_blocks)
+                                       allocated = max_blocks;
+                               /* mark the buffer unwritten */
+                               __set_bit(BH_Unwritten, &bh_result->b_state);
                                goto out2;
+                       }
 
                        ret = ext4_ext_convert_to_initialized(handle, inode,
                                                                path, iblock,
@@ -2668,13 +2722,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                goto out2;
        }
 
-       if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize)
-               EXT4_I(inode)->i_disksize = inode->i_size;
-
        /* previous routine could use block we allocated */
        newblock = ext_pblock(&newex);
        allocated = ext4_ext_get_actual_len(&newex);
 outnew:
+       if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize)
+               EXT4_I(inode)->i_disksize = inode->i_size;
+
        __set_bit(BH_New, &bh_result->b_state);
 
        /* Cache only when it is _not_ an uninitialized extent */
@@ -2761,6 +2815,8 @@ out_stop:
                ext4_orphan_del(handle, inode);
 
        up_write(&EXT4_I(inode)->i_data_sem);
+       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+       ext4_mark_inode_dirty(handle, inode);
        ext4_journal_stop(handle);
 }
 
@@ -2785,6 +2841,28 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
        return needed;
 }
 
+static void ext4_falloc_update_inode(struct inode *inode,
+                               int mode, loff_t new_size, int update_ctime)
+{
+       struct timespec now;
+
+       if (update_ctime) {
+               now = current_fs_time(inode->i_sb);
+               if (!timespec_equal(&inode->i_ctime, &now))
+                       inode->i_ctime = now;
+       }
+       /*
+        * Update the file size only when preallocation was requested
+        * beyond the current size and FALLOC_FL_KEEP_SIZE is not set.
+        */
+       if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+                               new_size > i_size_read(inode)) {
+               i_size_write(inode, new_size);
+               EXT4_I(inode)->i_disksize = new_size;
+       }
+
+}
+
 /*
  * preallocate space for a file. This implements ext4's fallocate inode
  * operation, which gets called from sys_fallocate system call.
@@ -2796,8 +2874,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 {
        handle_t *handle;
        ext4_lblk_t block;
+       loff_t new_size;
        unsigned long max_blocks;
-       ext4_fsblk_t nblocks = 0;
        int ret = 0;
        int ret2 = 0;
        int retries = 0;
@@ -2816,9 +2894,12 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
                return -ENODEV;
 
        block = offset >> blkbits;
+       /*
+        * We can't just convert len to max_blocks: with blocksize = 4096,
+        * offset = 3072 and len = 2048 the range spans two blocks.
+        */
        max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
-                       - block;
-
+                                                       - block;
        /*
         * credits to insert 1 extent into extent tree + buffers to be able to
         * modify 1 super block, 1 block bitmap and 1 group descriptor.
@@ -2834,7 +2915,6 @@ retry:
                        ret = PTR_ERR(handle);
                        break;
                }
-
                ret = ext4_get_blocks_wrap(handle, inode, block,
                                          max_blocks, &map_bh,
                                          EXT4_CREATE_UNINITIALIZED_EXT, 0);
@@ -2850,61 +2930,24 @@ retry:
                        ret2 = ext4_journal_stop(handle);
                        break;
                }
-               if (ret > 0) {
-                       /* check wrap through sign-bit/zero here */
-                       if ((block + ret) < 0 || (block + ret) < block) {
-                               ret = -EIO;
-                               ext4_mark_inode_dirty(handle, inode);
-                               ret2 = ext4_journal_stop(handle);
-                               break;
-                       }
-                       if (buffer_new(&map_bh) && ((block + ret) >
-                           (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
-                           >> blkbits)))
-                                       nblocks = nblocks + ret;
-               }
-
-               /* Update ctime if new blocks get allocated */
-               if (nblocks) {
-                       struct timespec now;
-
-                       now = current_fs_time(inode->i_sb);
-                       if (!timespec_equal(&inode->i_ctime, &now))
-                               inode->i_ctime = now;
-               }
+               if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
+                                               blkbits) >> blkbits))
+                       new_size = offset + len;
+               else
+                       new_size = (block + ret) << blkbits;
 
+               ext4_falloc_update_inode(inode, mode, new_size,
+                                               buffer_new(&map_bh));
                ext4_mark_inode_dirty(handle, inode);
                ret2 = ext4_journal_stop(handle);
                if (ret2)
                        break;
        }
-
-       if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+       if (ret == -ENOSPC &&
+                       ext4_should_retry_alloc(inode->i_sb, &retries)) {
+               ret = 0;
                goto retry;
-
-       /*
-        * Time to update the file size.
-        * Update only when preallocation was requested beyond the file size.
-        */
-       if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-           (offset + len) > i_size_read(inode)) {
-               if (ret > 0) {
-                       /*
-                        * if no error, we assume preallocation succeeded
-                        * completely
-                        */
-                       i_size_write(inode, offset + len);
-                       EXT4_I(inode)->i_disksize = i_size_read(inode);
-               } else if (ret < 0 && nblocks) {
-                       /* Handle partial allocation scenario */
-                       loff_t newsize;
-
-                       newsize  = (nblocks << blkbits) + i_size_read(inode);
-                       i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
-                       EXT4_I(inode)->i_disksize = i_size_read(inode);
-               }
        }
-
        mutex_unlock(&inode->i_mutex);
        return ret > 0 ? ret2 : ret;
 }