ext4: move headers out of include/linux

[linux-2.6-omap-h63xx.git] / fs / ext4 / inode.c
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index 0e9055cf700e606f1aacb5432b8744a810242e11..0c94db462c2fde4a838dcbb8480fb1474e27617f 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -25,7 +25,6 @@
  #include <linux/module.h>
  #include <linux/fs.h>
  #include <linux/time.h>
-#include <linux/ext4_jbd2.h>
  #include <linux/jbd2.h>
  #include <linux/highuid.h>
  #include <linux/pagemap.h>
@@ -36,6 +35,7 @@
  #include <linux/mpage.h>
  #include <linux/uio.h>
  #include <linux/bio.h>
+#include "ext4_jbd2.h"
  #include "xattr.h"
  #include "acl.h"
  
@@ -93,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
         BUFFER_TRACE(bh, "call ext4_journal_revoke");
         err = ext4_journal_revoke(handle, blocknr, bh);
         if (err)
-               ext4_abort(inode->i_sb, __FUNCTION__,
+               ext4_abort(inode->i_sb, __func__,
                            "error %d when attempting revoke", err);
         BUFFER_TRACE(bh, "exit");
         return err;
@@ -382,7 +382,7 @@ no_block:
   *     @inode: owner
   *     @ind: descriptor of indirect block.
   *
- *     This function returns the prefered place for block allocation.
+ *     This function returns the preferred place for block allocation.
   *     It is used when heuristic for sequential allocation fails.
   *     Rules are:
   *       + if there is a block to the left of our position - allocate near it.
@@ -403,6 +403,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
         __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data;
         __le32 *p;
         ext4_fsblk_t bg_start;
+       ext4_fsblk_t last_block;
         ext4_grpblk_t colour;
  
         /* Try to find previous block */
@@ -420,18 +421,23 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
          * into the same cylinder group then.
          */
         bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group);
-       colour = (current->pid % 16) *
+       last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
+
+       if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
+               colour = (current->pid % 16) *
                         (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
+       else
+               colour = (current->pid % 16) * ((last_block - bg_start) / 16);
         return bg_start + colour;
  }
  
  /**
- *     ext4_find_goal - find a prefered place for allocation.
+ *     ext4_find_goal - find a preferred place for allocation.
   *     @inode: owner
   *     @block:  block we want
   *     @partial: pointer to the last triple within a chain
   *
- *     Normally this function find the prefered place for block allocation,
+ *     Normally this function finds the preferred place for block allocation,
   *     returns it.
   */
  static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
@@ -768,7 +774,6 @@ err_out:
   *
   * `handle' can be NULL if create == 0.
   *
- * The BKL may not be held on entry here.  Be sure to take it early.
   * return > 0, # of blocks mapped or allocated.
   * return = 0, if plain lookup failed.
   * return < 0, error case.
@@ -892,13 +897,49 @@ out:
         return err;
  }
  
-#define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32)
+/* Maximum number of blocks we map for direct IO at once. */
+#define DIO_MAX_BLOCKS 4096
+/*
+ * Number of credits we need for writing DIO_MAX_BLOCKS:
+ * We need sb + group descriptor + bitmap + inode -> 4
+ * For B blocks with A block pointers per block we need:
+ * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect).
+ * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25.
+ */
+#define DIO_CREDITS 25
+
  
+/*
+ *
+ *
+ * ext4 get_block() wrapper function
+ * It will do a look up first, and return if the blocks are already mapped.
+ * Otherwise it takes the write lock of the i_data_sem, allocates blocks,
+ * stores the allocated blocks in the result buffer head and marks it
+ * mapped.
+ *
+ * If the file is extents based, it will call ext4_ext_get_blocks();
+ * otherwise, it calls ext4_get_blocks_handle() to handle indirect mapping
+ * based files.
+ *
+ * On success, it returns the number of blocks being mapped or allocated.
+ * If create == 0 and the blocks are pre-allocated and uninitialized,
+ * the result buffer head is unmapped. If create == 1, it will make sure
+ * the buffer head is mapped.
+ *
+ * It returns 0 if plain look up failed (blocks have not been allocated); in
+ * that case, the buffer head is unmapped.
+ *
+ * It returns the error in case of allocation failure.
+ */
  int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
                         unsigned long max_blocks, struct buffer_head *bh,
                         int create, int extend_disksize)
  {
         int retval;
+
+       clear_buffer_mapped(bh);
+
         /*
          * Try to see if we can get  the block without requesting
          * for new file system block.
@@ -912,12 +953,26 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
                                 inode, block, max_blocks, bh, 0, 0);
         }
         up_read((&EXT4_I(inode)->i_data_sem));
-       if (!create || (retval > 0))
+
+       /* If it is only a block(s) look up */
+       if (!create)
+               return retval;
+
+       /*
+        * Return if the blocks have already been allocated.
+        *
+        * Note that if blocks have been preallocated,
+        * ext4_ext_get_blocks() called with create = 0
+        * returns with the buffer head unmapped.
+        */
+       if (retval > 0 && buffer_mapped(bh))
                 return retval;
  
         /*
-        * We need to allocate new blocks which will result
-        * in i_data update
+        * Allocating new blocks and/or writing to an uninitialized extent
+        * will possibly result in updating i_data, so we take
+        * the write lock of i_data_sem, and call get_blocks()
+        * with create == 1 flag.
          */
         down_write((&EXT4_I(inode)->i_data_sem));
         /*
@@ -930,6 +985,16 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
         } else {
                 retval = ext4_get_blocks_handle(handle, inode, block,
                                 max_blocks, bh, create, extend_disksize);
+
+               if (retval > 0 && buffer_new(bh)) {
+                       /*
+                        * We allocated new blocks which will result in
+                        * i_data's format changing.  Force the migrate
+                        * to fail by clearing migrate flags
+                        */
+                       EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
+                                                       ~EXT4_EXT_MIGRATE;
+               }
         }
         up_write((&EXT4_I(inode)->i_data_sem));
         return retval;
@@ -939,49 +1004,31 @@ static int ext4_get_block(struct inode *inode, sector_t iblock,
                         struct buffer_head *bh_result, int create)
  {
         handle_t *handle = ext4_journal_current_handle();
-       int ret = 0;
+       int ret = 0, started = 0;
         unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
  
-       if (!create)
-               goto get_block;         /* A read */
-
-       if (max_blocks == 1)
-               goto get_block;         /* A single block get */
-
-       if (handle->h_transaction->t_state == T_LOCKED) {
-               /*
-                * Huge direct-io writes can hold off commits for long
-                * periods of time.  Let this commit run.
-                */
-               ext4_journal_stop(handle);
-               handle = ext4_journal_start(inode, DIO_CREDITS);
-               if (IS_ERR(handle))
+       if (create && !handle) {
+               /* Direct IO write... */
+               if (max_blocks > DIO_MAX_BLOCKS)
+                       max_blocks = DIO_MAX_BLOCKS;
+               handle = ext4_journal_start(inode, DIO_CREDITS +
+                             2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
+               if (IS_ERR(handle)) {
                         ret = PTR_ERR(handle);
-               goto get_block;
-       }
-
-       if (handle->h_buffer_credits <= EXT4_RESERVE_TRANS_BLOCKS) {
-               /*
-                * Getting low on buffer credits...
-                */
-               ret = ext4_journal_extend(handle, DIO_CREDITS);
-               if (ret > 0) {
-                       /*
-                        * Couldn't extend the transaction.  Start a new one.
-                        */
-                       ret = ext4_journal_restart(handle, DIO_CREDITS);
+                       goto out;
                 }
+               started = 1;
         }
  
-get_block:
-       if (ret == 0) {
-               ret = ext4_get_blocks_wrap(handle, inode, iblock,
+       ret = ext4_get_blocks_wrap(handle, inode, iblock,
                                         max_blocks, bh_result, create, 0);
-               if (ret > 0) {
-                       bh_result->b_size = (ret << inode->i_blkbits);
-                       ret = 0;
-               }
+       if (ret > 0) {
+               bh_result->b_size = (ret << inode->i_blkbits);
+               ret = 0;
         }
+       if (started)
+               ext4_journal_stop(handle);
+out:
         return ret;
  }
  
@@ -1193,7 +1240,7 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
  {
         int err = jbd2_journal_dirty_data(handle, bh);
         if (err)
-               ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__,
+               ext4_journal_abort_handle(__func__, __func__,
                                                 bh, handle, err);
         return err;
  }
@@ -1671,7 +1718,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
   * if the machine crashes during the write.
   *
   * If the O_DIRECT write is intantiating holes inside i_size and the machine
- * crashes then stale disk data _may_ be exposed inside the file.
+ * crashes then stale disk data _may_ be exposed inside the file. But current
+ * VFS code falls back into buffered path in that case so we are safe.
   */
  static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
                         const struct iovec *iov, loff_t offset,
@@ -1680,7 +1728,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
         struct file *file = iocb->ki_filp;
         struct inode *inode = file->f_mapping->host;
         struct ext4_inode_info *ei = EXT4_I(inode);
-       handle_t *handle = NULL;
+       handle_t *handle;
         ssize_t ret;
         int orphan = 0;
         size_t count = iov_length(iov, nr_segs);
@@ -1688,17 +1736,21 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
         if (rw == WRITE) {
                 loff_t final_size = offset + count;
  
-               handle = ext4_journal_start(inode, DIO_CREDITS);
-               if (IS_ERR(handle)) {
-                       ret = PTR_ERR(handle);
-                       goto out;
-               }
                 if (final_size > inode->i_size) {
+                       /* Credits for sb + inode write */
+                       handle = ext4_journal_start(inode, 2);
+                       if (IS_ERR(handle)) {
+                               ret = PTR_ERR(handle);
+                               goto out;
+                       }
                         ret = ext4_orphan_add(handle, inode);
-                       if (ret)
-                               goto out_stop;
+                       if (ret) {
+                               ext4_journal_stop(handle);
+                               goto out;
+                       }
                         orphan = 1;
                         ei->i_disksize = inode->i_size;
+                       ext4_journal_stop(handle);
                 }
         }
  
@@ -1706,18 +1758,21 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
                                  offset, nr_segs,
                                  ext4_get_block, NULL);
  
-       /*
-        * Reacquire the handle: ext4_get_block() can restart the transaction
-        */
-       handle = ext4_journal_current_handle();
-
-out_stop:
-       if (handle) {
+       if (orphan) {
                 int err;
  
-               if (orphan && inode->i_nlink)
+               /* Credits for sb + inode write */
+               handle = ext4_journal_start(inode, 2);
+               if (IS_ERR(handle)) {
+                       /* This is really bad luck. We've written the data
+                        * but cannot extend i_size. Bail out and pretend
+                        * the write failed... */
+                       ret = PTR_ERR(handle);
+                       goto out;
+               }
+               if (inode->i_nlink)
                         ext4_orphan_del(handle, inode);
-               if (orphan && ret > 0) {
+               if (ret > 0) {
                         loff_t end = offset + ret;
                         if (end > inode->i_size) {
                                 ei->i_disksize = end;
@@ -2456,12 +2511,10 @@ out_stop:
  static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
                 unsigned long ino, struct ext4_iloc *iloc)
  {
-       unsigned long desc, group_desc;
         ext4_group_t block_group;
         unsigned long offset;
         ext4_fsblk_t block;
-       struct buffer_head *bh;
-       struct ext4_group_desc * gdp;
+       struct ext4_group_desc *gdp;
  
         if (!ext4_valid_inum(sb, ino)) {
                 /*
@@ -2473,22 +2526,10 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
         }
  
         block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
-       if (block_group >= EXT4_SB(sb)->s_groups_count) {
-               ext4_error(sb,"ext4_get_inode_block","group >= groups count");
+       gdp = ext4_get_group_desc(sb, block_group, NULL);
+       if (!gdp)
                 return 0;
-       }
-       smp_rmb();
-       group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
-       desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
-       bh = EXT4_SB(sb)->s_group_desc[group_desc];
-       if (!bh) {
-               ext4_error (sb, "ext4_get_inode_block",
-                           "Descriptor not loaded");
-               return 0;
-       }
  
-       gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data +
-               desc * EXT4_DESC_SIZE(sb));
         /*
          * Figure out the offset within the block group inode table
          */
@@ -2680,21 +2721,31 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
         }
  }
  
-void ext4_read_inode(struct inode * inode)
+struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
  {
         struct ext4_iloc iloc;
         struct ext4_inode *raw_inode;
-       struct ext4_inode_info *ei = EXT4_I(inode);
+       struct ext4_inode_info *ei;
         struct buffer_head *bh;
+       struct inode *inode;
+       long ret;
         int block;
  
+       inode = iget_locked(sb, ino);
+       if (!inode)
+               return ERR_PTR(-ENOMEM);
+       if (!(inode->i_state & I_NEW))
+               return inode;
+
+       ei = EXT4_I(inode);
  #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
         ei->i_acl = EXT4_ACL_NOT_CACHED;
         ei->i_default_acl = EXT4_ACL_NOT_CACHED;
  #endif
         ei->i_block_alloc_info = NULL;
  
-       if (__ext4_get_inode_loc(inode, &iloc, 0))
+       ret = __ext4_get_inode_loc(inode, &iloc, 0);
+       if (ret < 0)
                 goto bad_inode;
         bh = iloc.bh;
         raw_inode = ext4_raw_inode(&iloc);
@@ -2720,6 +2771,7 @@ void ext4_read_inode(struct inode * inode)
                     !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
                         /* this inode is deleted */
                         brelse (bh);
+                       ret = -ESTALE;
                         goto bad_inode;
                 }
                 /* The only unlinked inodes we let through here have
@@ -2747,17 +2799,12 @@ void ext4_read_inode(struct inode * inode)
                 ei->i_data[block] = raw_inode->i_block[block];
         INIT_LIST_HEAD(&ei->i_orphan);
  
-       if (inode->i_ino >= EXT4_FIRST_INO(inode->i_sb) + 1 &&
-           EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
-               /*
-                * When mke2fs creates big inodes it does not zero out
-                * the unused bytes above EXT4_GOOD_OLD_INODE_SIZE,
-                * so ignore those first few inodes.
-                */
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
                 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
                 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
                     EXT4_INODE_SIZE(inode->i_sb)) {
                         brelse (bh);
+                       ret = -EIO;
                         goto bad_inode;
                 }
                 if (ei->i_extra_isize == 0) {
@@ -2811,11 +2858,12 @@ void ext4_read_inode(struct inode * inode)
         }
         brelse (iloc.bh);
         ext4_set_inode_flags(inode);
-       return;
+       unlock_new_inode(inode);
+       return inode;
  
  bad_inode:
-       make_bad_inode(inode);
-       return;
+       iget_failed(inode);
+       return ERR_PTR(ret);
  }
  
  static int ext4_inode_blocks_set(handle_t *handle,
@@ -2924,7 +2972,8 @@ static int ext4_do_update_inode(handle_t *handle,
         if (ext4_inode_blocks_set(handle, raw_inode, ei))
                 goto out_brelse;
         raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
-       raw_inode->i_flags = cpu_to_le32(ei->i_flags);
+       /* clear the migrate flag in the raw_inode */
+       raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE);
         if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
             cpu_to_le32(EXT4_OS_HURD))
                 raw_inode->i_file_acl_high =
@@ -3322,7 +3371,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
                                 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
                                 if (mnt_count !=
                                         le16_to_cpu(sbi->s_es->s_mnt_count)) {
-                                       ext4_warning(inode->i_sb, __FUNCTION__,
+                                       ext4_warning(inode->i_sb, __func__,
                                         "Unable to expand inode %lu. Delete"
                                         " some EAs or run e2fsck.",
                                         inode->i_ino);
@@ -3363,7 +3412,7 @@ void ext4_dirty_inode(struct inode *inode)
                 current_handle->h_transaction != handle->h_transaction) {
                 /* This task has a transaction open against a different fs */
                 printk(KERN_EMERG "%s: transactions do not match!\n",
-                      __FUNCTION__);
+                      __func__);
         } else {
                 jbd_debug(5, "marking dirty.  outer handle=%p\n",
                                 current_handle);