#include <linux/module.h>
#include <linux/fs.h>
#include <linux/time.h>
-#include <linux/ext4_jbd2.h>
#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/uio.h>
#include <linux/bio.h>
+#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
BUFFER_TRACE(bh, "call ext4_journal_revoke");
err = ext4_journal_revoke(handle, blocknr, bh);
if (err)
- ext4_abort(inode->i_sb, __FUNCTION__,
+ ext4_abort(inode->i_sb, __func__,
"error %d when attempting revoke", err);
BUFFER_TRACE(bh, "exit");
return err;
* @inode: owner
* @ind: descriptor of indirect block.
*
- * This function returns the prefered place for block allocation.
+ * This function returns the preferred place for block allocation.
* It is used when heuristic for sequential allocation fails.
* Rules are:
* + if there is a block to the left of our position - allocate near it.
__le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data;
__le32 *p;
ext4_fsblk_t bg_start;
+ ext4_fsblk_t last_block;
ext4_grpblk_t colour;
/* Try to find previous block */
* into the same cylinder group then.
*/
bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group);
- colour = (current->pid % 16) *
+ last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
+
+ if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
+ colour = (current->pid % 16) *
(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
+ else
+ colour = (current->pid % 16) * ((last_block - bg_start) / 16);
return bg_start + colour;
}
/**
- * ext4_find_goal - find a prefered place for allocation.
+ * ext4_find_goal - find a preferred place for allocation.
* @inode: owner
* @block: block we want
* @partial: pointer to the last triple within a chain
*
- * Normally this function find the prefered place for block allocation,
+ * Normally this function find the preferred place for block allocation,
* returns it.
*/
static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
*
* `handle' can be NULL if create == 0.
*
- * The BKL may not be held on entry here. Be sure to take it early.
* return > 0, # of blocks mapped or allocated.
* return = 0, if plain lookup failed.
* return < 0, error case.
return err;
}
-#define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32)
+/* Maximum number of blocks we map for direct IO at once. */
+#define DIO_MAX_BLOCKS 4096
+/*
+ * Number of credits we need for writing DIO_MAX_BLOCKS:
+ * We need sb + group descriptor + bitmap + inode -> 4
+ * For B blocks with A block pointers per block we need:
+ * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect).
+ * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25.
+ */
+#define DIO_CREDITS 25
+
+/*
+ *
+ *
+ * ext4_ext4 get_block() wrapper function
+ * It will do a look up first, and returns if the blocks already mapped.
+ * Otherwise it takes the write lock of the i_data_sem and allocate blocks
+ * and store the allocated blocks in the result buffer head and mark it
+ * mapped.
+ *
+ * If file type is extents based, it will call ext4_ext_get_blocks(),
+ * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping
+ * based files
+ *
+ * On success, it returns the number of blocks being mapped or allocate.
+ * if create==0 and the blocks are pre-allocated and uninitialized block,
+ * the result buffer head is unmapped. If the create ==1, it will make sure
+ * the buffer head is mapped.
+ *
+ * It returns 0 if plain look up failed (blocks have not been allocated), in
+ * that casem, buffer head is unmapped
+ *
+ * It returns the error in case of allocation failure.
+ */
int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
unsigned long max_blocks, struct buffer_head *bh,
int create, int extend_disksize)
{
int retval;
+
+ clear_buffer_mapped(bh);
+
/*
* Try to see if we can get the block without requesting
* for new file system block.
inode, block, max_blocks, bh, 0, 0);
}
up_read((&EXT4_I(inode)->i_data_sem));
- if (!create || (retval > 0))
+
+ /* If it is only a block(s) look up */
+ if (!create)
+ return retval;
+
+ /*
+ * Returns if the blocks have already allocated
+ *
+ * Note that if blocks have been preallocated
+ * ext4_ext_get_block() returns th create = 0
+ * with buffer head unmapped.
+ */
+ if (retval > 0 && buffer_mapped(bh))
return retval;
/*
- * We need to allocate new blocks which will result
- * in i_data update
+ * New blocks allocate and/or writing to uninitialized extent
+ * will possibly result in updating i_data, so we take
+ * the write lock of i_data_sem, and call get_blocks()
+ * with create == 1 flag.
*/
down_write((&EXT4_I(inode)->i_data_sem));
/*
} else {
retval = ext4_get_blocks_handle(handle, inode, block,
max_blocks, bh, create, extend_disksize);
+
+ if (retval > 0 && buffer_new(bh)) {
+ /*
+ * We allocated new blocks which will result in
+ * i_data's format changing. Force the migrate
+ * to fail by clearing migrate flags
+ */
+ EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
+ ~EXT4_EXT_MIGRATE;
+ }
}
up_write((&EXT4_I(inode)->i_data_sem));
return retval;
struct buffer_head *bh_result, int create)
{
handle_t *handle = ext4_journal_current_handle();
- int ret = 0;
+ int ret = 0, started = 0;
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
- if (!create)
- goto get_block; /* A read */
-
- if (max_blocks == 1)
- goto get_block; /* A single block get */
-
- if (handle->h_transaction->t_state == T_LOCKED) {
- /*
- * Huge direct-io writes can hold off commits for long
- * periods of time. Let this commit run.
- */
- ext4_journal_stop(handle);
- handle = ext4_journal_start(inode, DIO_CREDITS);
- if (IS_ERR(handle))
+ if (create && !handle) {
+ /* Direct IO write... */
+ if (max_blocks > DIO_MAX_BLOCKS)
+ max_blocks = DIO_MAX_BLOCKS;
+ handle = ext4_journal_start(inode, DIO_CREDITS +
+ 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
+ if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
- goto get_block;
- }
-
- if (handle->h_buffer_credits <= EXT4_RESERVE_TRANS_BLOCKS) {
- /*
- * Getting low on buffer credits...
- */
- ret = ext4_journal_extend(handle, DIO_CREDITS);
- if (ret > 0) {
- /*
- * Couldn't extend the transaction. Start a new one.
- */
- ret = ext4_journal_restart(handle, DIO_CREDITS);
+ goto out;
}
+ started = 1;
}
-get_block:
- if (ret == 0) {
- ret = ext4_get_blocks_wrap(handle, inode, iblock,
+ ret = ext4_get_blocks_wrap(handle, inode, iblock,
max_blocks, bh_result, create, 0);
- if (ret > 0) {
- bh_result->b_size = (ret << inode->i_blkbits);
- ret = 0;
- }
+ if (ret > 0) {
+ bh_result->b_size = (ret << inode->i_blkbits);
+ ret = 0;
}
+ if (started)
+ ext4_journal_stop(handle);
+out:
return ret;
}
{
int err = jbd2_journal_dirty_data(handle, bh);
if (err)
- ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__,
+ ext4_journal_abort_handle(__func__, __func__,
bh, handle, err);
return err;
}
* if the machine crashes during the write.
*
* If the O_DIRECT write is intantiating holes inside i_size and the machine
- * crashes then stale disk data _may_ be exposed inside the file.
+ * crashes then stale disk data _may_ be exposed inside the file. But current
+ * VFS code falls back into buffered path in that case so we are safe.
*/
static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset,
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
struct ext4_inode_info *ei = EXT4_I(inode);
- handle_t *handle = NULL;
+ handle_t *handle;
ssize_t ret;
int orphan = 0;
size_t count = iov_length(iov, nr_segs);
if (rw == WRITE) {
loff_t final_size = offset + count;
- handle = ext4_journal_start(inode, DIO_CREDITS);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- goto out;
- }
if (final_size > inode->i_size) {
+ /* Credits for sb + inode write */
+ handle = ext4_journal_start(inode, 2);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out;
+ }
ret = ext4_orphan_add(handle, inode);
- if (ret)
- goto out_stop;
+ if (ret) {
+ ext4_journal_stop(handle);
+ goto out;
+ }
orphan = 1;
ei->i_disksize = inode->i_size;
+ ext4_journal_stop(handle);
}
}
offset, nr_segs,
ext4_get_block, NULL);
- /*
- * Reacquire the handle: ext4_get_block() can restart the transaction
- */
- handle = ext4_journal_current_handle();
-
-out_stop:
- if (handle) {
+ if (orphan) {
int err;
- if (orphan && inode->i_nlink)
+ /* Credits for sb + inode write */
+ handle = ext4_journal_start(inode, 2);
+ if (IS_ERR(handle)) {
+ /* This is really bad luck. We've written the data
+ * but cannot extend i_size. Bail out and pretend
+ * the write failed... */
+ ret = PTR_ERR(handle);
+ goto out;
+ }
+ if (inode->i_nlink)
ext4_orphan_del(handle, inode);
- if (orphan && ret > 0) {
+ if (ret > 0) {
loff_t end = offset + ret;
if (end > inode->i_size) {
ei->i_disksize = end;
static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
unsigned long ino, struct ext4_iloc *iloc)
{
- unsigned long desc, group_desc;
ext4_group_t block_group;
unsigned long offset;
ext4_fsblk_t block;
- struct buffer_head *bh;
- struct ext4_group_desc * gdp;
+ struct ext4_group_desc *gdp;
if (!ext4_valid_inum(sb, ino)) {
/*
}
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
- if (block_group >= EXT4_SB(sb)->s_groups_count) {
- ext4_error(sb,"ext4_get_inode_block","group >= groups count");
+ gdp = ext4_get_group_desc(sb, block_group, NULL);
+ if (!gdp)
return 0;
- }
- smp_rmb();
- group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
- desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
- bh = EXT4_SB(sb)->s_group_desc[group_desc];
- if (!bh) {
- ext4_error (sb, "ext4_get_inode_block",
- "Descriptor not loaded");
- return 0;
- }
- gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data +
- desc * EXT4_DESC_SIZE(sb));
/*
* Figure out the offset within the block group inode table
*/
}
}
-void ext4_read_inode(struct inode * inode)
+struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
{
struct ext4_iloc iloc;
struct ext4_inode *raw_inode;
- struct ext4_inode_info *ei = EXT4_I(inode);
+ struct ext4_inode_info *ei;
struct buffer_head *bh;
+ struct inode *inode;
+ long ret;
int block;
+ inode = iget_locked(sb, ino);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+ if (!(inode->i_state & I_NEW))
+ return inode;
+
+ ei = EXT4_I(inode);
#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
ei->i_acl = EXT4_ACL_NOT_CACHED;
ei->i_default_acl = EXT4_ACL_NOT_CACHED;
#endif
ei->i_block_alloc_info = NULL;
- if (__ext4_get_inode_loc(inode, &iloc, 0))
+ ret = __ext4_get_inode_loc(inode, &iloc, 0);
+ if (ret < 0)
goto bad_inode;
bh = iloc.bh;
raw_inode = ext4_raw_inode(&iloc);
!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
/* this inode is deleted */
brelse (bh);
+ ret = -ESTALE;
goto bad_inode;
}
/* The only unlinked inodes we let through here have
ei->i_data[block] = raw_inode->i_block[block];
INIT_LIST_HEAD(&ei->i_orphan);
- if (inode->i_ino >= EXT4_FIRST_INO(inode->i_sb) + 1 &&
- EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
- /*
- * When mke2fs creates big inodes it does not zero out
- * the unused bytes above EXT4_GOOD_OLD_INODE_SIZE,
- * so ignore those first few inodes.
- */
+ if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
EXT4_INODE_SIZE(inode->i_sb)) {
brelse (bh);
+ ret = -EIO;
goto bad_inode;
}
if (ei->i_extra_isize == 0) {
}
brelse (iloc.bh);
ext4_set_inode_flags(inode);
- return;
+ unlock_new_inode(inode);
+ return inode;
bad_inode:
- make_bad_inode(inode);
- return;
+ iget_failed(inode);
+ return ERR_PTR(ret);
}
static int ext4_inode_blocks_set(handle_t *handle,
if (ext4_inode_blocks_set(handle, raw_inode, ei))
goto out_brelse;
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
- raw_inode->i_flags = cpu_to_le32(ei->i_flags);
+ /* clear the migrate flag in the raw_inode */
+ raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE);
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_HURD))
raw_inode->i_file_acl_high =
EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
if (mnt_count !=
le16_to_cpu(sbi->s_es->s_mnt_count)) {
- ext4_warning(inode->i_sb, __FUNCTION__,
+ ext4_warning(inode->i_sb, __func__,
"Unable to expand inode %lu. Delete"
" some EAs or run e2fsck.",
inode->i_ino);
current_handle->h_transaction != handle->h_transaction) {
/* This task has a transaction open against a different fs */
printk(KERN_EMERG "%s: transactions do not match!\n",
- __FUNCTION__);
+ __func__);
} else {
jbd_debug(5, "marking dirty. outer handle=%p\n",
current_handle);