void ext4_delete_inode (struct inode * inode)
{
handle_t *handle;
+ int err;
if (ext4_should_order_data(inode))
ext4_begin_ordered_truncate(inode, 0);
if (is_bad_inode(inode))
goto no_delete;
- handle = start_transaction(inode);
+ handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
if (IS_ERR(handle)) {
+ ext4_std_error(inode->i_sb, PTR_ERR(handle));
/*
* If we're going to skip the normal cleanup, we still need to
* make sure that the in-core orphan linked list is properly
if (IS_SYNC(inode))
handle->h_sync = 1;
inode->i_size = 0;
+ err = ext4_mark_inode_dirty(handle, inode);
+ if (err) {
+ ext4_warning(inode->i_sb, __func__,
+ "couldn't mark inode dirty (err %d)", err);
+ goto stop_handle;
+ }
if (inode->i_blocks)
ext4_truncate(inode);
+
+ /*
+ * ext4_ext_truncate() doesn't reserve any slop when it
+ * restarts journal transactions; therefore there may not be
+ * enough credits left in the handle to remove the inode from
+ * the orphan list and set the dtime field.
+ */
+ if (handle->h_buffer_credits < 3) {
+ err = ext4_journal_extend(handle, 3);
+ if (err > 0)
+ err = ext4_journal_restart(handle, 3);
+ if (err != 0) {
+ ext4_warning(inode->i_sb, __func__,
+ "couldn't extend journal (err %d)", err);
+ stop_handle:
+ ext4_journal_stop(handle);
+ goto no_delete;
+ }
+ }
+
/*
* Kill off the orphan record which ext4_truncate created.
* AKPM: I think this can be inside the above `if'.
return err;
}
+/*
+ * Calculate the number of metadata blocks need to reserve
+ * to allocate @blocks for non extent file based file
+ */
+static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+{
+ int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+ int ind_blks, dind_blks, tind_blks;
+
+ /* number of new indirect blocks needed */
+ ind_blks = (blocks + icap - 1) / icap;
+
+ dind_blks = (ind_blks + icap - 1) / icap;
+
+ tind_blks = 1;
+
+ return ind_blks + dind_blks + tind_blks;
+}
+
+/*
+ * Calculate the number of metadata blocks need to reserve
+ * to allocate given number of blocks
+ */
+static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+{
+ if (!blocks)
+ return 0;
+
+ if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+ return ext4_ext_calc_metadata_amount(inode, blocks);
+
+ return ext4_indirect_calc_metadata_amount(inode, blocks);
+}
+
+static void ext4_da_update_reserve_space(struct inode *inode, int used)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int total, mdb, mdb_free;
+
+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+ /* recalculate the number of metablocks still need to be reserved */
+ total = EXT4_I(inode)->i_reserved_data_blocks - used;
+ mdb = ext4_calc_metadata_amount(inode, total);
+
+ /* figure out how many metablocks to release */
+ BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+ mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
+
+ /* Account for allocated meta_blocks */
+ mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+
+ /* update fs free blocks counter for truncate case */
+ percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
+
+ /* update per-inode reservations */
+ BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
+ EXT4_I(inode)->i_reserved_data_blocks -= used;
+
+ BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+ EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+ EXT4_I(inode)->i_allocated_meta_blocks = 0;
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+}
+
/* Maximum number of blocks we map for direct IO at once. */
#define DIO_MAX_BLOCKS 4096
/*
/*
+ * The ext4_get_blocks_wrap() function try to look up the requested blocks,
+ * and returns if the blocks are already mapped.
*
- *
- * ext4_ext4 get_block() wrapper function
- * It will do a look up first, and returns if the blocks already mapped.
* Otherwise it takes the write lock of the i_data_sem and allocate blocks
* and store the allocated blocks in the result buffer head and mark it
* mapped.
* which were deferred till now
*/
if ((retval > 0) && buffer_delay(bh))
- ext4_da_release_space(inode, retval, 0);
+ ext4_da_update_reserve_space(inode, retval);
}
up_write((&EXT4_I(inode)->i_data_sem));
{
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
- unsigned from, to;
int ret = 0, ret2;
- from = pos & (PAGE_CACHE_SIZE - 1);
- to = from + len;
-
ret = ext4_jbd2_file_inode(handle, inode);
if (ret == 0) {
return ret ? ret : copied;
}
-/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate @blocks for non extent file based file
- */
-static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
-{
- int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- int ind_blks, dind_blks, tind_blks;
-
- /* number of new indirect blocks needed */
- ind_blks = (blocks + icap - 1) / icap;
-
- dind_blks = (ind_blks + icap - 1) / icap;
-
- tind_blks = 1;
-
- return ind_blks + dind_blks + tind_blks;
-}
-
-/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate given number of blocks
- */
-static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
-{
- if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
- return ext4_ext_calc_metadata_amount(inode, blocks);
-
- return ext4_indirect_calc_metadata_amount(inode, blocks);
-}
static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
{
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
return -ENOSPC;
}
-
/* reduce fs free blocks counter */
percpu_counter_sub(&sbi->s_freeblocks_counter, total);
return 0; /* success */
}
-void ext4_da_release_space(struct inode *inode, int used, int to_free)
+static void ext4_da_release_space(struct inode *inode, int to_free)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int total, mdb, mdb_free, release;
+ if (!to_free)
+ return; /* Nothing to release, exit */
+
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+
+ if (!EXT4_I(inode)->i_reserved_data_blocks) {
+ /*
+ * if there is no reserved blocks, but we try to free some
+ * then the counter is messed up somewhere.
+ * but since this function is called from invalidate
+ * page, it's harmless to return without any action
+ */
+ printk(KERN_INFO "ext4 delalloc try to release %d reserved "
+ "blocks for inode %lu, but there is no reserved "
+ "data blocks\n", to_free, inode->i_ino);
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ return;
+ }
+
/* recalculate the number of metablocks still need to be reserved */
- total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free;
+ total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
mdb = ext4_calc_metadata_amount(inode, total);
/* figure out how many metablocks to release */
BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
- /* Account for allocated meta_blocks */
- mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
-
release = to_free + mdb_free;
/* update fs free blocks counter for truncate case */
percpu_counter_add(&sbi->s_freeblocks_counter, release);
/* update per-inode reservations */
- BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks);
- EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free);
+ BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
+ EXT4_I(inode)->i_reserved_data_blocks -= to_free;
BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
EXT4_I(inode)->i_reserved_meta_blocks = mdb;
- EXT4_I(inode)->i_allocated_meta_blocks = 0;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
}
}
curr_off = next_off;
} while ((bh = bh->b_this_page) != head);
- ext4_da_release_space(page->mapping->host, 0, to_release);
+ ext4_da_release_space(page->mapping->host, to_release);
}
/*
if (buffer_delay(bh)) {
bh->b_blocknr = pblock;
clear_buffer_delay(bh);
+ bh->b_bdev = inode->i_sb->s_bdev;
+ } else if (buffer_unwritten(bh)) {
+ bh->b_blocknr = pblock;
+ clear_buffer_unwritten(bh);
+ set_buffer_mapped(bh);
+ set_buffer_new(bh);
+ bh->b_bdev = inode->i_sb->s_bdev;
} else if (buffer_mapped(bh))
BUG_ON(bh->b_blocknr != pblock);
* If blocks are delayed marked, we need to
* put actual blocknr and drop delayed bit
*/
- if (buffer_delay(lbh))
+ if (buffer_delay(lbh) || buffer_unwritten(lbh))
mpage_put_bnr_to_bhs(mpd, next, &new);
/* go for the remaining blocks */
}
}
-#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay))
+#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
+ (1 << BH_Delay) | (1 << BH_Unwritten))
/*
* mpage_add_bh_to_extent - try to add one more block to extent of blocks
}
page = __grab_cache_page(mapping, index);
- if (!page)
- return -ENOMEM;
+ if (!page) {
+ ext4_journal_stop(handle);
+ ret = -ENOMEM;
+ goto out;
+ }
*pagep = page;
ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
}
static const struct address_space_operations ext4_ordered_aops = {
- .readpage = ext4_readpage,
- .readpages = ext4_readpages,
- .writepage = ext4_normal_writepage,
- .sync_page = block_sync_page,
- .write_begin = ext4_write_begin,
- .write_end = ext4_ordered_write_end,
- .bmap = ext4_bmap,
- .invalidatepage = ext4_invalidatepage,
- .releasepage = ext4_releasepage,
- .direct_IO = ext4_direct_IO,
- .migratepage = buffer_migrate_page,
+ .readpage = ext4_readpage,
+ .readpages = ext4_readpages,
+ .writepage = ext4_normal_writepage,
+ .sync_page = block_sync_page,
+ .write_begin = ext4_write_begin,
+ .write_end = ext4_ordered_write_end,
+ .bmap = ext4_bmap,
+ .invalidatepage = ext4_invalidatepage,
+ .releasepage = ext4_releasepage,
+ .direct_IO = ext4_direct_IO,
+ .migratepage = buffer_migrate_page,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
static const struct address_space_operations ext4_writeback_aops = {
- .readpage = ext4_readpage,
- .readpages = ext4_readpages,
- .writepage = ext4_normal_writepage,
- .sync_page = block_sync_page,
- .write_begin = ext4_write_begin,
- .write_end = ext4_writeback_write_end,
- .bmap = ext4_bmap,
- .invalidatepage = ext4_invalidatepage,
- .releasepage = ext4_releasepage,
- .direct_IO = ext4_direct_IO,
- .migratepage = buffer_migrate_page,
+ .readpage = ext4_readpage,
+ .readpages = ext4_readpages,
+ .writepage = ext4_normal_writepage,
+ .sync_page = block_sync_page,
+ .write_begin = ext4_write_begin,
+ .write_end = ext4_writeback_write_end,
+ .bmap = ext4_bmap,
+ .invalidatepage = ext4_invalidatepage,
+ .releasepage = ext4_releasepage,
+ .direct_IO = ext4_direct_IO,
+ .migratepage = buffer_migrate_page,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
static const struct address_space_operations ext4_journalled_aops = {
- .readpage = ext4_readpage,
- .readpages = ext4_readpages,
- .writepage = ext4_journalled_writepage,
- .sync_page = block_sync_page,
- .write_begin = ext4_write_begin,
- .write_end = ext4_journalled_write_end,
- .set_page_dirty = ext4_journalled_set_page_dirty,
- .bmap = ext4_bmap,
- .invalidatepage = ext4_invalidatepage,
- .releasepage = ext4_releasepage,
+ .readpage = ext4_readpage,
+ .readpages = ext4_readpages,
+ .writepage = ext4_journalled_writepage,
+ .sync_page = block_sync_page,
+ .write_begin = ext4_write_begin,
+ .write_end = ext4_journalled_write_end,
+ .set_page_dirty = ext4_journalled_set_page_dirty,
+ .bmap = ext4_bmap,
+ .invalidatepage = ext4_invalidatepage,
+ .releasepage = ext4_releasepage,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
static const struct address_space_operations ext4_da_aops = {
- .readpage = ext4_readpage,
- .readpages = ext4_readpages,
- .writepage = ext4_da_writepage,
- .writepages = ext4_da_writepages,
- .sync_page = block_sync_page,
- .write_begin = ext4_da_write_begin,
- .write_end = ext4_da_write_end,
- .bmap = ext4_bmap,
- .invalidatepage = ext4_da_invalidatepage,
- .releasepage = ext4_releasepage,
- .direct_IO = ext4_direct_IO,
- .migratepage = buffer_migrate_page,
+ .readpage = ext4_readpage,
+ .readpages = ext4_readpages,
+ .writepage = ext4_da_writepage,
+ .writepages = ext4_da_writepages,
+ .sync_page = block_sync_page,
+ .write_begin = ext4_da_write_begin,
+ .write_end = ext4_da_write_end,
+ .bmap = ext4_bmap,
+ .invalidatepage = ext4_da_invalidatepage,
+ .releasepage = ext4_releasepage,
+ .direct_IO = ext4_direct_IO,
+ .migratepage = buffer_migrate_page,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
void ext4_set_aops(struct inode *inode)
* modify the block allocation tree.
*/
down_write(&ei->i_data_sem);
+
+ ext4_discard_reservation(inode);
+
/*
* The orphan list entry will now protect us from any crash which
* occurs before the truncate completes, so it is now safe to propagate
;
}
- ext4_discard_reservation(inode);
-
up_write(&ei->i_data_sem);
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
}
if (!buffer_uptodate(bh)) {
lock_buffer(bh);
+
+ /*
+ * If the buffer has the write error flag, we have failed
+ * to write out another inode in the same block. In this
+ * case, we don't have to read the block because we may
+ * read the old inode data successfully.
+ */
+ if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+
if (buffer_uptodate(bh)) {
/* someone brought it uptodate while we waited */
unlock_buffer(bh);
return 0;
}
+static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
+ int chunk)
+{
+ int indirects;
+
+ /* if nrblocks are contiguous */
+ if (chunk) {
+ /*
+ * With N contiguous data blocks, it need at most
+ * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks
+ * 2 dindirect blocks
+ * 1 tindirect block
+ */
+ indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
+ return indirects + 3;
+ }
+ /*
+ * if nrblocks are not contiguous, worse case, each block touch
+ * a indirect block, and each indirect block touch a double indirect
+ * block, plus a triple indirect block
+ */
+ indirects = nrblocks * 2 + 1;
+ return indirects;
+}
+
+static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+{
+ if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+ return ext4_indirect_trans_blocks(inode, nrblocks, 0);
+ return ext4_ext_index_trans_blocks(inode, nrblocks, 0);
+}
/*
- * How many blocks doth make a writepage()?
+ * Account for index blocks, block groups bitmaps and block group
+ * descriptor blocks if modify datablocks and index blocks
+ * worse case, the indexs blocks spread over different block groups
*
- * With N blocks per page, it may be:
- * N data blocks
- * 2 indirect block
- * 2 dindirect
- * 1 tindirect
- * N+5 bitmap blocks (from the above)
- * N+5 group descriptor summary blocks
- * 1 inode block
- * 1 superblock.
- * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files
+ * If datablocks are discontiguous, they are possible to spread over
+ * different block groups too. If they are contiugous, with flexbg,
+ * they could still across block group boundary.
*
- * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS
+ * Also account for superblock, inode, quota and xattr blocks
+ */
+int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+{
+ int groups, gdpblocks;
+ int idxblocks;
+ int ret = 0;
+
+ /*
+ * How many index blocks need to touch to modify nrblocks?
+ * The "Chunk" flag indicating whether the nrblocks is
+ * physically contiguous on disk
+ *
+ * For Direct IO and fallocate, they calls get_block to allocate
+ * one single extent at a time, so they could set the "Chunk" flag
+ */
+ idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk);
+
+ ret = idxblocks;
+
+ /*
+ * Now let's see how many group bitmaps and group descriptors need
+ * to account
+ */
+ groups = idxblocks;
+ if (chunk)
+ groups += 1;
+ else
+ groups += nrblocks;
+
+ gdpblocks = groups;
+ if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
+ groups = EXT4_SB(inode->i_sb)->s_groups_count;
+ if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
+ gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
+
+ /* bitmaps and block group descriptor blocks */
+ ret += groups + gdpblocks;
+
+ /* Blocks for super block, inode, quota and xattr blocks */
+ ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
+
+ return ret;
+}
+
+/*
+ * Calulate the total number of credits to reserve to fit
+ * the modification of a single pages into a single transaction
*
- * With ordered or writeback data it's the same, less the N data blocks.
+ * This could be called via ext4_write_begin() or later
+ * ext4_da_writepages() in delalyed allocation case.
*
- * If the inode's direct blocks can hold an integral number of pages then a
- * page cannot straddle two indirect blocks, and we can only touch one indirect
- * and dindirect block, and the "5" above becomes "3".
+ * In both case it's possible that we could allocating multiple
+ * chunks of blocks. We need to consider the worse case, when
+ * one new block per extent.
*
- * This still overestimates under most circumstances. If we were to pass the
- * start and end offsets in here as well we could do block_to_path() on each
- * block and work out the exact number of indirects which are touched. Pah.
+ * For Direct IO and fallocate, the journal credits reservation
+ * is based on one single extent allocation, so they could use
+ * EXT4_DATA_TRANS_BLOCKS to get the needed credit to log a single
+ * chunk of allocation needs.
*/
-
int ext4_writepage_trans_blocks(struct inode *inode)
{
int bpp = ext4_journal_blocks_per_page(inode);
- int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3;
int ret;
- if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
- return ext4_ext_writepage_trans_blocks(inode, bpp);
+ ret = ext4_meta_trans_blocks(inode, bpp, 0);
+ /* Account for data blocks for journalled mode */
if (ext4_should_journal_data(inode))
- ret = 3 * (bpp + indirects) + 2;
- else
- ret = 2 * (bpp + indirects) + 2;
-
-#ifdef CONFIG_QUOTA
- /* We know that structure was already allocated during DQUOT_INIT so
- * we will be updating only the data blocks + inodes */
- ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
-#endif
-
+ ret += bpp;
return ret;
}
-
/*
* The caller must have previously called ext4_reserve_inode_write().
* Give this, we know that the caller already has write access to iloc->bh.