BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
- /* Account for allocated meta_blocks */
- mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
-
- /* update fs free blocks counter for truncate case */
- percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
+ if (mdb_free) {
+ /* Account for allocated meta_blocks */
+ mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+
+ /* update fs dirty blocks counter */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
+ EXT4_I(inode)->i_allocated_meta_blocks = 0;
+ EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+ }
/* update per-inode reservations */
BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
EXT4_I(inode)->i_reserved_data_blocks -= used;
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
- EXT4_I(inode)->i_reserved_meta_blocks = mdb;
- EXT4_I(inode)->i_allocated_meta_blocks = 0;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
}
static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
{
+ int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
unsigned long md_needed, mdblocks, total = 0;
* in order to allocate nrblocks
* worse case is one extent per block
*/
+repeat:
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
mdblocks = ext4_calc_metadata_amount(inode, total);
if (ext4_claim_free_blocks(sbi, total)) {
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+ yield();
+ goto repeat;
+ }
return -ENOSPC;
}
EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
release = to_free + mdb_free;
- /* update fs free blocks counter for truncate case */
- percpu_counter_add(&sbi->s_freeblocks_counter, release);
+ /* update fs dirty blocks counter for truncate case */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
/* update per-inode reservations */
BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
struct writeback_control *wbc;
int io_done;
long pages_written;
+ int retval;
};
/*
return;
}
+static void ext4_print_free_blocks(struct inode *inode)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ printk(KERN_EMERG "Total free blocks count %lld\n",
+ ext4_count_free_blocks(inode->i_sb));
+ printk(KERN_EMERG "Free/Dirty block details\n");
+ printk(KERN_EMERG "free_blocks=%lld\n",
+ percpu_counter_sum(&sbi->s_freeblocks_counter));
+ printk(KERN_EMERG "dirty_blocks=%lld\n",
+ percpu_counter_sum(&sbi->s_dirtyblocks_counter));
+ printk(KERN_EMERG "Block reservation details\n");
+ printk(KERN_EMERG "i_reserved_data_blocks=%lu\n",
+ EXT4_I(inode)->i_reserved_data_blocks);
+ printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n",
+ EXT4_I(inode)->i_reserved_meta_blocks);
+ return;
+}
+
/*
* mpage_da_map_blocks - go through given space
*
static int mpage_da_map_blocks(struct mpage_da_data *mpd)
{
int err = 0;
- struct buffer_head *lbh = &mpd->lbh;
- sector_t next = lbh->b_blocknr;
struct buffer_head new;
+ struct buffer_head *lbh = &mpd->lbh;
+ sector_t next;
/*
* We consider only non-mapped and non-allocated blocks
*/
if (buffer_mapped(lbh) && !buffer_delay(lbh))
return 0;
-
new.b_state = lbh->b_state;
new.b_blocknr = 0;
new.b_size = lbh->b_size;
-
+ next = lbh->b_blocknr;
/*
* If we didn't accumulate anything
* to write simply return
*/
if (err == -EAGAIN)
return 0;
+
+ if (err == -ENOSPC &&
+ ext4_count_free_blocks(mpd->inode->i_sb)) {
+ mpd->retval = err;
+ return 0;
+ }
+
/*
* get block failure will cause us
* to loop in writepages. Because
lbh->b_size >> mpd->inode->i_blkbits, err);
printk(KERN_EMERG "This should not happen.!! "
"Data will be lost\n");
+ if (err == -ENOSPC) {
+ ext4_print_free_blocks(mpd->inode);
+ }
/* invlaidate all the pages */
ext4_da_block_invalidatepages(mpd, next,
lbh->b_size >> mpd->inode->i_blkbits);
*/
static int mpage_da_writepages(struct address_space *mapping,
struct writeback_control *wbc,
- get_block_t get_block)
+ struct mpage_da_data *mpd)
{
- struct mpage_da_data mpd;
long to_write;
int ret;
- if (!get_block)
+ if (!mpd->get_block)
return generic_writepages(mapping, wbc);
- mpd.wbc = wbc;
- mpd.inode = mapping->host;
- mpd.lbh.b_size = 0;
- mpd.lbh.b_state = 0;
- mpd.lbh.b_blocknr = 0;
- mpd.first_page = 0;
- mpd.next_page = 0;
- mpd.get_block = get_block;
- mpd.io_done = 0;
- mpd.pages_written = 0;
+ mpd->lbh.b_size = 0;
+ mpd->lbh.b_state = 0;
+ mpd->lbh.b_blocknr = 0;
+ mpd->first_page = 0;
+ mpd->next_page = 0;
+ mpd->io_done = 0;
+ mpd->pages_written = 0;
+ mpd->retval = 0;
to_write = wbc->nr_to_write;
- ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
+ ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
/*
* Handle last extent of pages
*/
- if (!mpd.io_done && mpd.next_page != mpd.first_page) {
- if (mpage_da_map_blocks(&mpd) == 0)
- mpage_da_submit_io(&mpd);
+ if (!mpd->io_done && mpd->next_page != mpd->first_page) {
+ if (mpage_da_map_blocks(mpd) == 0)
+ mpage_da_submit_io(mpd);
}
- wbc->nr_to_write = to_write - mpd.pages_written;
+ wbc->nr_to_write = to_write - mpd->pages_written;
return ret;
}
handle_t *handle = NULL;
handle = ext4_journal_current_handle();
- if (!handle) {
- ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
- bh_result, 0, 0, 0);
- BUG_ON(!ret);
- } else {
- ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
- bh_result, create, 0, EXT4_DELALLOC_RSVED);
- }
-
+ BUG_ON(!handle);
+ ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+ bh_result, create, 0, EXT4_DELALLOC_RSVED);
if (ret > 0) {
+
bh_result->b_size = (ret << inode->i_blkbits);
+ if (ext4_should_order_data(inode)) {
+ int retval;
+ retval = ext4_jbd2_file_inode(handle, inode);
+ if (retval)
+ /*
+ * Failed to add inode for ordered
+ * mode. Don't update file size
+ */
+ return retval;
+ }
+
/*
* Update on-disk size along with block allocation
* we don't use 'extend_disksize' as size may change
{
handle_t *handle = NULL;
loff_t range_start = 0;
+ struct mpage_da_data mpd;
struct inode *inode = mapping->host;
int needed_blocks, ret = 0, nr_to_writebump = 0;
long to_write, pages_skipped = 0;
range_start = wbc->range_start;
pages_skipped = wbc->pages_skipped;
+ mpd.wbc = wbc;
+ mpd.inode = mapping->host;
+
restart_loop:
to_write = wbc->nr_to_write;
while (!ret && to_write > 0) {
dump_stack();
goto out_writepages;
}
- if (ext4_should_order_data(inode)) {
- /*
- * With ordered mode we need to add
- * the inode to the journal handl
- * when we do block allocation.
- */
- ret = ext4_jbd2_file_inode(handle, inode);
- if (ret) {
- ext4_journal_stop(handle);
- goto out_writepages;
- }
- }
-
to_write -= wbc->nr_to_write;
- ret = mpage_da_writepages(mapping, wbc,
- ext4_da_get_block_write);
+
+ mpd.get_block = ext4_da_get_block_write;
+ ret = mpage_da_writepages(mapping, wbc, &mpd);
+
ext4_journal_stop(handle);
+
+ if (mpd.retval == -ENOSPC)
+ jbd2_journal_force_commit_nested(sbi->s_journal);
+
+ /* reset the retry count */
if (ret == MPAGE_DA_EXTENT_TAIL) {
/*
* got one extent now try with
return ret;
}
+#define FALL_BACK_TO_NONDELALLOC 1
+static int ext4_nonda_switch(struct super_block *sb)
+{
+ s64 free_blocks, dirty_blocks;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ /*
+ * switch to non delalloc mode if we are running low
+ * on free block. The free block accounting via percpu
+ * counters can get slightly wrong with FBC_BATCH getting
+ * accumulated on each CPU without updating global counters
+ * Delalloc need an accurate free block accounting. So switch
+ * to non delalloc when we are near to error range.
+ */
+ free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+ dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
+ if (2 * free_blocks < 3 * dirty_blocks ||
+ free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
+ /*
+ * free block count is less that 150% of dirty blocks
+ * or free blocks is less that watermark
+ */
+ return 1;
+ }
+ return 0;
+}
+
static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
+ if (ext4_nonda_switch(inode->i_sb)) {
+ *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
+ return ext4_write_begin(file, mapping, pos,
+ len, flags, pagep, fsdata);
+ }
+ *fsdata = (void *)0;
retry:
/*
* With delayed allocation, we don't log the i_disksize update
handle_t *handle = ext4_journal_current_handle();
loff_t new_i_size;
unsigned long start, end;
+ int write_mode = (int)(unsigned long)fsdata;
+
+ if (write_mode == FALL_BACK_TO_NONDELALLOC) {
+ if (ext4_should_order_data(inode)) {
+ return ext4_ordered_write_end(file, mapping, pos,
+ len, copied, page, fsdata);
+ } else if (ext4_should_writeback_data(inode)) {
+ return ext4_writeback_write_end(file, mapping, pos,
+ len, copied, page, fsdata);
+ } else {
+ BUG();
+ }
+ }
start = pos & (PAGE_CACHE_SIZE - 1);
end = start + copied - 1;
loff_t size;
unsigned long len;
int ret = -EINVAL;
+ void *fsdata;
struct file *file = vma->vm_file;
struct inode *inode = file->f_path.dentry->d_inode;
struct address_space *mapping = inode->i_mapping;
* on the same page though
*/
ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
- len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+ len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
if (ret < 0)
goto out_unlock;
ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
- len, len, page, NULL);
+ len, len, page, fsdata);
if (ret < 0)
goto out_unlock;
ret = 0;