From: Linus Torvalds Date: Fri, 3 Apr 2009 18:10:33 +0000 (-0700) Subject: Merge branch 'ext3-latency-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git... X-Git-Url: http://www.pilppa.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=20bec8ab1458c24bed0d5492ee15d87807fc415a;hp=-c;p=linux-2.6-omap-h63xx.git Merge branch 'ext3-latency-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4 * 'ext3-latency-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: ext3: Add replace-on-rename hueristics for data=writeback mode ext3: Add replace-on-truncate hueristics for data=writeback mode ext3: Use WRITE_SYNC for commits which are caused by fsync() block_write_full_page: Use synchronous writes for WBC_SYNC_ALL writebacks --- 20bec8ab1458c24bed0d5492ee15d87807fc415a diff --combined fs/buffer.c index c2fa1be4923,e7ebd95e0c6..5d55a896ff7 --- a/fs/buffer.c +++ b/fs/buffer.c @@@ -165,6 -165,151 +165,6 @@@ void end_buffer_write_sync(struct buffe put_bh(bh); } -/* - * Write out and wait upon all the dirty data associated with a block - * device via its mapping. Does not take the superblock lock. - */ -int sync_blockdev(struct block_device *bdev) -{ - int ret = 0; - - if (bdev) - ret = filemap_write_and_wait(bdev->bd_inode->i_mapping); - return ret; -} -EXPORT_SYMBOL(sync_blockdev); - -/* - * Write out and wait upon all dirty data associated with this - * device. Filesystem data as well as the underlying block - * device. Takes the superblock lock. - */ -int fsync_bdev(struct block_device *bdev) -{ - struct super_block *sb = get_super(bdev); - if (sb) { - int res = fsync_super(sb); - drop_super(sb); - return res; - } - return sync_blockdev(bdev); -} - -/** - * freeze_bdev -- lock a filesystem and force it into a consistent state - * @bdev: blockdevice to lock - * - * This takes the block device bd_mount_sem to make sure no new mounts - * happen on bdev until thaw_bdev() is called. - * If a superblock is found on this device, we take the s_umount semaphore - * on it to make sure nobody unmounts until the snapshot creation is done. - * The reference counter (bd_fsfreeze_count) guarantees that only the last - * unfreeze process can unfreeze the frozen filesystem actually when multiple - * freeze requests arrive simultaneously. It counts up in freeze_bdev() and - * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze - * actually. - */ -struct super_block *freeze_bdev(struct block_device *bdev) -{ - struct super_block *sb; - int error = 0; - - mutex_lock(&bdev->bd_fsfreeze_mutex); - if (bdev->bd_fsfreeze_count > 0) { - bdev->bd_fsfreeze_count++; - sb = get_super(bdev); - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return sb; - } - bdev->bd_fsfreeze_count++; - - down(&bdev->bd_mount_sem); - sb = get_super(bdev); - if (sb && !(sb->s_flags & MS_RDONLY)) { - sb->s_frozen = SB_FREEZE_WRITE; - smp_wmb(); - - __fsync_super(sb); - - sb->s_frozen = SB_FREEZE_TRANS; - smp_wmb(); - - sync_blockdev(sb->s_bdev); - - if (sb->s_op->freeze_fs) { - error = sb->s_op->freeze_fs(sb); - if (error) { - printk(KERN_ERR - "VFS:Filesystem freeze failed\n"); - sb->s_frozen = SB_UNFROZEN; - drop_super(sb); - up(&bdev->bd_mount_sem); - bdev->bd_fsfreeze_count--; - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return ERR_PTR(error); - } - } - } - - sync_blockdev(bdev); - mutex_unlock(&bdev->bd_fsfreeze_mutex); - - return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */ -} -EXPORT_SYMBOL(freeze_bdev); - -/** - * thaw_bdev -- unlock filesystem - * @bdev: blockdevice to unlock - * @sb: associated superblock - * - * Unlocks the filesystem and marks it writeable again after freeze_bdev(). - */ -int thaw_bdev(struct block_device *bdev, struct super_block *sb) -{ - int error = 0; - - mutex_lock(&bdev->bd_fsfreeze_mutex); - if (!bdev->bd_fsfreeze_count) { - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return -EINVAL; - } - - bdev->bd_fsfreeze_count--; - if (bdev->bd_fsfreeze_count > 0) { - if (sb) - drop_super(sb); - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return 0; - } - - if (sb) { - BUG_ON(sb->s_bdev != bdev); - if (!(sb->s_flags & MS_RDONLY)) { - if (sb->s_op->unfreeze_fs) { - error = sb->s_op->unfreeze_fs(sb); - if (error) { - printk(KERN_ERR - "VFS:Filesystem thaw failed\n"); - sb->s_frozen = SB_FREEZE_TRANS; - bdev->bd_fsfreeze_count++; - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return error; - } - } - sb->s_frozen = SB_UNFROZEN; - smp_wmb(); - wake_up(&sb->s_wait_unfrozen); - } - drop_super(sb); - } - - up(&bdev->bd_mount_sem); - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return 0; -} -EXPORT_SYMBOL(thaw_bdev); - /* * Various filesystems appear to want __find_get_block to be non-blocking. * But it's the page lock which protects the buffers. To get around this, @@@ -199,13 -344,13 +199,13 @@@ __find_get_block_slow(struct block_devi head = page_buffers(page); bh = head; do { - if (bh->b_blocknr == block) { + if (!buffer_mapped(bh)) + all_mapped = 0; + else if (bh->b_blocknr == block) { ret = bh; get_bh(bh); goto out_unlock; } - if (!buffer_mapped(bh)) - all_mapped = 0; bh = bh->b_this_page; } while (bh != head); @@@ -290,7 -435,7 +290,7 @@@ static void free_more_memory(void &zone); if (zone) try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0, - GFP_NOFS); + GFP_NOFS, NULL); } } @@@ -547,39 -692,6 +547,39 @@@ repeat return err; } +void do_thaw_all(unsigned long unused) +{ + struct super_block *sb; + char b[BDEVNAME_SIZE]; + + spin_lock(&sb_lock); +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb)) + printk(KERN_WARNING "Emergency Thaw on %s\n", + bdevname(sb->s_bdev, b)); + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; + } + spin_unlock(&sb_lock); + printk(KERN_WARNING "Emergency Thaw complete\n"); +} + +/** + * emergency_thaw_all -- forcibly thaw every frozen filesystem + * + * Used for emergency unfreeze of all filesystems via SysRq + */ +void emergency_thaw_all(void) +{ + pdflush_operation(do_thaw_all, 0); +} + /** * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers * @mapping: the mapping which wants those buffers written @@@ -654,7 -766,14 +654,7 @@@ static void __set_page_dirty(struct pag spin_lock_irq(&mapping->tree_lock); if (page->mapping) { /* Race with truncate? */ WARN_ON_ONCE(warn && !PageUptodate(page)); - - if (mapping_cap_account_dirty(mapping)) { - __inc_zone_page_state(page, NR_FILE_DIRTY); - __inc_bdi_stat(mapping->backing_dev_info, - BDI_RECLAIMABLE); - task_dirty_inc(current); - task_io_account_write(PAGE_CACHE_SIZE); - } + account_page_dirtied(page, mapping); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } @@@ -1595,6 -1714,7 +1595,7 @@@ static int __block_write_full_page(stru struct buffer_head *bh, *head; const unsigned blocksize = 1 << inode->i_blkbits; int nr_underway = 0; + int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); BUG_ON(!PageLocked(page)); @@@ -1686,7 -1806,7 +1687,7 @@@ do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh(WRITE, bh); + submit_bh(write_op, bh); nr_underway++; } bh = next; @@@ -1740,7 -1860,7 +1741,7 @@@ recover struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { clear_buffer_dirty(bh); - submit_bh(WRITE, bh); + submit_bh(write_op, bh); nr_underway++; } bh = next; @@@ -2346,14 -2466,13 +2347,14 @@@ int block_commit_write(struct page *pag * unlock the page. */ int -block_page_mkwrite(struct vm_area_struct *vma, struct page *page, +block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; unsigned long end; loff_t size; - int ret = -EINVAL; + int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ lock_page(page); size = i_size_read(inode); @@@ -2373,13 -2492,6 +2374,13 @@@ if (!ret) ret = block_commit_write(page, 0, end); + if (unlikely(ret)) { + if (ret == -ENOMEM) + ret = VM_FAULT_OOM; + else /* -ENOSPC, -EIO, etc */ + ret = VM_FAULT_SIGBUS; + } + out_unlock: unlock_page(page); return ret; @@@ -3315,6 -3427,7 +3316,6 @@@ EXPORT_SYMBOL(cont_write_begin) EXPORT_SYMBOL(end_buffer_read_sync); EXPORT_SYMBOL(end_buffer_write_sync); EXPORT_SYMBOL(file_fsync); -EXPORT_SYMBOL(fsync_bdev); EXPORT_SYMBOL(generic_block_bmap); EXPORT_SYMBOL(generic_cont_expand_simple); EXPORT_SYMBOL(init_buffer); diff --combined fs/ext3/file.c index 521f8238b2f,4a04cbb1c23..5b49704b231 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@@ -33,6 -33,10 +33,10 @@@ */ static int ext3_release_file (struct inode * inode, struct file * filp) { + if (EXT3_I(inode)->i_state & EXT3_STATE_FLUSH_ON_CLOSE) { + filemap_flush(inode->i_mapping); + EXT3_I(inode)->i_state &= ~EXT3_STATE_FLUSH_ON_CLOSE; + } /* if we are the last writer on the inode, drop the block reservation */ if ((filp->f_mode & FMODE_WRITE) && (atomic_read(&inode->i_writecount) == 1)) @@@ -112,7 -116,7 +116,7 @@@ const struct file_operations ext3_file_ .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = ext3_file_write, - .ioctl = ext3_ioctl, + .unlocked_ioctl = ext3_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext3_compat_ioctl, #endif diff --combined fs/ext3/inode.c index d3ef6566b01,0f5bca0d82f..466a332e0bd --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@@ -1149,15 -1149,12 +1149,15 @@@ static int ext3_write_begin(struct fil struct page **pagep, void **fsdata) { struct inode *inode = mapping->host; - int ret, needed_blocks = ext3_writepage_trans_blocks(inode); + int ret; handle_t *handle; int retries = 0; struct page *page; pgoff_t index; unsigned from, to; + /* Reserve one block more for addition to orphan list in case + * we allocate blocks but write fails for some reason */ + int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; index = pos >> PAGE_CACHE_SHIFT; from = pos & (PAGE_CACHE_SIZE - 1); @@@ -1187,19 -1184,14 +1187,19 @@@ retry } write_begin_failed: if (ret) { - ext3_journal_stop(handle); - unlock_page(page); - page_cache_release(page); /* * block_write_begin may have instantiated a few blocks * outside i_size. Trim these off again. Don't need * i_size_read because we hold i_mutex. + * + * Add inode to orphan list in case we crash before truncate + * finishes. */ + if (pos + len > inode->i_size) + ext3_orphan_add(handle, inode); + ext3_journal_stop(handle); + unlock_page(page); + page_cache_release(page); if (pos + len > inode->i_size) vmtruncate(inode, inode->i_size); } @@@ -1219,18 -1211,6 +1219,18 @@@ int ext3_journal_dirty_data(handle_t *h return err; } +/* For ordered writepage and write_end functions */ +static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) +{ + /* + * Write could have mapped the buffer but it didn't copy the data in + * yet. So avoid filing such buffer into a transaction. + */ + if (buffer_mapped(bh) && buffer_uptodate(bh)) + return ext3_journal_dirty_data(handle, bh); + return 0; +} + /* For write_end() in data=journal mode */ static int write_end_fn(handle_t *handle, struct buffer_head *bh) { @@@ -1241,20 -1221,26 +1241,20 @@@ } /* - * Generic write_end handler for ordered and writeback ext3 journal modes. - * We can't use generic_write_end, because that unlocks the page and we need to - * unlock the page after ext3_journal_stop, but ext3_journal_stop must run - * after block_write_end. + * This is nasty and subtle: ext3_write_begin() could have allocated blocks + * for the whole page but later we failed to copy the data in. Update inode + * size according to what we managed to copy. The rest is going to be + * truncated in write_end function. */ -static int ext3_generic_write_end(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) +static void update_file_sizes(struct inode *inode, loff_t pos, unsigned copied) { - struct inode *inode = file->f_mapping->host; - - copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); - - if (pos+copied > inode->i_size) { - i_size_write(inode, pos+copied); + /* What matters to us is i_disksize. We don't write i_size anywhere */ + if (pos + copied > inode->i_size) + i_size_write(inode, pos + copied); + if (pos + copied > EXT3_I(inode)->i_disksize) { + EXT3_I(inode)->i_disksize = pos + copied; mark_inode_dirty(inode); } - - return copied; } /* @@@ -1274,29 -1260,35 +1274,29 @@@ static int ext3_ordered_write_end(struc unsigned from, to; int ret = 0, ret2; - from = pos & (PAGE_CACHE_SIZE - 1); - to = from + len; + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + copied; ret = walk_page_buffers(handle, page_buffers(page), - from, to, NULL, ext3_journal_dirty_data); + from, to, NULL, journal_dirty_data_fn); - if (ret == 0) { - /* - * generic_write_end() will run mark_inode_dirty() if i_size - * changes. So let's piggyback the i_disksize mark_inode_dirty - * into that. - */ - loff_t new_i_size; - - new_i_size = pos + copied; - if (new_i_size > EXT3_I(inode)->i_disksize) - EXT3_I(inode)->i_disksize = new_i_size; - ret2 = ext3_generic_write_end(file, mapping, pos, len, copied, - page, fsdata); - copied = ret2; - if (ret2 < 0) - ret = ret2; - } + if (ret == 0) + update_file_sizes(inode, pos, copied); + /* + * There may be allocated blocks outside of i_size because + * we failed to copy some data. Prepare for truncate. + */ + if (pos + len > inode->i_size) + ext3_orphan_add(handle, inode); ret2 = ext3_journal_stop(handle); if (!ret) ret = ret2; unlock_page(page); page_cache_release(page); + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); return ret ? ret : copied; } @@@ -1307,22 -1299,25 +1307,22 @@@ static int ext3_writeback_write_end(str { handle_t *handle = ext3_journal_current_handle(); struct inode *inode = file->f_mapping->host; - int ret = 0, ret2; - loff_t new_i_size; - - new_i_size = pos + copied; - if (new_i_size > EXT3_I(inode)->i_disksize) - EXT3_I(inode)->i_disksize = new_i_size; - - ret2 = ext3_generic_write_end(file, mapping, pos, len, copied, - page, fsdata); - copied = ret2; - if (ret2 < 0) - ret = ret2; + int ret; - ret2 = ext3_journal_stop(handle); - if (!ret) - ret = ret2; + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); + update_file_sizes(inode, pos, copied); + /* + * There may be allocated blocks outside of i_size because + * we failed to copy some data. Prepare for truncate. + */ + if (pos + len > inode->i_size) + ext3_orphan_add(handle, inode); + ret = ext3_journal_stop(handle); unlock_page(page); page_cache_release(page); + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); return ret ? ret : copied; } @@@ -1343,23 -1338,15 +1343,23 @@@ static int ext3_journalled_write_end(st if (copied < len) { if (!PageUptodate(page)) copied = 0; - page_zero_new_buffers(page, from+copied, to); + page_zero_new_buffers(page, from + copied, to); + to = from + copied; } ret = walk_page_buffers(handle, page_buffers(page), from, to, &partial, write_end_fn); if (!partial) SetPageUptodate(page); - if (pos+copied > inode->i_size) - i_size_write(inode, pos+copied); + + if (pos + copied > inode->i_size) + i_size_write(inode, pos + copied); + /* + * There may be allocated blocks outside of i_size because + * we failed to copy some data. Prepare for truncate. + */ + if (pos + len > inode->i_size) + ext3_orphan_add(handle, inode); EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; if (inode->i_size > EXT3_I(inode)->i_disksize) { EXT3_I(inode)->i_disksize = inode->i_size; @@@ -1374,8 -1361,6 +1374,8 @@@ unlock_page(page); page_cache_release(page); + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); return ret ? ret : copied; } @@@ -1443,9 -1428,11 +1443,9 @@@ static int bput_one(handle_t *handle, s return 0; } -static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) +static int buffer_unmapped(handle_t *handle, struct buffer_head *bh) { - if (buffer_mapped(bh)) - return ext3_journal_dirty_data(handle, bh); - return 0; + return !buffer_mapped(bh); } /* @@@ -1518,15 -1505,6 +1518,15 @@@ static int ext3_ordered_writepage(struc if (ext3_journal_current_handle()) goto out_fail; + if (!page_has_buffers(page)) { + create_empty_buffers(page, inode->i_sb->s_blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } else if (!walk_page_buffers(NULL, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { + /* Provide NULL instead of get_block so that we catch bugs if buffers weren't really mapped */ + return block_write_full_page(page, NULL, wbc); + } + page_bufs = page_buffers(page); + handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); if (IS_ERR(handle)) { @@@ -1534,6 -1512,11 +1534,6 @@@ goto out_fail; } - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - page_bufs = page_buffers(page); walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL, bget_one); @@@ -2363,6 -2346,9 +2363,9 @@@ void ext3_truncate(struct inode *inode if (!ext3_can_truncate(inode)) return; + if (inode->i_size == 0 && ext3_should_writeback_data(inode)) + ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE; + /* * We have to lock the EOF page here, because lock_page() nests * outside journal_start(). @@@ -3072,7 -3058,7 +3075,7 @@@ int ext3_setattr(struct dentry *dentry error = PTR_ERR(handle); goto err_out; } - error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; + error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; if (error) { ext3_journal_stop(handle); return error; @@@ -3163,7 -3149,7 +3166,7 @@@ static int ext3_writepage_trans_blocks( ret = 2 * (bpp + indirects) + 2; #ifdef CONFIG_QUOTA - /* We know that structure was already allocated during DQUOT_INIT so + /* We know that structure was already allocated during vfs_dq_init so * we will be updating only the data blocks + inodes */ ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb); #endif @@@ -3254,7 -3240,7 +3257,7 @@@ int ext3_mark_inode_dirty(handle_t *han * i_size has been changed by generic_commit_write() and we thus need * to include the updated inode in the current transaction. * - * Also, DQUOT_ALLOC_SPACE() will always dirty the inode when blocks + * Also, vfs_dq_alloc_space() will always dirty the inode when blocks * are allocated to the file. * * If the inode is marked synchronous, we don't honour that here - doing diff --combined fs/ext3/namei.c index 6ddaa0a42b2,ab98a66ab8c..6ff7b973023 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@@ -161,12 -161,12 +161,12 @@@ static struct dx_frame *dx_probe(struc struct dx_frame *frame, int *err); static void dx_release (struct dx_frame *frames); -static int dx_make_map (struct ext3_dir_entry_2 *de, int size, +static int dx_make_map(struct ext3_dir_entry_2 *de, unsigned blocksize, struct dx_hash_info *hinfo, struct dx_map_entry map[]); static void dx_sort_map(struct dx_map_entry *map, unsigned count); static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, struct dx_map_entry *offsets, int count); -static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); +static struct ext3_dir_entry_2 *dx_pack_dirents(char *base, unsigned blocksize); static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); static int ext3_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frame, @@@ -708,14 -708,14 +708,14 @@@ errout * Create map of hash values, offsets, and sizes, stored at end of block. * Returns number of entries mapped. */ -static int dx_make_map (struct ext3_dir_entry_2 *de, int size, - struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) +static int dx_make_map(struct ext3_dir_entry_2 *de, unsigned blocksize, + struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) { int count = 0; char *base = (char *) de; struct dx_hash_info h = *hinfo; - while ((char *) de < base + size) + while ((char *) de < base + blocksize) { if (de->name_len && de->inode) { ext3fs_dirhash(de->name, de->name_len, &h); @@@ -1047,16 -1047,8 +1047,16 @@@ static struct dentry *ext3_lookup(struc return ERR_PTR(-EIO); } inode = ext3_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); + if (unlikely(IS_ERR(inode))) { + if (PTR_ERR(inode) == -ESTALE) { + ext3_error(dir->i_sb, __func__, + "deleted inode referenced: %lu", + ino); + return ERR_PTR(-EIO); + } else { + return ERR_CAST(inode); + } + } } return d_splice_alias(inode, dentry); } @@@ -1128,14 -1120,13 +1128,14 @@@ dx_move_dirents(char *from, char *to, s * Compact each dir entry in the range to the minimal rec_len. * Returns pointer to last entry in range. */ -static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) +static struct ext3_dir_entry_2 *dx_pack_dirents(char *base, unsigned blocksize) { - struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; + struct ext3_dir_entry_2 *next, *to, *prev; + struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *)base; unsigned rec_len = 0; prev = to = de; - while ((char*)de < base + size) { + while ((char *)de < base + blocksize) { next = ext3_next_entry(de); if (de->inode && de->name_len) { rec_len = EXT3_DIR_REC_LEN(de->name_len); @@@ -2058,7 -2049,7 +2058,7 @@@ static int ext3_rmdir (struct inode * d /* Initialize quotas before so that eventual writes go in * separate transaction */ - DQUOT_INIT(dentry->d_inode); + vfs_dq_init(dentry->d_inode); handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@@ -2117,7 -2108,7 +2117,7 @@@ static int ext3_unlink(struct inode * d /* Initialize quotas before so that eventual writes go * in separate transaction */ - DQUOT_INIT(dentry->d_inode); + vfs_dq_init(dentry->d_inode); handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@@ -2274,14 -2265,14 +2274,14 @@@ static int ext3_rename (struct inode * struct inode * old_inode, * new_inode; struct buffer_head * old_bh, * new_bh, * dir_bh; struct ext3_dir_entry_2 * old_de, * new_de; - int retval; + int retval, flush_file = 0; old_bh = new_bh = dir_bh = NULL; /* Initialize quotas before so that eventual writes go * in separate transaction */ if (new_dentry->d_inode) - DQUOT_INIT(new_dentry->d_inode); + vfs_dq_init(new_dentry->d_inode); handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); @@@ -2410,6 -2401,8 +2410,8 @@@ ext3_mark_inode_dirty(handle, new_inode); if (!new_inode->i_nlink) ext3_orphan_add(handle, new_inode); + if (ext3_should_writeback_data(new_inode)) + flush_file = 1; } retval = 0; @@@ -2418,6 -2411,8 +2420,8 @@@ end_rename brelse (old_bh); brelse (new_bh); ext3_journal_stop(handle); + if (retval == 0 && flush_file) + filemap_flush(old_inode->i_mapping); return retval; } diff --combined include/linux/ext3_fs.h index e263acaa405,d2630c56cb3..634a5e5aba3 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@@ -208,6 -208,7 +208,7 @@@ static inline __u32 ext3_mask_flags(umo #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ #define EXT3_STATE_XATTR 0x00000004 /* has in-inode xattrs */ + #define EXT3_STATE_FLUSH_ON_CLOSE 0x00000008 /* Used to pass group descriptor data when online resize is done */ struct ext3_new_group_input { @@@ -893,8 -894,9 +894,8 @@@ extern int ext3_fiemap(struct inode *in u64 start, u64 len); /* ioctl.c */ -extern int ext3_ioctl (struct inode *, struct file *, unsigned int, - unsigned long); -extern long ext3_compat_ioctl (struct file *, unsigned int, unsigned long); +extern long ext3_ioctl(struct file *, unsigned int, unsigned long); +extern long ext3_compat_ioctl(struct file *, unsigned int, unsigned long); /* namei.c */ extern int ext3_orphan_add(handle_t *, struct inode *);