Merge branch 'ext3-latency-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git...
author    Linus Torvalds <torvalds@linux-foundation.org>    Fri, 3 Apr 2009 18:10:33 +0000 (11:10 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>    Fri, 3 Apr 2009 18:10:33 +0000 (11:10 -0700)
* 'ext3-latency-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext3: Add replace-on-rename heuristics for data=writeback mode
  ext3: Add replace-on-truncate heuristics for data=writeback mode
  ext3: Use WRITE_SYNC for commits which are caused by fsync()
  block_write_full_page: Use synchronous writes for WBC_SYNC_ALL writebacks
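
These heuristics target the common userspace pattern of replacing a file by writing a temporary copy and rename()-ing it over the original, often without an fsync(); in data=writeback mode a crash at the wrong moment could otherwise leave the new file empty. Below is a minimal userspace sketch of that pattern (illustrative only, not part of this merge; the file name and the replace_file() helper are hypothetical):

/*
 * Illustrative sketch (not part of this merge): the "write a temp file,
 * then rename() it over the old one" update pattern that the
 * replace-on-rename heuristic protects when an application skips the
 * fsync() step on an ext3 data=writeback filesystem.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int replace_file(const char *path, const char *data, size_t len)
{
        char tmp[4096];
        int fd;

        snprintf(tmp, sizeof(tmp), "%s.tmp", path);
        fd = open(tmp, O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (fd < 0)
                return -1;
        if (write(fd, data, len) != (ssize_t)len)
                goto fail;
        /*
         * Careful applications fsync() here; the heuristic added by this
         * merge flushes the new file's data on rename for those that do not.
         */
        if (fsync(fd) < 0)
                goto fail;
        close(fd);
        return rename(tmp, path);       /* atomically replaces the old file */
fail:
        close(fd);
        unlink(tmp);
        return -1;
}

int main(void)
{
        const char msg[] = "new contents\n";

        if (replace_file("config.txt", msg, strlen(msg)) < 0) {
                perror("replace_file");
                return 1;
        }
        return 0;
}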

fs/buffer.c
fs/ext3/file.c
fs/ext3/inode.c
fs/ext3/namei.c
include/linux/ext3_fs.h

diff --combined fs/buffer.c
index c2fa1be4923d19885dab161d45a53588ff296ac5,e7ebd95e0c680979f31ad1eec04f021b8bb03e42..5d55a896ff78f2a79ab9bc2dee72dae77ddb93eb
@@@ -165,6 -165,151 +165,6 @@@ void end_buffer_write_sync(struct buffe
        put_bh(bh);
  }
  
 -/*
 - * Write out and wait upon all the dirty data associated with a block
 - * device via its mapping.  Does not take the superblock lock.
 - */
 -int sync_blockdev(struct block_device *bdev)
 -{
 -      int ret = 0;
 -
 -      if (bdev)
 -              ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
 -      return ret;
 -}
 -EXPORT_SYMBOL(sync_blockdev);
 -
 -/*
 - * Write out and wait upon all dirty data associated with this
 - * device.   Filesystem data as well as the underlying block
 - * device.  Takes the superblock lock.
 - */
 -int fsync_bdev(struct block_device *bdev)
 -{
 -      struct super_block *sb = get_super(bdev);
 -      if (sb) {
 -              int res = fsync_super(sb);
 -              drop_super(sb);
 -              return res;
 -      }
 -      return sync_blockdev(bdev);
 -}
 -
 -/**
 - * freeze_bdev  --  lock a filesystem and force it into a consistent state
 - * @bdev:     blockdevice to lock
 - *
 - * This takes the block device bd_mount_sem to make sure no new mounts
 - * happen on bdev until thaw_bdev() is called.
 - * If a superblock is found on this device, we take the s_umount semaphore
 - * on it to make sure nobody unmounts until the snapshot creation is done.
 - * The reference counter (bd_fsfreeze_count) guarantees that only the last
 - * unfreeze process can unfreeze the frozen filesystem actually when multiple
 - * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
 - * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
 - * actually.
 - */
 -struct super_block *freeze_bdev(struct block_device *bdev)
 -{
 -      struct super_block *sb;
 -      int error = 0;
 -
 -      mutex_lock(&bdev->bd_fsfreeze_mutex);
 -      if (bdev->bd_fsfreeze_count > 0) {
 -              bdev->bd_fsfreeze_count++;
 -              sb = get_super(bdev);
 -              mutex_unlock(&bdev->bd_fsfreeze_mutex);
 -              return sb;
 -      }
 -      bdev->bd_fsfreeze_count++;
 -
 -      down(&bdev->bd_mount_sem);
 -      sb = get_super(bdev);
 -      if (sb && !(sb->s_flags & MS_RDONLY)) {
 -              sb->s_frozen = SB_FREEZE_WRITE;
 -              smp_wmb();
 -
 -              __fsync_super(sb);
 -
 -              sb->s_frozen = SB_FREEZE_TRANS;
 -              smp_wmb();
 -
 -              sync_blockdev(sb->s_bdev);
 -
 -              if (sb->s_op->freeze_fs) {
 -                      error = sb->s_op->freeze_fs(sb);
 -                      if (error) {
 -                              printk(KERN_ERR
 -                                      "VFS:Filesystem freeze failed\n");
 -                              sb->s_frozen = SB_UNFROZEN;
 -                              drop_super(sb);
 -                              up(&bdev->bd_mount_sem);
 -                              bdev->bd_fsfreeze_count--;
 -                              mutex_unlock(&bdev->bd_fsfreeze_mutex);
 -                              return ERR_PTR(error);
 -                      }
 -              }
 -      }
 -
 -      sync_blockdev(bdev);
 -      mutex_unlock(&bdev->bd_fsfreeze_mutex);
 -
 -      return sb;      /* thaw_bdev releases s->s_umount and bd_mount_sem */
 -}
 -EXPORT_SYMBOL(freeze_bdev);
 -
 -/**
 - * thaw_bdev  -- unlock filesystem
 - * @bdev:     blockdevice to unlock
 - * @sb:               associated superblock
 - *
 - * Unlocks the filesystem and marks it writeable again after freeze_bdev().
 - */
 -int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 -{
 -      int error = 0;
 -
 -      mutex_lock(&bdev->bd_fsfreeze_mutex);
 -      if (!bdev->bd_fsfreeze_count) {
 -              mutex_unlock(&bdev->bd_fsfreeze_mutex);
 -              return -EINVAL;
 -      }
 -
 -      bdev->bd_fsfreeze_count--;
 -      if (bdev->bd_fsfreeze_count > 0) {
 -              if (sb)
 -                      drop_super(sb);
 -              mutex_unlock(&bdev->bd_fsfreeze_mutex);
 -              return 0;
 -      }
 -
 -      if (sb) {
 -              BUG_ON(sb->s_bdev != bdev);
 -              if (!(sb->s_flags & MS_RDONLY)) {
 -                      if (sb->s_op->unfreeze_fs) {
 -                              error = sb->s_op->unfreeze_fs(sb);
 -                              if (error) {
 -                                      printk(KERN_ERR
 -                                              "VFS:Filesystem thaw failed\n");
 -                                      sb->s_frozen = SB_FREEZE_TRANS;
 -                                      bdev->bd_fsfreeze_count++;
 -                                      mutex_unlock(&bdev->bd_fsfreeze_mutex);
 -                                      return error;
 -                              }
 -                      }
 -                      sb->s_frozen = SB_UNFROZEN;
 -                      smp_wmb();
 -                      wake_up(&sb->s_wait_unfrozen);
 -              }
 -              drop_super(sb);
 -      }
 -
 -      up(&bdev->bd_mount_sem);
 -      mutex_unlock(&bdev->bd_fsfreeze_mutex);
 -      return 0;
 -}
 -EXPORT_SYMBOL(thaw_bdev);
 -
  /*
   * Various filesystems appear to want __find_get_block to be non-blocking.
   * But it's the page lock which protects the buffers.  To get around this,
@@@ -199,13 -344,13 +199,13 @@@ __find_get_block_slow(struct block_devi
        head = page_buffers(page);
        bh = head;
        do {
 -              if (bh->b_blocknr == block) {
 +              if (!buffer_mapped(bh))
 +                      all_mapped = 0;
 +              else if (bh->b_blocknr == block) {
                        ret = bh;
                        get_bh(bh);
                        goto out_unlock;
                }
 -              if (!buffer_mapped(bh))
 -                      all_mapped = 0;
                bh = bh->b_this_page;
        } while (bh != head);
  
@@@ -290,7 -435,7 +290,7 @@@ static void free_more_memory(void
                                                &zone);
                if (zone)
                        try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
 -                                              GFP_NOFS);
 +                                              GFP_NOFS, NULL);
        }
  }
  
@@@ -547,39 -692,6 +547,39 @@@ repeat
        return err;
  }
  
 +void do_thaw_all(unsigned long unused)
 +{
 +      struct super_block *sb;
 +      char b[BDEVNAME_SIZE];
 +
 +      spin_lock(&sb_lock);
 +restart:
 +      list_for_each_entry(sb, &super_blocks, s_list) {
 +              sb->s_count++;
 +              spin_unlock(&sb_lock);
 +              down_read(&sb->s_umount);
 +              while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
 +                      printk(KERN_WARNING "Emergency Thaw on %s\n",
 +                             bdevname(sb->s_bdev, b));
 +              up_read(&sb->s_umount);
 +              spin_lock(&sb_lock);
 +              if (__put_super_and_need_restart(sb))
 +                      goto restart;
 +      }
 +      spin_unlock(&sb_lock);
 +      printk(KERN_WARNING "Emergency Thaw complete\n");
 +}
 +
 +/**
 + * emergency_thaw_all -- forcibly thaw every frozen filesystem
 + *
 + * Used for emergency unfreeze of all filesystems via SysRq
 + */
 +void emergency_thaw_all(void)
 +{
 +      pdflush_operation(do_thaw_all, 0);
 +}
 +
  /**
   * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
   * @mapping: the mapping which wants those buffers written
@@@ -654,7 -766,14 +654,7 @@@ static void __set_page_dirty(struct pag
        spin_lock_irq(&mapping->tree_lock);
        if (page->mapping) {    /* Race with truncate? */
                WARN_ON_ONCE(warn && !PageUptodate(page));
 -
 -              if (mapping_cap_account_dirty(mapping)) {
 -                      __inc_zone_page_state(page, NR_FILE_DIRTY);
 -                      __inc_bdi_stat(mapping->backing_dev_info,
 -                                      BDI_RECLAIMABLE);
 -                      task_dirty_inc(current);
 -                      task_io_account_write(PAGE_CACHE_SIZE);
 -              }
 +              account_page_dirtied(page, mapping);
                radix_tree_tag_set(&mapping->page_tree,
                                page_index(page), PAGECACHE_TAG_DIRTY);
        }
@@@ -1595,6 -1714,7 +1595,7 @@@ static int __block_write_full_page(stru
        struct buffer_head *bh, *head;
        const unsigned blocksize = 1 << inode->i_blkbits;
        int nr_underway = 0;
+       int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
  
        BUG_ON(!PageLocked(page));
  
        do {
                struct buffer_head *next = bh->b_this_page;
                if (buffer_async_write(bh)) {
-                       submit_bh(WRITE, bh);
+                       submit_bh(write_op, bh);
                        nr_underway++;
                }
                bh = next;
@@@ -1740,7 -1860,7 +1741,7 @@@ recover
                struct buffer_head *next = bh->b_this_page;
                if (buffer_async_write(bh)) {
                        clear_buffer_dirty(bh);
-                       submit_bh(WRITE, bh);
+                       submit_bh(write_op, bh);
                        nr_underway++;
                }
                bh = next;
@@@ -2346,14 -2466,13 +2347,14 @@@ int block_commit_write(struct page *pag
   * unlock the page.
   */
  int
 -block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
 +block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
                   get_block_t get_block)
  {
 +      struct page *page = vmf->page;
        struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
        unsigned long end;
        loff_t size;
 -      int ret = -EINVAL;
 +      int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
  
        lock_page(page);
        size = i_size_read(inode);
        if (!ret)
                ret = block_commit_write(page, 0, end);
  
 +      if (unlikely(ret)) {
 +              if (ret == -ENOMEM)
 +                      ret = VM_FAULT_OOM;
 +              else /* -ENOSPC, -EIO, etc */
 +                      ret = VM_FAULT_SIGBUS;
 +      }
 +
  out_unlock:
        unlock_page(page);
        return ret;
@@@ -3315,6 -3427,7 +3316,6 @@@ EXPORT_SYMBOL(cont_write_begin)
  EXPORT_SYMBOL(end_buffer_read_sync);
  EXPORT_SYMBOL(end_buffer_write_sync);
  EXPORT_SYMBOL(file_fsync);
 -EXPORT_SYMBOL(fsync_bdev);
  EXPORT_SYMBOL(generic_block_bmap);
  EXPORT_SYMBOL(generic_cont_expand_simple);
  EXPORT_SYMBOL(init_buffer);
diff --combined fs/ext3/file.c
index 521f8238b2fa1e967b47040355e5834fe3fc3e53,4a04cbb1c231507b2fe95ec48408eb5a57f5e76a..5b49704b231b27b53a8d52dc15a1c62118f6124f
   */
  static int ext3_release_file (struct inode * inode, struct file * filp)
  {
+       if (EXT3_I(inode)->i_state & EXT3_STATE_FLUSH_ON_CLOSE) {
+               filemap_flush(inode->i_mapping);
+               EXT3_I(inode)->i_state &= ~EXT3_STATE_FLUSH_ON_CLOSE;
+       }
        /* if we are the last writer on the inode, drop the block reservation */
        if ((filp->f_mode & FMODE_WRITE) &&
                        (atomic_read(&inode->i_writecount) == 1))
@@@ -112,7 -116,7 +116,7 @@@ const struct file_operations ext3_file_
        .write          = do_sync_write,
        .aio_read       = generic_file_aio_read,
        .aio_write      = ext3_file_write,
 -      .ioctl          = ext3_ioctl,
 +      .unlocked_ioctl = ext3_ioctl,
  #ifdef CONFIG_COMPAT
        .compat_ioctl   = ext3_compat_ioctl,
  #endif
diff --combined fs/ext3/inode.c
index d3ef6566b0190340b21f94402a811f5a45d4f38d,0f5bca0d82fc8f53b80d91c0c45beec3dcd2623c..466a332e0bd124c871143cfd682392074252aa76
@@@ -1149,15 -1149,12 +1149,15 @@@ static int ext3_write_begin(struct fil
                                struct page **pagep, void **fsdata)
  {
        struct inode *inode = mapping->host;
 -      int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
 +      int ret;
        handle_t *handle;
        int retries = 0;
        struct page *page;
        pgoff_t index;
        unsigned from, to;
 +      /* Reserve one block more for addition to orphan list in case
 +       * we allocate blocks but write fails for some reason */
 +      int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;
  
        index = pos >> PAGE_CACHE_SHIFT;
        from = pos & (PAGE_CACHE_SIZE - 1);
@@@ -1187,19 -1184,14 +1187,19 @@@ retry
        }
  write_begin_failed:
        if (ret) {
 -              ext3_journal_stop(handle);
 -              unlock_page(page);
 -              page_cache_release(page);
                /*
                 * block_write_begin may have instantiated a few blocks
                 * outside i_size.  Trim these off again. Don't need
                 * i_size_read because we hold i_mutex.
 +               *
 +               * Add inode to orphan list in case we crash before truncate
 +               * finishes.
                 */
 +              if (pos + len > inode->i_size)
 +                      ext3_orphan_add(handle, inode);
 +              ext3_journal_stop(handle);
 +              unlock_page(page);
 +              page_cache_release(page);
                if (pos + len > inode->i_size)
                        vmtruncate(inode, inode->i_size);
        }
@@@ -1219,18 -1211,6 +1219,18 @@@ int ext3_journal_dirty_data(handle_t *h
        return err;
  }
  
 +/* For ordered writepage and write_end functions */
 +static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
 +{
 +      /*
 +       * Write could have mapped the buffer but it didn't copy the data in
 +       * yet. So avoid filing such buffer into a transaction.
 +       */
 +      if (buffer_mapped(bh) && buffer_uptodate(bh))
 +              return ext3_journal_dirty_data(handle, bh);
 +      return 0;
 +}
 +
  /* For write_end() in data=journal mode */
  static int write_end_fn(handle_t *handle, struct buffer_head *bh)
  {
  }
  
  /*
 - * Generic write_end handler for ordered and writeback ext3 journal modes.
 - * We can't use generic_write_end, because that unlocks the page and we need to
 - * unlock the page after ext3_journal_stop, but ext3_journal_stop must run
 - * after block_write_end.
 + * This is nasty and subtle: ext3_write_begin() could have allocated blocks
 + * for the whole page but later we failed to copy the data in. Update inode
 + * size according to what we managed to copy. The rest is going to be
 + * truncated in write_end function.
   */
 -static int ext3_generic_write_end(struct file *file,
 -                              struct address_space *mapping,
 -                              loff_t pos, unsigned len, unsigned copied,
 -                              struct page *page, void *fsdata)
 +static void update_file_sizes(struct inode *inode, loff_t pos, unsigned copied)
  {
 -      struct inode *inode = file->f_mapping->host;
 -
 -      copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
 -
 -      if (pos+copied > inode->i_size) {
 -              i_size_write(inode, pos+copied);
 +      /* What matters to us is i_disksize. We don't write i_size anywhere */
 +      if (pos + copied > inode->i_size)
 +              i_size_write(inode, pos + copied);
 +      if (pos + copied > EXT3_I(inode)->i_disksize) {
 +              EXT3_I(inode)->i_disksize = pos + copied;
                mark_inode_dirty(inode);
        }
 -
 -      return copied;
  }
  
  /*
@@@ -1274,29 -1260,35 +1274,29 @@@ static int ext3_ordered_write_end(struc
        unsigned from, to;
        int ret = 0, ret2;
  
 -      from = pos & (PAGE_CACHE_SIZE - 1);
 -      to = from + len;
 +      copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
  
 +      from = pos & (PAGE_CACHE_SIZE - 1);
 +      to = from + copied;
        ret = walk_page_buffers(handle, page_buffers(page),
 -              from, to, NULL, ext3_journal_dirty_data);
 +              from, to, NULL, journal_dirty_data_fn);
  
 -      if (ret == 0) {
 -              /*
 -               * generic_write_end() will run mark_inode_dirty() if i_size
 -               * changes.  So let's piggyback the i_disksize mark_inode_dirty
 -               * into that.
 -               */
 -              loff_t new_i_size;
 -
 -              new_i_size = pos + copied;
 -              if (new_i_size > EXT3_I(inode)->i_disksize)
 -                      EXT3_I(inode)->i_disksize = new_i_size;
 -              ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
 -                                                      page, fsdata);
 -              copied = ret2;
 -              if (ret2 < 0)
 -                      ret = ret2;
 -      }
 +      if (ret == 0)
 +              update_file_sizes(inode, pos, copied);
 +      /*
 +       * There may be allocated blocks outside of i_size because
 +       * we failed to copy some data. Prepare for truncate.
 +       */
 +      if (pos + len > inode->i_size)
 +              ext3_orphan_add(handle, inode);
        ret2 = ext3_journal_stop(handle);
        if (!ret)
                ret = ret2;
        unlock_page(page);
        page_cache_release(page);
  
 +      if (pos + len > inode->i_size)
 +              vmtruncate(inode, inode->i_size);
        return ret ? ret : copied;
  }
  
@@@ -1307,22 -1299,25 +1307,22 @@@ static int ext3_writeback_write_end(str
  {
        handle_t *handle = ext3_journal_current_handle();
        struct inode *inode = file->f_mapping->host;
 -      int ret = 0, ret2;
 -      loff_t new_i_size;
 -
 -      new_i_size = pos + copied;
 -      if (new_i_size > EXT3_I(inode)->i_disksize)
 -              EXT3_I(inode)->i_disksize = new_i_size;
 -
 -      ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
 -                                                      page, fsdata);
 -      copied = ret2;
 -      if (ret2 < 0)
 -              ret = ret2;
 +      int ret;
  
 -      ret2 = ext3_journal_stop(handle);
 -      if (!ret)
 -              ret = ret2;
 +      copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
 +      update_file_sizes(inode, pos, copied);
 +      /*
 +       * There may be allocated blocks outside of i_size because
 +       * we failed to copy some data. Prepare for truncate.
 +       */
 +      if (pos + len > inode->i_size)
 +              ext3_orphan_add(handle, inode);
 +      ret = ext3_journal_stop(handle);
        unlock_page(page);
        page_cache_release(page);
  
 +      if (pos + len > inode->i_size)
 +              vmtruncate(inode, inode->i_size);
        return ret ? ret : copied;
  }
  
@@@ -1343,23 -1338,15 +1343,23 @@@ static int ext3_journalled_write_end(st
        if (copied < len) {
                if (!PageUptodate(page))
                        copied = 0;
 -              page_zero_new_buffers(page, from+copied, to);
 +              page_zero_new_buffers(page, from + copied, to);
 +              to = from + copied;
        }
  
        ret = walk_page_buffers(handle, page_buffers(page), from,
                                to, &partial, write_end_fn);
        if (!partial)
                SetPageUptodate(page);
 -      if (pos+copied > inode->i_size)
 -              i_size_write(inode, pos+copied);
 +
 +      if (pos + copied > inode->i_size)
 +              i_size_write(inode, pos + copied);
 +      /*
 +       * There may be allocated blocks outside of i_size because
 +       * we failed to copy some data. Prepare for truncate.
 +       */
 +      if (pos + len > inode->i_size)
 +              ext3_orphan_add(handle, inode);
        EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
        if (inode->i_size > EXT3_I(inode)->i_disksize) {
                EXT3_I(inode)->i_disksize = inode->i_size;
        unlock_page(page);
        page_cache_release(page);
  
 +      if (pos + len > inode->i_size)
 +              vmtruncate(inode, inode->i_size);
        return ret ? ret : copied;
  }
  
@@@ -1443,9 -1428,11 +1443,9 @@@ static int bput_one(handle_t *handle, s
        return 0;
  }
  
 -static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
 +static int buffer_unmapped(handle_t *handle, struct buffer_head *bh)
  {
 -      if (buffer_mapped(bh))
 -              return ext3_journal_dirty_data(handle, bh);
 -      return 0;
 +      return !buffer_mapped(bh);
  }
  
  /*
@@@ -1518,15 -1505,6 +1518,15 @@@ static int ext3_ordered_writepage(struc
        if (ext3_journal_current_handle())
                goto out_fail;
  
 +      if (!page_has_buffers(page)) {
 +              create_empty_buffers(page, inode->i_sb->s_blocksize,
 +                              (1 << BH_Dirty)|(1 << BH_Uptodate));
 +      } else if (!walk_page_buffers(NULL, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
 +              /* Provide NULL instead of get_block so that we catch bugs if buffers weren't really mapped */
 +              return block_write_full_page(page, NULL, wbc);
 +      }
 +      page_bufs = page_buffers(page);
 +
        handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
  
        if (IS_ERR(handle)) {
                goto out_fail;
        }
  
 -      if (!page_has_buffers(page)) {
 -              create_empty_buffers(page, inode->i_sb->s_blocksize,
 -                              (1 << BH_Dirty)|(1 << BH_Uptodate));
 -      }
 -      page_bufs = page_buffers(page);
        walk_page_buffers(handle, page_bufs, 0,
                        PAGE_CACHE_SIZE, NULL, bget_one);
  
@@@ -2363,6 -2346,9 +2363,9 @@@ void ext3_truncate(struct inode *inode
        if (!ext3_can_truncate(inode))
                return;
  
+       if (inode->i_size == 0 && ext3_should_writeback_data(inode))
+               ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE;
        /*
         * We have to lock the EOF page here, because lock_page() nests
         * outside journal_start().
@@@ -3072,7 -3058,7 +3075,7 @@@ int ext3_setattr(struct dentry *dentry
                        error = PTR_ERR(handle);
                        goto err_out;
                }
 -              error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
 +              error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
                if (error) {
                        ext3_journal_stop(handle);
                        return error;
@@@ -3163,7 -3149,7 +3166,7 @@@ static int ext3_writepage_trans_blocks(
                ret = 2 * (bpp + indirects) + 2;
  
  #ifdef CONFIG_QUOTA
 -      /* We know that structure was already allocated during DQUOT_INIT so
 +      /* We know that structure was already allocated during vfs_dq_init so
         * we will be updating only the data blocks + inodes */
        ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb);
  #endif
@@@ -3254,7 -3240,7 +3257,7 @@@ int ext3_mark_inode_dirty(handle_t *han
   * i_size has been changed by generic_commit_write() and we thus need
   * to include the updated inode in the current transaction.
   *
 - * Also, DQUOT_ALLOC_SPACE() will always dirty the inode when blocks
 + * Also, vfs_dq_alloc_space() will always dirty the inode when blocks
   * are allocated to the file.
   *
   * If the inode is marked synchronous, we don't honour that here - doing
diff --combined fs/ext3/namei.c
index 6ddaa0a42b24a1c7d3576ce1a5287d869f0c2f46,ab98a66ab8c76b15c0b68f68dfbcf0f0f8d64fd4..6ff7b9730234bd97f2a67a1d01b08692630549ba
@@@ -161,12 -161,12 +161,12 @@@ static struct dx_frame *dx_probe(struc
                                 struct dx_frame *frame,
                                 int *err);
  static void dx_release (struct dx_frame *frames);
 -static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
 +static int dx_make_map(struct ext3_dir_entry_2 *de, unsigned blocksize,
                        struct dx_hash_info *hinfo, struct dx_map_entry map[]);
  static void dx_sort_map(struct dx_map_entry *map, unsigned count);
  static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
                struct dx_map_entry *offsets, int count);
 -static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
 +static struct ext3_dir_entry_2 *dx_pack_dirents(char *base, unsigned blocksize);
  static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
  static int ext3_htree_next_block(struct inode *dir, __u32 hash,
                                 struct dx_frame *frame,
@@@ -708,14 -708,14 +708,14 @@@ errout
   * Create map of hash values, offsets, and sizes, stored at end of block.
   * Returns number of entries mapped.
   */
 -static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
 -                      struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
 +static int dx_make_map(struct ext3_dir_entry_2 *de, unsigned blocksize,
 +              struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
  {
        int count = 0;
        char *base = (char *) de;
        struct dx_hash_info h = *hinfo;
  
 -      while ((char *) de < base + size)
 +      while ((char *) de < base + blocksize)
        {
                if (de->name_len && de->inode) {
                        ext3fs_dirhash(de->name, de->name_len, &h);
@@@ -1047,16 -1047,8 +1047,16 @@@ static struct dentry *ext3_lookup(struc
                        return ERR_PTR(-EIO);
                }
                inode = ext3_iget(dir->i_sb, ino);
 -              if (IS_ERR(inode))
 -                      return ERR_CAST(inode);
 +              if (unlikely(IS_ERR(inode))) {
 +                      if (PTR_ERR(inode) == -ESTALE) {
 +                              ext3_error(dir->i_sb, __func__,
 +                                              "deleted inode referenced: %lu",
 +                                              ino);
 +                              return ERR_PTR(-EIO);
 +                      } else {
 +                              return ERR_CAST(inode);
 +                      }
 +              }
        }
        return d_splice_alias(inode, dentry);
  }
@@@ -1128,14 -1120,13 +1128,14 @@@ dx_move_dirents(char *from, char *to, s
   * Compact each dir entry in the range to the minimal rec_len.
   * Returns pointer to last entry in range.
   */
 -static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
 +static struct ext3_dir_entry_2 *dx_pack_dirents(char *base, unsigned blocksize)
  {
 -      struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
 +      struct ext3_dir_entry_2 *next, *to, *prev;
 +      struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *)base;
        unsigned rec_len = 0;
  
        prev = to = de;
 -      while ((char*)de < base + size) {
 +      while ((char *)de < base + blocksize) {
                next = ext3_next_entry(de);
                if (de->inode && de->name_len) {
                        rec_len = EXT3_DIR_REC_LEN(de->name_len);
@@@ -2058,7 -2049,7 +2058,7 @@@ static int ext3_rmdir (struct inode * d
  
        /* Initialize quotas before so that eventual writes go in
         * separate transaction */
 -      DQUOT_INIT(dentry->d_inode);
 +      vfs_dq_init(dentry->d_inode);
        handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
@@@ -2117,7 -2108,7 +2117,7 @@@ static int ext3_unlink(struct inode * d
  
        /* Initialize quotas before so that eventual writes go
         * in separate transaction */
 -      DQUOT_INIT(dentry->d_inode);
 +      vfs_dq_init(dentry->d_inode);
        handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
@@@ -2274,14 -2265,14 +2274,14 @@@ static int ext3_rename (struct inode * 
        struct inode * old_inode, * new_inode;
        struct buffer_head * old_bh, * new_bh, * dir_bh;
        struct ext3_dir_entry_2 * old_de, * new_de;
-       int retval;
+       int retval, flush_file = 0;
  
        old_bh = new_bh = dir_bh = NULL;
  
        /* Initialize quotas before so that eventual writes go
         * in separate transaction */
        if (new_dentry->d_inode)
 -              DQUOT_INIT(new_dentry->d_inode);
 +              vfs_dq_init(new_dentry->d_inode);
        handle = ext3_journal_start(old_dir, 2 *
                                        EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
                                        EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
                ext3_mark_inode_dirty(handle, new_inode);
                if (!new_inode->i_nlink)
                        ext3_orphan_add(handle, new_inode);
+               if (ext3_should_writeback_data(new_inode))
+                       flush_file = 1;
        }
        retval = 0;
  
@@@ -2418,6 -2411,8 +2420,8 @@@ end_rename
        brelse (old_bh);
        brelse (new_bh);
        ext3_journal_stop(handle);
+       if (retval == 0 && flush_file)
+               filemap_flush(old_inode->i_mapping);
        return retval;
  }
  
diff --combined include/linux/ext3_fs.h
index e263acaa405b8deba36ad227c745dfc2fc4c8fad,d2630c56cb34585791af0c1871a3d7ceaafdac76..634a5e5aba3e219844fbffadce72b0361c794203
@@@ -208,6 -208,7 +208,7 @@@ static inline __u32 ext3_mask_flags(umo
  #define EXT3_STATE_JDATA              0x00000001 /* journaled data exists */
  #define EXT3_STATE_NEW                        0x00000002 /* inode is newly created */
  #define EXT3_STATE_XATTR              0x00000004 /* has in-inode xattrs */
+ #define EXT3_STATE_FLUSH_ON_CLOSE     0x00000008
  
  /* Used to pass group descriptor data when online resize is done */
  struct ext3_new_group_input {
@@@ -893,8 -894,9 +894,8 @@@ extern int ext3_fiemap(struct inode *in
                       u64 start, u64 len);
  
  /* ioctl.c */
 -extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
 -                     unsigned long);
 -extern long ext3_compat_ioctl (struct file *, unsigned int, unsigned long);
 +extern long ext3_ioctl(struct file *, unsigned int, unsigned long);
 +extern long ext3_compat_ioctl(struct file *, unsigned int, unsigned long);
  
  /* namei.c */
  extern int ext3_orphan_add(handle_t *, struct inode *);