Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 11 Oct 2008 20:23:48 +0000 (13:23 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 11 Oct 2008 20:23:48 +0000 (13:23 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 11 Oct 2008 20:23:48 +0000 (13:23 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 11 Oct 2008 20:23:48 +0000 (13:23 -0700)
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt

index 0d5394920a31c146ef85cc4df64107e37a6ae386..74484e6964052394bc9e3381c6ddd010f050d7fb 100644 (file)
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -32,9 +32,9 @@ Mailing list: linux-ext4@vger.kernel.org
      you will need to merge your changes with the version from e2fsprogs
      1.41.x.
  
-  - Create a new filesystem using the ext4dev filesystem type:
+  - Create a new filesystem using the ext4 filesystem type:
  
-       # mke2fs -t ext4dev /dev/hda1
+       # mke2fs -t ext4 /dev/hda1
  
      Or configure an existing ext3 filesystem to support extents and set
      the test_fs flag to indicate that it's ok for an in-development
@@ -47,13 +47,13 @@ Mailing list: linux-ext4@vger.kernel.org
  
          # tune2fs -I 256 /dev/hda1
  
-    (Note: we currently do not have tools to convert an ext4dev
+    (Note: we currently do not have tools to convert an ext4
      filesystem back to ext3; so please do not do try this on production
      filesystems.)
  
    - Mounting:
  
-       # mount -t ext4dev /dev/hda1 /wherever
+       # mount -t ext4 /dev/hda1 /wherever
  
    - When comparing performance with other filesystems, remember that
      ext3/4 by default offers higher data integrity guarantees than most.
@@ -177,6 +177,11 @@ barrier=<0|1(*)>   This enables/disables the use of write barriers in
                         your disks are battery-backed in one way or another,
                         disabling barriers may safely improve performance.
  
+inode_readahead=n      This tuning parameter controls the maximum
+                       number of inode table blocks that ext4's inode
+                       table readahead algorithm will pre-read into
+                       the buffer cache.  The default value is 32 blocks.
+
  orlov          (*)     This enables the new Orlov block allocator. It is
                         enabled by default.
  
@@ -252,6 +257,7 @@ stripe=n            Number of filesystem blocks that mballoc will try
  delalloc       (*)     Deferring block allocation until write-out time.
  nodelalloc             Disable delayed allocation. Blocks are allocation
                         when data is copied from user to page cache.
+
  Data Mode
  =========
  There are 3 different data modes:
diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt

new file mode 100644 (file)

index 0000000..1e3defc
--- /dev/null
+++ b/Documentation/filesystems/fiemap.txt
@@ -0,0 +1,228 @@
+============
+Fiemap Ioctl
+============
+
+The fiemap ioctl is an efficient method for userspace to get file
+extent mappings. Instead of block-by-block mapping (such as bmap), fiemap
+returns a list of extents.
+
+
+Request Basics
+--------------
+
+A fiemap request is encoded within struct fiemap:
+
+struct fiemap {
+       __u64   fm_start;        /* logical offset (inclusive) at
+                                 * which to start mapping (in) */
+       __u64   fm_length;       /* logical length of mapping which
+                                 * userspace cares about (in) */
+       __u32   fm_flags;        /* FIEMAP_FLAG_* flags for request (in/out) */
+       __u32   fm_mapped_extents; /* number of extents that were
+                                   * mapped (out) */
+       __u32   fm_extent_count; /* size of fm_extents array (in) */
+       __u32   fm_reserved;
+       struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
+};
+
+
+fm_start and fm_length specify the logical range within the file
+which the process would like mappings for. Extents returned mirror
+those on disk - that is, the logical offset of the 1st returned extent
+may start before fm_start, and the range covered by the last returned
+extent may end after fm_length. All offsets and lengths are in bytes.
+
+Certain flags to modify the way in which mappings are looked up can be
+set in fm_flags. If the kernel doesn't understand some particular
+flags, it will return EBADR and the contents of fm_flags will contain
+the set of flags which caused the error. If the kernel is compatible
+with all flags passed, the contents of fm_flags will be unmodified.
+It is up to userspace to determine whether rejection of a particular
+flag is fatal to its operation. This scheme is intended to allow the
+fiemap interface to grow in the future but without losing
+compatibility with old software.
+
+fm_extent_count specifies the number of elements in the fm_extents[] array
+that can be used to return extents.  If fm_extent_count is zero, then the
+fm_extents[] array is ignored (no extents will be returned), and the
+fm_mapped_extents count will hold the number of extents needed in
+fm_extents[] to hold the file's current mapping.  Note that there is
+nothing to prevent the file from changing between calls to FIEMAP.
+
+The following flags can be set in fm_flags:
+
+* FIEMAP_FLAG_SYNC
+If this flag is set, the kernel will sync the file before mapping extents.
+
+* FIEMAP_FLAG_XATTR
+If this flag is set, the extents returned will describe the inode's
+extended attribute lookup tree, instead of its data tree.
+
+
+Extent Mapping
+--------------
+
+Extent information is returned within the embedded fm_extents array
+which userspace must allocate along with the fiemap structure. The
+number of elements in the fm_extents[] array should be passed via
+fm_extent_count. The number of extents mapped by kernel will be
+returned via fm_mapped_extents. If the number of fiemap_extents
+allocated is less than would be required to map the requested range,
+the maximum number of extents that can be mapped in the fm_extents[]
+array will be returned and fm_mapped_extents will be equal to
+fm_extent_count. In that case, the last extent in the array will not
+complete the requested range and will not have the FIEMAP_EXTENT_LAST
+flag set (see the next section on extent flags).
+
+Each extent is described by a single fiemap_extent structure as
+returned in fm_extents.
+
+struct fiemap_extent {
+       __u64   fe_logical;  /* logical offset in bytes for the start of
+                             * the extent */
+       __u64   fe_physical; /* physical offset in bytes for the start
+                             * of the extent */
+       __u64   fe_length;   /* length in bytes for the extent */
+       __u64   fe_reserved64[2];
+       __u32   fe_flags;    /* FIEMAP_EXTENT_* flags for this extent */
+       __u32   fe_reserved[3];
+};
+
+All offsets and lengths are in bytes and mirror those on disk.  It is valid
+for an extent's logical offset to start before the request or its logical
+length to extend past the request.  Unless FIEMAP_EXTENT_NOT_ALIGNED is
+returned, fe_logical, fe_physical, and fe_length will be aligned to the
+block size of the file system.  With the exception of extents flagged as
+FIEMAP_EXTENT_MERGED, adjacent extents will not be merged.
+
+The fe_flags field contains flags which describe the extent returned.
+A special flag, FIEMAP_EXTENT_LAST is always set on the last extent in
+the file so that the process making fiemap calls can determine when no
+more extents are available, without having to call the ioctl again.
+
+Some flags are intentionally vague and will always be set in the
+presence of other more specific flags. This way a program looking for
+a general property does not have to know all existing and future flags
+which imply that property.
+
+For example, if FIEMAP_EXTENT_DATA_INLINE or FIEMAP_EXTENT_DATA_TAIL
+are set, FIEMAP_EXTENT_NOT_ALIGNED will also be set. A program looking
+for inline or tail-packed data can key on the specific flag. Software
+which simply cares not to try operating on non-aligned extents
+however, can just key on FIEMAP_EXTENT_NOT_ALIGNED, and not have to
+worry about all present and future flags which might imply unaligned
+data. Note that the opposite is not true - it would be valid for
+FIEMAP_EXTENT_NOT_ALIGNED to appear alone.
+
+* FIEMAP_EXTENT_LAST
+This is the last extent in the file. A mapping attempt past this
+extent will return nothing.
+
+* FIEMAP_EXTENT_UNKNOWN
+The location of this extent is currently unknown. This may indicate
+the data is stored on an inaccessible volume or that no storage has
+been allocated for the file yet.
+
+* FIEMAP_EXTENT_DELALLOC
+  - This will also set FIEMAP_EXTENT_UNKNOWN.
+Delayed allocation - while there is data for this extent, its
+physical location has not been allocated yet.
+
+* FIEMAP_EXTENT_ENCODED
+This extent does not consist of plain filesystem blocks but is
+encoded (e.g. encrypted or compressed).  Reading the data in this
+extent via I/O to the block device will have undefined results.
+
+Note that it is *always* undefined to try to update the data
+in-place by writing to the indicated location without the
+assistance of the filesystem, or to access the data using the
+information returned by the FIEMAP interface while the filesystem
+is mounted.  In other words, user applications may only read the
+extent data via I/O to the block device while the filesystem is
+unmounted, and then only if the FIEMAP_EXTENT_ENCODED flag is
+clear; user applications must not try reading or writing to the
+filesystem via the block device under any other circumstances.
+
+* FIEMAP_EXTENT_DATA_ENCRYPTED
+  - This will also set FIEMAP_EXTENT_ENCODED
+The data in this extent has been encrypted by the file system.
+
+* FIEMAP_EXTENT_NOT_ALIGNED
+Extent offsets and length are not guaranteed to be block aligned.
+
+* FIEMAP_EXTENT_DATA_INLINE
+  This will also set FIEMAP_EXTENT_NOT_ALIGNED
+Data is located within a meta data block.
+
+* FIEMAP_EXTENT_DATA_TAIL
+  This will also set FIEMAP_EXTENT_NOT_ALIGNED
+Data is packed into a block with data from other files.
+
+* FIEMAP_EXTENT_UNWRITTEN
+Unwritten extent - the extent is allocated but its data has not been
+initialized.  This indicates the extent's data will be all zero if read
+through the filesystem but the contents are undefined if read directly from
+the device.
+
+* FIEMAP_EXTENT_MERGED
+This will be set when a file does not support extents, i.e., it uses a block
+based addressing scheme.  Since returning an extent for each block back to
+userspace would be highly inefficient, the kernel will try to merge most
+adjacent blocks into 'extents'.
+
+
+VFS -> File System Implementation
+---------------------------------
+
+File systems wishing to support fiemap must implement a ->fiemap callback on
+their inode_operations structure. The fs ->fiemap call is responsible for
+defining its set of supported fiemap flags, and calling a helper function on
+each discovered extent:
+
+struct inode_operations {
+       ...
+
+       int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
+                     u64 len);
+
+->fiemap is passed struct fiemap_extent_info which describes the
+fiemap request:
+
+struct fiemap_extent_info {
+       unsigned int fi_flags;          /* Flags as passed from user */
+       unsigned int fi_extents_mapped; /* Number of mapped extents */
+       unsigned int fi_extents_max;    /* Size of fiemap_extent array */
+       struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */
+};
+
+It is intended that the file system should not need to access any of this
+structure directly.
+
+
+Flag checking should be done at the beginning of the ->fiemap callback via the
+fiemap_check_flags() helper:
+
+int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
+
+The struct fieinfo should be passed in as received from ioctl_fiemap(). The
+set of fiemap flags which the fs understands should be passed via fs_flags. If
+fiemap_check_flags finds invalid user flags, it will place the bad values in
+fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR from
+fiemap_check_flags(), it should immediately exit, returning that error back to
+ioctl_fiemap().
+
+
+For each extent in the request range, the file system should call
+the helper function, fiemap_fill_next_extent():
+
+int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
+                           u64 phys, u64 len, u32 flags, u32 dev);
+
+fiemap_fill_next_extent() will use the passed values to populate the
+next free extent in the fm_extents array. 'General' extent flags will
+automatically be set from specific flags on behalf of the calling file
+system so that the userspace API is not broken.
+
+fiemap_fill_next_extent() returns 0 on success, and 1 when the
+user-supplied fm_extents array is full. If an error is encountered
+while copying the extent to user memory, -EFAULT will be returned.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt

index f566ad9bcb7b6a4a58deab30f085d85f0e39746e..d831d24d2a6c133214ad3663f1d7762138e3dbe4 100644 (file)
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -923,45 +923,44 @@ CPUs.
  The   "procs_blocked" line gives  the  number of  processes currently blocked,
  waiting for I/O to complete.
  
+
  1.9 Ext4 file system parameters
  ------------------------------
-Ext4 file system have one directory per partition under /proc/fs/ext4/
-# ls /proc/fs/ext4/hdc/
-group_prealloc  max_to_scan  mb_groups  mb_history  min_to_scan  order2_req
-stats  stream_req
-
-mb_groups:
-This file gives the details of multiblock allocator buddy cache of free blocks
-
-mb_history:
-Multiblock allocation history.
-
-stats:
-This file indicate whether the multiblock allocator should start collecting
-statistics. The statistics are shown during unmount
-
-group_prealloc:
-The multiblock allocator normalize the block allocation request to
-group_prealloc filesystem blocks if we don't have strip value set.
-The stripe value can be specified at mount time or during mke2fs.
-
-max_to_scan:
-How long multiblock allocator can look for a best extent (in found extents)
-
-min_to_scan:
-How long multiblock allocator  must look for a best extent
-
-order2_req:
-Multiblock allocator use  2^N search using buddies only for requests greater
-than or equal to order2_req. The request size is specfied in file system
-blocks. A value of 2 indicate only if the requests are greater than or equal
-to 4 blocks.
-
-stream_req:
-Files smaller than stream_req are served by the stream allocator, whose
-purpose is to pack requests as close each to other as possible to
-produce smooth I/O traffic. Avalue of 16 indicate that file smaller than 16
-filesystem block size will use group based preallocation.
+
+Information about mounted ext4 file systems can be found in
+/proc/fs/ext4.  Each mounted filesystem will have a directory in
+/proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or
+/proc/fs/ext4/dm-0).   The files in each per-device directory are shown
+in Table 1-10, below.
+
+Table 1-10: Files in /proc/fs/ext4/<devname>
+..............................................................................
+ File            Content                                        
+ mb_groups       details of multiblock allocator buddy cache of free blocks
+ mb_history      multiblock allocation history
+ stats           controls whether the multiblock allocator should start
+                 collecting statistics, which are shown during the unmount
+ group_prealloc  the multiblock allocator will round up allocation
+                 requests to a multiple of this tuning parameter if the
+                 stripe size is not set in the ext4 superblock
+ max_to_scan     The maximum number of extents the multiblock allocator
+                 will search to find the best extent
+ min_to_scan     The minimum number of extents the multiblock allocator
+                 will search to find the best extent
+ order2_req      Tuning parameter which controls the minimum size for 
+                 requests (as a power of 2) where the buddy cache is
+                 used
+ stream_req      Files which have fewer blocks than this tunable
+                 parameter will have their blocks allocated out of a
+                 block group specific preallocation pool, so that small
+                 files are packed closely together.  Each large file
+                 will have its blocks allocated out of its own unique
+                 preallocation pool.
+ inode_readahead Tuning parameter which controls the maximum number of
+                 inode table blocks that ext4's inode table readahead
+                 algorithm will pre-read into the buffer cache
+..............................................................................
+
  
  ------------------------------------------------------------------------------
  Summary
diff --git a/MAINTAINERS b/MAINTAINERS

index 68781ed2b7346902897f6aea46524a76c029185f..587f418ed00d46eceb92cf6251e3c49d1d87c029 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1659,9 +1659,10 @@ L:       linux-ext4@vger.kernel.org
  S:     Maintained
  
  EXT4 FILE SYSTEM
-P:     Stephen Tweedie, Andrew Morton
-M:     sct@redhat.com, akpm@linux-foundation.org, adilger@sun.com
+P:     Theodore Ts'o
+M:     tytso@mit.edu, adilger@sun.com
  L:     linux-ext4@vger.kernel.org
+W:     http://ext4.wiki.kernel.org
  S:     Maintained
  
  F71805F HARDWARE MONITORING DRIVER
diff --git a/fs/Kconfig b/fs/Kconfig

index abccb5dab9a8b3297e1b41a9c76631a357de935e..40183d94b6834a6f1af0d92f1f90f5b50330afac 100644 (file)
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -136,37 +136,51 @@ config EXT3_FS_SECURITY
           If you are not using a security module that requires using
           extended attributes for file security labels, say N.
  
-config EXT4DEV_FS
-       tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
+config EXT4_FS
+       tristate "The Extended 4 (ext4) filesystem"
         select JBD2
         select CRC16
         help
-         Ext4dev is a predecessor filesystem of the next generation
-         extended fs ext4, based on ext3 filesystem code. It will be
-         renamed ext4 fs later, once ext4dev is mature and stabilized.
+         This is the next generation of the ext3 filesystem.
  
           Unlike the change from ext2 filesystem to ext3 filesystem,
-         the on-disk format of ext4dev is not the same as ext3 any more:
-         it is based on extent maps and it supports 48-bit physical block
-         numbers. These combined on-disk format changes will allow
-         ext4dev/ext4 to handle more than 16 TB filesystem volumes --
-         a hard limit that ext3 cannot overcome without changing the
-         on-disk format.
-
-         Other than extent maps and 48-bit block numbers, ext4dev also is
-         likely to have other new features such as persistent preallocation,
-         high resolution time stamps, and larger file support etc.  These
-         features will be added to ext4dev gradually.
+         the on-disk format of ext4 is not forwards compatible with
+         ext3; it is based on extent maps and it supports 48-bit
+         physical block numbers.  The ext4 filesystem also supports delayed
+         allocation, persistent preallocation, high resolution time stamps,
+         and a number of other features to improve performance and speed
+         up fsck time.  For more information, please see the web pages at
+         http://ext4.wiki.kernel.org.
+
+         The ext4 filesystem will support mounting an ext3
+         filesystem; while there will be some performance gains from
+         the delayed allocation and inode table readahead, the best
+         performance gains will require enabling ext4 features in the
+         filesystem, or formatting a new filesystem as an ext4
+         filesystem initially.
  
           To compile this file system support as a module, choose M here. The
           module will be called ext4dev.
  
           If unsure, say N.
  
-config EXT4DEV_FS_XATTR
-       bool "Ext4dev extended attributes"
-       depends on EXT4DEV_FS
+config EXT4DEV_COMPAT
+       bool "Enable ext4dev compatibility"
+       depends on EXT4_FS
+       help
+         Starting with 2.6.28, the name of the ext4 filesystem was
+         renamed from ext4dev to ext4.  Unfortunately there are some
+         legacy userspace programs (such as klibc's fstype) that have
+         "ext4dev" hardcoded.  
+
+         To enable backwards compatibility so that systems that are
+         still expecting to mount ext4 filesystems using ext4dev,
+         choose Y here.   This feature will go away by 2.6.31, so
+         please arrange to get your userspace programs fixed!
+
+config EXT4_FS_XATTR
+       bool "Ext4 extended attributes"
+       depends on EXT4_FS
         default y
         help
           Extended attributes are name:value pairs associated with inodes by
@@ -175,11 +189,11 @@ config EXT4DEV_FS_XATTR
  
           If unsure, say N.
  
-         You need this for POSIX ACL support on ext4dev/ext4.
+         You need this for POSIX ACL support on ext4.
  
-config EXT4DEV_FS_POSIX_ACL
-       bool "Ext4dev POSIX Access Control Lists"
-       depends on EXT4DEV_FS_XATTR
+config EXT4_FS_POSIX_ACL
+       bool "Ext4 POSIX Access Control Lists"
+       depends on EXT4_FS_XATTR
         select FS_POSIX_ACL
         help
           POSIX Access Control Lists (ACLs) support permissions for users and
@@ -190,14 +204,14 @@ config EXT4DEV_FS_POSIX_ACL
  
           If you don't know what Access Control Lists are, say N
  
-config EXT4DEV_FS_SECURITY
-       bool "Ext4dev Security Labels"
-       depends on EXT4DEV_FS_XATTR
+config EXT4_FS_SECURITY
+       bool "Ext4 Security Labels"
+       depends on EXT4_FS_XATTR
         help
           Security labels support alternative access control models
           implemented by security modules like SELinux.  This option
           enables an extended attribute handler for file security
-         labels in the ext4dev/ext4 filesystem.
+         labels in the ext4 filesystem.
  
           If you are not using a security module that requires using
           extended attributes for file security labels, say N.
@@ -240,22 +254,22 @@ config JBD2
         help
           This is a generic journaling layer for block devices that support
           both 32-bit and 64-bit block numbers.  It is currently used by
-         the ext4dev/ext4 filesystem, but it could also be used to add
+         the ext4 filesystem, but it could also be used to add
           journal support to other file systems or block devices such
           as RAID or LVM.
  
-         If you are using ext4dev/ext4, you need to say Y here. If you are not
-         using ext4dev/ext4 then you will probably want to say N.
+         If you are using ext4, you need to say Y here. If you are not
+         using ext4 then you will probably want to say N.
  
           To compile this device as a module, choose M here. The module will be
-         called jbd2.  If you are compiling ext4dev/ext4 into the kernel,
+         called jbd2.  If you are compiling ext4 into the kernel,
           you cannot compile this code as a module.
  
  config JBD2_DEBUG
-       bool "JBD2 (ext4dev/ext4) debugging support"
+       bool "JBD2 (ext4) debugging support"
         depends on JBD2 && DEBUG_FS
         help
-         If you are using the ext4dev/ext4 journaled file system (or
+         If you are using the ext4 journaled file system (or
           potentially any other filesystem/device using JBD2), this option
           allows you to enable debugging output while the system is running,
           in order to help track down any problems you are having.
@@ -270,9 +284,9 @@ config JBD2_DEBUG
  config FS_MBCACHE
  # Meta block cache for Extended Attributes (ext2/ext3/ext4)
         tristate
-       depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR
-       default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y
-       default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m
+       depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR
+       default y if EXT2_FS=y || EXT3_FS=y || EXT4_FS=y
+       default m if EXT2_FS=m || EXT3_FS=m || EXT4_FS=m
  
  config REISERFS_FS
         tristate "Reiserfs support"
diff --git a/fs/Makefile b/fs/Makefile

index a1482a5eff153384c4f1f4b2e9a1a30b8f113d4d..de404b00eb0cad255e28667472d7d2d75c43635a 100644 (file)
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -69,7 +69,7 @@ obj-$(CONFIG_DLM)             += dlm/
  # Do not add any filesystems before this line
  obj-$(CONFIG_REISERFS_FS)      += reiserfs/
  obj-$(CONFIG_EXT3_FS)          += ext3/ # Before ext2 so root fs can be ext3
-obj-$(CONFIG_EXT4DEV_FS)       += ext4/ # Before ext2 so root fs can be ext4dev
+obj-$(CONFIG_EXT4_FS)          += ext4/ # Before ext2 so root fs can be ext4dev
  obj-$(CONFIG_JBD)              += jbd/
  obj-$(CONFIG_JBD2)             += jbd2/
  obj-$(CONFIG_EXT2_FS)          += ext2/
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h

index 47d88da2d33b5fab73349fa26776f7773d98d194..bae998c1e44eea3afef4a2e834d75d08ca5c472e 100644 (file)
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -133,6 +133,8 @@ extern void ext2_truncate (struct inode *);
  extern int ext2_setattr (struct dentry *, struct iattr *);
  extern void ext2_set_inode_flags(struct inode *inode);
  extern void ext2_get_inode_flags(struct ext2_inode_info *);
+extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+                      u64 start, u64 len);
  int __ext2_write_begin(struct file *file, struct address_space *mapping,
                 loff_t pos, unsigned len, unsigned flags,
                 struct page **pagep, void **fsdata);
diff --git a/fs/ext2/file.c b/fs/ext2/file.c

index 5f2fa9c36293d835722ec1371b820b07d5c5d9b5..45ed071221821bac70f2d92d817cf24338cb4ccc 100644 (file)
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -86,4 +86,5 @@ const struct inode_operations ext2_file_inode_operations = {
  #endif
         .setattr        = ext2_setattr,
         .permission     = ext2_permission,
+       .fiemap         = ext2_fiemap,
  };
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c

index 991d6dfeb51f078bfe746e6dede400e7e2525817..7658b33e2653c5da7eb0c00c8fc7dc12313f7745 100644 (file)
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -31,6 +31,7 @@
  #include <linux/writeback.h>
  #include <linux/buffer_head.h>
  #include <linux/mpage.h>
+#include <linux/fiemap.h>
  #include "ext2.h"
  #include "acl.h"
  #include "xip.h"
@@ -704,6 +705,13 @@ int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_
  
  }
  
+int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+               u64 start, u64 len)
+{
+       return generic_block_fiemap(inode, fieinfo, start, len,
+                                   ext2_get_block);
+}
+
  static int ext2_writepage(struct page *page, struct writeback_control *wbc)
  {
         return block_write_full_page(page, ext2_get_block, wbc);
diff --git a/fs/ext3/file.c b/fs/ext3/file.c

index acc4913d30199079007726e8c4a49b4a07dd77cc..3be1e0689c9aa34443b5695f7800c41fbcafceef 100644 (file)
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -134,5 +134,6 @@ const struct inode_operations ext3_file_inode_operations = {
         .removexattr    = generic_removexattr,
  #endif
         .permission     = ext3_permission,
+       .fiemap         = ext3_fiemap,
  };
  
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c

index 507d8689b111662b403c874e2c269417635f5555..ebfec4d0148e9e272e2a25efd0e3b551381408c6 100644 (file)
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -36,6 +36,7 @@
  #include <linux/mpage.h>
  #include <linux/uio.h>
  #include <linux/bio.h>
+#include <linux/fiemap.h>
  #include "xattr.h"
  #include "acl.h"
  
@@ -981,6 +982,13 @@ out:
         return ret;
  }
  
+int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+               u64 start, u64 len)
+{
+       return generic_block_fiemap(inode, fieinfo, start, len,
+                                   ext3_get_block);
+}
+
  /*
   * `handle' can be NULL if create is zero
   */
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile

index ac6fa8ca0a2f1b7b3a99f03accce58f6f666e209..a8ff003a00f70b8e09958fcdfa3491c0a4bc5a8f 100644 (file)
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -2,12 +2,12 @@
  # Makefile for the linux ext4-filesystem routines.
  #
  
-obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
+obj-$(CONFIG_EXT4_FS) += ext4.o
  
-ext4dev-y      := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
                    ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
                    ext4_jbd2.o migrate.o mballoc.o
  
-ext4dev-$(CONFIG_EXT4DEV_FS_XATTR)     += xattr.o xattr_user.o xattr_trusted.o
-ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o
-ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY)  += xattr_security.o
+ext4-$(CONFIG_EXT4_FS_XATTR)           += xattr.o xattr_user.o xattr_trusted.o
+ext4-$(CONFIG_EXT4_FS_POSIX_ACL)       += acl.o
+ext4-$(CONFIG_EXT4_FS_SECURITY)                += xattr_security.o
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h

index cd2b855a07d669988f817f6e7415088b8dcefed6..cb45257a246e888467f8db4bfb6822b00e7fddd7 100644 (file)
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -51,18 +51,18 @@ static inline int ext4_acl_count(size_t size)
         }
  }
  
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
  
  /* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
     if the ACL has not been cached */
  #define EXT4_ACL_NOT_CACHED ((void *)-1)
  
  /* acl.c */
-extern int ext4_permission (struct inode *, int);
-extern int ext4_acl_chmod (struct inode *);
-extern int ext4_init_acl (handle_t *, struct inode *, struct inode *);
+extern int ext4_permission(struct inode *, int);
+extern int ext4_acl_chmod(struct inode *);
+extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
  
-#else  /* CONFIG_EXT4DEV_FS_POSIX_ACL */
+#else  /* CONFIG_EXT4_FS_POSIX_ACL */
  #include <linux/sched.h>
  #define ext4_permission NULL
  
@@ -77,5 +77,5 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
  {
         return 0;
  }
-#endif  /* CONFIG_EXT4DEV_FS_POSIX_ACL */
+#endif  /* CONFIG_EXT4_FS_POSIX_ACL */
  
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c

index e9fa960ba6da9fa6cf573fc517d70808e519ab83..bd2ece22882755b02599563a1bee89c5b10d2b87 100644 (file)
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -83,6 +83,7 @@ static int ext4_group_used_meta_blocks(struct super_block *sb,
         }
         return used_blocks;
  }
+
  /* Initializes an uninitialized block bitmap if given, and returns the
   * number of blocks free in the group. */
  unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
@@ -132,7 +133,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
                  */
                 group_blocks = ext4_blocks_count(sbi->s_es) -
                         le32_to_cpu(sbi->s_es->s_first_data_block) -
-                       (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1));
+                       (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
         } else {
                 group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
         }
@@ -200,20 +201,20 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
   * @bh:                        pointer to the buffer head to store the block
   *                     group descriptor
   */
-struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
+struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
                                              ext4_group_t block_group,
-                                            struct buffer_head ** bh)
+                                            struct buffer_head **bh)
  {
         unsigned long group_desc;
         unsigned long offset;
-       struct ext4_group_desc * desc;
+       struct ext4_group_desc *desc;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
  
         if (block_group >= sbi->s_groups_count) {
-               ext4_error (sb, "ext4_get_group_desc",
-                           "block_group >= groups_count - "
-                           "block_group = %lu, groups_count = %lu",
-                           block_group, sbi->s_groups_count);
+               ext4_error(sb, "ext4_get_group_desc",
+                          "block_group >= groups_count - "
+                          "block_group = %lu, groups_count = %lu",
+                          block_group, sbi->s_groups_count);
  
                 return NULL;
         }
@@ -222,10 +223,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
         group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
         offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
         if (!sbi->s_group_desc[group_desc]) {
-               ext4_error (sb, "ext4_get_group_desc",
-                           "Group descriptor not loaded - "
-                           "block_group = %lu, group_desc = %lu, desc = %lu",
-                            block_group, group_desc, offset);
+               ext4_error(sb, "ext4_get_group_desc",
+                          "Group descriptor not loaded - "
+                          "block_group = %lu, group_desc = %lu, desc = %lu",
+                          block_group, group_desc, offset);
                 return NULL;
         }
  
@@ -302,8 +303,8 @@ err_out:
  struct buffer_head *
  ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
  {
-       struct ext4_group_desc * desc;
-       struct buffer_head * bh = NULL;
+       struct ext4_group_desc *desc;
+       struct buffer_head *bh = NULL;
         ext4_fsblk_t bitmap_blk;
  
         desc = ext4_get_group_desc(sb, block_group, NULL);
@@ -318,9 +319,11 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
                             block_group, bitmap_blk);
                 return NULL;
         }
-       if (bh_uptodate_or_lock(bh))
+       if (buffer_uptodate(bh) &&
+           !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
                 return bh;
  
+       lock_buffer(bh);
         spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
         if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
                 ext4_init_block_bitmap(sb, bh, block_group, desc);
@@ -345,301 +348,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
          */
         return bh;
  }
-/*
- * The reservation window structure operations
- * --------------------------------------------
- * Operations include:
- * dump, find, add, remove, is_empty, find_next_reservable_window, etc.
- *
- * We use a red-black tree to represent per-filesystem reservation
- * windows.
- *
- */
-
-/**
- * __rsv_window_dump() -- Dump the filesystem block allocation reservation map
- * @rb_root:           root of per-filesystem reservation rb tree
- * @verbose:           verbose mode
- * @fn:                        function which wishes to dump the reservation map
- *
- * If verbose is turned on, it will print the whole block reservation
- * windows(start, end).        Otherwise, it will only print out the "bad" windows,
- * those windows that overlap with their immediate neighbors.
- */
-#if 1
-static void __rsv_window_dump(struct rb_root *root, int verbose,
-                             const char *fn)
-{
-       struct rb_node *n;
-       struct ext4_reserve_window_node *rsv, *prev;
-       int bad;
-
-restart:
-       n = rb_first(root);
-       bad = 0;
-       prev = NULL;
-
-       printk("Block Allocation Reservation Windows Map (%s):\n", fn);
-       while (n) {
-               rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
-               if (verbose)
-                       printk("reservation window 0x%p "
-                              "start:  %llu, end:  %llu\n",
-                              rsv, rsv->rsv_start, rsv->rsv_end);
-               if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
-                       printk("Bad reservation %p (start >= end)\n",
-                              rsv);
-                       bad = 1;
-               }
-               if (prev && prev->rsv_end >= rsv->rsv_start) {
-                       printk("Bad reservation %p (prev->end >= start)\n",
-                              rsv);
-                       bad = 1;
-               }
-               if (bad) {
-                       if (!verbose) {
-                               printk("Restarting reservation walk in verbose mode\n");
-                               verbose = 1;
-                               goto restart;
-                       }
-               }
-               n = rb_next(n);
-               prev = rsv;
-       }
-       printk("Window map complete.\n");
-       BUG_ON(bad);
-}
-#define rsv_window_dump(root, verbose) \
-       __rsv_window_dump((root), (verbose), __func__)
-#else
-#define rsv_window_dump(root, verbose) do {} while (0)
-#endif
-
-/**
- * goal_in_my_reservation()
- * @rsv:               inode's reservation window
- * @grp_goal:          given goal block relative to the allocation block group
- * @group:             the current allocation block group
- * @sb:                        filesystem super block
- *
- * Test if the given goal block (group relative) is within the file's
- * own block reservation window range.
- *
- * If the reservation window is outside the goal allocation group, return 0;
- * grp_goal (given goal block) could be -1, which means no specific
- * goal block. In this case, always return 1.
- * If the goal block is within the reservation window, return 1;
- * otherwise, return 0;
- */
-static int
-goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
-                       ext4_group_t group, struct super_block *sb)
-{
-       ext4_fsblk_t group_first_block, group_last_block;
-
-       group_first_block = ext4_group_first_block_no(sb, group);
-       group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
-
-       if ((rsv->_rsv_start > group_last_block) ||
-           (rsv->_rsv_end < group_first_block))
-               return 0;
-       if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start)
-               || (grp_goal + group_first_block > rsv->_rsv_end)))
-               return 0;
-       return 1;
-}
-
-/**
- * search_reserve_window()
- * @rb_root:           root of reservation tree
- * @goal:              target allocation block
- *
- * Find the reserved window which includes the goal, or the previous one
- * if the goal is not in any window.
- * Returns NULL if there are no windows or if all windows start after the goal.
- */
-static struct ext4_reserve_window_node *
-search_reserve_window(struct rb_root *root, ext4_fsblk_t goal)
-{
-       struct rb_node *n = root->rb_node;
-       struct ext4_reserve_window_node *rsv;
-
-       if (!n)
-               return NULL;
-
-       do {
-               rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
-
-               if (goal < rsv->rsv_start)
-                       n = n->rb_left;
-               else if (goal > rsv->rsv_end)
-                       n = n->rb_right;
-               else
-                       return rsv;
-       } while (n);
-       /*
-        * We've fallen off the end of the tree: the goal wasn't inside
-        * any particular node.  OK, the previous node must be to one
-        * side of the interval containing the goal.  If it's the RHS,
-        * we need to back up one.
-        */
-       if (rsv->rsv_start > goal) {
-               n = rb_prev(&rsv->rsv_node);
-               rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
-       }
-       return rsv;
-}
-
-/**
- * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree.
- * @sb:                        super block
- * @rsv:               reservation window to add
- *
- * Must be called with rsv_lock hold.
- */
-void ext4_rsv_window_add(struct super_block *sb,
-                   struct ext4_reserve_window_node *rsv)
-{
-       struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root;
-       struct rb_node *node = &rsv->rsv_node;
-       ext4_fsblk_t start = rsv->rsv_start;
-
-       struct rb_node ** p = &root->rb_node;
-       struct rb_node * parent = NULL;
-       struct ext4_reserve_window_node *this;
-
-       while (*p)
-       {
-               parent = *p;
-               this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node);
-
-               if (start < this->rsv_start)
-                       p = &(*p)->rb_left;
-               else if (start > this->rsv_end)
-                       p = &(*p)->rb_right;
-               else {
-                       rsv_window_dump(root, 1);
-                       BUG();
-               }
-       }
-
-       rb_link_node(node, parent, p);
-       rb_insert_color(node, root);
-}
-
-/**
- * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree
- * @sb:                        super block
- * @rsv:               reservation window to remove
- *
- * Mark the block reservation window as not allocated, and unlink it
- * from the filesystem reservation window rb tree. Must be called with
- * rsv_lock hold.
- */
-static void rsv_window_remove(struct super_block *sb,
-                             struct ext4_reserve_window_node *rsv)
-{
-       rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
-       rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
-       rsv->rsv_alloc_hit = 0;
-       rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root);
-}
-
-/*
- * rsv_is_empty() -- Check if the reservation window is allocated.
- * @rsv:               given reservation window to check
- *
- * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
- */
-static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
-{
-       /* a valid reservation end block could not be 0 */
-       return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
-}
-
-/**
- * ext4_init_block_alloc_info()
- * @inode:             file inode structure
- *
- * Allocate and initialize the reservation window structure, and
- * link the window to the ext4 inode structure at last
- *
- * The reservation window structure is only dynamically allocated
- * and linked to ext4 inode the first time the open file
- * needs a new block. So, before every ext4_new_block(s) call, for
- * regular files, we should check whether the reservation window
- * structure exists or not. In the latter case, this function is called.
- * Fail to do so will result in block reservation being turned off for that
- * open file.
- *
- * This function is called from ext4_get_blocks_handle(), also called
- * when setting the reservation window size through ioctl before the file
- * is open for write (needs block allocation).
- *
- * Needs down_write(i_data_sem) protection prior to call this function.
- */
-void ext4_init_block_alloc_info(struct inode *inode)
-{
-       struct ext4_inode_info *ei = EXT4_I(inode);
-       struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
-       struct super_block *sb = inode->i_sb;
-
-       block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
-       if (block_i) {
-               struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node;
-
-               rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
-               rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
-
-               /*
-                * if filesystem is mounted with NORESERVATION, the goal
-                * reservation window size is set to zero to indicate
-                * block reservation is off
-                */
-               if (!test_opt(sb, RESERVATION))
-                       rsv->rsv_goal_size = 0;
-               else
-                       rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS;
-               rsv->rsv_alloc_hit = 0;
-               block_i->last_alloc_logical_block = 0;
-               block_i->last_alloc_physical_block = 0;
-       }
-       ei->i_block_alloc_info = block_i;
-}
-
-/**
- * ext4_discard_reservation()
- * @inode:             inode
- *
- * Discard(free) block reservation window on last file close, or truncate
- * or at last iput().
- *
- * It is being called in three cases:
- *     ext4_release_file(): last writer close the file
- *     ext4_clear_inode(): last iput(), when nobody link to this file.
- *     ext4_truncate(): when the block indirect map is about to change.
- *
- */
-void ext4_discard_reservation(struct inode *inode)
-{
-       struct ext4_inode_info *ei = EXT4_I(inode);
-       struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
-       struct ext4_reserve_window_node *rsv;
-       spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;
-
-       ext4_mb_discard_inode_preallocations(inode);
-
-       if (!block_i)
-               return;
-
-       rsv = &block_i->rsv_window_node;
-       if (!rsv_is_empty(&rsv->rsv_window)) {
-               spin_lock(rsv_lock);
-               if (!rsv_is_empty(&rsv->rsv_window))
-                       rsv_window_remove(inode->i_sb, rsv);
-               spin_unlock(rsv_lock);
-       }
-}
  
  /**
   * ext4_free_blocks_sb() -- Free given blocks and update quota
@@ -648,6 +356,13 @@ void ext4_discard_reservation(struct inode *inode)
   * @block:                     start physcial block to free
   * @count:                     number of blocks to free
   * @pdquot_freed_blocks:       pointer to quota
+ *
+ * XXX This function is only used by the on-line resizing code, which
+ * should probably be fixed up to call the mballoc variant.  This
+ * needs to be cleaned up later; in fact, I'm not convinced this
+ * is 100% correct in the face of the mballoc code.  The online resizing
+ * code needs to be fixed up to more tightly (and correctly) interlock
+ * with the mballoc code.
   */
  void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
                          ext4_fsblk_t block, unsigned long count,
@@ -659,8 +374,8 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
         ext4_grpblk_t bit;
         unsigned long i;
         unsigned long overflow;
-       struct ext4_group_desc * desc;
-       struct ext4_super_block * es;
+       struct ext4_group_desc *desc;
+       struct ext4_super_block *es;
         struct ext4_sb_info *sbi;
         int err = 0, ret;
         ext4_grpblk_t group_freed;
@@ -671,13 +386,13 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
         if (block < le32_to_cpu(es->s_first_data_block) ||
             block + count < block ||
             block + count > ext4_blocks_count(es)) {
-               ext4_error (sb, "ext4_free_blocks",
-                           "Freeing blocks not in datazone - "
-                           "block = %llu, count = %lu", block, count);
+               ext4_error(sb, "ext4_free_blocks",
+                          "Freeing blocks not in datazone - "
+                          "block = %llu, count = %lu", block, count);
                 goto error_return;
         }
  
-       ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1);
+       ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1);
  
  do_more:
         overflow = 0;
@@ -694,7 +409,7 @@ do_more:
         bitmap_bh = ext4_read_block_bitmap(sb, block_group);
         if (!bitmap_bh)
                 goto error_return;
-       desc = ext4_get_group_desc (sb, block_group, &gd_bh);
+       desc = ext4_get_group_desc(sb, block_group, &gd_bh);
         if (!desc)
                 goto error_return;
  
@@ -703,10 +418,10 @@ do_more:
             in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
             in_range(block + count - 1, ext4_inode_table(sb, desc),
                      sbi->s_itb_per_group)) {
-               ext4_error (sb, "ext4_free_blocks",
-                           "Freeing blocks in system zones - "
-                           "Block = %llu, count = %lu",
-                           block, count);
+               ext4_error(sb, "ext4_free_blocks",
+                          "Freeing blocks in system zones - "
+                          "Block = %llu, count = %lu",
+                          block, count);
                 goto error_return;
         }
  
@@ -848,7 +563,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
                         ext4_fsblk_t block, unsigned long count,
                         int metadata)
  {
-       struct super_block * sb;
+       struct super_block *sb;
         unsigned long dquot_freed_blocks;
  
         /* this isn't the right place to decide whether block is metadata
@@ -859,747 +574,51 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
  
         sb = inode->i_sb;
  
-       if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info)
-               ext4_free_blocks_sb(handle, sb, block, count,
-                                               &dquot_freed_blocks);
-       else
-               ext4_mb_free_blocks(handle, inode, block, count,
-                                               metadata, &dquot_freed_blocks);
+       ext4_mb_free_blocks(handle, inode, block, count,
+                           metadata, &dquot_freed_blocks);
         if (dquot_freed_blocks)
                 DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
         return;
  }
  
-/**
- * ext4_test_allocatable()
- * @nr:                        given allocation block group
- * @bh:                        bufferhead contains the bitmap of the given block group
- *
- * For ext4 allocations, we must not reuse any blocks which are
- * allocated in the bitmap buffer's "last committed data" copy.  This
- * prevents deletes from freeing up the page for reuse until we have
- * committed the delete transaction.
- *
- * If we didn't do this, then deleting something and reallocating it as
- * data would allow the old block to be overwritten before the
- * transaction committed (because we force data to disk before commit).
- * This would lead to corruption if we crashed between overwriting the
- * data and committing the delete.
- *
- * @@@ We may want to make this allocation behaviour conditional on
- * data-writes at some point, and disable it for metadata allocations or
- * sync-data inodes.
- */
-static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh)
-{
-       int ret;
-       struct journal_head *jh = bh2jh(bh);
-
-       if (ext4_test_bit(nr, bh->b_data))
-               return 0;
-
-       jbd_lock_bh_state(bh);
-       if (!jh->b_committed_data)
-               ret = 1;
-       else
-               ret = !ext4_test_bit(nr, jh->b_committed_data);
-       jbd_unlock_bh_state(bh);
-       return ret;
-}
-
-/**
- * bitmap_search_next_usable_block()
- * @start:             the starting block (group relative) of the search
- * @bh:                        bufferhead contains the block group bitmap
- * @maxblocks:         the ending block (group relative) of the reservation
- *
- * The bitmap search --- search forward alternately through the actual
- * bitmap on disk and the last-committed copy in journal, until we find a
- * bit free in both bitmaps.
- */
-static ext4_grpblk_t
-bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
-                                       ext4_grpblk_t maxblocks)
+int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
+                                               s64 nblocks)
  {
-       ext4_grpblk_t next;
-       struct journal_head *jh = bh2jh(bh);
-
-       while (start < maxblocks) {
-               next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start);
-               if (next >= maxblocks)
-                       return -1;
-               if (ext4_test_allocatable(next, bh))
-                       return next;
-               jbd_lock_bh_state(bh);
-               if (jh->b_committed_data)
-                       start = ext4_find_next_zero_bit(jh->b_committed_data,
-                                                       maxblocks, next);
-               jbd_unlock_bh_state(bh);
-       }
-       return -1;
-}
+       s64 free_blocks, dirty_blocks;
+       s64 root_blocks = 0;
+       struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
+       struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
  
-/**
- * find_next_usable_block()
- * @start:             the starting block (group relative) to find next
- *                     allocatable block in bitmap.
- * @bh:                        bufferhead contains the block group bitmap
- * @maxblocks:         the ending block (group relative) for the search
- *
- * Find an allocatable block in a bitmap.  We honor both the bitmap and
- * its last-committed copy (if that exists), and perform the "most
- * appropriate allocation" algorithm of looking for a free block near
- * the initial goal; then for a free byte somewhere in the bitmap; then
- * for any free bit in the bitmap.
- */
-static ext4_grpblk_t
-find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
-                       ext4_grpblk_t maxblocks)
-{
-       ext4_grpblk_t here, next;
-       char *p, *r;
-
-       if (start > 0) {
-               /*
-                * The goal was occupied; search forward for a free
-                * block within the next XX blocks.
-                *
-                * end_goal is more or less random, but it has to be
-                * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the
-                * next 64-bit boundary is simple..
-                */
-               ext4_grpblk_t end_goal = (start + 63) & ~63;
-               if (end_goal > maxblocks)
-                       end_goal = maxblocks;
-               here = ext4_find_next_zero_bit(bh->b_data, end_goal, start);
-               if (here < end_goal && ext4_test_allocatable(here, bh))
-                       return here;
-               ext4_debug("Bit not found near goal\n");
-       }
-
-       here = start;
-       if (here < 0)
-               here = 0;
-
-       p = ((char *)bh->b_data) + (here >> 3);
-       r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
-       next = (r - ((char *)bh->b_data)) << 3;
-
-       if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh))
-               return next;
-
-       /*
-        * The bitmap search --- search forward alternately through the actual
-        * bitmap and the last-committed copy until we find a bit free in
-        * both
-        */
-       here = bitmap_search_next_usable_block(here, bh, maxblocks);
-       return here;
-}
-
-/**
- * claim_block()
- * @block:             the free block (group relative) to allocate
- * @bh:                        the bufferhead containts the block group bitmap
- *
- * We think we can allocate this block in this bitmap.  Try to set the bit.
- * If that succeeds then check that nobody has allocated and then freed the
- * block since we saw that is was not marked in b_committed_data.  If it _was_
- * allocated and freed then clear the bit in the bitmap again and return
- * zero (failure).
- */
-static inline int
-claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh)
-{
-       struct journal_head *jh = bh2jh(bh);
-       int ret;
-
-       if (ext4_set_bit_atomic(lock, block, bh->b_data))
-               return 0;
-       jbd_lock_bh_state(bh);
-       if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) {
-               ext4_clear_bit_atomic(lock, block, bh->b_data);
-               ret = 0;
-       } else {
-               ret = 1;
-       }
-       jbd_unlock_bh_state(bh);
-       return ret;
-}
+       free_blocks  = percpu_counter_read_positive(fbc);
+       dirty_blocks = percpu_counter_read_positive(dbc);
  
-/**
- * ext4_try_to_allocate()
- * @sb:                        superblock
- * @handle:            handle to this transaction
- * @group:             given allocation block group
- * @bitmap_bh:         bufferhead holds the block bitmap
- * @grp_goal:          given target block within the group
- * @count:             target number of blocks to allocate
- * @my_rsv:            reservation window
- *
- * Attempt to allocate blocks within a give range. Set the range of allocation
- * first, then find the first free bit(s) from the bitmap (within the range),
- * and at last, allocate the blocks by claiming the found free bit as allocated.
- *
- * To set the range of this allocation:
- *     if there is a reservation window, only try to allocate block(s) from the
- *     file's own reservation window;
- *     Otherwise, the allocation range starts from the give goal block, ends at
- *     the block group's last block.
- *
- * If we failed to allocate the desired block then we may end up crossing to a
- * new bitmap.  In that case we must release write access to the old one via
- * ext4_journal_release_buffer(), else we'll run out of credits.
- */
-static ext4_grpblk_t
-ext4_try_to_allocate(struct super_block *sb, handle_t *handle,
-                       ext4_group_t group, struct buffer_head *bitmap_bh,
-                       ext4_grpblk_t grp_goal, unsigned long *count,
-                       struct ext4_reserve_window *my_rsv)
-{
-       ext4_fsblk_t group_first_block;
-       ext4_grpblk_t start, end;
-       unsigned long num = 0;
-
-       /* we do allocation within the reservation window if we have a window */
-       if (my_rsv) {
-               group_first_block = ext4_group_first_block_no(sb, group);
-               if (my_rsv->_rsv_start >= group_first_block)
-                       start = my_rsv->_rsv_start - group_first_block;
-               else
-                       /* reservation window cross group boundary */
-                       start = 0;
-               end = my_rsv->_rsv_end - group_first_block + 1;
-               if (end > EXT4_BLOCKS_PER_GROUP(sb))
-                       /* reservation window crosses group boundary */
-                       end = EXT4_BLOCKS_PER_GROUP(sb);
-               if ((start <= grp_goal) && (grp_goal < end))
-                       start = grp_goal;
-               else
-                       grp_goal = -1;
-       } else {
-               if (grp_goal > 0)
-                       start = grp_goal;
-               else
-                       start = 0;
-               end = EXT4_BLOCKS_PER_GROUP(sb);
-       }
-
-       BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb));
-
-repeat:
-       if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) {
-               grp_goal = find_next_usable_block(start, bitmap_bh, end);
-               if (grp_goal < 0)
-                       goto fail_access;
-               if (!my_rsv) {
-                       int i;
-
-                       for (i = 0; i < 7 && grp_goal > start &&
-                                       ext4_test_allocatable(grp_goal - 1,
-                                                               bitmap_bh);
-                                       i++, grp_goal--)
-                               ;
-               }
-       }
-       start = grp_goal;
-
-       if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group),
-               grp_goal, bitmap_bh)) {
-               /*
-                * The block was allocated by another thread, or it was
-                * allocated and then freed by another thread
-                */
-               start++;
-               grp_goal++;
-               if (start >= end)
-                       goto fail_access;
-               goto repeat;
-       }
-       num++;
-       grp_goal++;
-       while (num < *count && grp_goal < end
-               && ext4_test_allocatable(grp_goal, bitmap_bh)
-               && claim_block(sb_bgl_lock(EXT4_SB(sb), group),
-                               grp_goal, bitmap_bh)) {
-               num++;
-               grp_goal++;
-       }
-       *count = num;
-       return grp_goal - num;
-fail_access:
-       *count = num;
-       return -1;
-}
-
-/**
- *     find_next_reservable_window():
- *             find a reservable space within the given range.
- *             It does not allocate the reservation window for now:
- *             alloc_new_reservation() will do the work later.
- *
- *     @search_head: the head of the searching list;
- *             This is not necessarily the list head of the whole filesystem
- *
- *             We have both head and start_block to assist the search
- *             for the reservable space. The list starts from head,
- *             but we will shift to the place where start_block is,
- *             then start from there, when looking for a reservable space.
- *
- *     @size: the target new reservation window size
- *
- *     @group_first_block: the first block we consider to start
- *                     the real search from
- *
- *     @last_block:
- *             the maximum block number that our goal reservable space
- *             could start from. This is normally the last block in this
- *             group. The search will end when we found the start of next
- *             possible reservable space is out of this boundary.
- *             This could handle the cross boundary reservation window
- *             request.
- *
- *     basically we search from the given range, rather than the whole
- *     reservation double linked list, (start_block, last_block)
- *     to find a free region that is of my size and has not
- *     been reserved.
- *
- */
-static int find_next_reservable_window(
-                               struct ext4_reserve_window_node *search_head,
-                               struct ext4_reserve_window_node *my_rsv,
-                               struct super_block * sb,
-                               ext4_fsblk_t start_block,
-                               ext4_fsblk_t last_block)
-{
-       struct rb_node *next;
-       struct ext4_reserve_window_node *rsv, *prev;
-       ext4_fsblk_t cur;
-       int size = my_rsv->rsv_goal_size;
-
-       /* TODO: make the start of the reservation window byte-aligned */
-       /* cur = *start_block & ~7;*/
-       cur = start_block;
-       rsv = search_head;
-       if (!rsv)
-               return -1;
-
-       while (1) {
-               if (cur <= rsv->rsv_end)
-                       cur = rsv->rsv_end + 1;
-
-               /* TODO?
-                * in the case we could not find a reservable space
-                * that is what is expected, during the re-search, we could
-                * remember what's the largest reservable space we could have
-                * and return that one.
-                *
-                * For now it will fail if we could not find the reservable
-                * space with expected-size (or more)...
-                */
-               if (cur > last_block)
-                       return -1;              /* fail */
-
-               prev = rsv;
-               next = rb_next(&rsv->rsv_node);
-               rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node);
+       if (!capable(CAP_SYS_RESOURCE) &&
+               sbi->s_resuid != current->fsuid &&
+               (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
+               root_blocks = ext4_r_blocks_count(sbi->s_es);
  
-               /*
-                * Reached the last reservation, we can just append to the
-                * previous one.
-                */
-               if (!next)
-                       break;
-
-               if (cur + size <= rsv->rsv_start) {
-                       /*
-                        * Found a reserveable space big enough.  We could
-                        * have a reservation across the group boundary here
-                        */
-                       break;
+       if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
+                                               EXT4_FREEBLOCKS_WATERMARK) {
+               free_blocks  = percpu_counter_sum(fbc);
+               dirty_blocks = percpu_counter_sum(dbc);
+               if (dirty_blocks < 0) {
+                       printk(KERN_CRIT "Dirty block accounting "
+                                       "went wrong %lld\n",
+                                       dirty_blocks);
                 }
         }
-       /*
-        * we come here either :
-        * when we reach the end of the whole list,
-        * and there is empty reservable space after last entry in the list.
-        * append it to the end of the list.
-        *
-        * or we found one reservable space in the middle of the list,
-        * return the reservation window that we could append to.
-        * succeed.
+       /* Check whether we have space after
+        * accounting for current dirty blocks
          */
+       if (free_blocks < ((root_blocks + nblocks) + dirty_blocks))
+               /* we don't have free space */
+               return -ENOSPC;
  
-       if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
-               rsv_window_remove(sb, my_rsv);
-
-       /*
-        * Let's book the whole avaliable window for now.  We will check the
-        * disk bitmap later and then, if there are free blocks then we adjust
-        * the window size if it's larger than requested.
-        * Otherwise, we will remove this node from the tree next time
-        * call find_next_reservable_window.
-        */
-       my_rsv->rsv_start = cur;
-       my_rsv->rsv_end = cur + size - 1;
-       my_rsv->rsv_alloc_hit = 0;
-
-       if (prev != my_rsv)
-               ext4_rsv_window_add(sb, my_rsv);
-
+       /* Add the blocks to nblocks */
+       percpu_counter_add(dbc, nblocks);
         return 0;
  }
  
-/**
- *     alloc_new_reservation()--allocate a new reservation window
- *
- *             To make a new reservation, we search part of the filesystem
- *             reservation list (the list that inside the group). We try to
- *             allocate a new reservation window near the allocation goal,
- *             or the beginning of the group, if there is no goal.
- *
- *             We first find a reservable space after the goal, then from
- *             there, we check the bitmap for the first free block after
- *             it. If there is no free block until the end of group, then the
- *             whole group is full, we failed. Otherwise, check if the free
- *             block is inside the expected reservable space, if so, we
- *             succeed.
- *             If the first free block is outside the reservable space, then
- *             start from the first free block, we search for next available
- *             space, and go on.
- *
- *     on succeed, a new reservation will be found and inserted into the list
- *     It contains at least one free block, and it does not overlap with other
- *     reservation windows.
- *
- *     failed: we failed to find a reservation window in this group
- *
- *     @rsv: the reservation
- *
- *     @grp_goal: The goal (group-relative).  It is where the search for a
- *             free reservable space should start from.
- *             if we have a grp_goal(grp_goal >0 ), then start from there,
- *             no grp_goal(grp_goal = -1), we start from the first block
- *             of the group.
- *
- *     @sb: the super block
- *     @group: the group we are trying to allocate in
- *     @bitmap_bh: the block group block bitmap
- *
- */
-static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
-               ext4_grpblk_t grp_goal, struct super_block *sb,
-               ext4_group_t group, struct buffer_head *bitmap_bh)
-{
-       struct ext4_reserve_window_node *search_head;
-       ext4_fsblk_t group_first_block, group_end_block, start_block;
-       ext4_grpblk_t first_free_block;
-       struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root;
-       unsigned long size;
-       int ret;
-       spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;
-
-       group_first_block = ext4_group_first_block_no(sb, group);
-       group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
-
-       if (grp_goal < 0)
-               start_block = group_first_block;
-       else
-               start_block = grp_goal + group_first_block;
-
-       size = my_rsv->rsv_goal_size;
-
-       if (!rsv_is_empty(&my_rsv->rsv_window)) {
-               /*
-                * if the old reservation is cross group boundary
-                * and if the goal is inside the old reservation window,
-                * we will come here when we just failed to allocate from
-                * the first part of the window. We still have another part
-                * that belongs to the next group. In this case, there is no
-                * point to discard our window and try to allocate a new one
-                * in this group(which will fail). we should
-                * keep the reservation window, just simply move on.
-                *
-                * Maybe we could shift the start block of the reservation
-                * window to the first block of next group.
-                */
-
-               if ((my_rsv->rsv_start <= group_end_block) &&
-                               (my_rsv->rsv_end > group_end_block) &&
-                               (start_block >= my_rsv->rsv_start))
-                       return -1;
-
-               if ((my_rsv->rsv_alloc_hit >
-                    (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
-                       /*
-                        * if the previously allocation hit ratio is
-                        * greater than 1/2, then we double the size of
-                        * the reservation window the next time,
-                        * otherwise we keep the same size window
-                        */
-                       size = size * 2;
-                       if (size > EXT4_MAX_RESERVE_BLOCKS)
-                               size = EXT4_MAX_RESERVE_BLOCKS;
-                       my_rsv->rsv_goal_size= size;
-               }
-       }
-
-       spin_lock(rsv_lock);
-       /*
-        * shift the search start to the window near the goal block
-        */
-       search_head = search_reserve_window(fs_rsv_root, start_block);
-
-       /*
-        * find_next_reservable_window() simply finds a reservable window
-        * inside the given range(start_block, group_end_block).
-        *
-        * To make sure the reservation window has a free bit inside it, we
-        * need to check the bitmap after we found a reservable window.
-        */
-retry:
-       ret = find_next_reservable_window(search_head, my_rsv, sb,
-                                               start_block, group_end_block);
-
-       if (ret == -1) {
-               if (!rsv_is_empty(&my_rsv->rsv_window))
-                       rsv_window_remove(sb, my_rsv);
-               spin_unlock(rsv_lock);
-               return -1;
-       }
-
-       /*
-        * On success, find_next_reservable_window() returns the
-        * reservation window where there is a reservable space after it.
-        * Before we reserve this reservable space, we need
-        * to make sure there is at least a free block inside this region.
-        *
-        * searching the first free bit on the block bitmap and copy of
-        * last committed bitmap alternatively, until we found a allocatable
-        * block. Search start from the start block of the reservable space
-        * we just found.
-        */
-       spin_unlock(rsv_lock);
-       first_free_block = bitmap_search_next_usable_block(
-                       my_rsv->rsv_start - group_first_block,
-                       bitmap_bh, group_end_block - group_first_block + 1);
-
-       if (first_free_block < 0) {
-               /*
-                * no free block left on the bitmap, no point
-                * to reserve the space. return failed.
-                */
-               spin_lock(rsv_lock);
-               if (!rsv_is_empty(&my_rsv->rsv_window))
-                       rsv_window_remove(sb, my_rsv);
-               spin_unlock(rsv_lock);
-               return -1;              /* failed */
-       }
-
-       start_block = first_free_block + group_first_block;
-       /*
-        * check if the first free block is within the
-        * free space we just reserved
-        */
-       if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
-               return 0;               /* success */
-       /*
-        * if the first free bit we found is out of the reservable space
-        * continue search for next reservable space,
-        * start from where the free block is,
-        * we also shift the list head to where we stopped last time
-        */
-       search_head = my_rsv;
-       spin_lock(rsv_lock);
-       goto retry;
-}
-
-/**
- * try_to_extend_reservation()
- * @my_rsv:            given reservation window
- * @sb:                        super block
- * @size:              the delta to extend
- *
- * Attempt to expand the reservation window large enough to have
- * required number of free blocks
- *
- * Since ext4_try_to_allocate() will always allocate blocks within
- * the reservation window range, if the window size is too small,
- * multiple blocks allocation has to stop at the end of the reservation
- * window. To make this more efficient, given the total number of
- * blocks needed and the current size of the window, we try to
- * expand the reservation window size if necessary on a best-effort
- * basis before ext4_new_blocks() tries to allocate blocks,
- */
-static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
-                       struct super_block *sb, int size)
-{
-       struct ext4_reserve_window_node *next_rsv;
-       struct rb_node *next;
-       spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;
-
-       if (!spin_trylock(rsv_lock))
-               return;
-
-       next = rb_next(&my_rsv->rsv_node);
-
-       if (!next)
-               my_rsv->rsv_end += size;
-       else {
-               next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node);
-
-               if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
-                       my_rsv->rsv_end += size;
-               else
-                       my_rsv->rsv_end = next_rsv->rsv_start - 1;
-       }
-       spin_unlock(rsv_lock);
-}
-
-/**
- * ext4_try_to_allocate_with_rsv()
- * @sb:                        superblock
- * @handle:            handle to this transaction
- * @group:             given allocation block group
- * @bitmap_bh:         bufferhead holds the block bitmap
- * @grp_goal:          given target block within the group
- * @count:             target number of blocks to allocate
- * @my_rsv:            reservation window
- * @errp:              pointer to store the error code
- *
- * This is the main function used to allocate a new block and its reservation
- * window.
- *
- * Each time when a new block allocation is need, first try to allocate from
- * its own reservation.  If it does not have a reservation window, instead of
- * looking for a free bit on bitmap first, then look up the reservation list to
- * see if it is inside somebody else's reservation window, we try to allocate a
- * reservation window for it starting from the goal first. Then do the block
- * allocation within the reservation window.
- *
- * This will avoid keeping on searching the reservation list again and
- * again when somebody is looking for a free block (without
- * reservation), and there are lots of free blocks, but they are all
- * being reserved.
- *
- * We use a red-black tree for the per-filesystem reservation list.
- *
- */
-static ext4_grpblk_t
-ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
-                       ext4_group_t group, struct buffer_head *bitmap_bh,
-                       ext4_grpblk_t grp_goal,
-                       struct ext4_reserve_window_node * my_rsv,
-                       unsigned long *count, int *errp)
-{
-       ext4_fsblk_t group_first_block, group_last_block;
-       ext4_grpblk_t ret = 0;
-       int fatal;
-       unsigned long num = *count;
-
-       *errp = 0;
-
-       /*
-        * Make sure we use undo access for the bitmap, because it is critical
-        * that we do the frozen_data COW on bitmap buffers in all cases even
-        * if the buffer is in BJ_Forget state in the committing transaction.
-        */
-       BUFFER_TRACE(bitmap_bh, "get undo access for new block");
-       fatal = ext4_journal_get_undo_access(handle, bitmap_bh);
-       if (fatal) {
-               *errp = fatal;
-               return -1;
-       }
-
-       /*
-        * we don't deal with reservation when
-        * filesystem is mounted without reservation
-        * or the file is not a regular file
-        * or last attempt to allocate a block with reservation turned on failed
-        */
-       if (my_rsv == NULL ) {
-               ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
-                                               grp_goal, count, NULL);
-               goto out;
-       }
-       /*
-        * grp_goal is a group relative block number (if there is a goal)
-        * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb)
-        * first block is a filesystem wide block number
-        * first block is the block number of the first block in this group
-        */
-       group_first_block = ext4_group_first_block_no(sb, group);
-       group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
-
-       /*
-        * Basically we will allocate a new block from inode's reservation
-        * window.
-        *
-        * We need to allocate a new reservation window, if:
-        * a) inode does not have a reservation window; or
-        * b) last attempt to allocate a block from existing reservation
-        *    failed; or
-        * c) we come here with a goal and with a reservation window
-        *
-        * We do not need to allocate a new reservation window if we come here
-        * at the beginning with a goal and the goal is inside the window, or
-        * we don't have a goal but already have a reservation window.
-        * then we could go to allocate from the reservation window directly.
-        */
-       while (1) {
-               if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
-                       !goal_in_my_reservation(&my_rsv->rsv_window,
-                                               grp_goal, group, sb)) {
-                       if (my_rsv->rsv_goal_size < *count)
-                               my_rsv->rsv_goal_size = *count;
-                       ret = alloc_new_reservation(my_rsv, grp_goal, sb,
-                                                       group, bitmap_bh);
-                       if (ret < 0)
-                               break;                  /* failed */
-
-                       if (!goal_in_my_reservation(&my_rsv->rsv_window,
-                                                       grp_goal, group, sb))
-                               grp_goal = -1;
-               } else if (grp_goal >= 0) {
-                       int curr = my_rsv->rsv_end -
-                                       (grp_goal + group_first_block) + 1;
-
-                       if (curr < *count)
-                               try_to_extend_reservation(my_rsv, sb,
-                                                       *count - curr);
-               }
-
-               if ((my_rsv->rsv_start > group_last_block) ||
-                               (my_rsv->rsv_end < group_first_block)) {
-                       rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1);
-                       BUG();
-               }
-               ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
-                                          grp_goal, &num, &my_rsv->rsv_window);
-               if (ret >= 0) {
-                       my_rsv->rsv_alloc_hit += num;
-                       *count = num;
-                       break;                          /* succeed */
-               }
-               num = *count;
-       }
-out:
-       if (ret >= 0) {
-               BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
-                                       "bitmap block");
-               fatal = ext4_journal_dirty_metadata(handle, bitmap_bh);
-               if (fatal) {
-                       *errp = fatal;
-                       return -1;
-               }
-               return ret;
-       }
-
-       BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
-       ext4_journal_release_buffer(handle, bitmap_bh);
-       return ret;
-}
-
  /**
   * ext4_has_free_blocks()
   * @sbi:       in-core super block structure.
@@ -1610,29 +629,34 @@ out:
   * On success, return nblocks
   */
  ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
-                                               ext4_fsblk_t nblocks)
+                                               s64 nblocks)
  {
-       ext4_fsblk_t free_blocks;
-       ext4_fsblk_t root_blocks = 0;
+       s64 free_blocks, dirty_blocks;
+       s64 root_blocks = 0;
+       struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
+       struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
  
-       free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+       free_blocks  = percpu_counter_read_positive(fbc);
+       dirty_blocks = percpu_counter_read_positive(dbc);
  
         if (!capable(CAP_SYS_RESOURCE) &&
                 sbi->s_resuid != current->fsuid &&
                 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
                 root_blocks = ext4_r_blocks_count(sbi->s_es);
-#ifdef CONFIG_SMP
-       if (free_blocks - root_blocks < FBC_BATCH)
-               free_blocks =
-                       percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
-#endif
-       if (free_blocks <= root_blocks)
+
+       if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
+                                               EXT4_FREEBLOCKS_WATERMARK) {
+               free_blocks  = percpu_counter_sum(fbc);
+               dirty_blocks = percpu_counter_sum(dbc);
+       }
+       if (free_blocks <= (root_blocks + dirty_blocks))
                 /* we don't have free space */
                 return 0;
-       if (free_blocks - root_blocks < nblocks)
-               return free_blocks - root_blocks;
+
+       if (free_blocks - (root_blocks + dirty_blocks) < nblocks)
+               return free_blocks - (root_blocks + dirty_blocks);
         return nblocks;
- }
+}
  
  
  /**
@@ -1657,303 +681,6 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
         return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
  }
  
-/**
- * ext4_old_new_blocks() -- core block bitmap based block allocation function
- *
- * @handle:            handle to this transaction
- * @inode:             file inode
- * @goal:              given target block(filesystem wide)
- * @count:             target number of blocks to allocate
- * @errp:              error code
- *
- * ext4_old_new_blocks uses a goal block to assist allocation and look up
- * the block bitmap directly to do block allocation.  It tries to
- * allocate block(s) from the block group contains the goal block first. If
- * that fails, it will try to allocate block(s) from other block groups
- * without any specific goal block.
- *
- * This function is called when -o nomballoc mount option is enabled
- *
- */
-ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t goal, unsigned long *count, int *errp)
-{
-       struct buffer_head *bitmap_bh = NULL;
-       struct buffer_head *gdp_bh;
-       ext4_group_t group_no;
-       ext4_group_t goal_group;
-       ext4_grpblk_t grp_target_blk;   /* blockgroup relative goal block */
-       ext4_grpblk_t grp_alloc_blk;    /* blockgroup-relative allocated block*/
-       ext4_fsblk_t ret_block;         /* filesyetem-wide allocated block */
-       ext4_group_t bgi;                       /* blockgroup iteration index */
-       int fatal = 0, err;
-       int performed_allocation = 0;
-       ext4_grpblk_t free_blocks;      /* number of free blocks in a group */
-       struct super_block *sb;
-       struct ext4_group_desc *gdp;
-       struct ext4_super_block *es;
-       struct ext4_sb_info *sbi;
-       struct ext4_reserve_window_node *my_rsv = NULL;
-       struct ext4_block_alloc_info *block_i;
-       unsigned short windowsz = 0;
-       ext4_group_t ngroups;
-       unsigned long num = *count;
-
-       sb = inode->i_sb;
-       if (!sb) {
-               *errp = -ENODEV;
-               printk("ext4_new_block: nonexistent device");
-               return 0;
-       }
-
-       sbi = EXT4_SB(sb);
-       if (!EXT4_I(inode)->i_delalloc_reserved_flag) {
-               /*
-                * With delalloc we already reserved the blocks
-                */
-               *count = ext4_has_free_blocks(sbi, *count);
-       }
-       if (*count == 0) {
-               *errp = -ENOSPC;
-               return 0;       /*return with ENOSPC error */
-       }
-       num = *count;
-
-       /*
-        * Check quota for allocation of this block.
-        */
-       if (DQUOT_ALLOC_BLOCK(inode, num)) {
-               *errp = -EDQUOT;
-               return 0;
-       }
-
-       sbi = EXT4_SB(sb);
-       es = EXT4_SB(sb)->s_es;
-       ext4_debug("goal=%llu.\n", goal);
-       /*
-        * Allocate a block from reservation only when
-        * filesystem is mounted with reservation(default,-o reservation), and
-        * it's a regular file, and
-        * the desired window size is greater than 0 (One could use ioctl
-        * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off
-        * reservation on that particular file)
-        */
-       block_i = EXT4_I(inode)->i_block_alloc_info;
-       if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
-               my_rsv = &block_i->rsv_window_node;
-
-       /*
-        * First, test whether the goal block is free.
-        */
-       if (goal < le32_to_cpu(es->s_first_data_block) ||
-           goal >= ext4_blocks_count(es))
-               goal = le32_to_cpu(es->s_first_data_block);
-       ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk);
-       goal_group = group_no;
-retry_alloc:
-       gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
-       if (!gdp)
-               goto io_error;
-
-       free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
-       /*
-        * if there is not enough free blocks to make a new resevation
-        * turn off reservation for this allocation
-        */
-       if (my_rsv && (free_blocks < windowsz)
-               && (rsv_is_empty(&my_rsv->rsv_window)))
-               my_rsv = NULL;
-
-       if (free_blocks > 0) {
-               bitmap_bh = ext4_read_block_bitmap(sb, group_no);
-               if (!bitmap_bh)
-                       goto io_error;
-               grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
-                                       group_no, bitmap_bh, grp_target_blk,
-                                       my_rsv, &num, &fatal);
-               if (fatal)
-                       goto out;
-               if (grp_alloc_blk >= 0)
-                       goto allocated;
-       }
-
-       ngroups = EXT4_SB(sb)->s_groups_count;
-       smp_rmb();
-
-       /*
-        * Now search the rest of the groups.  We assume that
-        * group_no and gdp correctly point to the last group visited.
-        */
-       for (bgi = 0; bgi < ngroups; bgi++) {
-               group_no++;
-               if (group_no >= ngroups)
-                       group_no = 0;
-               gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
-               if (!gdp)
-                       goto io_error;
-               free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
-               /*
-                * skip this group if the number of
-                * free blocks is less than half of the reservation
-                * window size.
-                */
-               if (free_blocks <= (windowsz/2))
-                       continue;
-
-               brelse(bitmap_bh);
-               bitmap_bh = ext4_read_block_bitmap(sb, group_no);
-               if (!bitmap_bh)
-                       goto io_error;
-               /*
-                * try to allocate block(s) from this group, without a goal(-1).
-                */
-               grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
-                                       group_no, bitmap_bh, -1, my_rsv,
-                                       &num, &fatal);
-               if (fatal)
-                       goto out;
-               if (grp_alloc_blk >= 0)
-                       goto allocated;
-       }
-       /*
-        * We may end up a bogus ealier ENOSPC error due to
-        * filesystem is "full" of reservations, but
-        * there maybe indeed free blocks avaliable on disk
-        * In this case, we just forget about the reservations
-        * just do block allocation as without reservations.
-        */
-       if (my_rsv) {
-               my_rsv = NULL;
-               windowsz = 0;
-               group_no = goal_group;
-               goto retry_alloc;
-       }
-       /* No space left on the device */
-       *errp = -ENOSPC;
-       goto out;
-
-allocated:
-
-       ext4_debug("using block group %lu(%d)\n",
-                       group_no, gdp->bg_free_blocks_count);
-
-       BUFFER_TRACE(gdp_bh, "get_write_access");
-       fatal = ext4_journal_get_write_access(handle, gdp_bh);
-       if (fatal)
-               goto out;
-
-       ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no);
-
-       if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) ||
-           in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) ||
-           in_range(ret_block, ext4_inode_table(sb, gdp),
-                    EXT4_SB(sb)->s_itb_per_group) ||
-           in_range(ret_block + num - 1, ext4_inode_table(sb, gdp),
-                    EXT4_SB(sb)->s_itb_per_group)) {
-               ext4_error(sb, "ext4_new_block",
-                           "Allocating block in system zone - "
-                           "blocks from %llu, length %lu",
-                            ret_block, num);
-               /*
-                * claim_block marked the blocks we allocated
-                * as in use. So we may want to selectively
-                * mark some of the blocks as free
-                */
-               goto retry_alloc;
-       }
-
-       performed_allocation = 1;
-
-#ifdef CONFIG_JBD2_DEBUG
-       {
-               struct buffer_head *debug_bh;
-
-               /* Record bitmap buffer state in the newly allocated block */
-               debug_bh = sb_find_get_block(sb, ret_block);
-               if (debug_bh) {
-                       BUFFER_TRACE(debug_bh, "state when allocated");
-                       BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
-                       brelse(debug_bh);
-               }
-       }
-       jbd_lock_bh_state(bitmap_bh);
-       spin_lock(sb_bgl_lock(sbi, group_no));
-       if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
-               int i;
-
-               for (i = 0; i < num; i++) {
-                       if (ext4_test_bit(grp_alloc_blk+i,
-                                       bh2jh(bitmap_bh)->b_committed_data)) {
-                               printk("%s: block was unexpectedly set in "
-                                       "b_committed_data\n", __func__);
-                       }
-               }
-       }
-       ext4_debug("found bit %d\n", grp_alloc_blk);
-       spin_unlock(sb_bgl_lock(sbi, group_no));
-       jbd_unlock_bh_state(bitmap_bh);
-#endif
-
-       if (ret_block + num - 1 >= ext4_blocks_count(es)) {
-               ext4_error(sb, "ext4_new_block",
-                           "block(%llu) >= blocks count(%llu) - "
-                           "block_group = %lu, es == %p ", ret_block,
-                       ext4_blocks_count(es), group_no, es);
-               goto out;
-       }
-
-       /*
-        * It is up to the caller to add the new buffer to a journal
-        * list of some description.  We don't know in advance whether
-        * the caller wants to use it as metadata or data.
-        */
-       spin_lock(sb_bgl_lock(sbi, group_no));
-       if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
-               gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
-       le16_add_cpu(&gdp->bg_free_blocks_count, -num);
-       gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
-       spin_unlock(sb_bgl_lock(sbi, group_no));
-       if (!EXT4_I(inode)->i_delalloc_reserved_flag)
-               percpu_counter_sub(&sbi->s_freeblocks_counter, num);
-
-       if (sbi->s_log_groups_per_flex) {
-               ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
-               spin_lock(sb_bgl_lock(sbi, flex_group));
-               sbi->s_flex_groups[flex_group].free_blocks -= num;
-               spin_unlock(sb_bgl_lock(sbi, flex_group));
-       }
-
-       BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
-       err = ext4_journal_dirty_metadata(handle, gdp_bh);
-       if (!fatal)
-               fatal = err;
-
-       sb->s_dirt = 1;
-       if (fatal)
-               goto out;
-
-       *errp = 0;
-       brelse(bitmap_bh);
-       DQUOT_FREE_BLOCK(inode, *count-num);
-       *count = num;
-       return ret_block;
-
-io_error:
-       *errp = -EIO;
-out:
-       if (fatal) {
-               *errp = fatal;
-               ext4_std_error(sb, fatal);
-       }
-       /*
-        * Undo the block allocation
-        */
-       if (!performed_allocation)
-               DQUOT_FREE_BLOCK(inode, *count);
-       brelse(bitmap_bh);
-       return 0;
-}
-
  #define EXT4_META_BLOCK 0x1
  
  static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
@@ -1963,10 +690,6 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
         struct ext4_allocation_request ar;
         ext4_fsblk_t ret;
  
-       if (!test_opt(inode->i_sb, MBALLOC)) {
-               return ext4_old_new_blocks(handle, inode, goal, count, errp);
-       }
-
         memset(&ar, 0, sizeof(ar));
         /* Fill with neighbour allocated blocks */
  
@@ -2008,7 +731,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
         /*
          * Account for the allocated meta blocks
          */
-       if (!(*errp)) {
+       if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
                 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
                 EXT4_I(inode)->i_allocated_meta_blocks += *count;
                 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -2093,10 +816,9 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
                 bitmap_count += x;
         }
         brelse(bitmap_bh);
-       printk("ext4_count_free_blocks: stored = %llu"
-               ", computed = %llu, %llu\n",
-               ext4_free_blocks_count(es),
-               desc_count, bitmap_count);
+       printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu"
+               ", computed = %llu, %llu\n", ext4_free_blocks_count(es),
+              desc_count, bitmap_count);
         return bitmap_count;
  #else
         desc_count = 0;
@@ -2183,8 +905,9 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
  
         if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) ||
                         metagroup < first_meta_bg)
-               return ext4_bg_num_gdb_nometa(sb,group);
+               return ext4_bg_num_gdb_nometa(sb, group);
  
         return ext4_bg_num_gdb_meta(sb,group);
  
  }
+
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c

index d37ea67504549c8d5c36958194323b50e2dd6ce7..0a7a6663c190ef84181f6b114eeee16817cde1b6 100644 (file)
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -15,17 +15,17 @@
  
  static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
  
-unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars)
+unsigned long ext4_count_free(struct buffer_head *map, unsigned int numchars)
  {
         unsigned int i;
         unsigned long sum = 0;
  
         if (!map)
-               return (0);
+               return 0;
         for (i = 0; i < numchars; i++)
                 sum += nibblemap[map->b_data[i] & 0xf] +
                         nibblemap[(map->b_data[i] >> 4) & 0xf];
-       return (sum);
+       return sum;
  }
  
  #endif  /*  EXT4FS_DEBUG  */
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c

index ec8e33b452198ba9ab7818884a57e27ff8a20509..3ca6a2b7632dcd987db185d3fbe40926628b3d78 100644 (file)
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -33,10 +33,10 @@ static unsigned char ext4_filetype_table[] = {
  };
  
  static int ext4_readdir(struct file *, void *, filldir_t);
-static int ext4_dx_readdir(struct file * filp,
-                          void * dirent, filldir_t filldir);
-static int ext4_release_dir (struct inode * inode,
-                               struct file * filp);
+static int ext4_dx_readdir(struct file *filp,
+                          void *dirent, filldir_t filldir);
+static int ext4_release_dir(struct inode *inode,
+                               struct file *filp);
  
  const struct file_operations ext4_dir_operations = {
         .llseek         = generic_file_llseek,
@@ -61,12 +61,12 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
  }
  
  
-int ext4_check_dir_entry (const char * function, struct inode * dir,
-                         struct ext4_dir_entry_2 * de,
-                         struct buffer_head * bh,
-                         unsigned long offset)
+int ext4_check_dir_entry(const char *function, struct inode *dir,
+                        struct ext4_dir_entry_2 *de,
+                        struct buffer_head *bh,
+                        unsigned long offset)
  {
-       const char * error_msg = NULL;
+       const char *error_msg = NULL;
         const int rlen = ext4_rec_len_from_disk(de->rec_len);
  
         if (rlen < EXT4_DIR_REC_LEN(1))
@@ -82,7 +82,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir,
                 error_msg = "inode out of bounds";
  
         if (error_msg != NULL)
-               ext4_error (dir->i_sb, function,
+               ext4_error(dir->i_sb, function,
                         "bad entry in directory #%lu: %s - "
                         "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
                         dir->i_ino, error_msg, offset,
@@ -91,8 +91,8 @@ int ext4_check_dir_entry (const char * function, struct inode * dir,
         return error_msg == NULL ? 1 : 0;
  }
  
-static int ext4_readdir(struct file * filp,
-                        void * dirent, filldir_t filldir)
+static int ext4_readdir(struct file *filp,
+                        void *dirent, filldir_t filldir)
  {
         int error = 0;
         unsigned long offset;
@@ -102,6 +102,7 @@ static int ext4_readdir(struct file * filp,
         int err;
         struct inode *inode = filp->f_path.dentry->d_inode;
         int ret = 0;
+       int dir_has_error = 0;
  
         sb = inode->i_sb;
  
@@ -148,9 +149,13 @@ static int ext4_readdir(struct file * filp,
                  * of recovering data when there's a bad sector
                  */
                 if (!bh) {
-                       ext4_error (sb, "ext4_readdir",
-                               "directory #%lu contains a hole at offset %lu",
-                               inode->i_ino, (unsigned long)filp->f_pos);
+                       if (!dir_has_error) {
+                               ext4_error(sb, __func__, "directory #%lu "
+                                          "contains a hole at offset %Lu",
+                                          inode->i_ino,
+                                          (unsigned long long) filp->f_pos);
+                               dir_has_error = 1;
+                       }
                         /* corrupt size?  Maybe no more blocks to read */
                         if (filp->f_pos > inode->i_blocks << 9)
                                 break;
@@ -187,14 +192,14 @@ revalidate:
                 while (!error && filp->f_pos < inode->i_size
                        && offset < sb->s_blocksize) {
                         de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
-                       if (!ext4_check_dir_entry ("ext4_readdir", inode, de,
-                                                  bh, offset)) {
+                       if (!ext4_check_dir_entry("ext4_readdir", inode, de,
+                                                 bh, offset)) {
                                 /*
                                  * On error, skip the f_pos to the next block
                                  */
                                 filp->f_pos = (filp->f_pos |
                                                 (sb->s_blocksize - 1)) + 1;
-                               brelse (bh);
+                               brelse(bh);
                                 ret = stored;
                                 goto out;
                         }
@@ -218,12 +223,12 @@ revalidate:
                                         break;
                                 if (version != filp->f_version)
                                         goto revalidate;
-                               stored ++;
+                               stored++;
                         }
                         filp->f_pos += ext4_rec_len_from_disk(de->rec_len);
                 }
                 offset = 0;
-               brelse (bh);
+               brelse(bh);
         }
  out:
         return ret;
@@ -290,9 +295,9 @@ static void free_rb_tree_fname(struct rb_root *root)
                 parent = rb_parent(n);
                 fname = rb_entry(n, struct fname, rb_hash);
                 while (fname) {
-                       struct fname * old = fname;
+                       struct fname *old = fname;
                         fname = fname->next;
-                       kfree (old);
+                       kfree(old);
                 }
                 if (!parent)
                         root->rb_node = NULL;
@@ -331,7 +336,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
                              struct ext4_dir_entry_2 *dirent)
  {
         struct rb_node **p, *parent = NULL;
-       struct fname * fname, *new_fn;
+       struct fname *fname, *new_fn;
         struct dir_private_info *info;
         int len;
  
@@ -388,19 +393,20 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
   * for all entres on the fname linked list.  (Normally there is only
   * one entry on the linked list, unless there are 62 bit hash collisions.)
   */
-static int call_filldir(struct file * filp, void * dirent,
+static int call_filldir(struct file *filp, void *dirent,
                         filldir_t filldir, struct fname *fname)
  {
         struct dir_private_info *info = filp->private_data;
         loff_t  curr_pos;
         struct inode *inode = filp->f_path.dentry->d_inode;
-       struct super_block * sb;
+       struct super_block *sb;
         int error;
  
         sb = inode->i_sb;
  
         if (!fname) {
-               printk("call_filldir: called with null fname?!?\n");
+               printk(KERN_ERR "ext4: call_filldir: called with "
+                      "null fname?!?\n");
                 return 0;
         }
         curr_pos = hash2pos(fname->hash, fname->minor_hash);
@@ -419,8 +425,8 @@ static int call_filldir(struct file * filp, void * dirent,
         return 0;
  }
  
-static int ext4_dx_readdir(struct file * filp,
-                        void * dirent, filldir_t filldir)
+static int ext4_dx_readdir(struct file *filp,
+                        void *dirent, filldir_t filldir)
  {
         struct dir_private_info *info = filp->private_data;
         struct inode *inode = filp->f_path.dentry->d_inode;
@@ -511,7 +517,7 @@ finished:
         return 0;
  }
  
-static int ext4_release_dir (struct inode * inode, struct file * filp)
+static int ext4_release_dir(struct inode *inode, struct file *filp)
  {
         if (filp->private_data)
                 ext4_htree_free_dir_info(filp->private_data);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h

index 295003241d3d7dbb3b4127dcbb7357c2327d0146..f46a513a515732c39d437f0e85ddc55549b209f2 100644 (file)
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -44,9 +44,9 @@
  #ifdef EXT4FS_DEBUG
  #define ext4_debug(f, a...)                                            \
         do {                                                            \
-               printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:",       \
+               printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:",        \
                         __FILE__, __LINE__, __func__);                  \
-               printk (KERN_DEBUG f, ## a);                            \
+               printk(KERN_DEBUG f, ## a);                             \
         } while (0)
  #else
  #define ext4_debug(f, a...)    do {} while (0)
@@ -128,7 +128,7 @@ struct ext4_allocation_request {
  #else
  # define EXT4_BLOCK_SIZE(s)            (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
  #endif
-#define        EXT4_ADDR_PER_BLOCK(s)          (EXT4_BLOCK_SIZE(s) / sizeof (__u32))
+#define        EXT4_ADDR_PER_BLOCK(s)          (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
  #ifdef __KERNEL__
  # define EXT4_BLOCK_SIZE_BITS(s)       ((s)->s_blocksize_bits)
  #else
@@ -245,7 +245,7 @@ struct flex_groups {
  #define EXT4_RESERVED_FL               0x80000000 /* reserved for ext4 lib */
  
  #define EXT4_FL_USER_VISIBLE           0x000BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE                0x000380FF /* User modifiable flags */
+#define EXT4_FL_USER_MODIFIABLE                0x000B80FF /* User modifiable flags */
  
  /*
   * Inode dynamic state flags
@@ -291,8 +291,6 @@ struct ext4_new_group_data {
  #define        EXT4_IOC_SETFLAGS               FS_IOC_SETFLAGS
  #define        EXT4_IOC_GETVERSION             _IOR('f', 3, long)
  #define        EXT4_IOC_SETVERSION             _IOW('f', 4, long)
-#define EXT4_IOC_GROUP_EXTEND          _IOW('f', 7, unsigned long)
-#define EXT4_IOC_GROUP_ADD             _IOW('f', 8,struct ext4_new_group_input)
  #define        EXT4_IOC_GETVERSION_OLD         FS_IOC_GETVERSION
  #define        EXT4_IOC_SETVERSION_OLD         FS_IOC_SETVERSION
  #ifdef CONFIG_JBD2_DEBUG
@@ -300,7 +298,10 @@ struct ext4_new_group_data {
  #endif
  #define EXT4_IOC_GETRSVSZ              _IOR('f', 5, long)
  #define EXT4_IOC_SETRSVSZ              _IOW('f', 6, long)
-#define EXT4_IOC_MIGRATE               _IO('f', 7)
+#define EXT4_IOC_GROUP_EXTEND          _IOW('f', 7, unsigned long)
+#define EXT4_IOC_GROUP_ADD             _IOW('f', 8, struct ext4_new_group_input)
+#define EXT4_IOC_MIGRATE               _IO('f', 9)
+ /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
  
  /*
   * ioctl commands in 32 bit emulation
@@ -538,7 +539,6 @@ do {                                                                               \
  #define EXT4_MOUNT_JOURNAL_CHECKSUM    0x800000 /* Journal checksums */
  #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT        0x1000000 /* Journal Async Commit */
  #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
-#define EXT4_MOUNT_MBALLOC             0x4000000 /* Buddy allocation support */
  #define EXT4_MOUNT_DELALLOC            0x8000000 /* Delalloc support */
  /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
@@ -667,7 +667,7 @@ struct ext4_super_block {
  };
  
  #ifdef __KERNEL__
-static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb)
+static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
  {
         return sb->s_fs_info;
  }
@@ -725,11 +725,11 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
   */
  
  #define EXT4_HAS_COMPAT_FEATURE(sb,mask)                       \
-       ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) )
+       (EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask))
  #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask)                    \
-       ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) )
+       (EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask))
  #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask)                     \
-       ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) )
+       (EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask))
  #define EXT4_SET_COMPAT_FEATURE(sb,mask)                       \
         EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
  #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask)                    \
@@ -789,6 +789,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
  #define        EXT4_DEF_RESUID         0
  #define        EXT4_DEF_RESGID         0
  
+#define EXT4_DEF_INODE_READAHEAD_BLKS  32
+
  /*
   * Default mount options
   */
@@ -954,6 +956,24 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
  void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
                         unsigned long *blockgrpp, ext4_grpblk_t *offsetp);
  
+extern struct proc_dir_entry *ext4_proc_root;
+
+#ifdef CONFIG_PROC_FS
+extern const struct file_operations ext4_ui_proc_fops;
+/* Uses 'proc', 'mode', 'sbi' and label 'err_out' from the expansion site. */
+#define        EXT4_PROC_HANDLER(name, var)                                    \
+do {                                                                   \
+       proc = proc_create_data(name, mode, sbi->s_proc,                \
+                               &ext4_ui_proc_fops, &sbi->s_##var);     \
+       if (proc == NULL) {                                             \
+               printk(KERN_ERR "EXT4-fs: can't create %s\n", name);    \
+               goto err_out;                                           \
+       }                                                               \
+} while (0)
+#else
+#define EXT4_PROC_HANDLER(name, var)
+#endif
+
  /*
   * Function prototypes
   */
@@ -981,23 +1001,20 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
  extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
                                         ext4_lblk_t iblock, ext4_fsblk_t goal,
                                         unsigned long *count, int *errp);
-extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t goal, unsigned long *count, int *errp);
+extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
  extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
-                                               ext4_fsblk_t nblocks);
-extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
+                                        s64 nblocks);
+extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
                         ext4_fsblk_t block, unsigned long count, int metadata);
-extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
-                                ext4_fsblk_t block, unsigned long count,
+extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
+                               ext4_fsblk_t block, unsigned long count,
                                 unsigned long *pdquot_freed_blocks);
-extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *);
-extern void ext4_check_blocks_bitmap (struct super_block *);
+extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
+extern void ext4_check_blocks_bitmap(struct super_block *);
  extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
                                                     ext4_group_t block_group,
                                                     struct buffer_head ** bh);
  extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
-extern void ext4_init_block_alloc_info(struct inode *);
-extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
  
  /* dir.c */
  extern int ext4_check_dir_entry(const char *, struct inode *,
@@ -1009,20 +1026,20 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
  extern void ext4_htree_free_dir_info(struct dir_private_info *p);
  
  /* fsync.c */
-extern int ext4_sync_file (struct file *, struct dentry *, int);
+extern int ext4_sync_file(struct file *, struct dentry *, int);
  
  /* hash.c */
  extern int ext4fs_dirhash(const char *name, int len, struct
                           dx_hash_info *hinfo);
  
  /* ialloc.c */
-extern struct inode * ext4_new_inode (handle_t *, struct inode *, int);
-extern void ext4_free_inode (handle_t *, struct inode *);
-extern struct inode * ext4_orphan_get (struct super_block *, unsigned long);
-extern unsigned long ext4_count_free_inodes (struct super_block *);
-extern unsigned long ext4_count_dirs (struct super_block *);
-extern void ext4_check_inodes_bitmap (struct super_block *);
-extern unsigned long ext4_count_free (struct buffer_head *, unsigned);
+extern struct inode * ext4_new_inode(handle_t *, struct inode *, int);
+extern void ext4_free_inode(handle_t *, struct inode *);
+extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
+extern unsigned long ext4_count_free_inodes(struct super_block *);
+extern unsigned long ext4_count_dirs(struct super_block *);
+extern void ext4_check_inodes_bitmap(struct super_block *);
+extern unsigned long ext4_count_free(struct buffer_head *, unsigned);
  
  /* mballoc.c */
  extern long ext4_mb_stats;
@@ -1032,7 +1049,7 @@ extern int ext4_mb_release(struct super_block *);
  extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
                                 struct ext4_allocation_request *, int *);
  extern int ext4_mb_reserve_blocks(struct super_block *, int);
-extern void ext4_mb_discard_inode_preallocations(struct inode *);
+extern void ext4_discard_preallocations(struct inode *);
  extern int __init init_ext4_mballoc(void);
  extern void exit_ext4_mballoc(void);
  extern void ext4_mb_free_blocks(handle_t *, struct inode *,
@@ -1050,24 +1067,25 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *,
                                                 ext4_lblk_t, int, int *);
  struct buffer_head *ext4_bread(handle_t *, struct inode *,
                                                 ext4_lblk_t, int, int *);
+int ext4_get_block(struct inode *inode, sector_t iblock,
+                               struct buffer_head *bh_result, int create);
  int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
                                 ext4_lblk_t iblock, unsigned long maxblocks,
                                 struct buffer_head *bh_result,
                                 int create, int extend_disksize);
  
  extern struct inode *ext4_iget(struct super_block *, unsigned long);
-extern int  ext4_write_inode (struct inode *, int);
-extern int  ext4_setattr (struct dentry *, struct iattr *);
+extern int  ext4_write_inode(struct inode *, int);
+extern int  ext4_setattr(struct dentry *, struct iattr *);
  extern int  ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
                                 struct kstat *stat);
-extern void ext4_delete_inode (struct inode *);
-extern int  ext4_sync_inode (handle_t *, struct inode *);
-extern void ext4_discard_reservation (struct inode *);
+extern void ext4_delete_inode(struct inode *);
+extern int  ext4_sync_inode(handle_t *, struct inode *);
  extern void ext4_dirty_inode(struct inode *);
  extern int ext4_change_inode_journal_flag(struct inode *, int);
  extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
  extern int ext4_can_truncate(struct inode *inode);
-extern void ext4_truncate (struct inode *);
+extern void ext4_truncate(struct inode *);
  extern void ext4_set_inode_flags(struct inode *);
  extern void ext4_get_inode_flags(struct ext4_inode_info *);
  extern void ext4_set_aops(struct inode *inode);
@@ -1080,11 +1098,10 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
  
  /* ioctl.c */
  extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
-extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
+extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
  
  /* migrate.c */
-extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int,
-                      unsigned long);
+extern int ext4_ext_migrate(struct inode *);
  /* namei.c */
  extern int ext4_orphan_add(handle_t *, struct inode *);
  extern int ext4_orphan_del(handle_t *, struct inode *);
@@ -1099,14 +1116,14 @@ extern int ext4_group_extend(struct super_block *sb,
                                 ext4_fsblk_t n_blocks_count);
  
  /* super.c */
-extern void ext4_error (struct super_block *, const char *, const char *, ...)
+extern void ext4_error(struct super_block *, const char *, const char *, ...)
         __attribute__ ((format (printf, 3, 4)));
-extern void __ext4_std_error (struct super_block *, const char *, int);
-extern void ext4_abort (struct super_block *, const char *, const char *, ...)
+extern void __ext4_std_error(struct super_block *, const char *, int);
+extern void ext4_abort(struct super_block *, const char *, const char *, ...)
         __attribute__ ((format (printf, 3, 4)));
-extern void ext4_warning (struct super_block *, const char *, const char *, ...)
+extern void ext4_warning(struct super_block *, const char *, const char *, ...)
         __attribute__ ((format (printf, 3, 4)));
-extern void ext4_update_dynamic_rev (struct super_block *sb);
+extern void ext4_update_dynamic_rev(struct super_block *sb);
  extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
                                         __u32 compat);
  extern int ext4_update_rocompat_feature(handle_t *handle,
@@ -1179,7 +1196,7 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
  
  static inline
  struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
-                                                       ext4_group_t group)
+                                           ext4_group_t group)
  {
          struct ext4_group_info ***grp_info;
          long indexv, indexh;
@@ -1207,6 +1224,28 @@ do {                                                             \
                 __ext4_std_error((sb), __func__, (errno));      \
  } while (0)
  
+#ifdef CONFIG_SMP
+/* Each CPU can accumulate FBC_BATCH blocks in its local
+ * counter, so we need more than FBC_BATCH * nr_cpu_ids free blocks.
+ * The watermark is 4 times that amount; on !SMP builds it is 0.
+ */
+#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
+#else
+#define EXT4_FREEBLOCKS_WATERMARK 0
+#endif
+
+/* Raise i_disksize to newsize if larger, under i_data_sem held for write. */
+static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
+{
+       /*
+        * XXX: replace with spinlock if seen contended -bzzz
+        */
+       down_write(&EXT4_I(inode)->i_data_sem);
+       if (newsize > EXT4_I(inode)->i_disksize)
+               EXT4_I(inode)->i_disksize = newsize;
+       up_write(&EXT4_I(inode)->i_data_sem);
+}
+
  /*
   * Inodes and files operations
   */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h

index d33dc56d6986e785ea6e75f9a256cf2191fc6b74..bec7ce59fc0d051cbdd3cdbab556ce122ada72db 100644 (file)
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -124,6 +124,19 @@ struct ext4_ext_path {
  #define EXT4_EXT_CACHE_GAP     1
  #define EXT4_EXT_CACHE_EXTENT  2
  
+/*
+ * to be called by ext4_ext_walk_space()
+ * negative retcode - error
+ * positive retcode - signal for ext4_ext_walk_space(), see below
+ * callback must return valid extent (passed or newly created)
+ */
+typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
+                                       struct ext4_ext_cache *,
+                                       struct ext4_extent *, void *);
+
+#define EXT_CONTINUE   0
+#define EXT_BREAK      1
+#define EXT_REPEAT     2
  
  #define EXT_MAX_BLOCK  0xffffffff
  
@@ -224,6 +237,8 @@ extern int ext4_ext_try_to_merge(struct inode *inode,
                                  struct ext4_extent *);
  extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
  extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
+extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t,
+                                                       ext_prepare_callback, void *);
  extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
                                                         struct ext4_ext_path *);
  extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h

index ef7409f0e7e475fde5afb303b68cdcd7c05e8b1c..5c124c0ac6d3c5b4a94aa754dc37ee5542b65514 100644 (file)
--- a/fs/ext4/ext4_i.h
+++ b/fs/ext4/ext4_i.h
@@ -33,38 +33,6 @@ typedef __u32 ext4_lblk_t;
  /* data type for block group number */
  typedef unsigned long ext4_group_t;
  
-struct ext4_reserve_window {
-       ext4_fsblk_t    _rsv_start;     /* First byte reserved */
-       ext4_fsblk_t    _rsv_end;       /* Last byte reserved or 0 */
-};
-
-struct ext4_reserve_window_node {
-       struct rb_node          rsv_node;
-       __u32                   rsv_goal_size;
-       __u32                   rsv_alloc_hit;
-       struct ext4_reserve_window      rsv_window;
-};
-
-struct ext4_block_alloc_info {
-       /* information about reservation window */
-       struct ext4_reserve_window_node rsv_window_node;
-       /*
-        * was i_next_alloc_block in ext4_inode_info
-        * is the logical (file-relative) number of the
-        * most-recently-allocated block in this file.
-        * We use this for detecting linearly ascending allocation requests.
-        */
-       ext4_lblk_t last_alloc_logical_block;
-       /*
-        * Was i_next_alloc_goal in ext4_inode_info
-        * is the *physical* companion to i_next_alloc_block.
-        * it the physical block number of the block which was most-recentl
-        * allocated to this file.  This give us the goal (target) for the next
-        * allocation when we detect linearly ascending requests.
-        */
-       ext4_fsblk_t last_alloc_physical_block;
-};
-
  #define rsv_start rsv_window._rsv_start
  #define rsv_end rsv_window._rsv_end
  
@@ -97,11 +65,8 @@ struct ext4_inode_info {
         ext4_group_t    i_block_group;
         __u32   i_state;                /* Dynamic state flags for ext4 */
  
-       /* block reservation info */
-       struct ext4_block_alloc_info *i_block_alloc_info;
-
         ext4_lblk_t             i_dir_start_lookup;
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
         /*
          * Extended attributes can be read independently of the main file
          * data. Taking i_mutex even when reading would cause contention
@@ -111,7 +76,7 @@ struct ext4_inode_info {
          */
         struct rw_semaphore xattr_sem;
  #endif
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
         struct posix_acl        *i_acl;
         struct posix_acl        *i_default_acl;
  #endif
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h

index 6300226d55313d168fed238287744142204e2456..6a0b40d43264b232a806f0e265150343c6375077 100644 (file)
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -40,8 +40,8 @@ struct ext4_sb_info {
         unsigned long s_blocks_last;    /* Last seen block count */
         loff_t s_bitmap_maxbytes;       /* max bytes for bitmap files */
         struct buffer_head * s_sbh;     /* Buffer containing the super block */
-       struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */
-       struct buffer_head ** s_group_desc;
+       struct ext4_super_block *s_es;  /* Pointer to the super block in the buffer */
+       struct buffer_head **s_group_desc;
         unsigned long  s_mount_opt;
         ext4_fsblk_t s_sb_block;
         uid_t s_resuid;
@@ -52,6 +52,7 @@ struct ext4_sb_info {
         int s_desc_per_block_bits;
         int s_inode_size;
         int s_first_ino;
+       unsigned int s_inode_readahead_blks;
         spinlock_t s_next_gen_lock;
         u32 s_next_generation;
         u32 s_hash_seed[4];
@@ -59,16 +60,17 @@ struct ext4_sb_info {
         struct percpu_counter s_freeblocks_counter;
         struct percpu_counter s_freeinodes_counter;
         struct percpu_counter s_dirs_counter;
+       struct percpu_counter s_dirtyblocks_counter;
         struct blockgroup_lock s_blockgroup_lock;
+       struct proc_dir_entry *s_proc;
  
         /* root of the per fs reservation window tree */
         spinlock_t s_rsv_window_lock;
         struct rb_root s_rsv_window_root;
-       struct ext4_reserve_window_node s_rsv_window_head;
  
         /* Journaling */
-       struct inode * s_journal_inode;
-       struct journal_s * s_journal;
+       struct inode *s_journal_inode;
+       struct journal_s *s_journal;
         struct list_head s_orphan;
         unsigned long s_commit_interval;
         struct block_device *journal_bdev;
@@ -106,12 +108,12 @@ struct ext4_sb_info {
  
         /* tunables */
         unsigned long s_stripe;
-       unsigned long s_mb_stream_request;
-       unsigned long s_mb_max_to_scan;
-       unsigned long s_mb_min_to_scan;
-       unsigned long s_mb_stats;
-       unsigned long s_mb_order2_reqs;
-       unsigned long s_mb_group_prealloc;
+       unsigned int s_mb_stream_request;
+       unsigned int s_mb_max_to_scan;
+       unsigned int s_mb_min_to_scan;
+       unsigned int s_mb_stats;
+       unsigned int s_mb_order2_reqs;
+       unsigned int s_mb_group_prealloc;
         /* where last allocation was done - for stream allocation */
         unsigned long s_mb_last_group;
         unsigned long s_mb_last_start;
@@ -121,7 +123,6 @@ struct ext4_sb_info {
         int s_mb_history_cur;
         int s_mb_history_max;
         int s_mb_history_num;
-       struct proc_dir_entry *s_mb_proc;
         spinlock_t s_mb_history_lock;
         int s_mb_history_filter;
  
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c

index b24d3c53f20cd20407d5d5373c79b725d308cc45..ea2ce3c0ae66ec47db52596ed6d5c22cf844ba65 100644 (file)
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -40,6 +40,7 @@
  #include <linux/slab.h>
  #include <linux/falloc.h>
  #include <asm/uaccess.h>
+#include <linux/fiemap.h>
  #include "ext4_jbd2.h"
  #include "ext4_extents.h"
  
@@ -383,8 +384,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
         ext_debug("\n");
  }
  #else
-#define ext4_ext_show_path(inode,path)
-#define ext4_ext_show_leaf(inode,path)
+#define ext4_ext_show_path(inode, path)
+#define ext4_ext_show_leaf(inode, path)
  #endif
  
  void ext4_ext_drop_refs(struct ext4_ext_path *path)
@@ -440,9 +441,10 @@ ext4_ext_binsearch_idx(struct inode *inode,
                 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
                   if (k != 0 &&
                       le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
-                               printk("k=%d, ix=0x%p, first=0x%p\n", k,
-                                       ix, EXT_FIRST_INDEX(eh));
-                               printk("%u <= %u\n",
+                               printk(KERN_DEBUG "k=%d, ix=0x%p, "
+                                      "first=0x%p\n", k,
+                                      ix, EXT_FIRST_INDEX(eh));
+                               printk(KERN_DEBUG "%u <= %u\n",
                                        le32_to_cpu(ix->ei_block),
                                        le32_to_cpu(ix[-1].ei_block));
                         }
@@ -1475,7 +1477,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
                                 struct ext4_ext_path *path,
                                 struct ext4_extent *newext)
  {
-       struct ext4_extent_header * eh;
+       struct ext4_extent_header *eh;
         struct ext4_extent *ex, *fex;
         struct ext4_extent *nearex; /* nearest extent */
         struct ext4_ext_path *npath = NULL;
@@ -1625,6 +1627,113 @@ cleanup:
         return err;
  }
  
+/*
+ * Call @func for each extent or gap in logical blocks [block, block + num).
+ * @func returning <0 aborts, EXT_BREAK stops with 0, EXT_REPEAT retries.
+ */
+int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
+                       ext4_lblk_t num, ext_prepare_callback func,
+                       void *cbdata)
+{
+       struct ext4_ext_path *path = NULL;
+       struct ext4_ext_cache cbex;
+       struct ext4_extent *ex;
+       ext4_lblk_t next, start = 0, end = 0;
+       ext4_lblk_t last = block + num;
+       int depth, exists, err = 0;
+
+       BUG_ON(func == NULL);
+       BUG_ON(inode == NULL);
+
+       while (block < last && block != EXT_MAX_BLOCK) {
+               num = last - block;
+               /* look up the extent nearest to this block, reusing path */
+               path = ext4_ext_find_extent(inode, block, path);
+               if (IS_ERR(path)) {
+                       err = PTR_ERR(path);
+                       path = NULL;
+                       break;
+               }
+
+               depth = ext_depth(inode);
+               BUG_ON(path[depth].p_hdr == NULL);
+               ex = path[depth].p_ext;
+               next = ext4_ext_next_allocated_block(path);
+
+               exists = 0;
+               if (!ex) {
+                       /* no extent found: the whole request is a gap */
+                       start = block;
+                       end = block + num;
+               } else if (le32_to_cpu(ex->ee_block) > block) {
+                       /* gap in front of the found extent */
+                       start = block;
+                       end = le32_to_cpu(ex->ee_block);
+                       if (block + num < end)
+                               end = block + num;
+               } else if (block >= le32_to_cpu(ex->ee_block)
+                                       + ext4_ext_get_actual_len(ex)) {
+                       /* gap behind the found extent, up to the next one */
+                       start = block;
+                       end = block + num;
+                       if (end >= next)
+                               end = next;
+               } else if (block >= le32_to_cpu(ex->ee_block)) {
+                       /* block lies inside the found extent */
+                       start = block;
+                       end = le32_to_cpu(ex->ee_block)
+                               + ext4_ext_get_actual_len(ex);
+                       if (block + num < end)
+                               end = block + num;
+                       exists = 1;
+               } else {
+                       BUG();
+               }
+               BUG_ON(end <= start);
+
+               if (!exists) {
+                       cbex.ec_block = start;
+                       cbex.ec_len = end - start;
+                       cbex.ec_start = 0;
+                       cbex.ec_type = EXT4_EXT_CACHE_GAP;
+               } else {
+                       cbex.ec_block = le32_to_cpu(ex->ee_block);
+                       cbex.ec_len = ext4_ext_get_actual_len(ex);
+                       cbex.ec_start = ext_pblock(ex);
+                       cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
+               }
+
+               BUG_ON(cbex.ec_len == 0);
+               err = func(inode, path, &cbex, ex, cbdata);
+               ext4_ext_drop_refs(path);
+
+               if (err < 0)
+                       break;
+
+               if (err == EXT_REPEAT)
+                       continue;
+               else if (err == EXT_BREAK) {
+                       err = 0;
+                       break;
+               }
+
+               if (ext_depth(inode) != depth) {
+                       /* tree depth changed: free path so it is reallocated */
+                       kfree(path);
+                       path = NULL;
+               }
+
+               block = cbex.ec_block + cbex.ec_len;
+       }
+
+       if (path) {
+               ext4_ext_drop_refs(path);
+               kfree(path);
+       }
+
+       return err;
+}
+
  static void
  ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
                         __u32 len, ext4_fsblk_t start, int type)
@@ -2142,7 +2251,7 @@ void ext4_ext_init(struct super_block *sb)
          */
  
         if (test_opt(sb, EXTENTS)) {
-               printk("EXT4-fs: file extents enabled");
+               printk(KERN_INFO "EXT4-fs: file extents enabled");
  #ifdef AGGRESSIVE_TEST
                 printk(", aggressive tests");
  #endif
@@ -2696,11 +2805,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                 goto out2;
         }
         /*
-        * Okay, we need to do block allocation.  Lazily initialize the block
-        * allocation info here if necessary.
+        * Okay, we need to do block allocation.
          */
-       if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
-               ext4_init_block_alloc_info(inode);
  
         /* find neighbour allocated blocks */
         ar.lleft = iblock;
@@ -2760,7 +2866,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                 /* free data blocks we just allocated */
                 /* not a good idea to call discard here directly,
                  * but otherwise we'd need to call it every free() */
-               ext4_mb_discard_inode_preallocations(inode);
+               ext4_discard_preallocations(inode);
                 ext4_free_blocks(handle, inode, ext_pblock(&newex),
                                         ext4_ext_get_actual_len(&newex), 0);
                 goto out2;
@@ -2824,7 +2930,7 @@ void ext4_ext_truncate(struct inode *inode)
         down_write(&EXT4_I(inode)->i_data_sem);
         ext4_ext_invalidate_cache(inode);
  
-       ext4_discard_reservation(inode);
+       ext4_discard_preallocations(inode);
  
         /*
          * TODO: optimization is possible here.
@@ -2877,10 +2983,11 @@ static void ext4_falloc_update_inode(struct inode *inode,
          * Update only when preallocation was requested beyond
          * the file size.
          */
-       if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-                               new_size > i_size_read(inode)) {
-               i_size_write(inode, new_size);
-               EXT4_I(inode)->i_disksize = new_size;
+       if (!(mode & FALLOC_FL_KEEP_SIZE)) {
+               if (new_size > i_size_read(inode))
+                       i_size_write(inode, new_size);
+               if (new_size > EXT4_I(inode)->i_disksize)
+                       ext4_update_i_disksize(inode, new_size);
         }
  
  }
@@ -2972,3 +3079,181 @@ retry:
         mutex_unlock(&inode->i_mutex);
         return ret > 0 ? ret2 : ret;
  }
+
+/*
+ * Callback function called for each extent to gather FIEMAP information.
+ *
+ * @inode: inode being mapped
+ * @path:  extent path, used to look up the next allocated block
+ * @newex: extent or gap being visited, in filesystem blocks
+ * @ex:    on-disk extent found by the lookup, may be NULL
+ * @data:  the struct fiemap_extent_info to fill
+ *
+ * Returns EXT_CONTINUE to keep walking, EXT_BREAK once
+ * fiemap_fill_next_extent() returns 1, or its negative error code.
+ */
+int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
+                      struct ext4_ext_cache *newex, struct ext4_extent *ex,
+                      void *data)
+{
+       struct fiemap_extent_info *fieinfo = data;
+       unsigned long blksize_bits = inode->i_sb->s_blocksize_bits;
+       __u64   logical;
+       __u64   physical;
+       __u64   length;
+       __u32   flags = 0;
+       int     error;
+
+       logical =  (__u64)newex->ec_block << blksize_bits;
+
+       if (newex->ec_type == EXT4_EXT_CACHE_GAP) {
+               pgoff_t offset;
+               struct page *page;
+               struct buffer_head *bh = NULL;
+               int delayed;
+
+               offset = logical >> PAGE_SHIFT;
+               page = find_get_page(inode->i_mapping, offset);
+               if (!page)
+                       return EXT_CONTINUE;
+
+               /*
+                * find_get_page() took a page reference: drop it on every
+                * path, including the ones that end up skipping this gap.
+                */
+               if (page_has_buffers(page))
+                       bh = page_buffers(page);
+               delayed = bh && buffer_delay(bh);
+               page_cache_release(page);
+
+               /* a gap is reported only when it is delayed-allocated */
+               if (!delayed)
+                       return EXT_CONTINUE;
+               flags |= FIEMAP_EXTENT_DELALLOC;
+       }
+
+       physical = (__u64)newex->ec_start << blksize_bits;
+       length =   (__u64)newex->ec_len << blksize_bits;
+
+       if (ex && ext4_ext_is_uninitialized(ex))
+               flags |= FIEMAP_EXTENT_UNWRITTEN;
+
+       /*
+        * If this extent reaches EXT_MAX_BLOCK, it must be last.
+        *
+        * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
+        * this also indicates no more allocated blocks.
+        *
+        * The first test is made in block units; logical and length
+        * are byte values and must not be compared to EXT_MAX_BLOCK.
+        *
+        * XXX this might miss a single-block extent at EXT_MAX_BLOCK
+        */
+       if (newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK ||
+           ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK)
+               flags |= FIEMAP_EXTENT_LAST;
+
+       error = fiemap_fill_next_extent(fieinfo, logical, physical,
+                                       length, flags);
+       if (error < 0)
+               return error;
+       if (error == 1)
+               return EXT_BREAK;
+
+       return EXT_CONTINUE;
+}
+
+/* fiemap flags we can handle specified here */
+#define EXT4_FIEMAP_FLAGS      (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
+
+/*
+ * Report where the extended attributes of @inode are stored, as a single
+ * extent: the tail of the on-disk inode or the external xattr block.
+ * Nothing is reported if the location is 0.  Returns 0 or a negative error.
+ */
+int ext4_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
+{
+       __u64 physical = 0;
+       __u64 length;
+       __u32 flags = FIEMAP_EXTENT_LAST;
+       int blockbits = inode->i_sb->s_blocksize_bits;
+       int error = 0;
+
+       /* in-inode? */
+       if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) {
+               struct ext4_iloc iloc;
+               int offset;     /* offset of xattr in inode */
+
+               error = ext4_get_inode_loc(inode, &iloc);
+               if (error)
+                       return error;
+               /* widen before shifting: sector_t may be 32 bits wide */
+               physical = (__u64)iloc.bh->b_blocknr << blockbits;
+               offset = EXT4_GOOD_OLD_INODE_SIZE +
+                               EXT4_I(inode)->i_extra_isize;
+               physical += offset;
+               length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
+               flags |= FIEMAP_EXTENT_DATA_INLINE;
+               /* drop the buffer reference taken by ext4_get_inode_loc() */
+               brelse(iloc.bh);
+       } else { /* external block */
+               physical = EXT4_I(inode)->i_file_acl << blockbits;
+               length = inode->i_sb->s_blocksize;
+       }
+
+       if (physical)
+               error = fiemap_fill_next_extent(fieinfo, 0, physical,
+                                               length, flags);
+       return (error < 0 ? error : 0);
+}
+
+/*
+ * Report the mapping of bytes [start, start + len) of @inode, or of its
+ * xattr storage when FIEMAP_FLAG_XATTR is set, through @fieinfo.
+ * Files that are not extent-mapped are handed to generic_block_fiemap().
+ */
+int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+               __u64 start, __u64 len)
+{
+       ext4_lblk_t start_blk;
+       ext4_lblk_t len_blks;
+       __u64 last_blk;
+       int error = 0;
+
+       /* fallback to generic here if not in extents fmt */
+       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+               return generic_block_fiemap(inode, fieinfo, start, len,
+                       ext4_get_block);
+
+       if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
+               return -EBADR;
+
+       if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
+               error = ext4_xattr_fiemap(inode, fieinfo);
+       } else if (len) {
+               /*
+                * Convert the byte range to blocks (an empty range maps
+                * nothing).  Use the block of the last byte so a partially
+                * covered block is included, and clamp it so that the
+                * block count still fits in ext4_lblk_t.
+                */
+               start_blk = start >> inode->i_sb->s_blocksize_bits;
+               last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
+               if (last_blk >= EXT_MAX_BLOCK)
+                       last_blk = EXT_MAX_BLOCK - 1;
+               len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
+
+               /*
+                * Walk the extent tree gathering extent information.
+                * ext4_ext_fiemap_cb will push extents back to user.
+                * The walk only reads the tree, so take the lock shared.
+                */
+               down_read(&EXT4_I(inode)->i_data_sem);
+               error = ext4_ext_walk_space(inode, start_blk, len_blks,
+                                         ext4_ext_fiemap_cb, fieinfo);
+               up_read(&EXT4_I(inode)->i_data_sem);
+       }
+
+       return error;
+}
+
diff --git a/fs/ext4/file.c b/fs/ext4/file.c

index 430eb7978db4c92f0503b92895a59000d8f97b3e..6bd11fba71f7dfb5fb641c0c89a672945500b15a 100644 (file)
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -31,14 +31,14 @@
   * from ext4_file_open: open gets called at every open, but release
   * gets called only when /all/ the files are closed.
   */
-static int ext4_release_file (struct inode * inode, struct file * filp)
+static int ext4_release_file(struct inode *inode, struct file *filp)
  {
         /* if we are the last writer on the inode, drop the block reservation */
         if ((filp->f_mode & FMODE_WRITE) &&
                         (atomic_read(&inode->i_writecount) == 1))
         {
                 down_write(&EXT4_I(inode)->i_data_sem);
-               ext4_discard_reservation(inode);
+               ext4_discard_preallocations(inode);
                 up_write(&EXT4_I(inode)->i_data_sem);
         }
         if (is_dx(inode) && filp->private_data)
@@ -140,6 +140,9 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
         return 0;
  }
  
+extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+               __u64 start, __u64 len);
+
  const struct file_operations ext4_file_operations = {
         .llseek         = generic_file_llseek,
         .read           = do_sync_read,
@@ -162,7 +165,7 @@ const struct inode_operations ext4_file_inode_operations = {
         .truncate       = ext4_truncate,
         .setattr        = ext4_setattr,
         .getattr        = ext4_getattr,
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
         .setxattr       = generic_setxattr,
         .getxattr       = generic_getxattr,
         .listxattr      = ext4_listxattr,
@@ -170,5 +173,6 @@ const struct inode_operations ext4_file_inode_operations = {
  #endif
         .permission     = ext4_permission,
         .fallocate      = ext4_fallocate,
+       .fiemap         = ext4_fiemap,
  };
  
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c

index a45c3737ad31e69e9de6c98075ff98fabe3b0770..5afe4370840b2b0b7c683a7d738baf648eae533b 100644 (file)
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -28,6 +28,7 @@
  #include <linux/writeback.h>
  #include <linux/jbd2.h>
  #include <linux/blkdev.h>
+#include <linux/marker.h>
  #include "ext4.h"
  #include "ext4_jbd2.h"
  
@@ -43,7 +44,7 @@
   * inode to disk.
   */
  
-int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
+int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
  {
         struct inode *inode = dentry->d_inode;
         journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
@@ -51,6 +52,10 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
  
         J_ASSERT(ext4_journal_current_handle() == NULL);
  
+       trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
+                  inode->i_sb->s_id, datasync, inode->i_ino,
+                  dentry->d_parent->d_inode->i_ino);
+
         /*
          * data=writeback:
          *  The caller's filemap_fdatawrite()/wait will sync the data.
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c

index 1d6329dbe3906551929c4c8ea61e1878edfde08d..556ca8eba3db0d97a71d05256f555688e390e376 100644 (file)
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -27,7 +27,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[])
                 sum += DELTA;
                 b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
                 b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
-       } while(--n);
+       } while (--n);
  
         buf[0] += b0;
         buf[1] += b1;
@@ -35,7 +35,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[])
  
  
  /* The old legacy hash */
-static __u32 dx_hack_hash (const char *name, int len)
+static __u32 dx_hack_hash(const char *name, int len)
  {
         __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
         while (len--) {
@@ -59,7 +59,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
         val = pad;
         if (len > num*4)
                 len = num * 4;
-       for (i=0; i < len; i++) {
+       for (i = 0; i < len; i++) {
                 if ((i % 4) == 0)
                         val = pad;
                 val = msg[i] + (val << 8);
@@ -104,7 +104,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
  
         /* Check to see if the seed is all zero's */
         if (hinfo->seed) {
-               for (i=0; i < 4; i++) {
+               for (i = 0; i < 4; i++) {
                         if (hinfo->seed[i])
                                 break;
                 }
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c

index f344834bbf58a4f0228b5ab85243d8bba1b1d648..fe34d74cfb19f89bd8435690511b54529982d984 100644 (file)
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -115,9 +115,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
                             block_group, bitmap_blk);
                 return NULL;
         }
-       if (bh_uptodate_or_lock(bh))
+       if (buffer_uptodate(bh) &&
+           !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
                 return bh;
  
+       lock_buffer(bh);
         spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
         if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
                 ext4_init_inode_bitmap(sb, bh, block_group, desc);
@@ -154,39 +156,40 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
   * though), and then we'd have two inodes sharing the
   * same inode number and space on the harddisk.
   */
-void ext4_free_inode (handle_t *handle, struct inode * inode)
+void ext4_free_inode(handle_t *handle, struct inode *inode)
  {
-       struct super_block * sb = inode->i_sb;
+       struct super_block *sb = inode->i_sb;
         int is_directory;
         unsigned long ino;
         struct buffer_head *bitmap_bh = NULL;
         struct buffer_head *bh2;
         ext4_group_t block_group;
         unsigned long bit;
-       struct ext4_group_desc * gdp;
-       struct ext4_super_block * es;
+       struct ext4_group_desc *gdp;
+       struct ext4_super_block *es;
         struct ext4_sb_info *sbi;
         int fatal = 0, err;
         ext4_group_t flex_group;
  
         if (atomic_read(&inode->i_count) > 1) {
-               printk ("ext4_free_inode: inode has count=%d\n",
-                                       atomic_read(&inode->i_count));
+               printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
+                      atomic_read(&inode->i_count));
                 return;
         }
         if (inode->i_nlink) {
-               printk ("ext4_free_inode: inode has nlink=%d\n",
-                       inode->i_nlink);
+               printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n",
+                      inode->i_nlink);
                 return;
         }
         if (!sb) {
-               printk("ext4_free_inode: inode on nonexistent device\n");
+               printk(KERN_ERR "ext4_free_inode: inode on "
+                      "nonexistent device\n");
                 return;
         }
         sbi = EXT4_SB(sb);
  
         ino = inode->i_ino;
-       ext4_debug ("freeing inode %lu\n", ino);
+       ext4_debug("freeing inode %lu\n", ino);
  
         /*
          * Note: we must free any quota before locking the superblock,
@@ -200,12 +203,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
         is_directory = S_ISDIR(inode->i_mode);
  
         /* Do this BEFORE marking the inode not in use or returning an error */
-       clear_inode (inode);
+       clear_inode(inode);
  
         es = EXT4_SB(sb)->s_es;
         if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
-               ext4_error (sb, "ext4_free_inode",
-                           "reserved or nonexistent inode %lu", ino);
+               ext4_error(sb, "ext4_free_inode",
+                          "reserved or nonexistent inode %lu", ino);
                 goto error_return;
         }
         block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
@@ -222,10 +225,10 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
         /* Ok, now we can actually update the inode bitmaps.. */
         if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
                                         bit, bitmap_bh->b_data))
-               ext4_error (sb, "ext4_free_inode",
-                             "bit already cleared for inode %lu", ino);
+               ext4_error(sb, "ext4_free_inode",
+                          "bit already cleared for inode %lu", ino);
         else {
-               gdp = ext4_get_group_desc (sb, block_group, &bh2);
+               gdp = ext4_get_group_desc(sb, block_group, &bh2);
  
                 BUFFER_TRACE(bh2, "get_write_access");
                 fatal = ext4_journal_get_write_access(handle, bh2);
@@ -287,7 +290,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,
         avefreei = freei / ngroups;
  
         for (group = 0; group < ngroups; group++) {
-               desc = ext4_get_group_desc (sb, group, NULL);
+               desc = ext4_get_group_desc(sb, group, NULL);
                 if (!desc || !desc->bg_free_inodes_count)
                         continue;
                 if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
@@ -576,16 +579,16 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
   * For other inodes, search forward from the parent directory's block
   * group to find a free inode.
   */
-struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
+struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
  {
         struct super_block *sb;
         struct buffer_head *bitmap_bh = NULL;
         struct buffer_head *bh2;
         ext4_group_t group = 0;
         unsigned long ino = 0;
-       struct inode * inode;
-       struct ext4_group_desc * gdp = NULL;
-       struct ext4_super_block * es;
+       struct inode *inode;
+       struct ext4_group_desc *gdp = NULL;
+       struct ext4_super_block *es;
         struct ext4_inode_info *ei;
         struct ext4_sb_info *sbi;
         int ret2, err = 0;
@@ -613,7 +616,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
         }
  
         if (S_ISDIR(mode)) {
-               if (test_opt (sb, OLDALLOC))
+               if (test_opt(sb, OLDALLOC))
                         ret2 = find_group_dir(sb, dir, &group);
                 else
                         ret2 = find_group_orlov(sb, dir, &group);
@@ -783,7 +786,7 @@ got:
         }
  
         inode->i_uid = current->fsuid;
-       if (test_opt (sb, GRPID))
+       if (test_opt(sb, GRPID))
                 inode->i_gid = dir->i_gid;
         else if (dir->i_mode & S_ISGID) {
                 inode->i_gid = dir->i_gid;
@@ -816,7 +819,6 @@ got:
                 ei->i_flags &= ~EXT4_DIRSYNC_FL;
         ei->i_file_acl = 0;
         ei->i_dtime = 0;
-       ei->i_block_alloc_info = NULL;
         ei->i_block_group = group;
  
         ext4_set_inode_flags(inode);
@@ -832,7 +834,7 @@ got:
         ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
  
         ret = inode;
-       if(DQUOT_ALLOC_INODE(inode)) {
+       if (DQUOT_ALLOC_INODE(inode)) {
                 err = -EDQUOT;
                 goto fail_drop;
         }
@@ -841,7 +843,7 @@ got:
         if (err)
                 goto fail_free_drop;
  
-       err = ext4_init_security(handle,inode, dir);
+       err = ext4_init_security(handle, inode, dir);
         if (err)
                 goto fail_free_drop;
  
@@ -959,7 +961,7 @@ error:
         return ERR_PTR(err);
  }
  
-unsigned long ext4_count_free_inodes (struct super_block * sb)
+unsigned long ext4_count_free_inodes(struct super_block *sb)
  {
         unsigned long desc_count;
         struct ext4_group_desc *gdp;
@@ -974,7 +976,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
         bitmap_count = 0;
         gdp = NULL;
         for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
-               gdp = ext4_get_group_desc (sb, i, NULL);
+               gdp = ext4_get_group_desc(sb, i, NULL);
                 if (!gdp)
                         continue;
                 desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
@@ -989,13 +991,14 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
                 bitmap_count += x;
         }
         brelse(bitmap_bh);
-       printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n",
-               le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
+       printk(KERN_DEBUG "ext4_count_free_inodes: "
+              "stored = %u, computed = %lu, %lu\n",
+              le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
         return desc_count;
  #else
         desc_count = 0;
         for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
-               gdp = ext4_get_group_desc (sb, i, NULL);
+               gdp = ext4_get_group_desc(sb, i, NULL);
                 if (!gdp)
                         continue;
                 desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
@@ -1006,13 +1009,13 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
  }
  
  /* Called at mount-time, super-block is locked */
-unsigned long ext4_count_dirs (struct super_block * sb)
+unsigned long ext4_count_dirs(struct super_block * sb)
  {
         unsigned long count = 0;
         ext4_group_t i;
  
         for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
-               struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL);
+               struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
                 if (!gdp)
                         continue;
                 count += le16_to_cpu(gdp->bg_used_dirs_count);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index 7e91913e325bb511086c5a2ddb0ddeda0bc28e01..9b4ec9decfd1b6020c13bbd57d86af7eab46006b 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -190,7 +190,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
  /*
   * Called at the last iput() if i_nlink is zero.
   */
-void ext4_delete_inode (struct inode * inode)
+void ext4_delete_inode(struct inode *inode)
  {
         handle_t *handle;
         int err;
@@ -330,11 +330,11 @@ static int ext4_block_to_path(struct inode *inode,
         int final = 0;
  
         if (i_block < 0) {
-               ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0");
+               ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0");
         } else if (i_block < direct_blocks) {
                 offsets[n++] = i_block;
                 final = direct_blocks;
-       } else if ( (i_block -= direct_blocks) < indirect_blocks) {
+       } else if ((i_block -= direct_blocks) < indirect_blocks) {
                 offsets[n++] = EXT4_IND_BLOCK;
                 offsets[n++] = i_block;
                 final = ptrs;
@@ -400,14 +400,14 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
  
         *err = 0;
         /* i_data is not going away, no lock needed */
-       add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets);
+       add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets);
         if (!p->key)
                 goto no_block;
         while (--depth) {
                 bh = sb_bread(sb, le32_to_cpu(p->key));
                 if (!bh)
                         goto failure;
-               add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
+               add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
                 /* Reader: end */
                 if (!p->key)
                         goto no_block;
@@ -443,7 +443,7 @@ no_block:
  static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
  {
         struct ext4_inode_info *ei = EXT4_I(inode);
-       __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data;
+       __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
         __le32 *p;
         ext4_fsblk_t bg_start;
         ext4_fsblk_t last_block;
@@ -486,18 +486,9 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
  static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
                 Indirect *partial)
  {
-       struct ext4_block_alloc_info *block_i;
-
-       block_i =  EXT4_I(inode)->i_block_alloc_info;
-
         /*
-        * try the heuristic for sequential allocation,
-        * failing that at least try to get decent locality.
+        * XXX need to get goal block from mballoc's data structures
          */
-       if (block_i && (block == block_i->last_alloc_logical_block + 1)
-               && (block_i->last_alloc_physical_block != 0)) {
-               return block_i->last_alloc_physical_block + 1;
-       }
  
         return ext4_find_near(inode, partial);
  }
@@ -630,7 +621,7 @@ allocated:
         *err = 0;
         return ret;
  failed_out:
-       for (i = 0; i <index; i++)
+       for (i = 0; i < index; i++)
                 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
         return ret;
  }
@@ -703,7 +694,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
                 branch[n].p = (__le32 *) bh->b_data + offsets[n];
                 branch[n].key = cpu_to_le32(new_blocks[n]);
                 *branch[n].p = branch[n].key;
-               if ( n == indirect_blks) {
+               if (n == indirect_blks) {
                         current_block = new_blocks[n];
                         /*
                          * End of chain, update the last new metablock of
@@ -730,7 +721,7 @@ failed:
                 BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget");
                 ext4_journal_forget(handle, branch[i].bh);
         }
-       for (i = 0; i <indirect_blks; i++)
+       for (i = 0; i < indirect_blks; i++)
                 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
  
         ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
@@ -757,10 +748,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
  {
         int i;
         int err = 0;
-       struct ext4_block_alloc_info *block_i;
         ext4_fsblk_t current_block;
  
-       block_i = EXT4_I(inode)->i_block_alloc_info;
         /*
          * If we're splicing into a [td]indirect block (as opposed to the
          * inode) then we need to get write access to the [td]indirect block
@@ -783,18 +772,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
         if (num == 0 && blks > 1) {
                 current_block = le32_to_cpu(where->key) + 1;
                 for (i = 1; i < blks; i++)
-                       *(where->p + i ) = cpu_to_le32(current_block++);
-       }
-
-       /*
-        * update the most recently allocated logical & physical block
-        * in i_block_alloc_info, to assist find the proper goal block for next
-        * allocation
-        */
-       if (block_i) {
-               block_i->last_alloc_logical_block = block + blks - 1;
-               block_i->last_alloc_physical_block =
-                               le32_to_cpu(where[num].key) + blks - 1;
+                       *(where->p + i) = cpu_to_le32(current_block++);
         }
  
         /* We are done with atomic stuff, now do the rest of housekeeping */
@@ -914,12 +892,8 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
                 goto cleanup;
  
         /*
-        * Okay, we need to do block allocation.  Lazily initialize the block
-        * allocation info here if necessary
+        * Okay, we need to do block allocation.
         */
-       if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
-               ext4_init_block_alloc_info(inode);
-
         goal = ext4_find_goal(inode, iblock, partial);
  
         /* the number of blocks need to allocate for [d,t]indirect blocks */
@@ -1030,19 +1004,20 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
         BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
         mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
  
-       /* Account for allocated meta_blocks */
-       mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+       if (mdb_free) {
+               /* Account for allocated meta_blocks */
+               mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
  
-       /* update fs free blocks counter for truncate case */
-       percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
+               /* update fs dirty blocks counter */
+               percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
+               EXT4_I(inode)->i_allocated_meta_blocks = 0;
+               EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+       }
  
         /* update per-inode reservations */
         BUG_ON(used  > EXT4_I(inode)->i_reserved_data_blocks);
         EXT4_I(inode)->i_reserved_data_blocks -= used;
  
-       BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-       EXT4_I(inode)->i_reserved_meta_blocks = mdb;
-       EXT4_I(inode)->i_allocated_meta_blocks = 0;
         spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
  }
  
@@ -1160,8 +1135,8 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
  /* Maximum number of blocks we map for direct IO at once. */
  #define DIO_MAX_BLOCKS 4096
  
-static int ext4_get_block(struct inode *inode, sector_t iblock,
-                       struct buffer_head *bh_result, int create)
+int ext4_get_block(struct inode *inode, sector_t iblock,
+                  struct buffer_head *bh_result, int create)
  {
         handle_t *handle = ext4_journal_current_handle();
         int ret = 0, started = 0;
@@ -1241,7 +1216,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
                         BUFFER_TRACE(bh, "call get_create_access");
                         fatal = ext4_journal_get_create_access(handle, bh);
                         if (!fatal && !buffer_uptodate(bh)) {
-                               memset(bh->b_data,0,inode->i_sb->s_blocksize);
+                               memset(bh->b_data, 0, inode->i_sb->s_blocksize);
                                 set_buffer_uptodate(bh);
                         }
                         unlock_buffer(bh);
@@ -1266,7 +1241,7 @@ err:
  struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
                                ext4_lblk_t block, int create, int *err)
  {
-       struct buffer_head * bh;
+       struct buffer_head *bh;
  
         bh = ext4_getblk(handle, inode, block, create, err);
         if (!bh)
@@ -1282,13 +1257,13 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
         return NULL;
  }
  
-static int walk_page_buffers(  handle_t *handle,
-                               struct buffer_head *head,
-                               unsigned from,
-                               unsigned to,
-                               int *partial,
-                               int (*fn)(      handle_t *handle,
-                                               struct buffer_head *bh))
+static int walk_page_buffers(handle_t *handle,
+                            struct buffer_head *head,
+                            unsigned from,
+                            unsigned to,
+                            int *partial,
+                            int (*fn)(handle_t *handle,
+                                      struct buffer_head *bh))
  {
         struct buffer_head *bh;
         unsigned block_start, block_end;
@@ -1296,9 +1271,9 @@ static int walk_page_buffers(     handle_t *handle,
         int err, ret = 0;
         struct buffer_head *next;
  
-       for (   bh = head, block_start = 0;
-               ret == 0 && (bh != head || !block_start);
-               block_start = block_end, bh = next)
+       for (bh = head, block_start = 0;
+            ret == 0 && (bh != head || !block_start);
+            block_start = block_end, bh = next)
         {
                 next = bh->b_this_page;
                 block_end = block_start + blocksize;
@@ -1351,23 +1326,23 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
                                 loff_t pos, unsigned len, unsigned flags,
                                 struct page **pagep, void **fsdata)
  {
-       struct inode *inode = mapping->host;
+       struct inode *inode = mapping->host;
         int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
         handle_t *handle;
         int retries = 0;
-       struct page *page;
+       struct page *page;
         pgoff_t index;
-       unsigned from, to;
+       unsigned from, to;
  
         index = pos >> PAGE_CACHE_SHIFT;
-       from = pos & (PAGE_CACHE_SIZE - 1);
-       to = from + len;
+       from = pos & (PAGE_CACHE_SIZE - 1);
+       to = from + len;
  
  retry:
-       handle = ext4_journal_start(inode, needed_blocks);
-       if (IS_ERR(handle)) {
-               ret = PTR_ERR(handle);
-               goto out;
+       handle = ext4_journal_start(inode, needed_blocks);
+       if (IS_ERR(handle)) {
+               ret = PTR_ERR(handle);
+               goto out;
         }
  
         page = __grab_cache_page(mapping, index);
@@ -1387,9 +1362,16 @@ retry:
         }
  
         if (ret) {
-               unlock_page(page);
+               unlock_page(page);
                 ext4_journal_stop(handle);
-               page_cache_release(page);
+               page_cache_release(page);
+               /*
+                * block_write_begin may have instantiated a few blocks
+                * outside i_size.  Trim these off again. Don't need
+                * i_size_read because we hold i_mutex.
+                */
+               if (pos + len > inode->i_size)
+                       vmtruncate(inode, inode->i_size);
         }
  
         if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -1426,16 +1408,18 @@ static int ext4_ordered_write_end(struct file *file,
         ret = ext4_jbd2_file_inode(handle, inode);
  
         if (ret == 0) {
-               /*
-                * generic_write_end() will run mark_inode_dirty() if i_size
-                * changes.  So let's piggyback the i_disksize mark_inode_dirty
-                * into that.
-                */
                 loff_t new_i_size;
  
                 new_i_size = pos + copied;
-               if (new_i_size > EXT4_I(inode)->i_disksize)
-                       EXT4_I(inode)->i_disksize = new_i_size;
+               if (new_i_size > EXT4_I(inode)->i_disksize) {
+                       ext4_update_i_disksize(inode, new_i_size);
+                       /* We need to mark inode dirty even if
+                        * new_i_size is less than inode->i_size
+                        * but greater than i_disksize. (hint: delalloc)
+                        */
+                       ext4_mark_inode_dirty(handle, inode);
+               }
+
                 ret2 = generic_write_end(file, mapping, pos, len, copied,
                                                         page, fsdata);
                 copied = ret2;
@@ -1460,8 +1444,14 @@ static int ext4_writeback_write_end(struct file *file,
         loff_t new_i_size;
  
         new_i_size = pos + copied;
-       if (new_i_size > EXT4_I(inode)->i_disksize)
-               EXT4_I(inode)->i_disksize = new_i_size;
+       if (new_i_size > EXT4_I(inode)->i_disksize) {
+               ext4_update_i_disksize(inode, new_i_size);
+               /* We need to mark inode dirty even if
+                * new_i_size is less than inode->i_size
+                * but greater than i_disksize. (hint: delalloc)
+                */
+               ext4_mark_inode_dirty(handle, inode);
+       }
  
         ret2 = generic_write_end(file, mapping, pos, len, copied,
                                                         page, fsdata);
@@ -1486,6 +1476,7 @@ static int ext4_journalled_write_end(struct file *file,
         int ret = 0, ret2;
         int partial = 0;
         unsigned from, to;
+       loff_t new_i_size;
  
         from = pos & (PAGE_CACHE_SIZE - 1);
         to = from + len;
@@ -1500,11 +1491,12 @@ static int ext4_journalled_write_end(struct file *file,
                                 to, &partial, write_end_fn);
         if (!partial)
                 SetPageUptodate(page);
-       if (pos+copied > inode->i_size)
+       new_i_size = pos + copied;
+       if (new_i_size > inode->i_size)
                 i_size_write(inode, pos+copied);
         EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
-       if (inode->i_size > EXT4_I(inode)->i_disksize) {
-               EXT4_I(inode)->i_disksize = inode->i_size;
+       if (new_i_size > EXT4_I(inode)->i_disksize) {
+               ext4_update_i_disksize(inode, new_i_size);
                 ret2 = ext4_mark_inode_dirty(handle, inode);
                 if (!ret)
                         ret = ret2;
@@ -1521,6 +1513,7 @@ static int ext4_journalled_write_end(struct file *file,
  
  static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
  {
+       int retries = 0;
         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
         unsigned long md_needed, mdblocks, total = 0;
  
@@ -1529,6 +1522,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
          * in order to allocate nrblocks
          * worse case is one extent per block
          */
+repeat:
         spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
         total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
         mdblocks = ext4_calc_metadata_amount(inode, total);
@@ -1537,13 +1531,14 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
         md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
         total = md_needed + nrblocks;
  
-       if (ext4_has_free_blocks(sbi, total) < total) {
+       if (ext4_claim_free_blocks(sbi, total)) {
                 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+               if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+                       yield();
+                       goto repeat;
+               }
                 return -ENOSPC;
         }
-       /* reduce fs free blocks counter */
-       percpu_counter_sub(&sbi->s_freeblocks_counter, total);
-
         EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
         EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
  
@@ -1585,8 +1580,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
  
         release = to_free + mdb_free;
  
-       /* update fs free blocks counter for truncate case */
-       percpu_counter_add(&sbi->s_freeblocks_counter, release);
+       /* update fs dirty blocks counter for truncate case */
+       percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
  
         /* update per-inode reservations */
         BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
@@ -1630,6 +1625,7 @@ struct mpage_da_data {
         struct writeback_control *wbc;
         int io_done;
         long pages_written;
+       int retval;
  };
  
  /*
@@ -1783,6 +1779,57 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
                 unmap_underlying_metadata(bdev, bh->b_blocknr + i);
  }
  
+static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
+                                       sector_t logical, long blk_cnt)
+{
+       int nr_pages, i;
+       pgoff_t index, end;
+       struct pagevec pvec;
+       struct inode *inode = mpd->inode;
+       struct address_space *mapping = inode->i_mapping;
+
+       index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       end   = (logical + blk_cnt - 1) >>
+                               (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       while (index <= end) {
+               nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+               if (nr_pages == 0)
+                       break;
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+                       index = page->index;
+                       if (index > end)
+                               break;
+                       index++;
+
+                       BUG_ON(!PageLocked(page));
+                       BUG_ON(PageWriteback(page));
+                       block_invalidatepage(page, 0);
+                       ClearPageUptodate(page);
+                       unlock_page(page);
+               }
+       }
+       return;
+}
+
+static void ext4_print_free_blocks(struct inode *inode)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       printk(KERN_EMERG "Total free blocks count %lld\n",
+                       ext4_count_free_blocks(inode->i_sb));
+       printk(KERN_EMERG "Free/Dirty block details\n");
+       printk(KERN_EMERG "free_blocks=%lld\n",
+                       percpu_counter_sum(&sbi->s_freeblocks_counter));
+       printk(KERN_EMERG "dirty_blocks=%lld\n",
+                       percpu_counter_sum(&sbi->s_dirtyblocks_counter));
+       printk(KERN_EMERG "Block reservation details\n");
+       printk(KERN_EMERG "i_reserved_data_blocks=%lu\n",
+                       EXT4_I(inode)->i_reserved_data_blocks);
+       printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n",
+                       EXT4_I(inode)->i_reserved_meta_blocks);
+       return;
+}
+
  /*
   * mpage_da_map_blocks - go through given space
   *
@@ -1792,32 +1839,69 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
   * The function skips space we know is already mapped to disk blocks.
   *
   */
-static void mpage_da_map_blocks(struct mpage_da_data *mpd)
+static int  mpage_da_map_blocks(struct mpage_da_data *mpd)
  {
         int err = 0;
-       struct buffer_head *lbh = &mpd->lbh;
-       sector_t next = lbh->b_blocknr;
         struct buffer_head new;
+       struct buffer_head *lbh = &mpd->lbh;
+       sector_t next;
  
         /*
          * We consider only non-mapped and non-allocated blocks
          */
         if (buffer_mapped(lbh) && !buffer_delay(lbh))
-               return;
-
+               return 0;
         new.b_state = lbh->b_state;
         new.b_blocknr = 0;
         new.b_size = lbh->b_size;
-
+       next = lbh->b_blocknr;
         /*
          * If we didn't accumulate anything
          * to write simply return
          */
         if (!new.b_size)
-               return;
+               return 0;
         err = mpd->get_block(mpd->inode, next, &new, 1);
-       if (err)
-               return;
+       if (err) {
+
+               /* If get block returns with error
+                * we simply return. Later writepage
+                * will redirty the page and writepages
+                * will find the dirty page again
+                */
+               if (err == -EAGAIN)
+                       return 0;
+
+               if (err == -ENOSPC &&
+                               ext4_count_free_blocks(mpd->inode->i_sb)) {
+                       mpd->retval = err;
+                       return 0;
+               }
+
+               /*
+                * get block failure will cause us
+                * to loop in writepages. Because
+                * a_ops->writepage won't be able to
+                * make progress. The page will be redirtied
+                * by writepage and writepages will again
+                * try to write the same.
+                */
+               printk(KERN_EMERG "%s block allocation failed for inode %lu "
+                                 "at logical offset %llu with max blocks "
+                                 "%zd with error %d\n",
+                                 __func__, mpd->inode->i_ino,
+                                 (unsigned long long)next,
+                                 lbh->b_size >> mpd->inode->i_blkbits, err);
+               printk(KERN_EMERG "This should not happen.!! "
+                                       "Data will be lost\n");
+               if (err == -ENOSPC) {
+                       ext4_print_free_blocks(mpd->inode);
+               }
+               /* invalidate all the pages */
+               ext4_da_block_invalidatepages(mpd, next,
+                               lbh->b_size >> mpd->inode->i_blkbits);
+               return err;
+       }
         BUG_ON(new.b_size == 0);
  
         if (buffer_new(&new))
@@ -1830,7 +1914,7 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
         if (buffer_delay(lbh) || buffer_unwritten(lbh))
                 mpage_put_bnr_to_bhs(mpd, next, &new);
  
-       return;
+       return 0;
  }
  
  #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -1899,8 +1983,8 @@ flush_it:
          * We couldn't merge the block to our extent, so we
          * need to flush current  extent and start new one
          */
-       mpage_da_map_blocks(mpd);
-       mpage_da_submit_io(mpd);
+       if (mpage_da_map_blocks(mpd) == 0)
+               mpage_da_submit_io(mpd);
         mpd->io_done = 1;
         return;
  }
@@ -1942,8 +2026,8 @@ static int __mpage_da_writepage(struct page *page,
                  * and start IO on them using writepage()
                  */
                 if (mpd->next_page != mpd->first_page) {
-                       mpage_da_map_blocks(mpd);
-                       mpage_da_submit_io(mpd);
+                       if (mpage_da_map_blocks(mpd) == 0)
+                               mpage_da_submit_io(mpd);
                         /*
                          * skip rest of the page in the page_vec
                          */
@@ -2018,39 +2102,36 @@ static int __mpage_da_writepage(struct page *page,
   */
  static int mpage_da_writepages(struct address_space *mapping,
                                struct writeback_control *wbc,
-                              get_block_t get_block)
+                              struct mpage_da_data *mpd)
  {
-       struct mpage_da_data mpd;
         long to_write;
         int ret;
  
-       if (!get_block)
+       if (!mpd->get_block)
                 return generic_writepages(mapping, wbc);
  
-       mpd.wbc = wbc;
-       mpd.inode = mapping->host;
-       mpd.lbh.b_size = 0;
-       mpd.lbh.b_state = 0;
-       mpd.lbh.b_blocknr = 0;
-       mpd.first_page = 0;
-       mpd.next_page = 0;
-       mpd.get_block = get_block;
-       mpd.io_done = 0;
-       mpd.pages_written = 0;
+       mpd->lbh.b_size = 0;
+       mpd->lbh.b_state = 0;
+       mpd->lbh.b_blocknr = 0;
+       mpd->first_page = 0;
+       mpd->next_page = 0;
+       mpd->io_done = 0;
+       mpd->pages_written = 0;
+       mpd->retval = 0;
  
         to_write = wbc->nr_to_write;
  
-       ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
+       ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
  
         /*
          * Handle last extent of pages
          */
-       if (!mpd.io_done && mpd.next_page != mpd.first_page) {
-               mpage_da_map_blocks(&mpd);
-               mpage_da_submit_io(&mpd);
+       if (!mpd->io_done && mpd->next_page != mpd->first_page) {
+               if (mpage_da_map_blocks(mpd) == 0)
+                       mpage_da_submit_io(mpd);
         }
  
-       wbc->nr_to_write = to_write - mpd.pages_written;
+       wbc->nr_to_write = to_write - mpd->pages_written;
         return ret;
  }
  
@@ -2103,18 +2184,24 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
         handle_t *handle = NULL;
  
         handle = ext4_journal_current_handle();
-       if (!handle) {
-               ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
-                                  bh_result, 0, 0, 0);
-               BUG_ON(!ret);
-       } else {
-               ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
-                                  bh_result, create, 0, EXT4_DELALLOC_RSVED);
-       }
-
+       BUG_ON(!handle);
+       ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+                       bh_result, create, 0, EXT4_DELALLOC_RSVED);
         if (ret > 0) {
+
                 bh_result->b_size = (ret << inode->i_blkbits);
  
+               if (ext4_should_order_data(inode)) {
+                       int retval;
+                       retval = ext4_jbd2_file_inode(handle, inode);
+                       if (retval)
+                               /*
+                                * Failed to add inode for ordered
+                                * mode. Don't update file size
+                                */
+                               return retval;
+               }
+
                 /*
                  * Update on-disk size along with block allocation
                  * we don't use 'extend_disksize' as size may change
@@ -2124,18 +2211,9 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
                 if (disksize > i_size_read(inode))
                         disksize = i_size_read(inode);
                 if (disksize > EXT4_I(inode)->i_disksize) {
-                       /*
-                        * XXX: replace with spinlock if seen contended -bzzz
-                        */
-                       down_write(&EXT4_I(inode)->i_data_sem);
-                       if (disksize > EXT4_I(inode)->i_disksize)
-                               EXT4_I(inode)->i_disksize = disksize;
-                       up_write(&EXT4_I(inode)->i_data_sem);
-
-                       if (EXT4_I(inode)->i_disksize == disksize) {
-                               ret = ext4_mark_inode_dirty(handle, inode);
-                               return ret;
-                       }
+                       ext4_update_i_disksize(inode, disksize);
+                       ret = ext4_mark_inode_dirty(handle, inode);
+                       return ret;
                 }
                 ret = 0;
         }
@@ -2284,6 +2362,7 @@ static int ext4_da_writepages(struct address_space *mapping,
  {
         handle_t *handle = NULL;
         loff_t range_start = 0;
+       struct mpage_da_data mpd;
         struct inode *inode = mapping->host;
         int needed_blocks, ret = 0, nr_to_writebump = 0;
         long to_write, pages_skipped = 0;
@@ -2317,6 +2396,9 @@ static int ext4_da_writepages(struct address_space *mapping,
         range_start =  wbc->range_start;
         pages_skipped = wbc->pages_skipped;
  
+       mpd.wbc = wbc;
+       mpd.inode = mapping->host;
+
  restart_loop:
         to_write = wbc->nr_to_write;
         while (!ret && to_write > 0) {
@@ -2340,23 +2422,17 @@ restart_loop:
                         dump_stack();
                         goto out_writepages;
                 }
-               if (ext4_should_order_data(inode)) {
-                       /*
-                        * With ordered mode we need to add
-                        * the inode to the journal handl
-                        * when we do block allocation.
-                        */
-                       ret = ext4_jbd2_file_inode(handle, inode);
-                       if (ret) {
-                               ext4_journal_stop(handle);
-                               goto out_writepages;
-                       }
-               }
-
                 to_write -= wbc->nr_to_write;
-               ret = mpage_da_writepages(mapping, wbc,
-                                         ext4_da_get_block_write);
+
+               mpd.get_block = ext4_da_get_block_write;
+               ret = mpage_da_writepages(mapping, wbc, &mpd);
+
                 ext4_journal_stop(handle);
+
+               if (mpd.retval == -ENOSPC)
+                       jbd2_journal_force_commit_nested(sbi->s_journal);
+
+               /* reset the retry count */
                 if (ret == MPAGE_DA_EXTENT_TAIL) {
                         /*
                          * got one extent now try with
@@ -2391,6 +2467,33 @@ out_writepages:
         return ret;
  }
  
+#define FALL_BACK_TO_NONDELALLOC 1
+static int ext4_nonda_switch(struct super_block *sb)
+{
+       s64 free_blocks, dirty_blocks;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+       /*
+        * Switch to non-delalloc mode if we are running low
+        * on free blocks. The free block accounting via percpu
+        * counters can get slightly wrong with FBC_BATCH getting
+        * accumulated on each CPU without updating global counters.
+        * Delalloc needs accurate free block accounting, so switch
+        * to non-delalloc when we are near the error range.
+        */
+       free_blocks  = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+       dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
+       if (2 * free_blocks < 3 * dirty_blocks ||
+               free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
+               /*
+                        * free block count is less than 150% of dirty blocks
+                        * or is less than dirty blocks plus the watermark
+                */
+               return 1;
+       }
+       return 0;
+}
+
  static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
                                 loff_t pos, unsigned len, unsigned flags,
                                 struct page **pagep, void **fsdata)
@@ -2406,6 +2509,12 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
         from = pos & (PAGE_CACHE_SIZE - 1);
         to = from + len;
  
+       if (ext4_nonda_switch(inode->i_sb)) {
+               *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
+               return ext4_write_begin(file, mapping, pos,
+                                       len, flags, pagep, fsdata);
+       }
+       *fsdata = (void *)0;
  retry:
         /*
          * With delayed allocation, we don't log the i_disksize update
@@ -2433,6 +2542,13 @@ retry:
                 unlock_page(page);
                 ext4_journal_stop(handle);
                 page_cache_release(page);
+               /*
+                * block_write_begin may have instantiated a few blocks
+                * outside i_size.  Trim these off again. Don't need
+                * i_size_read because we hold i_mutex.
+                */
+               if (pos + len > inode->i_size)
+                       vmtruncate(inode, inode->i_size);
         }
  
         if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -2456,7 +2572,7 @@ static int ext4_da_should_update_i_disksize(struct page *page,
         bh = page_buffers(page);
         idx = offset >> inode->i_blkbits;
  
-       for (i=0; i < idx; i++)
+       for (i = 0; i < idx; i++)
                 bh = bh->b_this_page;
  
         if (!buffer_mapped(bh) || (buffer_delay(bh)))
@@ -2474,9 +2590,22 @@ static int ext4_da_write_end(struct file *file,
         handle_t *handle = ext4_journal_current_handle();
         loff_t new_i_size;
         unsigned long start, end;
+       int write_mode = (int)(unsigned long)fsdata;
+
+       if (write_mode == FALL_BACK_TO_NONDELALLOC) {
+               if (ext4_should_order_data(inode)) {
+                       return ext4_ordered_write_end(file, mapping, pos,
+                                       len, copied, page, fsdata);
+               } else if (ext4_should_writeback_data(inode)) {
+                       return ext4_writeback_write_end(file, mapping, pos,
+                                       len, copied, page, fsdata);
+               } else {
+                       BUG();
+               }
+       }
  
         start = pos & (PAGE_CACHE_SIZE - 1);
-       end = start + copied -1;
+       end = start + copied - 1;
  
         /*
          * generic_write_end() will run mark_inode_dirty() if i_size
@@ -2500,6 +2629,11 @@ static int ext4_da_write_end(struct file *file,
                                 EXT4_I(inode)->i_disksize = new_i_size;
                         }
                         up_write(&EXT4_I(inode)->i_data_sem);
+                       /* We need to mark inode dirty even if
+                        * new_i_size is less than inode->i_size
+                        * but greater than i_disksize. (hint: delalloc)
+                        */
+                       ext4_mark_inode_dirty(handle, inode);
                 }
         }
         ret2 = generic_write_end(file, mapping, pos, len, copied,
@@ -2591,7 +2725,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
                         return 0;
         }
  
-       return generic_block_bmap(mapping,block,ext4_get_block);
+       return generic_block_bmap(mapping, block, ext4_get_block);
  }
  
  static int bget_one(handle_t *handle, struct buffer_head *bh)
@@ -3197,7 +3331,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
         if (!partial->key && *partial->p)
                 /* Writer: end */
                 goto no_top;
-       for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--)
+       for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
                 ;
         /*
          * OK, we've found the last block that must survive. The rest of our
@@ -3216,7 +3350,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
         }
         /* Writer: end */
  
-       while(partial > p) {
+       while (partial > p) {
                 brelse(partial->bh);
                 partial--;
         }
@@ -3408,9 +3542,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                         /* This zaps the entire block.  Bottom up. */
                         BUFFER_TRACE(bh, "free child branches");
                         ext4_free_branches(handle, inode, bh,
-                                          (__le32*)bh->b_data,
-                                          (__le32*)bh->b_data + addr_per_block,
-                                          depth);
+                                       (__le32 *) bh->b_data,
+                                       (__le32 *) bh->b_data + addr_per_block,
+                                       depth);
  
                         /*
                          * We've probably journalled the indirect block several
@@ -3578,7 +3712,7 @@ void ext4_truncate(struct inode *inode)
          */
         down_write(&ei->i_data_sem);
  
-       ext4_discard_reservation(inode);
+       ext4_discard_preallocations(inode);
  
         /*
          * The orphan list entry will now protect us from any crash which
@@ -3673,41 +3807,6 @@ out_stop:
         ext4_journal_stop(handle);
  }
  
-static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
-               unsigned long ino, struct ext4_iloc *iloc)
-{
-       ext4_group_t block_group;
-       unsigned long offset;
-       ext4_fsblk_t block;
-       struct ext4_group_desc *gdp;
-
-       if (!ext4_valid_inum(sb, ino)) {
-               /*
-                * This error is already checked for in namei.c unless we are
-                * looking at an NFS filehandle, in which case no error
-                * report is needed
-                */
-               return 0;
-       }
-
-       block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
-       gdp = ext4_get_group_desc(sb, block_group, NULL);
-       if (!gdp)
-               return 0;
-
-       /*
-        * Figure out the offset within the block group inode table
-        */
-       offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) *
-               EXT4_INODE_SIZE(sb);
-       block = ext4_inode_table(sb, gdp) +
-               (offset >> EXT4_BLOCK_SIZE_BITS(sb));
-
-       iloc->block_group = block_group;
-       iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1);
-       return block;
-}
-
  /*
   * ext4_get_inode_loc returns with an extra refcount against the inode's
   * underlying buffer_head on success. If 'in_mem' is true, we have all
@@ -3717,19 +3816,35 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
  static int __ext4_get_inode_loc(struct inode *inode,
                                 struct ext4_iloc *iloc, int in_mem)
  {
-       ext4_fsblk_t block;
-       struct buffer_head *bh;
+       struct ext4_group_desc  *gdp;
+       struct buffer_head      *bh;
+       struct super_block      *sb = inode->i_sb;
+       ext4_fsblk_t            block;
+       int                     inodes_per_block, inode_offset;
+
+       iloc->bh = 0;
+       if (!ext4_valid_inum(sb, inode->i_ino))
+               return -EIO;
  
-       block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc);
-       if (!block)
+       iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
+       gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
+       if (!gdp)
                 return -EIO;
  
-       bh = sb_getblk(inode->i_sb, block);
+       /*
+        * Figure out the offset within the block group inode table
+        */
+       inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb));
+       inode_offset = ((inode->i_ino - 1) %
+                       EXT4_INODES_PER_GROUP(sb));
+       block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
+       iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
+
+       bh = sb_getblk(sb, block);
         if (!bh) {
-               ext4_error (inode->i_sb, "ext4_get_inode_loc",
-                               "unable to read inode block - "
-                               "inode=%lu, block=%llu",
-                                inode->i_ino, block);
+               ext4_error(sb, "ext4_get_inode_loc", "unable to read "
+                          "inode block - inode=%lu, block=%llu",
+                          inode->i_ino, block);
                 return -EIO;
         }
         if (!buffer_uptodate(bh)) {
@@ -3757,28 +3872,12 @@ static int __ext4_get_inode_loc(struct inode *inode,
                  */
                 if (in_mem) {
                         struct buffer_head *bitmap_bh;
-                       struct ext4_group_desc *desc;
-                       int inodes_per_buffer;
-                       int inode_offset, i;
-                       ext4_group_t block_group;
-                       int start;
-
-                       block_group = (inode->i_ino - 1) /
-                                       EXT4_INODES_PER_GROUP(inode->i_sb);
-                       inodes_per_buffer = bh->b_size /
-                               EXT4_INODE_SIZE(inode->i_sb);
-                       inode_offset = ((inode->i_ino - 1) %
-                                       EXT4_INODES_PER_GROUP(inode->i_sb));
-                       start = inode_offset & ~(inodes_per_buffer - 1);
+                       int i, start;
  
-                       /* Is the inode bitmap in cache? */
-                       desc = ext4_get_group_desc(inode->i_sb,
-                                               block_group, NULL);
-                       if (!desc)
-                               goto make_io;
+                       start = inode_offset & ~(inodes_per_block - 1);
  
-                       bitmap_bh = sb_getblk(inode->i_sb,
-                               ext4_inode_bitmap(inode->i_sb, desc));
+                       /* Is the inode bitmap in cache? */
+                       bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
                         if (!bitmap_bh)
                                 goto make_io;
  
@@ -3791,14 +3890,14 @@ static int __ext4_get_inode_loc(struct inode *inode,
                                 brelse(bitmap_bh);
                                 goto make_io;
                         }
-                       for (i = start; i < start + inodes_per_buffer; i++) {
+                       for (i = start; i < start + inodes_per_block; i++) {
                                 if (i == inode_offset)
                                         continue;
                                 if (ext4_test_bit(i, bitmap_bh->b_data))
                                         break;
                         }
                         brelse(bitmap_bh);
-                       if (i == start + inodes_per_buffer) {
+                       if (i == start + inodes_per_block) {
                                 /* all other inodes are free, so skip I/O */
                                 memset(bh->b_data, 0, bh->b_size);
                                 set_buffer_uptodate(bh);
@@ -3808,6 +3907,36 @@ static int __ext4_get_inode_loc(struct inode *inode,
                 }
  
  make_io:
+               /*
+                * If we need to do any I/O, try to pre-readahead extra
+                * blocks from the inode table.
+                */
+               if (EXT4_SB(sb)->s_inode_readahead_blks) {
+                       ext4_fsblk_t b, end, table;
+                       unsigned num;
+
+                       table = ext4_inode_table(sb, gdp);
+                       /* Make sure s_inode_readahead_blks is a power of 2 */
+                       while (EXT4_SB(sb)->s_inode_readahead_blks &
+                              (EXT4_SB(sb)->s_inode_readahead_blks-1))
+                               EXT4_SB(sb)->s_inode_readahead_blks = 
+                                  (EXT4_SB(sb)->s_inode_readahead_blks &
+                                   (EXT4_SB(sb)->s_inode_readahead_blks-1));
+                       b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
+                       if (table > b)
+                               b = table;
+                       end = b + EXT4_SB(sb)->s_inode_readahead_blks;
+                       num = EXT4_INODES_PER_GROUP(sb);
+                       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                      EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+                               num -= le16_to_cpu(gdp->bg_itable_unused);
+                       table += num / inodes_per_block;
+                       if (end > table)
+                               end = table;
+                       while (b <= end)
+                               sb_breadahead(sb, b++);
+               }
+
                 /*
                  * There are other valid inodes in the buffer, this inode
                  * has in-inode xattrs, or we don't have this inode in memory.
@@ -3818,10 +3947,9 @@ make_io:
                 submit_bh(READ_META, bh);
                 wait_on_buffer(bh);
                 if (!buffer_uptodate(bh)) {
-                       ext4_error(inode->i_sb, "ext4_get_inode_loc",
-                                       "unable to read inode block - "
-                                       "inode=%lu, block=%llu",
-                                       inode->i_ino, block);
+                       ext4_error(sb, __func__,
+                                  "unable to read inode block - inode=%lu, "
+                                  "block=%llu", inode->i_ino, block);
                         brelse(bh);
                         return -EIO;
                 }
@@ -3913,11 +4041,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                 return inode;
  
         ei = EXT4_I(inode);
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
         ei->i_acl = EXT4_ACL_NOT_CACHED;
         ei->i_default_acl = EXT4_ACL_NOT_CACHED;
  #endif
-       ei->i_block_alloc_info = NULL;
  
         ret = __ext4_get_inode_loc(inode, &iloc, 0);
         if (ret < 0)
@@ -3927,7 +4054,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
         inode->i_mode = le16_to_cpu(raw_inode->i_mode);
         inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
         inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
-       if(!(test_opt (inode->i_sb, NO_UID32))) {
+       if (!(test_opt(inode->i_sb, NO_UID32))) {
                 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
                 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
         }
@@ -3945,7 +4072,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                 if (inode->i_mode == 0 ||
                     !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
                         /* this inode is deleted */
-                       brelse (bh);
+                       brelse(bh);
                         ret = -ESTALE;
                         goto bad_inode;
                 }
@@ -3978,7 +4105,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
                 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
                     EXT4_INODE_SIZE(inode->i_sb)) {
-                       brelse (bh);
+                       brelse(bh);
                         ret = -EIO;
                         goto bad_inode;
                 }
@@ -4031,7 +4158,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                         init_special_inode(inode, inode->i_mode,
                            new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
         }
-       brelse (iloc.bh);
+       brelse(iloc.bh);
         ext4_set_inode_flags(inode);
         unlock_new_inode(inode);
         return inode;
@@ -4113,14 +4240,14 @@ static int ext4_do_update_inode(handle_t *handle,
  
         ext4_get_inode_flags(ei);
         raw_inode->i_mode = cpu_to_le16(inode->i_mode);
-       if(!(test_opt(inode->i_sb, NO_UID32))) {
+       if (!(test_opt(inode->i_sb, NO_UID32))) {
                 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
                 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
  /*
   * Fix up interoperability with old kernels. Otherwise, old inodes get
   * re-used with the upper 16 bits of the uid/gid intact
   */
-               if(!ei->i_dtime) {
+               if (!ei->i_dtime) {
                         raw_inode->i_uid_high =
                                 cpu_to_le16(high_16_bits(inode->i_uid));
                         raw_inode->i_gid_high =
@@ -4208,7 +4335,7 @@ static int ext4_do_update_inode(handle_t *handle,
         ei->i_state &= ~EXT4_STATE_NEW;
  
  out_brelse:
-       brelse (bh);
+       brelse(bh);
         ext4_std_error(inode->i_sb, err);
         return err;
  }
@@ -4811,6 +4938,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
         loff_t size;
         unsigned long len;
         int ret = -EINVAL;
+       void *fsdata;
         struct file *file = vma->vm_file;
         struct inode *inode = file->f_path.dentry->d_inode;
         struct address_space *mapping = inode->i_mapping;
@@ -4849,11 +4977,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
          * on the same page though
          */
         ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
-                       len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+                       len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
         if (ret < 0)
                 goto out_unlock;
         ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
-                       len, len, page, NULL);
+                       len, len, page, fsdata);
         if (ret < 0)
                 goto out_unlock;
         ret = 0;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c

index 7a6c2f1faba607e4a41b5d2d10b3c7f39655268f..ea27eaa0cfe5292ea4630644ed0ac8a81225424c 100644 (file)
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -23,9 +23,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
         struct inode *inode = filp->f_dentry->d_inode;
         struct ext4_inode_info *ei = EXT4_I(inode);
         unsigned int flags;
-       unsigned short rsv_window_size;
  
-       ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg);
+       ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
  
         switch (cmd) {
         case EXT4_IOC_GETFLAGS:
@@ -34,7 +33,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                 return put_user(flags, (int __user *) arg);
         case EXT4_IOC_SETFLAGS: {
                 handle_t *handle = NULL;
-               int err;
+               int err, migrate = 0;
                 struct ext4_iloc iloc;
                 unsigned int oldflags;
                 unsigned int jflag;
@@ -82,6 +81,17 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                         if (!capable(CAP_SYS_RESOURCE))
                                 goto flags_out;
                 }
+               if (oldflags & EXT4_EXTENTS_FL) {
+                       /* We don't support clearing extent flags */
+                       if (!(flags & EXT4_EXTENTS_FL)) {
+                               err = -EOPNOTSUPP;
+                               goto flags_out;
+                       }
+               } else if (flags & EXT4_EXTENTS_FL) {
+                       /* migrate the file */
+                       migrate = 1;
+                       flags &= ~EXT4_EXTENTS_FL;
+               }
  
                 handle = ext4_journal_start(inode, 1);
                 if (IS_ERR(handle)) {
@@ -109,6 +119,10 @@ flags_err:
  
                 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
                         err = ext4_change_inode_journal_flag(inode, jflag);
+               if (err)
+                       goto flags_out;
+               if (migrate)
+                       err = ext4_ext_migrate(inode);
  flags_out:
                 mutex_unlock(&inode->i_mutex);
                 mnt_drop_write(filp->f_path.mnt);
@@ -175,49 +189,6 @@ setversion_out:
                         return ret;
                 }
  #endif
-       case EXT4_IOC_GETRSVSZ:
-               if (test_opt(inode->i_sb, RESERVATION)
-                       && S_ISREG(inode->i_mode)
-                       && ei->i_block_alloc_info) {
-                       rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size;
-                       return put_user(rsv_window_size, (int __user *)arg);
-               }
-               return -ENOTTY;
-       case EXT4_IOC_SETRSVSZ: {
-               int err;
-
-               if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
-                       return -ENOTTY;
-
-               if (!is_owner_or_cap(inode))
-                       return -EACCES;
-
-               if (get_user(rsv_window_size, (int __user *)arg))
-                       return -EFAULT;
-
-               err = mnt_want_write(filp->f_path.mnt);
-               if (err)
-                       return err;
-
-               if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS)
-                       rsv_window_size = EXT4_MAX_RESERVE_BLOCKS;
-
-               /*
-                * need to allocate reservation structure for this inode
-                * before set the window size
-                */
-               down_write(&ei->i_data_sem);
-               if (!ei->i_block_alloc_info)
-                       ext4_init_block_alloc_info(inode);
-
-               if (ei->i_block_alloc_info){
-                       struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
-                       rsv->rsv_goal_size = rsv_window_size;
-               }
-               up_write(&ei->i_data_sem);
-               mnt_drop_write(filp->f_path.mnt);
-               return 0;
-       }
         case EXT4_IOC_GROUP_EXTEND: {
                 ext4_fsblk_t n_blocks_count;
                 struct super_block *sb = inode->i_sb;
@@ -267,7 +238,26 @@ setversion_out:
         }
  
         case EXT4_IOC_MIGRATE:
-               return ext4_ext_migrate(inode, filp, cmd, arg);
+       {
+               int err;
+               if (!is_owner_or_cap(inode))
+                       return -EACCES;
+
+               err = mnt_want_write(filp->f_path.mnt);
+               if (err)
+                       return err;
+               /*
+                * i_mutex prevents write and truncate on the file.
+                * Reads still go through. We take i_data_sem in
+                * ext4_ext_swap_inode_data before we switch the
+                * inode format to prevent reads.
+                */
+               mutex_lock(&(inode->i_mutex));
+               err = ext4_ext_migrate(inode);
+               mutex_unlock(&(inode->i_mutex));
+               mnt_drop_write(filp->f_path.mnt);
+               return err;
+       }
  
         default:
                 return -ENOTTY;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c

index e0e3a5eb1ddba0575798cd5d561b23c25409cdb6..b580714f0d859c107e94a3f6c61286141fb0cdab 100644 (file)
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -477,9 +477,10 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
                 b2 = (unsigned char *) bitmap;
                 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
                         if (b1[i] != b2[i]) {
-                               printk("corruption in group %lu at byte %u(%u):"
-                                      " %x in copy != %x on disk/prealloc\n",
-                                       e4b->bd_group, i, i * 8, b1[i], b2[i]);
+                               printk(KERN_ERR "corruption in group %lu "
+                                      "at byte %u(%u): %x in copy != %x "
+                                      "on disk/prealloc\n",
+                                      e4b->bd_group, i, i * 8, b1[i], b2[i]);
                                 BUG();
                         }
                 }
@@ -533,9 +534,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
         void *buddy;
         void *buddy2;
  
-       if (!test_opt(sb, MBALLOC))
-               return 0;
-
         {
                 static int mb_check_counter;
                 if (mb_check_counter++ % 100 != 0)
@@ -784,9 +782,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                 if (bh[i] == NULL)
                         goto out;
  
-               if (bh_uptodate_or_lock(bh[i]))
+               if (buffer_uptodate(bh[i]) &&
+                   !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
                         continue;
  
+               lock_buffer(bh[i]);
                 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
                 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
                         ext4_init_block_bitmap(sb, bh[i],
@@ -2169,9 +2169,10 @@ static void ext4_mb_history_release(struct super_block *sb)
  {
         struct ext4_sb_info *sbi = EXT4_SB(sb);
  
-       remove_proc_entry("mb_groups", sbi->s_mb_proc);
-       remove_proc_entry("mb_history", sbi->s_mb_proc);
-
+       if (sbi->s_proc != NULL) {
+               remove_proc_entry("mb_groups", sbi->s_proc);
+               remove_proc_entry("mb_history", sbi->s_proc);
+       }
         kfree(sbi->s_mb_history);
  }
  
@@ -2180,10 +2181,10 @@ static void ext4_mb_history_init(struct super_block *sb)
         struct ext4_sb_info *sbi = EXT4_SB(sb);
         int i;
  
-       if (sbi->s_mb_proc != NULL) {
-               proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc,
+       if (sbi->s_proc != NULL) {
+               proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
                                  &ext4_mb_seq_history_fops, sb);
-               proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc,
+               proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
                                  &ext4_mb_seq_groups_fops, sb);
         }
  
@@ -2485,19 +2486,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
         unsigned max;
         int ret;
  
-       if (!test_opt(sb, MBALLOC))
-               return 0;
-
         i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short);
  
         sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
         if (sbi->s_mb_offsets == NULL) {
-               clear_opt(sbi->s_mount_opt, MBALLOC);
                 return -ENOMEM;
         }
         sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
         if (sbi->s_mb_maxs == NULL) {
-               clear_opt(sbi->s_mount_opt, MBALLOC);
                 kfree(sbi->s_mb_maxs);
                 return -ENOMEM;
         }
@@ -2520,7 +2516,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
         /* init file for buddy data */
         ret = ext4_mb_init_backend(sb);
         if (ret != 0) {
-               clear_opt(sbi->s_mount_opt, MBALLOC);
                 kfree(sbi->s_mb_offsets);
                 kfree(sbi->s_mb_maxs);
                 return ret;
@@ -2540,17 +2535,15 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
         sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
         sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
  
-       i = sizeof(struct ext4_locality_group) * nr_cpu_ids;
-       sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
+       sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
         if (sbi->s_locality_groups == NULL) {
-               clear_opt(sbi->s_mount_opt, MBALLOC);
                 kfree(sbi->s_mb_offsets);
                 kfree(sbi->s_mb_maxs);
                 return -ENOMEM;
         }
-       for (i = 0; i < nr_cpu_ids; i++) {
+       for_each_possible_cpu(i) {
                 struct ext4_locality_group *lg;
-               lg = &sbi->s_locality_groups[i];
+               lg = per_cpu_ptr(sbi->s_locality_groups, i);
                 mutex_init(&lg->lg_mutex);
                 for (j = 0; j < PREALLOC_TB_SIZE; j++)
                         INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
@@ -2560,7 +2553,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
         ext4_mb_init_per_dev_proc(sb);
         ext4_mb_history_init(sb);
  
-       printk("EXT4-fs: mballoc enabled\n");
+       printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
         return 0;
  }
  
@@ -2589,9 +2582,6 @@ int ext4_mb_release(struct super_block *sb)
         struct ext4_group_info *grinfo;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
  
-       if (!test_opt(sb, MBALLOC))
-               return 0;
-
         /* release freed, non-committed blocks */
         spin_lock(&sbi->s_md_lock);
         list_splice_init(&sbi->s_closed_transaction,
@@ -2647,8 +2637,7 @@ int ext4_mb_release(struct super_block *sb)
                                 atomic_read(&sbi->s_mb_discarded));
         }
  
-       kfree(sbi->s_locality_groups);
-
+       free_percpu(sbi->s_locality_groups);
         ext4_mb_history_release(sb);
         ext4_mb_destroy_per_dev_proc(sb);
  
@@ -2721,118 +2710,46 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
  #define EXT4_MB_STREAM_REQ             "stream_req"
  #define EXT4_MB_GROUP_PREALLOC         "group_prealloc"
  
-
-
-#define MB_PROC_FOPS(name)                                     \
-static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v)     \
-{                                                              \
-       struct ext4_sb_info *sbi = m->private;                  \
-                                                               \
-       seq_printf(m, "%ld\n", sbi->s_mb_##name);               \
-       return 0;                                               \
-}                                                              \
-                                                               \
-static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\
-{                                                              \
-       return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\
-}                                                              \
-                                                               \
-static ssize_t ext4_mb_##name##_proc_write(struct file *file,  \
-               const char __user *buf, size_t cnt, loff_t *ppos)       \
-{                                                              \
-       struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\
-       char str[32];                                           \
-       long value;                                             \
-       if (cnt >= sizeof(str))                                 \
-               return -EINVAL;                                 \
-       if (copy_from_user(str, buf, cnt))                      \
-               return -EFAULT;                                 \
-       value = simple_strtol(str, NULL, 0);                    \
-       if (value <= 0)                                         \
-               return -ERANGE;                                 \
-       sbi->s_mb_##name = value;                               \
-       return cnt;                                             \
-}                                                              \
-                                                               \
-static const struct file_operations ext4_mb_##name##_proc_fops = {     \
-       .owner          = THIS_MODULE,                          \
-       .open           = ext4_mb_##name##_proc_open,           \
-       .read           = seq_read,                             \
-       .llseek         = seq_lseek,                            \
-       .release        = single_release,                       \
-       .write          = ext4_mb_##name##_proc_write,          \
-};
-
-MB_PROC_FOPS(stats);
-MB_PROC_FOPS(max_to_scan);
-MB_PROC_FOPS(min_to_scan);
-MB_PROC_FOPS(order2_reqs);
-MB_PROC_FOPS(stream_request);
-MB_PROC_FOPS(group_prealloc);
-
-#define        MB_PROC_HANDLER(name, var)                                      \
-do {                                                                   \
-       proc = proc_create_data(name, mode, sbi->s_mb_proc,             \
-                               &ext4_mb_##var##_proc_fops, sbi);       \
-       if (proc == NULL) {                                             \
-               printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \
-               goto err_out;                                           \
-       }                                                               \
-} while (0)
-
  static int ext4_mb_init_per_dev_proc(struct super_block *sb)
  {
         mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
         struct proc_dir_entry *proc;
-       char devname[64];
  
-       if (proc_root_ext4 == NULL) {
-               sbi->s_mb_proc = NULL;
+       if (sbi->s_proc == NULL)
                 return -EINVAL;
-       }
-       bdevname(sb->s_bdev, devname);
-       sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
-
-       MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats);
-       MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan);
-       MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan);
-       MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs);
-       MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request);
-       MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc);
  
+       EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats);
+       EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan);
+       EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan);
+       EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs);
+       EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request);
+       EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc);
         return 0;
  
  err_out:
-       printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname);
-       remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc);
-       remove_proc_entry(devname, proc_root_ext4);
-       sbi->s_mb_proc = NULL;
-
+       remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
         return -ENOMEM;
  }
  
  static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
  {
         struct ext4_sb_info *sbi = EXT4_SB(sb);
-       char devname[64];
  
-       if (sbi->s_mb_proc == NULL)
+       if (sbi->s_proc == NULL)
                 return -EINVAL;
  
-       bdevname(sb->s_bdev, devname);
-       remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc);
-       remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc);
-       remove_proc_entry(devname, proc_root_ext4);
+       remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
+       remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
  
         return 0;
  }
@@ -2854,11 +2771,6 @@ int __init init_ext4_mballoc(void)
                 kmem_cache_destroy(ext4_pspace_cachep);
                 return -ENOMEM;
         }
-#ifdef CONFIG_PROC_FS
-       proc_root_ext4 = proc_mkdir("fs/ext4", NULL);
-       if (proc_root_ext4 == NULL)
-               printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n");
-#endif
         return 0;
  }
  
@@ -2867,9 +2779,6 @@ void exit_ext4_mballoc(void)
         /* XXX: synchronize_rcu(); */
         kmem_cache_destroy(ext4_pspace_cachep);
         kmem_cache_destroy(ext4_ac_cachep);
-#ifdef CONFIG_PROC_FS
-       remove_proc_entry("fs/ext4", NULL);
-#endif
  }
  
  
@@ -2879,7 +2788,7 @@ void exit_ext4_mballoc(void)
   */
  static noinline_for_stack int
  ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
-                               handle_t *handle)
+                               handle_t *handle, unsigned long reserv_blks)
  {
         struct buffer_head *bitmap_bh = NULL;
         struct ext4_super_block *es;
@@ -2968,15 +2877,16 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
         le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
         gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
         spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-
+       percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
         /*
-        * free blocks account has already be reduced/reserved
-        * at write_begin() time for delayed allocation
-        * do not double accounting
+        * Now reduce the dirty block count also. Should not go negative
          */
         if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
-               percpu_counter_sub(&sbi->s_freeblocks_counter,
-                                       ac->ac_b_ex.fe_len);
+               /* release all the reserved blocks if non delalloc */
+               percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
+       else
+               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+                                               ac->ac_b_ex.fe_len);
  
         if (sbi->s_log_groups_per_flex) {
                 ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -3884,7 +3794,7 @@ out:
   *
   * FIXME!! Make sure it is valid at all the call sites
   */
-void ext4_mb_discard_inode_preallocations(struct inode *inode)
+void ext4_discard_preallocations(struct inode *inode)
  {
         struct ext4_inode_info *ei = EXT4_I(inode);
         struct super_block *sb = inode->i_sb;
@@ -3896,7 +3806,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
         struct ext4_buddy e4b;
         int err;
  
-       if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) {
+       if (!S_ISREG(inode->i_mode)) {
                 /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/
                 return;
         }
@@ -4094,8 +4004,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
          * per cpu locality group is to reduce the contention between block
          * request from multiple CPUs.
          */
-       ac->ac_lg = &sbi->s_locality_groups[get_cpu()];
-       put_cpu();
+       ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id());
  
         /* we're going to use group allocation */
         ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
@@ -4369,33 +4278,32 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
  ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
                                  struct ext4_allocation_request *ar, int *errp)
  {
+       int freed;
         struct ext4_allocation_context *ac = NULL;
         struct ext4_sb_info *sbi;
         struct super_block *sb;
         ext4_fsblk_t block = 0;
-       int freed;
-       int inquota;
+       unsigned long inquota;
+       unsigned long reserv_blks = 0;
  
         sb = ar->inode->i_sb;
         sbi = EXT4_SB(sb);
  
-       if (!test_opt(sb, MBALLOC)) {
-               block = ext4_old_new_blocks(handle, ar->inode, ar->goal,
-                                           &(ar->len), errp);
-               return block;
-       }
         if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
                 /*
                  * With delalloc we already reserved the blocks
                  */
-               ar->len = ext4_has_free_blocks(sbi, ar->len);
-       }
-
-       if (ar->len == 0) {
-               *errp = -ENOSPC;
-               return 0;
+               while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
+                       /* let others free the space */
+                       yield();
+                       ar->len = ar->len >> 1;
+               }
+               if (!ar->len) {
+                       *errp = -ENOSPC;
+                       return 0;
+               }
+               reserv_blks = ar->len;
         }
-
         while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
                 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
                 ar->len--;
@@ -4441,7 +4349,7 @@ repeat:
         }
  
         if (likely(ac->ac_status == AC_STATUS_FOUND)) {
-               *errp = ext4_mb_mark_diskspace_used(ac, handle);
+               *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
                 if (*errp ==  -EAGAIN) {
                         ac->ac_b_ex.fe_group = 0;
                         ac->ac_b_ex.fe_start = 0;
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h

index c7c9906c2a754dec0953412b1c0843873ef7f6b9..b3b4828f8b894c3cda416e0af366f2ecc890a284 100644 (file)
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -257,7 +257,6 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac);
  
  #define in_range(b, first, len)        ((b) >= (first) && (b) <= (first) + (len) - 1)
  
-static struct proc_dir_entry *proc_root_ext4;
  struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
  
  static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c

index 46fc0b5b12bab540cf00bb1ffb7fcec1b51854fa..f2a9cf498ecda12ddde2c7fb1a73a40c64767ade 100644 (file)
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -447,8 +447,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
  
  }
  
-int ext4_ext_migrate(struct inode *inode, struct file *filp,
-                               unsigned int cmd, unsigned long arg)
+int ext4_ext_migrate(struct inode *inode)
  {
         handle_t *handle;
         int retval = 0, i;
@@ -515,12 +514,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
          * trascation that created the inode. Later as and
          * when we add extents we extent the journal
          */
-       /*
-        * inode_mutex prevent write and truncate on the file. Read still goes
-        * through. We take i_data_sem in ext4_ext_swap_inode_data before we
-        * switch the inode format to prevent read.
-        */
-       mutex_lock(&(inode->i_mutex));
         /*
          * Even though we take i_mutex we can still cause block allocation
          * via mmap write to holes. If we have allocated new blocks we fail
@@ -623,7 +616,6 @@ err_out:
         tmp_inode->i_nlink = 0;
  
         ext4_journal_stop(handle);
-       mutex_unlock(&(inode->i_mutex));
  
         if (tmp_inode)
                 iput(tmp_inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c

index 387ad98350c378425beee97065cc51389903564d..92db9e94514779dce3d0cdccdcc78bc4e12ea465 100644 (file)
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -151,34 +151,36 @@ struct dx_map_entry
  
  static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
  static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
-static inline unsigned dx_get_hash (struct dx_entry *entry);
-static void dx_set_hash (struct dx_entry *entry, unsigned value);
-static unsigned dx_get_count (struct dx_entry *entries);
-static unsigned dx_get_limit (struct dx_entry *entries);
-static void dx_set_count (struct dx_entry *entries, unsigned value);
-static void dx_set_limit (struct dx_entry *entries, unsigned value);
-static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
-static unsigned dx_node_limit (struct inode *dir);
-static struct dx_frame *dx_probe(struct dentry *dentry,
+static inline unsigned dx_get_hash(struct dx_entry *entry);
+static void dx_set_hash(struct dx_entry *entry, unsigned value);
+static unsigned dx_get_count(struct dx_entry *entries);
+static unsigned dx_get_limit(struct dx_entry *entries);
+static void dx_set_count(struct dx_entry *entries, unsigned value);
+static void dx_set_limit(struct dx_entry *entries, unsigned value);
+static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
+static unsigned dx_node_limit(struct inode *dir);
+static struct dx_frame *dx_probe(const struct qstr *d_name,
                                  struct inode *dir,
                                  struct dx_hash_info *hinfo,
                                  struct dx_frame *frame,
                                  int *err);
-static void dx_release (struct dx_frame *frames);
-static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
-                       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
+static void dx_release(struct dx_frame *frames);
+static int dx_make_map(struct ext4_dir_entry_2 *de, int size,
+                      struct dx_hash_info *hinfo, struct dx_map_entry map[]);
  static void dx_sort_map(struct dx_map_entry *map, unsigned count);
-static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to,
+static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
                 struct dx_map_entry *offsets, int count);
-static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size);
+static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size);
  static void dx_insert_block(struct dx_frame *frame,
                                         u32 hash, ext4_lblk_t block);
  static int ext4_htree_next_block(struct inode *dir, __u32 hash,
                                  struct dx_frame *frame,
                                  struct dx_frame *frames,
                                  __u32 *start_hash);
-static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
-                      struct ext4_dir_entry_2 **res_dir, int *err);
+static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
+               const struct qstr *d_name,
+               struct ext4_dir_entry_2 **res_dir,
+               int *err);
  static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                              struct inode *inode);
  
@@ -207,44 +209,44 @@ static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
         entry->block = cpu_to_le32(value);
  }
  
-static inline unsigned dx_get_hash (struct dx_entry *entry)
+static inline unsigned dx_get_hash(struct dx_entry *entry)
  {
         return le32_to_cpu(entry->hash);
  }
  
-static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
+static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
  {
         entry->hash = cpu_to_le32(value);
  }
  
-static inline unsigned dx_get_count (struct dx_entry *entries)
+static inline unsigned dx_get_count(struct dx_entry *entries)
  {
         return le16_to_cpu(((struct dx_countlimit *) entries)->count);
  }
  
-static inline unsigned dx_get_limit (struct dx_entry *entries)
+static inline unsigned dx_get_limit(struct dx_entry *entries)
  {
         return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
  }
  
-static inline void dx_set_count (struct dx_entry *entries, unsigned value)
+static inline void dx_set_count(struct dx_entry *entries, unsigned value)
  {
         ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
  }
  
-static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
+static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
  {
         ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
  }
  
-static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
+static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
  {
         unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
                 EXT4_DIR_REC_LEN(2) - infosize;
         return entry_space / sizeof(struct dx_entry);
  }
  
-static inline unsigned dx_node_limit (struct inode *dir)
+static inline unsigned dx_node_limit(struct inode *dir)
  {
         unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
         return entry_space / sizeof(struct dx_entry);
@@ -254,12 +256,12 @@ static inline unsigned dx_node_limit (struct inode *dir)
   * Debug
   */
  #ifdef DX_DEBUG
-static void dx_show_index (char * label, struct dx_entry *entries)
+static void dx_show_index(char * label, struct dx_entry *entries)
  {
         int i, n = dx_get_count (entries);
-       printk("%s index ", label);
+       printk(KERN_DEBUG "%s index ", label);
         for (i = 0; i < n; i++) {
-               printk("%x->%lu ", i? dx_get_hash(entries + i) :
+               printk("%x->%lu ", i ? dx_get_hash(entries + i) :
                                 0, (unsigned long)dx_get_block(entries + i));
         }
         printk("\n");
@@ -306,7 +308,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
                              struct dx_entry *entries, int levels)
  {
         unsigned blocksize = dir->i_sb->s_blocksize;
-       unsigned count = dx_get_count (entries), names = 0, space = 0, i;
+       unsigned count = dx_get_count(entries), names = 0, space = 0, i;
         unsigned bcount = 0;
         struct buffer_head *bh;
         int err;
@@ -325,11 +327,12 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
                 names += stats.names;
                 space += stats.space;
                 bcount += stats.bcount;
-               brelse (bh);
+               brelse(bh);
         }
         if (bcount)
-               printk("%snames %u, fullness %u (%u%%)\n", levels?"":"   ",
-                       names, space/bcount,(space/bcount)*100/blocksize);
+               printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", 
+                      levels ? "" : "   ", names, space/bcount,
+                      (space/bcount)*100/blocksize);
         return (struct stats) { names, space, bcount};
  }
  #endif /* DX_DEBUG */
@@ -344,7 +347,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
   * back to userspace.
   */
  static struct dx_frame *
-dx_probe(struct dentry *dentry, struct inode *dir,
+dx_probe(const struct qstr *d_name, struct inode *dir,
          struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
  {
         unsigned count, indirect;
@@ -355,8 +358,6 @@ dx_probe(struct dentry *dentry, struct inode *dir,
         u32 hash;
  
         frame->bh = NULL;
-       if (dentry)
-               dir = dentry->d_parent->d_inode;
         if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
                 goto fail;
         root = (struct dx_root *) bh->b_data;
@@ -372,8 +373,8 @@ dx_probe(struct dentry *dentry, struct inode *dir,
         }
         hinfo->hash_version = root->info.hash_version;
         hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
-       if (dentry)
-               ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
+       if (d_name)
+               ext4fs_dirhash(d_name->name, d_name->len, hinfo);
         hash = hinfo->hash;
  
         if (root->info.unused_flags & 1) {
@@ -406,7 +407,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
                 goto fail;
         }
  
-       dxtrace (printk("Look up %x", hash));
+       dxtrace(printk("Look up %x", hash));
         while (1)
         {
                 count = dx_get_count(entries);
@@ -555,7 +556,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
                                       0, &err)))
                         return err; /* Failure */
                 p++;
-               brelse (p->bh);
+               brelse(p->bh);
                 p->bh = bh;
                 p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
         }
@@ -593,7 +594,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
                         /* On error, skip the f_pos to the next block. */
                         dir_file->f_pos = (dir_file->f_pos |
                                         (dir->i_sb->s_blocksize - 1)) + 1;
-                       brelse (bh);
+                       brelse(bh);
                         return count;
                 }
                 ext4fs_dirhash(de->name, de->name_len, hinfo);
@@ -635,8 +636,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
         int ret, err;
         __u32 hashval;
  
-       dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
-                      start_minor_hash));
+       dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", 
+                      start_hash, start_minor_hash));
         dir = dir_file->f_path.dentry->d_inode;
         if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
                 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
@@ -648,7 +649,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
         }
         hinfo.hash = start_hash;
         hinfo.minor_hash = 0;
-       frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err);
+       frame = dx_probe(NULL, dir, &hinfo, frames, &err);
         if (!frame)
                 return err;
  
@@ -694,8 +695,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
                         break;
         }
         dx_release(frames);
-       dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
-                      count, *next_hash));
+       dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
+                      "next hash: %x\n", count, *next_hash));
         return count;
  errout:
         dx_release(frames);
@@ -802,17 +803,17 @@ static inline int ext4_match (int len, const char * const name,
  /*
   * Returns 0 if not found, -1 on failure, and 1 on success
   */
-static inline int search_dirblock(struct buffer_head * bh,
+static inline int search_dirblock(struct buffer_head *bh,
                                   struct inode *dir,
-                                 struct dentry *dentry,
+                                 const struct qstr *d_name,
                                   unsigned long offset,
                                   struct ext4_dir_entry_2 ** res_dir)
  {
         struct ext4_dir_entry_2 * de;
         char * dlimit;
         int de_len;
-       const char *name = dentry->d_name.name;
-       int namelen = dentry->d_name.len;
+       const char *name = d_name->name;
+       int namelen = d_name->len;
  
         de = (struct ext4_dir_entry_2 *) bh->b_data;
         dlimit = bh->b_data + dir->i_sb->s_blocksize;
@@ -851,12 +852,13 @@ static inline int search_dirblock(struct buffer_head * bh,
   * The returned buffer_head has ->b_count elevated.  The caller is expected
   * to brelse() it when appropriate.
   */
-static struct buffer_head * ext4_find_entry (struct dentry *dentry,
+static struct buffer_head * ext4_find_entry (struct inode *dir,
+                                       const struct qstr *d_name,
                                         struct ext4_dir_entry_2 ** res_dir)
  {
-       struct super_block * sb;
-       struct buffer_head * bh_use[NAMEI_RA_SIZE];
-       struct buffer_head * bh, *ret = NULL;
+       struct super_block *sb;
+       struct buffer_head *bh_use[NAMEI_RA_SIZE];
+       struct buffer_head *bh, *ret = NULL;
         ext4_lblk_t start, block, b;
         int ra_max = 0;         /* Number of bh's in the readahead
                                    buffer, bh_use[] */
@@ -865,16 +867,15 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
         int num = 0;
         ext4_lblk_t  nblocks;
         int i, err;
-       struct inode *dir = dentry->d_parent->d_inode;
         int namelen;
  
         *res_dir = NULL;
         sb = dir->i_sb;
-       namelen = dentry->d_name.len;
+       namelen = d_name->len;
         if (namelen > EXT4_NAME_LEN)
                 return NULL;
         if (is_dx(dir)) {
-               bh = ext4_dx_find_entry(dentry, res_dir, &err);
+               bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
                 /*
                  * On success, or if the error was file not found,
                  * return.  Otherwise, fall back to doing a search the
@@ -882,7 +883,8 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
                  */
                 if (bh || (err != ERR_BAD_DX_DIR))
                         return bh;
-               dxtrace(printk("ext4_find_entry: dx failed, falling back\n"));
+               dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
+                              "falling back\n"));
         }
         nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
         start = EXT4_I(dir)->i_dir_start_lookup;
@@ -926,7 +928,7 @@ restart:
                         brelse(bh);
                         goto next;
                 }
-               i = search_dirblock(bh, dir, dentry,
+               i = search_dirblock(bh, dir, d_name,
                             block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
                 if (i == 1) {
                         EXT4_I(dir)->i_dir_start_lookup = block;
@@ -956,11 +958,11 @@ restart:
  cleanup_and_exit:
         /* Clean up the read-ahead blocks */
         for (; ra_ptr < ra_max; ra_ptr++)
-               brelse (bh_use[ra_ptr]);
+               brelse(bh_use[ra_ptr]);
         return ret;
  }
  
-static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
+static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
                        struct ext4_dir_entry_2 **res_dir, int *err)
  {
         struct super_block * sb;
@@ -971,14 +973,13 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
         struct buffer_head *bh;
         ext4_lblk_t block;
         int retval;
-       int namelen = dentry->d_name.len;
-       const u8 *name = dentry->d_name.name;
-       struct inode *dir = dentry->d_parent->d_inode;
+       int namelen = d_name->len;
+       const u8 *name = d_name->name;
  
         sb = dir->i_sb;
         /* NFS may look up ".." - look at dx_root directory block */
         if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
-               if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
+               if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
                         return NULL;
         } else {
                 frame = frames;
@@ -1010,7 +1011,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
                                 return bh;
                         }
                 }
-               brelse (bh);
+               brelse(bh);
                 /* Check to see if we should continue to search */
                 retval = ext4_htree_next_block(dir, hash, frame,
                                                frames, NULL);
@@ -1025,25 +1026,25 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
  
         *err = -ENOENT;
  errout:
-       dxtrace(printk("%s not found\n", name));
+       dxtrace(printk(KERN_DEBUG "%s not found\n", name));
         dx_release (frames);
         return NULL;
  }
  
-static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
  {
-       struct inode * inode;
-       struct ext4_dir_entry_2 * de;
-       struct buffer_head * bh;
+       struct inode *inode;
+       struct ext4_dir_entry_2 *de;
+       struct buffer_head *bh;
  
         if (dentry->d_name.len > EXT4_NAME_LEN)
                 return ERR_PTR(-ENAMETOOLONG);
  
-       bh = ext4_find_entry(dentry, &de);
+       bh = ext4_find_entry(dir, &dentry->d_name, &de);
         inode = NULL;
         if (bh) {
                 unsigned long ino = le32_to_cpu(de->inode);
-               brelse (bh);
+               brelse(bh);
                 if (!ext4_valid_inum(dir->i_sb, ino)) {
                         ext4_error(dir->i_sb, "ext4_lookup",
                                    "bad inode number: %lu", ino);
@@ -1062,15 +1063,14 @@ struct dentry *ext4_get_parent(struct dentry *child)
         unsigned long ino;
         struct dentry *parent;
         struct inode *inode;
-       struct dentry dotdot;
+       static const struct qstr dotdot = {
+               .name = "..",
+               .len = 2,
+       };
         struct ext4_dir_entry_2 * de;
         struct buffer_head *bh;
  
-       dotdot.d_name.name = "..";
-       dotdot.d_name.len = 2;
-       dotdot.d_parent = child; /* confusing, isn't it! */
-
-       bh = ext4_find_entry(&dotdot, &de);
+       bh = ext4_find_entry(child->d_inode, &dotdot, &de);
         inode = NULL;
         if (!bh)
                 return ERR_PTR(-ENOENT);
@@ -1201,10 +1201,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
  
         /* create map in the end of data2 block */
         map = (struct dx_map_entry *) (data2 + blocksize);
-       count = dx_make_map ((struct ext4_dir_entry_2 *) data1,
+       count = dx_make_map((struct ext4_dir_entry_2 *) data1,
                              blocksize, hinfo, map);
         map -= count;
-       dx_sort_map (map, count);
+       dx_sort_map(map, count);
         /* Split the existing block in the middle, size-wise */
         size = 0;
         move = 0;
@@ -1225,7 +1225,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
  
         /* Fancy dance to stay within two buffers */
         de2 = dx_move_dirents(data1, data2, map + split, count - split);
-       de = dx_pack_dirents(data1,blocksize);
+       de = dx_pack_dirents(data1, blocksize);
         de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
         de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2);
         dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
@@ -1237,15 +1237,15 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
                 swap(*bh, bh2);
                 de = de2;
         }
-       dx_insert_block (frame, hash2 + continued, newblock);
-       err = ext4_journal_dirty_metadata (handle, bh2);
+       dx_insert_block(frame, hash2 + continued, newblock);
+       err = ext4_journal_dirty_metadata(handle, bh2);
         if (err)
                 goto journal_error;
-       err = ext4_journal_dirty_metadata (handle, frame->bh);
+       err = ext4_journal_dirty_metadata(handle, frame->bh);
         if (err)
                 goto journal_error;
-       brelse (bh2);
-       dxtrace(dx_show_index ("frame", frame->entries));
+       brelse(bh2);
+       dxtrace(dx_show_index("frame", frame->entries));
         return de;
  
  journal_error:
@@ -1271,7 +1271,7 @@ errout:
   */
  static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
                              struct inode *inode, struct ext4_dir_entry_2 *de,
-                            struct buffer_head * bh)
+                            struct buffer_head *bh)
  {
         struct inode    *dir = dentry->d_parent->d_inode;
         const char      *name = dentry->d_name.name;
@@ -1288,11 +1288,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
                 while ((char *) de <= top) {
                         if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
                                                   bh, offset)) {
-                               brelse (bh);
+                               brelse(bh);
                                 return -EIO;
                         }
-                       if (ext4_match (namelen, name, de)) {
-                               brelse (bh);
+                       if (ext4_match(namelen, name, de)) {
+                               brelse(bh);
                                 return -EEXIST;
                         }
                         nlen = EXT4_DIR_REC_LEN(de->name_len);
@@ -1329,7 +1329,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
         } else
                 de->inode = 0;
         de->name_len = namelen;
-       memcpy (de->name, name, namelen);
+       memcpy(de->name, name, namelen);
         /*
          * XXX shouldn't update any times until successful
          * completion of syscall, but too many callers depend
@@ -1377,7 +1377,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
         struct fake_dirent *fde;
  
         blocksize =  dir->i_sb->s_blocksize;
-       dxtrace(printk("Creating index\n"));
+       dxtrace(printk(KERN_DEBUG "Creating index\n"));
         retval = ext4_journal_get_write_access(handle, bh);
         if (retval) {
                 ext4_std_error(dir->i_sb, retval);
@@ -1386,7 +1386,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
         }
         root = (struct dx_root *) bh->b_data;
  
-       bh2 = ext4_append (handle, dir, &block, &retval);
+       bh2 = ext4_append(handle, dir, &block, &retval);
         if (!(bh2)) {
                 brelse(bh);
                 return retval;
@@ -1412,9 +1412,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
         root->info.info_length = sizeof(root->info);
         root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
         entries = root->entries;
-       dx_set_block (entries, 1);
-       dx_set_count (entries, 1);
-       dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
+       dx_set_block(entries, 1);
+       dx_set_count(entries, 1);
+       dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
  
         /* Initialize as for dx_probe */
         hinfo.hash_version = root->info.hash_version;
@@ -1443,14 +1443,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
   * may not sleep between calling this and putting something into
   * the entry, as someone else might have used it while you slept.
   */
-static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
-       struct inode *inode)
+static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
+                         struct inode *inode)
  {
         struct inode *dir = dentry->d_parent->d_inode;
         unsigned long offset;
-       struct buffer_head * bh;
+       struct buffer_head *bh;
         struct ext4_dir_entry_2 *de;
-       struct super_block * sb;
+       struct super_block *sb;
         int     retval;
         int     dx_fallback=0;
         unsigned blocksize;
@@ -1500,13 +1500,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
         struct dx_frame frames[2], *frame;
         struct dx_entry *entries, *at;
         struct dx_hash_info hinfo;
-       struct buffer_head * bh;
+       struct buffer_head *bh;
         struct inode *dir = dentry->d_parent->d_inode;
-       struct super_block * sb = dir->i_sb;
+       struct super_block *sb = dir->i_sb;
         struct ext4_dir_entry_2 *de;
         int err;
  
-       frame = dx_probe(dentry, NULL, &hinfo, frames, &err);
+       frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
         if (!frame)
                 return err;
         entries = frame->entries;
@@ -1527,7 +1527,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
         }
  
         /* Block full, should compress but for now just split */
-       dxtrace(printk("using %u of %u node entries\n",
+       dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
                        dx_get_count(entries), dx_get_limit(entries)));
         /* Need to split index? */
         if (dx_get_count(entries) == dx_get_limit(entries)) {
@@ -1559,7 +1559,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                 if (levels) {
                         unsigned icount1 = icount/2, icount2 = icount - icount1;
                         unsigned hash2 = dx_get_hash(entries + icount1);
-                       dxtrace(printk("Split index %i/%i\n", icount1, icount2));
+                       dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
+                                      icount1, icount2));
  
                         BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
                         err = ext4_journal_get_write_access(handle,
@@ -1567,11 +1568,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                         if (err)
                                 goto journal_error;
  
-                       memcpy ((char *) entries2, (char *) (entries + icount1),
-                               icount2 * sizeof(struct dx_entry));
-                       dx_set_count (entries, icount1);
-                       dx_set_count (entries2, icount2);
-                       dx_set_limit (entries2, dx_node_limit(dir));
+                       memcpy((char *) entries2, (char *) (entries + icount1),
+                              icount2 * sizeof(struct dx_entry));
+                       dx_set_count(entries, icount1);
+                       dx_set_count(entries2, icount2);
+                       dx_set_limit(entries2, dx_node_limit(dir));
  
                         /* Which index block gets the new entry? */
                         if (at - entries >= icount1) {
@@ -1579,16 +1580,17 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                                 frame->entries = entries = entries2;
                                 swap(frame->bh, bh2);
                         }
-                       dx_insert_block (frames + 0, hash2, newblock);
-                       dxtrace(dx_show_index ("node", frames[1].entries));
-                       dxtrace(dx_show_index ("node",
+                       dx_insert_block(frames + 0, hash2, newblock);
+                       dxtrace(dx_show_index("node", frames[1].entries));
+                       dxtrace(dx_show_index("node",
                                ((struct dx_node *) bh2->b_data)->entries));
                         err = ext4_journal_dirty_metadata(handle, bh2);
                         if (err)
                                 goto journal_error;
                         brelse (bh2);
                 } else {
-                       dxtrace(printk("Creating second level index...\n"));
+                       dxtrace(printk(KERN_DEBUG
+                                      "Creating second level index...\n"));
                         memcpy((char *) entries2, (char *) entries,
                                icount * sizeof(struct dx_entry));
                         dx_set_limit(entries2, dx_node_limit(dir));
@@ -1630,12 +1632,12 @@ cleanup:
   * ext4_delete_entry deletes a directory entry by merging it with the
   * previous entry
   */
-static int ext4_delete_entry (handle_t *handle,
-                             struct inode * dir,
-                             struct ext4_dir_entry_2 * de_del,
-                             struct buffer_head * bh)
+static int ext4_delete_entry(handle_t *handle,
+                            struct inode *dir,
+                            struct ext4_dir_entry_2 *de_del,
+                            struct buffer_head *bh)
  {
-       struct ext4_dir_entry_2 * de, * pde;
+       struct ext4_dir_entry_2 *de, *pde;
         int i;
  
         i = 0;
@@ -1716,11 +1718,11 @@ static int ext4_add_nondir(handle_t *handle,
   * If the create succeeds, we fill in the inode information
   * with d_instantiate().
   */
-static int ext4_create (struct inode * dir, struct dentry * dentry, int mode,
-               struct nameidata *nd)
+static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
+                      struct nameidata *nd)
  {
         handle_t *handle;
-       struct inode * inode;
+       struct inode *inode;
         int err, retries = 0;
  
  retry:
@@ -1747,8 +1749,8 @@ retry:
         return err;
  }
  
-static int ext4_mknod (struct inode * dir, struct dentry *dentry,
-                       int mode, dev_t rdev)
+static int ext4_mknod(struct inode *dir, struct dentry *dentry,
+                     int mode, dev_t rdev)
  {
         handle_t *handle;
         struct inode *inode;
@@ -1767,11 +1769,11 @@ retry:
         if (IS_DIRSYNC(dir))
                 handle->h_sync = 1;
  
-       inode = ext4_new_inode (handle, dir, mode);
+       inode = ext4_new_inode(handle, dir, mode);
         err = PTR_ERR(inode);
         if (!IS_ERR(inode)) {
                 init_special_inode(inode, inode->i_mode, rdev);
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
                 inode->i_op = &ext4_special_inode_operations;
  #endif
                 err = ext4_add_nondir(handle, dentry, inode);
@@ -1782,12 +1784,12 @@ retry:
         return err;
  }
  
-static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
  {
         handle_t *handle;
-       struct inode * inode;
-       struct buffer_head * dir_block;
-       struct ext4_dir_entry_2 * de;
+       struct inode *inode;
+       struct buffer_head *dir_block;
+       struct ext4_dir_entry_2 *de;
         int err, retries = 0;
  
         if (EXT4_DIR_LINK_MAX(dir))
@@ -1803,7 +1805,7 @@ retry:
         if (IS_DIRSYNC(dir))
                 handle->h_sync = 1;
  
-       inode = ext4_new_inode (handle, dir, S_IFDIR | mode);
+       inode = ext4_new_inode(handle, dir, S_IFDIR | mode);
         err = PTR_ERR(inode);
         if (IS_ERR(inode))
                 goto out_stop;
@@ -1811,7 +1813,7 @@ retry:
         inode->i_op = &ext4_dir_inode_operations;
         inode->i_fop = &ext4_dir_operations;
         inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
-       dir_block = ext4_bread (handle, inode, 0, 1, &err);
+       dir_block = ext4_bread(handle, inode, 0, 1, &err);
         if (!dir_block)
                 goto out_clear_inode;
         BUFFER_TRACE(dir_block, "get_write_access");
@@ -1820,26 +1822,26 @@ retry:
         de->inode = cpu_to_le32(inode->i_ino);
         de->name_len = 1;
         de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len));
-       strcpy (de->name, ".");
+       strcpy(de->name, ".");
         ext4_set_de_type(dir->i_sb, de, S_IFDIR);
         de = ext4_next_entry(de);
         de->inode = cpu_to_le32(dir->i_ino);
         de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize -
                                                 EXT4_DIR_REC_LEN(1));
         de->name_len = 2;
-       strcpy (de->name, "..");
+       strcpy(de->name, "..");
         ext4_set_de_type(dir->i_sb, de, S_IFDIR);
         inode->i_nlink = 2;
         BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata");
         ext4_journal_dirty_metadata(handle, dir_block);
-       brelse (dir_block);
+       brelse(dir_block);
         ext4_mark_inode_dirty(handle, inode);
-       err = ext4_add_entry (handle, dentry, inode);
+       err = ext4_add_entry(handle, dentry, inode);
         if (err) {
  out_clear_inode:
                 clear_nlink(inode);
                 ext4_mark_inode_dirty(handle, inode);
-               iput (inode);
+               iput(inode);
                 goto out_stop;
         }
         ext4_inc_count(handle, dir);
@@ -1856,17 +1858,17 @@ out_stop:
  /*
   * routine to check that the specified directory is empty (for rmdir)
   */
-static int empty_dir (struct inode * inode)
+static int empty_dir(struct inode *inode)
  {
         unsigned long offset;
-       struct buffer_head * bh;
-       struct ext4_dir_entry_2 * de, * de1;
-       struct super_block * sb;
+       struct buffer_head *bh;
+       struct ext4_dir_entry_2 *de, *de1;
+       struct super_block *sb;
         int err = 0;
  
         sb = inode->i_sb;
         if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
-           !(bh = ext4_bread (NULL, inode, 0, 0, &err))) {
+           !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
                 if (err)
                         ext4_error(inode->i_sb, __func__,
                                    "error %d reading directory #%lu offset 0",
@@ -1881,23 +1883,23 @@ static int empty_dir (struct inode * inode)
         de1 = ext4_next_entry(de);
         if (le32_to_cpu(de->inode) != inode->i_ino ||
                         !le32_to_cpu(de1->inode) ||
-                       strcmp (".", de->name) ||
-                       strcmp ("..", de1->name)) {
-               ext4_warning (inode->i_sb, "empty_dir",
-                             "bad directory (dir #%lu) - no `.' or `..'",
-                             inode->i_ino);
-               brelse (bh);
+                       strcmp(".", de->name) ||
+                       strcmp("..", de1->name)) {
+               ext4_warning(inode->i_sb, "empty_dir",
+                            "bad directory (dir #%lu) - no `.' or `..'",
+                            inode->i_ino);
+               brelse(bh);
                 return 1;
         }
         offset = ext4_rec_len_from_disk(de->rec_len) +
                  ext4_rec_len_from_disk(de1->rec_len);
         de = ext4_next_entry(de1);
-       while (offset < inode->i_size ) {
+       while (offset < inode->i_size) {
                 if (!bh ||
                         (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
                         err = 0;
-                       brelse (bh);
-                       bh = ext4_bread (NULL, inode,
+                       brelse(bh);
+                       bh = ext4_bread(NULL, inode,
                                 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
                         if (!bh) {
                                 if (err)
@@ -1917,13 +1919,13 @@ static int empty_dir (struct inode * inode)
                         continue;
                 }
                 if (le32_to_cpu(de->inode)) {
-                       brelse (bh);
+                       brelse(bh);
                         return 0;
                 }
                 offset += ext4_rec_len_from_disk(de->rec_len);
                 de = ext4_next_entry(de);
         }
-       brelse (bh);
+       brelse(bh);
         return 1;
  }
  
@@ -1954,8 +1956,8 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
          * ->i_nlink. For, say it, character device. Not a regular file,
          * not a directory, not a symlink and ->i_nlink > 0.
          */
-       J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-               S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
+       J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+                 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
  
         BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
         err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
@@ -2069,12 +2071,12 @@ out_brelse:
         goto out_err;
  }
  
-static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
+static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
  {
         int retval;
-       struct inode * inode;
-       struct buffer_head * bh;
-       struct ext4_dir_entry_2 * de;
+       struct inode *inode;
+       struct buffer_head *bh;
+       struct ext4_dir_entry_2 *de;
         handle_t *handle;
  
         /* Initialize quotas before so that eventual writes go in
@@ -2085,7 +2087,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
                 return PTR_ERR(handle);
  
         retval = -ENOENT;
-       bh = ext4_find_entry (dentry, &de);
+       bh = ext4_find_entry(dir, &dentry->d_name, &de);
         if (!bh)
                 goto end_rmdir;
  
@@ -2099,16 +2101,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
                 goto end_rmdir;
  
         retval = -ENOTEMPTY;
-       if (!empty_dir (inode))
+       if (!empty_dir(inode))
                 goto end_rmdir;
  
         retval = ext4_delete_entry(handle, dir, de, bh);
         if (retval)
                 goto end_rmdir;
         if (!EXT4_DIR_LINK_EMPTY(inode))
-               ext4_warning (inode->i_sb, "ext4_rmdir",
-                             "empty directory has too many links (%d)",
-                             inode->i_nlink);
+               ext4_warning(inode->i_sb, "ext4_rmdir",
+                            "empty directory has too many links (%d)",
+                            inode->i_nlink);
         inode->i_version++;
         clear_nlink(inode);
         /* There's no need to set i_disksize: the fact that i_nlink is
@@ -2124,16 +2126,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
  
  end_rmdir:
         ext4_journal_stop(handle);
-       brelse (bh);
+       brelse(bh);
         return retval;
  }
  
-static int ext4_unlink(struct inode * dir, struct dentry *dentry)
+static int ext4_unlink(struct inode *dir, struct dentry *dentry)
  {
         int retval;
-       struct inode * inode;
-       struct buffer_head * bh;
-       struct ext4_dir_entry_2 * de;
+       struct inode *inode;
+       struct buffer_head *bh;
+       struct ext4_dir_entry_2 *de;
         handle_t *handle;
  
         /* Initialize quotas before so that eventual writes go
@@ -2147,7 +2149,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
                 handle->h_sync = 1;
  
         retval = -ENOENT;
-       bh = ext4_find_entry (dentry, &de);
+       bh = ext4_find_entry(dir, &dentry->d_name, &de);
         if (!bh)
                 goto end_unlink;
  
@@ -2158,9 +2160,9 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
                 goto end_unlink;
  
         if (!inode->i_nlink) {
-               ext4_warning (inode->i_sb, "ext4_unlink",
-                             "Deleting nonexistent file (%lu), %d",
-                             inode->i_ino, inode->i_nlink);
+               ext4_warning(inode->i_sb, "ext4_unlink",
+                            "Deleting nonexistent file (%lu), %d",
+                            inode->i_ino, inode->i_nlink);
                 inode->i_nlink = 1;
         }
         retval = ext4_delete_entry(handle, dir, de, bh);
@@ -2178,15 +2180,15 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
  
  end_unlink:
         ext4_journal_stop(handle);
-       brelse (bh);
+       brelse(bh);
         return retval;
  }
  
-static int ext4_symlink (struct inode * dir,
-               struct dentry *dentry, const char * symname)
+static int ext4_symlink(struct inode *dir,
+                       struct dentry *dentry, const char *symname)
  {
         handle_t *handle;
-       struct inode * inode;
+       struct inode *inode;
         int l, err, retries = 0;
  
         l = strlen(symname)+1;
@@ -2203,12 +2205,12 @@ retry:
         if (IS_DIRSYNC(dir))
                 handle->h_sync = 1;
  
-       inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO);
+       inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO);
         err = PTR_ERR(inode);
         if (IS_ERR(inode))
                 goto out_stop;
  
-       if (l > sizeof (EXT4_I(inode)->i_data)) {
+       if (l > sizeof(EXT4_I(inode)->i_data)) {
                 inode->i_op = &ext4_symlink_inode_operations;
                 ext4_set_aops(inode);
                 /*
@@ -2221,14 +2223,14 @@ retry:
                 if (err) {
                         clear_nlink(inode);
                         ext4_mark_inode_dirty(handle, inode);
-                       iput (inode);
+                       iput(inode);
                         goto out_stop;
                 }
         } else {
                 /* clear the extent format for fast symlink */
                 EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
                 inode->i_op = &ext4_fast_symlink_inode_operations;
-               memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
+               memcpy((char *)&EXT4_I(inode)->i_data, symname, l);
                 inode->i_size = l-1;
         }
         EXT4_I(inode)->i_disksize = inode->i_size;
@@ -2240,8 +2242,8 @@ out_stop:
         return err;
  }
  
-static int ext4_link (struct dentry * old_dentry,
-               struct inode * dir, struct dentry *dentry)
+static int ext4_link(struct dentry *old_dentry,
+                    struct inode *dir, struct dentry *dentry)
  {
         handle_t *handle;
         struct inode *inode = old_dentry->d_inode;
@@ -2284,13 +2286,13 @@ retry:
   * Anybody can rename anything with this: the permission checks are left to the
   * higher-level routines.
   */
-static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
-                          struct inode * new_dir,struct dentry *new_dentry)
+static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
+                      struct inode *new_dir, struct dentry *new_dentry)
  {
         handle_t *handle;
-       struct inode * old_inode, * new_inode;
-       struct buffer_head * old_bh, * new_bh, * dir_bh;
-       struct ext4_dir_entry_2 * old_de, * new_de;
+       struct inode *old_inode, *new_inode;
+       struct buffer_head *old_bh, *new_bh, *dir_bh;
+       struct ext4_dir_entry_2 *old_de, *new_de;
         int retval;
  
         old_bh = new_bh = dir_bh = NULL;
@@ -2308,7 +2310,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
         if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
                 handle->h_sync = 1;
  
-       old_bh = ext4_find_entry (old_dentry, &old_de);
+       old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
         /*
          *  Check for inode number is _not_ due to possible IO errors.
          *  We might rmdir the source, keep it as pwd of some process
@@ -2321,32 +2323,32 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
                 goto end_rename;
  
         new_inode = new_dentry->d_inode;
-       new_bh = ext4_find_entry (new_dentry, &new_de);
+       new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de);
         if (new_bh) {
                 if (!new_inode) {
-                       brelse (new_bh);
+                       brelse(new_bh);
                         new_bh = NULL;
                 }
         }
         if (S_ISDIR(old_inode->i_mode)) {
                 if (new_inode) {
                         retval = -ENOTEMPTY;
-                       if (!empty_dir (new_inode))
+                       if (!empty_dir(new_inode))
                                 goto end_rename;
                 }
                 retval = -EIO;
-               dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval);
+               dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval);
                 if (!dir_bh)
                         goto end_rename;
                 if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
                         goto end_rename;
                 retval = -EMLINK;
-               if (!new_inode && new_dir!=old_dir &&
+               if (!new_inode && new_dir != old_dir &&
                                 new_dir->i_nlink >= EXT4_LINK_MAX)
                         goto end_rename;
         }
         if (!new_bh) {
-               retval = ext4_add_entry (handle, new_dentry, old_inode);
+               retval = ext4_add_entry(handle, new_dentry, old_inode);
                 if (retval)
                         goto end_rename;
         } else {
@@ -2388,7 +2390,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
                 struct buffer_head *old_bh2;
                 struct ext4_dir_entry_2 *old_de2;
  
-               old_bh2 = ext4_find_entry(old_dentry, &old_de2);
+               old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2);
                 if (old_bh2) {
                         retval = ext4_delete_entry(handle, old_dir,
                                                    old_de2, old_bh2);
@@ -2433,9 +2435,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
         retval = 0;
  
  end_rename:
-       brelse (dir_bh);
-       brelse (old_bh);
-       brelse (new_bh);
+       brelse(dir_bh);
+       brelse(old_bh);
+       brelse(new_bh);
         ext4_journal_stop(handle);
         return retval;
  }
@@ -2454,7 +2456,7 @@ const struct inode_operations ext4_dir_inode_operations = {
         .mknod          = ext4_mknod,
         .rename         = ext4_rename,
         .setattr        = ext4_setattr,
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
         .setxattr       = generic_setxattr,
         .getxattr       = generic_getxattr,
         .listxattr      = ext4_listxattr,
@@ -2465,7 +2467,7 @@ const struct inode_operations ext4_dir_inode_operations = {
  
  const struct inode_operations ext4_special_inode_operations = {
         .setattr        = ext4_setattr,
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
         .setxattr       = generic_setxattr,
         .getxattr       = generic_getxattr,
         .listxattr      = ext4_listxattr,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c

index b3d35604ea1890f3b33567658ba9827f4d2c3a98..b6ec1843a015d2f1d243d74a0818a5d31f06d536 100644 (file)
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -416,8 +416,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
                        "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
                        gdb_num);
  
-        /*
-         * If we are not using the primary superblock/GDT copy don't resize,
+       /*
+        * If we are not using the primary superblock/GDT copy don't resize,
           * because the user tools have no way of handling this.  Probably a
           * bad time to do it anyways.
           */
@@ -870,11 +870,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
          * We can allocate memory for mb_alloc based on the new group
          * descriptor
          */
-       if (test_opt(sb, MBALLOC)) {
-               err = ext4_mb_add_more_groupinfo(sb, input->group, gdp);
-               if (err)
-                       goto exit_journal;
-       }
+       err = ext4_mb_add_more_groupinfo(sb, input->group, gdp);
+       if (err)
+               goto exit_journal;
+
         /*
          * Make the new blocks and inodes valid next.  We do this before
          * increasing the group count so that once the group is enabled,
@@ -929,6 +928,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
         percpu_counter_add(&sbi->s_freeinodes_counter,
                            EXT4_INODES_PER_GROUP(sb));
  
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+               ext4_group_t flex_group;
+               flex_group = ext4_flex_group(sbi, input->group);
+               sbi->s_flex_groups[flex_group].free_blocks +=
+                       input->free_blocks_count;
+               sbi->s_flex_groups[flex_group].free_inodes +=
+                       EXT4_INODES_PER_GROUP(sb);
+       }
+
         ext4_journal_dirty_metadata(handle, sbi->s_sbh);
         sb->s_dirt = 1;
  
@@ -964,7 +972,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
         ext4_group_t o_groups_count;
         ext4_grpblk_t last;
         ext4_grpblk_t add;
-       struct buffer_head * bh;
+       struct buffer_head *bh;
         handle_t *handle;
         int err;
         unsigned long freed_blocks;
@@ -1077,8 +1085,15 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
         /*
          * Mark mballoc pages as not up to date so that they will be updated
          * next time they are loaded by ext4_mb_load_buddy.
+        *
+        * XXX Bad, Bad, BAD!!!  We should not be overloading the
+        * Uptodate flag, particularly on the bitmap bh, as a way of
+        * hinting to ext4_mb_load_buddy() that it needs to be
+        * overloaded.  A user could take a LVM snapshot, then do an
+        * on-line fsck, and clear the uptodate flag, and this would
+        * not be a bug in userspace, but a bug in the kernel.  FIXME!!!
          */
-       if (test_opt(sb, MBALLOC)) {
+       {
                 struct ext4_sb_info *sbi = EXT4_SB(sb);
                 struct inode *inode = sbi->s_buddy_cache;
                 int blocks_per_page;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c

index 566344b926b772bb668ea62e15b3ddc641389a96..0e661c569660b4cb331a8c11c2cd45852ddb78bd 100644 (file)
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -34,6 +34,8 @@
  #include <linux/namei.h>
  #include <linux/quotaops.h>
  #include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/marker.h>
  #include <linux/log2.h>
  #include <linux/crc16.h>
  #include <asm/uaccess.h>
@@ -45,6 +47,8 @@
  #include "namei.h"
  #include "group.h"
  
+struct proc_dir_entry *ext4_proc_root;
+
  static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                              unsigned long journal_devnum);
  static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
@@ -508,10 +512,12 @@ static void ext4_put_super(struct super_block *sb)
         if (!(sb->s_flags & MS_RDONLY)) {
                 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
                 es->s_state = cpu_to_le16(sbi->s_mount_state);
-               BUFFER_TRACE(sbi->s_sbh, "marking dirty");
-               mark_buffer_dirty(sbi->s_sbh);
                 ext4_commit_super(sb, es, 1);
         }
+       if (sbi->s_proc) {
+               remove_proc_entry("inode_readahead_blks", sbi->s_proc);
+               remove_proc_entry(sb->s_id, ext4_proc_root);
+       }
  
         for (i = 0; i < sbi->s_gdb_count; i++)
                 brelse(sbi->s_group_desc[i]);
@@ -520,6 +526,7 @@ static void ext4_put_super(struct super_block *sb)
         percpu_counter_destroy(&sbi->s_freeblocks_counter);
         percpu_counter_destroy(&sbi->s_freeinodes_counter);
         percpu_counter_destroy(&sbi->s_dirs_counter);
+       percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
         brelse(sbi->s_sbh);
  #ifdef CONFIG_QUOTA
         for (i = 0; i < MAXQUOTAS; i++)
@@ -562,11 +569,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
         ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
         if (!ei)
                 return NULL;
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
         ei->i_acl = EXT4_ACL_NOT_CACHED;
         ei->i_default_acl = EXT4_ACL_NOT_CACHED;
  #endif
-       ei->i_block_alloc_info = NULL;
         ei->vfs_inode.i_version = 1;
         ei->vfs_inode.i_data.writeback_index = 0;
         memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
@@ -599,7 +605,7 @@ static void init_once(void *foo)
         struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
  
         INIT_LIST_HEAD(&ei->i_orphan);
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
         init_rwsem(&ei->xattr_sem);
  #endif
         init_rwsem(&ei->i_data_sem);
@@ -625,8 +631,7 @@ static void destroy_inodecache(void)
  
  static void ext4_clear_inode(struct inode *inode)
  {
-       struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info;
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
         if (EXT4_I(inode)->i_acl &&
                         EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
                 posix_acl_release(EXT4_I(inode)->i_acl);
@@ -638,10 +643,7 @@ static void ext4_clear_inode(struct inode *inode)
                 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
         }
  #endif
-       ext4_discard_reservation(inode);
-       EXT4_I(inode)->i_block_alloc_info = NULL;
-       if (unlikely(rsv))
-               kfree(rsv);
+       ext4_discard_preallocations(inode);
         jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
                                        &EXT4_I(inode)->jinode);
  }
@@ -654,7 +656,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
  
         if (sbi->s_jquota_fmt)
                 seq_printf(seq, ",jqfmt=%s",
-               (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0");
+               (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0");
  
         if (sbi->s_qf_names[USRQUOTA])
                 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
@@ -718,7 +720,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
                 seq_puts(seq, ",debug");
         if (test_opt(sb, OLDALLOC))
                 seq_puts(seq, ",oldalloc");
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
         if (test_opt(sb, XATTR_USER) &&
                 !(def_mount_opts & EXT4_DEFM_XATTR_USER))
                 seq_puts(seq, ",user_xattr");
@@ -727,7 +729,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
                 seq_puts(seq, ",nouser_xattr");
         }
  #endif
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
         if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
                 seq_puts(seq, ",acl");
         if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
@@ -752,8 +754,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
                 seq_puts(seq, ",nobh");
         if (!test_opt(sb, EXTENTS))
                 seq_puts(seq, ",noextents");
-       if (!test_opt(sb, MBALLOC))
-               seq_puts(seq, ",nomballoc");
         if (test_opt(sb, I_VERSION))
                 seq_puts(seq, ",i_version");
         if (!test_opt(sb, DELALLOC))
@@ -773,6 +773,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
         else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
                 seq_puts(seq, ",data=writeback");
  
+       if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
+               seq_printf(seq, ",inode_readahead_blks=%u",
+                          sbi->s_inode_readahead_blks);
+
         ext4_show_quota_options(seq, sb);
         return 0;
  }
@@ -822,7 +826,7 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
  }
  
  #ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group")
+#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
  #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
  
  static int ext4_dquot_initialize(struct inode *inode, int type);
@@ -907,6 +911,7 @@ enum {
         Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
         Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
         Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+       Opt_inode_readahead_blks
  };
  
  static match_table_t tokens = {
@@ -967,6 +972,7 @@ static match_table_t tokens = {
         {Opt_resize, "resize"},
         {Opt_delalloc, "delalloc"},
         {Opt_nodelalloc, "nodelalloc"},
+       {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
         {Opt_err, NULL},
  };
  
@@ -981,7 +987,7 @@ static ext4_fsblk_t get_sb_block(void **data)
         /*todo: use simple_strtoll with >32bit ext4 */
         sb_block = simple_strtoul(options, &options, 0);
         if (*options && *options != ',') {
-               printk("EXT4-fs: Invalid sb specification: %s\n",
+               printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
                        (char *) *data);
                 return 1;
         }
@@ -1072,7 +1078,7 @@ static int parse_options(char *options, struct super_block *sb,
                 case Opt_orlov:
                         clear_opt(sbi->s_mount_opt, OLDALLOC);
                         break;
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
                 case Opt_user_xattr:
                         set_opt(sbi->s_mount_opt, XATTR_USER);
                         break;
@@ -1082,10 +1088,11 @@ static int parse_options(char *options, struct super_block *sb,
  #else
                 case Opt_user_xattr:
                 case Opt_nouser_xattr:
-                       printk("EXT4 (no)user_xattr options not supported\n");
+                       printk(KERN_ERR "EXT4 (no)user_xattr options "
+                              "not supported\n");
                         break;
  #endif
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
                 case Opt_acl:
                         set_opt(sbi->s_mount_opt, POSIX_ACL);
                         break;
@@ -1095,7 +1102,8 @@ static int parse_options(char *options, struct super_block *sb,
  #else
                 case Opt_acl:
                 case Opt_noacl:
-                       printk("EXT4 (no)acl options not supported\n");
+                       printk(KERN_ERR "EXT4 (no)acl options "
+                              "not supported\n");
                         break;
  #endif
                 case Opt_reservation:
@@ -1189,8 +1197,8 @@ set_qf_name:
                              sb_any_quota_suspended(sb)) &&
                             !sbi->s_qf_names[qtype]) {
                                 printk(KERN_ERR
-                                       "EXT4-fs: Cannot change journaled "
-                                       "quota options when quota turned on.\n");
+                                      "EXT4-fs: Cannot change journaled "
+                                      "quota options when quota turned on.\n");
                                 return 0;
                         }
                         qname = match_strdup(&args[0]);
@@ -1357,12 +1365,6 @@ set_qf_format:
                 case Opt_nodelalloc:
                         clear_opt(sbi->s_mount_opt, DELALLOC);
                         break;
-               case Opt_mballoc:
-                       set_opt(sbi->s_mount_opt, MBALLOC);
-                       break;
-               case Opt_nomballoc:
-                       clear_opt(sbi->s_mount_opt, MBALLOC);
-                       break;
                 case Opt_stripe:
                         if (match_int(&args[0], &option))
                                 return 0;
@@ -1373,6 +1375,13 @@ set_qf_format:
                 case Opt_delalloc:
                         set_opt(sbi->s_mount_opt, DELALLOC);
                         break;
+               case Opt_inode_readahead_blks:
+                       if (match_int(&args[0], &option))
+                               return 0;
+                       if (option < 0 || option > (1 << 30))
+                               return 0;
+                       sbi->s_inode_readahead_blks = option;
+                       break;
                 default:
                         printk(KERN_ERR
                                "EXT4-fs: Unrecognized mount option \"%s\" "
@@ -1473,15 +1482,9 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
                         EXT4_INODES_PER_GROUP(sb),
                         sbi->s_mount_opt);
  
-       printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id);
-       if (EXT4_SB(sb)->s_journal->j_inode == NULL) {
-               char b[BDEVNAME_SIZE];
-
-               printk("external journal on %s\n",
-                       bdevname(EXT4_SB(sb)->s_journal->j_dev, b));
-       } else {
-               printk("internal journal\n");
-       }
+       printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
+              sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
+              "external", EXT4_SB(sb)->s_journal->j_devname);
         return res;
  }
  
@@ -1504,8 +1507,11 @@ static int ext4_fill_flex_info(struct super_block *sb)
         sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
         groups_per_flex = 1 << sbi->s_log_groups_per_flex;
  
-       flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
-               groups_per_flex;
+       /* We allocate both existing and potentially added groups */
+       flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
+                           ((sbi->s_es->s_reserved_gdt_blocks +1 ) <<
+                             EXT4_DESC_PER_BLOCK_BITS(sb))) /
+                          groups_per_flex;
         sbi->s_flex_groups = kzalloc(flex_group_count *
                                      sizeof(struct flex_groups), GFP_KERNEL);
         if (sbi->s_flex_groups == NULL) {
@@ -1584,7 +1590,7 @@ static int ext4_check_descriptors(struct super_block *sb)
         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
                 flexbg_flag = 1;
  
-       ext4_debug ("Checking group descriptors");
+       ext4_debug("Checking group descriptors");
  
         for (i = 0; i < sbi->s_groups_count; i++) {
                 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
@@ -1623,8 +1629,10 @@ static int ext4_check_descriptors(struct super_block *sb)
                                "Checksum for group %lu failed (%u!=%u)\n",
                                i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
                                gdp)), le16_to_cpu(gdp->bg_checksum));
-                       if (!(sb->s_flags & MS_RDONLY))
+                       if (!(sb->s_flags & MS_RDONLY)) {
+                               spin_unlock(sb_bgl_lock(sbi, i));
                                 return 0;
+                       }
                 }
                 spin_unlock(sb_bgl_lock(sbi, i));
                 if (!flexbg_flag)
@@ -1714,9 +1722,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
                 DQUOT_INIT(inode);
                 if (inode->i_nlink) {
                         printk(KERN_DEBUG
-                               "%s: truncating inode %lu to %Ld bytes\n",
+                               "%s: truncating inode %lu to %lld bytes\n",
                                 __func__, inode->i_ino, inode->i_size);
-                       jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
+                       jbd_debug(2, "truncating inode %lu to %lld bytes\n",
                                   inode->i_ino, inode->i_size);
                         ext4_truncate(inode);
                         nr_truncates++;
@@ -1914,6 +1922,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         unsigned long journal_devnum = 0;
         unsigned long def_mount_opts;
         struct inode *root;
+       char *cp;
         int ret = -EINVAL;
         int blocksize;
         int db_count;
@@ -1930,10 +1939,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         sbi->s_mount_opt = 0;
         sbi->s_resuid = EXT4_DEF_RESUID;
         sbi->s_resgid = EXT4_DEF_RESGID;
+       sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
         sbi->s_sb_block = sb_block;
  
         unlock_kernel();
  
+       /* Cleanup superblock name */
+       for (cp = sb->s_id; (cp = strchr(cp, '/'));)
+               *cp = '!';
+
         blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
         if (!blocksize) {
                 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
@@ -1973,11 +1987,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                 set_opt(sbi->s_mount_opt, GRPID);
         if (def_mount_opts & EXT4_DEFM_UID16)
                 set_opt(sbi->s_mount_opt, NO_UID32);
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
         if (def_mount_opts & EXT4_DEFM_XATTR_USER)
                 set_opt(sbi->s_mount_opt, XATTR_USER);
  #endif
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
         if (def_mount_opts & EXT4_DEFM_ACL)
                 set_opt(sbi->s_mount_opt, POSIX_ACL);
  #endif
@@ -2012,11 +2026,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                 ext4_warning(sb, __func__,
                         "extents feature not enabled on this filesystem, "
                         "use tune2fs.\n");
-       /*
-        * turn on mballoc code by default in ext4 filesystem
-        * Use -o nomballoc to turn it off
-        */
-       set_opt(sbi->s_mount_opt, MBALLOC);
  
         /*
          * enable delayed allocation by default
@@ -2040,16 +2049,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        "EXT4-fs warning: feature flags set on rev 0 fs, "
                        "running e2fsck is recommended\n");
  
-       /*
-        * Since ext4 is still considered development code, we require
-        * that the TEST_FILESYS flag in s->flags be set.
-        */
-       if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) {
-               printk(KERN_WARNING "EXT4-fs: %s: not marked "
-                      "OK to use with test code.\n", sb->s_id);
-               goto failed_mount;
-       }
-
         /*
          * Check feature flags regardless of the revision level, since we
          * previously didn't change the revision level when setting the flags,
@@ -2219,6 +2218,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                 goto failed_mount;
         }
  
+       if (ext4_proc_root)
+               sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
+
+       if (sbi->s_proc)
+               proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
+                                &ext4_ui_proc_fops,
+                                &sbi->s_inode_readahead_blks);
+
         bgl_lock_init(&sbi->s_blockgroup_lock);
  
         for (i = 0; i < db_count; i++) {
@@ -2257,24 +2264,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                 err = percpu_counter_init(&sbi->s_dirs_counter,
                                 ext4_count_dirs(sb));
         }
+       if (!err) {
+               err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+       }
         if (err) {
                 printk(KERN_ERR "EXT4-fs: insufficient memory\n");
                 goto failed_mount3;
         }
  
-       /* per fileystem reservation list head & lock */
-       spin_lock_init(&sbi->s_rsv_window_lock);
-       sbi->s_rsv_window_root = RB_ROOT;
-       /* Add a single, static dummy reservation to the start of the
-        * reservation window list --- it gives us a placeholder for
-        * append-at-start-of-list which makes the allocation logic
-        * _much_ simpler. */
-       sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
-       sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
-       sbi->s_rsv_window_head.rsv_alloc_hit = 0;
-       sbi->s_rsv_window_head.rsv_goal_size = 0;
-       ext4_rsv_window_add(sb, &sbi->s_rsv_window_head);
-
         sbi->s_stripe = ext4_get_stripe_size(sbi);
  
         /*
@@ -2471,7 +2468,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
  
         ext4_ext_init(sb);
-       ext4_mb_init(sb, needs_recovery);
+       err = ext4_mb_init(sb, needs_recovery);
+       if (err) {
+               printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
+                      err);
+               goto failed_mount4;
+       }
  
         lock_kernel();
         return 0;
@@ -2489,11 +2491,16 @@ failed_mount3:
         percpu_counter_destroy(&sbi->s_freeblocks_counter);
         percpu_counter_destroy(&sbi->s_freeinodes_counter);
         percpu_counter_destroy(&sbi->s_dirs_counter);
+       percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
  failed_mount2:
         for (i = 0; i < db_count; i++)
                 brelse(sbi->s_group_desc[i]);
         kfree(sbi->s_group_desc);
  failed_mount:
+       if (sbi->s_proc) {
+               remove_proc_entry("inode_readahead_blks", sbi->s_proc);
+               remove_proc_entry(sb->s_id, ext4_proc_root);
+       }
  #ifdef CONFIG_QUOTA
         for (i = 0; i < MAXQUOTAS; i++)
                 kfree(sbi->s_qf_names[i]);
@@ -2552,7 +2559,7 @@ static journal_t *ext4_get_journal(struct super_block *sb,
                 return NULL;
         }
  
-       jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
+       jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
                   journal_inode, journal_inode->i_size);
         if (!S_ISREG(journal_inode->i_mode)) {
                 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
@@ -2715,6 +2722,11 @@ static int ext4_load_journal(struct super_block *sb,
                         return -EINVAL;
         }
  
+       if (journal->j_flags & JBD2_BARRIER)
+               printk(KERN_INFO "EXT4-fs: barriers enabled\n");
+       else
+               printk(KERN_INFO "EXT4-fs: barriers disabled\n");
+
         if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
                 err = jbd2_journal_update_format(journal);
                 if (err)  {
@@ -2799,13 +2811,34 @@ static void ext4_commit_super(struct super_block *sb,
  
         if (!sbh)
                 return;
+       if (buffer_write_io_error(sbh)) {
+               /*
+                * Oh, dear.  A previous attempt to write the
+                * superblock failed.  This could happen because the
+                * USB device was yanked out.  Or it could happen to
+                * be a transient write error and maybe the block will
+                * be remapped.  Nothing we can do but to retry the
+                * write and hope for the best.
+                */
+               printk(KERN_ERR "ext4: previous I/O error to "
+                      "superblock detected for %s.\n", sb->s_id);
+               clear_buffer_write_io_error(sbh);
+               set_buffer_uptodate(sbh);
+       }
         es->s_wtime = cpu_to_le32(get_seconds());
         ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb));
         es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
         BUFFER_TRACE(sbh, "marking dirty");
         mark_buffer_dirty(sbh);
-       if (sync)
+       if (sync) {
                 sync_dirty_buffer(sbh);
+               if (buffer_write_io_error(sbh)) {
+                       printk(KERN_ERR "ext4: I/O error while writing "
+                              "superblock for %s.\n", sb->s_id);
+                       clear_buffer_write_io_error(sbh);
+                       set_buffer_uptodate(sbh);
+               }
+       }
  }
  
  
@@ -2907,6 +2940,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
  {
         tid_t target;
  
+       trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
         sb->s_dirt = 0;
         if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
                 if (wait)
@@ -3162,7 +3196,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
         buf->f_type = EXT4_SUPER_MAGIC;
         buf->f_bsize = sb->s_blocksize;
         buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
-       buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
+       buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
+                      percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
         ext4_free_blocks_count_set(es, buf->f_bfree);
         buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
         if (buf->f_bfree < ext4_r_blocks_count(es))
@@ -3432,7 +3467,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
         handle_t *handle = journal_current_handle();
  
         if (!handle) {
-               printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)"
+               printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
                         " cancelled because transaction is not started.\n",
                         (unsigned long long)off, (unsigned long long)len);
                 return -EIO;
@@ -3493,18 +3528,82 @@ static int ext4_get_sb(struct file_system_type *fs_type,
         return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
  }
  
+#ifdef CONFIG_PROC_FS
+/*
+ * seq_file show callback: print the unsigned int that m->private points
+ * to, followed by a newline.
+ */
+static int ext4_ui_proc_show(struct seq_file *m, void *v)
+{
+       seq_printf(m, "%u\n", *(unsigned int *)m->private);
+       return 0;
+}
+
+/*
+ * Open callback: hand the proc entry's data pointer to single_open() so
+ * that ext4_ui_proc_show() finds it in the seq_file's private field.
+ */
+static int ext4_ui_proc_open(struct inode *inode, struct file *file)
+{
+       void *data = PDE(inode)->data;
+
+       return single_open(file, ext4_ui_proc_show, data);
+}
+
+/*
+ * Write callback for the unsigned-int proc entries: parse the text written
+ * by the user with simple_strtol() (base 0) and store the result through
+ * the proc entry's data pointer.
+ *
+ * Returns cnt on success, -EINVAL if the input does not fit in the local
+ * buffer, -EFAULT if it cannot be copied from userspace, and -ERANGE if
+ * the parsed value is negative or does not fit in an unsigned int.
+ */
+static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
+                              size_t cnt, loff_t *ppos)
+{
+       unsigned int *p = PDE(file->f_path.dentry->d_inode)->data;
+       char str[32];
+       long value;
+
+       if (cnt >= sizeof(str))
+               return -EINVAL;
+       if (copy_from_user(str, buf, cnt))
+               return -EFAULT;
+       /* the copied bytes are not terminated; cnt < sizeof(str) here */
+       str[cnt] = '\0';
+       /* keep the result signed so the negative check can actually fire */
+       value = simple_strtol(str, NULL, 0);
+       if (value < 0 || (unsigned long)value > UINT_MAX)
+               return -ERANGE;
+       *p = value;
+       return cnt;
+}
+
+/*
+ * File operations for proc entries whose data pointer refers to an
+ * unsigned int: opening goes through ext4_ui_proc_open(), which sets up a
+ * seq_file around ext4_ui_proc_show(); writes go through
+ * ext4_ui_proc_write().
+ */
+const struct file_operations ext4_ui_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = ext4_ui_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+       .write          = ext4_ui_proc_write,
+};
+#endif
+
+/*
+ * Filesystem type for the "ext4" name.  Superblocks are obtained through
+ * ext4_get_sb() and torn down with kill_block_super(); FS_REQUIRES_DEV is
+ * set in fs_flags.
+ */
+static struct file_system_type ext4_fs_type = {
+       .owner          = THIS_MODULE,
+       .name           = "ext4",
+       .get_sb         = ext4_get_sb,
+       .kill_sb        = kill_block_super,
+       .fs_flags       = FS_REQUIRES_DEV,
+};
+
+#ifdef CONFIG_EXT4DEV_COMPAT
+/*
+ * get_sb callback for the legacy "ext4dev" name: log two deprecation
+ * warnings, then obtain the superblock with the same get_sb_bdev() call
+ * (and the same ext4_fill_super callback) that ext4_get_sb() uses.
+ */
+static int ext4dev_get_sb(struct file_system_type *fs_type,
+       int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+{
+       printk(KERN_WARNING
+              "EXT4-fs: Update your userspace programs to mount using ext4\n");
+       printk(KERN_WARNING
+              "EXT4-fs: ext4dev backwards compatibility will go away by 2.6.31\n");
+
+       return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
+}
+
  static struct file_system_type ext4dev_fs_type = {
         .owner          = THIS_MODULE,
         .name           = "ext4dev",
-       .get_sb         = ext4_get_sb,
+       .get_sb         = ext4dev_get_sb,
         .kill_sb        = kill_block_super,
         .fs_flags       = FS_REQUIRES_DEV,
  };
+MODULE_ALIAS("ext4dev");
+#endif
  
  static int __init init_ext4_fs(void)
  {
         int err;
  
+       ext4_proc_root = proc_mkdir("fs/ext4", NULL);
         err = init_ext4_mballoc();
         if (err)
                 return err;
@@ -3515,9 +3614,16 @@ static int __init init_ext4_fs(void)
         err = init_inodecache();
         if (err)
                 goto out1;
-       err = register_filesystem(&ext4dev_fs_type);
+       err = register_filesystem(&ext4_fs_type);
         if (err)
                 goto out;
+#ifdef CONFIG_EXT4DEV_COMPAT
+       err = register_filesystem(&ext4dev_fs_type);
+       if (err) {
+               unregister_filesystem(&ext4_fs_type);
+               goto out;
+       }
+#endif
         return 0;
  out:
         destroy_inodecache();
@@ -3530,10 +3636,14 @@ out2:
  
  static void __exit exit_ext4_fs(void)
  {
+       unregister_filesystem(&ext4_fs_type);
+#ifdef CONFIG_EXT4DEV_COMPAT
         unregister_filesystem(&ext4dev_fs_type);
+#endif
         destroy_inodecache();
         exit_ext4_xattr();
         exit_ext4_mballoc();
+       remove_proc_entry("fs/ext4", NULL);
  }
  
  MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c

index e9178643dc01132b72a396b8099ad0cd8c3785c6..00740cb32be3eb739d0b42bcc107a58947edf0ce 100644 (file)
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -23,10 +23,10 @@
  #include "ext4.h"
  #include "xattr.h"
  
-static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
-       nd_set_link(nd, (char*)ei->i_data);
+       nd_set_link(nd, (char *) ei->i_data);
         return NULL;
  }
  
@@ -34,7 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
         .readlink       = generic_readlink,
         .follow_link    = page_follow_link_light,
         .put_link       = page_put_link,
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
         .setxattr       = generic_setxattr,
         .getxattr       = generic_getxattr,
         .listxattr      = ext4_listxattr,
@@ -45,7 +45,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
  const struct inode_operations ext4_fast_symlink_inode_operations = {
         .readlink       = generic_readlink,
         .follow_link    = ext4_follow_link,
-#ifdef CONFIG_EXT4DEV_FS_XATTR
+#ifdef CONFIG_EXT4_FS_XATTR
         .setxattr       = generic_setxattr,
         .getxattr       = generic_getxattr,
         .listxattr      = ext4_listxattr,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c

index 8954208b489328a662b68803caa8b2b9e5b59daf..80626d516fee9b1e9c985551b8d39190f0f734df 100644 (file)
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -99,12 +99,12 @@ static struct mb_cache *ext4_xattr_cache;
  
  static struct xattr_handler *ext4_xattr_handler_map[] = {
         [EXT4_XATTR_INDEX_USER]              = &ext4_xattr_user_handler,
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
         [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS]  = &ext4_xattr_acl_access_handler,
         [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler,
  #endif
         [EXT4_XATTR_INDEX_TRUSTED]           = &ext4_xattr_trusted_handler,
-#ifdef CONFIG_EXT4DEV_FS_SECURITY
+#ifdef CONFIG_EXT4_FS_SECURITY
         [EXT4_XATTR_INDEX_SECURITY]          = &ext4_xattr_security_handler,
  #endif
  };
@@ -112,11 +112,11 @@ static struct xattr_handler *ext4_xattr_handler_map[] = {
  struct xattr_handler *ext4_xattr_handlers[] = {
         &ext4_xattr_user_handler,
         &ext4_xattr_trusted_handler,
-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
         &ext4_xattr_acl_access_handler,
         &ext4_xattr_acl_default_handler,
  #endif
-#ifdef CONFIG_EXT4DEV_FS_SECURITY
+#ifdef CONFIG_EXT4_FS_SECURITY
         &ext4_xattr_security_handler,
  #endif
         NULL
@@ -959,6 +959,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
         struct ext4_xattr_block_find bs = {
                 .s = { .not_found = -ENODATA, },
         };
+       unsigned long no_expand;
         int error;
  
         if (!name)
@@ -966,6 +967,9 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
         if (strlen(name) > 255)
                 return -ERANGE;
         down_write(&EXT4_I(inode)->xattr_sem);
+       no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND;
+       EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
+
         error = ext4_get_inode_loc(inode, &is.iloc);
         if (error)
                 goto cleanup;
@@ -1042,6 +1046,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
  cleanup:
         brelse(is.iloc.bh);
         brelse(bs.bh);
+       if (no_expand == 0)
+               EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND;
         up_write(&EXT4_I(inode)->xattr_sem);
         return error;
  }
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h

index 5992fe979bb960e4eda701b4c5f422d1fcb51148..8ede88b18c292d71390e4987ebd6b5f3e5139cb0 100644 (file)
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -51,8 +51,8 @@ struct ext4_xattr_entry {
         (((name_len) + EXT4_XATTR_ROUND + \
         sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND)
  #define EXT4_XATTR_NEXT(entry) \
-       ( (struct ext4_xattr_entry *)( \
-         (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) )
+       ((struct ext4_xattr_entry *)( \
+        (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)))
  #define EXT4_XATTR_SIZE(size) \
         (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)
  
@@ -63,7 +63,7 @@ struct ext4_xattr_entry {
                 EXT4_I(inode)->i_extra_isize))
  #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
  
-# ifdef CONFIG_EXT4DEV_FS_XATTR
+# ifdef CONFIG_EXT4_FS_XATTR
  
  extern struct xattr_handler ext4_xattr_user_handler;
  extern struct xattr_handler ext4_xattr_trusted_handler;
@@ -88,7 +88,7 @@ extern void exit_ext4_xattr(void);
  
  extern struct xattr_handler *ext4_xattr_handlers[];
  
-# else  /* CONFIG_EXT4DEV_FS_XATTR */
+# else  /* CONFIG_EXT4_FS_XATTR */
  
  static inline int
  ext4_xattr_get(struct inode *inode, int name_index, const char *name,
@@ -141,9 +141,9 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
  
  #define ext4_xattr_handlers    NULL
  
-# endif  /* CONFIG_EXT4DEV_FS_XATTR */
+# endif  /* CONFIG_EXT4_FS_XATTR */
  
-#ifdef CONFIG_EXT4DEV_FS_SECURITY
+#ifdef CONFIG_EXT4_FS_SECURITY
  extern int ext4_init_security(handle_t *handle, struct inode *inode,
                                 struct inode *dir);
  #else
diff --git a/fs/ioctl.c b/fs/ioctl.c

index 7db32b3382d3a66452388ad9e9023216a1855ab3..33a6b7ecb8b8686449fb42b1ab5f8ef6ce64709f 100644 (file)
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -13,9 +13,14 @@
  #include <linux/security.h>
  #include <linux/module.h>
  #include <linux/uaccess.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h>
  
  #include <asm/ioctls.h>
  
+/*
+ * Largest fm_extent_count that ioctl_fiemap() accepts.  Capping the count
+ * at UINT_MAX / sizeof(struct fiemap_extent) means the size computed for
+ * the fiemap access checks (count * sizeof(struct fiemap_extent)) can't
+ * overflow on 32 bit machines.
+ */
+#define FIEMAP_MAX_EXTENTS     (UINT_MAX / sizeof(struct fiemap_extent))
+
  /**
   * vfs_ioctl - call filesystem specific ioctl methods
   * @filp:      open file to invoke ioctl method on
@@ -71,6 +76,272 @@ static int ioctl_fibmap(struct file *filp, int __user *p)
         return put_user(res, p);
  }
  
+/*
+ * Extent flags that imply a broader flag: when the caller passes any bit
+ * from one of these sets, fiemap_fill_next_extent() also sets
+ * FIEMAP_EXTENT_UNKNOWN, FIEMAP_EXTENT_ENCODED or FIEMAP_EXTENT_NOT_ALIGNED
+ * respectively.
+ */
+#define SET_UNKNOWN_FLAGS      (FIEMAP_EXTENT_DELALLOC)
+#define SET_NO_UNMOUNTED_IO_FLAGS      (FIEMAP_EXTENT_DATA_ENCRYPTED)
+#define SET_NOT_ALIGNED_FLAGS  (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
+
+/**
+ * fiemap_fill_next_extent - Fiemap helper function
+ * @fieinfo:   Fiemap context passed into ->fiemap
+ * @logical:   Logical start offset of the extent, in bytes
+ * @phys:      Physical start offset of the extent, in bytes
+ * @len:       Length of the extent, in bytes
+ * @flags:     FIEMAP_EXTENT flags describing the extent
+ *
+ * Meant to be called from a file system's ->fiemap callback.  Builds one
+ * extent record from the arguments and copies it into the next free slot
+ * of the user array; the mapped-extent count in @fieinfo is bumped on
+ * success.  When @fieinfo has no room for extents at all
+ * (fi_extents_max == 0) the extent is only counted and nothing is copied.
+ *
+ * Returns 0 on success, -errno on error, 1 if this was the last
+ * extent that will fit in user array.
+ */
+int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
+                           u64 phys, u64 len, u32 flags)
+{
+       struct fiemap_extent rec;
+       struct fiemap_extent *slot;
+
+       if (fieinfo->fi_extents_max == 0) {
+               /* only count the extents */
+               fieinfo->fi_extents_mapped++;
+               return !!(flags & FIEMAP_EXTENT_LAST);
+       }
+
+       /* the user array is already full */
+       if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
+               return 1;
+
+       /* add the broader flag implied by each specific one */
+       if (flags & SET_UNKNOWN_FLAGS)
+               flags |= FIEMAP_EXTENT_UNKNOWN;
+       if (flags & SET_NO_UNMOUNTED_IO_FLAGS)
+               flags |= FIEMAP_EXTENT_ENCODED;
+       if (flags & SET_NOT_ALIGNED_FLAGS)
+               flags |= FIEMAP_EXTENT_NOT_ALIGNED;
+
+       /* zero the whole record first; only four fields are set below */
+       memset(&rec, 0, sizeof(rec));
+       rec.fe_logical = logical;
+       rec.fe_physical = phys;
+       rec.fe_length = len;
+       rec.fe_flags = flags;
+
+       slot = fieinfo->fi_extents_start;
+       slot += fieinfo->fi_extents_mapped;
+       if (copy_to_user(slot, &rec, sizeof(rec)))
+               return -EFAULT;
+
+       if (++fieinfo->fi_extents_mapped == fieinfo->fi_extents_max)
+               return 1;
+       return !!(flags & FIEMAP_EXTENT_LAST);
+}
+EXPORT_SYMBOL(fiemap_fill_next_extent);
+
+/**
+ * fiemap_check_flags - check validity of requested flags for fiemap
+ * @fieinfo:   Fiemap context passed into ->fiemap
+ * @fs_flags:  Set of fiemap flags that the file system understands
+ *
+ * Meant to be called from a file system's ->fiemap callback.  A requested
+ * flag is accepted only if it is both in FIEMAP_FLAGS_COMPAT and in
+ * @fs_flags.  If the user asked for anything else, the offending flags
+ * replace fieinfo->fi_flags and -EBADR is returned, which tells
+ * ioctl_fiemap() to return those values to userspace.  For this reason, a
+ * return code of -EBADR should be preserved.
+ *
+ * Returns 0 on success, -EBADR on bad flags.
+ */
+int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags)
+{
+       u32 supported = FIEMAP_FLAGS_COMPAT & fs_flags;
+       u32 rejected = fieinfo->fi_flags & ~supported;
+
+       if (!rejected)
+               return 0;
+
+       fieinfo->fi_flags = rejected;
+       return -EBADR;
+}
+EXPORT_SYMBOL(fiemap_check_flags);
+
+/*
+ * Validate a fiemap byte range against the file system's s_maxbytes.
+ *
+ * *new_len is always set: to @len as given, or to s_maxbytes - @start when
+ * the range would reach past s_maxbytes.
+ *
+ * Returns 0 on success, -EINVAL for a zero length, -EFBIG when @start lies
+ * beyond s_maxbytes.
+ */
+static int fiemap_check_ranges(struct super_block *sb,
+                              u64 start, u64 len, u64 *new_len)
+{
+       u64 maxbytes = sb->s_maxbytes;
+
+       *new_len = len;
+
+       if (!len)
+               return -EINVAL;
+       if (start > maxbytes)
+               return -EFBIG;
+
+       /*
+        * Shrink request scope to what the fs can actually handle.
+        */
+       if (len > maxbytes || maxbytes - len < start)
+               *new_len = maxbytes - start;
+
+       return 0;
+}
+
+/*
+ * Handle the fiemap ioctl: copy the request header in from @arg, validate
+ * it, let the inode's ->fiemap callback fill the extent array that follows
+ * the header in user memory, then copy the updated header back.
+ *
+ * Returns the ->fiemap result, or -EOPNOTSUPP if the inode has no ->fiemap,
+ * -EINVAL for too many extents, -EFAULT for bad user memory, or whatever
+ * fiemap_check_ranges() rejects the range with.
+ */
+static int ioctl_fiemap(struct file *filp, unsigned long arg)
+{
+       struct inode *inode = filp->f_path.dentry->d_inode;
+       struct super_block *sb = inode->i_sb;
+       struct fiemap_extent_info info = { 0, };
+       struct fiemap req;
+       u64 map_len;
+       int ret;
+
+       if (!inode->i_op->fiemap)
+               return -EOPNOTSUPP;
+
+       if (copy_from_user(&req, (struct fiemap __user *)arg,
+                          sizeof(struct fiemap)))
+               return -EFAULT;
+
+       if (req.fm_extent_count > FIEMAP_MAX_EXTENTS)
+               return -EINVAL;
+
+       ret = fiemap_check_ranges(sb, req.fm_start, req.fm_length, &map_len);
+       if (ret)
+               return ret;
+
+       info.fi_flags = req.fm_flags;
+       info.fi_extents_max = req.fm_extent_count;
+       /* the extent array starts right behind the header in user memory */
+       info.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(req));
+
+       if (req.fm_extent_count != 0 &&
+           !access_ok(VERIFY_WRITE, info.fi_extents_start,
+                      info.fi_extents_max * sizeof(struct fiemap_extent)))
+               return -EFAULT;
+
+       if (info.fi_flags & FIEMAP_FLAG_SYNC)
+               filemap_write_and_wait(inode->i_mapping);
+
+       ret = inode->i_op->fiemap(inode, &info, req.fm_start, map_len);
+
+       /* flags and extent count go back to the user even if ->fiemap failed */
+       req.fm_flags = info.fi_flags;
+       req.fm_mapped_extents = info.fi_extents_mapped;
+       if (copy_to_user((char *)arg, &req, sizeof(req)))
+               ret = -EFAULT;
+
+       return ret;
+}
+
+/*
+ * Convert between block numbers and byte offsets by shifting with the
+ * inode's i_blkbits.  The arguments are parenthesized so that compound
+ * expressions shift as a whole, and neither macro carries a trailing
+ * semicolon, so both can be used inside larger expressions.
+ */
+#define blk_to_logical(inode, blk) ((blk) << (inode)->i_blkbits)
+#define logical_to_blk(inode, offset) ((offset) >> (inode)->i_blkbits)
+
+/*
+ * @inode - the inode to map
+ * @fieinfo - extent info the mapped extents are reported through
+ * @start, @len - byte offset and length of the range to map
+ * @get_block - the fs's get_block function
+ *
+ * This does FIEMAP for block based inodes.  Basically it will just loop
+ * through get_block until we hit the number of extents we want to map, or we
+ * go past the end of the file and hit a hole.  Returns 0 or an error code.
+ *
+ * If data blocks can exist beyond a hole past @inode->i_size, do not use this
+ * function: it will stop at the first unmapped block beyond i_size.
+ */
+int generic_block_fiemap(struct inode *inode,
+                        struct fiemap_extent_info *fieinfo, u64 start,
+                        u64 len, get_block_t *get_block)
+{
+       struct buffer_head tmp;
+       unsigned long long start_blk;   /* 64-bit: no wrap past 4GB */
+       long long length = 0, map_len = 0;
+       u64 logical = 0, phys = 0, size = 0;
+       u32 flags = FIEMAP_EXTENT_MERGED;
+       int ret = 0;
+
+       if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC)))
+               return ret;
+
+       start_blk = logical_to_blk(inode, start);
+
+       /* guard against change */
+       mutex_lock(&inode->i_mutex);
+
+       length = (long long)min_t(u64, len, i_size_read(inode));
+       map_len = length;
+
+       do {
+               /*
+                * we set b_size to the total size we want so it will map as
+                * many contiguous blocks as possible at once
+                */
+               memset(&tmp, 0, sizeof(struct buffer_head));
+               tmp.b_size = map_len;
+
+               ret = get_block(inode, start_blk, &tmp, 0);
+               if (ret)
+                       break;
+
+               /* HOLE */
+               if (!buffer_mapped(&tmp)) {
+                       /*
+                        * first hole after going past the EOF, this is our
+                        * last extent
+                        */
+                       if (length <= 0) {
+                               flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST;
+                               ret = fiemap_fill_next_extent(fieinfo, logical,
+                                                             phys, size,
+                                                             flags);
+                               break;
+                       }
+
+                       length -= blk_to_logical(inode, 1);
+
+                       /* if we have holes up to/past EOF then we're done */
+                       if (length <= 0)
+                               break;
+
+                       start_blk++;
+               } else {
+                       if (length <= 0 && size) {
+                               ret = fiemap_fill_next_extent(fieinfo, logical,
+                                                             phys, size,
+                                                             flags);
+                               if (ret)
+                                       break;
+                       }
+
+                       logical = blk_to_logical(inode, start_blk);
+                       phys = blk_to_logical(inode, (u64)tmp.b_blocknr);
+                       size = tmp.b_size;
+                       flags = FIEMAP_EXTENT_MERGED;
+
+                       length -= tmp.b_size;
+                       start_blk += logical_to_blk(inode, size);
+
+                       /*
+                        * if we are past the EOF we need to loop again to see
+                        * if there is a hole so we can mark this extent as the
+                        * last one, and if not keep mapping things until we
+                        * find a hole, or we run out of slots in the extent
+                        * array
+                        */
+                       if (length <= 0)
+                               continue;
+
+                       ret = fiemap_fill_next_extent(fieinfo, logical, phys,
+                                                     size, flags);
+                       if (ret)
+                               break;
+               }
+               cond_resched();
+       } while (1);
+
+       mutex_unlock(&inode->i_mutex);
+
+       /* if ret is 1 then we just hit the end of the extent array */
+       if (ret == 1)
+               ret = 0;
+
+       return ret;
+}
+EXPORT_SYMBOL(generic_block_fiemap);
+
  static int file_ioctl(struct file *filp, unsigned int cmd,
                 unsigned long arg)
  {
@@ -80,6 +351,8 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
         switch (cmd) {
         case FIBMAP:
                 return ioctl_fibmap(filp, p);
+       case FS_IOC_FIEMAP:
+               return ioctl_fiemap(filp, arg);
         case FIGETBSZ:
                 return put_user(inode->i_sb->s_blocksize, p);
         case FIONREAD:
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c

index 91389c8aee8a7c765c1ae9d7f2bbeed51f95cd0c..42895d3694581885de894790f505557fda891c24 100644 (file)
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -20,6 +20,7 @@
  #include <linux/time.h>
  #include <linux/fs.h>
  #include <linux/jbd2.h>
+#include <linux/marker.h>
  #include <linux/errno.h>
  #include <linux/slab.h>
  
@@ -126,14 +127,29 @@ void __jbd2_log_wait_for_space(journal_t *journal)
  
                 /*
                  * Test again, another process may have checkpointed while we
-                * were waiting for the checkpoint lock
+                * were waiting for the checkpoint lock. If there are no
+                * outstanding transactions there is nothing to checkpoint and
+                * we can't make progress. Abort the journal in this case.
                  */
                 spin_lock(&journal->j_state_lock);
+               spin_lock(&journal->j_list_lock);
                 nblocks = jbd_space_needed(journal);
                 if (__jbd2_log_space_left(journal) < nblocks) {
+                       int chkpt = journal->j_checkpoint_transactions != NULL;
+
+                       spin_unlock(&journal->j_list_lock);
                         spin_unlock(&journal->j_state_lock);
-                       jbd2_log_do_checkpoint(journal);
+                       if (chkpt) {
+                               jbd2_log_do_checkpoint(journal);
+                       } else {
+                               printk(KERN_ERR "%s: no transactions\n",
+                                      __func__);
+                               jbd2_journal_abort(journal, 0);
+                       }
+
                         spin_lock(&journal->j_state_lock);
+               } else {
+                       spin_unlock(&journal->j_list_lock);
                 }
                 mutex_unlock(&journal->j_checkpoint_mutex);
         }
@@ -313,6 +329,8 @@ int jbd2_log_do_checkpoint(journal_t *journal)
          * journal straight away.
          */
         result = jbd2_cleanup_journal_tail(journal);
+       trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d",
+                  journal->j_devname, result);
         jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
         if (result <= 0)
                 return result;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c

index f2ad061e95ec93937b7b5f3adfe48fb2b446ad0d..0d3814a35ed11e19a09beb3007113bfa067d0ecb 100644 (file)
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -16,6 +16,7 @@
  #include <linux/time.h>
  #include <linux/fs.h>
  #include <linux/jbd2.h>
+#include <linux/marker.h>
  #include <linux/errno.h>
  #include <linux/slab.h>
  #include <linux/mm.h>
@@ -126,8 +127,7 @@ static int journal_submit_commit_record(journal_t *journal,
  
         JBUFFER_TRACE(descriptor, "submit commit block");
         lock_buffer(bh);
-       get_bh(bh);
-       set_buffer_dirty(bh);
+       clear_buffer_dirty(bh);
         set_buffer_uptodate(bh);
         bh->b_end_io = journal_end_buffer_io_sync;
  
@@ -147,12 +147,9 @@ static int journal_submit_commit_record(journal_t *journal,
          * to remember if we sent a barrier request
          */
         if (ret == -EOPNOTSUPP && barrier_done) {
-               char b[BDEVNAME_SIZE];
-
                 printk(KERN_WARNING
-                       "JBD: barrier-based sync failed on %s - "
-                       "disabling barriers\n",
-                       bdevname(journal->j_dev, b));
+                      "JBD: barrier-based sync failed on %s - "
+                      "disabling barriers\n", journal->j_devname);
                 spin_lock(&journal->j_state_lock);
                 journal->j_flags &= ~JBD2_BARRIER;
                 spin_unlock(&journal->j_state_lock);
@@ -160,7 +157,7 @@ static int journal_submit_commit_record(journal_t *journal,
                 /* And try again, without the barrier */
                 lock_buffer(bh);
                 set_buffer_uptodate(bh);
-               set_buffer_dirty(bh);
+               clear_buffer_dirty(bh);
                 ret = submit_bh(WRITE, bh);
         }
         *cbh = bh;
@@ -371,6 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
         commit_transaction = journal->j_running_transaction;
         J_ASSERT(commit_transaction->t_state == T_RUNNING);
  
+       trace_mark(jbd2_start_commit, "dev %s transaction %d",
+                  journal->j_devname, commit_transaction->t_tid);
         jbd_debug(1, "JBD: starting commit of transaction %d\n",
                         commit_transaction->t_tid);
  
@@ -681,11 +680,9 @@ start_journal_io:
          */
         err = journal_finish_inode_data_buffers(journal, commit_transaction);
         if (err) {
-               char b[BDEVNAME_SIZE];
-
                 printk(KERN_WARNING
                         "JBD2: Detected IO errors while flushing file data "
-                       "on %s\n", bdevname(journal->j_fs_dev, b));
+                      "on %s\n", journal->j_devname);
                 err = 0;
         }
  
@@ -990,6 +987,9 @@ restart_loop:
         }
         spin_unlock(&journal->j_list_lock);
  
+       trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
+                  journal->j_devname, commit_transaction->t_tid,
+                  journal->j_tail_sequence);
         jbd_debug(1, "JBD: commit %d complete, head %d\n",
                   journal->j_commit_sequence, journal->j_tail_sequence);
  
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c

index 8207a01c4edbea2d28747d93cf069d526c446aed..01c3901c3a07599e592e9bd09282a3d6cd42cf24 100644 (file)
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -597,13 +597,9 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
                 if (ret)
                         *retp = ret;
                 else {
-                       char b[BDEVNAME_SIZE];
-
                         printk(KERN_ALERT "%s: journal block not found "
                                         "at offset %lu on %s\n",
-                               __func__,
-                               blocknr,
-                               bdevname(journal->j_dev, b));
+                              __func__, blocknr, journal->j_devname);
                         err = -EIO;
                         __journal_abort_soft(journal, err);
                 }
@@ -901,10 +897,7 @@ static struct proc_dir_entry *proc_jbd2_stats;
  
  static void jbd2_stats_proc_init(journal_t *journal)
  {
-       char name[BDEVNAME_SIZE];
-
-       bdevname(journal->j_dev, name);
-       journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats);
+       journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats);
         if (journal->j_proc_entry) {
                 proc_create_data("history", S_IRUGO, journal->j_proc_entry,
                                  &jbd2_seq_history_fops, journal);
@@ -915,12 +908,9 @@ static void jbd2_stats_proc_init(journal_t *journal)
  
  static void jbd2_stats_proc_exit(journal_t *journal)
  {
-       char name[BDEVNAME_SIZE];
-
-       bdevname(journal->j_dev, name);
         remove_proc_entry("info", journal->j_proc_entry);
         remove_proc_entry("history", journal->j_proc_entry);
-       remove_proc_entry(name, proc_jbd2_stats);
+       remove_proc_entry(journal->j_devname, proc_jbd2_stats);
  }
  
  static void journal_init_stats(journal_t *journal)
@@ -1018,6 +1008,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
  {
         journal_t *journal = journal_init_common();
         struct buffer_head *bh;
+       char *p;
         int n;
  
         if (!journal)
@@ -1039,6 +1030,10 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
         journal->j_fs_dev = fs_dev;
         journal->j_blk_offset = start;
         journal->j_maxlen = len;
+       bdevname(journal->j_dev, journal->j_devname);
+       p = journal->j_devname;
+       while ((p = strchr(p, '/')))
+               *p = '!';
         jbd2_stats_proc_init(journal);
  
         bh = __getblk(journal->j_dev, start, journal->j_blocksize);
@@ -1061,6 +1056,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
  {
         struct buffer_head *bh;
         journal_t *journal = journal_init_common();
+       char *p;
         int err;
         int n;
         unsigned long long blocknr;
@@ -1070,6 +1066,12 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
  
         journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev;
         journal->j_inode = inode;
+       bdevname(journal->j_dev, journal->j_devname);
+       p = journal->j_devname;
+       while ((p = strchr(p, '/')))
+               *p = '!';
+       p = journal->j_devname + strlen(journal->j_devname);
+       sprintf(p, ":%lu", journal->j_inode->i_ino);
         jbd_debug(1,
                   "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
                   journal, inode->i_sb->s_id, inode->i_ino,
@@ -1253,6 +1255,22 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
                 goto out;
         }
  
+       if (buffer_write_io_error(bh)) {
+               /*
+                * Oh, dear.  A previous attempt to write the journal
+                * superblock failed.  This could happen because the
+                * USB device was yanked out.  Or it could happen to
+                * be a transient write error and maybe the block will
+                * be remapped.  Nothing we can do but to retry the
+                * write and hope for the best.
+                */
+               printk(KERN_ERR "JBD2: previous I/O error detected "
+                      "for journal superblock update for %s.\n",
+                      journal->j_devname);
+               clear_buffer_write_io_error(bh);
+               set_buffer_uptodate(bh);
+       }
+
         spin_lock(&journal->j_state_lock);
         jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
                   journal->j_tail, journal->j_tail_sequence, journal->j_errno);
@@ -1264,9 +1282,16 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
  
         BUFFER_TRACE(bh, "marking dirty");
         mark_buffer_dirty(bh);
-       if (wait)
+       if (wait) {
                 sync_dirty_buffer(bh);
-       else
+               if (buffer_write_io_error(bh)) {
+                       printk(KERN_ERR "JBD2: I/O error detected "
+                              "when updating journal superblock for %s.\n",
+                              journal->j_devname);
+                       clear_buffer_write_io_error(bh);
+                       set_buffer_uptodate(bh);
+               }
+       } else
                 ll_rw_block(SWRITE, 1, &bh);
  
  out:
@@ -1760,23 +1785,6 @@ int jbd2_journal_wipe(journal_t *journal, int write)
         return err;
  }
  
-/*
- * journal_dev_name: format a character string to describe on what
- * device this journal is present.
- */
-
-static const char *journal_dev_name(journal_t *journal, char *buffer)
-{
-       struct block_device *bdev;
-
-       if (journal->j_inode)
-               bdev = journal->j_inode->i_sb->s_bdev;
-       else
-               bdev = journal->j_dev;
-
-       return bdevname(bdev, buffer);
-}
-
  /*
   * Journal abort has very specific semantics, which we describe
   * for journal abort.
@@ -1793,13 +1801,12 @@ static const char *journal_dev_name(journal_t *journal, char *buffer)
  void __jbd2_journal_abort_hard(journal_t *journal)
  {
         transaction_t *transaction;
-       char b[BDEVNAME_SIZE];
  
         if (journal->j_flags & JBD2_ABORT)
                 return;
  
         printk(KERN_ERR "Aborting journal on device %s.\n",
-               journal_dev_name(journal, b));
+              journal->j_devname);
  
         spin_lock(&journal->j_state_lock);
         journal->j_flags |= JBD2_ABORT;
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c

index 10bfb466e0687eb4c99dd3c5fc1e787c07d68efc..29ff57ec5d1f97047549e69f97a5d426b3e60af2 100644 (file)
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -989,15 +989,6 @@ out:
         return ret;
  }
  
-/*
- * This is only valid for leaf nodes, which are the only ones that can
- * have empty extents anyway.
- */
-static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
-{
-       return !rec->e_leaf_clusters;
-}
-
  /*
   * This function will discard the rightmost extent record.
   */
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h

index 42ff94bd8011f541cfd82ad45faaca17f742a80a..60cd3d59230c85398babfa7225fe087f2992a739 100644 (file)
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -146,4 +146,13 @@ static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el,
                 return le16_to_cpu(rec->e_leaf_clusters);
  }
  
+/*
+ * An extent record counts as empty when it covers zero leaf clusters.
+ * Only valid for leaf nodes - no other node can hold empty extents anyway.
+ */
+static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
+{
+       return rec->e_leaf_clusters == 0;
+}
+
  #endif /* OCFS2_ALLOC_H */
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c

index c58668a326fe87f58074741965c531761fe97359..aed268e80b49173a5a3af264bcd5ca5c7e55ca8a 100644 (file)
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -25,6 +25,7 @@
  #include <linux/fs.h>
  #include <linux/init.h>
  #include <linux/types.h>
+#include <linux/fiemap.h>
  
  #define MLOG_MASK_PREFIX ML_EXTENT_MAP
  #include <cluster/masklog.h>
@@ -32,6 +33,7 @@
  #include "ocfs2.h"
  
  #include "alloc.h"
+#include "dlmglue.h"
  #include "extent_map.h"
  #include "inode.h"
  #include "super.h"
@@ -282,6 +284,51 @@ out:
                 kfree(new_emi);
  }
  
+static int ocfs2_last_eb_is_empty(struct inode *inode,
+                                 struct ocfs2_dinode *di)
+{
+       int status, used;
+       struct buffer_head *leaf_bh = NULL;
+       struct ocfs2_extent_block *leaf;
+       struct ocfs2_extent_list *list;
+
+       status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
+                                 le64_to_cpu(di->i_last_eb_blk),
+                                 &leaf_bh, OCFS2_BH_CACHED, inode);
+       if (status) {
+               mlog_errno(status);
+               goto done;
+       }
+
+       leaf = (struct ocfs2_extent_block *) leaf_bh->b_data;
+       if (!OCFS2_IS_VALID_EXTENT_BLOCK(leaf)) {
+               status = -EROFS;
+               OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, leaf);
+               goto done;
+       }
+
+       list = &leaf->h_list;
+       if (list->l_tree_depth) {
+               ocfs2_error(inode->i_sb,
+                           "Inode %lu has non zero tree depth in "
+                           "leaf block %llu\n", inode->i_ino,
+                           (unsigned long long)leaf_bh->b_blocknr);
+               status = -EROFS;
+               goto done;
+       }
+
+       /* 1 if no record is in use, or only one and it is empty; else stay 0 */
+       used = le16_to_cpu(list->l_next_free_rec);
+       if (used == 0)
+               status = 1;
+       else if (used == 1 && ocfs2_is_empty_extent(&list->l_recs[0]))
+               status = 1;
+
+done:
+       brelse(leaf_bh);
+       return status;
+}
+
  /*
   * Return the 1st index within el which contains an extent start
   * larger than v_cluster.
@@ -373,42 +420,28 @@ out:
         return ret;
  }
  
-int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
-                      u32 *p_cluster, u32 *num_clusters,
-                      unsigned int *extent_flags)
+static int ocfs2_get_clusters_nocache(struct inode *inode,
+                                     struct buffer_head *di_bh,
+                                     u32 v_cluster, unsigned int *hole_len,
+                                     struct ocfs2_extent_rec *ret_rec,
+                                     unsigned int *is_last)
  {
-       int ret, i;
-       unsigned int flags = 0;
-       struct buffer_head *di_bh = NULL;
-       struct buffer_head *eb_bh = NULL;
+       int i, ret, tree_height, len;
         struct ocfs2_dinode *di;
-       struct ocfs2_extent_block *eb;
+       struct ocfs2_extent_block *uninitialized_var(eb);
         struct ocfs2_extent_list *el;
         struct ocfs2_extent_rec *rec;
-       u32 coff;
-
-       if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
-               ret = -ERANGE;
-               mlog_errno(ret);
-               goto out;
-       }
-
-       ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
-                                     num_clusters, extent_flags);
-       if (ret == 0)
-               goto out;
+       struct buffer_head *eb_bh = NULL;
  
-       ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
-                              &di_bh, OCFS2_BH_CACHED, inode);
-       if (ret) {
-               mlog_errno(ret);
-               goto out;
-       }
+       memset(ret_rec, 0, sizeof(*ret_rec));
+       if (is_last)
+               *is_last = 0;
  
         di = (struct ocfs2_dinode *) di_bh->b_data;
         el = &di->id2.i_list;
+       tree_height = le16_to_cpu(el->l_tree_depth);
  
-       if (el->l_tree_depth) {
+       if (tree_height > 0) {
                 ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh);
                 if (ret) {
                         mlog_errno(ret);
@@ -431,46 +464,143 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
         i = ocfs2_search_extent_list(el, v_cluster);
         if (i == -1) {
                 /*
-                * A hole was found. Return some canned values that
-                * callers can key on. If asked for, num_clusters will
-                * be populated with the size of the hole.
+                * Holes can be larger than the maximum size of an
+                * extent, so we return their lengths in a separate
+                * field.
                  */
-               *p_cluster = 0;
-               if (num_clusters) {
+               if (hole_len) {
                         ret = ocfs2_figure_hole_clusters(inode, el, eb_bh,
-                                                        v_cluster,
-                                                        num_clusters);
+                                                        v_cluster, &len);
                         if (ret) {
                                 mlog_errno(ret);
                                 goto out;
                         }
+
+                       *hole_len = len;
                 }
-       } else {
-               rec = &el->l_recs[i];
+               goto out_hole;
+       }
  
-               BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
+       rec = &el->l_recs[i];
  
-               if (!rec->e_blkno) {
-                       ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
-                                   "record (%u, %u, 0)", inode->i_ino,
-                                   le32_to_cpu(rec->e_cpos),
-                                   ocfs2_rec_clusters(el, rec));
-                       ret = -EROFS;
-                       goto out;
+       BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
+
+       if (!rec->e_blkno) {
+               ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
+                           "record (%u, %u, 0)", inode->i_ino,
+                           le32_to_cpu(rec->e_cpos),
+                           ocfs2_rec_clusters(el, rec));
+               ret = -EROFS;
+               goto out;
+       }
+
+       *ret_rec = *rec;
+
+       /*
+        * Checking for last extent is potentially expensive - we
+        * might have to look at the next leaf over to see if it's
+        * empty.
+        *
+        * The first two checks are to see whether the caller even
+        * cares for this information, and if the extent is at least
+        * the last in its list.
+        *
+        * If those hold true, then the extent is last if any of the
+        * additional conditions hold true:
+        *  - Extent list is in-inode
+        *  - Extent list is right-most
+        *  - Extent list is 2nd to rightmost, with empty right-most
+        */
+       if (is_last) {
+               if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
+                       if (tree_height == 0)
+                               *is_last = 1;
+                       else if (eb->h_blkno == di->i_last_eb_blk)
+                               *is_last = 1;
+                       else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
+                               ret = ocfs2_last_eb_is_empty(inode, di);
+                               if (ret < 0) {
+                                       mlog_errno(ret);
+                                       goto out;
+                               }
+                               if (ret == 1)
+                                       *is_last = 1;
+                       }
                 }
+       }
+
+out_hole:
+       ret = 0;
+out:
+       brelse(eb_bh);
+       return ret;
+}
+
+static void ocfs2_relative_extent_offsets(struct super_block *sb,
+                                         u32 v_cluster,
+                                         struct ocfs2_extent_rec *rec,
+                                         u32 *p_cluster, u32 *num_clusters)
+{
+       /* how far v_cluster lies past the record's starting e_cpos */
+       u32 delta = v_cluster - le32_to_cpu(rec->e_cpos);
+       u32 first = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
+       /* cluster derived from the record's e_blkno, shifted by delta */
+       *p_cluster = first + delta;
+       /* optional: clusters left in the record from v_cluster onwards */
+       if (num_clusters)
+               *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - delta;
+}
+
+int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
+                      u32 *p_cluster, u32 *num_clusters,
+                      unsigned int *extent_flags)
+{
+       int ret;
+       unsigned int uninitialized_var(hole_len), flags = 0;
+       struct buffer_head *di_bh = NULL;
+       struct ocfs2_extent_rec rec;
  
-               coff = v_cluster - le32_to_cpu(rec->e_cpos);
+       if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+               ret = -ERANGE;
+               mlog_errno(ret);
+               goto out;
+       }
  
-               *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
-                                                   le64_to_cpu(rec->e_blkno));
-               *p_cluster = *p_cluster + coff;
+       ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
+                                     num_clusters, extent_flags);
+       if (ret == 0)
+               goto out;
  
-               if (num_clusters)
-                       *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
+       ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
+                              &di_bh, OCFS2_BH_CACHED, inode);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
+       }
  
-               flags = rec->e_flags;
+       ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
+                                        &rec, NULL);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
+       }
  
-               ocfs2_extent_map_insert_rec(inode, rec);
+       if (rec.e_blkno == 0ULL) {
+               /*
+                * A hole was found. Return some canned values that
+                * callers can key on. If asked for, num_clusters will
+                * be populated with the size of the hole.
+                */
+               *p_cluster = 0;
+               if (num_clusters) {
+                       *num_clusters = hole_len;
+               }
+       } else {
+               ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
+                                             p_cluster, num_clusters);
+               flags = rec.e_flags;
+
+               ocfs2_extent_map_insert_rec(inode, &rec);
         }
  
         if (extent_flags)
@@ -478,7 +608,6 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
  
  out:
         brelse(di_bh);
-       brelse(eb_bh);
         return ret;
  }
  
@@ -521,3 +650,114 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
  out:
         return ret;
  }
+
+static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
+                              struct fiemap_extent_info *fieinfo,
+                              u64 map_start)
+{
+       int err;
+       u64 phys_addr;
+       unsigned int inline_cap;
+       struct ocfs2_dinode *dinode = (struct ocfs2_dinode *)di_bh->b_data;
+
+       inline_cap = le16_to_cpu(dinode->id2.i_data.id_count);
+       /* nothing to report when the request starts at or past id_count */
+       if (map_start >= inline_cap)
+               return 0;
+
+       /* ip_blkno converted to bytes, plus the offset of id_data in the dinode */
+       phys_addr = OCFS2_I(inode)->ip_blkno << inode->i_sb->s_blocksize_bits;
+       phys_addr += offsetof(struct ocfs2_dinode, id2.i_data.id_data);
+
+       /* a single extent of id_count bytes, flagged inline and last */
+       err = fiemap_fill_next_extent(fieinfo, 0, phys_addr, inline_cap,
+                                     FIEMAP_EXTENT_DATA_INLINE |
+                                     FIEMAP_EXTENT_LAST);
+       if (err < 0)
+               return err;
+       return 0;
+}
+
+#define OCFS2_FIEMAP_FLAGS     (FIEMAP_FLAG_SYNC)
+
+/*
+ * Report the extents backing [map_start, map_start + map_len) via fieinfo.
+ * Returns 0 on success, else the error from the helper that failed.
+ */
+int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+                u64 map_start, u64 map_len)
+{
+       int ret, is_last;
+       u32 mapping_end, cpos;
+       unsigned int hole_size;
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+       u64 len_bytes, phys_bytes, virt_bytes;
+       struct buffer_head *di_bh = NULL;
+       struct ocfs2_extent_rec rec;
+
+       ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
+       if (ret)
+               return ret;
+
+       ret = ocfs2_inode_lock(inode, &di_bh, 0);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
+       }
+
+       down_read(&OCFS2_I(inode)->ip_alloc_sem);
+
+       /* Handle inline-data separately. */
+       if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+               ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
+               goto out_unlock;
+       }
+
+       cpos = map_start >> osb->s_clustersize_bits;
+       mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
+                                              map_start + map_len);
+       /* mapping_end is an absolute cluster offset, compared against cpos */
+       is_last = 0;
+       while (cpos < mapping_end && !is_last) {
+               u32 fe_flags;
+
+               ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
+                                                &hole_size, &rec, &is_last);
+               if (ret) {
+                       mlog_errno(ret);
+                       /* locks and di_bh are held: unwind them */
+                       goto out_unlock;
+               }
+
+               if (rec.e_blkno == 0ULL) {
+                       cpos += hole_size;
+                       continue;
+               }
+
+               fe_flags = 0;
+               if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
+                       fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
+               if (is_last)
+                       fe_flags |= FIEMAP_EXTENT_LAST;
+               len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
+               phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
+               virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;
+
+               ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
+                                             len_bytes, fe_flags);
+               if (ret)
+                       break;
+
+               cpos = le32_to_cpu(rec.e_cpos) + le16_to_cpu(rec.e_leaf_clusters);
+       }
+
+       if (ret > 0)
+               ret = 0;
+
+out_unlock:
+       brelse(di_bh);
+       up_read(&OCFS2_I(inode)->ip_alloc_sem);
+       ocfs2_inode_unlock(inode, 0);
+out:
+       return ret;
+}
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h

index de91e3e41a224ac7f1eb39f1e35bdd22034d811a..1b97490e1ea8d7b431cf40c9bbcab801b3bb4305 100644 (file)
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -50,4 +50,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster,
  int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
                                 u64 *ret_count, unsigned int *extent_flags);
  
+int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+                u64 map_start, u64 map_len);
+
  #endif  /* _EXTENT_MAP_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c

index ec2ed15c3daab10894cf249d698a834924d74651..ed38796052d201f28da44d0ec60d7bf4baf078a8 100644 (file)
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2228,6 +2228,7 @@ const struct inode_operations ocfs2_file_iops = {
         .getattr        = ocfs2_getattr,
         .permission     = ocfs2_permission,
         .fallocate      = ocfs2_fallocate,
+       .fiemap         = ocfs2_fiemap,
  };
  
  const struct inode_operations ocfs2_special_file_iops = {
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h

index 80171ee89a2222b8e00fb65b2220b4f0fcf78165..8120fa1bc2357008f63c06c78d3304c781bf9341 100644 (file)
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -837,6 +837,8 @@ extern void ext3_truncate (struct inode *);
  extern void ext3_set_inode_flags(struct inode *);
  extern void ext3_get_inode_flags(struct ext3_inode_info *);
  extern void ext3_set_aops(struct inode *inode);
+extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+                      u64 start, u64 len);
  
  /* ioctl.c */
  extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h

new file mode 100644 (file)

index 0000000..671decb
--- /dev/null
+++ b/include/linux/fiemap.h
@@ -0,0 +1,64 @@
+/*
+ * FS_IOC_FIEMAP ioctl infrastructure.
+ *
+ * Some portions copyright (C) 2007 Cluster File Systems, Inc
+ *
+ * Authors: Mark Fasheh <mfasheh@suse.com>
+ *          Kalpak Shah <kalpak.shah@sun.com>
+ *          Andreas Dilger <adilger@sun.com>
+ */
+
+#ifndef _LINUX_FIEMAP_H
+#define _LINUX_FIEMAP_H
+
+struct fiemap_extent {
+       __u64 fe_logical;  /* logical offset in bytes for the start of
+                           * the extent from the beginning of the file */
+       __u64 fe_physical; /* physical offset in bytes for the start
+                           * of the extent from the beginning of the disk */
+       __u64 fe_length;   /* length in bytes for this extent */
+       __u64 fe_reserved64[2];
+       __u32 fe_flags;    /* FIEMAP_EXTENT_* flags for this extent */
+       __u32 fe_reserved[3];
+};
+
+struct fiemap {
+       __u64 fm_start;         /* logical offset (inclusive) at
+                                * which to start mapping (in) */
+       __u64 fm_length;        /* logical length of mapping which
+                                * userspace wants (in) */
+       __u32 fm_flags;         /* FIEMAP_FLAG_* flags for request (in/out) */
+       __u32 fm_mapped_extents;/* number of extents that were mapped (out) */
+       __u32 fm_extent_count;  /* size of fm_extents array (in) */
+       __u32 fm_reserved;
+       struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
+};
+
+#define FIEMAP_MAX_OFFSET      (~0ULL)
+
+#define FIEMAP_FLAG_SYNC       0x00000001 /* sync file data before map */
+#define FIEMAP_FLAG_XATTR      0x00000002 /* map extended attribute tree */
+
+#define FIEMAP_FLAGS_COMPAT    (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
+
+#define FIEMAP_EXTENT_LAST             0x00000001 /* Last extent in file. */
+#define FIEMAP_EXTENT_UNKNOWN          0x00000002 /* Data location unknown. */
+#define FIEMAP_EXTENT_DELALLOC         0x00000004 /* Location still pending.
+                                                   * Sets EXTENT_UNKNOWN. */
+#define FIEMAP_EXTENT_ENCODED          0x00000008 /* Data can not be read
+                                                   * while fs is unmounted */
+#define FIEMAP_EXTENT_DATA_ENCRYPTED   0x00000080 /* Data is encrypted by fs.
+                                                   * Sets EXTENT_ENCODED. */
+#define FIEMAP_EXTENT_NOT_ALIGNED      0x00000100 /* Extent offsets may not be
+                                                   * block aligned. */
+#define FIEMAP_EXTENT_DATA_INLINE      0x00000200 /* Data mixed with metadata.
+                                                   * Sets EXTENT_NOT_ALIGNED.*/
+#define FIEMAP_EXTENT_DATA_TAIL                0x00000400 /* Multiple files in block.
+                                                   * Sets EXTENT_NOT_ALIGNED.*/
+#define FIEMAP_EXTENT_UNWRITTEN                0x00000800 /* Space allocated, but
+                                                   * no data (i.e. zero). */
+#define FIEMAP_EXTENT_MERGED           0x00001000 /* File does not natively
+                                                   * support extents. Result
+                                                   * merged for efficiency. */
+
+#endif /* _LINUX_FIEMAP_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 32477e8872d544ce9abeca8cde1b603d4739814d..44e3cb2f1966d86b4a422fa616b97f485a150c95 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -234,6 +234,7 @@ extern int dir_notify_enable;
  #define        FS_IOC_SETFLAGS                 _IOW('f', 2, long)
  #define        FS_IOC_GETVERSION               _IOR('v', 1, long)
  #define        FS_IOC_SETVERSION               _IOW('v', 2, long)
+#define FS_IOC_FIEMAP                  _IOWR('f', 11, struct fiemap)
  #define FS_IOC32_GETFLAGS              _IOR('f', 1, int)
  #define FS_IOC32_SETFLAGS              _IOW('f', 2, int)
  #define FS_IOC32_GETVERSION            _IOR('v', 1, int)
@@ -294,6 +295,7 @@ extern int dir_notify_enable;
  #include <linux/mutex.h>
  #include <linux/capability.h>
  #include <linux/semaphore.h>
+#include <linux/fiemap.h>
  
  #include <asm/atomic.h>
  #include <asm/byteorder.h>
@@ -1181,6 +1183,20 @@ extern void dentry_unhash(struct dentry *dentry);
   */
  extern int file_permission(struct file *, int);
  
+/*
+ * VFS FS_IOC_FIEMAP helper definitions.
+ */
+struct fiemap_extent_info {
+       unsigned int fi_flags;          /* Flags as passed from user */
+       unsigned int fi_extents_mapped; /* Number of mapped extents */
+       unsigned int fi_extents_max;    /* Size of fiemap_extent array */
+       struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent
+                                                * array */
+};
+int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
+                           u64 phys, u64 len, u32 flags);
+int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
+
  /*
   * File types
   *
@@ -1290,6 +1306,8 @@ struct inode_operations {
         void (*truncate_range)(struct inode *, loff_t, loff_t);
         long (*fallocate)(struct inode *inode, int mode, loff_t offset,
                           loff_t len);
+       int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
+                     u64 len);
  };
  
  struct seq_file;
@@ -1987,6 +2005,9 @@ extern int vfs_fstat(unsigned int, struct kstat *);
  
  extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
                     unsigned long arg);
+extern int generic_block_fiemap(struct inode *inode,
+                               struct fiemap_extent_info *fieinfo, u64 start,
+                               u64 len, get_block_t *get_block);
  
  extern void get_filesystem(struct file_system_type *fs);
  extern void put_filesystem(struct file_system_type *fs);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h

index 3dd2090070989863af4e87f9cc25b21900578bd9..66c3499478b57579c8b4a719bdee3b8a2c7c42bb 100644 (file)
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -850,7 +850,8 @@ struct journal_s
          */
         struct block_device     *j_dev;
         int                     j_blocksize;
-       unsigned long long              j_blk_offset;
+       unsigned long long      j_blk_offset;
+       char                    j_devname[BDEVNAME_SIZE+24];
  
         /*
          * Device which holds the client fs.  For internal journal this will be
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h

index 20838883535704c89cf72bafe29a28d12dea45e3..9007ccdfc1127cfe73db03e31dd82a843f8f4fa8 100644 (file)
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
  void percpu_counter_destroy(struct percpu_counter *fbc);
  void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
  void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
-s64 __percpu_counter_sum(struct percpu_counter *fbc, int set);
+s64 __percpu_counter_sum(struct percpu_counter *fbc);
  
  static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
  {
@@ -44,19 +44,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
  
  static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
  {
-       s64 ret = __percpu_counter_sum(fbc, 0);
+       s64 ret = __percpu_counter_sum(fbc);
         return ret < 0 ? 0 : ret;
  }
  
-static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc)
-{
-       return __percpu_counter_sum(fbc, 1);
-}
-
-
  static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
  {
-       return __percpu_counter_sum(fbc, 0);
+       return __percpu_counter_sum(fbc);
  }
  
  static inline s64 percpu_counter_read(struct percpu_counter *fbc)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c

index 4a8ba4bf5f6f2b1c0de7d16f794d6d39cbb00d31..a8663890a88c2e358f21b8cdc6f23a8ebb9d77e5 100644 (file)
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add);
   * Add up all the per-cpu counts, return the result.  This is a more accurate
   * but much slower version of percpu_counter_read_positive()
   */
-s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
+s64 __percpu_counter_sum(struct percpu_counter *fbc)
  {
         s64 ret;
         int cpu;
@@ -62,11 +62,9 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
         for_each_online_cpu(cpu) {
                 s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
                 ret += *pcount;
-               if (set)
-                       *pcount = 0;
+               *pcount = 0;
         }
-       if (set)
-               fbc->count = ret;
+       fbc->count = ret;
  
         spin_unlock(&fbc->lock);
         return ret;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 11 Oct 2008 20:23:48 +0000 (13:23 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 11 Oct 2008 20:23:48 +0000 (13:23 -0700)
Documentation/filesystems/ext4.txt		patch \| blob \| history
Documentation/filesystems/fiemap.txt	[new file with mode: 0644]	patch \| blob
Documentation/filesystems/proc.txt		patch \| blob \| history
MAINTAINERS		patch \| blob \| history
fs/Kconfig		patch \| blob \| history
fs/Makefile		patch \| blob \| history
fs/ext2/ext2.h		patch \| blob \| history
fs/ext2/file.c		patch \| blob \| history
fs/ext2/inode.c		patch \| blob \| history
fs/ext3/file.c		patch \| blob \| history
fs/ext3/inode.c		patch \| blob \| history
fs/ext4/Makefile		patch \| blob \| history
fs/ext4/acl.h		patch \| blob \| history
fs/ext4/balloc.c		patch \| blob \| history
fs/ext4/bitmap.c		patch \| blob \| history
fs/ext4/dir.c		patch \| blob \| history
fs/ext4/ext4.h		patch \| blob \| history
fs/ext4/ext4_extents.h		patch \| blob \| history
fs/ext4/ext4_i.h		patch \| blob \| history
fs/ext4/ext4_sb.h		patch \| blob \| history
fs/ext4/extents.c		patch \| blob \| history
fs/ext4/file.c		patch \| blob \| history
fs/ext4/fsync.c		patch \| blob \| history
fs/ext4/hash.c		patch \| blob \| history
fs/ext4/ialloc.c		patch \| blob \| history
fs/ext4/inode.c		patch \| blob \| history
fs/ext4/ioctl.c		patch \| blob \| history
fs/ext4/mballoc.c		patch \| blob \| history
fs/ext4/mballoc.h		patch \| blob \| history
fs/ext4/migrate.c		patch \| blob \| history
fs/ext4/namei.c		patch \| blob \| history
fs/ext4/resize.c		patch \| blob \| history
fs/ext4/super.c		patch \| blob \| history
fs/ext4/symlink.c		patch \| blob \| history
fs/ext4/xattr.c		patch \| blob \| history
fs/ext4/xattr.h		patch \| blob \| history
fs/ioctl.c		patch \| blob \| history
fs/jbd2/checkpoint.c		patch \| blob \| history
fs/jbd2/commit.c		patch \| blob \| history
fs/jbd2/journal.c		patch \| blob \| history
fs/ocfs2/alloc.c		patch \| blob \| history
fs/ocfs2/alloc.h		patch \| blob \| history
fs/ocfs2/extent_map.c		patch \| blob \| history
fs/ocfs2/extent_map.h		patch \| blob \| history
fs/ocfs2/file.c		patch \| blob \| history
include/linux/ext3_fs.h		patch \| blob \| history
include/linux/fiemap.h	[new file with mode: 0644]	patch \| blob
include/linux/fs.h		patch \| blob \| history
include/linux/jbd2.h		patch \| blob \| history
include/linux/percpu_counter.h		patch \| blob \| history
lib/percpu_counter.c		patch \| blob \| history