tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)"
        depends on EXPERIMENTAL
        select JBD2
+       select CRC16
        help
          Ext4dev is a predecessor filesystem of the next generation
          extended fs ext4, based on ext3 filesystem code. It will be
 
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 
+#include "group.h"
 /*
  * balloc.c contains the blocks allocation and deallocation routines
  */
 
 }
 
+/* Initializes an uninitialized block bitmap (if bh is non-NULL) and returns
+ * the number of free blocks in the group. */
+unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+                               int block_group, struct ext4_group_desc *gdp)
+{
+       unsigned long start;
+       int bit, bit_max;
+       unsigned free_blocks, group_blocks;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+       if (bh) {
+               J_ASSERT_BH(bh, buffer_locked(bh));
+
+               /* If the checksum is bad, mark all blocks used to prevent
+                * allocation, essentially implementing a per-group read-only
+                * flag. */
+               if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+                       ext4_error(sb, __FUNCTION__,
+                                  "Checksum bad for group %u\n", block_group);
+                       gdp->bg_free_blocks_count = 0;
+                       gdp->bg_free_inodes_count = 0;
+                       gdp->bg_itable_unused = 0;
+                       memset(bh->b_data, 0xff, sb->s_blocksize);
+                       return 0;
+               }
+               memset(bh->b_data, 0, sb->s_blocksize);
+       }
+
+       /* Check for superblock and gdt backups in this group */
+       bit_max = ext4_bg_has_super(sb, block_group);
+
+       if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
+           block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
+                         sbi->s_desc_per_block) {
+               if (bit_max) {
+                       bit_max += ext4_bg_num_gdb(sb, block_group);
+                       bit_max +=
+                               le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+               }
+       } else { /* For META_BG_BLOCK_GROUPS */
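+               /* With META_BG, only the first, second and last group of
+                * each meta-group hold a copy of the descriptor block */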
+               int group_rel = (block_group -
+                                le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
+                               EXT4_DESC_PER_BLOCK(sb);
+               if (group_rel == 0 || group_rel == 1 ||
+                   (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1))
+                       bit_max += 1;
+       }
+
+       if (block_group == sbi->s_groups_count - 1) {
+               /*
+                * Even though mke2fs always initializes the first and last
+                * group, if some other tool enabled the EXT4_BG_BLOCK_UNINIT
+                * flag we need to make sure we calculate the right number of
+                * free blocks.
+                */
+               group_blocks = ext4_blocks_count(sbi->s_es) -
+                       le32_to_cpu(sbi->s_es->s_first_data_block) -
+                       (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
+       } else {
+               group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
+       }
+
+       free_blocks = group_blocks - bit_max;
+
+       if (bh) {
+               for (bit = 0; bit < bit_max; bit++)
+                       ext4_set_bit(bit, bh->b_data);
+
+               start = block_group * EXT4_BLOCKS_PER_GROUP(sb) +
+                       le32_to_cpu(sbi->s_es->s_first_data_block);
+
+               /* Set bits for block and inode bitmaps, and inode table */
+               ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
+               ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data);
+               for (bit = le32_to_cpu(gdp->bg_inode_table) - start,
+                    bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
+                       ext4_set_bit(bit, bh->b_data);
+
+               /*
+                * Also, if the number of blocks within the group is less
+                * than blocksize * 8 (which is the size of the bitmap),
+                * set the rest of the block bitmap to 1.
+                */
+               mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
+       }
+
+       return free_blocks - sbi->s_itb_per_group - 2;
+}
+
+
 /*
  * The free blocks are managed by bitmaps.  A file system contains several
  * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
  *
  * Return buffer_head on success or NULL in case of failure.
  */
-static struct buffer_head *
+struct buffer_head *
 read_block_bitmap(struct super_block *sb, unsigned int block_group)
 {
        int i;
        struct buffer_head * bh = NULL;
        ext4_fsblk_t bitmap_blk;
 
-       desc = ext4_get_group_desc (sb, block_group, NULL);
+       desc = ext4_get_group_desc(sb, block_group, NULL);
        if (!desc)
                return NULL;
        bitmap_blk = ext4_block_bitmap(sb, desc);
-       bh = sb_bread(sb, bitmap_blk);
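+       /* For an uninitialized group the on-disk bitmap was never written,
+        * so construct it in memory instead of reading it from disk. */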
+       if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+               bh = sb_getblk(sb, bitmap_blk);
+               if (!buffer_uptodate(bh)) {
+                       lock_buffer(bh);
+                       if (!buffer_uptodate(bh)) {
+                               ext4_init_block_bitmap(sb, bh, block_group,
+                                                      desc);
+                               set_buffer_uptodate(bh);
+                       }
+                       unlock_buffer(bh);
+               }
+       } else {
+               bh = sb_bread(sb, bitmap_blk);
+       }
        if (!bh)
                ext4_error (sb, __FUNCTION__,
                            "Cannot read block bitmap - "
        desc->bg_free_blocks_count =
                cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
                        group_freed);
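+       /* The descriptor contents changed, so recompute its checksum */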
+       desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
        spin_unlock(sb_bgl_lock(sbi, block_group));
        percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
                        ret_block, goal_hits, goal_attempts);
 
        spin_lock(sb_bgl_lock(sbi, group_no));
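+       /* Allocating from this group, so its block bitmap is now in use
+        * and can no longer be treated as uninitialized */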
+       if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
+               gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
        gdp->bg_free_blocks_count =
                        cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
+       gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
        spin_unlock(sb_bgl_lock(sbi, group_no));
        percpu_counter_sub(&sbi->s_freeblocks_counter, num);
 
 
--- /dev/null
+/*
+ *  linux/fs/ext4/group.h
+ *
+ * Copyright (C) 2007 Cluster File Systems, Inc
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#ifndef _LINUX_EXT4_GROUP_H
+#define _LINUX_EXT4_GROUP_H
+
+extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
+                                  struct ext4_group_desc *gdp);
+extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
+                                      struct ext4_group_desc *gdp);
+struct buffer_head *read_block_bitmap(struct super_block *sb,
+                                     unsigned int block_group);
+extern unsigned ext4_init_block_bitmap(struct super_block *sb,
+                                      struct buffer_head *bh, int group,
+                                      struct ext4_group_desc *desc);
+#define ext4_free_blocks_after_init(sb, group, desc)                   \
+               ext4_init_block_bitmap(sb, NULL, group, desc)
+extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
+                                      struct buffer_head *bh, int group,
+                                      struct ext4_group_desc *desc);
+extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
+#endif /* _LINUX_EXT4_GROUP_H */
 
 
 #include "xattr.h"
 #include "acl.h"
+#include "group.h"
 
 /*
  * ialloc.c contains the inodes allocation and deallocation routines
  * the free blocks count in the block.
  */
 
+/*
+ * To avoid calling the atomic setbit hundreds or thousands of times, we only
+ * need to use it within a single byte (to ensure we get endianness right).
+ * We can use memset for the rest of the bitmap as there are no other users.
+ */
+void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
+{
+       int i;
+
+       if (start_bit >= end_bit)
+               return;
+
+       ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
+       for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
+               ext4_set_bit(i, bitmap);
+       if (i < end_bit)
+               memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
+}
+
+/* Initializes an uninitialized inode bitmap */
+unsigned ext4_init_inode_bitmap(struct super_block *sb,
+                               struct buffer_head *bh, int block_group,
+                               struct ext4_group_desc *gdp)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+       J_ASSERT_BH(bh, buffer_locked(bh));
+
+       /* If the checksum is bad, mark all blocks and inodes used to prevent
+        * allocation, essentially implementing a per-group read-only flag. */
+       if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+               ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
+                          block_group);
+               gdp->bg_free_blocks_count = 0;
+               gdp->bg_free_inodes_count = 0;
+               gdp->bg_itable_unused = 0;
+               memset(bh->b_data, 0xff, sb->s_blocksize);
+               return 0;
+       }
+
+       memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
+       mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
+                       bh->b_data);
+
+       return EXT4_INODES_PER_GROUP(sb);
+}
 
 /*
  * Read the inode allocation bitmap for a given block_group, reading
        desc = ext4_get_group_desc(sb, block_group, NULL);
        if (!desc)
                goto error_out;
-
-       bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
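+       /* For an uninitialized group, construct the inode bitmap in memory
+        * instead of reading it from disk. */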
+       if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+               bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc));
+               if (!buffer_uptodate(bh)) {
+                       lock_buffer(bh);
+                       if (!buffer_uptodate(bh)) {
+                               ext4_init_inode_bitmap(sb, bh, block_group,
+                                                      desc);
+                               set_buffer_uptodate(bh);
+                       }
+                       unlock_buffer(bh);
+               }
+       } else {
+               bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+       }
        if (!bh)
                ext4_error(sb, "read_inode_bitmap",
                            "Cannot read inode bitmap - "
                        if (is_directory)
                                gdp->bg_used_dirs_count = cpu_to_le16(
                                  le16_to_cpu(gdp->bg_used_dirs_count) - 1);
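+                       /* keep the group descriptor checksum up to date */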
+                       gdp->bg_checksum = ext4_group_desc_csum(sbi,
+                                                       block_group, gdp);
                        spin_unlock(sb_bgl_lock(sbi, block_group));
                        percpu_counter_inc(&sbi->s_freeinodes_counter);
                        if (is_directory)
        struct ext4_sb_info *sbi;
        int err = 0;
        struct inode *ret;
-       int i;
+       int i, free = 0;
 
        /* Cannot create files in a deleted directory */
        if (!dir || !dir->i_nlink)
        goto out;
 
 got:
-       ino += group * EXT4_INODES_PER_GROUP(sb) + 1;
-       if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
-               ext4_error (sb, "ext4_new_inode",
-                           "reserved inode or inode > inodes count - "
-                           "block_group = %d, inode=%lu", group, ino);
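+       /* 'ino' is kept group-relative (1-based) until i_ino is set below */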
+       ino++;
+       if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
+           ino > EXT4_INODES_PER_GROUP(sb)) {
+               ext4_error(sb, __FUNCTION__,
+                          "reserved inode or inode > inodes count - "
+                          "block_group = %d, inode=%lu", group,
+                          ino + group * EXT4_INODES_PER_GROUP(sb));
                err = -EIO;
                goto fail;
        }
        BUFFER_TRACE(bh2, "get_write_access");
        err = ext4_journal_get_write_access(handle, bh2);
        if (err) goto fail;
+
+       /* We may have to initialize the block bitmap if it isn't already */
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
+           gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+               struct buffer_head *block_bh = read_block_bitmap(sb, group);
+
+               BUFFER_TRACE(block_bh, "get block bitmap access");
+               err = ext4_journal_get_write_access(handle, block_bh);
+               if (err) {
+                       brelse(block_bh);
+                       goto fail;
+               }
+
+               free = 0;
+               spin_lock(sb_bgl_lock(sbi, group));
+               /* recheck and clear flag under lock if we still need to */
+               if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+                       gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+                       free = ext4_free_blocks_after_init(sb, group, gdp);
+                       gdp->bg_free_blocks_count = cpu_to_le16(free);
+               }
+               spin_unlock(sb_bgl_lock(sbi, group));
+
+               /* Don't need to dirty bitmap block if we didn't change it */
+               if (free) {
+                       BUFFER_TRACE(block_bh, "dirty block bitmap");
+                       err = ext4_journal_dirty_metadata(handle, block_bh);
+               }
+
+               brelse(block_bh);
+               if (err)
+                       goto fail;
+       }
+
        spin_lock(sb_bgl_lock(sbi, group));
+       /* If we didn't allocate from within the initialized part of the inode
+        * table then we need to initialize up to this inode. */
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+               if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+                       gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+
+                       /* When clearing EXT4_BG_INODE_UNINIT we don't want
+                        * to depend on the value of bg_itable_unused even
+                        * though mke2fs could have initialized it for us.
+                        * Instead we calculate the value below.
+                        */
+
+                       free = 0;
+               } else {
+                       free = EXT4_INODES_PER_GROUP(sb) -
+                               le16_to_cpu(gdp->bg_itable_unused);
+               }
+
+               /*
+                        * Check the relative inode number against the last
+                        * used relative inode number in this group.  If it is
+                        * greater, we need to update the bg_itable_unused
+                        * count.
+                */
+               if (ino > free)
+                       gdp->bg_itable_unused =
+                               cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
+       }
+
        gdp->bg_free_inodes_count =
                cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
        if (S_ISDIR(mode)) {
                gdp->bg_used_dirs_count =
                        cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
        }
+       gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
        spin_unlock(sb_bgl_lock(sbi, group));
        BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
        err = ext4_journal_dirty_metadata(handle, bh2);
                inode->i_gid = current->fsgid;
        inode->i_mode = mode;
 
-       inode->i_ino = ino;
+       inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
        /* This is the optimal IO size (for stat), not the fs block size */
        inode->i_blocks = 0;
        inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
 
 #include <linux/errno.h>
 #include <linux/slab.h>
 
+#include "group.h"
 
 #define outside(b, first, last)        ((b) < (first) || (b) >= (last))
 #define inside(b, first, last) ((b) >= (first) && (b) < (last))
        return bh;
 }
 
-/*
- * To avoid calling the atomic setbit hundreds or thousands of times, we only
- * need to use it within a single byte (to ensure we get endianness right).
- * We can use memset for the rest of the bitmap as there are no other users.
- */
-static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
-{
-       int i;
-
-       if (start_bit >= end_bit)
-               return;
-
-       ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
-       for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
-               ext4_set_bit(i, bitmap);
-       if (i < end_bit)
-               memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
-}
-
 /*
  * Set up the block and inode bitmaps, and the inode table for the new group.
  * This doesn't need to be part of the main transaction, since we are only
        ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
        gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
        gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
+       gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
 
        /*
         * Make the new blocks and inodes valid next.  We do this before
 
 #include <linux/quotaops.h>
 #include <linux/seq_file.h>
 #include <linux/log2.h>
+#include <linux/crc16.h>
 
 #include <asm/uaccess.h>
 
 #include "xattr.h"
 #include "acl.h"
 #include "namei.h"
+#include "group.h"
 
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                             unsigned long journal_devnum);
        return res;
 }
 
+__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
+                           struct ext4_group_desc *gdp)
+{
+       __u16 crc = 0;
+
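+       /* Seed crc16 with the filesystem UUID, then fold in the little-endian
+        * group number and the descriptor itself, skipping bg_checksum */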
+       if (sbi->s_es->s_feature_ro_compat &
+           cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+               int offset = offsetof(struct ext4_group_desc, bg_checksum);
+               __le32 le_group = cpu_to_le32(block_group);
+
+               crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
+               crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
+               crc = crc16(crc, (__u8 *)gdp, offset);
+               offset += sizeof(gdp->bg_checksum); /* skip checksum */
+               /* for the 64-bit descriptor format, checksum the remaining
+                * fields of struct ext4_group_desc as well */
+               if ((sbi->s_es->s_feature_incompat &
+                    cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
+                   offset < le16_to_cpu(sbi->s_es->s_desc_size))
+                       crc = crc16(crc, (__u8 *)gdp + offset,
+                                   le16_to_cpu(sbi->s_es->s_desc_size) -
+                                       offset);
+       }
+
+       return cpu_to_le16(crc);
+}
+
+int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
+                               struct ext4_group_desc *gdp)
+{
+       if ((sbi->s_es->s_feature_ro_compat &
+            cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
+           (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
+               return 0;
+
+       return 1;
+}
+
 /* Called at mount-time, super-block is locked */
 static int ext4_check_descriptors (struct super_block * sb)
 {
                                    i, inode_table);
                        return 0;
                }
+               if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
+                       ext4_error(sb, __FUNCTION__,
+                                  "Checksum for group %d failed (%u!=%u)\n", i,
+                                  le16_to_cpu(ext4_group_desc_csum(sbi, i,
+                                                                   gdp)),
+                                  le16_to_cpu(gdp->bg_checksum));
+                       return 0;
+               }
                first_block += EXT4_BLOCKS_PER_GROUP(sb);
                gdp = (struct ext4_group_desc *)
                        ((__u8 *)gdp + EXT4_DESC_SIZE(sb));
 
  */
 struct ext4_group_desc
 {
-       __le32  bg_block_bitmap;                /* Blocks bitmap block */
-       __le32  bg_inode_bitmap;                /* Inodes bitmap block */
+       __le32  bg_block_bitmap;        /* Blocks bitmap block */
+       __le32  bg_inode_bitmap;        /* Inodes bitmap block */
        __le32  bg_inode_table;         /* Inodes table block */
        __le16  bg_free_blocks_count;   /* Free blocks count */
        __le16  bg_free_inodes_count;   /* Free inodes count */
        __le16  bg_used_dirs_count;     /* Directories count */
-       __u16   bg_flags;
-       __u32   bg_reserved[3];
+       __le16  bg_flags;               /* EXT4_BG_flags (INODE_UNINIT, etc) */
+       __u32   bg_reserved[2];         /* Likely block/inode bitmap checksum */
+       __le16  bg_itable_unused;       /* Unused inodes count */
+       __le16  bg_checksum;            /* crc16(sb_uuid+group+desc) */
        __le32  bg_block_bitmap_hi;     /* Blocks bitmap block MSB */
        __le32  bg_inode_bitmap_hi;     /* Inodes bitmap block MSB */
        __le32  bg_inode_table_hi;      /* Inodes table block MSB */
 };
 
+#define EXT4_BG_INODE_UNINIT   0x0001 /* Inode table/bitmap not in use */
+#define EXT4_BG_BLOCK_UNINIT   0x0002 /* Block bitmap not in use */
+#define EXT4_BG_INODE_ZEROED   0x0004 /* On-disk itable initialized to zero */
+
 #ifdef __KERNEL__
 #include <linux/ext4_fs_i.h>
 #include <linux/ext4_fs_sb.h>
 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER    0x0001
 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE      0x0002
 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR       0x0004
+#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM                0x0010
 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK       0x0020
 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE     0x0040
 
                                         EXT4_FEATURE_INCOMPAT_64BIT)
 #define EXT4_FEATURE_RO_COMPAT_SUPP    (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
                                         EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+                                        EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
                                         EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
                                         EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
                                         EXT4_FEATURE_RO_COMPAT_BTREE_DIR)