/* IO errors */
        int errors;
+       int mirror_num;
 
        /* for reads, this is the bio we are copying the data into */
        struct bio *orig_bio;
+
+       /*
+        * the start of a variable length array of checksums only
+        * used by reads
+        */
+       u32 sums;
 };
 
+/*
+ * Number of bytes to allocate for a compressed_bio that covers disk_size
+ * bytes on disk: the struct itself plus one checksum of the super block's
+ * csum size for every sector, with disk_size rounded up to whole sectors.
+ */
+static inline int compressed_bio_size(struct btrfs_root *root,
+                                     unsigned long disk_size)
+{
+       unsigned long nr_sectors;
+       u16 csum_size;
+
+       csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
+       nr_sectors = (disk_size + root->sectorsize - 1) / root->sectorsize;
+
+       return sizeof(struct compressed_bio) + nr_sectors * csum_size;
+}
+
 static struct bio *compressed_bio_alloc(struct block_device *bdev,
                                        u64 first_byte, gfp_t gfp_flags)
 {
        return bio;
 }
 
+/*
+ * Verify the checksums of the compressed pages attached to cb.
+ *
+ * Every page in cb->compressed_pages is checksummed over PAGE_CACHE_SIZE
+ * bytes and compared against the next entry of the array that starts at
+ * cb->sums (one u32 per page, in page order).  disk_start is only used
+ * in the failure message.
+ *
+ * Returns 0 when all pages match, or without checking anything when
+ * NODATASUM is set as a root option or as an inode flag.  Returns -EIO
+ * on the first page whose checksum does not match.
+ */
+static int check_compressed_csum(struct inode *inode,
+                                struct compressed_bio *cb,
+                                u64 disk_start)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct page *page;
+       unsigned long i;
+       char *kaddr;
+       u32 csum;
+       u32 *cb_sum = &cb->sums;
+
+       if (btrfs_test_opt(root, NODATASUM) ||
+           btrfs_test_flag(inode, NODATASUM))
+               return 0;
+
+       for (i = 0; i < cb->nr_pages; i++) {
+               page = cb->compressed_pages[i];
+               csum = ~(u32)0;
+
+               /* the atomic mapping is dropped before anything is printed */
+               kaddr = kmap_atomic(page, KM_USER0);
+               csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
+               btrfs_csum_final(csum, (char *)&csum);
+               kunmap_atomic(kaddr, KM_USER0);
+
+               if (csum != *cb_sum) {
+                       /*
+                        * state the log level explicitly instead of
+                        * depending on the configured printk default
+                        */
+                       printk(KERN_WARNING
+                              "btrfs csum failed ino %lu extent %llu csum %u "
+                              "wanted %u mirror %d\n", inode->i_ino,
+                              (unsigned long long)disk_start,
+                              csum, *cb_sum, cb->mirror_num);
+                       return -EIO;
+               }
+               cb_sum++;
+       }
+
+       return 0;
+}
+
 /* when we finish reading compressed pages from the disk, we
  * decompress them and then run the bio end_io routines on the
  * decompressed pages (in the inode address space).
        if (!atomic_dec_and_test(&cb->pending_bios))
                goto out;
 
+       inode = cb->inode;
+       ret = check_compressed_csum(inode, cb, (u64)bio->bi_sector << 9);
+       if (ret)
+               goto csum_failed;
+
        /* ok, we're the last bio for this extent, lets start
         * the decompression.
         */
-       inode = cb->inode;
        tree = &BTRFS_I(inode)->io_tree;
        ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
                                        cb->start,
                                        cb->orig_bio->bi_io_vec,
                                        cb->orig_bio->bi_vcnt,
                                        cb->compressed_len);
+csum_failed:
        if (ret)
                cb->errors = 1;
 
        /* do io completion on the original bio */
        if (cb->errors) {
                bio_io_error(cb->orig_bio);
-       } else
+       } else {
+               int bio_index = 0;
+               struct bio_vec *bvec = cb->orig_bio->bi_io_vec;
+
+               /*
+                * we have verified the checksum already, set page
+                * checked so the end_io handlers know about it
+                */
+               while(bio_index < cb->orig_bio->bi_vcnt) {
+                       SetPageChecked(bvec->bv_page);
+                       bvec++;
+                       bio_index++;
+               }
                bio_endio(cb->orig_bio, 0);
+       }
 
        /* finally free the cb struct */
        kfree(cb->compressed_pages);
        int ret;
 
        WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
-       cb = kmalloc(sizeof(*cb), GFP_NOFS);
+       cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
        atomic_set(&cb->pending_bios, 0);
        cb->errors = 0;
        cb->inode = inode;
        cb->start = start;
        cb->len = len;
+       cb->mirror_num = 0;
        cb->compressed_pages = compressed_pages;
        cb->compressed_len = compressed_len;
        cb->orig_bio = NULL;
 
        bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
-       ret = btrfs_csum_file_bytes(root, inode, start, len);
-       BUG_ON(ret);
-
        bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
        bio->bi_private = cb;
        bio->bi_end_io = end_compressed_bio_write;
                        ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
                        BUG_ON(ret);
 
+                       ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
+                       BUG_ON(ret);
+
                        ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
                        BUG_ON(ret);
 
        ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
        BUG_ON(ret);
 
+       ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
+       BUG_ON(ret);
+
        ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
        BUG_ON(ret);
 
        u64 em_start;
        struct extent_map *em;
        int ret;
+       u32 *sums;
 
        tree = &BTRFS_I(inode)->io_tree;
        em_tree = &BTRFS_I(inode)->extent_tree;
                                   PAGE_CACHE_SIZE);
        spin_unlock(&em_tree->lock);
 
-       cb = kmalloc(sizeof(*cb), GFP_NOFS);
+       compressed_len = em->block_len;
+       cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
        atomic_set(&cb->pending_bios, 0);
        cb->errors = 0;
        cb->inode = inode;
+       cb->mirror_num = mirror_num;
+       sums = &cb->sums;
 
        cb->start = em->orig_start;
-       compressed_len = em->block_len;
        em_len = em->len;
        em_start = em->start;
+
        free_extent_map(em);
        em = NULL;
 
 
        add_ra_bio_pages(inode, em_start + em_len, cb);
 
-       if (!btrfs_test_opt(root, NODATASUM) &&
-           !btrfs_test_flag(inode, NODATASUM)) {
-               btrfs_lookup_bio_sums(root, inode, cb->orig_bio);
-       }
-
        /* include any pages we added in add_ra-bio_pages */
        uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
        cb->len = uncompressed_len;
        for (page_index = 0; page_index < nr_pages; page_index++) {
                page = cb->compressed_pages[page_index];
                page->mapping = inode->i_mapping;
+               page->index = em_start >> PAGE_CACHE_SHIFT;
+
                if (comp_bio->bi_size)
                        ret = tree->ops->merge_bio_hook(page, 0,
                                                        PAGE_CACHE_SIZE,
                         */
                        atomic_inc(&cb->pending_bios);
 
-                       ret = btrfs_map_bio(root, READ, comp_bio, 0, 0);
+                       if (!btrfs_test_opt(root, NODATASUM) &&
+                           !btrfs_test_flag(inode, NODATASUM)) {
+                               btrfs_lookup_bio_sums(root, inode, comp_bio,
+                                                     sums);
+                       }
+                       sums += (comp_bio->bi_size + root->sectorsize - 1) /
+                               root->sectorsize;
+
+                       ret = btrfs_map_bio(root, READ, comp_bio,
+                                           mirror_num, 0);
                        BUG_ON(ret);
 
                        bio_put(comp_bio);
        ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
        BUG_ON(ret);
 
-       ret = btrfs_map_bio(root, READ, comp_bio, 0, 0);
+       if (!btrfs_test_opt(root, NODATASUM) &&
+           !btrfs_test_flag(inode, NODATASUM)) {
+               btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+       }
+
+       ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
        BUG_ON(ret);
 
        bio_put(comp_bio);
 
 /* directory objectid inside the root tree */
 #define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
 
+/* holds checksums of all the data extents */
+#define BTRFS_CSUM_TREE_OBJECTID 7ULL
+
 /* orhpan objectid for tracking unlinked/truncated files */
 #define BTRFS_ORPHAN_OBJECTID -5ULL
 
 #define BTRFS_TREE_RELOC_OBJECTID -8ULL
 #define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
 
+/*
+ * extent checksums all have this objectid
+ * this allows them to share the logging tree
+ * for fsyncs
+ */
+#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
+
 /* dummy objectid represents multiple objectids */
 #define BTRFS_MULTIPLE_OBJECTIDS -255ULL
 
        struct btrfs_root *chunk_root;
        struct btrfs_root *dev_root;
        struct btrfs_root *fs_root;
+       struct btrfs_root *csum_root;
 
        /* the log root tree is a directory of all the other log roots */
        struct btrfs_root *log_root_tree;
        struct btrfs_workers workers;
        struct btrfs_workers delalloc_workers;
        struct btrfs_workers endio_workers;
+       struct btrfs_workers endio_meta_workers;
        struct btrfs_workers endio_write_workers;
        struct btrfs_workers submit_workers;
        /*
  * extent data is for file data
  */
 #define BTRFS_EXTENT_DATA_KEY  108
+
 /*
- * csum items have the checksums for data in the extents
+ * extent csums are stored in a separate tree and hold csums for
+ * an entire extent on disk.
  */
-#define BTRFS_CSUM_ITEM_KEY    120
-
-
-/* reserve 21-31 for other file/dir stuff */
+#define BTRFS_EXTENT_CSUM_KEY  128
 
 /*
  * root items point to tree roots.  There are typically in the root
 
 /* file-item.c */
 int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
-                         struct bio *bio);
+                         struct bio *bio, u32 *dst);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             u64 objectid, u64 pos,
                             struct btrfs_path *path, u64 objectid,
                             u64 bytenr, int mod);
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, struct inode *inode,
+                          struct btrfs_root *root,
                           struct btrfs_ordered_sum *sums);
 int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
-                      struct bio *bio);
+                      struct bio *bio, u64 file_start, int contig);
 int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
                          u64 start, unsigned long len);
 struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path,
-                                         u64 objectid, u64 offset,
-                                         int cow);
+                                         u64 bytenr, int cow);
 int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root, struct btrfs_path *path,
                        u64 isize);
 
        end_io_wq->error = err;
        end_io_wq->work.func = end_workqueue_fn;
        end_io_wq->work.flags = 0;
-       if (bio->bi_rw & (1 << BIO_RW))
+
+       if (bio->bi_rw & (1 << BIO_RW)) {
                btrfs_queue_worker(&fs_info->endio_write_workers,
                                   &end_io_wq->work);
-       else
-               btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
+       } else {
+               if (end_io_wq->metadata)
+                       btrfs_queue_worker(&fs_info->endio_meta_workers,
+                                          &end_io_wq->work);
+               else
+                       btrfs_queue_worker(&fs_info->endio_workers,
+                                          &end_io_wq->work);
+       }
 }
 
 int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
        info = (struct btrfs_fs_info *)bdi->unplug_io_data;
        list_for_each(cur, &info->fs_devices->devices) {
                device = list_entry(cur, struct btrfs_device, dev_list);
+               if (!device->bdev)
+                       continue;
+
                bdi = blk_get_backing_dev_info(device->bdev);
                if (bdi->unplug_io_fn) {
                        bdi->unplug_io_fn(bdi, page);
         * blocksize <= pagesize, it is basically a noop
         */
        if (end_io_wq->metadata && !bio_ready_for_csum(bio)) {
-               btrfs_queue_worker(&fs_info->endio_workers,
+               btrfs_queue_worker(&fs_info->endio_meta_workers,
                                   &end_io_wq->work);
                return;
        }
        struct buffer_head *bh;
        struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root),
                                                 GFP_NOFS);
+       struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
+                                                GFP_NOFS);
        struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root),
                                               GFP_NOFS);
        struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
        struct btrfs_super_block *disk_super;
 
        if (!extent_root || !tree_root || !fs_info ||
-           !chunk_root || !dev_root) {
+           !chunk_root || !dev_root || !csum_root) {
                err = -ENOMEM;
                goto fail;
        }
        init_completion(&fs_info->kobj_unregister);
        fs_info->tree_root = tree_root;
        fs_info->extent_root = extent_root;
+       fs_info->csum_root = csum_root;
        fs_info->chunk_root = chunk_root;
        fs_info->dev_root = dev_root;
        fs_info->fs_devices = fs_devices;
        btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1);
        btrfs_init_workers(&fs_info->endio_workers, "endio",
                           fs_info->thread_pool_size);
+       btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta",
+                          fs_info->thread_pool_size);
        btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
                           fs_info->thread_pool_size);
 
        btrfs_start_workers(&fs_info->delalloc_workers, 1);
        btrfs_start_workers(&fs_info->fixup_workers, 1);
        btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
+       btrfs_start_workers(&fs_info->endio_meta_workers,
+                           fs_info->thread_pool_size);
        btrfs_start_workers(&fs_info->endio_write_workers,
                            fs_info->thread_pool_size);
 
        if (ret)
                goto fail_extent_root;
 
+       ret = find_and_setup_root(tree_root, fs_info,
+                                 BTRFS_CSUM_TREE_OBJECTID, csum_root);
+       if (ret)
+               goto fail_extent_root;
+
+       csum_root->track_dirty = 1;
+
        btrfs_read_block_groups(extent_root);
 
        fs_info->generation = generation + 1;
        fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
                                               "btrfs-cleaner");
        if (!fs_info->cleaner_kthread)
-               goto fail_extent_root;
+               goto fail_csum_root;
 
        fs_info->transaction_kthread = kthread_run(transaction_kthread,
                                                   tree_root,
        filemap_write_and_wait(fs_info->btree_inode->i_mapping);
        invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 
+fail_csum_root:
+       free_extent_buffer(csum_root->node);
 fail_extent_root:
        free_extent_buffer(extent_root->node);
 fail_tree_root:
        btrfs_stop_workers(&fs_info->delalloc_workers);
        btrfs_stop_workers(&fs_info->workers);
        btrfs_stop_workers(&fs_info->endio_workers);
+       btrfs_stop_workers(&fs_info->endio_meta_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
        btrfs_stop_workers(&fs_info->submit_workers);
 fail_iput:
        kfree(fs_info);
        kfree(chunk_root);
        kfree(dev_root);
+       kfree(csum_root);
        return ERR_PTR(err);
 }
 
        if (root->fs_info->dev_root->node);
                free_extent_buffer(root->fs_info->dev_root->node);
 
+       /* only release the csum root's node when one is actually set */
+       if (root->fs_info->csum_root->node)
+               free_extent_buffer(root->fs_info->csum_root->node);
+
        btrfs_free_block_groups(root->fs_info);
 
        del_fs_roots(fs_info);
        btrfs_stop_workers(&fs_info->delalloc_workers);
        btrfs_stop_workers(&fs_info->workers);
        btrfs_stop_workers(&fs_info->endio_workers);
+       btrfs_stop_workers(&fs_info->endio_meta_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
        btrfs_stop_workers(&fs_info->submit_workers);
 
        kfree(fs_info->tree_root);
        kfree(fs_info->chunk_root);
        kfree(fs_info->dev_root);
+       kfree(fs_info->csum_root);
        return 0;
 }
 
 
        int whole_page;
        int ret;
 
+       if (err)
+               uptodate = 0;
+
        do {
                struct page *page = bvec->bv_page;
                tree = &BTRFS_I(page->mapping->host)->io_tree;
                        if (ret == 0) {
                                uptodate =
                                        test_bit(BIO_UPTODATE, &bio->bi_flags);
+                               if (err)
+                                       uptodate = 0;
                                continue;
                        }
                }
 
 struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path,
-                                         u64 objectid, u64 offset,
-                                         int cow)
+                                         u64 bytenr, int cow)
 {
        int ret;
        struct btrfs_key file_key;
                btrfs_super_csum_size(&root->fs_info->super_copy);
        int csums_in_item;
 
-       file_key.objectid = objectid;
-       file_key.offset = offset;
-       btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
+       file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+       file_key.offset = bytenr;
+       btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
        ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
        if (ret < 0)
                goto fail;
                        goto fail;
                path->slots[0]--;
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-               if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY ||
-                   found_key.objectid != objectid) {
+               if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY)
                        goto fail;
-               }
-               csum_offset = (offset - found_key.offset) >>
+
+               csum_offset = (bytenr - found_key.offset) >>
                                root->fs_info->sb->s_blocksize_bits;
                csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
                csums_in_item /= csum_size;
 }
 
 int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
-                         struct bio *bio)
+                         struct bio *bio, u32 *dst)
 {
        u32 sum;
        struct bio_vec *bvec = bio->bi_io_vec;
        u64 offset;
        u64 item_start_offset = 0;
        u64 item_last_offset = 0;
+       u64 disk_bytenr;
        u32 diff;
        u16 csum_size =
                btrfs_super_csum_size(&root->fs_info->super_copy);
 
        WARN_ON(bio->bi_vcnt <= 0);
 
+       disk_bytenr = (u64)bio->bi_sector << 9;
        while(bio_index < bio->bi_vcnt) {
                offset = page_offset(bvec->bv_page) + bvec->bv_offset;
-               ret = btrfs_find_ordered_sum(inode, offset, &sum);
+               ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum);
                if (ret == 0)
                        goto found;
 
-               if (!item || offset < item_start_offset ||
-                   offset >= item_last_offset) {
+               if (!item || disk_bytenr < item_start_offset ||
+                   disk_bytenr >= item_last_offset) {
                        struct btrfs_key found_key;
                        u32 item_size;
 
                        if (item)
                                btrfs_release_path(root, path);
-                       item = btrfs_lookup_csum(NULL, root, path,
-                                                inode->i_ino, offset, 0);
+                       item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
+                                                path, disk_bytenr, 0);
                        if (IS_ERR(item)) {
                                ret = PTR_ERR(item);
                                if (ret == -ENOENT || ret == -EFBIG)
                 * this byte range must be able to fit inside
                 * a single leaf so it will also fit inside a u32
                 */
-               diff = offset - item_start_offset;
+               diff = disk_bytenr - item_start_offset;
                diff = diff / root->sectorsize;
                diff = diff * csum_size;
 
                                   ((unsigned long)item) + diff,
                                   csum_size);
 found:
-               set_state_private(io_tree, offset, sum);
+               if (dst)
+                       *dst++ = sum;
+               else
+                       set_state_private(io_tree, offset, sum);
+               disk_bytenr += bvec->bv_len;
                bio_index++;
                bvec++;
        }
        return 0;
 }
 
-int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
-                         u64 start, unsigned long len)
-{
-       struct btrfs_ordered_sum *sums;
-       struct btrfs_sector_sum *sector_sum;
-       struct btrfs_ordered_extent *ordered;
-       char *data;
-       struct page *page;
-       unsigned long total_bytes = 0;
-       unsigned long this_sum_bytes = 0;
-
-       sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS);
-       if (!sums)
-               return -ENOMEM;
-
-       sector_sum = sums->sums;
-       sums->file_offset = start;
-       sums->len = len;
-       INIT_LIST_HEAD(&sums->list);
-       ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset);
-       BUG_ON(!ordered);
-
-       while(len > 0) {
-               if (start >= ordered->file_offset + ordered->len ||
-                   start < ordered->file_offset) {
-                       sums->len = this_sum_bytes;
-                       this_sum_bytes = 0;
-                       btrfs_add_ordered_sum(inode, ordered, sums);
-                       btrfs_put_ordered_extent(ordered);
-
-                       sums = kzalloc(btrfs_ordered_sum_size(root, len),
-                                      GFP_NOFS);
-                       BUG_ON(!sums);
-                       sector_sum = sums->sums;
-                       sums->len = len;
-                       sums->file_offset = start;
-                       ordered = btrfs_lookup_ordered_extent(inode,
-                                                     sums->file_offset);
-                       BUG_ON(!ordered);
-               }
-
-               page = find_get_page(inode->i_mapping,
-                                    start >> PAGE_CACHE_SHIFT);
-
-               data = kmap_atomic(page, KM_USER0);
-               sector_sum->sum = ~(u32)0;
-               sector_sum->sum = btrfs_csum_data(root, data, sector_sum->sum,
-                                                 PAGE_CACHE_SIZE);
-               kunmap_atomic(data, KM_USER0);
-               btrfs_csum_final(sector_sum->sum,
-                                (char *)&sector_sum->sum);
-               sector_sum->offset = page_offset(page);
-               page_cache_release(page);
-
-               sector_sum++;
-               total_bytes += PAGE_CACHE_SIZE;
-               this_sum_bytes += PAGE_CACHE_SIZE;
-               start += PAGE_CACHE_SIZE;
-
-               WARN_ON(len < PAGE_CACHE_SIZE);
-               len -= PAGE_CACHE_SIZE;
-       }
-       btrfs_add_ordered_sum(inode, ordered, sums);
-       btrfs_put_ordered_extent(ordered);
-       return 0;
-}
-
 int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
-                      struct bio *bio)
+                      struct bio *bio, u64 file_start, int contig)
 {
        struct btrfs_ordered_sum *sums;
        struct btrfs_sector_sum *sector_sum;
        unsigned long total_bytes = 0;
        unsigned long this_sum_bytes = 0;
        u64 offset;
+       u64 disk_bytenr;
 
        WARN_ON(bio->bi_vcnt <= 0);
        sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
                return -ENOMEM;
 
        sector_sum = sums->sums;
-       sums->file_offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+       disk_bytenr = (u64)bio->bi_sector << 9;
        sums->len = bio->bi_size;
        INIT_LIST_HEAD(&sums->list);
-       ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset);
+
+       if (contig)
+               offset = file_start;
+       else
+               offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+
+       ordered = btrfs_lookup_ordered_extent(inode, offset);
        BUG_ON(!ordered);
+       sums->bytenr = ordered->start;
 
        while(bio_index < bio->bi_vcnt) {
-               offset = page_offset(bvec->bv_page) + bvec->bv_offset;
-               if (offset >= ordered->file_offset + ordered->len ||
-                   offset < ordered->file_offset) {
+               if (!contig)
+                       offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+
+               if (!contig && (offset >= ordered->file_offset + ordered->len ||
+                   offset < ordered->file_offset)) {
                        unsigned long bytes_left;
                        sums->len = this_sum_bytes;
                        this_sum_bytes = 0;
                        BUG_ON(!sums);
                        sector_sum = sums->sums;
                        sums->len = bytes_left;
-                       sums->file_offset = offset;
-                       ordered = btrfs_lookup_ordered_extent(inode,
-                                                     sums->file_offset);
+                       ordered = btrfs_lookup_ordered_extent(inode, offset);
                        BUG_ON(!ordered);
+                       sums->bytenr = ordered->start;
                }
 
                data = kmap_atomic(bvec->bv_page, KM_USER0);
                kunmap_atomic(data, KM_USER0);
                btrfs_csum_final(sector_sum->sum,
                                 (char *)&sector_sum->sum);
-               sector_sum->offset = page_offset(bvec->bv_page) +
-                       bvec->bv_offset;
+               sector_sum->bytenr = disk_bytenr;
 
                sector_sum++;
                bio_index++;
                total_bytes += bvec->bv_len;
                this_sum_bytes += bvec->bv_len;
+               disk_bytenr += bvec->bv_len;
+               offset += bvec->bv_len;
                bvec++;
        }
        this_sum_bytes = 0;
 }
 
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, struct inode *inode,
+                          struct btrfs_root *root,
                           struct btrfs_ordered_sum *sums)
 {
-       u64 objectid = inode->i_ino;
-       u64 offset;
+       u64 bytenr;
        int ret;
        struct btrfs_key file_key;
        struct btrfs_key found_key;
 again:
        next_offset = (u64)-1;
        found_next = 0;
-       offset = sector_sum->offset;
-       file_key.objectid = objectid;
-       file_key.offset = offset;
-       btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
+       file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+       file_key.offset = sector_sum->bytenr;
+       bytenr = sector_sum->bytenr;
+       btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
 
-       mutex_lock(&BTRFS_I(inode)->csum_mutex);
-       item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1);
+       item = btrfs_lookup_csum(trans, root, path, sector_sum->bytenr, 1);
        if (!IS_ERR(item)) {
                leaf = path->nodes[0];
                ret = 0;
                        slot = 0;
                }
                btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
-               if (found_key.objectid != objectid ||
-                   found_key.type != BTRFS_CSUM_ITEM_KEY) {
+               if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
+                   found_key.type != BTRFS_EXTENT_CSUM_KEY) {
                        found_next = 1;
                        goto insert;
                }
        path->slots[0]--;
        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-       csum_offset = (offset - found_key.offset) >>
+       csum_offset = (bytenr - found_key.offset) >>
                        root->fs_info->sb->s_blocksize_bits;
-       if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY ||
-           found_key.objectid != objectid ||
+       if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY ||
+           found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
            csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) {
                goto insert;
        }
        btrfs_release_path(root, path);
        csum_offset = 0;
        if (found_next) {
-               u64 tmp = min((u64)i_size_read(inode), next_offset);
-               tmp -= offset & ~((u64)root->sectorsize -1);
+               u64 tmp = total_bytes + root->sectorsize;
+               u64 next_sector = sector_sum->bytenr;
+               struct btrfs_sector_sum *next = sector_sum + 1;
+
+               while(tmp < sums->len) {
+                       if (next_sector + root->sectorsize != next->bytenr)
+                               break;
+                       tmp += root->sectorsize;
+                       next_sector = next->bytenr;
+                       next++;
+               }
+               tmp = min(tmp, next_offset - file_key.offset);
                tmp >>= root->fs_info->sb->s_blocksize_bits;
                tmp = max((u64)1, tmp);
                tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size));
        item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
                                      btrfs_item_size_nr(leaf, path->slots[0]));
        eb_token = NULL;
-       mutex_unlock(&BTRFS_I(inode)->csum_mutex);
        cond_resched();
 next_sector:
 
        if (total_bytes < sums->len) {
                item = (struct btrfs_csum_item *)((char *)item +
                                                  csum_size);
-               if (item < item_end && offset + PAGE_CACHE_SIZE ==
-                   sector_sum->offset) {
-                           offset = sector_sum->offset;
+               if (item < item_end && bytenr + PAGE_CACHE_SIZE ==
+                   sector_sum->bytenr) {
+                       bytenr = sector_sum->bytenr;
                        goto next_sector;
                }
        }
        return ret;
 
 fail_unlock:
-       mutex_unlock(&BTRFS_I(inode)->csum_mutex);
        goto out;
 }
 
 
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret = 0;
 
-       ret = btrfs_csum_one_bio(root, inode, bio);
+       ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
        BUG_ON(ret);
        return 0;
 }
                btrfs_test_flag(inode, NODATASUM);
 
        if (!(rw & (1 << BIO_RW))) {
-
-               if (bio_flags & EXTENT_BIO_COMPRESSED)
+               if (bio_flags & EXTENT_BIO_COMPRESSED) {
                        return btrfs_submit_compressed_read(inode, bio,
                                                    mirror_num, bio_flags);
-               else if (!skip_sum)
-                       btrfs_lookup_bio_sums(root, inode, bio);
+               } else if (!skip_sum)
+                       btrfs_lookup_bio_sums(root, inode, bio, NULL);
                goto mapit;
        } else if (!skip_sum) {
                /* we're doing a write, do the async checksumming */
        btrfs_set_trans_block_group(trans, inode);
        list_for_each(cur, list) {
                sum = list_entry(cur, struct btrfs_ordered_sum, list);
-               btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root,
-                                      inode, sum);
+               btrfs_csum_file_blocks(trans,
+                      BTRFS_I(inode)->root->fs_info->csum_root, sum);
        }
        return 0;
 }
        u64 start;
        u64 len;
        u64 logical;
+       unsigned long bio_flags;
        int last_mirror;
 };
 
        int ret;
        int rw;
        u64 logical;
-       unsigned long bio_flags = 0;
 
        ret = get_state_private(failure_tree, start, &private);
        if (ret) {
                failrec->start = start;
                failrec->len = end - start + 1;
                failrec->last_mirror = 0;
+               failrec->bio_flags = 0;
 
                spin_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, start, failrec->len);
                }
                logical = start - em->start;
                logical = em->block_start + logical;
-               if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
-                       bio_flags = EXTENT_BIO_COMPRESSED;
+               if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
+                       logical = em->block_start;
+                       failrec->bio_flags = EXTENT_BIO_COMPRESSED;
+               }
                failrec->logical = logical;
                free_extent_map(em);
                set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
        bio->bi_sector = failrec->logical >> 9;
        bio->bi_bdev = failed_bio->bi_bdev;
        bio->bi_size = 0;
+
        bio_add_page(bio, page, failrec->len, start - page_offset(page));
        if (failed_bio->bi_rw & (1 << BIO_RW))
                rw = WRITE;
 
        BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
                                                      failrec->last_mirror,
-                                                     bio_flags);
+                                                     failrec->bio_flags);
        return 0;
 }
 
        u32 csum = ~(u32)0;
        unsigned long flags;
 
+       if (PageChecked(page)) {
+               ClearPageChecked(page);
+               goto good;
+       }
        if (btrfs_test_opt(root, NODATASUM) ||
            btrfs_test_flag(inode, NODATASUM))
                return 0;
+
        if (state && state->start == start) {
                private = state->private;
                ret = 0;
        }
        kunmap_atomic(kaddr, KM_IRQ0);
        local_irq_restore(flags);
-
+good:
        /* if the io failure tree for this inode is non-empty,
         * check to see if we've recovered from a failed IO
         */
        return err;
 }
 
+#if 0
 /*
  * when truncating bytes in a file, it is possible to avoid reading
  * the leaves that contain only checksum items.  This can be the
        return ret;
 }
 
+#endif
+
 /*
  * this can truncate away extent items, csum items and directory items.
  * It starts at a high offset and removes keys until it can't find
 
        btrfs_init_path(path);
 
-       ret = drop_csum_leaves(trans, root, path, inode, new_size);
-       BUG_ON(ret);
-
 search_again:
        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret < 0) {
                        }
                        item_end--;
                }
-               if (found_type == BTRFS_CSUM_ITEM_KEY) {
-                       ret = btrfs_csum_truncate(trans, root, path,
-                                                 new_size);
-                       BUG_ON(ret);
-               }
                if (item_end < new_size) {
                        if (found_type == BTRFS_DIR_ITEM_KEY) {
                                found_type = BTRFS_INODE_ITEM_KEY;
                        } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
-                               found_type = BTRFS_CSUM_ITEM_KEY;
+                               found_type = BTRFS_EXTENT_DATA_KEY;
                        } else if (found_type == BTRFS_EXTENT_DATA_KEY) {
                                found_type = BTRFS_XATTR_ITEM_KEY;
                        } else if (found_type == BTRFS_XATTR_ITEM_KEY) {
 
        u64 len = olen;
        u64 bs = root->fs_info->sb->s_blocksize;
        u64 hint_byte;
-       u16 csum_size =
-               btrfs_super_csum_size(&root->fs_info->super_copy);
+
        /*
         * TODO:
         * - split compressed inline extents.  annoying: we need to
                slot = path->slots[0];
 
                btrfs_item_key_to_cpu(leaf, &key, slot);
-               if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY ||
+               if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
                    key.objectid != src->i_ino)
                        break;
 
                        btrfs_mark_buffer_dirty(leaf);
                }
 
-               if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) {
-                       u32 size;
-                       struct btrfs_key new_key;
-                       u64 coverslen;
-                       int coff, clen;
-
-                       size = btrfs_item_size_nr(leaf, slot);
-                       coverslen = (size / csum_size) <<
-                               root->fs_info->sb->s_blocksize_bits;
-                       printk("csums for %llu~%llu\n",
-                              key.offset, coverslen);
-                       if (key.offset + coverslen < off ||
-                           key.offset >= off+len)
-                               goto next;
-
-                       read_extent_buffer(leaf, buf,
-                                          btrfs_item_ptr_offset(leaf, slot),
-                                          size);
-                       btrfs_release_path(root, path);
-
-                       coff = 0;
-                       if (off > key.offset)
-                               coff = ((off - key.offset) >>
-                                       root->fs_info->sb->s_blocksize_bits) *
-                                       csum_size;
-                       clen = size - coff;
-                       if (key.offset + coverslen > off+len)
-                               clen -= ((key.offset+coverslen-off-len) >>
-                                        root->fs_info->sb->s_blocksize_bits) *
-                                       csum_size;
-                       printk(" will dup %d~%d of %d\n",
-                              coff, clen, size);
-
-                       memcpy(&new_key, &key, sizeof(new_key));
-                       new_key.objectid = inode->i_ino;
-                       new_key.offset = key.offset + destoff - off;
-
-                       ret = btrfs_insert_empty_item(trans, root, path,
-                                                     &new_key, clen);
-                       if (ret)
-                               goto out;
-
-                       leaf = path->nodes[0];
-                       slot = path->slots[0];
-                       write_extent_buffer(leaf, buf + coff,
-                                           btrfs_item_ptr_offset(leaf, slot),
-                                           clen);
-                       btrfs_mark_buffer_dirty(leaf);
-               }
-
        next:
                btrfs_release_path(root, path);
                key.offset++;
 
  * try to find a checksum.  This is used because we allow pages to
  * be reclaimed before their checksum is actually put into the btree
  */
-int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
+int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
+                          u32 *sum)
 {
        struct btrfs_ordered_sum *ordered_sum;
        struct btrfs_sector_sum *sector_sums;
        mutex_lock(&tree->mutex);
        list_for_each_prev(cur, &ordered->list) {
                ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list);
-               if (offset >= ordered_sum->file_offset) {
+               if (disk_bytenr >= ordered_sum->bytenr) {
                        num_sectors = ordered_sum->len / sectorsize;
                        sector_sums = ordered_sum->sums;
                        for (i = 0; i < num_sectors; i++) {
-                               if (sector_sums[i].offset == offset) {
+                               if (sector_sums[i].bytenr == disk_bytenr) {
                                        *sum = sector_sums[i].sum;
                                        ret = 0;
                                        goto out;
 
  * the ordered extent are on disk
  */
 struct btrfs_sector_sum {
-       u64 offset;
+       /* bytenr on disk */
+       u64 bytenr;
        u32 sum;
 };
 
 struct btrfs_ordered_sum {
-       u64 file_offset;
+       /* bytenr is the start of this extent on disk */
+       u64 bytenr;
+
        /*
         * this is the length in bytes covered by the sums array below.
-        * But, the sums array may not be contiguous in the file.
         */
        unsigned long len;
        struct list_head list;
 btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
 int btrfs_ordered_update_i_size(struct inode *inode,
                                struct btrfs_ordered_extent *ordered);
-int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum);
+int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
 int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
                                       pgoff_t start, pgoff_t end);
 int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
 
        unsigned long file_bytes;
        struct btrfs_ordered_sum *sums;
        struct btrfs_sector_sum *sector_sum;
-       struct inode *inode;
        unsigned long ptr;
 
        file_bytes = (item_size / csum_size) * root->sectorsize;
-       inode = read_one_inode(root, key->objectid);
-       if (!inode) {
-               return -EIO;
-       }
-
        sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS);
        if (!sums) {
-               iput(inode);
                return -ENOMEM;
        }
 
        INIT_LIST_HEAD(&sums->list);
        sums->len = file_bytes;
-       sums->file_offset = key->offset;
+       sums->bytenr = key->offset;
 
        /*
         * copy all the sums into the ordered sum struct
        cur_offset = key->offset;
        ptr = btrfs_item_ptr_offset(eb, slot);
        while(item_size > 0) {
-               sector_sum->offset = cur_offset;
+               sector_sum->bytenr = cur_offset;
                read_extent_buffer(eb, &sector_sum->sum, ptr, csum_size);
                sector_sum++;
                item_size -= csum_size;
        }
 
        /* let btrfs_csum_file_blocks add them into the file */
-       ret = btrfs_csum_file_blocks(trans, root, inode, sums);
+       ret = btrfs_csum_file_blocks(trans, root->fs_info->csum_root, sums);
        BUG_ON(ret);
        kfree(sums);
-       iput(inode);
-
        return 0;
 }
 /*
                        ret = replay_one_extent(wc->trans, root, path,
                                                eb, i, &key);
                        BUG_ON(ret);
-               } else if (key.type == BTRFS_CSUM_ITEM_KEY) {
+               } else if (key.type == BTRFS_EXTENT_CSUM_KEY) {
                        ret = replay_one_csum(wc->trans, root, path,
                                              eb, i, &key);
                        BUG_ON(ret);
        return 0;
 }
 
+/*
+ * Collect the checksums for the on-disk byte range
+ * [disk_bytenr, disk_bytenr + len) by searching @root with
+ * btrfs_lookup_csum(), one checksum per root->sectorsize bytes.
+ *
+ * The checksums are stored in a newly allocated btrfs_ordered_sum which
+ * is appended to @list.  Nothing is freed here; the entry stays on @list
+ * for the caller to consume and kfree().
+ *
+ * A sector whose checksum item cannot be looked up is recorded with a
+ * checksum of zero and a message is printed; such lookup failures are
+ * not reported through the return value.
+ *
+ * @trans is not used by this function.
+ *
+ * Returns 0 on success, or -ENOMEM if the path or the ordered sum could
+ * not be allocated (in which case @list is left untouched).
+ */
+static noinline int copy_extent_csums(struct btrfs_trans_handle *trans,
+                                     struct list_head *list,
+                                     struct btrfs_root *root,
+                                     u64 disk_bytenr, u64 len)
+{
+       struct btrfs_ordered_sum *sums;
+       struct btrfs_sector_sum *sector_sum;
+       struct btrfs_path *path;
+       struct btrfs_csum_item *item = NULL;
+       u64 end = disk_bytenr + len;
+       u64 item_start_offset = 0;
+       u64 item_last_offset = 0;
+       u32 diff;
+       u32 sum;
+       u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
+
+       /*
+        * do both allocations before touching the caller's list so a
+        * failure never leaves a half initialized entry linked on it
+        */
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS);
+       if (!sums) {
+               btrfs_free_path(path);
+               return -ENOMEM;
+       }
+
+       sector_sum = sums->sums;
+       sums->bytenr = disk_bytenr;
+       sums->len = len;
+       list_add_tail(&sums->list, list);
+
+       while (disk_bytenr < end) {
+               /*
+                * only search the tree again when the current sector is
+                * not covered by the csum item found last time around
+                */
+               if (!item || disk_bytenr < item_start_offset ||
+                   disk_bytenr >= item_last_offset) {
+                       struct btrfs_key found_key;
+                       u32 item_size;
+
+                       if (item)
+                               btrfs_release_path(root, path);
+                       item = btrfs_lookup_csum(NULL, root, path,
+                                                disk_bytenr, 0);
+                       if (IS_ERR(item)) {
+                               /*
+                                * any lookup error is tolerated: log it
+                                * and record a zero checksum for this
+                                * sector
+                                */
+                               sum = 0;
+                               printk("log no csum found for byte %llu\n",
+                                      (unsigned long long)disk_bytenr);
+                               item = NULL;
+                               btrfs_release_path(root, path);
+                               goto found;
+                       }
+                       btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+                                             path->slots[0]);
+
+                       /* remember the disk byte range this item covers */
+                       item_start_offset = found_key.offset;
+                       item_size = btrfs_item_size_nr(path->nodes[0],
+                                                      path->slots[0]);
+                       item_last_offset = item_start_offset +
+                               (item_size / csum_size) *
+                               root->sectorsize;
+                       item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                             struct btrfs_csum_item);
+               }
+               /*
+                * this byte range must be able to fit inside
+                * a single leaf so it will also fit inside a u32
+                */
+               diff = disk_bytenr - item_start_offset;
+               diff = diff / root->sectorsize;
+               diff = diff * csum_size;
+
+               read_extent_buffer(path->nodes[0], &sum,
+                                  ((unsigned long)item) + diff,
+                                  csum_size);
+found:
+               sector_sum->bytenr = disk_bytenr;
+               sector_sum->sum = sum;
+               disk_bytenr += root->sectorsize;
+               sector_sum++;
+       }
+       btrfs_free_path(path);
+       return 0;
+}
+
 static noinline int copy_items(struct btrfs_trans_handle *trans,
                               struct btrfs_root *log,
                               struct btrfs_path *dst_path,
        u32 *ins_sizes;
        char *ins_data;
        int i;
+       struct list_head ordered_sums;
+
+       INIT_LIST_HEAD(&ordered_sums);
 
        ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
                           nr * sizeof(u32), GFP_NOFS);
                                                                   extent);
                                u64 dl = btrfs_file_extent_disk_num_bytes(src,
                                                                      extent);
+                               u64 cs = btrfs_file_extent_offset(src, extent);
+                               u64 cl = btrfs_file_extent_num_bytes(src,
+                                                                    extent);;
                                /* ds == 0 is a hole */
                                if (ds != 0) {
                                        ret = btrfs_inc_extent_ref(trans, log,
                                                   trans->transid,
                                                   ins_keys[i].objectid);
                                        BUG_ON(ret);
+                                       ret = copy_extent_csums(trans,
+                                               &ordered_sums,
+                                               log->fs_info->csum_root,
+                                               ds + cs, cl);
+                                       BUG_ON(ret);
                                }
                        }
                }
        btrfs_mark_buffer_dirty(dst_path->nodes[0]);
        btrfs_release_path(log, dst_path);
        kfree(ins_data);
+
+       /*
+        * we have to do this after the loop above to avoid changing the
+        * log tree while trying to change the log tree.
+        */
+       while(!list_empty(&ordered_sums)) {
+               struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
+                                                  struct btrfs_ordered_sum,
+                                                  list);
+               ret = btrfs_csum_file_blocks(trans, log, sums);
+               BUG_ON(ret);
+               list_del(&sums->list);
+               kfree(sums);
+       }
        return 0;
 }
 
 
        device->work.func = pending_bios_fn;
        fs_devices->num_devices++;
        spin_lock_init(&device->io_lock);
+       INIT_LIST_HEAD(&device->dev_alloc_list);
        memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
        return device;
 }