        struct btrfs_workers delalloc_workers;
        struct btrfs_workers endio_workers;
        struct btrfs_workers endio_meta_workers;
+       struct btrfs_workers endio_meta_write_workers;
        struct btrfs_workers endio_write_workers;
        struct btrfs_workers submit_workers;
        /*
 
        end_io_wq->work.flags = 0;
 
        if (bio->bi_rw & (1 << BIO_RW)) {
-               btrfs_queue_worker(&fs_info->endio_write_workers,
-                                  &end_io_wq->work);
+               if (end_io_wq->metadata)
+                       btrfs_queue_worker(&fs_info->endio_meta_write_workers,
+                                          &end_io_wq->work);
+               else
+                       btrfs_queue_worker(&fs_info->endio_write_workers,
+                                          &end_io_wq->work);
        } else {
                if (end_io_wq->metadata)
                        btrfs_queue_worker(&fs_info->endio_meta_workers,
 static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                                 int mirror_num, unsigned long bio_flags)
 {
-       /*
-        * kthread helpers are used to submit writes so that checksumming
-        * can happen in parallel across all CPUs
-        */
+       int ret;
+
+       ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
+                                         bio, 1);
+       BUG_ON(ret);
+
        if (!(rw & (1 << BIO_RW))) {
-               int ret;
                /*
                 * called for a read, do the setup so that checksum validation
                 * can happen in the async kernel threads
                 */
-               ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
-                                         bio, 1);
-               BUG_ON(ret);
-
                return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
                                     mirror_num, 0);
        }
+       /*
+        * kthread helpers are used to submit writes so that checksumming
+        * can happen in parallel across all CPUs
+        */
        return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
                                   inode, rw, bio, mirror_num, 0,
                                   __btree_submit_bio_start,
        bio = end_io_wq->bio;
        fs_info = end_io_wq->info;
 
-       /* metadata bios are special because the whole tree block must
+       /* metadata bio reads are special because the whole tree block must
         * be checksummed at once.  This makes sure the entire block is in
         * ram and up to date before trying to verify things.  For
         * blocksize <= pagesize, it is basically a noop
         */
-       if (end_io_wq->metadata && !bio_ready_for_csum(bio)) {
+       if (!(bio->bi_rw & (1 << BIO_RW)) && end_io_wq->metadata &&
+           !bio_ready_for_csum(bio)) {
                btrfs_queue_worker(&fs_info->endio_meta_workers,
                                   &end_io_wq->work);
                return;
                           fs_info->thread_pool_size);
        btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta",
                           fs_info->thread_pool_size);
+       btrfs_init_workers(&fs_info->endio_meta_write_workers,
+                          "endio-meta-write", fs_info->thread_pool_size);
        btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
                           fs_info->thread_pool_size);
 
         */
        fs_info->endio_workers.idle_thresh = 4;
        fs_info->endio_write_workers.idle_thresh = 64;
+       fs_info->endio_meta_write_workers.idle_thresh = 64;
 
        btrfs_start_workers(&fs_info->workers, 1);
        btrfs_start_workers(&fs_info->submit_workers, 1);
        btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
        btrfs_start_workers(&fs_info->endio_meta_workers,
                            fs_info->thread_pool_size);
+       btrfs_start_workers(&fs_info->endio_meta_write_workers,
+                           fs_info->thread_pool_size);
        btrfs_start_workers(&fs_info->endio_write_workers,
                            fs_info->thread_pool_size);
 
        btrfs_stop_workers(&fs_info->workers);
        btrfs_stop_workers(&fs_info->endio_workers);
        btrfs_stop_workers(&fs_info->endio_meta_workers);
+       btrfs_stop_workers(&fs_info->endio_meta_write_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
        btrfs_stop_workers(&fs_info->submit_workers);
 fail_iput:
        btrfs_stop_workers(&fs_info->workers);
        btrfs_stop_workers(&fs_info->endio_workers);
        btrfs_stop_workers(&fs_info->endio_meta_workers);
+       btrfs_stop_workers(&fs_info->endio_meta_write_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
        btrfs_stop_workers(&fs_info->submit_workers);
 
 
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
        struct rb_node *node;
-       unsigned long flags;
        int err;
        int set = 0;
 
                        return -ENOMEM;
        }
 
-       spin_lock_irqsave(&tree->lock, flags);
+       spin_lock(&tree->lock);
        /*
         * this search will find the extents that end after
         * our range starts
        goto search_again;
 
 out:
-       spin_unlock_irqrestore(&tree->lock, flags);
+       spin_unlock(&tree->lock);
        if (prealloc)
                free_extent_state(prealloc);
 
 search_again:
        if (start > end)
                goto out;
-       spin_unlock_irqrestore(&tree->lock, flags);
+       spin_unlock(&tree->lock);
        if (mask & __GFP_WAIT)
                cond_resched();
        goto again;
 {
        DEFINE_WAIT(wait);
        prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
-       spin_unlock_irq(&tree->lock);
+       spin_unlock(&tree->lock);
        schedule();
-       spin_lock_irq(&tree->lock);
+       spin_lock(&tree->lock);
        finish_wait(&state->wq, &wait);
        return 0;
 }
        struct extent_state *state;
        struct rb_node *node;
 
-       spin_lock_irq(&tree->lock);
+       spin_lock(&tree->lock);
 again:
        while (1) {
                /*
                        break;
 
                if (need_resched()) {
-                       spin_unlock_irq(&tree->lock);
+                       spin_unlock(&tree->lock);
                        cond_resched();
-                       spin_lock_irq(&tree->lock);
+                       spin_lock(&tree->lock);
                }
        }
 out:
-       spin_unlock_irq(&tree->lock);
+       spin_unlock(&tree->lock);
        return 0;
 }
 EXPORT_SYMBOL(wait_extent_bit);
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
        struct rb_node *node;
-       unsigned long flags;
        int err = 0;
        int set;
        u64 last_start;
                        return -ENOMEM;
        }
 
-       spin_lock_irqsave(&tree->lock, flags);
+       spin_lock(&tree->lock);
        /*
         * this search will find all the extents that end after
         * our range starts.
        goto search_again;
 
 out:
-       spin_unlock_irqrestore(&tree->lock, flags);
+       spin_unlock(&tree->lock);
        if (prealloc)
                free_extent_state(prealloc);
 
 search_again:
        if (start > end)
                goto out;
-       spin_unlock_irqrestore(&tree->lock, flags);
+       spin_unlock(&tree->lock);
        if (mask & __GFP_WAIT)
                cond_resched();
        goto again;
        struct extent_state *state;
        int ret = 1;
 
-       spin_lock_irq(&tree->lock);
+       spin_lock(&tree->lock);
        /*
         * this search will find all the extents that end after
         * our range starts.
                        break;
        }
 out:
-       spin_unlock_irq(&tree->lock);
+       spin_unlock(&tree->lock);
        return ret;
 }
 EXPORT_SYMBOL(find_first_extent_bit);
        u64 found = 0;
        u64 total_bytes = 0;
 
-       spin_lock_irq(&tree->lock);
+       spin_lock(&tree->lock);
 
        /*
         * this search will find all the extents that end after
                        break;
        }
 out:
-       spin_unlock_irq(&tree->lock);
+       spin_unlock(&tree->lock);
        return found;
 }
 
                return 0;
        }
 
-       spin_lock_irq(&tree->lock);
+       spin_lock(&tree->lock);
        if (cur_start == 0 && bits == EXTENT_DIRTY) {
                total_bytes = tree->dirty_bytes;
                goto out;
                        break;
        }
 out:
-       spin_unlock_irq(&tree->lock);
+       spin_unlock(&tree->lock);
        return total_bytes;
 }
 
        struct extent_state *state;
        int ret = 0;
 
-       spin_lock_irq(&tree->lock);
+       spin_lock(&tree->lock);
        /*
         * this search will find all the extents that end after
         * our range starts.
        }
        state->private = private;
 out:
-       spin_unlock_irq(&tree->lock);
+       spin_unlock(&tree->lock);
        return ret;
 }
 
        struct extent_state *state;
        int ret = 0;
 
-       spin_lock_irq(&tree->lock);
+       spin_lock(&tree->lock);
        /*
         * this search will find all the extents that end after
         * our range starts.
        }
        *private = state->private;
 out:
-       spin_unlock_irq(&tree->lock);
+       spin_unlock(&tree->lock);
        return ret;
 }
 
        struct extent_state *state = NULL;
        struct rb_node *node;
        int bitset = 0;
-       unsigned long flags;
 
-       spin_lock_irqsave(&tree->lock, flags);
+       spin_lock(&tree->lock);
        node = tree_search(tree, start);
        while (node && start <= end) {
                state = rb_entry(node, struct extent_state, rb_node);
                        break;
                }
        }
-       spin_unlock_irqrestore(&tree->lock, flags);
+       spin_unlock(&tree->lock);
        return bitset;
 }
 EXPORT_SYMBOL(test_range_bit);
 
 }
 
 /*
- * extent_io.c submission hook. This does the right thing for csum calculation on write,
- * or reading the csums from the tree before a read
+ * extent_io.c submission hook. This does the right thing for csum calculation
+ * on write, or reading the csums from the tree before a read
  */
 static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                          int mirror_num, unsigned long bio_flags)
        int ret = 0;
        int skip_sum;
 
+       skip_sum = btrfs_test_flag(inode, NODATASUM);
+
        ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
        BUG_ON(ret);
 
-       skip_sum = btrfs_test_flag(inode, NODATASUM);
-
        if (!(rw & (1 << BIO_RW))) {
                if (bio_flags & EXTENT_BIO_COMPRESSED) {
                        return btrfs_submit_compressed_read(inode, bio,
                              failrec->logical, failrec->len);
        failrec->last_mirror++;
        if (!state) {
-               spin_lock_irq(&BTRFS_I(inode)->io_tree.lock);
+               spin_lock(&BTRFS_I(inode)->io_tree.lock);
                state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
                                                    failrec->start,
                                                    EXTENT_LOCKED);
                if (state && state->start != failrec->start)
                        state = NULL;
-               spin_unlock_irq(&BTRFS_I(inode)->io_tree.lock);
+               spin_unlock(&BTRFS_I(inode)->io_tree.lock);
        }
        if (!state || failrec->last_mirror > num_copies) {
                set_state_private(failure_tree, failrec->start, 0);