GFP_NOFS);
        int ret;
        int err = -EINVAL;
+
        struct btrfs_super_block *disk_super;
 
        if (!extent_root || !tree_root || !fs_info) {
        fs_info->btree_inode = new_inode(sb);
        fs_info->btree_inode->i_ino = 1;
        fs_info->btree_inode->i_nlink = 1;
+       fs_info->thread_pool_size = min(num_online_cpus() + 2, 8);
 
        sb->s_blocksize = 4096;
        sb->s_blocksize_bits = blksize_bits(4096);
        mutex_init(&fs_info->trans_mutex);
        mutex_init(&fs_info->fs_mutex);
 
-       /* we need to start all the end_io workers up front because the
-        * queue work function gets called at interrupt time.  The endio
-        * workers don't normally start IO, so some number of them <= the
-        * number of cpus is fine.  They handle checksumming after a read.
-        *
-        * The other worker threads do start IO, so the max is larger than
-        * the number of CPUs.  FIXME, tune this for huge machines
-        */
-       btrfs_init_workers(&fs_info->workers, num_online_cpus() * 2);
-       btrfs_init_workers(&fs_info->endio_workers, num_online_cpus());
-       btrfs_start_workers(&fs_info->workers, 1);
-       btrfs_start_workers(&fs_info->endio_workers, num_online_cpus());
-
 #if 0
        ret = add_hasher(fs_info, "crc32c");
        if (ret) {
        if (err)
                goto fail_sb_buffer;
 
+       /*
+        * we need to start all the end_io workers up front because the
+        * queue work function gets called at interrupt time, and so it
+        * cannot dynamically grow.
+        */
+       btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size);
+       btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
+       btrfs_start_workers(&fs_info->workers, 1);
+       btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
+
        err = -EINVAL;
        if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) {
                printk("Btrfs: wanted %llu devices, but found %llu\n",
        mutex_unlock(&fs_info->fs_mutex);
 fail_sb_buffer:
        extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
-fail_iput:
-       iput(fs_info->btree_inode);
        btrfs_stop_workers(&fs_info->workers);
        btrfs_stop_workers(&fs_info->endio_workers);
+fail_iput:
+       iput(fs_info->btree_inode);
 fail:
        btrfs_close_devices(fs_info->fs_devices);
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
 
 enum {
        Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
        Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
-       Opt_ssd, Opt_err,
+       Opt_ssd, Opt_thread_pool, Opt_err,
 };
 
 static match_table_t tokens = {
        {Opt_max_extent, "max_extent=%s"},
        {Opt_max_inline, "max_inline=%s"},
        {Opt_alloc_start, "alloc_start=%s"},
+       {Opt_thread_pool, "thread_pool=%d"},
        {Opt_ssd, "ssd"},
        {Opt_err, NULL}
 };
        struct btrfs_fs_info *info = root->fs_info;
        substring_t args[MAX_OPT_ARGS];
        char *p, *num;
+       int intarg;
 
        if (!options)
                return 0;
                        printk(KERN_INFO "btrfs: turning off barriers\n");
                        btrfs_set_opt(info->mount_opt, NOBARRIER);
                        break;
+               case Opt_thread_pool:
+                       intarg = 0;
+                       match_int(&args[0], &intarg);
+                       if (intarg) {
+                               info->thread_pool_size = intarg;
+                               printk(KERN_INFO "btrfs: thread pool %d\n",
+                                      info->thread_pool_size);
+                       }
+                       break;
                case Opt_max_extent:
                        num = match_strdup(&args[0]);
                        if (num) {