Extent allocations are still protected by a large alloc_mutex.
Objectid allocations are covered by an objectid mutex.
Other btree operations are protected by a lock on individual btree nodes
Signed-off-by: Chris Mason <chris.mason@oracle.com>
 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
 {
        int i;
+       int skip = p->skip_locking; /* saved here because the memset below zeroes all of *p */
+       int keep = p->keep_locks; /* saved for the same reason */
+
        for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
                if (!p->nodes[i])
                        continue;
                free_extent_buffer(p->nodes[i]);
        }
        memset(p, 0, sizeof(*p));
+       p->skip_locking = skip; /* put back the caller's locking flags so they survive the release */
+       p->keep_locks = keep; /* likewise for keep_locks */
 }
 
 struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
                        u32 nritems;
                        t = path->nodes[i];
                        nritems = btrfs_header_nritems(t);
-                       if (path->slots[i] >= nritems - 1) {
+                       if (nritems < 2 || path->slots[i] >= nritems - 2) {
+if (path->keep_locks) {
+//printk("path %p skip level now %d\n", path, skip_level);
+}
                                skip_level = i + 1;
                                continue;
                        }
                }
                t = path->nodes[i];
                if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
+if (path->keep_locks) {
+//printk("path %p unlocking level %d slot %d nritems %d skip_level %d\n", path, i, path->slots[i], btrfs_header_nritems(t), skip_level);
+}
                        btrfs_tree_unlock(t);
                        path->locks[i] = 0;
                }
        lowest_level = p->lowest_level;
        WARN_ON(lowest_level && ins_len);
        WARN_ON(p->nodes[0] != NULL);
-       // WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
        WARN_ON(root == root->fs_info->extent_root &&
                !mutex_is_locked(&root->fs_info->alloc_mutex));
        WARN_ON(root == root->fs_info->chunk_root &&
                        b = read_node_slot(root, b, slot);
                        if (!p->skip_locking)
                                btrfs_tree_lock(b);
-                       unlock_up(p, level, lowest_unlock);
+                       unlock_up(p, level + 1, lowest_unlock);
                } else {
                        p->slots[level] = slot;
                        if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
        if (slot >= btrfs_header_nritems(upper) - 1)
                return 1;
 
+       WARN_ON(!btrfs_tree_locked(path->nodes[1]));
+
        right = read_node_slot(root, upper, slot + 1);
        btrfs_tree_lock(right);
        free_space = btrfs_leaf_free_space(root, right);
                return 1;
        }
 
+       WARN_ON(!btrfs_tree_locked(path->nodes[1]));
+
        left = read_node_slot(root, path->nodes[1], slot - 1);
        btrfs_tree_lock(left);
        free_space = btrfs_leaf_free_space(root, left);
 
        btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
 
-       path->keep_locks = 1;
        btrfs_release_path(root, path);
+       path->keep_locks = 1;
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        path->keep_locks = 0;
 
        if (ret < 0)
                return ret;
 
-       if (path->slots[0] < nritems - 1) {
+       nritems = btrfs_header_nritems(path->nodes[0]);
+       if (nritems > 0 && path->slots[0] < nritems - 1) {
                goto done;
        }
 
                        reada_for_search(root, path, level, slot, 0);
 
                next = read_node_slot(root, c, slot);
-               if (!path->skip_locking)
+               if (!path->skip_locking) {
+                       /*
+                        * Debugging aid: node c (the one next was just read
+                        * from) is expected to be locked here.  If it is not,
+                        * warn and dump every populated level of the path.
+                        */
+                       if (!btrfs_tree_locked(c)) {
+                               int i;
+                               WARN_ON(1);
+                               printk("path %p no lock on level %d\n", path, level);
+                               for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+                                       /* unused levels hold no node; do not dereference NULL */
+                                       if (!path->nodes[i])
+                                               continue;
+                                       printk("path %p level %d slot %d nritems %d\n", path, i, path->slots[i], btrfs_header_nritems(path->nodes[i]));
+                               }
+                       }
                        btrfs_tree_lock(next);
+               }
                break;
        }
        path->slots[level] = slot;
                if (level == 1 && path->locks[1] && path->reada)
                        reada_for_search(root, path, level, slot, 0);
                next = read_node_slot(root, next, 0);
-               if (!path->skip_locking)
+               if (!path->skip_locking) {
+                       WARN_ON(!btrfs_tree_locked(path->nodes[level]));
                        btrfs_tree_lock(next);
+               }
        }
 done:
        unlock_up(path, 0, 1);
 
        struct backing_dev_info bdi;
        spinlock_t hash_lock;
        struct mutex trans_mutex;
-       struct mutex fs_mutex;
        struct mutex alloc_mutex;
        struct mutex chunk_mutex;
+       struct mutex drop_mutex;
        struct list_head trans_list;
        struct list_head hashers;
        struct list_head dead_roots;
        struct completion kobj_unregister;
        int do_barriers;
        int closing;
-       unsigned long throttles;
+       atomic_t throttles;
 
        u64 total_pinned;
        struct list_head dirty_cowonly_roots;
        struct inode *inode;
        struct kobject root_kobj;
        struct completion kobj_unregister;
+       struct mutex objectid_mutex;
        u64 objectid;
        u64 last_trans;
 
 
 
        INIT_LIST_HEAD(&root->dirty_list);
        spin_lock_init(&root->node_lock);
+       mutex_init(&root->objectid_mutex);
        memset(&root->root_key, 0, sizeof(root->root_key));
        memset(&root->root_item, 0, sizeof(root->root_item));
        memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
        INIT_LIST_HEAD(&fs_info->space_info);
        btrfs_mapping_init(&fs_info->mapping_tree);
        atomic_set(&fs_info->nr_async_submits, 0);
+       atomic_set(&fs_info->throttles, 0);
        fs_info->sb = sb;
        fs_info->max_extent = (u64)-1;
        fs_info->max_inline = 8192 * 1024;
        mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
 
        mutex_init(&fs_info->trans_mutex);
-       mutex_init(&fs_info->fs_mutex);
+       mutex_init(&fs_info->drop_mutex);
        mutex_init(&fs_info->alloc_mutex);
        mutex_init(&fs_info->chunk_mutex);
 
                goto fail_sb_buffer;
        }
 
-       mutex_lock(&fs_info->fs_mutex);
-
        mutex_lock(&fs_info->chunk_mutex);
        ret = btrfs_read_sys_array(tree_root);
        mutex_unlock(&fs_info->chunk_mutex);
        fs_info->metadata_alloc_profile = (u64)-1;
        fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
 
-       mutex_unlock(&fs_info->fs_mutex);
        return tree_root;
 
 fail_extent_root:
 fail_tree_root:
        free_extent_buffer(tree_root->node);
 fail_sys_array:
-       mutex_unlock(&fs_info->fs_mutex);
 fail_sb_buffer:
        extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
        btrfs_stop_workers(&fs_info->workers);
        struct btrfs_fs_info *fs_info = root->fs_info;
 
        fs_info->closing = 1;
+       smp_mb();
+
        btrfs_transaction_flush_work(root);
-       mutex_lock(&fs_info->fs_mutex);
        btrfs_defrag_dirty_roots(root->fs_info);
        trans = btrfs_start_transaction(root, 1);
        ret = btrfs_commit_transaction(trans, root);
        BUG_ON(ret);
 
        write_ctree_super(NULL, root);
-       mutex_unlock(&fs_info->fs_mutex);
 
        btrfs_transaction_flush_work(root);
 
        struct backing_dev_info *bdi;
 
        bdi = &root->fs_info->bdi;
-       if (root->fs_info->throttles && bdi_write_congested(bdi)) {
+       if (atomic_read(&root->fs_info->throttles) &&
+           bdi_write_congested(bdi)) {
 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
                congestion_wait(WRITE, HZ/20);
 #else
 
                }
 
                /* block accounting for super block */
+               spin_lock_irq(&info->delalloc_lock);
                super_used = btrfs_super_bytes_used(&info->super_copy);
                btrfs_set_super_bytes_used(&info->super_copy,
                                           super_used - num_bytes);
+               spin_unlock_irq(&info->delalloc_lock);
 
                /* block accounting for root item */
                root_used = btrfs_root_used(&root->root_item);
        }
 
        /* block accounting for super block */
+       spin_lock_irq(&info->delalloc_lock);
        super_used = btrfs_super_bytes_used(&info->super_copy);
        btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
+       spin_unlock_irq(&info->delalloc_lock);
 
        /* block accounting for root item */
        root_used = btrfs_root_used(&root->root_item);
                                continue;
                        }
                }
-               mutex_unlock(&root->fs_info->fs_mutex);
+               mutex_unlock(&root->fs_info->alloc_mutex);
                ret = readahead_tree_block(root, bytenr, blocksize,
                                           btrfs_node_ptr_generation(node, i));
                last = bytenr + blocksize;
                cond_resched();
-               mutex_lock(&root->fs_info->fs_mutex);
+               mutex_lock(&root->fs_info->alloc_mutex);
                if (ret)
                        break;
        }
                        free_extent_buffer(next);
                        reada_walk_down(root, cur, path->slots[*level]);
 
-                       mutex_unlock(&root->fs_info->fs_mutex);
                        mutex_unlock(&root->fs_info->alloc_mutex);
                        next = read_tree_block(root, bytenr, blocksize,
                                               ptr_gen);
-                       mutex_lock(&root->fs_info->fs_mutex);
                        mutex_lock(&root->fs_info->alloc_mutex);
 
                        /* we've dropped the lock, double check */
        int orig_level;
        struct btrfs_root_item *root_item = &root->root_item;
 
+       WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
        path = btrfs_alloc_path();
        BUG_ON(!path);
 
                    *last_file_root == ref_root)
                        goto out;
 
-               mutex_unlock(&extent_root->fs_info->fs_mutex);
                inode = btrfs_iget_locked(extent_root->fs_info->sb,
                                          ref_objectid, found_root);
                if (inode->i_state & I_NEW) {
                 * the latest version of the tree root
                 */
                if (is_bad_inode(inode)) {
-                       mutex_lock(&extent_root->fs_info->fs_mutex);
                        goto out;
                }
                *last_file_objectid = inode->i_ino;
 
                relocate_inode_pages(inode, ref_offset, extent_key->offset);
                iput(inode);
-               mutex_lock(&extent_root->fs_info->fs_mutex);
        } else {
                struct btrfs_trans_handle *trans;
                struct extent_buffer *eb;
 
                if (progress && need_resched()) {
                        memcpy(&key, &found_key, sizeof(key));
-                       mutex_unlock(&root->fs_info->fs_mutex);
                        cond_resched();
-                       mutex_lock(&root->fs_info->fs_mutex);
                        btrfs_release_path(root, path);
                        btrfs_search_slot(NULL, root, &key, path, 0, 0);
                        progress = 0;
                trans = btrfs_start_transaction(tree_root, 1);
                btrfs_commit_transaction(trans, tree_root);
 
-               mutex_unlock(&root->fs_info->fs_mutex);
                btrfs_clean_old_snapshots(tree_root);
-               mutex_lock(&root->fs_info->fs_mutex);
 
                trans = btrfs_start_transaction(tree_root, 1);
                btrfs_commit_transaction(trans, tree_root);
 
        end_of_last_block = start_pos + num_bytes - 1;
 
        lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
-       mutex_lock(&root->fs_info->fs_mutex);
        trans = btrfs_start_transaction(root, 1);
        if (!trans) {
                err = -ENOMEM;
 failed:
        err = btrfs_end_transaction(trans, root);
 out_unlock:
-       mutex_unlock(&root->fs_info->fs_mutex);
        unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
        return err;
 }
                WARN_ON(num_pages > nrptrs);
                memset(pages, 0, sizeof(pages));
 
-               mutex_lock(&root->fs_info->fs_mutex);
                ret = btrfs_check_free_space(root, write_bytes, 0);
-               mutex_unlock(&root->fs_info->fs_mutex);
                if (ret)
                        goto out;
 
         * check the transaction that last modified this inode
         * and see if its already been committed
         */
-       mutex_lock(&root->fs_info->fs_mutex);
        if (!BTRFS_I(inode)->last_trans)
                goto out;
+
        mutex_lock(&root->fs_info->trans_mutex);
        if (BTRFS_I(inode)->last_trans <=
            root->fs_info->last_trans_committed) {
        }
        ret = btrfs_commit_transaction(trans, root);
 out:
-       mutex_unlock(&root->fs_info->fs_mutex);
        return ret > 0 ? EIO : ret;
 }
 
 
        struct btrfs_key search_key;
        u64 search_start = dirid;
 
+       mutex_lock(&root->objectid_mutex);
+       if (root->last_inode_alloc) {
+               *objectid = ++root->last_inode_alloc;
+               mutex_unlock(&root->objectid_mutex);
+               return 0;
+       }
        path = btrfs_alloc_path();
        BUG_ON(!path);
        search_start = root->last_inode_alloc;
        btrfs_release_path(root, path);
        btrfs_free_path(path);
        BUG_ON(*objectid < search_start);
+       mutex_unlock(&root->objectid_mutex);
        return 0;
 error:
        btrfs_release_path(root, path);
        btrfs_free_path(path);
+       mutex_unlock(&root->objectid_mutex);
        return ret;
 }
 
 int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
                           int for_del)
 {
-       u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
-       u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
+       u64 total;
+       u64 used;
        u64 thresh;
        unsigned long flags;
        int ret = 0;
 
+       spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
+       total = btrfs_super_total_bytes(&root->fs_info->super_copy);
+       used = btrfs_super_bytes_used(&root->fs_info->super_copy);
        if (for_del)
                thresh = total * 90;
        else
 
        do_div(thresh, 100);
 
-       spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
        if (used + root->fs_info->delalloc_bytes + num_required > thresh)
                ret = -ENOSPC;
        spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
        trans = btrfs_start_transaction(root, 1);
        BUG_ON(!trans);
        btrfs_set_trans_block_group(trans, inode);
-       mutex_unlock(&root->fs_info->fs_mutex);
 
        num_bytes = (end - start + blocksize) & ~(blocksize - 1);
        num_bytes = max(blocksize,  num_bytes);
        btrfs_add_ordered_inode(inode);
        btrfs_update_inode(trans, root, inode);
 out:
-       mutex_lock(&root->fs_info->fs_mutex);
        btrfs_end_transaction(trans, root);
        return ret;
 }
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret;
-       mutex_lock(&root->fs_info->fs_mutex);
+
        if (btrfs_test_opt(root, NODATACOW) ||
            btrfs_test_flag(inode, NODATACOW))
                ret = run_delalloc_nocow(inode, start, end);
        else
                ret = cow_file_range(inode, start, end);
 
-       mutex_unlock(&root->fs_info->fs_mutex);
        return ret;
 }
 
        ret = btrfs_csum_one_bio(root, bio, &sums);
        BUG_ON(ret);
 
-       mutex_lock(&root->fs_info->fs_mutex);
        trans = btrfs_start_transaction(root, 1);
-       mutex_unlock(&root->fs_info->fs_mutex);
 
        btrfs_set_trans_block_group(trans, inode);
        btrfs_csum_file_blocks(trans, root, inode, bio, sums);
 
-       mutex_lock(&root->fs_info->fs_mutex);
        ret = btrfs_end_transaction(trans, root);
        BUG_ON(ret);
-       mutex_unlock(&root->fs_info->fs_mutex);
 
        kfree(sums);
 
            btrfs_test_flag(inode, NODATASUM))
                return 0;
 
-       mutex_lock(&root->fs_info->fs_mutex);
        path = btrfs_alloc_path();
        item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
        if (IS_ERR(item)) {
 out:
        if (path)
                btrfs_free_path(path);
-       mutex_unlock(&root->fs_info->fs_mutex);
        return ret;
 }
 
 
        path = btrfs_alloc_path();
        BUG_ON(!path);
-       mutex_lock(&root->fs_info->fs_mutex);
        memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
 
        ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
        btrfs_free_path(path);
        inode_item = NULL;
 
-       mutex_unlock(&root->fs_info->fs_mutex);
-
        switch (inode->i_mode & S_IFMT) {
        case S_IFREG:
                inode->i_mapping->a_ops = &btrfs_aops;
        return;
 
 make_bad:
-       btrfs_release_path(root, path);
        btrfs_free_path(path);
-       mutex_unlock(&root->fs_info->fs_mutex);
        make_bad_inode(inode);
 }
 
        btrfs_set_inode_last_trans(trans, inode);
        ret = 0;
 failed:
-       btrfs_release_path(root, path);
        btrfs_free_path(path);
        return ret;
 }
        unsigned long nr = 0;
 
        root = BTRFS_I(dir)->root;
-       mutex_lock(&root->fs_info->fs_mutex);
 
        ret = btrfs_check_free_space(root, 1, 1);
        if (ret)
 
        btrfs_end_transaction(trans, root);
 fail:
-       mutex_unlock(&root->fs_info->fs_mutex);
        btrfs_btree_balance_dirty(root, nr);
        btrfs_throttle(root);
        return ret;
                return -ENOTEMPTY;
        }
 
-       mutex_lock(&root->fs_info->fs_mutex);
        ret = btrfs_check_free_space(root, 1, 1);
        if (ret)
                goto fail;
        nr = trans->blocks_used;
        ret = btrfs_end_transaction(trans, root);
 fail:
-       mutex_unlock(&root->fs_info->fs_mutex);
        btrfs_btree_balance_dirty(root, nr);
        btrfs_throttle(root);
 
                ret = btrfs_del_items(trans, root, path, pending_del_slot,
                                      pending_del_nr);
        }
-       btrfs_release_path(root, path);
        btrfs_free_path(path);
        inode->i_sb->s_dirt = 1;
        return ret;
                if (attr->ia_size <= hole_start)
                        goto out;
 
-               mutex_lock(&root->fs_info->fs_mutex);
                err = btrfs_check_free_space(root, 1, 0);
-               mutex_unlock(&root->fs_info->fs_mutex);
                if (err)
                        goto fail;
 
                lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
                hole_size = block_end - hole_start;
 
-               mutex_lock(&root->fs_info->fs_mutex);
                trans = btrfs_start_transaction(root, 1);
                btrfs_set_trans_block_group(trans, inode);
                err = btrfs_drop_extents(trans, root, inode,
                        btrfs_check_file(root, inode);
                }
                btrfs_end_transaction(trans, root);
-               mutex_unlock(&root->fs_info->fs_mutex);
                unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
                if (err)
                        return err;
        }
 
        inode->i_size = 0;
-       mutex_lock(&root->fs_info->fs_mutex);
        trans = btrfs_start_transaction(root, 1);
 
        btrfs_set_trans_block_group(trans, inode);
        clear_inode(inode);
 
        btrfs_end_transaction(trans, root);
-       mutex_unlock(&root->fs_info->fs_mutex);
        btrfs_btree_balance_dirty(root, nr);
        btrfs_throttle(root);
        return;
 no_delete_lock:
        nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
-       mutex_unlock(&root->fs_info->fs_mutex);
        btrfs_btree_balance_dirty(root, nr);
        btrfs_throttle(root);
 no_delete:
 
        path = btrfs_alloc_path();
        BUG_ON(!path);
-       mutex_lock(&root->fs_info->fs_mutex);
 
        *sub_root = btrfs_read_fs_root(root->fs_info, location,
                                        dentry->d_name.name,
        location->offset = 0;
 
        btrfs_free_path(path);
-       mutex_unlock(&root->fs_info->fs_mutex);
        return 0;
 }
 
        if (dentry->d_name.len > BTRFS_NAME_LEN)
                return ERR_PTR(-ENAMETOOLONG);
 
-       mutex_lock(&root->fs_info->fs_mutex);
        ret = btrfs_inode_by_name(dir, dentry, &location);
-       mutex_unlock(&root->fs_info->fs_mutex);
 
        if (ret < 0)
                return ERR_PTR(ret);
                filp->f_pos = 1;
        }
 
-       mutex_lock(&root->fs_info->fs_mutex);
        key.objectid = inode->i_ino;
        path = btrfs_alloc_path();
        path->reada = 2;
 nopos:
        ret = 0;
 err:
-       btrfs_release_path(root, path);
        btrfs_free_path(path);
-       mutex_unlock(&root->fs_info->fs_mutex);
        return ret;
 }
 
        int ret = 0;
 
        if (wait) {
-               mutex_lock(&root->fs_info->fs_mutex);
                trans = btrfs_start_transaction(root, 1);
                btrfs_set_trans_block_group(trans, inode);
                ret = btrfs_commit_transaction(trans, root);
-               mutex_unlock(&root->fs_info->fs_mutex);
        }
        return ret;
 }
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
 
-       mutex_lock(&root->fs_info->fs_mutex);
        trans = btrfs_start_transaction(root, 1);
        btrfs_set_trans_block_group(trans, inode);
        btrfs_update_inode(trans, root, inode);
        btrfs_end_transaction(trans, root);
-       mutex_unlock(&root->fs_info->fs_mutex);
 }
 
 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        if (!new_valid_dev(rdev))
                return -EINVAL;
 
-       mutex_lock(&root->fs_info->fs_mutex);
        err = btrfs_check_free_space(root, 1, 0);
        if (err)
                goto fail;
        nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
 fail:
-       mutex_unlock(&root->fs_info->fs_mutex);
-
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
        unsigned long nr = 0;
        u64 objectid;
 
-       mutex_lock(&root->fs_info->fs_mutex);
        err = btrfs_check_free_space(root, 1, 0);
        if (err)
                goto fail;
        nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
 fail:
-       mutex_unlock(&root->fs_info->fs_mutex);
-
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
 #else
        inc_nlink(inode);
 #endif
-       mutex_lock(&root->fs_info->fs_mutex);
        err = btrfs_check_free_space(root, 1, 0);
        if (err)
                goto fail;
        nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
 fail:
-       mutex_unlock(&root->fs_info->fs_mutex);
-
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
        u64 objectid = 0;
        unsigned long nr = 1;
 
-       mutex_lock(&root->fs_info->fs_mutex);
        err = btrfs_check_free_space(root, 1, 0);
        if (err)
                goto out_unlock;
        btrfs_end_transaction(trans, root);
 
 out_unlock:
-       mutex_unlock(&root->fs_info->fs_mutex);
        if (drop_on_err)
                iput(inode);
        btrfs_btree_balance_dirty(root, nr);
 
        path = btrfs_alloc_path();
        BUG_ON(!path);
-       mutex_lock(&root->fs_info->fs_mutex);
 
 again:
        spin_lock(&em_tree->lock);
                if (!err)
                        err = ret;
        }
-       mutex_unlock(&root->fs_info->fs_mutex);
        if (err) {
                free_extent_map(em);
                WARN_ON(1);
        int ret;
        u64 page_start;
 
-       mutex_lock(&root->fs_info->fs_mutex);
        ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
-       mutex_unlock(&root->fs_info->fs_mutex);
        if (ret)
                goto out;
 
 
        btrfs_truncate_page(inode->i_mapping, inode->i_size);
 
-       mutex_lock(&root->fs_info->fs_mutex);
        trans = btrfs_start_transaction(root, 1);
        btrfs_set_trans_block_group(trans, inode);
 
 
        ret = btrfs_end_transaction(trans, root);
        BUG_ON(ret);
-       mutex_unlock(&root->fs_info->fs_mutex);
        btrfs_btree_balance_dirty(root, nr);
        btrfs_throttle(root);
 }
        struct inode *new_inode = new_dentry->d_inode;
        struct inode *old_inode = old_dentry->d_inode;
        struct timespec ctime = CURRENT_TIME;
-       struct btrfs_path *path;
        int ret;
 
        if (S_ISDIR(old_inode->i_mode) && new_inode &&
                return -ENOTEMPTY;
        }
 
-       mutex_lock(&root->fs_info->fs_mutex);
        ret = btrfs_check_free_space(root, 1, 0);
        if (ret)
                goto out_unlock;
        trans = btrfs_start_transaction(root, 1);
 
        btrfs_set_trans_block_group(trans, new_dir);
-       path = btrfs_alloc_path();
-       if (!path) {
-               ret = -ENOMEM;
-               goto out_fail;
-       }
 
        old_dentry->d_inode->i_nlink++;
        old_dir->i_ctime = old_dir->i_mtime = ctime;
                goto out_fail;
 
 out_fail:
-       btrfs_free_path(path);
        btrfs_end_transaction(trans, root);
 out_unlock:
-       mutex_unlock(&root->fs_info->fs_mutex);
        return ret;
 }
 
        if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
                return -ENAMETOOLONG;
 
-       mutex_lock(&root->fs_info->fs_mutex);
        err = btrfs_check_free_space(root, 1, 0);
        if (err)
                goto out_fail;
        nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
 out_fail:
-       mutex_unlock(&root->fs_info->fs_mutex);
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
 
        u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
        unsigned long nr = 1;
 
-       mutex_lock(&root->fs_info->fs_mutex);
        ret = btrfs_check_free_space(root, 1, 0);
        if (ret)
                goto fail_commit;
        if (err && !ret)
                ret = err;
 fail_commit:
-       mutex_unlock(&root->fs_info->fs_mutex);
        btrfs_btree_balance_dirty(root, nr);
        btrfs_throttle(root);
        return ret;
        if (!root->ref_cows)
                return -EINVAL;
 
-       mutex_lock(&root->fs_info->fs_mutex);
        ret = btrfs_check_free_space(root, 1, 0);
        if (ret)
                goto fail_unlock;
        err = btrfs_commit_transaction(trans, root);
 
 fail_unlock:
-       mutex_unlock(&root->fs_info->fs_mutex);
        btrfs_btree_balance_dirty(root, nr);
        btrfs_throttle(root);
        return ret;
        unsigned long i;
        int ret;
 
-       mutex_lock(&root->fs_info->fs_mutex);
        ret = btrfs_check_free_space(root, inode->i_size, 0);
-       mutex_unlock(&root->fs_info->fs_mutex);
        if (ret)
                return -ENOSPC;
 
                goto out;
        }
 
-       mutex_lock(&root->fs_info->fs_mutex);
+       mutex_lock(&root->fs_info->alloc_mutex);
+       mutex_lock(&root->fs_info->chunk_mutex);
        sizestr = vol_args->name;
        devstr = strchr(sizestr, ':');
        if (devstr) {
        }
 
 out_unlock:
-       mutex_unlock(&root->fs_info->fs_mutex);
+       mutex_lock(&root->fs_info->alloc_mutex);
+       mutex_lock(&root->fs_info->chunk_mutex);
 out:
        kfree(vol_args);
        return ret;
        }
 
        root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
-       mutex_lock(&root->fs_info->fs_mutex);
        di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
                            path, root_dirid,
                            vol_args->name, namelen, 0);
-       mutex_unlock(&root->fs_info->fs_mutex);
        btrfs_free_path(path);
 
        if (di && !IS_ERR(di)) {
                goto out;
        }
 
+       mutex_lock(&root->fs_info->drop_mutex);
        if (root == root->fs_info->tree_root)
                ret = create_subvol(root, vol_args->name, namelen);
        else
                ret = create_snapshot(root, vol_args->name, namelen);
+       mutex_unlock(&root->fs_info->drop_mutex);
 out:
        kfree(vol_args);
        return ret;
 
        switch (inode->i_mode & S_IFMT) {
        case S_IFDIR:
-               mutex_lock(&root->fs_info->fs_mutex);
                btrfs_defrag_root(root, 0);
                btrfs_defrag_root(root->fs_info->extent_root, 0);
-               mutex_unlock(&root->fs_info->fs_mutex);
                break;
        case S_IFREG:
                btrfs_defrag_file(file);
                unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
        }
 
-       mutex_lock(&root->fs_info->fs_mutex);
        trans = btrfs_start_transaction(root, 0);
        path = btrfs_alloc_path();
        if (!path) {
        unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
 
        btrfs_end_transaction(trans, root);
-       mutex_unlock(&root->fs_info->fs_mutex);
 
 out_unlock:
        mutex_unlock(&src->i_mutex);
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       mutex_lock(&root->fs_info->fs_mutex);
        if (file->private_data) {
                ret = -EINPROGRESS;
                goto out;
                ret = -ENOMEM;
        /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/
 out:
-       mutex_unlock(&root->fs_info->fs_mutex);
        return ret;
 }
 
        struct btrfs_trans_handle *trans;
        int ret = 0;
 
-       mutex_lock(&root->fs_info->fs_mutex);
        trans = file->private_data;
        if (!trans) {
                ret = -EINVAL;
        btrfs_end_transaction(trans, root);
        file->private_data = 0;
 out:
-       mutex_unlock(&root->fs_info->fs_mutex);
        return ret;
 }
 
 
                return 0;
        }
        btrfs_clean_old_snapshots(root);
-       mutex_lock(&root->fs_info->fs_mutex);
        btrfs_defrag_dirty_roots(root->fs_info);
        trans = btrfs_start_transaction(root, 1);
        ret = btrfs_commit_transaction(trans, root);
        sb->s_dirt = 0;
-       mutex_unlock(&root->fs_info->fs_mutex);
        return ret;
 }
 
 
        struct btrfs_trans_handle *trans;
        unsigned long nr;
 
+       smp_mb();
        if (root->defrag_running)
                return 0;
        trans = btrfs_start_transaction(root, 1);
                ret = btrfs_defrag_leaves(trans, root, cacheonly);
                nr = trans->blocks_used;
                btrfs_end_transaction(trans, root);
-               mutex_unlock(&info->fs_mutex);
                btrfs_btree_balance_dirty(info->tree_root, nr);
                cond_resched();
 
-               mutex_lock(&info->fs_mutex);
                trans = btrfs_start_transaction(root, 1);
                if (ret != -EAGAIN)
                        break;
        }
        root->defrag_running = 0;
+       smp_mb();
        radix_tree_tag_clear(&info->fs_roots_radix,
                     (unsigned long)root->root_key.objectid,
                     BTRFS_ROOT_DEFRAG_TAG);
        while(!list_empty(list)) {
                struct btrfs_root *root;
 
-               mutex_lock(&tree_root->fs_info->fs_mutex);
                dirty = list_entry(list->next, struct dirty_root, list);
                list_del_init(&dirty->list);
 
                num_bytes = btrfs_root_used(&dirty->root->root_item);
                root = dirty->latest_root;
-               root->fs_info->throttles++;
+               atomic_inc(&root->fs_info->throttles);
 
+               mutex_lock(&root->fs_info->drop_mutex);
                while(1) {
                        trans = btrfs_start_transaction(tree_root, 1);
                        ret = btrfs_drop_snapshot(trans, dirty->root);
                        nr = trans->blocks_used;
                        ret = btrfs_end_transaction(trans, tree_root);
                        BUG_ON(ret);
-                       mutex_unlock(&tree_root->fs_info->fs_mutex);
+
+                       mutex_unlock(&root->fs_info->drop_mutex);
                        btrfs_btree_balance_dirty(tree_root, nr);
                        cond_resched();
-                       mutex_lock(&tree_root->fs_info->fs_mutex);
+                       mutex_lock(&root->fs_info->drop_mutex);
                }
                BUG_ON(ret);
-               root->fs_info->throttles--;
+               atomic_dec(&root->fs_info->throttles);
 
+               mutex_lock(&root->fs_info->alloc_mutex);
                num_bytes -= btrfs_root_used(&dirty->root->root_item);
                bytes_used = btrfs_root_used(&root->root_item);
                if (num_bytes) {
                        btrfs_set_root_used(&root->root_item,
                                            bytes_used - num_bytes);
                }
+               mutex_unlock(&root->fs_info->alloc_mutex);
+
                ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
                if (ret) {
                        BUG();
                        break;
                }
+               mutex_unlock(&root->fs_info->drop_mutex);
+
                nr = trans->blocks_used;
                ret = btrfs_end_transaction(trans, tree_root);
                BUG_ON(ret);
                free_extent_buffer(dirty->root->node);
                kfree(dirty->root);
                kfree(dirty);
-               mutex_unlock(&tree_root->fs_info->fs_mutex);
 
                btrfs_btree_balance_dirty(tree_root, nr);
                cond_resched();
        u64 objectid = 0;
        int ret;
 
-       root->fs_info->throttles++;
+       atomic_inc(&root->fs_info->throttles);
        while(1) {
                ret = btrfs_find_first_ordered_inode(
                                &cur_trans->ordered_inode_tree,
                        break;
 
                mutex_unlock(&root->fs_info->trans_mutex);
-               mutex_unlock(&root->fs_info->fs_mutex);
 
                if (S_ISREG(inode->i_mode)) {
                        atomic_inc(&BTRFS_I(inode)->ordered_writeback);
                }
                iput(inode);
 
-               mutex_lock(&root->fs_info->fs_mutex);
                mutex_lock(&root->fs_info->trans_mutex);
        }
        while(1) {
                if (!ret)
                        break;
                mutex_unlock(&root->fs_info->trans_mutex);
-               mutex_unlock(&root->fs_info->fs_mutex);
 
                if (S_ISREG(inode->i_mode)) {
                        atomic_inc(&BTRFS_I(inode)->ordered_writeback);
                atomic_dec(&inode->i_count);
                iput(inode);
 
-               mutex_lock(&root->fs_info->fs_mutex);
                mutex_lock(&root->fs_info->trans_mutex);
        }
-       root->fs_info->throttles--;
+       atomic_dec(&root->fs_info->throttles);
        return 0;
 }
 
                mutex_unlock(&root->fs_info->trans_mutex);
                btrfs_end_transaction(trans, root);
 
-               mutex_unlock(&root->fs_info->fs_mutex);
                ret = wait_for_commit(root, cur_trans);
                BUG_ON(ret);
 
                put_transaction(cur_trans);
                mutex_unlock(&root->fs_info->trans_mutex);
 
-               mutex_lock(&root->fs_info->fs_mutex);
                return 0;
        }
 
                                        struct btrfs_transaction, list);
                if (!prev_trans->commit_done) {
                        prev_trans->use_count++;
-                       mutex_unlock(&root->fs_info->fs_mutex);
                        mutex_unlock(&root->fs_info->trans_mutex);
 
                        wait_for_commit(root, prev_trans);
 
-                       mutex_lock(&root->fs_info->fs_mutex);
                        mutex_lock(&root->fs_info->trans_mutex);
                        put_transaction(prev_trans);
                }
                else
                        timeout = 1;
 
-               mutex_unlock(&root->fs_info->fs_mutex);
                mutex_unlock(&root->fs_info->trans_mutex);
 
                schedule_timeout(timeout);
 
-               mutex_lock(&root->fs_info->fs_mutex);
                mutex_lock(&root->fs_info->trans_mutex);
                finish_wait(&cur_trans->writer_wait, &wait);
                ret = btrfs_write_ordered_inodes(trans, root);
        btrfs_copy_pinned(root, pinned_copy);
 
        mutex_unlock(&root->fs_info->trans_mutex);
-       mutex_unlock(&root->fs_info->fs_mutex);
        ret = btrfs_write_and_wait_transaction(trans, root);
        BUG_ON(ret);
        write_ctree_super(trans, root);
 
-       mutex_lock(&root->fs_info->fs_mutex);
        btrfs_finish_extent_commit(trans, root, pinned_copy);
        mutex_lock(&root->fs_info->trans_mutex);
 
        kmem_cache_free(btrfs_trans_handle_cachep, trans);
 
        if (root->fs_info->closing) {
-               mutex_unlock(&root->fs_info->fs_mutex);
                drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
-               mutex_lock(&root->fs_info->fs_mutex);
        }
        return ret;
 }
        unsigned long delay = HZ * 30;
        int ret;
 
-       mutex_lock(&root->fs_info->fs_mutex);
+       smp_mb();
        if (root->fs_info->closing)
                goto out;
 
        trans = btrfs_start_transaction(root, 1);
        ret = btrfs_commit_transaction(trans, root);
 out:
-       mutex_unlock(&root->fs_info->fs_mutex);
        btrfs_clean_old_snapshots(root);
        btrfs_transaction_queue_work(root, delay);
 }
 
        u64 devid;
        int ret = 0;
 
-       mutex_lock(&root->fs_info->fs_mutex);
+       mutex_lock(&root->fs_info->alloc_mutex);
+       mutex_lock(&root->fs_info->chunk_mutex);
        mutex_lock(&uuid_mutex);
 
        all_avail = root->fs_info->avail_data_alloc_bits |
                close_bdev_excl(bdev);
 out:
        mutex_unlock(&uuid_mutex);
-       mutex_unlock(&root->fs_info->fs_mutex);
+       mutex_unlock(&root->fs_info->chunk_mutex);
+       mutex_unlock(&root->fs_info->alloc_mutex);
        return ret;
 }
 
        if (!bdev) {
                return -EIO;
        }
-       mutex_lock(&root->fs_info->fs_mutex);
+
+       mutex_lock(&root->fs_info->alloc_mutex);
+       mutex_lock(&root->fs_info->chunk_mutex);
+
        trans = btrfs_start_transaction(root, 1);
        devices = &root->fs_info->fs_devices->devices;
        list_for_each(cur, devices) {
        root->fs_info->fs_devices->open_devices++;
 out:
        btrfs_end_transaction(trans, root);
-       mutex_unlock(&root->fs_info->fs_mutex);
+       mutex_unlock(&root->fs_info->chunk_mutex);
+       mutex_unlock(&root->fs_info->alloc_mutex);
+
        return ret;
 
 out_close_bdev:
        struct btrfs_key found_key;
 
 
+       BUG(); /* FIXME, needs locking */
+
        dev_root = dev_root->fs_info->dev_root;
 
-       mutex_lock(&dev_root->fs_info->fs_mutex);
        /* step one make some room on all the devices */
        list_for_each(cur, devices) {
                device = list_entry(cur, struct btrfs_device, dev_list);
        ret = 0;
 error:
        btrfs_free_path(path);
-       mutex_unlock(&dev_root->fs_info->fs_mutex);
        return ret;
 }
 
 
                return -ENOMEM;
        }
 
-       mutex_lock(&root->fs_info->fs_mutex);
        /* lookup the xattr by name */
        di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name,
                                strlen(name), 0);
        ret = btrfs_dir_data_len(leaf, di);
 
 out:
-       mutex_unlock(&root->fs_info->fs_mutex);
        kfree(name);
        btrfs_free_path(path);
        return ret;
                return -ENOMEM;
        }
 
-       mutex_lock(&root->fs_info->fs_mutex);
        trans = btrfs_start_transaction(root, 1);
        btrfs_set_trans_block_group(trans, inode);
 
        }
 
        btrfs_end_transaction(trans, root);
-       mutex_unlock(&root->fs_info->fs_mutex);
        kfree(name);
        btrfs_free_path(path);
 
                return -ENOMEM;
        path->reada = 2;
 
-       mutex_lock(&root->fs_info->fs_mutex);
-
        /* search for our xattrs */
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
        ret = total_size;
 
 err:
-       mutex_unlock(&root->fs_info->fs_mutex);
        btrfs_free_path(path);
 
        return ret;
 }
 
 /*
- * delete all the xattrs associated with the inode.  fs_mutex should be
- * held when we come into here
+ * delete all the xattrs associated with the inode.
  */
 int btrfs_delete_xattrs(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root, struct inode *inode)