www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - fs/btrfs/super.c
Btrfs: d_type optimization
[linux-2.6-omap-h63xx.git] / fs / btrfs / super.c
1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
3 #include <linux/fs.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
14 #include <linux/statfs.h>
15 #include "ctree.h"
16 #include "disk-io.h"
17 #include "transaction.h"
18 #include "btrfs_inode.h"
19 #include "ioctl.h"
20 #include "print-tree.h"
21
22 struct btrfs_iget_args {
23         u64 ino;
24         struct btrfs_root *root;
25 };
26
/*
 * Filesystem magic number for btrfs.
 * NOTE(review): its users (presumably the superblock setup / statfs code)
 * are outside this part of the file -- confirm before changing the value.
 */
#define BTRFS_SUPER_MAGIC 0x9123682E
28
29 static struct inode_operations btrfs_dir_inode_operations;
30 static struct inode_operations btrfs_symlink_inode_operations;
31 static struct inode_operations btrfs_dir_ro_inode_operations;
32 static struct super_operations btrfs_super_ops;
33 static struct file_operations btrfs_dir_file_operations;
34 static struct inode_operations btrfs_file_inode_operations;
35 static struct address_space_operations btrfs_aops;
36 static struct address_space_operations btrfs_symlink_aops;
37 static struct file_operations btrfs_file_operations;
38
39 static int drop_extents(struct btrfs_trans_handle *trans,
40                           struct btrfs_root *root,
41                           struct inode *inode,
42                           u64 start, u64 end, u64 *hint_block);
43 static int btrfs_get_block(struct inode *inode, sector_t iblock,
44                            struct buffer_head *result, int create);
45
46
47 #define S_SHIFT 12
48 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
49         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
50         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
51         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
52         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
53         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
54         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
55         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
56 };
57
58 static void btrfs_read_locked_inode(struct inode *inode)
59 {
60         struct btrfs_path *path;
61         struct btrfs_inode_item *inode_item;
62         struct btrfs_root *root = BTRFS_I(inode)->root;
63         struct btrfs_key location;
64         struct btrfs_block_group_cache *alloc_group;
65         u64 alloc_group_block;
66         int ret;
67
68         path = btrfs_alloc_path();
69         BUG_ON(!path);
70         btrfs_init_path(path);
71         mutex_lock(&root->fs_info->fs_mutex);
72
73         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
74         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
75         if (ret) {
76                 btrfs_free_path(path);
77                 goto make_bad;
78         }
79         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
80                                   path->slots[0],
81                                   struct btrfs_inode_item);
82
83         inode->i_mode = btrfs_inode_mode(inode_item);
84         inode->i_nlink = btrfs_inode_nlink(inode_item);
85         inode->i_uid = btrfs_inode_uid(inode_item);
86         inode->i_gid = btrfs_inode_gid(inode_item);
87         inode->i_size = btrfs_inode_size(inode_item);
88         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
89         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
90         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
91         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
92         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
93         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
94         inode->i_blocks = btrfs_inode_nblocks(inode_item);
95         inode->i_generation = btrfs_inode_generation(inode_item);
96         alloc_group_block = btrfs_inode_block_group(inode_item);
97         ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix,
98                                      (void **)&alloc_group,
99                                      alloc_group_block, 1);
100         BUG_ON(!ret);
101         BTRFS_I(inode)->block_group = alloc_group;
102
103         btrfs_free_path(path);
104         inode_item = NULL;
105
106         mutex_unlock(&root->fs_info->fs_mutex);
107
108         switch (inode->i_mode & S_IFMT) {
109 #if 0
110         default:
111                 init_special_inode(inode, inode->i_mode,
112                                    btrfs_inode_rdev(inode_item));
113                 break;
114 #endif
115         case S_IFREG:
116                 inode->i_mapping->a_ops = &btrfs_aops;
117                 inode->i_fop = &btrfs_file_operations;
118                 inode->i_op = &btrfs_file_inode_operations;
119                 break;
120         case S_IFDIR:
121                 inode->i_fop = &btrfs_dir_file_operations;
122                 if (root == root->fs_info->tree_root)
123                         inode->i_op = &btrfs_dir_ro_inode_operations;
124                 else
125                         inode->i_op = &btrfs_dir_inode_operations;
126                 break;
127         case S_IFLNK:
128                 inode->i_op = &btrfs_symlink_inode_operations;
129                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
130                 break;
131         }
132         return;
133
134 make_bad:
135         btrfs_release_path(root, path);
136         btrfs_free_path(path);
137         mutex_unlock(&root->fs_info->fs_mutex);
138         make_bad_inode(inode);
139 }
140
141 static void fill_inode_item(struct btrfs_inode_item *item,
142                             struct inode *inode)
143 {
144         btrfs_set_inode_uid(item, inode->i_uid);
145         btrfs_set_inode_gid(item, inode->i_gid);
146         btrfs_set_inode_size(item, inode->i_size);
147         btrfs_set_inode_mode(item, inode->i_mode);
148         btrfs_set_inode_nlink(item, inode->i_nlink);
149         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
150         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
151         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
152         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
153         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
154         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
155         btrfs_set_inode_nblocks(item, inode->i_blocks);
156         btrfs_set_inode_generation(item, inode->i_generation);
157         btrfs_set_inode_block_group(item,
158                                     BTRFS_I(inode)->block_group->key.objectid);
159 }
160
161 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
162                               struct btrfs_root *root,
163                               struct inode *inode)
164 {
165         struct btrfs_inode_item *inode_item;
166         struct btrfs_path *path;
167         int ret;
168
169         path = btrfs_alloc_path();
170         BUG_ON(!path);
171         btrfs_init_path(path);
172         ret = btrfs_lookup_inode(trans, root, path,
173                                  &BTRFS_I(inode)->location, 1);
174         if (ret) {
175                 if (ret > 0)
176                         ret = -ENOENT;
177                 goto failed;
178         }
179
180         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
181                                   path->slots[0],
182                                   struct btrfs_inode_item);
183
184         fill_inode_item(inode_item, inode);
185         btrfs_mark_buffer_dirty(path->nodes[0]);
186         ret = 0;
187 failed:
188         btrfs_release_path(root, path);
189         btrfs_free_path(path);
190         return ret;
191 }
192
193
194 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
195                               struct btrfs_root *root,
196                               struct inode *dir,
197                               struct dentry *dentry)
198 {
199         struct btrfs_path *path;
200         const char *name = dentry->d_name.name;
201         int name_len = dentry->d_name.len;
202         int ret = 0;
203         u64 objectid;
204         struct btrfs_dir_item *di;
205
206         path = btrfs_alloc_path();
207         BUG_ON(!path);
208         btrfs_init_path(path);
209         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
210                                     name, name_len, -1);
211         if (IS_ERR(di)) {
212                 ret = PTR_ERR(di);
213                 goto err;
214         }
215         if (!di) {
216                 ret = -ENOENT;
217                 goto err;
218         }
219         objectid = btrfs_disk_key_objectid(&di->location);
220         ret = btrfs_delete_one_dir_name(trans, root, path, di);
221         BUG_ON(ret);
222         btrfs_release_path(root, path);
223
224         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
225                                          objectid, name, name_len, -1);
226         if (IS_ERR(di)) {
227                 ret = PTR_ERR(di);
228                 goto err;
229         }
230         if (!di) {
231                 ret = -ENOENT;
232                 goto err;
233         }
234         ret = btrfs_delete_one_dir_name(trans, root, path, di);
235         BUG_ON(ret);
236
237         dentry->d_inode->i_ctime = dir->i_ctime;
238 err:
239         btrfs_free_path(path);
240         if (!ret) {
241                 dir->i_size -= name_len * 2;
242                 btrfs_update_inode(trans, root, dir);
243                 drop_nlink(dentry->d_inode);
244                 btrfs_update_inode(trans, root, dentry->d_inode);
245                 dir->i_sb->s_dirt = 1;
246         }
247         return ret;
248 }
249
250 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
251 {
252         struct btrfs_root *root;
253         struct btrfs_trans_handle *trans;
254         int ret;
255
256         root = BTRFS_I(dir)->root;
257         mutex_lock(&root->fs_info->fs_mutex);
258         trans = btrfs_start_transaction(root, 1);
259         btrfs_set_trans_block_group(trans, dir);
260         ret = btrfs_unlink_trans(trans, root, dir, dentry);
261         btrfs_end_transaction(trans, root);
262         mutex_unlock(&root->fs_info->fs_mutex);
263         btrfs_btree_balance_dirty(root);
264         return ret;
265 }
266
267 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
268 {
269         struct inode *inode = dentry->d_inode;
270         int err;
271         int ret;
272         struct btrfs_root *root = BTRFS_I(dir)->root;
273         struct btrfs_path *path;
274         struct btrfs_key key;
275         struct btrfs_trans_handle *trans;
276         struct btrfs_key found_key;
277         int found_type;
278         struct btrfs_leaf *leaf;
279         char *goodnames = "..";
280
281         path = btrfs_alloc_path();
282         BUG_ON(!path);
283         btrfs_init_path(path);
284         mutex_lock(&root->fs_info->fs_mutex);
285         trans = btrfs_start_transaction(root, 1);
286         btrfs_set_trans_block_group(trans, dir);
287         key.objectid = inode->i_ino;
288         key.offset = (u64)-1;
289         key.flags = (u32)-1;
290         while(1) {
291                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
292                 if (ret < 0) {
293                         err = ret;
294                         goto out;
295                 }
296                 BUG_ON(ret == 0);
297                 if (path->slots[0] == 0) {
298                         err = -ENOENT;
299                         goto out;
300                 }
301                 path->slots[0]--;
302                 leaf = btrfs_buffer_leaf(path->nodes[0]);
303                 btrfs_disk_key_to_cpu(&found_key,
304                                       &leaf->items[path->slots[0]].key);
305                 found_type = btrfs_key_type(&found_key);
306                 if (found_key.objectid != inode->i_ino) {
307                         err = -ENOENT;
308                         goto out;
309                 }
310                 if ((found_type != BTRFS_DIR_ITEM_KEY &&
311                      found_type != BTRFS_DIR_INDEX_KEY) ||
312                     (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
313                     !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
314                         err = -ENOTEMPTY;
315                         goto out;
316                 }
317                 ret = btrfs_del_item(trans, root, path);
318                 BUG_ON(ret);
319
320                 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
321                         break;
322                 btrfs_release_path(root, path);
323         }
324         ret = 0;
325         btrfs_release_path(root, path);
326
327         /* now the directory is empty */
328         err = btrfs_unlink_trans(trans, root, dir, dentry);
329         if (!err) {
330                 inode->i_size = 0;
331         }
332 out:
333         btrfs_release_path(root, path);
334         btrfs_free_path(path);
335         mutex_unlock(&root->fs_info->fs_mutex);
336         ret = btrfs_end_transaction(trans, root);
337         btrfs_btree_balance_dirty(root);
338         if (ret && !err)
339                 err = ret;
340         return err;
341 }
342
343 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
344                             struct btrfs_root *root,
345                             struct inode *inode)
346 {
347         struct btrfs_path *path;
348         int ret;
349
350         clear_inode(inode);
351
352         path = btrfs_alloc_path();
353         BUG_ON(!path);
354         btrfs_init_path(path);
355         ret = btrfs_lookup_inode(trans, root, path,
356                                  &BTRFS_I(inode)->location, -1);
357         BUG_ON(ret);
358         ret = btrfs_del_item(trans, root, path);
359         BUG_ON(ret);
360         btrfs_free_path(path);
361         return ret;
362 }
363
364 static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path,
365                            u64 objectid)
366 {
367         struct btrfs_node *node;
368         int i;
369         int nritems;
370         u64 item_objectid;
371         u64 blocknr;
372         int slot;
373         int ret;
374
375         if (!path->nodes[1])
376                 return;
377         node = btrfs_buffer_node(path->nodes[1]);
378         slot = path->slots[1];
379         if (slot == 0)
380                 return;
381         nritems = btrfs_header_nritems(&node->header);
382         for (i = slot - 1; i >= 0; i--) {
383                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
384                 if (item_objectid != objectid)
385                         break;
386                 blocknr = btrfs_node_blockptr(node, i);
387                 ret = readahead_tree_block(root, blocknr);
388                 if (ret)
389                         break;
390         }
391 }
392
393 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
394                                    struct btrfs_root *root,
395                                    struct inode *inode)
396 {
397         int ret;
398         struct btrfs_path *path;
399         struct btrfs_key key;
400         struct btrfs_disk_key *found_key;
401         u32 found_type;
402         struct btrfs_leaf *leaf;
403         struct btrfs_file_extent_item *fi;
404         u64 extent_start = 0;
405         u64 extent_num_blocks = 0;
406         u64 item_end = 0;
407         int found_extent;
408         int del_item;
409
410         path = btrfs_alloc_path();
411         BUG_ON(!path);
412         /* FIXME, add redo link to tree so we don't leak on crash */
413         key.objectid = inode->i_ino;
414         key.offset = (u64)-1;
415         key.flags = (u32)-1;
416         while(1) {
417                 btrfs_init_path(path);
418                 fi = NULL;
419                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
420                 if (ret < 0) {
421                         goto error;
422                 }
423                 if (ret > 0) {
424                         BUG_ON(path->slots[0] == 0);
425                         path->slots[0]--;
426                 }
427                 reada_truncate(root, path, inode->i_ino);
428                 leaf = btrfs_buffer_leaf(path->nodes[0]);
429                 found_key = &leaf->items[path->slots[0]].key;
430                 found_type = btrfs_disk_key_type(found_key);
431                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
432                         break;
433                 if (found_type != BTRFS_CSUM_ITEM_KEY &&
434                     found_type != BTRFS_DIR_ITEM_KEY &&
435                     found_type != BTRFS_DIR_INDEX_KEY &&
436                     found_type != BTRFS_EXTENT_DATA_KEY)
437                         break;
438                 item_end = btrfs_disk_key_offset(found_key);
439                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
440                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
441                                             path->slots[0],
442                                             struct btrfs_file_extent_item);
443                         if (btrfs_file_extent_type(fi) !=
444                             BTRFS_FILE_EXTENT_INLINE) {
445                                 item_end += btrfs_file_extent_num_blocks(fi) <<
446                                                 inode->i_blkbits;
447                         }
448                 }
449                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
450                         ret = btrfs_csum_truncate(trans, root, path,
451                                                   inode->i_size);
452                         BUG_ON(ret);
453                 }
454                 if (item_end < inode->i_size) {
455                         if (found_type) {
456                                 btrfs_set_key_type(&key, found_type - 1);
457                                 continue;
458                         }
459                         break;
460                 }
461                 if (btrfs_disk_key_offset(found_key) >= inode->i_size)
462                         del_item = 1;
463                 else
464                         del_item = 0;
465                 found_extent = 0;
466
467                 if (found_type == BTRFS_EXTENT_DATA_KEY &&
468                            btrfs_file_extent_type(fi) !=
469                            BTRFS_FILE_EXTENT_INLINE) {
470                         u64 num_dec;
471                         if (!del_item) {
472                                 u64 orig_num_blocks =
473                                         btrfs_file_extent_num_blocks(fi);
474                                 extent_num_blocks = inode->i_size -
475                                         btrfs_disk_key_offset(found_key) +
476                                         root->blocksize - 1;
477                                 extent_num_blocks >>= inode->i_blkbits;
478                                 btrfs_set_file_extent_num_blocks(fi,
479                                                          extent_num_blocks);
480                                 inode->i_blocks -= (orig_num_blocks -
481                                         extent_num_blocks) << 3;
482                                 mark_buffer_dirty(path->nodes[0]);
483                         } else {
484                                 extent_start =
485                                         btrfs_file_extent_disk_blocknr(fi);
486                                 extent_num_blocks =
487                                         btrfs_file_extent_disk_num_blocks(fi);
488                                 /* FIXME blocksize != 4096 */
489                                 num_dec = btrfs_file_extent_num_blocks(fi) << 3;
490                                 if (extent_start != 0) {
491                                         found_extent = 1;
492                                         inode->i_blocks -= num_dec;
493                                 }
494                         }
495                 }
496                 if (del_item) {
497                         ret = btrfs_del_item(trans, root, path);
498                         BUG_ON(ret);
499                 } else {
500                         break;
501                 }
502                 btrfs_release_path(root, path);
503                 if (found_extent) {
504                         ret = btrfs_free_extent(trans, root, extent_start,
505                                                 extent_num_blocks, 0);
506                         BUG_ON(ret);
507                 }
508         }
509         ret = 0;
510 error:
511         btrfs_release_path(root, path);
512         btrfs_free_path(path);
513         inode->i_sb->s_dirt = 1;
514         return ret;
515 }
516
517 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
518 {
519         struct inode *inode = mapping->host;
520         unsigned blocksize = 1 << inode->i_blkbits;
521         pgoff_t index = from >> PAGE_CACHE_SHIFT;
522         unsigned offset = from & (PAGE_CACHE_SIZE-1);
523         struct page *page;
524         char *kaddr;
525         int ret = 0;
526         struct btrfs_root *root = BTRFS_I(inode)->root;
527         u64 alloc_hint;
528         struct btrfs_key ins;
529         struct btrfs_trans_handle *trans;
530
531         if ((offset & (blocksize - 1)) == 0)
532                 goto out;
533
534         ret = -ENOMEM;
535         page = grab_cache_page(mapping, index);
536         if (!page)
537                 goto out;
538
539         if (!PageUptodate(page)) {
540                 ret = mpage_readpage(page, btrfs_get_block);
541                 lock_page(page);
542                 if (!PageUptodate(page)) {
543                         ret = -EIO;
544                         goto out;
545                 }
546         }
547         mutex_lock(&root->fs_info->fs_mutex);
548         trans = btrfs_start_transaction(root, 1);
549         btrfs_set_trans_block_group(trans, inode);
550
551         ret = drop_extents(trans, root, inode, page->index << PAGE_CACHE_SHIFT,
552                            (page->index + 1) << PAGE_CACHE_SHIFT, &alloc_hint);
553         BUG_ON(ret);
554         ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1,
555                                  alloc_hint, (u64)-1, &ins, 1);
556         BUG_ON(ret);
557         ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
558                                        page->index << PAGE_CACHE_SHIFT,
559                                        ins.objectid, 1, 1);
560         BUG_ON(ret);
561         SetPageChecked(page);
562         kaddr = kmap(page);
563         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
564         flush_dcache_page(page);
565         btrfs_csum_file_block(trans, root, inode->i_ino,
566                               page->index << PAGE_CACHE_SHIFT,
567                               kaddr, PAGE_CACHE_SIZE);
568         kunmap(page);
569         btrfs_end_transaction(trans, root);
570         mutex_unlock(&root->fs_info->fs_mutex);
571
572         set_page_dirty(page);
573         unlock_page(page);
574         page_cache_release(page);
575 out:
576         return ret;
577 }
578
579 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
580 {
581         struct inode *inode = dentry->d_inode;
582         int err;
583
584         err = inode_change_ok(inode, attr);
585         if (err)
586                 return err;
587
588         if (S_ISREG(inode->i_mode) &&
589             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
590                 struct btrfs_trans_handle *trans;
591                 struct btrfs_root *root = BTRFS_I(inode)->root;
592                 u64 mask = root->blocksize - 1;
593                 u64 pos = (inode->i_size + mask) & ~mask;
594                 u64 hole_size;
595
596                 if (attr->ia_size <= pos)
597                         goto out;
598
599                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
600
601                 hole_size = (attr->ia_size - pos + mask) & ~mask;
602                 hole_size >>= inode->i_blkbits;
603
604                 mutex_lock(&root->fs_info->fs_mutex);
605                 trans = btrfs_start_transaction(root, 1);
606                 btrfs_set_trans_block_group(trans, inode);
607                 err = btrfs_insert_file_extent(trans, root, inode->i_ino,
608                                                pos, 0, 0, hole_size);
609                 BUG_ON(err);
610                 btrfs_end_transaction(trans, root);
611                 mutex_unlock(&root->fs_info->fs_mutex);
612         }
613 out:
614         err = inode_setattr(inode, attr);
615
616         return err;
617 }
618 static void btrfs_delete_inode(struct inode *inode)
619 {
620         struct btrfs_trans_handle *trans;
621         struct btrfs_root *root = BTRFS_I(inode)->root;
622         int ret;
623
624         truncate_inode_pages(&inode->i_data, 0);
625         if (is_bad_inode(inode)) {
626                 goto no_delete;
627         }
628         inode->i_size = 0;
629         mutex_lock(&root->fs_info->fs_mutex);
630         trans = btrfs_start_transaction(root, 1);
631         btrfs_set_trans_block_group(trans, inode);
632         ret = btrfs_truncate_in_trans(trans, root, inode);
633         BUG_ON(ret);
634         btrfs_free_inode(trans, root, inode);
635         btrfs_end_transaction(trans, root);
636         mutex_unlock(&root->fs_info->fs_mutex);
637         btrfs_btree_balance_dirty(root);
638         return;
639 no_delete:
640         clear_inode(inode);
641 }
642
643 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
644                                struct btrfs_key *location)
645 {
646         const char *name = dentry->d_name.name;
647         int namelen = dentry->d_name.len;
648         struct btrfs_dir_item *di;
649         struct btrfs_path *path;
650         struct btrfs_root *root = BTRFS_I(dir)->root;
651         int ret;
652
653         path = btrfs_alloc_path();
654         BUG_ON(!path);
655         btrfs_init_path(path);
656         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
657                                     namelen, 0);
658         if (!di || IS_ERR(di)) {
659                 location->objectid = 0;
660                 ret = 0;
661                 goto out;
662         }
663         btrfs_disk_key_to_cpu(location, &di->location);
664 out:
665         btrfs_release_path(root, path);
666         btrfs_free_path(path);
667         return ret;
668 }
669
670 static int fixup_tree_root_location(struct btrfs_root *root,
671                              struct btrfs_key *location,
672                              struct btrfs_root **sub_root)
673 {
674         struct btrfs_path *path;
675         struct btrfs_root_item *ri;
676
677         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
678                 return 0;
679         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
680                 return 0;
681
682         path = btrfs_alloc_path();
683         BUG_ON(!path);
684         mutex_lock(&root->fs_info->fs_mutex);
685
686         *sub_root = btrfs_read_fs_root(root->fs_info, location);
687         if (IS_ERR(*sub_root))
688                 return PTR_ERR(*sub_root);
689
690         ri = &(*sub_root)->root_item;
691         location->objectid = btrfs_root_dirid(ri);
692         location->flags = 0;
693         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
694         location->offset = 0;
695
696         btrfs_free_path(path);
697         mutex_unlock(&root->fs_info->fs_mutex);
698         return 0;
699 }
700
701 static int btrfs_init_locked_inode(struct inode *inode, void *p)
702 {
703         struct btrfs_iget_args *args = p;
704         inode->i_ino = args->ino;
705         BTRFS_I(inode)->root = args->root;
706         return 0;
707 }
708
709 static int btrfs_find_actor(struct inode *inode, void *opaque)
710 {
711         struct btrfs_iget_args *args = opaque;
712         return (args->ino == inode->i_ino &&
713                 args->root == BTRFS_I(inode)->root);
714 }
715
716 static struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
717                                        struct btrfs_root *root)
718 {
719         struct inode *inode;
720         struct btrfs_iget_args args;
721         args.ino = objectid;
722         args.root = root;
723
724         inode = iget5_locked(s, objectid, btrfs_find_actor,
725                              btrfs_init_locked_inode,
726                              (void *)&args);
727         return inode;
728 }
729
730 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
731                                    struct nameidata *nd)
732 {
733         struct inode * inode;
734         struct btrfs_inode *bi = BTRFS_I(dir);
735         struct btrfs_root *root = bi->root;
736         struct btrfs_root *sub_root = root;
737         struct btrfs_key location;
738         int ret;
739
740         if (dentry->d_name.len > BTRFS_NAME_LEN)
741                 return ERR_PTR(-ENAMETOOLONG);
742         mutex_lock(&root->fs_info->fs_mutex);
743         ret = btrfs_inode_by_name(dir, dentry, &location);
744         mutex_unlock(&root->fs_info->fs_mutex);
745         if (ret < 0)
746                 return ERR_PTR(ret);
747         inode = NULL;
748         if (location.objectid) {
749                 ret = fixup_tree_root_location(root, &location, &sub_root);
750                 if (ret < 0)
751                         return ERR_PTR(ret);
752                 if (ret > 0)
753                         return ERR_PTR(-ENOENT);
754                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
755                                           sub_root);
756                 if (!inode)
757                         return ERR_PTR(-EACCES);
758                 if (inode->i_state & I_NEW) {
759                         if (sub_root != root) {
760 printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root);
761                                 igrab(inode);
762                                 sub_root->inode = inode;
763                         }
764                         BTRFS_I(inode)->root = sub_root;
765                         memcpy(&BTRFS_I(inode)->location, &location,
766                                sizeof(location));
767                         btrfs_read_locked_inode(inode);
768                         unlock_new_inode(inode);
769                 }
770         }
771         return d_splice_alias(inode, dentry);
772 }
773
774 static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path,
775                          u64 objectid)
776 {
777         struct btrfs_node *node;
778         int i;
779         u32 nritems;
780         u64 item_objectid;
781         u64 blocknr;
782         int slot;
783         int ret;
784
785         if (!path->nodes[1])
786                 return;
787         node = btrfs_buffer_node(path->nodes[1]);
788         slot = path->slots[1];
789         nritems = btrfs_header_nritems(&node->header);
790         for (i = slot + 1; i < nritems; i++) {
791                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
792                 if (item_objectid != objectid)
793                         break;
794                 blocknr = btrfs_node_blockptr(node, i);
795                 ret = readahead_tree_block(root, blocknr);
796                 if (ret)
797                         break;
798         }
799 }
800 static unsigned char btrfs_filetype_table[] = {
801         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
802 };
803
804 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
805 {
806         struct inode *inode = filp->f_path.dentry->d_inode;
807         struct btrfs_root *root = BTRFS_I(inode)->root;
808         struct btrfs_item *item;
809         struct btrfs_dir_item *di;
810         struct btrfs_key key;
811         struct btrfs_path *path;
812         int ret;
813         u32 nritems;
814         struct btrfs_leaf *leaf;
815         int slot;
816         int advance;
817         unsigned char d_type;
818         int over = 0;
819         u32 di_cur;
820         u32 di_total;
821         u32 di_len;
822         int key_type = BTRFS_DIR_INDEX_KEY;
823
824         /* FIXME, use a real flag for deciding about the key type */
825         if (root->fs_info->tree_root == root)
826                 key_type = BTRFS_DIR_ITEM_KEY;
827         mutex_lock(&root->fs_info->fs_mutex);
828         key.objectid = inode->i_ino;
829         key.flags = 0;
830         btrfs_set_key_type(&key, key_type);
831         key.offset = filp->f_pos;
832         path = btrfs_alloc_path();
833         btrfs_init_path(path);
834         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
835         if (ret < 0)
836                 goto err;
837         advance = 0;
838         reada_leaves(root, path, inode->i_ino);
839         while(1) {
840                 leaf = btrfs_buffer_leaf(path->nodes[0]);
841                 nritems = btrfs_header_nritems(&leaf->header);
842                 slot = path->slots[0];
843                 if (advance || slot >= nritems) {
844                         if (slot >= nritems -1) {
845                                 reada_leaves(root, path, inode->i_ino);
846                                 ret = btrfs_next_leaf(root, path);
847                                 if (ret)
848                                         break;
849                                 leaf = btrfs_buffer_leaf(path->nodes[0]);
850                                 nritems = btrfs_header_nritems(&leaf->header);
851                                 slot = path->slots[0];
852                         } else {
853                                 slot++;
854                                 path->slots[0]++;
855                         }
856                 }
857                 advance = 1;
858                 item = leaf->items + slot;
859                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
860                         break;
861                 if (btrfs_disk_key_type(&item->key) != key_type)
862                         break;
863                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
864                         continue;
865                 filp->f_pos = btrfs_disk_key_offset(&item->key);
866                 advance = 1;
867                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
868                 di_cur = 0;
869                 di_total = btrfs_item_size(leaf->items + slot);
870                 while(di_cur < di_total) {
871                         d_type = btrfs_filetype_table[btrfs_dir_type(di)];
872                         over = filldir(dirent, (const char *)(di + 1),
873                                        btrfs_dir_name_len(di),
874                                        btrfs_disk_key_offset(&item->key),
875                                        btrfs_disk_key_objectid(&di->location),
876                                        d_type);
877                         if (over)
878                                 goto nopos;
879                         di_len = btrfs_dir_name_len(di) + sizeof(*di);
880                         di_cur += di_len;
881                         di = (struct btrfs_dir_item *)((char *)di + di_len);
882                 }
883         }
884         filp->f_pos++;
885 nopos:
886         ret = 0;
887 err:
888         btrfs_release_path(root, path);
889         btrfs_free_path(path);
890         mutex_unlock(&root->fs_info->fs_mutex);
891         return ret;
892 }
893
894 static void btrfs_put_super (struct super_block * sb)
895 {
896         struct btrfs_root *root = btrfs_sb(sb);
897         int ret;
898
899         ret = close_ctree(root);
900         if (ret) {
901                 printk("close ctree returns %d\n", ret);
902         }
903         sb->s_fs_info = NULL;
904 }
905
906 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
907 {
908         struct inode * inode;
909         struct dentry * root_dentry;
910         struct btrfs_super_block *disk_super;
911         struct btrfs_root *tree_root;
912         struct btrfs_inode *bi;
913
914         sb->s_maxbytes = MAX_LFS_FILESIZE;
915         sb->s_magic = BTRFS_SUPER_MAGIC;
916         sb->s_op = &btrfs_super_ops;
917         sb->s_time_gran = 1;
918
919         tree_root = open_ctree(sb);
920
921         if (!tree_root) {
922                 printk("btrfs: open_ctree failed\n");
923                 return -EIO;
924         }
925         sb->s_fs_info = tree_root;
926         disk_super = tree_root->fs_info->disk_super;
927         printk("read in super total blocks %Lu root %Lu\n",
928                btrfs_super_total_blocks(disk_super),
929                btrfs_super_root_dir(disk_super));
930
931         inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
932                                   tree_root);
933         bi = BTRFS_I(inode);
934         bi->location.objectid = inode->i_ino;
935         bi->location.offset = 0;
936         bi->location.flags = 0;
937         bi->root = tree_root;
938         btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
939
940         if (!inode)
941                 return -ENOMEM;
942         if (inode->i_state & I_NEW) {
943                 btrfs_read_locked_inode(inode);
944                 unlock_new_inode(inode);
945         }
946
947         root_dentry = d_alloc_root(inode);
948         if (!root_dentry) {
949                 iput(inode);
950                 return -ENOMEM;
951         }
952         sb->s_root = root_dentry;
953
954         return 0;
955 }
956
957 static int btrfs_write_inode(struct inode *inode, int wait)
958 {
959         struct btrfs_root *root = BTRFS_I(inode)->root;
960         struct btrfs_trans_handle *trans;
961         int ret = 0;
962
963         if (wait) {
964                 mutex_lock(&root->fs_info->fs_mutex);
965                 trans = btrfs_start_transaction(root, 1);
966                 btrfs_set_trans_block_group(trans, inode);
967                 ret = btrfs_commit_transaction(trans, root);
968                 mutex_unlock(&root->fs_info->fs_mutex);
969         }
970         return ret;
971 }
972
973 static void btrfs_dirty_inode(struct inode *inode)
974 {
975         struct btrfs_root *root = BTRFS_I(inode)->root;
976         struct btrfs_trans_handle *trans;
977
978         mutex_lock(&root->fs_info->fs_mutex);
979         trans = btrfs_start_transaction(root, 1);
980         btrfs_set_trans_block_group(trans, inode);
981         btrfs_update_inode(trans, root, inode);
982         btrfs_end_transaction(trans, root);
983         mutex_unlock(&root->fs_info->fs_mutex);
984         btrfs_btree_balance_dirty(root);
985 }
986
987 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
988                                      struct btrfs_root *root,
989                                      u64 objectid,
990                                      struct btrfs_block_group_cache *group,
991                                      int mode)
992 {
993         struct inode *inode;
994         struct btrfs_inode_item inode_item;
995         struct btrfs_key *location;
996         int ret;
997         int owner;
998
999         inode = new_inode(root->fs_info->sb);
1000         if (!inode)
1001                 return ERR_PTR(-ENOMEM);
1002
1003         BTRFS_I(inode)->root = root;
1004         if (mode & S_IFDIR)
1005                 owner = 0;
1006         else
1007                 owner = 1;
1008         group = btrfs_find_block_group(root, group, 0, 0, owner);
1009         BTRFS_I(inode)->block_group = group;
1010
1011         inode->i_uid = current->fsuid;
1012         inode->i_gid = current->fsgid;
1013         inode->i_mode = mode;
1014         inode->i_ino = objectid;
1015         inode->i_blocks = 0;
1016         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1017         fill_inode_item(&inode_item, inode);
1018         location = &BTRFS_I(inode)->location;
1019         location->objectid = objectid;
1020         location->flags = 0;
1021         location->offset = 0;
1022         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1023
1024         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
1025         BUG_ON(ret);
1026
1027         insert_inode_hash(inode);
1028         return inode;
1029 }
1030
1031 static inline u8 btrfs_inode_type(struct inode *inode)
1032 {
1033         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1034 }
1035
1036 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1037                             struct dentry *dentry, struct inode *inode)
1038 {
1039         int ret;
1040         struct btrfs_key key;
1041         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1042         key.objectid = inode->i_ino;
1043         key.flags = 0;
1044         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1045         key.offset = 0;
1046
1047         ret = btrfs_insert_dir_item(trans, root,
1048                                     dentry->d_name.name, dentry->d_name.len,
1049                                     dentry->d_parent->d_inode->i_ino,
1050                                     &key, btrfs_inode_type(inode));
1051         if (ret == 0) {
1052                 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
1053                 ret = btrfs_update_inode(trans, root,
1054                                          dentry->d_parent->d_inode);
1055         }
1056         return ret;
1057 }
1058
1059 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1060                             struct dentry *dentry, struct inode *inode)
1061 {
1062         int err = btrfs_add_link(trans, dentry, inode);
1063         if (!err) {
1064                 d_instantiate(dentry, inode);
1065                 return 0;
1066         }
1067         if (err > 0)
1068                 err = -EEXIST;
1069         return err;
1070 }
1071
1072 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1073                         int mode, struct nameidata *nd)
1074 {
1075         struct btrfs_trans_handle *trans;
1076         struct btrfs_root *root = BTRFS_I(dir)->root;
1077         struct inode *inode;
1078         int err;
1079         int drop_inode = 0;
1080         u64 objectid;
1081
1082         mutex_lock(&root->fs_info->fs_mutex);
1083         trans = btrfs_start_transaction(root, 1);
1084         btrfs_set_trans_block_group(trans, dir);
1085
1086         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1087         if (err) {
1088                 err = -ENOSPC;
1089                 goto out_unlock;
1090         }
1091
1092         inode = btrfs_new_inode(trans, root, objectid,
1093                                 BTRFS_I(dir)->block_group, mode);
1094         err = PTR_ERR(inode);
1095         if (IS_ERR(inode))
1096                 goto out_unlock;
1097
1098         btrfs_set_trans_block_group(trans, inode);
1099         err = btrfs_add_nondir(trans, dentry, inode);
1100         if (err)
1101                 drop_inode = 1;
1102         else {
1103                 inode->i_mapping->a_ops = &btrfs_aops;
1104                 inode->i_fop = &btrfs_file_operations;
1105                 inode->i_op = &btrfs_file_inode_operations;
1106         }
1107         dir->i_sb->s_dirt = 1;
1108         btrfs_update_inode_block_group(trans, inode);
1109         btrfs_update_inode_block_group(trans, dir);
1110 out_unlock:
1111         btrfs_end_transaction(trans, root);
1112         mutex_unlock(&root->fs_info->fs_mutex);
1113
1114         if (drop_inode) {
1115                 inode_dec_link_count(inode);
1116                 iput(inode);
1117         }
1118         btrfs_btree_balance_dirty(root);
1119         return err;
1120 }
1121
1122 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1123                       struct dentry *dentry)
1124 {
1125         struct btrfs_trans_handle *trans;
1126         struct btrfs_root *root = BTRFS_I(dir)->root;
1127         struct inode *inode = old_dentry->d_inode;
1128         int err;
1129         int drop_inode = 0;
1130
1131         if (inode->i_nlink == 0)
1132                 return -ENOENT;
1133
1134         inc_nlink(inode);
1135         mutex_lock(&root->fs_info->fs_mutex);
1136         trans = btrfs_start_transaction(root, 1);
1137         btrfs_set_trans_block_group(trans, dir);
1138         atomic_inc(&inode->i_count);
1139         err = btrfs_add_nondir(trans, dentry, inode);
1140         if (err)
1141                 drop_inode = 1;
1142         dir->i_sb->s_dirt = 1;
1143         btrfs_update_inode_block_group(trans, dir);
1144         btrfs_update_inode(trans, root, inode);
1145
1146         btrfs_end_transaction(trans, root);
1147         mutex_unlock(&root->fs_info->fs_mutex);
1148
1149         if (drop_inode) {
1150                 inode_dec_link_count(inode);
1151                 iput(inode);
1152         }
1153         btrfs_btree_balance_dirty(root);
1154         return err;
1155 }
1156
1157 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
1158                                 struct btrfs_root *root,
1159                                 u64 objectid, u64 dirid)
1160 {
1161         int ret;
1162         char buf[2];
1163         struct btrfs_key key;
1164
1165         buf[0] = '.';
1166         buf[1] = '.';
1167
1168         key.objectid = objectid;
1169         key.offset = 0;
1170         key.flags = 0;
1171         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1172
1173         ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
1174                                     &key, BTRFS_FT_DIR);
1175         if (ret)
1176                 goto error;
1177         key.objectid = dirid;
1178         ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
1179                                     &key, BTRFS_FT_DIR);
1180         if (ret)
1181                 goto error;
1182 error:
1183         return ret;
1184 }
1185
1186 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1187 {
1188         struct inode *inode;
1189         struct btrfs_trans_handle *trans;
1190         struct btrfs_root *root = BTRFS_I(dir)->root;
1191         int err = 0;
1192         int drop_on_err = 0;
1193         u64 objectid;
1194
1195         mutex_lock(&root->fs_info->fs_mutex);
1196         trans = btrfs_start_transaction(root, 1);
1197         btrfs_set_trans_block_group(trans, dir);
1198         if (IS_ERR(trans)) {
1199                 err = PTR_ERR(trans);
1200                 goto out_unlock;
1201         }
1202
1203         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1204         if (err) {
1205                 err = -ENOSPC;
1206                 goto out_unlock;
1207         }
1208
1209         inode = btrfs_new_inode(trans, root, objectid,
1210                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1211         if (IS_ERR(inode)) {
1212                 err = PTR_ERR(inode);
1213                 goto out_fail;
1214         }
1215         drop_on_err = 1;
1216         inode->i_op = &btrfs_dir_inode_operations;
1217         inode->i_fop = &btrfs_dir_file_operations;
1218         btrfs_set_trans_block_group(trans, inode);
1219
1220         err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
1221         if (err)
1222                 goto out_fail;
1223
1224         inode->i_size = 6;
1225         err = btrfs_update_inode(trans, root, inode);
1226         if (err)
1227                 goto out_fail;
1228         err = btrfs_add_link(trans, dentry, inode);
1229         if (err)
1230                 goto out_fail;
1231         d_instantiate(dentry, inode);
1232         drop_on_err = 0;
1233         dir->i_sb->s_dirt = 1;
1234         btrfs_update_inode_block_group(trans, inode);
1235         btrfs_update_inode_block_group(trans, dir);
1236
1237 out_fail:
1238         btrfs_end_transaction(trans, root);
1239 out_unlock:
1240         mutex_unlock(&root->fs_info->fs_mutex);
1241         if (drop_on_err)
1242                 iput(inode);
1243         btrfs_btree_balance_dirty(root);
1244         return err;
1245 }
1246
1247 static int btrfs_sync_file(struct file *file,
1248                            struct dentry *dentry, int datasync)
1249 {
1250         struct inode *inode = dentry->d_inode;
1251         struct btrfs_root *root = BTRFS_I(inode)->root;
1252         int ret;
1253         struct btrfs_trans_handle *trans;
1254
1255         mutex_lock(&root->fs_info->fs_mutex);
1256         trans = btrfs_start_transaction(root, 1);
1257         if (!trans) {
1258                 ret = -ENOMEM;
1259                 goto out;
1260         }
1261         ret = btrfs_commit_transaction(trans, root);
1262         mutex_unlock(&root->fs_info->fs_mutex);
1263 out:
1264         return ret > 0 ? EIO : ret;
1265 }
1266
1267 static int btrfs_sync_fs(struct super_block *sb, int wait)
1268 {
1269         struct btrfs_trans_handle *trans;
1270         struct btrfs_root *root;
1271         int ret;
1272         root = btrfs_sb(sb);
1273
1274         sb->s_dirt = 0;
1275         if (!wait) {
1276                 filemap_flush(root->fs_info->btree_inode->i_mapping);
1277                 return 0;
1278         }
1279         mutex_lock(&root->fs_info->fs_mutex);
1280         trans = btrfs_start_transaction(root, 1);
1281         ret = btrfs_commit_transaction(trans, root);
1282         sb->s_dirt = 0;
1283         BUG_ON(ret);
1284 printk("btrfs sync_fs\n");
1285         mutex_unlock(&root->fs_info->fs_mutex);
1286         return 0;
1287 }
1288
/*
 * Values for the 'create' argument of btrfs_get_block_lock().
 */
#define BTRFS_GET_BLOCK_NO_CREATE       0       /* no flag set */
#define BTRFS_GET_BLOCK_CREATE          1       /* allocate when nothing is mapped */
#define BTRFS_GET_BLOCK_NO_DIRECT       2       /* inline extents yield -EINVAL */
1292
1293 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1294                            struct buffer_head *result, int create)
1295 {
1296         int ret;
1297         int err = 0;
1298         u64 blocknr;
1299         u64 extent_start = 0;
1300         u64 extent_end = 0;
1301         u64 objectid = inode->i_ino;
1302         u32 found_type;
1303         u64 alloc_hint = 0;
1304         struct btrfs_path *path;
1305         struct btrfs_root *root = BTRFS_I(inode)->root;
1306         struct btrfs_file_extent_item *item;
1307         struct btrfs_leaf *leaf;
1308         struct btrfs_disk_key *found_key;
1309         struct btrfs_trans_handle *trans = NULL;
1310
1311         path = btrfs_alloc_path();
1312         BUG_ON(!path);
1313         btrfs_init_path(path);
1314         if (create & BTRFS_GET_BLOCK_CREATE) {
1315                 WARN_ON(1);
1316                 /* this almost but not quite works */
1317                 trans = btrfs_start_transaction(root, 1);
1318                 if (!trans) {
1319                         err = -ENOMEM;
1320                         goto out;
1321                 }
1322                 ret = drop_extents(trans, root, inode,
1323                                    iblock << inode->i_blkbits,
1324                                    (iblock + 1) << inode->i_blkbits,
1325                                    &alloc_hint);
1326                 BUG_ON(ret);
1327         }
1328
1329         ret = btrfs_lookup_file_extent(NULL, root, path,
1330                                        inode->i_ino,
1331                                        iblock << inode->i_blkbits, 0);
1332         if (ret < 0) {
1333                 err = ret;
1334                 goto out;
1335         }
1336
1337         if (ret != 0) {
1338                 if (path->slots[0] == 0) {
1339                         btrfs_release_path(root, path);
1340                         goto not_found;
1341                 }
1342                 path->slots[0]--;
1343         }
1344
1345         item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1346                               struct btrfs_file_extent_item);
1347         leaf = btrfs_buffer_leaf(path->nodes[0]);
1348         blocknr = btrfs_file_extent_disk_blocknr(item);
1349         blocknr += btrfs_file_extent_offset(item);
1350
1351         /* are we inside the extent that was found? */
1352         found_key = &leaf->items[path->slots[0]].key;
1353         found_type = btrfs_disk_key_type(found_key);
1354         if (btrfs_disk_key_objectid(found_key) != objectid ||
1355             found_type != BTRFS_EXTENT_DATA_KEY) {
1356                 extent_end = 0;
1357                 extent_start = 0;
1358                 goto not_found;
1359         }
1360         found_type = btrfs_file_extent_type(item);
1361         extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1362         if (found_type == BTRFS_FILE_EXTENT_REG) {
1363                 extent_start = extent_start >> inode->i_blkbits;
1364                 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1365                 err = 0;
1366                 if (btrfs_file_extent_disk_blocknr(item) == 0)
1367                         goto out;
1368                 if (iblock >= extent_start && iblock < extent_end) {
1369                         btrfs_map_bh_to_logical(root, result, blocknr +
1370                                                 iblock - extent_start);
1371                         goto out;
1372                 }
1373         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1374                 char *ptr;
1375                 char *map;
1376                 u32 size;
1377
1378                 if (create & BTRFS_GET_BLOCK_NO_DIRECT) {
1379                         err = -EINVAL;
1380                         goto out;
1381                 }
1382                 size = btrfs_file_extent_inline_len(leaf->items +
1383                                                     path->slots[0]);
1384                 extent_end = (extent_start + size) >> inode->i_blkbits;
1385                 extent_start >>= inode->i_blkbits;
1386                 if (iblock < extent_start || iblock > extent_end) {
1387                         goto not_found;
1388                 }
1389                 ptr = btrfs_file_extent_inline_start(item);
1390                 map = kmap(result->b_page);
1391                 memcpy(map, ptr, size);
1392                 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1393                 flush_dcache_page(result->b_page);
1394                 kunmap(result->b_page);
1395                 set_buffer_uptodate(result);
1396                 SetPageChecked(result->b_page);
1397                 btrfs_map_bh_to_logical(root, result, 0);
1398         }
1399 not_found:
1400         if (create & BTRFS_GET_BLOCK_CREATE) {
1401                 struct btrfs_key ins;
1402                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1403                                          1, alloc_hint, (u64)-1,
1404                                          &ins, 1);
1405                 BUG_ON(ret);
1406                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1407                                                iblock << inode->i_blkbits,
1408                                                ins.objectid, ins.offset,
1409                                                ins.offset);
1410                 BUG_ON(ret);
1411                 SetPageChecked(result->b_page);
1412                 btrfs_map_bh_to_logical(root, result, ins.objectid);
1413         }
1414 out:
1415         if (trans)
1416                 err = btrfs_end_transaction(trans, root);
1417         btrfs_free_path(path);
1418         return err;
1419 }
1420
1421 static int btrfs_get_block(struct inode *inode, sector_t iblock,
1422                            struct buffer_head *result, int create)
1423 {
1424         int err;
1425         struct btrfs_root *root = BTRFS_I(inode)->root;
1426         mutex_lock(&root->fs_info->fs_mutex);
1427         err = btrfs_get_block_lock(inode, iblock, result, create);
1428         mutex_unlock(&root->fs_info->fs_mutex);
1429         return err;
1430 }
1431
1432 static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
1433                            struct buffer_head *result, int create)
1434 {
1435         struct btrfs_root *root = BTRFS_I(inode)->root;
1436         mutex_lock(&root->fs_info->fs_mutex);
1437         btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT);
1438         mutex_unlock(&root->fs_info->fs_mutex);
1439         return 0;
1440 }
1441
1442 static sector_t btrfs_bmap(struct address_space *as, sector_t block)
1443 {
1444         return generic_block_bmap(as, block, btrfs_get_block_bmap);
1445 }
1446
1447 static int btrfs_prepare_write(struct file *file, struct page *page,
1448                                unsigned from, unsigned to)
1449 {
1450         return block_prepare_write(page, from, to, btrfs_get_block);
1451 }
1452
/*
 * ->write_super: a waiting btrfs_sync_fs(); its return value is not
 * looked at.
 */
static void btrfs_write_super(struct super_block *sb)
{
        const int wait = 1;

        btrfs_sync_fs(sb, wait);
}
1457
1458 static int btrfs_readpage(struct file *file, struct page *page)
1459 {
1460         return mpage_readpage(page, btrfs_get_block);
1461 }
1462
1463 /*
1464  * While block_write_full_page is writing back the dirty buffers under
1465  * the page lock, whoever dirtied the buffers may decide to clean them
1466  * again at any time.  We handle that by only looking at the buffer
1467  * state inside lock_buffer().
1468  *
1469  * If block_write_full_page() is called for regular writeback
1470  * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1471  * locked buffer.   This only can happen if someone has written the buffer
1472  * directly, with submit_bh().  At the address_space level PageWriteback
1473  * prevents this contention from occurring.
1474  */
1475 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1476                                    struct writeback_control *wbc)
1477 {
1478         int err;
1479         sector_t block;
1480         sector_t last_block;
1481         struct buffer_head *bh, *head;
1482         const unsigned blocksize = 1 << inode->i_blkbits;
1483         int nr_underway = 0;
1484
1485         BUG_ON(!PageLocked(page));
1486
1487         last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1488
1489         if (!page_has_buffers(page)) {
1490                 create_empty_buffers(page, blocksize,
1491                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
1492         }
1493
1494         /*
1495          * Be very careful.  We have no exclusion from __set_page_dirty_buffers
1496          * here, and the (potentially unmapped) buffers may become dirty at
1497          * any time.  If a buffer becomes dirty here after we've inspected it
1498          * then we just miss that fact, and the page stays dirty.
1499          *
1500          * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1501          * handle that here by just cleaning them.
1502          */
1503
1504         block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1505         head = page_buffers(page);
1506         bh = head;
1507
1508         /*
1509          * Get all the dirty buffers mapped to disk addresses and
1510          * handle any aliases from the underlying blockdev's mapping.
1511          */
1512         do {
1513                 if (block > last_block) {
1514                         /*
1515                          * mapped buffers outside i_size will occur, because
1516                          * this page can be outside i_size when there is a
1517                          * truncate in progress.
1518                          */
1519                         /*
1520                          * The buffer was zeroed by block_write_full_page()
1521                          */
1522                         clear_buffer_dirty(bh);
1523                         set_buffer_uptodate(bh);
1524                 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1525                         WARN_ON(bh->b_size != blocksize);
1526                         err = btrfs_get_block(inode, block, bh, 0);
1527                         if (err) {
1528 printk("writepage going to recovery err %d\n", err);
1529                                 goto recover;
1530                         }
1531                         if (buffer_new(bh)) {
1532                                 /* blockdev mappings never come here */
1533                                 clear_buffer_new(bh);
1534                         }
1535                 }
1536                 bh = bh->b_this_page;
1537                 block++;
1538         } while (bh != head);
1539
1540         do {
1541                 if (!buffer_mapped(bh))
1542                         continue;
1543                 /*
1544                  * If it's a fully non-blocking write attempt and we cannot
1545                  * lock the buffer then redirty the page.  Note that this can
1546                  * potentially cause a busy-wait loop from pdflush and kswapd
1547                  * activity, but those code paths have their own higher-level
1548                  * throttling.
1549                  */
1550                 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1551                         lock_buffer(bh);
1552                 } else if (test_set_buffer_locked(bh)) {
1553                         redirty_page_for_writepage(wbc, page);
1554                         continue;
1555                 }
1556                 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1557                         mark_buffer_async_write(bh);
1558                 } else {
1559                         unlock_buffer(bh);
1560                 }
1561         } while ((bh = bh->b_this_page) != head);
1562
1563         /*
1564          * The page and its buffers are protected by PageWriteback(), so we can
1565          * drop the bh refcounts early.
1566          */
1567         BUG_ON(PageWriteback(page));
1568         set_page_writeback(page);
1569
1570         do {
1571                 struct buffer_head *next = bh->b_this_page;
1572                 if (buffer_async_write(bh)) {
1573                         submit_bh(WRITE, bh);
1574                         nr_underway++;
1575                 }
1576                 bh = next;
1577         } while (bh != head);
1578         unlock_page(page);
1579
1580         err = 0;
1581 done:
1582         if (nr_underway == 0) {
1583                 /*
1584                  * The page was marked dirty, but the buffers were
1585                  * clean.  Someone wrote them back by hand with
1586                  * ll_rw_block/submit_bh.  A rare case.
1587                  */
1588                 int uptodate = 1;
1589                 do {
1590                         if (!buffer_uptodate(bh)) {
1591                                 uptodate = 0;
1592                                 break;
1593                         }
1594                         bh = bh->b_this_page;
1595                 } while (bh != head);
1596                 if (uptodate)
1597                         SetPageUptodate(page);
1598                 end_page_writeback(page);
1599         }
1600         return err;
1601
1602 recover:
1603         /*
1604          * ENOSPC, or some other error.  We may already have added some
1605          * blocks to the file, so we need to write these out to avoid
1606          * exposing stale data.
1607          * The page is currently locked and not marked for writeback
1608          */
1609         bh = head;
1610         /* Recovery: lock and submit the mapped buffers */
1611         do {
1612                 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1613                         lock_buffer(bh);
1614                         mark_buffer_async_write(bh);
1615                 } else {
1616                         /*
1617                          * The buffer may have been set dirty during
1618                          * attachment to a dirty page.
1619                          */
1620                         clear_buffer_dirty(bh);
1621                 }
1622         } while ((bh = bh->b_this_page) != head);
1623         SetPageError(page);
1624         BUG_ON(PageWriteback(page));
1625         set_page_writeback(page);
1626         do {
1627                 struct buffer_head *next = bh->b_this_page;
1628                 if (buffer_async_write(bh)) {
1629                         clear_buffer_dirty(bh);
1630                         submit_bh(WRITE, bh);
1631                         nr_underway++;
1632                 }
1633                 bh = next;
1634         } while (bh != head);
1635         unlock_page(page);
1636         goto done;
1637 }
1638
1639 /*
1640  * The generic ->writepage function for buffer-backed address_spaces
1641  */
1642 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1643 {
1644         struct inode * const inode = page->mapping->host;
1645         loff_t i_size = i_size_read(inode);
1646         const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1647         unsigned offset;
1648         void *kaddr;
1649
1650         /* Is the page fully inside i_size? */
1651         if (page->index < end_index)
1652                 return __btrfs_write_full_page(inode, page, wbc);
1653
1654         /* Is the page fully outside i_size? (truncate in progress) */
1655         offset = i_size & (PAGE_CACHE_SIZE-1);
1656         if (page->index >= end_index+1 || !offset) {
1657                 /*
1658                  * The page may have dirty, unmapped buffers.  For example,
1659                  * they may have been added in ext3_writepage().  Make them
1660                  * freeable here, so the page does not leak.
1661                  */
1662                 block_invalidatepage(page, 0);
1663                 unlock_page(page);
1664                 return 0; /* don't care */
1665         }
1666
1667         /*
1668          * The page straddles i_size.  It must be zeroed out on each and every
1669          * writepage invokation because it may be mmapped.  "A file is mapped
1670          * in multiples of the page size.  For a file that is not a multiple of
1671          * the  page size, the remaining memory is zeroed when mapped, and
1672          * writes to that region are not written out to the file."
1673          */
1674         kaddr = kmap_atomic(page, KM_USER0);
1675         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1676         flush_dcache_page(page);
1677         kunmap_atomic(kaddr, KM_USER0);
1678         return __btrfs_write_full_page(inode, page, wbc);
1679 }
1680
1681 static void btrfs_truncate(struct inode *inode)
1682 {
1683         struct btrfs_root *root = BTRFS_I(inode)->root;
1684         int ret;
1685         struct btrfs_trans_handle *trans;
1686
1687         if (!S_ISREG(inode->i_mode))
1688                 return;
1689         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1690                 return;
1691
1692         btrfs_truncate_page(inode->i_mapping, inode->i_size);
1693
1694         mutex_lock(&root->fs_info->fs_mutex);
1695         trans = btrfs_start_transaction(root, 1);
1696         btrfs_set_trans_block_group(trans, inode);
1697
1698         /* FIXME, add redo link to tree so we don't leak on crash */
1699         ret = btrfs_truncate_in_trans(trans, root, inode);
1700         BUG_ON(ret);
1701         btrfs_update_inode(trans, root, inode);
1702         ret = btrfs_end_transaction(trans, root);
1703         BUG_ON(ret);
1704         mutex_unlock(&root->fs_info->fs_mutex);
1705         btrfs_btree_balance_dirty(root);
1706 }
1707
1708 static int btrfs_commit_write(struct file *file, struct page *page,
1709                               unsigned from, unsigned to)
1710 {
1711         struct inode *inode = page->mapping->host;
1712         struct buffer_head *bh;
1713         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1714
1715         SetPageUptodate(page);
1716         bh = page_buffers(page);
1717         set_buffer_uptodate(bh);
1718         if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1719                 set_page_dirty(page);
1720         }
1721         if (pos > inode->i_size) {
1722                 i_size_write(inode, pos);
1723                 mark_inode_dirty(inode);
1724         }
1725         return 0;
1726 }
1727
1728 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1729                                 struct page **prepared_pages,
1730                                 const char __user * buf)
1731 {
1732         long page_fault = 0;
1733         int i;
1734         int offset = pos & (PAGE_CACHE_SIZE - 1);
1735
1736         for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
1737                 size_t count = min_t(size_t,
1738                                      PAGE_CACHE_SIZE - offset, write_bytes);
1739                 struct page *page = prepared_pages[i];
1740                 fault_in_pages_readable(buf, count);
1741
1742                 /* Copy data from userspace to the current page */
1743                 kmap(page);
1744                 page_fault = __copy_from_user(page_address(page) + offset,
1745                                               buf, count);
1746                 /* Flush processor's dcache for this page */
1747                 flush_dcache_page(page);
1748                 kunmap(page);
1749                 buf += count;
1750                 write_bytes -= count;
1751
1752                 if (page_fault)
1753                         break;
1754         }
1755         return page_fault ? -EFAULT : 0;
1756 }
1757
1758 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
1759 {
1760         size_t i;
1761         for (i = 0; i < num_pages; i++) {
1762                 if (!pages[i])
1763                         break;
1764                 unlock_page(pages[i]);
1765                 mark_page_accessed(pages[i]);
1766                 page_cache_release(pages[i]);
1767         }
1768 }
1769 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1770                                    struct btrfs_root *root,
1771                                    struct file *file,
1772                                    struct page **pages,
1773                                    size_t num_pages,
1774                                    loff_t pos,
1775                                    size_t write_bytes)
1776 {
1777         int i;
1778         int offset;
1779         int err = 0;
1780         int ret;
1781         int this_write;
1782         struct inode *inode = file->f_path.dentry->d_inode;
1783         struct buffer_head *bh;
1784         struct btrfs_file_extent_item *ei;
1785
1786         for (i = 0; i < num_pages; i++) {
1787                 offset = pos & (PAGE_CACHE_SIZE -1);
1788                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1789                 /* FIXME, one block at a time */
1790
1791                 mutex_lock(&root->fs_info->fs_mutex);
1792                 trans = btrfs_start_transaction(root, 1);
1793                 btrfs_set_trans_block_group(trans, inode);
1794
1795                 bh = page_buffers(pages[i]);
1796                 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1797                         struct btrfs_key key;
1798                         struct btrfs_path *path;
1799                         char *ptr;
1800                         u32 datasize;
1801
1802                         path = btrfs_alloc_path();
1803                         BUG_ON(!path);
1804                         key.objectid = inode->i_ino;
1805                         key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1806                         key.flags = 0;
1807                         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
1808                         BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1809                         datasize = offset +
1810                                 btrfs_file_extent_calc_inline_size(write_bytes);
1811                         ret = btrfs_insert_empty_item(trans, root, path, &key,
1812                                                       datasize);
1813                         BUG_ON(ret);
1814                         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1815                                path->slots[0], struct btrfs_file_extent_item);
1816                         btrfs_set_file_extent_generation(ei, trans->transid);
1817                         btrfs_set_file_extent_type(ei,
1818                                                    BTRFS_FILE_EXTENT_INLINE);
1819                         ptr = btrfs_file_extent_inline_start(ei);
1820                         btrfs_memcpy(root, path->nodes[0]->b_data,
1821                                      ptr, bh->b_data, offset + write_bytes);
1822                         mark_buffer_dirty(path->nodes[0]);
1823                         btrfs_free_path(path);
1824                 } else if (buffer_mapped(bh)) {
1825                         btrfs_csum_file_block(trans, root, inode->i_ino,
1826                                       pages[i]->index << PAGE_CACHE_SHIFT,
1827                                       kmap(pages[i]), PAGE_CACHE_SIZE);
1828                         kunmap(pages[i]);
1829                 }
1830                 SetPageChecked(pages[i]);
1831                 // btrfs_update_inode_block_group(trans, inode);
1832                 ret = btrfs_end_transaction(trans, root);
1833                 BUG_ON(ret);
1834                 mutex_unlock(&root->fs_info->fs_mutex);
1835
1836                 ret = btrfs_commit_write(file, pages[i], offset,
1837                                          offset + this_write);
1838                 pos += this_write;
1839                 if (ret) {
1840                         err = ret;
1841                         goto failed;
1842                 }
1843                 WARN_ON(this_write > write_bytes);
1844                 write_bytes -= this_write;
1845         }
1846 failed:
1847         return err;
1848 }
1849
/*
 * drop_extents - remove or trim the file extent items of @inode that
 * overlap the byte range [@start, @end).
 *
 * Each pass of the loop looks up the extent item covering search_start and
 * then:
 *   - shrinks it when it begins before @start (only done for regular
 *     extents; an inline extent in that position triggers a WARN_ON),
 *   - deletes it when it begins at or after @start, freeing the disk extent
 *     if its disk block number is non-zero,
 *   - and, when a regular extent reaches past @end, inserts a new "bookend"
 *     item at @end that describes the surviving tail.
 *
 * @hint_block is overwritten with the disk block number of every regular
 * extent that is shrunk or deleted; it is left untouched otherwise.
 * inode->i_blocks is adjusted as extents backed by a non-zero disk block
 * are shrunk, removed or re-inserted.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or the
 * negative value returned by btrfs_lookup_file_extent().  Failures of the
 * tree-modifying helpers hit BUG_ON().
 */
1850 static int drop_extents(struct btrfs_trans_handle *trans,
1851                           struct btrfs_root *root,
1852                           struct inode *inode,
1853                           u64 start, u64 end, u64 *hint_block)
1854 {
1855         int ret;
1856         struct btrfs_key key;
1857         struct btrfs_leaf *leaf;
1858         int slot;
1859         struct btrfs_file_extent_item *extent;
1860         u64 extent_end = 0;
1861         int keep;
1862         struct btrfs_file_extent_item old;
1863         struct btrfs_path *path;
1864         u64 search_start = start;
1865         int bookend;
1866         int found_type;
1867         int found_extent;
1868         int found_inline;
1869
1870         path = btrfs_alloc_path();
1871         if (!path)
1872                 return -ENOMEM;
1873         while(1) {
1874                 btrfs_release_path(root, path);
1875                 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
1876                                                search_start, -1);
1877                 if (ret < 0)
1878                         goto out;
                     /*
                      * NOTE(review): ret > 0 presumably means "no exact key
                      * match"; step back one slot to the preceding item, or
                      * stop if there is none in this leaf.
                      */
1879                 if (ret > 0) {
1880                         if (path->slots[0] == 0) {
1881                                 ret = 0;
1882                                 goto out;
1883                         }
1884                         path->slots[0]--;
1885                 }
1886                 keep = 0;
1887                 bookend = 0;
1888                 found_extent = 0;
1889                 found_inline = 0;
1890                 extent = NULL;
1891                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1892                 slot = path->slots[0];
1893                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
                     /* Done once the item lies past the range or belongs to another inode. */
1894                 if (key.offset >= end || key.objectid != inode->i_ino) {
1895                         ret = 0;
1896                         goto out;
1897                 }
1898                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
1899                         ret = 0;
1900                         goto out;
1901                 }
1902                 extent = btrfs_item_ptr(leaf, slot,
1903                                         struct btrfs_file_extent_item);
1904                 found_type = btrfs_file_extent_type(extent);
                     /* Compute the first byte offset past this extent. */
1905                 if (found_type == BTRFS_FILE_EXTENT_REG) {
1906                         extent_end = key.offset +
1907                                 (btrfs_file_extent_num_blocks(extent) <<
1908                                  inode->i_blkbits);
1909                         found_extent = 1;
1910                 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1911                         found_inline = 1;
1912                         extent_end = key.offset +
1913                              btrfs_file_extent_inline_len(leaf->items + slot);
1914                 }
1915
                     /* Neither a regular nor an inline extent: nothing to drop. */
1916                 if (!found_extent && !found_inline) {
1917                         ret = 0;
1918                         goto out;
1919                 }
1920
                     /* The extent ends at or before the point we are searching from. */
1921                 if (search_start >= extent_end) {
1922                         ret = 0;
1923                         goto out;
1924                 }
1925
                     /*
                      * Next pass resumes after this extent; for an inline
                      * extent the resume point is rounded up to a block
                      * boundary.
                      */
1926                 if (found_inline) {
1927                         u64 mask = root->blocksize - 1;
1928                         search_start = (extent_end + mask) & ~mask;
1929                 } else
1930                         search_start = extent_end;
1931
                     /*
                      * The extent reaches past @end, so its tail must survive
                      * as a separate item.  Save a copy of the item and take an
                      * extra reference on its disk extent (if it has one) for
                      * the bookend item inserted further down.
                      */
1932                 if (end < extent_end && end >= key.offset) {
1933                         if (found_extent) {
1934                                 u64 disk_blocknr =
1935                                         btrfs_file_extent_disk_blocknr(extent);
1936                                 u64 disk_num_blocks =
1937                                       btrfs_file_extent_disk_num_blocks(extent);
1938                                 memcpy(&old, extent, sizeof(old));
1939                                 if (disk_blocknr != 0) {
1940                                         ret = btrfs_inc_extent_ref(trans, root,
1941                                                  disk_blocknr, disk_num_blocks);
1942                                         BUG_ON(ret);
1943                                 }
1944                         }
1945                         WARN_ON(found_inline);
1946                         bookend = 1;
1947                 }
1948
                     /*
                      * The extent begins before @start: keep the item but cut
                      * its block count so that it ends at @start.
                      * NOTE(review): "<< 3" looks like a blocks-to-512-byte-
                      * sectors conversion that assumes 4096-byte blocks --
                      * confirm.
                      */
1949                 if (start > key.offset) {
1950                         u64 new_num;
1951                         u64 old_num;
1952                         /* truncate existing extent */
1953                         keep = 1;
1954                         WARN_ON(start & (root->blocksize - 1));
1955                         if (found_extent) {
1956                                 new_num = (start - key.offset) >>
1957                                         inode->i_blkbits;
1958                                 old_num = btrfs_file_extent_num_blocks(extent);
1959                                 *hint_block =
1960                                         btrfs_file_extent_disk_blocknr(extent);
1961                                 if (btrfs_file_extent_disk_blocknr(extent)) {
1962                                         inode->i_blocks -=
1963                                                 (old_num - new_num) << 3;
1964                                 }
1965                                 btrfs_set_file_extent_num_blocks(extent,
1966                                                                  new_num);
1967                                 mark_buffer_dirty(path->nodes[0]);
1968                         } else {
1969                                 WARN_ON(1);
1970                         }
1971                 }
                     /*
                      * The extent begins inside the range: delete the item.
                      * The disk details are read out first because the item
                      * pointer is no longer used after btrfs_del_item().
                      */
1972                 if (!keep) {
1973                         u64 disk_blocknr = 0;
1974                         u64 disk_num_blocks = 0;
1975                         u64 extent_num_blocks = 0;
1976                         if (found_extent) {
1977                                 disk_blocknr =
1978                                       btrfs_file_extent_disk_blocknr(extent);
1979                                 disk_num_blocks =
1980                                       btrfs_file_extent_disk_num_blocks(extent);
1981                                 extent_num_blocks =
1982                                       btrfs_file_extent_num_blocks(extent);
1983                                 *hint_block =
1984                                         btrfs_file_extent_disk_blocknr(extent);
1985                         }
1986                         ret = btrfs_del_item(trans, root, path);
1987                         BUG_ON(ret);
1988                         btrfs_release_path(root, path);
1989                         extent = NULL;
                             /* Only extents with a non-zero disk block are freed and un-accounted. */
1990                         if (found_extent && disk_blocknr != 0) {
1991                                 inode->i_blocks -= extent_num_blocks << 3;
1992                                 ret = btrfs_free_extent(trans, root,
1993                                                         disk_blocknr,
1994                                                         disk_num_blocks, 0);
1995                         }
1996
1997                         BUG_ON(ret);
1998                         if (!bookend && search_start >= end) {
1999                                 ret = 0;
2000                                 goto out;
2001                         }
2002                         if (!bookend)
2003                                 continue;
2004                 }
                     /*
                      * Insert the surviving tail [end, extent_end) as a new
                      * regular extent item.  It points at the same disk extent
                      * as the saved copy, with its offset advanced by the
                      * number of blocks between the old start and @end.
                      */
2005                 if (bookend && found_extent) {
2006                         /* create bookend */
2007                         struct btrfs_key ins;
2008                         ins.objectid = inode->i_ino;
2009                         ins.offset = end;
2010                         ins.flags = 0;
2011                         btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
2012
2013                         btrfs_release_path(root, path);
2014                         ret = btrfs_insert_empty_item(trans, root, path, &ins,
2015                                                       sizeof(*extent));
2016                         BUG_ON(ret);
2017                         extent = btrfs_item_ptr(
2018                                     btrfs_buffer_leaf(path->nodes[0]),
2019                                     path->slots[0],
2020                                     struct btrfs_file_extent_item);
2021                         btrfs_set_file_extent_disk_blocknr(extent,
2022                                     btrfs_file_extent_disk_blocknr(&old));
2023                         btrfs_set_file_extent_disk_num_blocks(extent,
2024                                     btrfs_file_extent_disk_num_blocks(&old));
2025
2026                         btrfs_set_file_extent_offset(extent,
2027                                     btrfs_file_extent_offset(&old) +
2028                                     ((end - key.offset) >> inode->i_blkbits));
2029                         WARN_ON(btrfs_file_extent_num_blocks(&old) <
2030                                 (extent_end - end) >> inode->i_blkbits);
2031                         btrfs_set_file_extent_num_blocks(extent,
2032                                     (extent_end - end) >> inode->i_blkbits);
2033
2034                         btrfs_set_file_extent_type(extent,
2035                                                    BTRFS_FILE_EXTENT_REG);
2036                         btrfs_set_file_extent_generation(extent,
2037                                     btrfs_file_extent_generation(&old));
2038                         btrfs_mark_buffer_dirty(path->nodes[0]);
2039                         if (btrfs_file_extent_disk_blocknr(&old) != 0) {
2040                                 inode->i_blocks +=
2041                                       btrfs_file_extent_num_blocks(extent) << 3;
2042                         }
                             /* A bookend means the range ended inside this extent: finished. */
2043                         ret = 0;
2044                         goto out;
2045                 }
2046         }
2047 out:
2048         btrfs_free_path(path);
2049         return ret;
2050 }
2051
2052 static int prepare_pages(struct btrfs_root *root,
2053                          struct file *file,
2054                          struct page **pages,
2055                          size_t num_pages,
2056                          loff_t pos,
2057                          unsigned long first_index,
2058                          unsigned long last_index,
2059                          size_t write_bytes,
2060                          u64 alloc_extent_start)
2061 {
2062         int i;
2063         unsigned long index = pos >> PAGE_CACHE_SHIFT;
2064         struct inode *inode = file->f_path.dentry->d_inode;
2065         int offset;
2066         int err = 0;
2067         int this_write;
2068         struct buffer_head *bh;
2069         struct buffer_head *head;
2070         loff_t isize = i_size_read(inode);
2071
2072         memset(pages, 0, num_pages * sizeof(struct page *));
2073
2074         for (i = 0; i < num_pages; i++) {
2075                 pages[i] = grab_cache_page(inode->i_mapping, index + i);
2076                 if (!pages[i]) {
2077                         err = -ENOMEM;
2078                         goto failed_release;
2079                 }
2080                 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
2081                 wait_on_page_writeback(pages[i]);
2082                 offset = pos & (PAGE_CACHE_SIZE -1);
2083                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
2084                 if (!page_has_buffers(pages[i])) {
2085                         create_empty_buffers(pages[i],
2086                                              root->fs_info->sb->s_blocksize,
2087                                              (1 << BH_Uptodate));
2088                 }
2089                 head = page_buffers(pages[i]);
2090                 bh = head;
2091                 do {
2092                         err = btrfs_map_bh_to_logical(root, bh,
2093                                                       alloc_extent_start);
2094                         BUG_ON(err);
2095                         if (err)
2096                                 goto failed_truncate;
2097                         bh = bh->b_this_page;
2098                         if (alloc_extent_start)
2099                                 alloc_extent_start++;
2100                 } while (bh != head);
2101                 pos += this_write;
2102                 WARN_ON(this_write > write_bytes);
2103                 write_bytes -= this_write;
2104         }
2105         return 0;
2106
2107 failed_release:
2108         btrfs_drop_pages(pages, num_pages);
2109         return err;
2110
2111 failed_truncate:
2112         btrfs_drop_pages(pages, num_pages);
2113         if (pos > isize)
2114                 vmtruncate(inode, isize);
2115         return err;
2116 }
2117
2118 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
2119                                 size_t count, loff_t *ppos)
2120 {
2121         loff_t pos;
2122         size_t num_written = 0;
2123         int err = 0;
2124         int ret = 0;
2125         struct inode *inode = file->f_path.dentry->d_inode;
2126         struct btrfs_root *root = BTRFS_I(inode)->root;
2127         struct page *pages[8];
2128         struct page *pinned[2];
2129         unsigned long first_index;
2130         unsigned long last_index;
2131         u64 start_pos;
2132         u64 num_blocks;
2133         u64 alloc_extent_start;
2134         u64 hint_block;
2135         struct btrfs_trans_handle *trans;
2136         struct btrfs_key ins;
2137         pinned[0] = NULL;
2138         pinned[1] = NULL;
2139         if (file->f_flags & O_DIRECT)
2140                 return -EINVAL;
2141         pos = *ppos;
2142         vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
2143         current->backing_dev_info = inode->i_mapping->backing_dev_info;
2144         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
2145         if (err)
2146                 goto out;
2147         if (count == 0)
2148                 goto out;
2149         err = remove_suid(file->f_path.dentry);
2150         if (err)
2151                 goto out;
2152         file_update_time(file);
2153
2154         start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
2155         num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
2156                         inode->i_blkbits;
2157
2158         mutex_lock(&inode->i_mutex);
2159         first_index = pos >> PAGE_CACHE_SHIFT;
2160         last_index = (pos + count) >> PAGE_CACHE_SHIFT;
2161
2162         if ((pos & (PAGE_CACHE_SIZE - 1))) {
2163                 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
2164                 if (!PageUptodate(pinned[0])) {
2165                         ret = mpage_readpage(pinned[0], btrfs_get_block);
2166                         BUG_ON(ret);
2167                         wait_on_page_locked(pinned[0]);
2168                 } else {
2169                         unlock_page(pinned[0]);
2170                 }
2171         }
2172         if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
2173                 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
2174                 if (!PageUptodate(pinned[1])) {
2175                         ret = mpage_readpage(pinned[1], btrfs_get_block);
2176                         BUG_ON(ret);
2177                         wait_on_page_locked(pinned[1]);
2178                 } else {
2179                         unlock_page(pinned[1]);
2180                 }
2181         }
2182
2183         mutex_lock(&root->fs_info->fs_mutex);
2184         trans = btrfs_start_transaction(root, 1);
2185         if (!trans) {
2186                 err = -ENOMEM;
2187                 mutex_unlock(&root->fs_info->fs_mutex);
2188                 goto out_unlock;
2189         }
2190         btrfs_set_trans_block_group(trans, inode);
2191         /* FIXME blocksize != 4096 */
2192         inode->i_blocks += num_blocks << 3;
2193         hint_block = 0;
2194         if (start_pos < inode->i_size) {
2195                 /* FIXME blocksize != pagesize */
2196                 ret = drop_extents(trans, root, inode,
2197                                    start_pos,
2198                                    (pos + count + root->blocksize -1) &
2199                                    ~((u64)root->blocksize - 1), &hint_block);
2200                 BUG_ON(ret);
2201         }
2202         if (inode->i_size < start_pos) {
2203                 u64 last_pos_in_file;
2204                 u64 hole_size;
2205                 u64 mask = root->blocksize - 1;
2206                 last_pos_in_file = (inode->i_size + mask) & ~mask;
2207                 hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
2208                 hole_size >>= inode->i_blkbits;
2209                 if (last_pos_in_file < start_pos) {
2210                         ret = btrfs_insert_file_extent(trans, root,
2211                                                        inode->i_ino,
2212                                                        last_pos_in_file,
2213                                                        0, 0, hole_size);
2214                 }
2215                 BUG_ON(ret);
2216         }
2217         if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
2218             pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
2219                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
2220                                          num_blocks, hint_block, (u64)-1,
2221                                          &ins, 1);
2222                 BUG_ON(ret);
2223                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
2224                                        start_pos, ins.objectid, ins.offset,
2225                                        ins.offset);
2226                 BUG_ON(ret);
2227         } else {
2228                 ins.offset = 0;
2229                 ins.objectid = 0;
2230         }
2231         BUG_ON(ret);
2232         alloc_extent_start = ins.objectid;
2233         // btrfs_update_inode_block_group(trans, inode);
2234         ret = btrfs_end_transaction(trans, root);
2235         mutex_unlock(&root->fs_info->fs_mutex);
2236
2237         while(count > 0) {
2238                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
2239                 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
2240                 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
2241                                         PAGE_CACHE_SHIFT;
2242
2243                 memset(pages, 0, sizeof(pages));
2244                 ret = prepare_pages(root, file, pages, num_pages,
2245                                     pos, first_index, last_index,
2246                                     write_bytes, alloc_extent_start);
2247                 BUG_ON(ret);
2248
2249                 /* FIXME blocks != pagesize */
2250                 if (alloc_extent_start)
2251                         alloc_extent_start += num_pages;
2252                 ret = btrfs_copy_from_user(pos, num_pages,
2253                                            write_bytes, pages, buf);
2254                 BUG_ON(ret);
2255
2256                 ret = dirty_and_release_pages(NULL, root, file, pages,
2257                                               num_pages, pos, write_bytes);
2258                 BUG_ON(ret);
2259                 btrfs_drop_pages(pages, num_pages);
2260
2261                 buf += write_bytes;
2262                 count -= write_bytes;
2263                 pos += write_bytes;
2264                 num_written += write_bytes;
2265
2266                 balance_dirty_pages_ratelimited(inode->i_mapping);
2267                 btrfs_btree_balance_dirty(root);
2268                 cond_resched();
2269         }
2270 out_unlock:
2271         mutex_unlock(&inode->i_mutex);
2272 out:
2273         if (pinned[0])
2274                 page_cache_release(pinned[0]);
2275         if (pinned[1])
2276                 page_cache_release(pinned[1]);
2277         *ppos = pos;
2278         current->backing_dev_info = NULL;
2279         mark_inode_dirty(inode);
2280         return num_written ? num_written : err;
2281 }
2282
2283 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
2284                         unsigned long offset, unsigned long size)
2285 {
2286         char *kaddr;
2287         unsigned long left, count = desc->count;
2288         struct inode *inode = page->mapping->host;
2289
2290         if (size > count)
2291                 size = count;
2292
2293         if (!PageChecked(page)) {
2294                 /* FIXME, do it per block */
2295                 struct btrfs_root *root = BTRFS_I(inode)->root;
2296                 int ret;
2297                 struct buffer_head *bh;
2298
2299                 if (page_has_buffers(page)) {
2300                         bh = page_buffers(page);
2301                         if (!buffer_mapped(bh)) {
2302                                 SetPageChecked(page);
2303                                 goto checked;
2304                         }
2305                 }
2306
2307                 ret = btrfs_csum_verify_file_block(root,
2308                                   page->mapping->host->i_ino,
2309                                   page->index << PAGE_CACHE_SHIFT,
2310                                   kmap(page), PAGE_CACHE_SIZE);
2311                 if (ret) {
2312                         if (ret != -ENOENT) {
2313                                 printk("failed to verify ino %lu page %lu ret %d\n",
2314                                        page->mapping->host->i_ino,
2315                                        page->index, ret);
2316                                 memset(page_address(page), 1, PAGE_CACHE_SIZE);
2317                                 flush_dcache_page(page);
2318                         }
2319                 }
2320                 SetPageChecked(page);
2321                 kunmap(page);
2322         }
2323 checked:
2324         /*
2325          * Faults on the destination of a read are common, so do it before
2326          * taking the kmap.
2327          */
2328         if (!fault_in_pages_writeable(desc->arg.buf, size)) {
2329                 kaddr = kmap_atomic(page, KM_USER0);
2330                 left = __copy_to_user_inatomic(desc->arg.buf,
2331                                                 kaddr + offset, size);
2332                 kunmap_atomic(kaddr, KM_USER0);
2333                 if (left == 0)
2334                         goto success;
2335         }
2336
2337         /* Do it the slow way */
2338         kaddr = kmap(page);
2339         left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
2340         kunmap(page);
2341
2342         if (left) {
2343                 size -= left;
2344                 desc->error = -EFAULT;
2345         }
2346 success:
2347         desc->count = count - size;
2348         desc->written += size;
2349         desc->arg.buf += size;
2350         return size;
2351 }
2352
2353 /**
2354  * btrfs_file_aio_read - filesystem read routine
2355  * @iocb:       kernel I/O control block
2356  * @iov:        io vector request
2357  * @nr_segs:    number of segments in the iovec
2358  * @pos:        current file position
2359  */
2360 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
2361                                    unsigned long nr_segs, loff_t pos)
2362 {
2363         struct file *filp = iocb->ki_filp;
2364         ssize_t retval;
2365         unsigned long seg;
2366         size_t count;
2367         loff_t *ppos = &iocb->ki_pos;
2368
2369         count = 0;
2370         for (seg = 0; seg < nr_segs; seg++) {
2371                 const struct iovec *iv = &iov[seg];
2372
2373                 /*
2374                  * If any segment has a negative length, or the cumulative
2375                  * length ever wraps negative then return -EINVAL.
2376                  */
2377                 count += iv->iov_len;
2378                 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
2379                         return -EINVAL;
2380                 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
2381                         continue;
2382                 if (seg == 0)
2383                         return -EFAULT;
2384                 nr_segs = seg;
2385                 count -= iv->iov_len;   /* This segment is no good */
2386                 break;
2387         }
2388         retval = 0;
2389         if (count) {
2390                 for (seg = 0; seg < nr_segs; seg++) {
2391                         read_descriptor_t desc;
2392
2393                         desc.written = 0;
2394                         desc.arg.buf = iov[seg].iov_base;
2395                         desc.count = iov[seg].iov_len;
2396                         if (desc.count == 0)
2397                                 continue;
2398                         desc.error = 0;
2399                         do_generic_file_read(filp, ppos, &desc,
2400                                              btrfs_read_actor);
2401                         retval += desc.written;
2402                         if (desc.error) {
2403                                 retval = retval ?: desc.error;
2404                                 break;
2405                         }
2406                 }
2407         }
2408         return retval;
2409 }
2410
2411 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
2412 {
2413         struct btrfs_trans_handle *trans;
2414         struct btrfs_key key;
2415         struct btrfs_root_item root_item;
2416         struct btrfs_inode_item *inode_item;
2417         struct buffer_head *subvol;
2418         struct btrfs_leaf *leaf;
2419         struct btrfs_root *new_root;
2420         struct inode *inode;
2421         struct inode *dir;
2422         int ret;
2423         u64 objectid;
2424         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2425
2426         mutex_lock(&root->fs_info->fs_mutex);
2427         trans = btrfs_start_transaction(root, 1);
2428         BUG_ON(!trans);
2429
2430         subvol = btrfs_alloc_free_block(trans, root, 0);
2431         if (subvol == NULL)
2432                 return -ENOSPC;
2433         leaf = btrfs_buffer_leaf(subvol);
2434         btrfs_set_header_nritems(&leaf->header, 0);
2435         btrfs_set_header_level(&leaf->header, 0);
2436         btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
2437         btrfs_set_header_generation(&leaf->header, trans->transid);
2438         btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
2439         memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2440                sizeof(leaf->header.fsid));
2441         mark_buffer_dirty(subvol);
2442
2443         inode_item = &root_item.inode;
2444         memset(inode_item, 0, sizeof(*inode_item));
2445         btrfs_set_inode_generation(inode_item, 1);
2446         btrfs_set_inode_size(inode_item, 3);
2447         btrfs_set_inode_nlink(inode_item, 1);
2448         btrfs_set_inode_nblocks(inode_item, 1);
2449         btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2450
2451         btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2452         btrfs_set_root_refs(&root_item, 1);
2453         brelse(subvol);
2454         subvol = NULL;
2455
2456         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2457                                        0, &objectid);
2458         BUG_ON(ret);
2459
2460         btrfs_set_root_dirid(&root_item, new_dirid);
2461
2462         key.objectid = objectid;
2463         key.offset = 1;
2464         key.flags = 0;
2465         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2466         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2467                                 &root_item);
2468         BUG_ON(ret);
2469
2470         /*
2471          * insert the directory item
2472          */
2473         key.offset = (u64)-1;
2474         dir = root->fs_info->sb->s_root->d_inode;
2475         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2476                                     name, namelen, dir->i_ino, &key,
2477                                     BTRFS_FT_DIR);
2478         BUG_ON(ret);
2479
2480         ret = btrfs_commit_transaction(trans, root);
2481         BUG_ON(ret);
2482
2483         new_root = btrfs_read_fs_root(root->fs_info, &key);
2484         BUG_ON(!new_root);
2485
2486         trans = btrfs_start_transaction(new_root, 1);
2487         BUG_ON(!trans);
2488
2489         inode = btrfs_new_inode(trans, new_root, new_dirid,
2490                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2491         inode->i_op = &btrfs_dir_inode_operations;
2492         inode->i_fop = &btrfs_dir_file_operations;
2493
2494         ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2495         BUG_ON(ret);
2496
2497         inode->i_nlink = 1;
2498         inode->i_size = 6;
2499         ret = btrfs_update_inode(trans, new_root, inode);
2500         BUG_ON(ret);
2501
2502         ret = btrfs_commit_transaction(trans, new_root);
2503         BUG_ON(ret);
2504
2505         iput(inode);
2506
2507         mutex_unlock(&root->fs_info->fs_mutex);
2508         btrfs_btree_balance_dirty(root);
2509         return 0;
2510 }
2511
2512 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2513 {
2514         struct btrfs_trans_handle *trans;
2515         struct btrfs_key key;
2516         struct btrfs_root_item new_root_item;
2517         int ret;
2518         u64 objectid;
2519
2520         if (!root->ref_cows)
2521                 return -EINVAL;
2522
2523         mutex_lock(&root->fs_info->fs_mutex);
2524         trans = btrfs_start_transaction(root, 1);
2525         BUG_ON(!trans);
2526
2527         ret = btrfs_update_inode(trans, root, root->inode);
2528         BUG_ON(ret);
2529
2530         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2531                                        0, &objectid);
2532         BUG_ON(ret);
2533
2534         memcpy(&new_root_item, &root->root_item,
2535                sizeof(new_root_item));
2536
2537         key.objectid = objectid;
2538         key.offset = 1;
2539         key.flags = 0;
2540         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2541         btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
2542
2543         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2544                                 &new_root_item);
2545         BUG_ON(ret);
2546
2547         /*
2548          * insert the directory item
2549          */
2550         key.offset = (u64)-1;
2551         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2552                                     name, namelen,
2553                                     root->fs_info->sb->s_root->d_inode->i_ino,
2554                                     &key, BTRFS_FT_DIR);
2555
2556         BUG_ON(ret);
2557
2558         ret = btrfs_inc_root_ref(trans, root);
2559         BUG_ON(ret);
2560
2561         ret = btrfs_commit_transaction(trans, root);
2562         BUG_ON(ret);
2563         mutex_unlock(&root->fs_info->fs_mutex);
2564         btrfs_btree_balance_dirty(root);
2565         return 0;
2566 }
2567
2568 static int add_disk(struct btrfs_root *root, char *name, int namelen)
2569 {
2570         struct block_device *bdev;
2571         struct btrfs_path *path;
2572         struct super_block *sb = root->fs_info->sb;
2573         struct btrfs_root *dev_root = root->fs_info->dev_root;
2574         struct btrfs_trans_handle *trans;
2575         struct btrfs_device_item *dev_item;
2576         struct btrfs_key key;
2577         u16 item_size;
2578         u64 num_blocks;
2579         u64 new_blocks;
2580         u64 device_id;
2581         int ret;
2582
2583 printk("adding disk %s\n", name);
2584         path = btrfs_alloc_path();
2585         if (!path)
2586                 return -ENOMEM;
2587         num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super);
2588         bdev = open_bdev_excl(name, O_RDWR, sb);
2589         if (IS_ERR(bdev)) {
2590                 ret = PTR_ERR(bdev);
2591 printk("open bdev excl failed ret %d\n", ret);
2592                 goto out_nolock;
2593         }
2594         set_blocksize(bdev, sb->s_blocksize);
2595         new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2596         key.objectid = num_blocks;
2597         key.offset = new_blocks;
2598         key.flags = 0;
2599         btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
2600
2601         mutex_lock(&dev_root->fs_info->fs_mutex);
2602         trans = btrfs_start_transaction(dev_root, 1);
2603         item_size = sizeof(*dev_item) + namelen;
2604 printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size);
2605         ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size);
2606         if (ret) {
2607 printk("insert failed %d\n", ret);
2608                 close_bdev_excl(bdev);
2609                 if (ret > 0)
2610                         ret = -EEXIST;
2611                 goto out;
2612         }
2613         dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2614                                   path->slots[0], struct btrfs_device_item);
2615         btrfs_set_device_pathlen(dev_item, namelen);
2616         memcpy(dev_item + 1, name, namelen);
2617
2618         device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1;
2619         btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id);
2620         btrfs_set_device_id(dev_item, device_id);
2621         mark_buffer_dirty(path->nodes[0]);
2622
2623         ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks,
2624                                      new_blocks);
2625
2626         if (!ret) {
2627                 btrfs_set_super_total_blocks(root->fs_info->disk_super,
2628                                              num_blocks + new_blocks);
2629                 i_size_write(root->fs_info->btree_inode,
2630                              (num_blocks + new_blocks) <<
2631                              root->fs_info->btree_inode->i_blkbits);
2632         }
2633
2634 out:
2635         ret = btrfs_commit_transaction(trans, dev_root);
2636         BUG_ON(ret);
2637         mutex_unlock(&root->fs_info->fs_mutex);
2638 out_nolock:
2639         btrfs_free_path(path);
2640         btrfs_btree_balance_dirty(root);
2641
2642         return ret;
2643 }
2644
2645 static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2646                        cmd, unsigned long arg)
2647 {
2648         struct btrfs_root *root = BTRFS_I(inode)->root;
2649         struct btrfs_ioctl_vol_args vol_args;
2650         int ret = 0;
2651         struct btrfs_dir_item *di;
2652         int namelen;
2653         struct btrfs_path *path;
2654         u64 root_dirid;
2655
2656         switch (cmd) {
2657         case BTRFS_IOC_SNAP_CREATE:
2658                 if (copy_from_user(&vol_args,
2659                                    (struct btrfs_ioctl_vol_args __user *)arg,
2660                                    sizeof(vol_args)))
2661                         return -EFAULT;
2662                 namelen = strlen(vol_args.name);
2663                 if (namelen > BTRFS_VOL_NAME_MAX)
2664                         return -EINVAL;
2665                 path = btrfs_alloc_path();
2666                 if (!path)
2667                         return -ENOMEM;
2668                 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2669                 mutex_lock(&root->fs_info->fs_mutex);
2670                 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2671                                     path, root_dirid,
2672                                     vol_args.name, namelen, 0);
2673                 mutex_unlock(&root->fs_info->fs_mutex);
2674                 btrfs_free_path(path);
2675                 if (di && !IS_ERR(di))
2676                         return -EEXIST;
2677
2678                 if (root == root->fs_info->tree_root)
2679                         ret = create_subvol(root, vol_args.name, namelen);
2680                 else
2681                         ret = create_snapshot(root, vol_args.name, namelen);
2682                 WARN_ON(ret);
2683                 break;
2684         case BTRFS_IOC_ADD_DISK:
2685                 if (copy_from_user(&vol_args,
2686                                    (struct btrfs_ioctl_vol_args __user *)arg,
2687                                    sizeof(vol_args)))
2688                         return -EFAULT;
2689                 namelen = strlen(vol_args.name);
2690                 if (namelen > BTRFS_VOL_NAME_MAX)
2691                         return -EINVAL;
2692                 vol_args.name[namelen] = '\0';
2693                 ret = add_disk(root, vol_args.name, namelen);
2694                 break;
2695         default:
2696                 return -ENOTTY;
2697         }
2698         return ret;
2699 }
2700
/*
 * Slab caches set up by init_inodecache() and torn down by
 * destroy_inodecache().  Only the inode cache is private to this file.
 */
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
static struct kmem_cache *btrfs_inode_cachep;
2706
2707 /*
2708  * Called inside transaction, so use GFP_NOFS
2709  */
2710 static struct inode *btrfs_alloc_inode(struct super_block *sb)
2711 {
2712         struct btrfs_inode *ei;
2713
2714         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2715         if (!ei)
2716                 return NULL;
2717         return &ei->vfs_inode;
2718 }
2719
2720 static void btrfs_destroy_inode(struct inode *inode)
2721 {
2722         WARN_ON(!list_empty(&inode->i_dentry));
2723         WARN_ON(inode->i_data.nrpages);
2724
2725         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2726 }
2727
2728 static void init_once(void * foo, struct kmem_cache * cachep,
2729                       unsigned long flags)
2730 {
2731         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2732
2733         if ((flags & (SLAB_CTOR_CONSTRUCTOR)) ==
2734             SLAB_CTOR_CONSTRUCTOR) {
2735                 inode_init_once(&ei->vfs_inode);
2736         }
2737 }
2738
2739 static int init_inodecache(void)
2740 {
2741         btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2742                                              sizeof(struct btrfs_inode),
2743                                              0, (SLAB_RECLAIM_ACCOUNT|
2744                                                 SLAB_MEM_SPREAD),
2745                                              init_once, NULL);
2746         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2747                                              sizeof(struct btrfs_trans_handle),
2748                                              0, (SLAB_RECLAIM_ACCOUNT|
2749                                                 SLAB_MEM_SPREAD),
2750                                              NULL, NULL);
2751         btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2752                                              sizeof(struct btrfs_transaction),
2753                                              0, (SLAB_RECLAIM_ACCOUNT|
2754                                                 SLAB_MEM_SPREAD),
2755                                              NULL, NULL);
2756         btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2757                                              sizeof(struct btrfs_transaction),
2758                                              0, (SLAB_RECLAIM_ACCOUNT|
2759                                                 SLAB_MEM_SPREAD),
2760                                              NULL, NULL);
2761         btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2762                                              256,
2763                                              0, (SLAB_RECLAIM_ACCOUNT|
2764                                                 SLAB_MEM_SPREAD |
2765                                                 SLAB_DESTROY_BY_RCU),
2766                                              NULL, NULL);
2767         if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
2768             btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
2769                 return -ENOMEM;
2770         return 0;
2771 }
2772
2773 static void destroy_inodecache(void)
2774 {
2775         kmem_cache_destroy(btrfs_inode_cachep);
2776         kmem_cache_destroy(btrfs_trans_handle_cachep);
2777         kmem_cache_destroy(btrfs_transaction_cachep);
2778         kmem_cache_destroy(btrfs_bit_radix_cachep);
2779         kmem_cache_destroy(btrfs_path_cachep);
2780 }
2781
2782 static int btrfs_get_sb(struct file_system_type *fs_type,
2783         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2784 {
2785         return get_sb_bdev(fs_type, flags, dev_name, data,
2786                            btrfs_fill_super, mnt);
2787 }
2788
2789 static int btrfs_getattr(struct vfsmount *mnt,
2790                          struct dentry *dentry, struct kstat *stat)
2791 {
2792         struct inode *inode = dentry->d_inode;
2793         generic_fillattr(inode, stat);
2794         stat->blksize = 256 * 1024;
2795         return 0;
2796 }
2797
2798 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2799 {
2800         struct btrfs_root *root = btrfs_sb(dentry->d_sb);
2801         struct btrfs_super_block *disk_super = root->fs_info->disk_super;
2802
2803         buf->f_namelen = BTRFS_NAME_LEN;
2804         buf->f_blocks = btrfs_super_total_blocks(disk_super);
2805         buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super);
2806         buf->f_bavail = buf->f_bfree;
2807         buf->f_bsize = dentry->d_sb->s_blocksize;
2808         buf->f_type = BTRFS_SUPER_MAGIC;
2809         return 0;
2810 }
2811
2812 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2813                            struct inode * new_dir,struct dentry *new_dentry)
2814 {
2815         struct btrfs_trans_handle *trans;
2816         struct btrfs_root *root = BTRFS_I(old_dir)->root;
2817         struct inode *new_inode = new_dentry->d_inode;
2818         struct inode *old_inode = old_dentry->d_inode;
2819         struct timespec ctime = CURRENT_TIME;
2820         struct btrfs_path *path;
2821         struct btrfs_dir_item *di;
2822         int ret;
2823
2824         if (S_ISDIR(old_inode->i_mode) && new_inode &&
2825             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2826                 return -ENOTEMPTY;
2827         }
2828         mutex_lock(&root->fs_info->fs_mutex);
2829         trans = btrfs_start_transaction(root, 1);
2830         btrfs_set_trans_block_group(trans, new_dir);
2831         path = btrfs_alloc_path();
2832         if (!path) {
2833                 ret = -ENOMEM;
2834                 goto out_fail;
2835         }
2836
2837         old_dentry->d_inode->i_nlink++;
2838         old_dir->i_ctime = old_dir->i_mtime = ctime;
2839         new_dir->i_ctime = new_dir->i_mtime = ctime;
2840         old_inode->i_ctime = ctime;
2841         if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) {
2842                 struct btrfs_key *location = &BTRFS_I(new_dir)->location;
2843                 u64 old_parent_oid;
2844                 di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino,
2845                                            "..", 2, -1);
2846                 if (IS_ERR(di)) {
2847                         ret = PTR_ERR(di);
2848                         goto out_fail;
2849                 }
2850                 if (!di) {
2851                         ret = -ENOENT;
2852                         goto out_fail;
2853                 }
2854                 old_parent_oid = btrfs_disk_key_objectid(&di->location);
2855                 ret = btrfs_del_item(trans, root, path);
2856                 if (ret) {
2857                         ret = -EIO;
2858                         goto out_fail;
2859                 }
2860                 btrfs_release_path(root, path);
2861
2862                 di = btrfs_lookup_dir_index_item(trans, root, path,
2863                                                  old_inode->i_ino,
2864                                                  old_parent_oid,
2865                                                  "..", 2, -1);
2866                 if (IS_ERR(di)) {
2867                         ret = PTR_ERR(di);
2868                         goto out_fail;
2869                 }
2870                 if (!di) {
2871                         ret = -ENOENT;
2872                         goto out_fail;
2873                 }
2874                 ret = btrfs_del_item(trans, root, path);
2875                 if (ret) {
2876                         ret = -EIO;
2877                         goto out_fail;
2878                 }
2879                 btrfs_release_path(root, path);
2880
2881                 ret = btrfs_insert_dir_item(trans, root, "..", 2,
2882                                             old_inode->i_ino, location,
2883                                             BTRFS_FT_DIR);
2884                 if (ret)
2885                         goto out_fail;
2886         }
2887
2888
2889         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2890         if (ret)
2891                 goto out_fail;
2892
2893         if (new_inode) {
2894                 new_inode->i_ctime = CURRENT_TIME;
2895                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2896                 if (ret)
2897                         goto out_fail;
2898                 if (S_ISDIR(new_inode->i_mode))
2899                         clear_nlink(new_inode);
2900                 else
2901                         drop_nlink(new_inode);
2902                 btrfs_update_inode(trans, root, new_inode);
2903         }
2904         ret = btrfs_add_link(trans, new_dentry, old_inode);
2905         if (ret)
2906                 goto out_fail;
2907
2908 out_fail:
2909         btrfs_free_path(path);
2910         btrfs_end_transaction(trans, root);
2911         mutex_unlock(&root->fs_info->fs_mutex);
2912         return ret;
2913 }
2914
2915 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2916                          const char *symname)
2917 {
2918         struct btrfs_trans_handle *trans;
2919         struct btrfs_root *root = BTRFS_I(dir)->root;
2920         struct btrfs_path *path;
2921         struct btrfs_key key;
2922         struct inode *inode;
2923         int err;
2924         int drop_inode = 0;
2925         u64 objectid;
2926         int name_len;
2927         int datasize;
2928         char *ptr;
2929         struct btrfs_file_extent_item *ei;
2930
2931         name_len = strlen(symname) + 1;
2932         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2933                 return -ENAMETOOLONG;
2934         mutex_lock(&root->fs_info->fs_mutex);
2935         trans = btrfs_start_transaction(root, 1);
2936         btrfs_set_trans_block_group(trans, dir);
2937
2938         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2939         if (err) {
2940                 err = -ENOSPC;
2941                 goto out_unlock;
2942         }
2943
2944         inode = btrfs_new_inode(trans, root, objectid,
2945                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2946         err = PTR_ERR(inode);
2947         if (IS_ERR(inode))
2948                 goto out_unlock;
2949
2950         btrfs_set_trans_block_group(trans, inode);
2951         err = btrfs_add_nondir(trans, dentry, inode);
2952         if (err)
2953                 drop_inode = 1;
2954         else {
2955                 inode->i_mapping->a_ops = &btrfs_aops;
2956                 inode->i_fop = &btrfs_file_operations;
2957                 inode->i_op = &btrfs_file_inode_operations;
2958         }
2959         dir->i_sb->s_dirt = 1;
2960         btrfs_update_inode_block_group(trans, inode);
2961         btrfs_update_inode_block_group(trans, dir);
2962         if (drop_inode)
2963                 goto out_unlock;
2964
2965         path = btrfs_alloc_path();
2966         BUG_ON(!path);
2967         key.objectid = inode->i_ino;
2968         key.offset = 0;
2969         key.flags = 0;
2970         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2971         datasize = btrfs_file_extent_calc_inline_size(name_len);
2972         err = btrfs_insert_empty_item(trans, root, path, &key,
2973                                       datasize);
2974         BUG_ON(err);
2975         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2976                path->slots[0], struct btrfs_file_extent_item);
2977         btrfs_set_file_extent_generation(ei, trans->transid);
2978         btrfs_set_file_extent_type(ei,
2979                                    BTRFS_FILE_EXTENT_INLINE);
2980         ptr = btrfs_file_extent_inline_start(ei);
2981         btrfs_memcpy(root, path->nodes[0]->b_data,
2982                      ptr, symname, name_len);
2983         mark_buffer_dirty(path->nodes[0]);
2984         btrfs_free_path(path);
2985         inode->i_op = &btrfs_symlink_inode_operations;
2986         inode->i_mapping->a_ops = &btrfs_symlink_aops;
2987         inode->i_size = name_len - 1;
2988         btrfs_update_inode(trans, root, inode);
2989         err = 0;
2990
2991 out_unlock:
2992         btrfs_end_transaction(trans, root);
2993         mutex_unlock(&root->fs_info->fs_mutex);
2994
2995         if (drop_inode) {
2996                 inode_dec_link_count(inode);
2997                 iput(inode);
2998         }
2999         btrfs_btree_balance_dirty(root);
3000         return err;
3001 }
3002
3003 static struct file_system_type btrfs_fs_type = {
3004         .owner          = THIS_MODULE,
3005         .name           = "btrfs",
3006         .get_sb         = btrfs_get_sb,
3007         .kill_sb        = kill_block_super,
3008         .fs_flags       = FS_REQUIRES_DEV,
3009 };
3010
3011 static struct super_operations btrfs_super_ops = {
3012         .delete_inode   = btrfs_delete_inode,
3013         .put_super      = btrfs_put_super,
3014         .read_inode     = btrfs_read_locked_inode,
3015         .write_super    = btrfs_write_super,
3016         .sync_fs        = btrfs_sync_fs,
3017         .write_inode    = btrfs_write_inode,
3018         .dirty_inode    = btrfs_dirty_inode,
3019         .alloc_inode    = btrfs_alloc_inode,
3020         .destroy_inode  = btrfs_destroy_inode,
3021         .statfs         = btrfs_statfs,
3022 };
3023
3024 static struct inode_operations btrfs_dir_inode_operations = {
3025         .lookup         = btrfs_lookup,
3026         .create         = btrfs_create,
3027         .unlink         = btrfs_unlink,
3028         .link           = btrfs_link,
3029         .mkdir          = btrfs_mkdir,
3030         .rmdir          = btrfs_rmdir,
3031         .rename         = btrfs_rename,
3032         .symlink        = btrfs_symlink,
3033         .setattr        = btrfs_setattr,
3034 };
3035
3036 static struct inode_operations btrfs_dir_ro_inode_operations = {
3037         .lookup         = btrfs_lookup,
3038 };
3039
3040 static struct file_operations btrfs_dir_file_operations = {
3041         .llseek         = generic_file_llseek,
3042         .read           = generic_read_dir,
3043         .readdir        = btrfs_readdir,
3044         .ioctl          = btrfs_ioctl,
3045 };
3046
3047 static struct address_space_operations btrfs_aops = {
3048         .readpage       = btrfs_readpage,
3049         .writepage      = btrfs_writepage,
3050         .sync_page      = block_sync_page,
3051         .prepare_write  = btrfs_prepare_write,
3052         .commit_write   = btrfs_commit_write,
3053         .bmap           = btrfs_bmap,
3054 };
3055
3056 static struct address_space_operations btrfs_symlink_aops = {
3057         .readpage       = btrfs_readpage,
3058         .writepage      = btrfs_writepage,
3059 };
3060
3061 static struct inode_operations btrfs_file_inode_operations = {
3062         .truncate       = btrfs_truncate,
3063         .getattr        = btrfs_getattr,
3064         .setattr        = btrfs_setattr,
3065 };
3066
3067 static struct file_operations btrfs_file_operations = {
3068         .llseek         = generic_file_llseek,
3069         .read           = do_sync_read,
3070         .aio_read       = btrfs_file_aio_read,
3071         .write          = btrfs_file_write,
3072         .mmap           = generic_file_mmap,
3073         .open           = generic_file_open,
3074         .ioctl          = btrfs_ioctl,
3075         .fsync          = btrfs_sync_file,
3076 };
3077
3078 static struct inode_operations btrfs_symlink_inode_operations = {
3079         .readlink       = generic_readlink,
3080         .follow_link    = page_follow_link_light,
3081         .put_link       = page_put_link,
3082 };
3083
3084 static int __init init_btrfs_fs(void)
3085 {
3086         int err;
3087         printk("btrfs loaded!\n");
3088         err = init_inodecache();
3089         if (err)
3090                 return err;
3091         return register_filesystem(&btrfs_fs_type);
3092         destroy_inodecache();
3093         return err;
3094 }
3095
3096 static void __exit exit_btrfs_fs(void)
3097 {
3098         destroy_inodecache();
3099         unregister_filesystem(&btrfs_fs_type);
3100         printk("btrfs unloaded\n");
3101 }
3102
3103 module_init(init_btrfs_fs)
3104 module_exit(exit_btrfs_fs)
3105
3106 MODULE_LICENSE("GPL");