www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - fs/btrfs/super.c
Btrfs: directory inode index is back
[linux-2.6-omap-h63xx.git] / fs / btrfs / super.c
1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
3 #include <linux/fs.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
14 #include "ctree.h"
15 #include "disk-io.h"
16 #include "transaction.h"
17 #include "btrfs_inode.h"
18 #include "ioctl.h"
19
20 void btrfs_fsinfo_release(struct kobject *obj)
21 {
22         struct btrfs_fs_info *fsinfo = container_of(obj,
23                                             struct btrfs_fs_info, kobj);
24         kfree(fsinfo);
25 }
26
27 struct kobj_type btrfs_fsinfo_ktype = {
28         .release = btrfs_fsinfo_release,
29 };
30
31 struct btrfs_iget_args {
32         u64 ino;
33         struct btrfs_root *root;
34 };
35
36 decl_subsys(btrfs, &btrfs_fsinfo_ktype, NULL);
37
38 #define BTRFS_SUPER_MAGIC 0x9123682E
39
40 static struct inode_operations btrfs_dir_inode_operations;
41 static struct inode_operations btrfs_dir_ro_inode_operations;
42 static struct super_operations btrfs_super_ops;
43 static struct file_operations btrfs_dir_file_operations;
44 static struct inode_operations btrfs_file_inode_operations;
45 static struct address_space_operations btrfs_aops;
46 static struct file_operations btrfs_file_operations;
47
48 static void btrfs_read_locked_inode(struct inode *inode)
49 {
50         struct btrfs_path *path;
51         struct btrfs_inode_item *inode_item;
52         struct btrfs_root *root = BTRFS_I(inode)->root;
53         struct btrfs_key location;
54         int ret;
55
56         path = btrfs_alloc_path();
57         BUG_ON(!path);
58         btrfs_init_path(path);
59         mutex_lock(&root->fs_info->fs_mutex);
60
61         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
62         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
63         if (ret) {
64                 btrfs_free_path(path);
65                 goto make_bad;
66         }
67         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
68                                   path->slots[0],
69                                   struct btrfs_inode_item);
70
71         inode->i_mode = btrfs_inode_mode(inode_item);
72         inode->i_nlink = btrfs_inode_nlink(inode_item);
73         inode->i_uid = btrfs_inode_uid(inode_item);
74         inode->i_gid = btrfs_inode_gid(inode_item);
75         inode->i_size = btrfs_inode_size(inode_item);
76         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
77         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
78         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
79         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
80         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
81         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
82         inode->i_blocks = btrfs_inode_nblocks(inode_item);
83         inode->i_generation = btrfs_inode_generation(inode_item);
84
85         btrfs_free_path(path);
86         inode_item = NULL;
87
88         mutex_unlock(&root->fs_info->fs_mutex);
89
90         switch (inode->i_mode & S_IFMT) {
91 #if 0
92         default:
93                 init_special_inode(inode, inode->i_mode,
94                                    btrfs_inode_rdev(inode_item));
95                 break;
96 #endif
97         case S_IFREG:
98                 inode->i_mapping->a_ops = &btrfs_aops;
99                 inode->i_fop = &btrfs_file_operations;
100                 inode->i_op = &btrfs_file_inode_operations;
101                 break;
102         case S_IFDIR:
103                 inode->i_fop = &btrfs_dir_file_operations;
104                 if (root == root->fs_info->tree_root)
105                         inode->i_op = &btrfs_dir_ro_inode_operations;
106                 else
107                         inode->i_op = &btrfs_dir_inode_operations;
108                 break;
109         case S_IFLNK:
110                 // inode->i_op = &page_symlink_inode_operations;
111                 break;
112         }
113         return;
114
115 make_bad:
116         btrfs_release_path(root, path);
117         btrfs_free_path(path);
118         mutex_unlock(&root->fs_info->fs_mutex);
119         make_bad_inode(inode);
120 }
121
122 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
123                               struct btrfs_root *root,
124                               struct inode *dir,
125                               struct dentry *dentry)
126 {
127         struct btrfs_path *path;
128         const char *name = dentry->d_name.name;
129         int name_len = dentry->d_name.len;
130         int ret = 0;
131         u64 objectid;
132         struct btrfs_dir_item *di;
133
134         path = btrfs_alloc_path();
135         BUG_ON(!path);
136         btrfs_init_path(path);
137         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
138                                     name, name_len, -1);
139         if (IS_ERR(di)) {
140                 ret = PTR_ERR(di);
141                 goto err;
142         }
143         if (!di) {
144                 ret = -ENOENT;
145                 goto err;
146         }
147         objectid = btrfs_disk_key_objectid(&di->location);
148         ret = btrfs_delete_one_dir_name(trans, root, path, di);
149         BUG_ON(ret);
150         btrfs_release_path(root, path);
151
152         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
153                                          objectid, name, name_len, -1);
154         if (IS_ERR(di)) {
155                 ret = PTR_ERR(di);
156                 goto err;
157         }
158         if (!di) {
159                 ret = -ENOENT;
160                 goto err;
161         }
162         ret = btrfs_delete_one_dir_name(trans, root, path, di);
163         BUG_ON(ret);
164
165         dentry->d_inode->i_ctime = dir->i_ctime;
166 err:
167         btrfs_free_path(path);
168         if (ret == 0) {
169                 inode_dec_link_count(dentry->d_inode);
170                 dir->i_size -= name_len * 2;
171                 mark_inode_dirty(dir);
172         }
173         return ret;
174 }
175
176 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
177 {
178         struct btrfs_root *root;
179         struct btrfs_trans_handle *trans;
180         int ret;
181
182         root = BTRFS_I(dir)->root;
183         mutex_lock(&root->fs_info->fs_mutex);
184         trans = btrfs_start_transaction(root, 1);
185         ret = btrfs_unlink_trans(trans, root, dir, dentry);
186         btrfs_end_transaction(trans, root);
187         mutex_unlock(&root->fs_info->fs_mutex);
188         return ret;
189 }
190
191 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
192 {
193         struct inode *inode = dentry->d_inode;
194         int err;
195         int ret;
196         struct btrfs_root *root = BTRFS_I(dir)->root;
197         struct btrfs_path *path;
198         struct btrfs_key key;
199         struct btrfs_trans_handle *trans;
200         struct btrfs_key found_key;
201         int found_type;
202         struct btrfs_leaf *leaf;
203         char *goodnames = "..";
204
205         path = btrfs_alloc_path();
206         BUG_ON(!path);
207         btrfs_init_path(path);
208         mutex_lock(&root->fs_info->fs_mutex);
209         trans = btrfs_start_transaction(root, 1);
210         key.objectid = inode->i_ino;
211         key.offset = (u64)-1;
212         key.flags = (u32)-1;
213         while(1) {
214                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
215                 if (ret < 0) {
216                         err = ret;
217                         goto out;
218                 }
219                 BUG_ON(ret == 0);
220                 if (path->slots[0] == 0) {
221                         err = -ENOENT;
222                         goto out;
223                 }
224                 path->slots[0]--;
225                 leaf = btrfs_buffer_leaf(path->nodes[0]);
226                 btrfs_disk_key_to_cpu(&found_key,
227                                       &leaf->items[path->slots[0]].key);
228                 found_type = btrfs_key_type(&found_key);
229                 if (found_key.objectid != inode->i_ino) {
230                         err = -ENOENT;
231                         goto out;
232                 }
233                 if ((found_type != BTRFS_DIR_ITEM_KEY &&
234                      found_type != BTRFS_DIR_INDEX_KEY) ||
235                     (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
236                     !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
237                         err = -ENOTEMPTY;
238                         goto out;
239                 }
240                 ret = btrfs_del_item(trans, root, path);
241                 BUG_ON(ret);
242
243                 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
244                         break;
245                 btrfs_release_path(root, path);
246         }
247         ret = 0;
248         btrfs_release_path(root, path);
249
250         /* now the directory is empty */
251         err = btrfs_unlink_trans(trans, root, dir, dentry);
252         if (!err) {
253                 inode->i_size = 0;
254         }
255 out:
256         btrfs_release_path(root, path);
257         btrfs_free_path(path);
258         mutex_unlock(&root->fs_info->fs_mutex);
259         ret = btrfs_end_transaction(trans, root);
260         if (ret && !err)
261                 err = ret;
262         return err;
263 }
264
265 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
266                             struct btrfs_root *root,
267                             struct inode *inode)
268 {
269         struct btrfs_path *path;
270         int ret;
271
272         clear_inode(inode);
273
274         path = btrfs_alloc_path();
275         BUG_ON(!path);
276         btrfs_init_path(path);
277         ret = btrfs_lookup_inode(trans, root, path,
278                                  &BTRFS_I(inode)->location, -1);
279         BUG_ON(ret);
280         ret = btrfs_del_item(trans, root, path);
281         BUG_ON(ret);
282         btrfs_free_path(path);
283         return ret;
284 }
285
286 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
287                                    struct btrfs_root *root,
288                                    struct inode *inode)
289 {
290         int ret;
291         struct btrfs_path *path;
292         struct btrfs_key key;
293         struct btrfs_disk_key *found_key;
294         struct btrfs_leaf *leaf;
295         struct btrfs_file_extent_item *fi = NULL;
296         u64 extent_start = 0;
297         u64 extent_num_blocks = 0;
298         int found_extent;
299
300         path = btrfs_alloc_path();
301         BUG_ON(!path);
302         /* FIXME, add redo link to tree so we don't leak on crash */
303         key.objectid = inode->i_ino;
304         key.offset = (u64)-1;
305         key.flags = 0;
306         /*
307          * use BTRFS_CSUM_ITEM_KEY because it is larger than inline keys
308          * or extent data
309          */
310         btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY);
311         while(1) {
312                 btrfs_init_path(path);
313                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
314                 if (ret < 0) {
315                         goto error;
316                 }
317                 if (ret > 0) {
318                         BUG_ON(path->slots[0] == 0);
319                         path->slots[0]--;
320                 }
321                 leaf = btrfs_buffer_leaf(path->nodes[0]);
322                 found_key = &leaf->items[path->slots[0]].key;
323                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
324                         break;
325                 if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY &&
326                     btrfs_disk_key_type(found_key) != BTRFS_INLINE_DATA_KEY &&
327                     btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY)
328                         break;
329                 if (btrfs_disk_key_offset(found_key) < inode->i_size)
330                         break;
331                 found_extent = 0;
332                 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
333                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
334                                             path->slots[0],
335                                             struct btrfs_file_extent_item);
336                         if (btrfs_file_extent_type(fi) !=
337                             BTRFS_FILE_EXTENT_INLINE) {
338                                 extent_start =
339                                         btrfs_file_extent_disk_blocknr(fi);
340                                 extent_num_blocks =
341                                         btrfs_file_extent_disk_num_blocks(fi);
342                                 /* FIXME blocksize != 4096 */
343                                 inode->i_blocks -=
344                                         btrfs_file_extent_num_blocks(fi) << 3;
345                                 found_extent = 1;
346                         }
347                 }
348                 ret = btrfs_del_item(trans, root, path);
349                 BUG_ON(ret);
350                 btrfs_release_path(root, path);
351                 if (found_extent) {
352                         ret = btrfs_free_extent(trans, root, extent_start,
353                                                 extent_num_blocks, 0);
354                         BUG_ON(ret);
355                 }
356         }
357         ret = 0;
358 error:
359         btrfs_release_path(root, path);
360         btrfs_free_path(path);
361         return ret;
362 }
363
364 static void btrfs_delete_inode(struct inode *inode)
365 {
366         struct btrfs_trans_handle *trans;
367         struct btrfs_root *root = BTRFS_I(inode)->root;
368         int ret;
369
370         truncate_inode_pages(&inode->i_data, 0);
371         if (is_bad_inode(inode)) {
372                 goto no_delete;
373         }
374         inode->i_size = 0;
375         mutex_lock(&root->fs_info->fs_mutex);
376         trans = btrfs_start_transaction(root, 1);
377         if (S_ISREG(inode->i_mode)) {
378                 ret = btrfs_truncate_in_trans(trans, root, inode);
379                 BUG_ON(ret);
380         }
381         btrfs_free_inode(trans, root, inode);
382         btrfs_end_transaction(trans, root);
383         mutex_unlock(&root->fs_info->fs_mutex);
384         return;
385 no_delete:
386         clear_inode(inode);
387 }
388
389 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
390                                struct btrfs_key *location)
391 {
392         const char *name = dentry->d_name.name;
393         int namelen = dentry->d_name.len;
394         struct btrfs_dir_item *di;
395         struct btrfs_path *path;
396         struct btrfs_root *root = BTRFS_I(dir)->root;
397         int ret;
398
399         path = btrfs_alloc_path();
400         BUG_ON(!path);
401         btrfs_init_path(path);
402         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
403                                     namelen, 0);
404         if (!di || IS_ERR(di)) {
405                 location->objectid = 0;
406                 ret = 0;
407                 goto out;
408         }
409         btrfs_disk_key_to_cpu(location, &di->location);
410 out:
411         btrfs_release_path(root, path);
412         btrfs_free_path(path);
413         return ret;
414 }
415
416 int fixup_tree_root_location(struct btrfs_root *root,
417                              struct btrfs_key *location,
418                              struct btrfs_root **sub_root)
419 {
420         struct btrfs_path *path;
421         struct btrfs_root_item *ri;
422
423         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
424                 return 0;
425         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
426                 return 0;
427
428         path = btrfs_alloc_path();
429         BUG_ON(!path);
430         mutex_lock(&root->fs_info->fs_mutex);
431
432         *sub_root = btrfs_read_fs_root(root->fs_info, location);
433         if (IS_ERR(*sub_root))
434                 return PTR_ERR(*sub_root);
435
436         ri = &(*sub_root)->root_item;
437         location->objectid = btrfs_root_dirid(ri);
438         location->flags = 0;
439         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
440         location->offset = 0;
441
442         btrfs_free_path(path);
443         mutex_unlock(&root->fs_info->fs_mutex);
444         return 0;
445 }
446
447 int btrfs_init_locked_inode(struct inode *inode, void *p)
448 {
449         struct btrfs_iget_args *args = p;
450         inode->i_ino = args->ino;
451         BTRFS_I(inode)->root = args->root;
452         return 0;
453 }
454
455 int btrfs_find_actor(struct inode *inode, void *opaque)
456 {
457         struct btrfs_iget_args *args = opaque;
458         return (args->ino == inode->i_ino &&
459                 args->root == BTRFS_I(inode)->root);
460 }
461
462 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
463                                 struct btrfs_root *root)
464 {
465         struct inode *inode;
466         struct btrfs_iget_args args;
467         args.ino = objectid;
468         args.root = root;
469
470         inode = iget5_locked(s, objectid, btrfs_find_actor,
471                              btrfs_init_locked_inode,
472                              (void *)&args);
473         return inode;
474 }
475
476 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
477                                    struct nameidata *nd)
478 {
479         struct inode * inode;
480         struct btrfs_inode *bi = BTRFS_I(dir);
481         struct btrfs_root *root = bi->root;
482         struct btrfs_root *sub_root = root;
483         struct btrfs_key location;
484         int ret;
485
486         if (dentry->d_name.len > BTRFS_NAME_LEN)
487                 return ERR_PTR(-ENAMETOOLONG);
488         mutex_lock(&root->fs_info->fs_mutex);
489         ret = btrfs_inode_by_name(dir, dentry, &location);
490         mutex_unlock(&root->fs_info->fs_mutex);
491         if (ret < 0)
492                 return ERR_PTR(ret);
493         inode = NULL;
494         if (location.objectid) {
495                 ret = fixup_tree_root_location(root, &location, &sub_root);
496                 if (ret < 0)
497                         return ERR_PTR(ret);
498                 if (ret > 0)
499                         return ERR_PTR(-ENOENT);
500                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
501                                           sub_root);
502                 if (!inode)
503                         return ERR_PTR(-EACCES);
504                 if (inode->i_state & I_NEW) {
505                         if (sub_root != root) {
506 printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root);
507                                 igrab(inode);
508                                 sub_root->inode = inode;
509                         }
510                         BTRFS_I(inode)->root = sub_root;
511                         memcpy(&BTRFS_I(inode)->location, &location,
512                                sizeof(location));
513                         btrfs_read_locked_inode(inode);
514                         unlock_new_inode(inode);
515                 }
516         }
517         return d_splice_alias(inode, dentry);
518 }
519
520 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
521 {
522         struct inode *inode = filp->f_path.dentry->d_inode;
523         struct btrfs_root *root = BTRFS_I(inode)->root;
524         struct btrfs_item *item;
525         struct btrfs_dir_item *di;
526         struct btrfs_key key;
527         struct btrfs_path *path;
528         int ret;
529         u32 nritems;
530         struct btrfs_leaf *leaf;
531         int slot;
532         int advance;
533         unsigned char d_type = DT_UNKNOWN;
534         int over = 0;
535         u32 di_cur;
536         u32 di_total;
537         u32 di_len;
538         int key_type = BTRFS_DIR_INDEX_KEY;
539
540         /* FIXME, use a real flag for deciding about the key type */
541         if (root->fs_info->tree_root == root)
542                 key_type = BTRFS_DIR_ITEM_KEY;
543         mutex_lock(&root->fs_info->fs_mutex);
544         key.objectid = inode->i_ino;
545         key.flags = 0;
546         btrfs_set_key_type(&key, key_type);
547         key.offset = filp->f_pos;
548         path = btrfs_alloc_path();
549         btrfs_init_path(path);
550         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
551         if (ret < 0)
552                 goto err;
553         advance = 0;
554         while(1) {
555                 leaf = btrfs_buffer_leaf(path->nodes[0]);
556                 nritems = btrfs_header_nritems(&leaf->header);
557                 slot = path->slots[0];
558                 if (advance || slot >= nritems) {
559                         if (slot >= nritems -1) {
560                                 ret = btrfs_next_leaf(root, path);
561                                 if (ret)
562                                         break;
563                                 leaf = btrfs_buffer_leaf(path->nodes[0]);
564                                 nritems = btrfs_header_nritems(&leaf->header);
565                                 slot = path->slots[0];
566                         } else {
567                                 slot++;
568                                 path->slots[0]++;
569                         }
570                 }
571                 advance = 1;
572                 item = leaf->items + slot;
573                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
574                         break;
575                 if (btrfs_disk_key_type(&item->key) != key_type)
576                         break;
577                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
578                         continue;
579                 filp->f_pos = btrfs_disk_key_offset(&item->key);
580                 advance = 1;
581                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
582                 di_cur = 0;
583                 di_total = btrfs_item_size(leaf->items + slot);
584                 while(di_cur < di_total) {
585                         over = filldir(dirent, (const char *)(di + 1),
586                                        btrfs_dir_name_len(di),
587                                        btrfs_disk_key_offset(&item->key),
588                                        btrfs_disk_key_objectid(&di->location),
589                                        d_type);
590                         if (over)
591                                 goto nopos;
592                         di_len = btrfs_dir_name_len(di) + sizeof(*di);
593                         di_cur += di_len;
594                         di = (struct btrfs_dir_item *)((char *)di + di_len);
595                 }
596         }
597         filp->f_pos++;
598 nopos:
599         ret = 0;
600 err:
601         btrfs_release_path(root, path);
602         btrfs_free_path(path);
603         mutex_unlock(&root->fs_info->fs_mutex);
604         return ret;
605 }
606
607 static void btrfs_put_super (struct super_block * sb)
608 {
609         struct btrfs_root *root = btrfs_sb(sb);
610         int ret;
611
612         ret = close_ctree(root);
613         if (ret) {
614                 printk("close ctree returns %d\n", ret);
615         }
616         sb->s_fs_info = NULL;
617 }
618
619 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
620 {
621         struct inode * inode;
622         struct dentry * root_dentry;
623         struct btrfs_super_block *disk_super;
624         struct btrfs_root *tree_root;
625         struct btrfs_inode *bi;
626
627         sb->s_maxbytes = MAX_LFS_FILESIZE;
628         sb->s_magic = BTRFS_SUPER_MAGIC;
629         sb->s_op = &btrfs_super_ops;
630         sb->s_time_gran = 1;
631
632         tree_root = open_ctree(sb);
633
634         if (!tree_root) {
635                 printk("btrfs: open_ctree failed\n");
636                 return -EIO;
637         }
638         sb->s_fs_info = tree_root;
639         disk_super = tree_root->fs_info->disk_super;
640         printk("read in super total blocks %Lu root %Lu\n",
641                btrfs_super_total_blocks(disk_super),
642                btrfs_super_root_dir(disk_super));
643
644         inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
645                                   tree_root);
646         bi = BTRFS_I(inode);
647         bi->location.objectid = inode->i_ino;
648         bi->location.offset = 0;
649         bi->location.flags = 0;
650         bi->root = tree_root;
651         btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
652
653         if (!inode)
654                 return -ENOMEM;
655         if (inode->i_state & I_NEW) {
656                 btrfs_read_locked_inode(inode);
657                 unlock_new_inode(inode);
658         }
659
660         root_dentry = d_alloc_root(inode);
661         if (!root_dentry) {
662                 iput(inode);
663                 return -ENOMEM;
664         }
665         sb->s_root = root_dentry;
666
667         return 0;
668 }
669
670 static void fill_inode_item(struct btrfs_inode_item *item,
671                             struct inode *inode)
672 {
673         btrfs_set_inode_uid(item, inode->i_uid);
674         btrfs_set_inode_gid(item, inode->i_gid);
675         btrfs_set_inode_size(item, inode->i_size);
676         btrfs_set_inode_mode(item, inode->i_mode);
677         btrfs_set_inode_nlink(item, inode->i_nlink);
678         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
679         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
680         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
681         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
682         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
683         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
684         btrfs_set_inode_nblocks(item, inode->i_blocks);
685         btrfs_set_inode_generation(item, inode->i_generation);
686 }
687
688 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
689                               struct btrfs_root *root,
690                               struct inode *inode)
691 {
692         struct btrfs_inode_item *inode_item;
693         struct btrfs_path *path;
694         int ret;
695
696         path = btrfs_alloc_path();
697         BUG_ON(!path);
698         btrfs_init_path(path);
699         ret = btrfs_lookup_inode(trans, root, path,
700                                  &BTRFS_I(inode)->location, 1);
701         if (ret) {
702                 if (ret > 0)
703                         ret = -ENOENT;
704                 goto failed;
705         }
706
707         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
708                                   path->slots[0],
709                                   struct btrfs_inode_item);
710
711         fill_inode_item(inode_item, inode);
712         btrfs_mark_buffer_dirty(path->nodes[0]);
713         ret = 0;
714 failed:
715         btrfs_release_path(root, path);
716         btrfs_free_path(path);
717         return ret;
718 }
719
720 static int btrfs_write_inode(struct inode *inode, int wait)
721 {
722         struct btrfs_root *root = BTRFS_I(inode)->root;
723         struct btrfs_trans_handle *trans;
724         int ret;
725
726         mutex_lock(&root->fs_info->fs_mutex);
727         trans = btrfs_start_transaction(root, 1);
728         ret = btrfs_update_inode(trans, root, inode);
729         if (wait)
730                 btrfs_commit_transaction(trans, root);
731         else
732                 btrfs_end_transaction(trans, root);
733         mutex_unlock(&root->fs_info->fs_mutex);
734         return ret;
735 }
736
737 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
738                                      struct btrfs_root *root,
739                                      u64 objectid, int mode)
740 {
741         struct inode *inode;
742         struct btrfs_inode_item inode_item;
743         struct btrfs_key *location;
744         int ret;
745
746         inode = new_inode(root->fs_info->sb);
747         if (!inode)
748                 return ERR_PTR(-ENOMEM);
749
750         BTRFS_I(inode)->root = root;
751
752         inode->i_uid = current->fsuid;
753         inode->i_gid = current->fsgid;
754         inode->i_mode = mode;
755         inode->i_ino = objectid;
756         inode->i_blocks = 0;
757         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
758         fill_inode_item(&inode_item, inode);
759         location = &BTRFS_I(inode)->location;
760         location->objectid = objectid;
761         location->flags = 0;
762         location->offset = 0;
763         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
764
765         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
766         BUG_ON(ret);
767
768         insert_inode_hash(inode);
769         return inode;
770 }
771
772 static int btrfs_add_link(struct btrfs_trans_handle *trans,
773                             struct dentry *dentry, struct inode *inode)
774 {
775         int ret;
776         struct btrfs_key key;
777         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
778         key.objectid = inode->i_ino;
779         key.flags = 0;
780         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
781         key.offset = 0;
782
783         ret = btrfs_insert_dir_item(trans, root,
784                                     dentry->d_name.name, dentry->d_name.len,
785                                     dentry->d_parent->d_inode->i_ino,
786                                     &key, 0);
787         if (ret == 0) {
788                 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
789                 ret = btrfs_update_inode(trans, root,
790                                          dentry->d_parent->d_inode);
791         }
792         return ret;
793 }
794
795 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
796                             struct dentry *dentry, struct inode *inode)
797 {
798         int err = btrfs_add_link(trans, dentry, inode);
799         if (!err) {
800                 d_instantiate(dentry, inode);
801                 return 0;
802         }
803         if (err > 0)
804                 err = -EEXIST;
805         return err;
806 }
807
808 static int btrfs_create(struct inode *dir, struct dentry *dentry,
809                         int mode, struct nameidata *nd)
810 {
811         struct btrfs_trans_handle *trans;
812         struct btrfs_root *root = BTRFS_I(dir)->root;
813         struct inode *inode;
814         int err;
815         int drop_inode = 0;
816         u64 objectid;
817
818         mutex_lock(&root->fs_info->fs_mutex);
819         trans = btrfs_start_transaction(root, 1);
820
821         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
822         if (err) {
823                 err = -ENOSPC;
824                 goto out_unlock;
825         }
826
827         inode = btrfs_new_inode(trans, root, objectid, mode);
828         err = PTR_ERR(inode);
829         if (IS_ERR(inode))
830                 goto out_unlock;
831         // FIXME mark the inode dirty
832         err = btrfs_add_nondir(trans, dentry, inode);
833         if (err)
834                 drop_inode = 1;
835         else {
836                 inode->i_mapping->a_ops = &btrfs_aops;
837                 inode->i_fop = &btrfs_file_operations;
838                 inode->i_op = &btrfs_file_inode_operations;
839         }
840         dir->i_sb->s_dirt = 1;
841 out_unlock:
842         btrfs_end_transaction(trans, root);
843         mutex_unlock(&root->fs_info->fs_mutex);
844
845         if (drop_inode) {
846                 inode_dec_link_count(inode);
847                 iput(inode);
848         }
849         return err;
850 }
851
852 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
853                                 struct btrfs_root *root,
854                                 u64 objectid, u64 dirid)
855 {
856         int ret;
857         char buf[2];
858         struct btrfs_key key;
859
860         buf[0] = '.';
861         buf[1] = '.';
862
863         key.objectid = objectid;
864         key.offset = 0;
865         key.flags = 0;
866         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
867
868         ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
869                                     &key, 1);
870         if (ret)
871                 goto error;
872         key.objectid = dirid;
873         ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
874                                     &key, 1);
875         if (ret)
876                 goto error;
877 error:
878         return ret;
879 }
880
881 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
882 {
883         struct inode *inode;
884         struct btrfs_trans_handle *trans;
885         struct btrfs_root *root = BTRFS_I(dir)->root;
886         int err = 0;
887         int drop_on_err = 0;
888         u64 objectid;
889
890         mutex_lock(&root->fs_info->fs_mutex);
891         trans = btrfs_start_transaction(root, 1);
892         if (IS_ERR(trans)) {
893                 err = PTR_ERR(trans);
894                 goto out_unlock;
895         }
896
897         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
898         if (err) {
899                 err = -ENOSPC;
900                 goto out_unlock;
901         }
902
903         inode = btrfs_new_inode(trans, root, objectid, S_IFDIR | mode);
904         if (IS_ERR(inode)) {
905                 err = PTR_ERR(inode);
906                 goto out_fail;
907         }
908         drop_on_err = 1;
909         inode->i_op = &btrfs_dir_inode_operations;
910         inode->i_fop = &btrfs_dir_file_operations;
911
912         err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
913         if (err)
914                 goto out_fail;
915
916         inode->i_size = 6;
917         err = btrfs_update_inode(trans, root, inode);
918         if (err)
919                 goto out_fail;
920         err = btrfs_add_link(trans, dentry, inode);
921         if (err)
922                 goto out_fail;
923         d_instantiate(dentry, inode);
924         drop_on_err = 0;
925
926 out_fail:
927         btrfs_end_transaction(trans, root);
928 out_unlock:
929         mutex_unlock(&root->fs_info->fs_mutex);
930         if (drop_on_err)
931                 iput(inode);
932         return err;
933 }
934
935 static int btrfs_sync_fs(struct super_block *sb, int wait)
936 {
937         struct btrfs_trans_handle *trans;
938         struct btrfs_root *root;
939         int ret;
940         root = btrfs_sb(sb);
941
942         sb->s_dirt = 0;
943         if (!wait) {
944                 filemap_flush(root->fs_info->btree_inode->i_mapping);
945                 return 0;
946         }
947         filemap_write_and_wait(root->fs_info->btree_inode->i_mapping);
948         mutex_lock(&root->fs_info->fs_mutex);
949         trans = btrfs_start_transaction(root, 1);
950         ret = btrfs_commit_transaction(trans, root);
951         sb->s_dirt = 0;
952         BUG_ON(ret);
953 printk("btrfs sync_fs\n");
954         mutex_unlock(&root->fs_info->fs_mutex);
955         return 0;
956 }
957
958 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
959                            struct buffer_head *result, int create)
960 {
961         int ret;
962         int err = 0;
963         u64 blocknr;
964         u64 extent_start = 0;
965         u64 extent_end = 0;
966         u64 objectid = inode->i_ino;
967         u32 found_type;
968         struct btrfs_path *path;
969         struct btrfs_root *root = BTRFS_I(inode)->root;
970         struct btrfs_file_extent_item *item;
971         struct btrfs_leaf *leaf;
972         struct btrfs_disk_key *found_key;
973
974         path = btrfs_alloc_path();
975         BUG_ON(!path);
976         btrfs_init_path(path);
977         if (create) {
978                 WARN_ON(1);
979         }
980
981         ret = btrfs_lookup_file_extent(NULL, root, path,
982                                        inode->i_ino,
983                                        iblock << inode->i_blkbits, 0);
984         if (ret < 0) {
985                 err = ret;
986                 goto out;
987         }
988
989         if (ret != 0) {
990                 if (path->slots[0] == 0) {
991                         btrfs_release_path(root, path);
992                         goto out;
993                 }
994                 path->slots[0]--;
995         }
996
997         item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
998                               struct btrfs_file_extent_item);
999         leaf = btrfs_buffer_leaf(path->nodes[0]);
1000         blocknr = btrfs_file_extent_disk_blocknr(item);
1001         blocknr += btrfs_file_extent_offset(item);
1002
1003         /* are we inside the extent that was found? */
1004         found_key = &leaf->items[path->slots[0]].key;
1005         found_type = btrfs_disk_key_type(found_key);
1006         if (btrfs_disk_key_objectid(found_key) != objectid ||
1007             found_type != BTRFS_EXTENT_DATA_KEY) {
1008                 extent_end = 0;
1009                 extent_start = 0;
1010                 btrfs_release_path(root, path);
1011                 goto out;
1012         }
1013         found_type = btrfs_file_extent_type(item);
1014         extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1015         if (found_type == BTRFS_FILE_EXTENT_REG) {
1016                 extent_start = extent_start >> inode->i_blkbits;
1017                 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1018                 if (iblock >= extent_start && iblock < extent_end) {
1019                         err = 0;
1020                         btrfs_map_bh_to_logical(root, result, blocknr +
1021                                                 iblock - extent_start);
1022                         goto out;
1023                 }
1024         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1025                 char *ptr;
1026                 char *map;
1027                 u32 size;
1028                 size = btrfs_file_extent_inline_len(leaf->items +
1029                                                     path->slots[0]);
1030                 extent_end = (extent_start + size) >> inode->i_blkbits;
1031                 extent_start >>= inode->i_blkbits;
1032                 if (iblock < extent_start || iblock > extent_end) {
1033                         goto out;
1034                 }
1035                 ptr = btrfs_file_extent_inline_start(item);
1036                 map = kmap(result->b_page);
1037                 memcpy(map, ptr, size);
1038                 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1039                 flush_dcache_page(result->b_page);
1040                 kunmap(result->b_page);
1041                 set_buffer_uptodate(result);
1042                 SetPageChecked(result->b_page);
1043                 btrfs_map_bh_to_logical(root, result, 0);
1044         }
1045 out:
1046         btrfs_release_path(root, path);
1047         btrfs_free_path(path);
1048         return err;
1049 }
1050
1051 static int btrfs_get_block(struct inode *inode, sector_t iblock,
1052                            struct buffer_head *result, int create)
1053 {
1054         int err;
1055         struct btrfs_root *root = BTRFS_I(inode)->root;
1056         mutex_lock(&root->fs_info->fs_mutex);
1057         err = btrfs_get_block_lock(inode, iblock, result, create);
1058         mutex_unlock(&root->fs_info->fs_mutex);
1059         return err;
1060 }
1061
1062 static int btrfs_prepare_write(struct file *file, struct page *page,
1063                                unsigned from, unsigned to)
1064 {
1065         return nobh_prepare_write(page, from, to, btrfs_get_block);
1066 }
1067
/*
 * ->write_super: perform a waiting btrfs_sync_fs(); its return value is
 * not used.
 */
static void btrfs_write_super(struct super_block *sb)
{
        const int wait = 1;

        btrfs_sync_fs(sb, wait);
}
1072
1073 static int btrfs_readpage(struct file *file, struct page *page)
1074 {
1075         return mpage_readpage(page, btrfs_get_block);
1076 }
1077
1078 /*
1079  * While block_write_full_page is writing back the dirty buffers under
1080  * the page lock, whoever dirtied the buffers may decide to clean them
1081  * again at any time.  We handle that by only looking at the buffer
1082  * state inside lock_buffer().
1083  *
1084  * If block_write_full_page() is called for regular writeback
1085  * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1086  * locked buffer.   This only can happen if someone has written the buffer
1087  * directly, with submit_bh().  At the address_space level PageWriteback
1088  * prevents this contention from occurring.
1089  */
/*
 * btrfs variant of the buffer_head based full-page writeout.
 *
 * inode: file the page belongs to; supplies i_size and i_blkbits.
 * page:  the page to write; must be locked (checked with BUG_ON below).
 * wbc:   writeback control.  sync_mode/nonblocking select between blocking
 *        and trylock buffer locking; pages_skipped is incremented when no
 *        buffer ends up being submitted.
 *
 * Returns 0, or the error from btrfs_get_block() once the recover path has
 * run.  The page is unlocked on return in both cases.
 */
1090 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1091                                    struct writeback_control *wbc)
1092 {
1093         int err;
1094         sector_t block;
1095         sector_t last_block;
1096         struct buffer_head *bh, *head;
1097         const unsigned blocksize = 1 << inode->i_blkbits;
1098         int nr_underway = 0;
1099
1100         BUG_ON(!PageLocked(page));
1101
        /* index of the last file block that lies inside i_size */
1102         last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1103
        /* a page without buffers gets a fresh set, all dirty and uptodate */
1104         if (!page_has_buffers(page)) {
1105                 create_empty_buffers(page, blocksize,
1106                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
1107         }
1108
1109         /*
1110          * Be very careful.  We have no exclusion from __set_page_dirty_buffers
1111          * here, and the (potentially unmapped) buffers may become dirty at
1112          * any time.  If a buffer becomes dirty here after we've inspected it
1113          * then we just miss that fact, and the page stays dirty.
1114          *
1115          * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1116          * handle that here by just cleaning them.
1117          */
1118
        /* file block number of the first buffer on this page */
1119         block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1120         head = page_buffers(page);
1121         bh = head;
1122
1123         /*
1124          * Get all the dirty buffers mapped to disk addresses and
1125          * handle any aliases from the underlying blockdev's mapping.
1126          */
1127         do {
1128                 if (block > last_block) {
1129                         /*
1130                          * mapped buffers outside i_size will occur, because
1131                          * this page can be outside i_size when there is a
1132                          * truncate in progress.
1133                          */
1134                         /*
1135                          * The buffer was zeroed by block_write_full_page()
1136                          */
1137                         clear_buffer_dirty(bh);
1138                         set_buffer_uptodate(bh);
1139                 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1140                         WARN_ON(bh->b_size != blocksize);
                        /* create == 0: only an existing mapping is looked up */
1141                         err = btrfs_get_block(inode, block, bh, 0);
1142                         if (err)
1143                                 goto recover;
1144                         if (buffer_new(bh)) {
1145                                 /* blockdev mappings never come here */
1146                                 clear_buffer_new(bh);
1147                                 unmap_underlying_metadata(bh->b_bdev,
1148                                                         bh->b_blocknr);
1149                         }
1150                 }
1151                 bh = bh->b_this_page;
1152                 block++;
1153         } while (bh != head);
1154
        /*
         * bh is back at head here.  Second pass: lock every mapped buffer
         * and tag the ones that will actually be written.
         */
1155         do {
1156                 if (!buffer_mapped(bh))
1157                         continue;
1158                 /*
1159                  * If it's a fully non-blocking write attempt and we cannot
1160                  * lock the buffer then redirty the page.  Note that this can
1161                  * potentially cause a busy-wait loop from pdflush and kswapd
1162                  * activity, but those code paths have their own higher-level
1163                  * throttling.
1164                  */
1165                 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1166                         lock_buffer(bh);
1167                 } else if (test_set_buffer_locked(bh)) {
1168                         redirty_page_for_writepage(wbc, page);
1169                         continue;
1170                 }
                /*
                 * A buffer mapped at block 0 has its dirty bit cleared but
                 * is never submitted from here.
                 * NOTE(review): presumably block 0 marks inline extent
                 * data, which btrfs_get_block_lock() maps at logical 0 --
                 * confirm.
                 */
1171                 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1172                         mark_buffer_async_write(bh);
1173                 } else {
1174                         unlock_buffer(bh);
1175                 }
1176         } while ((bh = bh->b_this_page) != head);
1177
1178         /*
1179          * The page and its buffers are protected by PageWriteback(), so we can
1180          * drop the bh refcounts early.
1181          */
1182         BUG_ON(PageWriteback(page));
1183         set_page_writeback(page);
1184
        /* third pass: submit everything tagged for async write */
1185         do {
1186                 struct buffer_head *next = bh->b_this_page;
1187                 if (buffer_async_write(bh)) {
1188                         submit_bh(WRITE, bh);
1189                         nr_underway++;
1190                 }
1191                 bh = next;
1192         } while (bh != head);
1193         unlock_page(page);
1194
1195         err = 0;
        /* reached from the normal path and from recover, with bh == head */
1196 done:
1197         if (nr_underway == 0) {
1198                 /*
1199                  * The page was marked dirty, but the buffers were
1200                  * clean.  Someone wrote them back by hand with
1201                  * ll_rw_block/submit_bh.  A rare case.
1202                  */
1203                 int uptodate = 1;
1204                 do {
1205                         if (!buffer_uptodate(bh)) {
1206                                 uptodate = 0;
1207                                 break;
1208                         }
1209                         bh = bh->b_this_page;
1210                 } while (bh != head);
1211                 if (uptodate)
1212                         SetPageUptodate(page);
                /* no I/O was submitted, so writeback is ended right here */
1213                 end_page_writeback(page);
1214                 /*
1215                  * The page and buffer_heads can be released at any time from
1216                  * here on.
1217                  */
1218                 wbc->pages_skipped++;   /* We didn't write this page */
1219         }
1220         return err;
1221
1222 recover:
1223         /*
1224          * ENOSPC, or some other error.  We may already have added some
1225          * blocks to the file, so we need to write these out to avoid
1226          * exposing stale data.
1227          * The page is currently locked and not marked for writeback
1228          */
1229         bh = head;
1230         /* Recovery: lock and submit the mapped buffers */
1231         do {
1232                 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1233                         lock_buffer(bh);
1234                         mark_buffer_async_write(bh);
1235                 } else {
1236                         /*
1237                          * The buffer may have been set dirty during
1238                          * attachment to a dirty page.
1239                          */
1240                         clear_buffer_dirty(bh);
1241                 }
1242         } while ((bh = bh->b_this_page) != head);
        /* err still holds the btrfs_get_block() failure returned at done */
1243         SetPageError(page);
1244         BUG_ON(PageWriteback(page));
1245         set_page_writeback(page);
1246         do {
1247                 struct buffer_head *next = bh->b_this_page;
1248                 if (buffer_async_write(bh)) {
1249                         clear_buffer_dirty(bh);
1250                         submit_bh(WRITE, bh);
1251                         nr_underway++;
1252                 }
1253                 bh = next;
1254         } while (bh != head);
1255         unlock_page(page);
1256         goto done;
1257 }
1258
1259 /*
1260  * The generic ->writepage function for buffer-backed address_spaces
1261  */
1262 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1263 {
1264         struct inode * const inode = page->mapping->host;
1265         loff_t i_size = i_size_read(inode);
1266         const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1267         unsigned offset;
1268         void *kaddr;
1269
1270         /* Is the page fully inside i_size? */
1271         if (page->index < end_index)
1272                 return __btrfs_write_full_page(inode, page, wbc);
1273
1274         /* Is the page fully outside i_size? (truncate in progress) */
1275         offset = i_size & (PAGE_CACHE_SIZE-1);
1276         if (page->index >= end_index+1 || !offset) {
1277                 /*
1278                  * The page may have dirty, unmapped buffers.  For example,
1279                  * they may have been added in ext3_writepage().  Make them
1280                  * freeable here, so the page does not leak.
1281                  */
1282                 block_invalidatepage(page, 0);
1283                 unlock_page(page);
1284                 return 0; /* don't care */
1285         }
1286
1287         /*
1288          * The page straddles i_size.  It must be zeroed out on each and every
1289          * writepage invokation because it may be mmapped.  "A file is mapped
1290          * in multiples of the page size.  For a file that is not a multiple of
1291          * the  page size, the remaining memory is zeroed when mapped, and
1292          * writes to that region are not written out to the file."
1293          */
1294         kaddr = kmap_atomic(page, KM_USER0);
1295         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1296         flush_dcache_page(page);
1297         kunmap_atomic(kaddr, KM_USER0);
1298         return __btrfs_write_full_page(inode, page, wbc);
1299 }
1300
1301 static void btrfs_truncate(struct inode *inode)
1302 {
1303         struct btrfs_root *root = BTRFS_I(inode)->root;
1304         int ret;
1305         struct btrfs_trans_handle *trans;
1306
1307         if (!S_ISREG(inode->i_mode))
1308                 return;
1309         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1310                 return;
1311
1312         nobh_truncate_page(inode->i_mapping, inode->i_size);
1313
1314         /* FIXME, add redo link to tree so we don't leak on crash */
1315         mutex_lock(&root->fs_info->fs_mutex);
1316         trans = btrfs_start_transaction(root, 1);
1317         ret = btrfs_truncate_in_trans(trans, root, inode);
1318         BUG_ON(ret);
1319         ret = btrfs_end_transaction(trans, root);
1320         BUG_ON(ret);
1321         mutex_unlock(&root->fs_info->fs_mutex);
1322         mark_inode_dirty(inode);
1323 }
1324
1325 /*
1326  * Make sure any changes to nobh_commit_write() are reflected in
1327  * nobh_truncate_page(), since it doesn't call commit_write().
1328  */
1329 static int btrfs_commit_write(struct file *file, struct page *page,
1330                               unsigned from, unsigned to)
1331 {
1332         struct inode *inode = page->mapping->host;
1333         struct buffer_head *bh;
1334         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1335
1336         SetPageUptodate(page);
1337         bh = page_buffers(page);
1338         if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1339                 set_page_dirty(page);
1340         }
1341         if (pos > inode->i_size) {
1342                 i_size_write(inode, pos);
1343                 mark_inode_dirty(inode);
1344         }
1345         return 0;
1346 }
1347
1348 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1349                                 struct page **prepared_pages,
1350                                 const char __user * buf)
1351 {
1352         long page_fault = 0;
1353         int i;
1354         int offset = pos & (PAGE_CACHE_SIZE - 1);
1355
1356         for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
1357                 size_t count = min_t(size_t,
1358                                      PAGE_CACHE_SIZE - offset, write_bytes);
1359                 struct page *page = prepared_pages[i];
1360                 fault_in_pages_readable(buf, count);
1361
1362                 /* Copy data from userspace to the current page */
1363                 kmap(page);
1364                 page_fault = __copy_from_user(page_address(page) + offset,
1365                                               buf, count);
1366                 /* Flush processor's dcache for this page */
1367                 flush_dcache_page(page);
1368                 kunmap(page);
1369                 buf += count;
1370                 write_bytes -= count;
1371
1372                 if (page_fault)
1373                         break;
1374         }
1375         return page_fault ? -EFAULT : 0;
1376 }
1377
1378 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
1379 {
1380         size_t i;
1381         for (i = 0; i < num_pages; i++) {
1382                 if (!pages[i])
1383                         break;
1384                 unlock_page(pages[i]);
1385                 mark_page_accessed(pages[i]);
1386                 page_cache_release(pages[i]);
1387         }
1388 }
1389 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1390                                    struct btrfs_root *root,
1391                                    struct file *file,
1392                                    struct page **pages,
1393                                    size_t num_pages,
1394                                    loff_t pos,
1395                                    size_t write_bytes)
1396 {
1397         int i;
1398         int offset;
1399         int err = 0;
1400         int ret;
1401         int this_write;
1402         struct inode *inode = file->f_path.dentry->d_inode;
1403         struct buffer_head *bh;
1404         struct btrfs_file_extent_item *ei;
1405
1406         for (i = 0; i < num_pages; i++) {
1407                 offset = pos & (PAGE_CACHE_SIZE -1);
1408                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1409                 /* FIXME, one block at a time */
1410
1411                 mutex_lock(&root->fs_info->fs_mutex);
1412                 trans = btrfs_start_transaction(root, 1);
1413
1414                 bh = page_buffers(pages[i]);
1415                 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1416                         struct btrfs_key key;
1417                         struct btrfs_path *path;
1418                         char *ptr;
1419                         u32 datasize;
1420
1421                         path = btrfs_alloc_path();
1422                         BUG_ON(!path);
1423                         key.objectid = inode->i_ino;
1424                         key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1425                         key.flags = 0;
1426                         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
1427                         BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1428                         datasize = offset +
1429                                 btrfs_file_extent_calc_inline_size(write_bytes);
1430                         ret = btrfs_insert_empty_item(trans, root, path, &key,
1431                                                       datasize);
1432                         BUG_ON(ret);
1433                         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1434                                path->slots[0], struct btrfs_file_extent_item);
1435                         btrfs_set_file_extent_generation(ei, trans->transid);
1436                         btrfs_set_file_extent_type(ei,
1437                                                    BTRFS_FILE_EXTENT_INLINE);
1438                         ptr = btrfs_file_extent_inline_start(ei);
1439                         memcpy(ptr, bh->b_data, offset + write_bytes);
1440                         mark_buffer_dirty(path->nodes[0]);
1441                         btrfs_free_path(path);
1442                 } else {
1443                         btrfs_csum_file_block(trans, root, inode->i_ino,
1444                                       pages[i]->index << PAGE_CACHE_SHIFT,
1445                                       kmap(pages[i]), PAGE_CACHE_SIZE);
1446                         kunmap(pages[i]);
1447                 }
1448                 SetPageChecked(pages[i]);
1449                 ret = btrfs_end_transaction(trans, root);
1450                 BUG_ON(ret);
1451                 mutex_unlock(&root->fs_info->fs_mutex);
1452
1453                 ret = btrfs_commit_write(file, pages[i], offset,
1454                                          offset + this_write);
1455                 pos += this_write;
1456                 if (ret) {
1457                         err = ret;
1458                         goto failed;
1459                 }
1460                 WARN_ON(this_write > write_bytes);
1461                 write_bytes -= this_write;
1462         }
1463 failed:
1464         return err;
1465 }
1466
1467 static int drop_extents(struct btrfs_trans_handle *trans,
1468                           struct btrfs_root *root,
1469                           struct inode *inode,
1470                           u64 start, u64 end)
1471 {
1472         int ret;
1473         struct btrfs_key key;
1474         struct btrfs_leaf *leaf;
1475         int slot;
1476         struct btrfs_file_extent_item *extent;
1477         u64 extent_end = 0;
1478         int keep;
1479         struct btrfs_file_extent_item old;
1480         struct btrfs_path *path;
1481         u64 search_start = start;
1482         int bookend;
1483         int found_type;
1484         int found_extent;
1485         int found_inline;
1486
1487         path = btrfs_alloc_path();
1488         if (!path)
1489                 return -ENOMEM;
1490         while(1) {
1491                 btrfs_release_path(root, path);
1492                 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
1493                                                search_start, -1);
1494                 if (ret < 0)
1495                         goto out;
1496                 if (ret > 0) {
1497                         if (path->slots[0] == 0) {
1498                                 ret = 0;
1499                                 goto out;
1500                         }
1501                         path->slots[0]--;
1502                 }
1503                 keep = 0;
1504                 bookend = 0;
1505                 found_extent = 0;
1506                 found_inline = 0;
1507                 extent = NULL;
1508                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1509                 slot = path->slots[0];
1510                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
1511                 if (key.offset >= end || key.objectid != inode->i_ino) {
1512                         ret = 0;
1513                         goto out;
1514                 }
1515                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
1516                         ret = 0;
1517                         goto out;
1518                 }
1519                 extent = btrfs_item_ptr(leaf, slot,
1520                                         struct btrfs_file_extent_item);
1521                 found_type = btrfs_file_extent_type(extent);
1522                 if (found_type == BTRFS_FILE_EXTENT_REG) {
1523                         extent_end = key.offset +
1524                                 (btrfs_file_extent_num_blocks(extent) <<
1525                                  inode->i_blkbits);
1526                         found_extent = 1;
1527                 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1528                         found_inline = 1;
1529                         extent_end = key.offset +
1530                              btrfs_file_extent_inline_len(leaf->items + slot);
1531                 }
1532
1533                 if (!found_extent && !found_inline) {
1534                         ret = 0;
1535                         goto out;
1536                 }
1537
1538                 if (search_start >= extent_end) {
1539                         ret = 0;
1540                         goto out;
1541                 }
1542
1543                 search_start = extent_end;
1544
1545                 if (end < extent_end && end >= key.offset) {
1546                         if (found_extent) {
1547                                 memcpy(&old, extent, sizeof(old));
1548                                 ret = btrfs_inc_extent_ref(trans, root,
1549                                       btrfs_file_extent_disk_blocknr(&old),
1550                                       btrfs_file_extent_disk_num_blocks(&old));
1551                                 BUG_ON(ret);
1552                         }
1553                         WARN_ON(found_inline);
1554                         bookend = 1;
1555                 }
1556
1557                 if (start > key.offset) {
1558                         u64 new_num;
1559                         u64 old_num;
1560                         /* truncate existing extent */
1561                         keep = 1;
1562                         WARN_ON(start & (root->blocksize - 1));
1563                         if (found_extent) {
1564                                 new_num = (start - key.offset) >>
1565                                         inode->i_blkbits;
1566                                 old_num = btrfs_file_extent_num_blocks(extent);
1567                                 inode->i_blocks -= (old_num - new_num) << 3;
1568                                 btrfs_set_file_extent_num_blocks(extent,
1569                                                                  new_num);
1570                                 mark_buffer_dirty(path->nodes[0]);
1571                         } else {
1572                                 WARN_ON(1);
1573                                 /*
1574                                 ret = btrfs_truncate_item(trans, root, path,
1575                                                           start - key.offset);
1576                                 BUG_ON(ret);
1577                                 */
1578                         }
1579                 }
1580                 if (!keep) {
1581                         u64 disk_blocknr = 0;
1582                         u64 disk_num_blocks = 0;
1583                         u64 extent_num_blocks = 0;
1584                         if (found_extent) {
1585                                 disk_blocknr =
1586                                       btrfs_file_extent_disk_blocknr(extent);
1587                                 disk_num_blocks =
1588                                       btrfs_file_extent_disk_num_blocks(extent);
1589                                 extent_num_blocks =
1590                                       btrfs_file_extent_num_blocks(extent);
1591                         }
1592                         ret = btrfs_del_item(trans, root, path);
1593                         BUG_ON(ret);
1594                         btrfs_release_path(root, path);
1595                         if (found_extent) {
1596                                 inode->i_blocks -=
1597                                 btrfs_file_extent_num_blocks(extent) << 3;
1598                                 ret = btrfs_free_extent(trans, root,
1599                                                         disk_blocknr,
1600                                                         disk_num_blocks, 0);
1601                         }
1602
1603                         BUG_ON(ret);
1604                         if (!bookend && search_start >= end) {
1605                                 ret = 0;
1606                                 goto out;
1607                         }
1608                         if (!bookend)
1609                                 continue;
1610                 }
1611                 if (bookend && found_extent) {
1612                         /* create bookend */
1613                         struct btrfs_key ins;
1614                         ins.objectid = inode->i_ino;
1615                         ins.offset = end;
1616                         ins.flags = 0;
1617                         btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
1618
1619                         btrfs_release_path(root, path);
1620                         ret = btrfs_insert_empty_item(trans, root, path, &ins,
1621                                                       sizeof(*extent));
1622                         BUG_ON(ret);
1623                         extent = btrfs_item_ptr(
1624                                     btrfs_buffer_leaf(path->nodes[0]),
1625                                     path->slots[0],
1626                                     struct btrfs_file_extent_item);
1627                         btrfs_set_file_extent_disk_blocknr(extent,
1628                                     btrfs_file_extent_disk_blocknr(&old));
1629                         btrfs_set_file_extent_disk_num_blocks(extent,
1630                                     btrfs_file_extent_disk_num_blocks(&old));
1631
1632                         btrfs_set_file_extent_offset(extent,
1633                                     btrfs_file_extent_offset(&old) +
1634                                     ((end - key.offset) >> inode->i_blkbits));
1635                         WARN_ON(btrfs_file_extent_num_blocks(&old) <
1636                                 (end - key.offset) >> inode->i_blkbits);
1637                         btrfs_set_file_extent_num_blocks(extent,
1638                                     btrfs_file_extent_num_blocks(&old) -
1639                                     ((end - key.offset) >> inode->i_blkbits));
1640
1641                         btrfs_set_file_extent_type(extent,
1642                                                    BTRFS_FILE_EXTENT_REG);
1643                         btrfs_set_file_extent_generation(extent,
1644                                     btrfs_file_extent_generation(&old));
1645                         btrfs_mark_buffer_dirty(path->nodes[0]);
1646                         inode->i_blocks +=
1647                                 btrfs_file_extent_num_blocks(extent) << 3;
1648                         ret = 0;
1649                         goto out;
1650                 }
1651         }
1652 out:
1653         btrfs_free_path(path);
1654         return ret;
1655 }
1656
1657 static int prepare_pages(struct btrfs_root *root,
1658                          struct file *file,
1659                          struct page **pages,
1660                          size_t num_pages,
1661                          loff_t pos,
1662                          unsigned long first_index,
1663                          unsigned long last_index,
1664                          size_t write_bytes,
1665                          u64 alloc_extent_start)
1666 {
1667         int i;
1668         unsigned long index = pos >> PAGE_CACHE_SHIFT;
1669         struct inode *inode = file->f_path.dentry->d_inode;
1670         int offset;
1671         int err = 0;
1672         int this_write;
1673         struct buffer_head *bh;
1674         struct buffer_head *head;
1675         loff_t isize = i_size_read(inode);
1676
1677         memset(pages, 0, num_pages * sizeof(struct page *));
1678
1679         for (i = 0; i < num_pages; i++) {
1680                 pages[i] = grab_cache_page(inode->i_mapping, index + i);
1681                 if (!pages[i]) {
1682                         err = -ENOMEM;
1683                         goto failed_release;
1684                 }
1685                 offset = pos & (PAGE_CACHE_SIZE -1);
1686                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1687                 create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize,
1688                                      (1 << BH_Uptodate));
1689                 head = page_buffers(pages[i]);
1690                 bh = head;
1691                 do {
1692                         err = btrfs_map_bh_to_logical(root, bh,
1693                                                       alloc_extent_start);
1694                         BUG_ON(err);
1695                         if (err)
1696                                 goto failed_truncate;
1697                         bh = bh->b_this_page;
1698                         if (alloc_extent_start)
1699                                 alloc_extent_start++;
1700                 } while (bh != head);
1701                 pos += this_write;
1702                 WARN_ON(this_write > write_bytes);
1703                 write_bytes -= this_write;
1704         }
1705         return 0;
1706
1707 failed_release:
1708         btrfs_drop_pages(pages, num_pages);
1709         return err;
1710
1711 failed_truncate:
1712         btrfs_drop_pages(pages, num_pages);
1713         if (pos > isize)
1714                 vmtruncate(inode, isize);
1715         return err;
1716 }
1717
1718 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1719                                 size_t count, loff_t *ppos)
1720 {
1721         loff_t pos;
1722         size_t num_written = 0;
1723         int err = 0;
1724         int ret = 0;
1725         struct inode *inode = file->f_path.dentry->d_inode;
1726         struct btrfs_root *root = BTRFS_I(inode)->root;
1727         struct page *pages[8];
1728         struct page *pinned[2] = { NULL, NULL };
1729         unsigned long first_index;
1730         unsigned long last_index;
1731         u64 start_pos;
1732         u64 num_blocks;
1733         u64 alloc_extent_start;
1734         struct btrfs_trans_handle *trans;
1735         struct btrfs_key ins;
1736
1737         if (file->f_flags & O_DIRECT)
1738                 return -EINVAL;
1739         pos = *ppos;
1740         vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1741         current->backing_dev_info = inode->i_mapping->backing_dev_info;
1742         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1743         if (err)
1744                 goto out;
1745         if (count == 0)
1746                 goto out;
1747         err = remove_suid(file->f_path.dentry);
1748         if (err)
1749                 goto out;
1750         file_update_time(file);
1751
1752         start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
1753         num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
1754                         inode->i_blkbits;
1755
1756         mutex_lock(&inode->i_mutex);
1757         first_index = pos >> PAGE_CACHE_SHIFT;
1758         last_index = (pos + count) >> PAGE_CACHE_SHIFT;
1759
1760         if ((first_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1761             (pos & (PAGE_CACHE_SIZE - 1))) {
1762                 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
1763                 if (!PageUptodate(pinned[0])) {
1764                         ret = mpage_readpage(pinned[0], btrfs_get_block);
1765                         BUG_ON(ret);
1766                 } else {
1767                         unlock_page(pinned[0]);
1768                 }
1769         }
1770         if (first_index != last_index &&
1771             (last_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1772             (count & (PAGE_CACHE_SIZE - 1))) {
1773                 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
1774                 if (!PageUptodate(pinned[1])) {
1775                         ret = mpage_readpage(pinned[1], btrfs_get_block);
1776                         BUG_ON(ret);
1777                 } else {
1778                         unlock_page(pinned[1]);
1779                 }
1780         }
1781
1782         mutex_lock(&root->fs_info->fs_mutex);
1783         trans = btrfs_start_transaction(root, 1);
1784         if (!trans) {
1785                 err = -ENOMEM;
1786                 mutex_unlock(&root->fs_info->fs_mutex);
1787                 goto out_unlock;
1788         }
1789         /* FIXME blocksize != 4096 */
1790         inode->i_blocks += num_blocks << 3;
1791         if (start_pos < inode->i_size) {
1792                 /* FIXME blocksize != pagesize */
1793                 ret = drop_extents(trans, root, inode,
1794                                    start_pos,
1795                                    (pos + count + root->blocksize -1) &
1796                                    ~((u64)root->blocksize - 1));
1797                 BUG_ON(ret);
1798         }
1799         if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
1800             pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
1801                 ret = btrfs_alloc_extent(trans, root, num_blocks, 1,
1802                                  (u64)-1, &ins);
1803                 BUG_ON(ret);
1804                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1805                                        start_pos, ins.objectid, ins.offset);
1806                 BUG_ON(ret);
1807         } else {
1808                 ins.offset = 0;
1809                 ins.objectid = 0;
1810         }
1811         BUG_ON(ret);
1812         alloc_extent_start = ins.objectid;
1813         ret = btrfs_end_transaction(trans, root);
1814         mutex_unlock(&root->fs_info->fs_mutex);
1815
1816         while(count > 0) {
1817                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1818                 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
1819                 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
1820                                         PAGE_CACHE_SHIFT;
1821
1822                 memset(pages, 0, sizeof(pages));
1823                 ret = prepare_pages(root, file, pages, num_pages,
1824                                     pos, first_index, last_index,
1825                                     write_bytes, alloc_extent_start);
1826                 BUG_ON(ret);
1827
1828                 /* FIXME blocks != pagesize */
1829                 if (alloc_extent_start)
1830                         alloc_extent_start += num_pages;
1831                 ret = btrfs_copy_from_user(pos, num_pages,
1832                                            write_bytes, pages, buf);
1833                 BUG_ON(ret);
1834
1835                 ret = dirty_and_release_pages(NULL, root, file, pages,
1836                                               num_pages, pos, write_bytes);
1837                 BUG_ON(ret);
1838                 btrfs_drop_pages(pages, num_pages);
1839
1840                 buf += write_bytes;
1841                 count -= write_bytes;
1842                 pos += write_bytes;
1843                 num_written += write_bytes;
1844
1845                 balance_dirty_pages_ratelimited(inode->i_mapping);
1846                 cond_resched();
1847         }
1848 out_unlock:
1849         mutex_unlock(&inode->i_mutex);
1850 out:
1851         if (pinned[0])
1852                 page_cache_release(pinned[0]);
1853         if (pinned[1])
1854                 page_cache_release(pinned[1]);
1855         *ppos = pos;
1856         current->backing_dev_info = NULL;
1857         mark_inode_dirty(inode);
1858         return num_written ? num_written : err;
1859 }
1860
1861 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
1862                         unsigned long offset, unsigned long size)
1863 {
1864         char *kaddr;
1865         unsigned long left, count = desc->count;
1866         struct inode *inode = page->mapping->host;
1867
1868         if (size > count)
1869                 size = count;
1870
1871         if (!PageChecked(page)) {
1872                 /* FIXME, do it per block */
1873                 struct btrfs_root *root = BTRFS_I(inode)->root;
1874
1875                 int ret = btrfs_csum_verify_file_block(root,
1876                                   page->mapping->host->i_ino,
1877                                   page->index << PAGE_CACHE_SHIFT,
1878                                   kmap(page), PAGE_CACHE_SIZE);
1879                 if (ret) {
1880                         printk("failed to verify ino %lu page %lu\n",
1881                                page->mapping->host->i_ino,
1882                                page->index);
1883                         memset(page_address(page), 0, PAGE_CACHE_SIZE);
1884                 }
1885                 SetPageChecked(page);
1886                 kunmap(page);
1887         }
1888         /*
1889          * Faults on the destination of a read are common, so do it before
1890          * taking the kmap.
1891          */
1892         if (!fault_in_pages_writeable(desc->arg.buf, size)) {
1893                 kaddr = kmap_atomic(page, KM_USER0);
1894                 left = __copy_to_user_inatomic(desc->arg.buf,
1895                                                 kaddr + offset, size);
1896                 kunmap_atomic(kaddr, KM_USER0);
1897                 if (left == 0)
1898                         goto success;
1899         }
1900
1901         /* Do it the slow way */
1902         kaddr = kmap(page);
1903         left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
1904         kunmap(page);
1905
1906         if (left) {
1907                 size -= left;
1908                 desc->error = -EFAULT;
1909         }
1910 success:
1911         desc->count = count - size;
1912         desc->written += size;
1913         desc->arg.buf += size;
1914         return size;
1915 }
1916
1917 /**
1918  * btrfs_file_aio_read - filesystem read routine
1919  * @iocb:       kernel I/O control block
1920  * @iov:        io vector request
1921  * @nr_segs:    number of segments in the iovec
1922  * @pos:        current file position
1923  */
1924 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1925                                    unsigned long nr_segs, loff_t pos)
1926 {
1927         struct file *filp = iocb->ki_filp;
1928         ssize_t retval;
1929         unsigned long seg;
1930         size_t count;
1931         loff_t *ppos = &iocb->ki_pos;
1932
1933         count = 0;
1934         for (seg = 0; seg < nr_segs; seg++) {
1935                 const struct iovec *iv = &iov[seg];
1936
1937                 /*
1938                  * If any segment has a negative length, or the cumulative
1939                  * length ever wraps negative then return -EINVAL.
1940                  */
1941                 count += iv->iov_len;
1942                 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
1943                         return -EINVAL;
1944                 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
1945                         continue;
1946                 if (seg == 0)
1947                         return -EFAULT;
1948                 nr_segs = seg;
1949                 count -= iv->iov_len;   /* This segment is no good */
1950                 break;
1951         }
1952         retval = 0;
1953         if (count) {
1954                 for (seg = 0; seg < nr_segs; seg++) {
1955                         read_descriptor_t desc;
1956
1957                         desc.written = 0;
1958                         desc.arg.buf = iov[seg].iov_base;
1959                         desc.count = iov[seg].iov_len;
1960                         if (desc.count == 0)
1961                                 continue;
1962                         desc.error = 0;
1963                         do_generic_file_read(filp, ppos, &desc,
1964                                              btrfs_read_actor);
1965                         retval += desc.written;
1966                         if (desc.error) {
1967                                 retval = retval ?: desc.error;
1968                                 break;
1969                         }
1970                 }
1971         }
1972         return retval;
1973 }
1974
1975 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
1976 {
1977         struct btrfs_trans_handle *trans;
1978         struct btrfs_key key;
1979         struct btrfs_root_item root_item;
1980         struct btrfs_inode_item *inode_item;
1981         struct buffer_head *subvol;
1982         struct btrfs_leaf *leaf;
1983         struct btrfs_root *new_root;
1984         struct inode *inode;
1985         int ret;
1986         u64 objectid;
1987         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
1988
1989         mutex_lock(&root->fs_info->fs_mutex);
1990         trans = btrfs_start_transaction(root, 1);
1991         BUG_ON(!trans);
1992
1993         subvol = btrfs_alloc_free_block(trans, root);
1994         leaf = btrfs_buffer_leaf(subvol);
1995         btrfs_set_header_nritems(&leaf->header, 0);
1996         btrfs_set_header_level(&leaf->header, 0);
1997         btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
1998         btrfs_set_header_generation(&leaf->header, trans->transid);
1999         memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2000                sizeof(leaf->header.fsid));
2001
2002         inode_item = &root_item.inode;
2003         memset(inode_item, 0, sizeof(*inode_item));
2004         btrfs_set_inode_generation(inode_item, 1);
2005         btrfs_set_inode_size(inode_item, 3);
2006         btrfs_set_inode_nlink(inode_item, 1);
2007         btrfs_set_inode_nblocks(inode_item, 1);
2008         btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2009
2010         btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2011         btrfs_set_root_refs(&root_item, 1);
2012
2013         mark_buffer_dirty(subvol);
2014         brelse(subvol);
2015         subvol = NULL;
2016
2017         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2018                                        0, &objectid);
2019         BUG_ON(ret);
2020
2021         btrfs_set_root_dirid(&root_item, new_dirid);
2022
2023         key.objectid = objectid;
2024         key.offset = 1;
2025         key.flags = 0;
2026         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2027         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2028                                 &root_item);
2029         BUG_ON(ret);
2030
2031         /*
2032          * insert the directory item
2033          */
2034         key.offset = (u64)-1;
2035         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2036                                     name, namelen,
2037                                     root->fs_info->sb->s_root->d_inode->i_ino,
2038                                     &key, 0);
2039         BUG_ON(ret);
2040
2041         ret = btrfs_commit_transaction(trans, root);
2042         BUG_ON(ret);
2043
2044         new_root = btrfs_read_fs_root(root->fs_info, &key);
2045         BUG_ON(!new_root);
2046
2047         trans = btrfs_start_transaction(new_root, 1);
2048         BUG_ON(!trans);
2049
2050         inode = btrfs_new_inode(trans, new_root, new_dirid, S_IFDIR | 0700);
2051         inode->i_op = &btrfs_dir_inode_operations;
2052         inode->i_fop = &btrfs_dir_file_operations;
2053
2054         ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2055         BUG_ON(ret);
2056
2057         inode->i_nlink = 1;
2058         inode->i_size = 6;
2059         ret = btrfs_update_inode(trans, new_root, inode);
2060         BUG_ON(ret);
2061
2062         ret = btrfs_commit_transaction(trans, new_root);
2063         BUG_ON(ret);
2064
2065         iput(inode);
2066
2067         mutex_unlock(&root->fs_info->fs_mutex);
2068         return 0;
2069 }
2070
2071 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2072 {
2073         struct btrfs_trans_handle *trans;
2074         struct btrfs_key key;
2075         struct btrfs_root_item new_root_item;
2076         int ret;
2077         u64 objectid;
2078
2079         if (!root->ref_cows)
2080                 return -EINVAL;
2081
2082         mutex_lock(&root->fs_info->fs_mutex);
2083         trans = btrfs_start_transaction(root, 1);
2084         BUG_ON(!trans);
2085
2086         ret = btrfs_update_inode(trans, root, root->inode);
2087         BUG_ON(ret);
2088
2089         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2090                                        0, &objectid);
2091         BUG_ON(ret);
2092
2093         memcpy(&new_root_item, &root->root_item,
2094                sizeof(new_root_item));
2095
2096         key.objectid = objectid;
2097         key.offset = 1;
2098         key.flags = 0;
2099         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2100         btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
2101
2102         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2103                                 &new_root_item);
2104         BUG_ON(ret);
2105
2106         /*
2107          * insert the directory item
2108          */
2109         key.offset = (u64)-1;
2110         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2111                                     name, namelen,
2112                                     root->fs_info->sb->s_root->d_inode->i_ino,
2113                                     &key, 0);
2114
2115         BUG_ON(ret);
2116
2117         ret = btrfs_inc_root_ref(trans, root);
2118         BUG_ON(ret);
2119
2120         ret = btrfs_commit_transaction(trans, root);
2121         BUG_ON(ret);
2122         mutex_unlock(&root->fs_info->fs_mutex);
2123         return 0;
2124 }
2125
2126 static int add_disk(struct btrfs_root *root, char *name, int namelen)
2127 {
2128         struct block_device *bdev;
2129         struct btrfs_path *path;
2130         struct super_block *sb = root->fs_info->sb;
2131         struct btrfs_root *dev_root = root->fs_info->dev_root;
2132         struct btrfs_trans_handle *trans;
2133         struct btrfs_device_item *dev_item;
2134         struct btrfs_key key;
2135         u16 item_size;
2136         u64 num_blocks;
2137         u64 new_blocks;
2138         u64 device_id;
2139         int ret;
2140
2141 printk("adding disk %s\n", name);
2142         path = btrfs_alloc_path();
2143         if (!path)
2144                 return -ENOMEM;
2145         num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super);
2146         bdev = open_bdev_excl(name, O_RDWR, sb);
2147         if (IS_ERR(bdev)) {
2148                 ret = PTR_ERR(bdev);
2149 printk("open bdev excl failed ret %d\n", ret);
2150                 goto out_nolock;
2151         }
2152         set_blocksize(bdev, sb->s_blocksize);
2153         new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2154         key.objectid = num_blocks;
2155         key.offset = new_blocks;
2156         key.flags = 0;
2157         btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
2158
2159         mutex_lock(&dev_root->fs_info->fs_mutex);
2160         trans = btrfs_start_transaction(dev_root, 1);
2161         item_size = sizeof(*dev_item) + namelen;
2162 printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size);
2163         ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size);
2164         if (ret) {
2165 printk("insert failed %d\n", ret);
2166                 close_bdev_excl(bdev);
2167                 if (ret > 0)
2168                         ret = -EEXIST;
2169                 goto out;
2170         }
2171         dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2172                                   path->slots[0], struct btrfs_device_item);
2173         btrfs_set_device_pathlen(dev_item, namelen);
2174         memcpy(dev_item + 1, name, namelen);
2175
2176         device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1;
2177         btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id);
2178         btrfs_set_device_id(dev_item, device_id);
2179         mark_buffer_dirty(path->nodes[0]);
2180
2181         ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks,
2182                                      new_blocks);
2183
2184         if (!ret) {
2185                 btrfs_set_super_total_blocks(root->fs_info->disk_super,
2186                                              num_blocks + new_blocks);
2187                 i_size_write(root->fs_info->btree_inode,
2188                              (num_blocks + new_blocks) <<
2189                              root->fs_info->btree_inode->i_blkbits);
2190         }
2191
2192 out:
2193         ret = btrfs_commit_transaction(trans, dev_root);
2194         BUG_ON(ret);
2195         mutex_unlock(&root->fs_info->fs_mutex);
2196 out_nolock:
2197         btrfs_free_path(path);
2198
2199         return ret;
2200 }
2201
2202 static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2203                        cmd, unsigned long arg)
2204 {
2205         struct btrfs_root *root = BTRFS_I(inode)->root;
2206         struct btrfs_ioctl_vol_args vol_args;
2207         int ret = 0;
2208         struct btrfs_dir_item *di;
2209         int namelen;
2210         struct btrfs_path *path;
2211         u64 root_dirid;
2212
2213         switch (cmd) {
2214         case BTRFS_IOC_SNAP_CREATE:
2215                 if (copy_from_user(&vol_args,
2216                                    (struct btrfs_ioctl_vol_args __user *)arg,
2217                                    sizeof(vol_args)))
2218                         return -EFAULT;
2219                 namelen = strlen(vol_args.name);
2220                 if (namelen > BTRFS_VOL_NAME_MAX)
2221                         return -EINVAL;
2222                 path = btrfs_alloc_path();
2223                 if (!path)
2224                         return -ENOMEM;
2225                 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2226                 mutex_lock(&root->fs_info->fs_mutex);
2227                 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2228                                     path, root_dirid,
2229                                     vol_args.name, namelen, 0);
2230                 mutex_unlock(&root->fs_info->fs_mutex);
2231                 btrfs_free_path(path);
2232                 if (di && !IS_ERR(di))
2233                         return -EEXIST;
2234
2235                 if (root == root->fs_info->tree_root)
2236                         ret = create_subvol(root, vol_args.name, namelen);
2237                 else
2238                         ret = create_snapshot(root, vol_args.name, namelen);
2239                 WARN_ON(ret);
2240                 break;
2241         case BTRFS_IOC_ADD_DISK:
2242                 if (copy_from_user(&vol_args,
2243                                    (struct btrfs_ioctl_vol_args __user *)arg,
2244                                    sizeof(vol_args)))
2245                         return -EFAULT;
2246                 namelen = strlen(vol_args.name);
2247                 if (namelen > BTRFS_VOL_NAME_MAX)
2248                         return -EINVAL;
2249                 vol_args.name[namelen] = '\0';
2250                 ret = add_disk(root, vol_args.name, namelen);
2251                 break;
2252         default:
2253                 return -ENOTTY;
2254         }
2255         return ret;
2256 }
2257
/*
 * Slab caches used by btrfs.  All five are created in init_inodecache()
 * and destroyed in destroy_inodecache().  Only the inode cache is private
 * to this file; the others have external linkage.
 */
/* backs struct btrfs_inode allocations made by btrfs_alloc_inode() */
2258 static struct kmem_cache *btrfs_inode_cachep;
/* created with objects of sizeof(struct btrfs_trans_handle) */
2259 struct kmem_cache *btrfs_trans_handle_cachep;
/* created with objects of sizeof(struct btrfs_transaction) */
2260 struct kmem_cache *btrfs_transaction_cachep;
/* created with fixed 256 byte objects and SLAB_DESTROY_BY_RCU */
2261 struct kmem_cache *btrfs_bit_radix_cachep;
/* presumably backs struct btrfs_path allocations -- confirm in ctree.c */
2262 struct kmem_cache *btrfs_path_cachep;
2263
2264 /*
2265  * Called inside transaction, so use GFP_NOFS
2266  */
2267 static struct inode *btrfs_alloc_inode(struct super_block *sb)
2268 {
2269         struct btrfs_inode *ei;
2270
2271         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2272         if (!ei)
2273                 return NULL;
2274         return &ei->vfs_inode;
2275 }
2276
2277 static void btrfs_destroy_inode(struct inode *inode)
2278 {
2279         WARN_ON(!list_empty(&inode->i_dentry));
2280         WARN_ON(inode->i_data.nrpages);
2281
2282         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2283 }
2284
2285 static void init_once(void * foo, struct kmem_cache * cachep,
2286                       unsigned long flags)
2287 {
2288         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2289
2290         if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
2291             SLAB_CTOR_CONSTRUCTOR) {
2292                 inode_init_once(&ei->vfs_inode);
2293         }
2294 }
2295
2296 static int init_inodecache(void)
2297 {
2298         btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2299                                              sizeof(struct btrfs_inode),
2300                                              0, (SLAB_RECLAIM_ACCOUNT|
2301                                                 SLAB_MEM_SPREAD),
2302                                              init_once, NULL);
2303         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2304                                              sizeof(struct btrfs_trans_handle),
2305                                              0, (SLAB_RECLAIM_ACCOUNT|
2306                                                 SLAB_MEM_SPREAD),
2307                                              NULL, NULL);
2308         btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2309                                              sizeof(struct btrfs_transaction),
2310                                              0, (SLAB_RECLAIM_ACCOUNT|
2311                                                 SLAB_MEM_SPREAD),
2312                                              NULL, NULL);
2313         btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2314                                              sizeof(struct btrfs_transaction),
2315                                              0, (SLAB_RECLAIM_ACCOUNT|
2316                                                 SLAB_MEM_SPREAD),
2317                                              NULL, NULL);
2318         btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2319                                              256,
2320                                              0, (SLAB_RECLAIM_ACCOUNT|
2321                                                 SLAB_MEM_SPREAD |
2322                                                 SLAB_DESTROY_BY_RCU),
2323                                              NULL, NULL);
2324         if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
2325             btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
2326                 return -ENOMEM;
2327         return 0;
2328 }
2329
2330 static void destroy_inodecache(void)
2331 {
2332         kmem_cache_destroy(btrfs_inode_cachep);
2333         kmem_cache_destroy(btrfs_trans_handle_cachep);
2334         kmem_cache_destroy(btrfs_transaction_cachep);
2335         kmem_cache_destroy(btrfs_bit_radix_cachep);
2336         kmem_cache_destroy(btrfs_path_cachep);
2337 }
2338
2339 static int btrfs_get_sb(struct file_system_type *fs_type,
2340         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2341 {
2342         return get_sb_bdev(fs_type, flags, dev_name, data,
2343                            btrfs_fill_super, mnt);
2344 }
2345
2346
2347 static int btrfs_getattr(struct vfsmount *mnt,
2348                          struct dentry *dentry, struct kstat *stat)
2349 {
2350         struct inode *inode = dentry->d_inode;
2351         generic_fillattr(inode, stat);
2352         stat->blksize = 256 * 1024;
2353         return 0;
2354 }
2355
2356 static struct file_system_type btrfs_fs_type = {
2357         .owner          = THIS_MODULE,
2358         .name           = "btrfs",
2359         .get_sb         = btrfs_get_sb,
2360         .kill_sb        = kill_block_super,
2361         .fs_flags       = FS_REQUIRES_DEV,
2362 };
2363
2364 static struct super_operations btrfs_super_ops = {
2365         .statfs         = simple_statfs,
2366         .delete_inode   = btrfs_delete_inode,
2367         .put_super      = btrfs_put_super,
2368         .read_inode     = btrfs_read_locked_inode,
2369         .write_super    = btrfs_write_super,
2370         .sync_fs        = btrfs_sync_fs,
2371         .write_inode    = btrfs_write_inode,
2372         .alloc_inode    = btrfs_alloc_inode,
2373         .destroy_inode  = btrfs_destroy_inode,
2374 };
2375
2376 static struct inode_operations btrfs_dir_inode_operations = {
2377         .lookup         = btrfs_lookup,
2378         .create         = btrfs_create,
2379         .unlink         = btrfs_unlink,
2380         .mkdir          = btrfs_mkdir,
2381         .rmdir          = btrfs_rmdir,
2382 };
2383
2384 static struct inode_operations btrfs_dir_ro_inode_operations = {
2385         .lookup         = btrfs_lookup,
2386 };
2387
2388 static struct file_operations btrfs_dir_file_operations = {
2389         .llseek         = generic_file_llseek,
2390         .read           = generic_read_dir,
2391         .readdir        = btrfs_readdir,
2392         .ioctl          = btrfs_ioctl,
2393 };
2394
2395 static struct address_space_operations btrfs_aops = {
2396         .readpage       = btrfs_readpage,
2397         .writepage      = btrfs_writepage,
2398         .sync_page      = block_sync_page,
2399         .prepare_write  = btrfs_prepare_write,
2400         .commit_write   = btrfs_commit_write,
2401 };
2402
2403 static struct inode_operations btrfs_file_inode_operations = {
2404         .truncate       = btrfs_truncate,
2405         .getattr        = btrfs_getattr,
2406 };
2407
2408 static struct file_operations btrfs_file_operations = {
2409         .llseek         = generic_file_llseek,
2410         .read           = do_sync_read,
2411         .aio_read       = btrfs_file_aio_read,
2412         .write          = btrfs_file_write,
2413         .mmap           = generic_file_mmap,
2414         .open           = generic_file_open,
2415         .ioctl          = btrfs_ioctl,
2416 };
2417
2418 static int __init init_btrfs_fs(void)
2419 {
2420         int err;
2421         printk("btrfs loaded!\n");
2422         err = init_inodecache();
2423         if (err)
2424                 return err;
2425         kset_set_kset_s(&btrfs_subsys, fs_subsys);
2426         err = subsystem_register(&btrfs_subsys);
2427         if (err)
2428                 goto out;
2429         return register_filesystem(&btrfs_fs_type);
2430 out:
2431         destroy_inodecache();
2432         return err;
2433 }
2434
2435 static void __exit exit_btrfs_fs(void)
2436 {
2437         destroy_inodecache();
2438         unregister_filesystem(&btrfs_fs_type);
2439         subsystem_unregister(&btrfs_subsys);
2440         printk("btrfs unloaded\n");
2441 }
2442
2443 module_init(init_btrfs_fs)
2444 module_exit(exit_btrfs_fs)
2445
2446 MODULE_LICENSE("GPL");