]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - fs/xfs/xfs_attr.c
[XFS] decontaminate vnode operations from behavior details
[linux-2.6-omap-h63xx.git] / fs / xfs / xfs_attr.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18
19 #include <linux/capability.h>
20
21 #include "xfs.h"
22 #include "xfs_fs.h"
23 #include "xfs_types.h"
24 #include "xfs_bit.h"
25 #include "xfs_log.h"
26 #include "xfs_inum.h"
27 #include "xfs_trans.h"
28 #include "xfs_sb.h"
29 #include "xfs_ag.h"
30 #include "xfs_dir2.h"
31 #include "xfs_dmapi.h"
32 #include "xfs_mount.h"
33 #include "xfs_da_btree.h"
34 #include "xfs_bmap_btree.h"
35 #include "xfs_alloc_btree.h"
36 #include "xfs_ialloc_btree.h"
37 #include "xfs_dir2_sf.h"
38 #include "xfs_attr_sf.h"
39 #include "xfs_dinode.h"
40 #include "xfs_inode.h"
41 #include "xfs_alloc.h"
42 #include "xfs_btree.h"
43 #include "xfs_inode_item.h"
44 #include "xfs_bmap.h"
45 #include "xfs_attr.h"
46 #include "xfs_attr_leaf.h"
47 #include "xfs_error.h"
48 #include "xfs_quota.h"
49 #include "xfs_trans_space.h"
50 #include "xfs_acl.h"
51 #include "xfs_rw.h"
52
53 /*
54  * xfs_attr.c
55  *
56  * Provide the external interfaces to manage attribute lists.
57  */
58
/* Number of slots in attr_system_names[]. */
59 #define ATTR_SYSCOUNT   2
/*
 * Forward declarations of the two POSIX ACL name descriptors and the table
 * that holds pointers to attrnames structures.  No initialisers are given
 * at this point in the file.
 * NOTE(review): presumably the table is populated with the two ACL
 * descriptors further down -- confirm against the rest of the file.
 */
60 static struct attrnames posix_acl_access;
61 static struct attrnames posix_acl_default;
62 static struct attrnames *attr_system_names[ATTR_SYSCOUNT];
63
64 /*========================================================================
65  * Function prototypes for the kernel.
66  *========================================================================*/
67
68 /*
69  * Internal routines used when the attribute list fits inside the inode
    * (shortform).
70  */
71 STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
72
73 /*
74  * Internal routines used when the attribute list is a single block
    * (leaf form).
75  */
76 STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
77 STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
78 STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
79 STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context);
80
81 /*
82  * Internal routines used when the attribute list is more than one block
    * (node form).
83  */
84 STATIC int xfs_attr_node_get(xfs_da_args_t *args);
85 STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
86 STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
87 STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context);
88 STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
89 STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
90
91 /*
92  * Routines to manipulate out-of-line attribute values.
93  */
94 STATIC int xfs_attr_rmtval_set(xfs_da_args_t *args);
95 STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
96
97 #define ATTR_RMTVALUE_MAPSIZE   1       /* # of map entries at once */
98
/* The trace buffer only exists in builds that define XFS_ATTR_TRACE. */
99 #if defined(XFS_ATTR_TRACE)
100 ktrace_t *xfs_attr_trace_buf;
101 #endif
102
103
104 /*========================================================================
105  * Overall external interface routines.
106  *========================================================================*/
107
108 int
109 xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
110                char *value, int *valuelenp, int flags, struct cred *cred)
111 {
112         xfs_da_args_t   args;
113         int             error;
114
115         if ((XFS_IFORK_Q(ip) == 0) ||
116             (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
117              ip->i_d.di_anextents == 0))
118                 return(ENOATTR);
119
120         /*
121          * Fill in the arg structure for this request.
122          */
123         memset((char *)&args, 0, sizeof(args));
124         args.name = name;
125         args.namelen = namelen;
126         args.value = value;
127         args.valuelen = *valuelenp;
128         args.flags = flags;
129         args.hashval = xfs_da_hashname(args.name, args.namelen);
130         args.dp = ip;
131         args.whichfork = XFS_ATTR_FORK;
132
133         /*
134          * Decide on what work routines to call based on the inode size.
135          */
136         if (XFS_IFORK_Q(ip) == 0 ||
137             (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
138              ip->i_d.di_anextents == 0)) {
139                 error = XFS_ERROR(ENOATTR);
140         } else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
141                 error = xfs_attr_shortform_getvalue(&args);
142         } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) {
143                 error = xfs_attr_leaf_get(&args);
144         } else {
145                 error = xfs_attr_node_get(&args);
146         }
147
148         /*
149          * Return the number of bytes in the value to the caller.
150          */
151         *valuelenp = args.valuelen;
152
153         if (error == EEXIST)
154                 error = 0;
155         return(error);
156 }
157
158 int
159 xfs_attr_get(
160         xfs_inode_t     *ip,
161         const char      *name,
162         char            *value,
163         int             *valuelenp,
164         int             flags,
165         cred_t          *cred)
166 {
167         int             error, namelen;
168
169         XFS_STATS_INC(xs_attr_get);
170
171         if (!name)
172                 return(EINVAL);
173         namelen = strlen(name);
174         if (namelen >= MAXNAMELEN)
175                 return(EFAULT);         /* match IRIX behaviour */
176
177         if (XFS_FORCED_SHUTDOWN(ip->i_mount))
178                 return(EIO);
179
180         xfs_ilock(ip, XFS_ILOCK_SHARED);
181         error = xfs_attr_fetch(ip, name, namelen, value, valuelenp, flags, cred);
182         xfs_iunlock(ip, XFS_ILOCK_SHARED);
183         return(error);
184 }
185
/*
 * Set attribute "name" (namelen bytes) to "value" (valuelen bytes) on
 * inode dp.  This is the worker behind xfs_attr_set().
 *
 * Steps, in order:
 *   - attach dquots and, if needed, add an attribute fork (both done
 *     before the inode is locked);
 *   - size the block reservation from the new entry's size;
 *   - allocate and reserve a permanent-log-reservation transaction,
 *     take XFS_ILOCK_EXCL and join the inode to the transaction;
 *   - try the shortform (in-inode) add; if that returns ENOSPC convert
 *     the list to a leaf block and roll the transaction;
 *   - add the name through the leaf or node routine;
 *   - commit, unlock, and update the change time unless ATTR_KERNOTIME
 *     is set.
 *
 * ATTR_ROOT in "flags" lets the transaction and quota reservation dip
 * into reserved blocks (see "rsvd").
 *
 * Returns 0 on success or a positive error code.  The inode lock is
 * never held on return.
 */
186 int
187 xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
188                  char *value, int valuelen, int flags)
189 {
190         xfs_da_args_t   args;
191         xfs_fsblock_t   firstblock;
192         xfs_bmap_free_t flist;
193         int             error, err2, committed;
194         int             local, size;
195         uint            nblks;
196         xfs_mount_t     *mp = dp->i_mount;
197         int             rsvd = (flags & ATTR_ROOT) != 0;
198
199         /*
200          * Attach the dquots to the inode.
201          */
202         if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
203                 return (error);
204
205         /*
206          * If the inode doesn't have an attribute fork, add one.
207          * (inode must not be locked when we call this routine)
208          */
209         if (XFS_IFORK_Q(dp) == 0) {
                /* Size of a shortform list holding just this one entry. */
210                 int sf_size = sizeof(xfs_attr_sf_hdr_t) +
211                               XFS_ATTR_SF_ENTSIZE_BYNAME(namelen, valuelen);
212
213                 if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd)))
214                         return(error);
215         }
216
217         /*
218          * Fill in the arg structure for this request.
219          */
220         memset((char *)&args, 0, sizeof(args));
221         args.name = name;
222         args.namelen = namelen;
223         args.value = value;
224         args.valuelen = valuelen;
225         args.flags = flags;
226         args.hashval = xfs_da_hashname(args.name, args.namelen);
227         args.dp = dp;
228         args.firstblock = &firstblock;
229         args.flist = &flist;
230         args.whichfork = XFS_ATTR_FORK;
231         args.addname = 1;
232         args.oknoent = 1;
233
234         /*
235          * Determine space new attribute will use, and if it would be
236          * "local" or "remote" (note: local != inline).
237          */
238         size = xfs_attr_leaf_newentsize(namelen, valuelen,
239                                         mp->m_sb.sb_blocksize, &local);
240
241         nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
242         if (local) {
243                 if (size > (mp->m_sb.sb_blocksize >> 1)) {
244                         /* Double split possible */
245                         nblks <<= 1;
246                 }
247         } else {
248                 uint    dblocks = XFS_B_TO_FSB(mp, valuelen);
249                 /* Out of line attribute, cannot double split, but make
250                  * room for the attribute value itself.
251                  */
252                 nblks += dblocks;
253                 nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
254         }
255
256         /* Size is now blocks for attribute data */
257         args.total = nblks;
258
259         /*
260          * Start our first transaction of the day.
261          *
262          * All future transactions during this code must be "chained" off
263          * this one via the trans_dup() call.  All transactions will contain
264          * the inode, and the inode will always be marked with trans_ihold().
265          * Since the inode will be locked in all transactions, we must log
266          * the inode in every transaction to let it float upward through
267          * the log.
268          */
269         args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET);
270
271         /*
272          * Root fork attributes can use reserved data blocks for this
273          * operation if necessary
274          */
275
276         if (rsvd)
277                 args.trans->t_flags |= XFS_TRANS_RESERVE;
278
279         if ((error = xfs_trans_reserve(args.trans, (uint) nblks,
280                                       XFS_ATTRSET_LOG_RES(mp, nblks),
281                                       0, XFS_TRANS_PERM_LOG_RES,
282                                       XFS_ATTRSET_LOG_COUNT))) {
                /* Reservation failed: nothing is locked yet, just cancel. */
283                 xfs_trans_cancel(args.trans, 0);
284                 return(error);
285         }
286         xfs_ilock(dp, XFS_ILOCK_EXCL);
287
        /* Quota reservation; forced through for ATTR_ROOT requests. */
288         error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, nblks, 0,
289                          rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
290                                 XFS_QMOPT_RES_REGBLKS);
291         if (error) {
292                 xfs_iunlock(dp, XFS_ILOCK_EXCL);
293                 xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
294                 return (error);
295         }
296
297         xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
298         xfs_trans_ihold(args.trans, dp);
299
300         /*
301          * If the attribute list is non-existent or a shortform list,
302          * upgrade it to a single-leaf-block attribute list.
303          */
304         if ((dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
305             ((dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) &&
306              (dp->i_d.di_anextents == 0))) {
307
308                 /*
309                  * Build initial attribute list (if required).
310                  */
311                 if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
312                         xfs_attr_shortform_create(&args);
313
314                 /*
315                  * Try to add the attr to the attribute list in
316                  * the inode.
317                  */
318                 error = xfs_attr_shortform_addname(&args);
319                 if (error != ENOSPC) {
320                         /*
321                          * Commit the shortform mods, and we're done.
322                          * NOTE: this is also the error path (EEXIST, etc).
323                          */
324                         ASSERT(args.trans != NULL);
325
326                         /*
327                          * If this is a synchronous mount, make sure that
328                          * the transaction goes to disk before returning
329                          * to the user.
330                          */
331                         if (mp->m_flags & XFS_MOUNT_WSYNC) {
332                                 xfs_trans_set_sync(args.trans);
333                         }
334                         err2 = xfs_trans_commit(args.trans,
335                                                  XFS_TRANS_RELEASE_LOG_RES);
336                         xfs_iunlock(dp, XFS_ILOCK_EXCL);
337
338                         /*
339                          * Hit the inode change time.
340                          */
341                         if (!error && (flags & ATTR_KERNOTIME) == 0) {
342                                 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
343                         }
                        /*
                         * The shortform error wins; the commit result is
                         * only reported when the add itself succeeded.
                         */
344                         return(error == 0 ? err2 : error);
345                 }
346
347                 /*
348                  * It won't fit in the shortform, transform to a leaf block.
349                  * GROT: another possible req'mt for a double-split btree op.
350                  */
351                 XFS_BMAP_INIT(args.flist, args.firstblock);
352                 error = xfs_attr_shortform_to_leaf(&args);
353                 if (!error) {
354                         error = xfs_bmap_finish(&args.trans, args.flist,
355                                                 &committed);
356                 }
357                 if (error) {
                        /*
                         * NOTE(review): "committed" is only written by
                         * xfs_bmap_finish().  If xfs_attr_shortform_to_leaf()
                         * failed, it is read uninitialized here, and
                         * args.trans is set to NULL so the "out" path never
                         * cancels the transaction.  Confirm whether this is
                         * intended.
                         */
358                         ASSERT(committed);
359                         args.trans = NULL;
360                         xfs_bmap_cancel(&flist);
361                         goto out;
362                 }
363
364                 /*
365                  * bmap_finish() may have committed the last trans and started
366                  * a new one.  We need the inode to be in all transactions.
367                  */
368                 if (committed) {
369                         xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
370                         xfs_trans_ihold(args.trans, dp);
371                 }
372
373                 /*
374                  * Commit the leaf transformation.  We'll need another (linked)
375                  * transaction to add the new attribute to the leaf.
376                  */
377                 if ((error = xfs_attr_rolltrans(&args.trans, dp)))
378                         goto out;
379
380         }
381
        /* The list is now leaf or node format; add the name accordingly. */
382         if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
383                 error = xfs_attr_leaf_addname(&args);
384         } else {
385                 error = xfs_attr_node_addname(&args);
386         }
387         if (error) {
388                 goto out;
389         }
390
391         /*
392          * If this is a synchronous mount, make sure that the
393          * transaction goes to disk before returning to the user.
394          */
395         if (mp->m_flags & XFS_MOUNT_WSYNC) {
396                 xfs_trans_set_sync(args.trans);
397         }
398
399         /*
400          * Commit the last in the sequence of transactions.
401          */
402         xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
403         error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
404         xfs_iunlock(dp, XFS_ILOCK_EXCL);
405
406         /*
407          * Hit the inode change time.
408          */
409         if (!error && (flags & ATTR_KERNOTIME) == 0) {
410                 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
411         }
412
413         return(error);
414
        /*
         * Error exit: cancel whatever transaction is still attached to
         * args (if any) and drop the inode lock.
         */
415 out:
416         if (args.trans)
417                 xfs_trans_cancel(args.trans,
418                         XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
419         xfs_iunlock(dp, XFS_ILOCK_EXCL);
420         return(error);
421 }
422
423 int
424 xfs_attr_set(
425         xfs_inode_t     *dp,
426         const char      *name,
427         char            *value,
428         int             valuelen,
429         int             flags)
430 {
431         int             namelen;
432
433         namelen = strlen(name);
434         if (namelen >= MAXNAMELEN)
435                 return EFAULT;          /* match IRIX behaviour */
436
437         XFS_STATS_INC(xs_attr_set);
438
439         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
440                 return (EIO);
441
442         return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags);
443 }
444
445 /*
446  * Generic handler routine to remove a name from an attribute list.
447  * Transitions attribute list from Btree to shortform as necessary.
448  */
449 int
450 xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
451 {
452         xfs_da_args_t   args;
453         xfs_fsblock_t   firstblock;
454         xfs_bmap_free_t flist;
455         int             error;
456         xfs_mount_t     *mp = dp->i_mount;
457
458         /*
459          * Fill in the arg structure for this request.
460          */
461         memset((char *)&args, 0, sizeof(args));
462         args.name = name;
463         args.namelen = namelen;
464         args.flags = flags;
465         args.hashval = xfs_da_hashname(args.name, args.namelen);
466         args.dp = dp;
467         args.firstblock = &firstblock;
468         args.flist = &flist;
469         args.total = 0;
470         args.whichfork = XFS_ATTR_FORK;
471
472         /*
473          * Attach the dquots to the inode.
474          */
475         if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
476                 return (error);
477
478         /*
479          * Start our first transaction of the day.
480          *
481          * All future transactions during this code must be "chained" off
482          * this one via the trans_dup() call.  All transactions will contain
483          * the inode, and the inode will always be marked with trans_ihold().
484          * Since the inode will be locked in all transactions, we must log
485          * the inode in every transaction to let it float upward through
486          * the log.
487          */
488         args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM);
489
490         /*
491          * Root fork attributes can use reserved data blocks for this
492          * operation if necessary
493          */
494
495         if (flags & ATTR_ROOT)
496                 args.trans->t_flags |= XFS_TRANS_RESERVE;
497
498         if ((error = xfs_trans_reserve(args.trans,
499                                       XFS_ATTRRM_SPACE_RES(mp),
500                                       XFS_ATTRRM_LOG_RES(mp),
501                                       0, XFS_TRANS_PERM_LOG_RES,
502                                       XFS_ATTRRM_LOG_COUNT))) {
503                 xfs_trans_cancel(args.trans, 0);
504                 return(error);
505         }
506
507         xfs_ilock(dp, XFS_ILOCK_EXCL);
508         /*
509          * No need to make quota reservations here. We expect to release some
510          * blocks not allocate in the common case.
511          */
512         xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
513         xfs_trans_ihold(args.trans, dp);
514
515         /*
516          * Decide on what work routines to call based on the inode size.
517          */
518         if (XFS_IFORK_Q(dp) == 0 ||
519             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
520              dp->i_d.di_anextents == 0)) {
521                 error = XFS_ERROR(ENOATTR);
522                 goto out;
523         }
524         if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
525                 ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
526                 error = xfs_attr_shortform_remove(&args);
527                 if (error) {
528                         goto out;
529                 }
530         } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
531                 error = xfs_attr_leaf_removename(&args);
532         } else {
533                 error = xfs_attr_node_removename(&args);
534         }
535         if (error) {
536                 goto out;
537         }
538
539         /*
540          * If this is a synchronous mount, make sure that the
541          * transaction goes to disk before returning to the user.
542          */
543         if (mp->m_flags & XFS_MOUNT_WSYNC) {
544                 xfs_trans_set_sync(args.trans);
545         }
546
547         /*
548          * Commit the last in the sequence of transactions.
549          */
550         xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
551         error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
552         xfs_iunlock(dp, XFS_ILOCK_EXCL);
553
554         /*
555          * Hit the inode change time.
556          */
557         if (!error && (flags & ATTR_KERNOTIME) == 0) {
558                 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
559         }
560
561         return(error);
562
563 out:
564         if (args.trans)
565                 xfs_trans_cancel(args.trans,
566                         XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
567         xfs_iunlock(dp, XFS_ILOCK_EXCL);
568         return(error);
569 }
570
571 int
572 xfs_attr_remove(
573         xfs_inode_t     *dp,
574         const char      *name,
575         int             flags)
576 {
577         int             namelen;
578
579         namelen = strlen(name);
580         if (namelen >= MAXNAMELEN)
581                 return EFAULT;          /* match IRIX behaviour */
582
583         XFS_STATS_INC(xs_attr_remove);
584
585         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
586                 return (EIO);
587
588         xfs_ilock(dp, XFS_ILOCK_SHARED);
589         if (XFS_IFORK_Q(dp) == 0 ||
590                    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
591                     dp->i_d.di_anextents == 0)) {
592                 xfs_iunlock(dp, XFS_ILOCK_SHARED);
593                 return(XFS_ERROR(ENOATTR));
594         }
595         xfs_iunlock(dp, XFS_ILOCK_SHARED);
596
597         return xfs_attr_remove_int(dp, name, namelen, flags);
598 }
599
600 int                                                             /* error */
601 xfs_attr_list_int(xfs_attr_list_context_t *context)
602 {
603         int error;
604         xfs_inode_t *dp = context->dp;
605
606         /*
607          * Decide on what work routines to call based on the inode size.
608          */
609         if (XFS_IFORK_Q(dp) == 0 ||
610             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
611              dp->i_d.di_anextents == 0)) {
612                 error = 0;
613         } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
614                 error = xfs_attr_shortform_list(context);
615         } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
616                 error = xfs_attr_leaf_list(context);
617         } else {
618                 error = xfs_attr_node_list(context);
619         }
620         return error;
621 }
622
/*
 * Minimum bytes used by an attr: the part of struct attrlist_ent that
 * precedes the a_name member.
 */
#define ATTR_ENTBASESIZE \
	(((struct attrlist_ent *) 0)->a_name - (char *) 0)

/*
 * Actual bytes used by an attr whose name is namelen bytes long: the base
 * size, the name and one terminating byte, rounded up to a multiple of
 * sizeof(u_int32_t).
 */
#define ATTR_ENTSIZE(namelen) \
	((ATTR_ENTBASESIZE + (namelen) + 1 + (sizeof(u_int32_t) - 1)) & \
	 ~(sizeof(u_int32_t) - 1))
628
629 /*
630  * Format an attribute and copy it out to the user's buffer.
631  * Take care to check values and protect against them changing later,
632  * we may be reading them directly out of a user buffer.
633  */
634 /*ARGSUSED*/
635 STATIC int
636 xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp,
637                      char *name, int namelen,
638                      int valuelen, char *value)
639 {
640         attrlist_ent_t *aep;
641         int arraytop;
642
643         ASSERT(!(context->flags & ATTR_KERNOVAL));
644         ASSERT(context->count >= 0);
645         ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
646         ASSERT(context->firstu >= sizeof(*context->alist));
647         ASSERT(context->firstu <= context->bufsize);
648
649         arraytop = sizeof(*context->alist) +
650                         context->count * sizeof(context->alist->al_offset[0]);
651         context->firstu -= ATTR_ENTSIZE(namelen);
652         if (context->firstu < arraytop) {
653                 xfs_attr_trace_l_c("buffer full", context);
654                 context->alist->al_more = 1;
655                 context->seen_enough = 1;
656                 return 1;
657         }
658
659         aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]);
660         aep->a_valuelen = valuelen;
661         memcpy(aep->a_name, name, namelen);
662         aep->a_name[ namelen ] = 0;
663         context->alist->al_offset[ context->count++ ] = context->firstu;
664         context->alist->al_count = context->count;
665         xfs_attr_trace_l_c("add", context);
666         return 0;
667 }
668
669 STATIC int
670 xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp,
671                      char *name, int namelen,
672                      int valuelen, char *value)
673 {
674         char *offset;
675         int arraytop;
676
677         ASSERT(context->count >= 0);
678
679         arraytop = context->count + namesp->attr_namelen + namelen + 1;
680         if (arraytop > context->firstu) {
681                 context->count = -1;    /* insufficient space */
682                 return 1;
683         }
684         offset = (char *)context->alist + context->count;
685         strncpy(offset, namesp->attr_name, namesp->attr_namelen);
686         offset += namesp->attr_namelen;
687         strncpy(offset, name, namelen);                 /* real name */
688         offset += namelen;
689         *offset = '\0';
690         context->count += namesp->attr_namelen + namelen + 1;
691         return 0;
692 }
693
694 /*ARGSUSED*/
695 STATIC int
696 xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp,
697                      char *name, int namelen,
698                      int valuelen, char *value)
699 {
700         context->count += namesp->attr_namelen + namelen + 1;
701         return 0;
702 }
703
704 /*
705  * Generate a list of extended attribute names and optionally
706  * also value lengths.  Positive return value follows the XFS
707  * convention of being an error, zero or negative return code
708  * is the length of the buffer returned (negated), indicating
709  * success.
710  */
711 int
712 xfs_attr_list(
713         xfs_inode_t     *dp,
714         char            *buffer,
715         int             bufsize,
716         int             flags,
717         attrlist_cursor_kern_t *cursor)
718 {
719         xfs_attr_list_context_t context;
720         int error;
721
722         XFS_STATS_INC(xs_attr_list);
723
724         /*
725          * Validate the cursor.
726          */
727         if (cursor->pad1 || cursor->pad2)
728                 return(XFS_ERROR(EINVAL));
729         if ((cursor->initted == 0) &&
730             (cursor->hashval || cursor->blkno || cursor->offset))
731                 return XFS_ERROR(EINVAL);
732
733         /*
734          * Check for a properly aligned buffer.
735          */
736         if (((long)buffer) & (sizeof(int)-1))
737                 return XFS_ERROR(EFAULT);
738         if (flags & ATTR_KERNOVAL)
739                 bufsize = 0;
740
741         /*
742          * Initialize the output buffer.
743          */
744         context.dp = dp;
745         context.cursor = cursor;
746         context.count = 0;
747         context.dupcnt = 0;
748         context.resynch = 1;
749         context.flags = flags;
750         context.seen_enough = 0;
751         context.alist = (attrlist_t *)buffer;
752         context.put_value = 0;
753
754         if (flags & ATTR_KERNAMELS) {
755                 context.bufsize = bufsize;
756                 context.firstu = context.bufsize;
757                 if (flags & ATTR_KERNOVAL)
758                         context.put_listent = xfs_attr_kern_list_sizes;
759                 else
760                         context.put_listent = xfs_attr_kern_list;
761         } else {
762                 context.bufsize = (bufsize & ~(sizeof(int)-1));  /* align */
763                 context.firstu = context.bufsize;
764                 context.alist->al_count = 0;
765                 context.alist->al_more = 0;
766                 context.alist->al_offset[0] = context.bufsize;
767                 context.put_listent = xfs_attr_put_listent;
768         }
769
770         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
771                 return EIO;
772
773         xfs_ilock(dp, XFS_ILOCK_SHARED);
774         xfs_attr_trace_l_c("syscall start", &context);
775
776         error = xfs_attr_list_int(&context);
777
778         xfs_iunlock(dp, XFS_ILOCK_SHARED);
779         xfs_attr_trace_l_c("syscall end", &context);
780
781         if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) {
782                 /* must return negated buffer size or the error */
783                 if (context.count < 0)
784                         error = XFS_ERROR(ERANGE);
785                 else
786                         error = -context.count;
787         } else
788                 ASSERT(error >= 0);
789
790         return error;
791 }
792
793 int                                                             /* error */
794 xfs_attr_inactive(xfs_inode_t *dp)
795 {
796         xfs_trans_t *trans;
797         xfs_mount_t *mp;
798         int error;
799
800         mp = dp->i_mount;
801         ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
802
803         xfs_ilock(dp, XFS_ILOCK_SHARED);
804         if ((XFS_IFORK_Q(dp) == 0) ||
805             (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
806             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
807              dp->i_d.di_anextents == 0)) {
808                 xfs_iunlock(dp, XFS_ILOCK_SHARED);
809                 return(0);
810         }
811         xfs_iunlock(dp, XFS_ILOCK_SHARED);
812
813         /*
814          * Start our first transaction of the day.
815          *
816          * All future transactions during this code must be "chained" off
817          * this one via the trans_dup() call.  All transactions will contain
818          * the inode, and the inode will always be marked with trans_ihold().
819          * Since the inode will be locked in all transactions, we must log
820          * the inode in every transaction to let it float upward through
821          * the log.
822          */
823         trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
824         if ((error = xfs_trans_reserve(trans, 0, XFS_ATTRINVAL_LOG_RES(mp), 0,
825                                       XFS_TRANS_PERM_LOG_RES,
826                                       XFS_ATTRINVAL_LOG_COUNT))) {
827                 xfs_trans_cancel(trans, 0);
828                 return(error);
829         }
830         xfs_ilock(dp, XFS_ILOCK_EXCL);
831
832         /*
833          * No need to make quota reservations here. We expect to release some
834          * blocks, not allocate, in the common case.
835          */
836         xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL);
837         xfs_trans_ihold(trans, dp);
838
839         /*
840          * Decide on what work routines to call based on the inode size.
841          */
842         if ((XFS_IFORK_Q(dp) == 0) ||
843             (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
844             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
845              dp->i_d.di_anextents == 0)) {
846                 error = 0;
847                 goto out;
848         }
849         error = xfs_attr_root_inactive(&trans, dp);
850         if (error)
851                 goto out;
852         /*
853          * signal synchronous inactive transactions unless this
854          * is a synchronous mount filesystem in which case we
855          * know that we're here because we've been called out of
856          * xfs_inactive which means that the last reference is gone
857          * and the unlink transaction has already hit the disk so
858          * async inactive transactions are safe.
859          */
860         if ((error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK,
861                                 (!(mp->m_flags & XFS_MOUNT_WSYNC)
862                                  ? 1 : 0))))
863                 goto out;
864
865         /*
866          * Commit the last in the sequence of transactions.
867          */
868         xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
869         error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
870         xfs_iunlock(dp, XFS_ILOCK_EXCL);
871
872         return(error);
873
874 out:
875         xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
876         xfs_iunlock(dp, XFS_ILOCK_EXCL);
877         return(error);
878 }
879
880
881
882 /*========================================================================
883  * External routines when attribute list is inside the inode
884  *========================================================================*/
885
886 /*
887  * Add a name to the shortform attribute list structure
888  * This is the external routine.
889  */
890 STATIC int
891 xfs_attr_shortform_addname(xfs_da_args_t *args)
892 {
893         int newsize, forkoff, retval;
894
895         retval = xfs_attr_shortform_lookup(args);
896         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
897                 return(retval);
898         } else if (retval == EEXIST) {
899                 if (args->flags & ATTR_CREATE)
900                         return(retval);
901                 retval = xfs_attr_shortform_remove(args);
902                 ASSERT(retval == 0);
903         }
904
905         if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
906             args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
907                 return(XFS_ERROR(ENOSPC));
908
909         newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
910         newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
911
912         forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
913         if (!forkoff)
914                 return(XFS_ERROR(ENOSPC));
915
916         xfs_attr_shortform_add(args, forkoff);
917         return(0);
918 }
919
920
921 /*========================================================================
922  * External routines when attribute list is one block
923  *========================================================================*/
924
925 /*
926  * Add a name to the leaf attribute list structure
927  *
928  * This leaf block cannot have a "remote" value, we only call this routine
929  * if bmap_one_block() says there is only one block (ie: no remote blks).
930  */
931 int
932 xfs_attr_leaf_addname(xfs_da_args_t *args)
933 {
934         xfs_inode_t *dp;
935         xfs_dabuf_t *bp;
936         int retval, error, committed, forkoff;
937
938         /*
939          * Read the (only) block in the attribute list in.
940          */
941         dp = args->dp;
942         args->blkno = 0;
943         error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
944                                              XFS_ATTR_FORK);
945         if (error)
946                 return(error);
947         ASSERT(bp != NULL);
948
949         /*
950          * Look up the given attribute in the leaf block.  Figure out if
951          * the given flags produce an error or call for an atomic rename.
952          */
953         retval = xfs_attr_leaf_lookup_int(bp, args);
954         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
955                 xfs_da_brelse(args->trans, bp);
956                 return(retval);
957         } else if (retval == EEXIST) {
958                 if (args->flags & ATTR_CREATE) {        /* pure create op */
959                         xfs_da_brelse(args->trans, bp);
960                         return(retval);
961                 }
962                 args->rename = 1;                       /* an atomic rename */
963                 args->blkno2 = args->blkno;             /* set 2nd entry info*/
964                 args->index2 = args->index;
965                 args->rmtblkno2 = args->rmtblkno;
966                 args->rmtblkcnt2 = args->rmtblkcnt;
967         }
968
969         /*
970          * Add the attribute to the leaf block, transitioning to a Btree
971          * if required.
972          */
973         retval = xfs_attr_leaf_add(bp, args);
974         xfs_da_buf_done(bp);
975         if (retval == ENOSPC) {
976                 /*
977                  * Promote the attribute list to the Btree format, then
978                  * Commit that transaction so that the node_addname() call
979                  * can manage its own transactions.
980                  */
981                 XFS_BMAP_INIT(args->flist, args->firstblock);
982                 error = xfs_attr_leaf_to_node(args);
983                 if (!error) {
984                         error = xfs_bmap_finish(&args->trans, args->flist,
985                                                 &committed);
986                 }
987                 if (error) {
988                         ASSERT(committed);
989                         args->trans = NULL;
990                         xfs_bmap_cancel(args->flist);
991                         return(error);
992                 }
993
994                 /*
995                  * bmap_finish() may have committed the last trans and started
996                  * a new one.  We need the inode to be in all transactions.
997                  */
998                 if (committed) {
999                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1000                         xfs_trans_ihold(args->trans, dp);
1001                 }
1002
1003                 /*
1004                  * Commit the current trans (including the inode) and start
1005                  * a new one.
1006                  */
1007                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1008                         return (error);
1009
1010                 /*
1011                  * Fob the whole rest of the problem off on the Btree code.
1012                  */
1013                 error = xfs_attr_node_addname(args);
1014                 return(error);
1015         }
1016
1017         /*
1018          * Commit the transaction that added the attr name so that
1019          * later routines can manage their own transactions.
1020          */
1021         if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1022                 return (error);
1023
1024         /*
1025          * If there was an out-of-line value, allocate the blocks we
1026          * identified for its storage and copy the value.  This is done
1027          * after we create the attribute so that we don't overflow the
1028          * maximum size of a transaction and/or hit a deadlock.
1029          */
1030         if (args->rmtblkno > 0) {
1031                 error = xfs_attr_rmtval_set(args);
1032                 if (error)
1033                         return(error);
1034         }
1035
1036         /*
1037          * If this is an atomic rename operation, we must "flip" the
1038          * incomplete flags on the "new" and "old" attribute/value pairs
1039          * so that one disappears and one appears atomically.  Then we
1040          * must remove the "old" attribute/value pair.
1041          */
1042         if (args->rename) {
1043                 /*
1044                  * In a separate transaction, set the incomplete flag on the
1045                  * "old" attr and clear the incomplete flag on the "new" attr.
1046                  */
1047                 error = xfs_attr_leaf_flipflags(args);
1048                 if (error)
1049                         return(error);
1050
1051                 /*
1052                  * Dismantle the "old" attribute/value pair by removing
1053                  * a "remote" value (if it exists).
1054                  */
1055                 args->index = args->index2;
1056                 args->blkno = args->blkno2;
1057                 args->rmtblkno = args->rmtblkno2;
1058                 args->rmtblkcnt = args->rmtblkcnt2;
1059                 if (args->rmtblkno) {
1060                         error = xfs_attr_rmtval_remove(args);
1061                         if (error)
1062                                 return(error);
1063                 }
1064
1065                 /*
1066                  * Read in the block containing the "old" attr, then
1067                  * remove the "old" attr from that block (neat, huh!)
1068                  */
1069                 error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1,
1070                                                      &bp, XFS_ATTR_FORK);
1071                 if (error)
1072                         return(error);
1073                 ASSERT(bp != NULL);
1074                 (void)xfs_attr_leaf_remove(bp, args);
1075
1076                 /*
1077                  * If the result is small enough, shrink it all into the inode.
1078                  */
1079                 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1080                         XFS_BMAP_INIT(args->flist, args->firstblock);
1081                         error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
1082                         /* bp is gone due to xfs_da_shrink_inode */
1083                         if (!error) {
1084                                 error = xfs_bmap_finish(&args->trans,
1085                                                         args->flist,
1086                                                         &committed);
1087                         }
1088                         if (error) {
1089                                 ASSERT(committed);
1090                                 args->trans = NULL;
1091                                 xfs_bmap_cancel(args->flist);
1092                                 return(error);
1093                         }
1094
1095                         /*
1096                          * bmap_finish() may have committed the last trans
1097                          * and started a new one.  We need the inode to be
1098                          * in all transactions.
1099                          */
1100                         if (committed) {
1101                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1102                                 xfs_trans_ihold(args->trans, dp);
1103                         }
1104                 } else
1105                         xfs_da_buf_done(bp);
1106
1107                 /*
1108                  * Commit the remove and start the next trans in series.
1109                  */
1110                 error = xfs_attr_rolltrans(&args->trans, dp);
1111
1112         } else if (args->rmtblkno > 0) {
1113                 /*
1114                  * Added a "remote" value, just clear the incomplete flag.
1115                  */
1116                 error = xfs_attr_leaf_clearflag(args);
1117         }
1118         return(error);
1119 }
1120
1121 /*
1122  * Remove a name from the leaf attribute list structure
1123  *
1124  * This leaf block cannot have a "remote" value, we only call this routine
1125  * if bmap_one_block() says there is only one block (ie: no remote blks).
1126  */
1127 STATIC int
1128 xfs_attr_leaf_removename(xfs_da_args_t *args)
1129 {
1130         xfs_inode_t *dp;
1131         xfs_dabuf_t *bp;
1132         int error, committed, forkoff;
1133
1134         /*
1135          * Remove the attribute.
1136          */
1137         dp = args->dp;
1138         args->blkno = 0;
1139         error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
1140                                              XFS_ATTR_FORK);
1141         if (error) {
1142                 return(error);
1143         }
1144
1145         ASSERT(bp != NULL);
1146         error = xfs_attr_leaf_lookup_int(bp, args);
1147         if (error == ENOATTR) {
1148                 xfs_da_brelse(args->trans, bp);
1149                 return(error);
1150         }
1151
1152         (void)xfs_attr_leaf_remove(bp, args);
1153
1154         /*
1155          * If the result is small enough, shrink it all into the inode.
1156          */
1157         if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1158                 XFS_BMAP_INIT(args->flist, args->firstblock);
1159                 error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
1160                 /* bp is gone due to xfs_da_shrink_inode */
1161                 if (!error) {
1162                         error = xfs_bmap_finish(&args->trans, args->flist,
1163                                                 &committed);
1164                 }
1165                 if (error) {
1166                         ASSERT(committed);
1167                         args->trans = NULL;
1168                         xfs_bmap_cancel(args->flist);
1169                         return(error);
1170                 }
1171
1172                 /*
1173                  * bmap_finish() may have committed the last trans and started
1174                  * a new one.  We need the inode to be in all transactions.
1175                  */
1176                 if (committed) {
1177                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1178                         xfs_trans_ihold(args->trans, dp);
1179                 }
1180         } else
1181                 xfs_da_buf_done(bp);
1182         return(0);
1183 }
1184
1185 /*
1186  * Look up a name in a leaf attribute list structure.
1187  *
1188  * This leaf block cannot have a "remote" value, we only call this routine
1189  * if bmap_one_block() says there is only one block (ie: no remote blks).
1190  */
1191 STATIC int
1192 xfs_attr_leaf_get(xfs_da_args_t *args)
1193 {
1194         xfs_dabuf_t *bp;
1195         int error;
1196
1197         args->blkno = 0;
1198         error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
1199                                              XFS_ATTR_FORK);
1200         if (error)
1201                 return(error);
1202         ASSERT(bp != NULL);
1203
1204         error = xfs_attr_leaf_lookup_int(bp, args);
1205         if (error != EEXIST)  {
1206                 xfs_da_brelse(args->trans, bp);
1207                 return(error);
1208         }
1209         error = xfs_attr_leaf_getvalue(bp, args);
1210         xfs_da_brelse(args->trans, bp);
1211         if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
1212                 error = xfs_attr_rmtval_get(args);
1213         }
1214         return(error);
1215 }
1216
1217 /*
1218  * Copy out attribute entries for attr_list(), for leaf attribute lists.
1219  */
1220 STATIC int
1221 xfs_attr_leaf_list(xfs_attr_list_context_t *context)
1222 {
1223         xfs_attr_leafblock_t *leaf;
1224         int error;
1225         xfs_dabuf_t *bp;
1226
1227         context->cursor->blkno = 0;
1228         error = xfs_da_read_buf(NULL, context->dp, 0, -1, &bp, XFS_ATTR_FORK);
1229         if (error)
1230                 return XFS_ERROR(error);
1231         ASSERT(bp != NULL);
1232         leaf = bp->data;
1233         if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) {
1234                 XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
1235                                      context->dp->i_mount, leaf);
1236                 xfs_da_brelse(NULL, bp);
1237                 return XFS_ERROR(EFSCORRUPTED);
1238         }
1239
1240         error = xfs_attr_leaf_list_int(bp, context);
1241         xfs_da_brelse(NULL, bp);
1242         return XFS_ERROR(error);
1243 }
1244
1245
1246 /*========================================================================
1247  * External routines when attribute list size > XFS_LBSIZE(mp).
1248  *========================================================================*/
1249
1250 /*
1251  * Add a name to a Btree-format attribute list.
1252  *
1253  * This will involve walking down the Btree, and may involve splitting
1254  * leaf nodes and even splitting intermediate nodes up to and including
1255  * the root node (a special case of an intermediate node).
1256  *
1257  * "Remote" attribute values confuse the issue and atomic rename operations
1258  * add a whole extra layer of confusion on top of that.
1259  */
1260 STATIC int
1261 xfs_attr_node_addname(xfs_da_args_t *args)
1262 {
1263         xfs_da_state_t *state;
1264         xfs_da_state_blk_t *blk;
1265         xfs_inode_t *dp;
1266         xfs_mount_t *mp;
1267         int committed, retval, error;
1268
1269         /*
1270          * Fill in bucket of arguments/results/context to carry around.
1271          */
1272         dp = args->dp;
1273         mp = dp->i_mount;
1274 restart:
1275         state = xfs_da_state_alloc();
1276         state->args = args;
1277         state->mp = mp;
1278         state->blocksize = state->mp->m_sb.sb_blocksize;
1279         state->node_ents = state->mp->m_attr_node_ents;
1280
1281         /*
1282          * Search to see if name already exists, and get back a pointer
1283          * to where it should go.
1284          */
1285         error = xfs_da_node_lookup_int(state, &retval);
1286         if (error)
1287                 goto out;
1288         blk = &state->path.blk[ state->path.active-1 ];
1289         ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1290         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
1291                 goto out;
1292         } else if (retval == EEXIST) {
1293                 if (args->flags & ATTR_CREATE)
1294                         goto out;
1295                 args->rename = 1;                       /* atomic rename op */
1296                 args->blkno2 = args->blkno;             /* set 2nd entry info*/
1297                 args->index2 = args->index;
1298                 args->rmtblkno2 = args->rmtblkno;
1299                 args->rmtblkcnt2 = args->rmtblkcnt;
1300                 args->rmtblkno = 0;
1301                 args->rmtblkcnt = 0;
1302         }
1303
1304         retval = xfs_attr_leaf_add(blk->bp, state->args);
1305         if (retval == ENOSPC) {
1306                 if (state->path.active == 1) {
1307                         /*
1308                          * Its really a single leaf node, but it had
1309                          * out-of-line values so it looked like it *might*
1310                          * have been a b-tree.
1311                          */
1312                         xfs_da_state_free(state);
1313                         XFS_BMAP_INIT(args->flist, args->firstblock);
1314                         error = xfs_attr_leaf_to_node(args);
1315                         if (!error) {
1316                                 error = xfs_bmap_finish(&args->trans,
1317                                                         args->flist,
1318                                                         &committed);
1319                         }
1320                         if (error) {
1321                                 ASSERT(committed);
1322                                 args->trans = NULL;
1323                                 xfs_bmap_cancel(args->flist);
1324                                 goto out;
1325                         }
1326
1327                         /*
1328                          * bmap_finish() may have committed the last trans
1329                          * and started a new one.  We need the inode to be
1330                          * in all transactions.
1331                          */
1332                         if (committed) {
1333                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1334                                 xfs_trans_ihold(args->trans, dp);
1335                         }
1336
1337                         /*
1338                          * Commit the node conversion and start the next
1339                          * trans in the chain.
1340                          */
1341                         if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1342                                 goto out;
1343
1344                         goto restart;
1345                 }
1346
1347                 /*
1348                  * Split as many Btree elements as required.
1349                  * This code tracks the new and old attr's location
1350                  * in the index/blkno/rmtblkno/rmtblkcnt fields and
1351                  * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
1352                  */
1353                 XFS_BMAP_INIT(args->flist, args->firstblock);
1354                 error = xfs_da_split(state);
1355                 if (!error) {
1356                         error = xfs_bmap_finish(&args->trans, args->flist,
1357                                                 &committed);
1358                 }
1359                 if (error) {
1360                         ASSERT(committed);
1361                         args->trans = NULL;
1362                         xfs_bmap_cancel(args->flist);
1363                         goto out;
1364                 }
1365
1366                 /*
1367                  * bmap_finish() may have committed the last trans and started
1368                  * a new one.  We need the inode to be in all transactions.
1369                  */
1370                 if (committed) {
1371                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1372                         xfs_trans_ihold(args->trans, dp);
1373                 }
1374         } else {
1375                 /*
1376                  * Addition succeeded, update Btree hashvals.
1377                  */
1378                 xfs_da_fixhashpath(state, &state->path);
1379         }
1380
1381         /*
1382          * Kill the state structure, we're done with it and need to
1383          * allow the buffers to come back later.
1384          */
1385         xfs_da_state_free(state);
1386         state = NULL;
1387
1388         /*
1389          * Commit the leaf addition or btree split and start the next
1390          * trans in the chain.
1391          */
1392         if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1393                 goto out;
1394
1395         /*
1396          * If there was an out-of-line value, allocate the blocks we
1397          * identified for its storage and copy the value.  This is done
1398          * after we create the attribute so that we don't overflow the
1399          * maximum size of a transaction and/or hit a deadlock.
1400          */
1401         if (args->rmtblkno > 0) {
1402                 error = xfs_attr_rmtval_set(args);
1403                 if (error)
1404                         return(error);
1405         }
1406
1407         /*
1408          * If this is an atomic rename operation, we must "flip" the
1409          * incomplete flags on the "new" and "old" attribute/value pairs
1410          * so that one disappears and one appears atomically.  Then we
1411          * must remove the "old" attribute/value pair.
1412          */
1413         if (args->rename) {
1414                 /*
1415                  * In a separate transaction, set the incomplete flag on the
1416                  * "old" attr and clear the incomplete flag on the "new" attr.
1417                  */
1418                 error = xfs_attr_leaf_flipflags(args);
1419                 if (error)
1420                         goto out;
1421
1422                 /*
1423                  * Dismantle the "old" attribute/value pair by removing
1424                  * a "remote" value (if it exists).
1425                  */
1426                 args->index = args->index2;
1427                 args->blkno = args->blkno2;
1428                 args->rmtblkno = args->rmtblkno2;
1429                 args->rmtblkcnt = args->rmtblkcnt2;
1430                 if (args->rmtblkno) {
1431                         error = xfs_attr_rmtval_remove(args);
1432                         if (error)
1433                                 return(error);
1434                 }
1435
1436                 /*
1437                  * Re-find the "old" attribute entry after any split ops.
1438                  * The INCOMPLETE flag means that we will find the "old"
1439                  * attr, not the "new" one.
1440                  */
1441                 args->flags |= XFS_ATTR_INCOMPLETE;
1442                 state = xfs_da_state_alloc();
1443                 state->args = args;
1444                 state->mp = mp;
1445                 state->blocksize = state->mp->m_sb.sb_blocksize;
1446                 state->node_ents = state->mp->m_attr_node_ents;
1447                 state->inleaf = 0;
1448                 error = xfs_da_node_lookup_int(state, &retval);
1449                 if (error)
1450                         goto out;
1451
1452                 /*
1453                  * Remove the name and update the hashvals in the tree.
1454                  */
1455                 blk = &state->path.blk[ state->path.active-1 ];
1456                 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1457                 error = xfs_attr_leaf_remove(blk->bp, args);
1458                 xfs_da_fixhashpath(state, &state->path);
1459
1460                 /*
1461                  * Check to see if the tree needs to be collapsed.
1462                  */
1463                 if (retval && (state->path.active > 1)) {
1464                         XFS_BMAP_INIT(args->flist, args->firstblock);
1465                         error = xfs_da_join(state);
1466                         if (!error) {
1467                                 error = xfs_bmap_finish(&args->trans,
1468                                                         args->flist,
1469                                                         &committed);
1470                         }
1471                         if (error) {
1472                                 ASSERT(committed);
1473                                 args->trans = NULL;
1474                                 xfs_bmap_cancel(args->flist);
1475                                 goto out;
1476                         }
1477
1478                         /*
1479                          * bmap_finish() may have committed the last trans
1480                          * and started a new one.  We need the inode to be
1481                          * in all transactions.
1482                          */
1483                         if (committed) {
1484                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1485                                 xfs_trans_ihold(args->trans, dp);
1486                         }
1487                 }
1488
1489                 /*
1490                  * Commit and start the next trans in the chain.
1491                  */
1492                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1493                         goto out;
1494
1495         } else if (args->rmtblkno > 0) {
1496                 /*
1497                  * Added a "remote" value, just clear the incomplete flag.
1498                  */
1499                 error = xfs_attr_leaf_clearflag(args);
1500                 if (error)
1501                         goto out;
1502         }
1503         retval = error = 0;
1504
1505 out:
1506         if (state)
1507                 xfs_da_state_free(state);
1508         if (error)
1509                 return(error);
1510         return(retval);
1511 }
1512
1513 /*
1514  * Remove a name from a B-tree attribute list.
1515  *
1516  * This will involve walking down the Btree, and may involve joining
1517  * leaf nodes and even joining intermediate nodes up to and including
1518  * the root node (a special case of an intermediate node).
1519  */
1520 STATIC int
1521 xfs_attr_node_removename(xfs_da_args_t *args)
1522 {
1523         xfs_da_state_t *state;
1524         xfs_da_state_blk_t *blk;
1525         xfs_inode_t *dp;
1526         xfs_dabuf_t *bp;
1527         int retval, error, committed, forkoff;
1528
1529         /*
1530          * Tie a string around our finger to remind us where we are.
1531          */
1532         dp = args->dp;
1533         state = xfs_da_state_alloc();
1534         state->args = args;
1535         state->mp = dp->i_mount;
1536         state->blocksize = state->mp->m_sb.sb_blocksize;
1537         state->node_ents = state->mp->m_attr_node_ents;
1538
1539         /*
1540          * Search to see if name exists, and get back a pointer to it.
1541          */
1542         error = xfs_da_node_lookup_int(state, &retval);
1543         if (error || (retval != EEXIST)) {
1544                 if (error == 0)
1545                         error = retval;
1546                 goto out;
1547         }
1548
1549         /*
1550          * If there is an out-of-line value, de-allocate the blocks.
1551          * This is done before we remove the attribute so that we don't
1552          * overflow the maximum size of a transaction and/or hit a deadlock.
1553          */
1554         blk = &state->path.blk[ state->path.active-1 ];
1555         ASSERT(blk->bp != NULL);
1556         ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1557         if (args->rmtblkno > 0) {
1558                 /*
1559                  * Fill in disk block numbers in the state structure
1560                  * so that we can get the buffers back after we commit
1561                  * several transactions in the following calls.
1562                  */
1563                 error = xfs_attr_fillstate(state);
1564                 if (error)
1565                         goto out;
1566
1567                 /*
1568                  * Mark the attribute as INCOMPLETE, then bunmapi() the
1569                  * remote value.
1570                  */
1571                 error = xfs_attr_leaf_setflag(args);
1572                 if (error)
1573                         goto out;
1574                 error = xfs_attr_rmtval_remove(args);
1575                 if (error)
1576                         goto out;
1577
1578                 /*
1579                  * Refill the state structure with buffers, the prior calls
1580                  * released our buffers.
1581                  */
1582                 error = xfs_attr_refillstate(state);
1583                 if (error)
1584                         goto out;
1585         }
1586
1587         /*
1588          * Remove the name and update the hashvals in the tree.
1589          */
1590         blk = &state->path.blk[ state->path.active-1 ];
1591         ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1592         retval = xfs_attr_leaf_remove(blk->bp, args);
1593         xfs_da_fixhashpath(state, &state->path);
1594
1595         /*
1596          * Check to see if the tree needs to be collapsed.
1597          */
1598         if (retval && (state->path.active > 1)) {
1599                 XFS_BMAP_INIT(args->flist, args->firstblock);
1600                 error = xfs_da_join(state);
1601                 if (!error) {
1602                         error = xfs_bmap_finish(&args->trans, args->flist,
1603                                                 &committed);
1604                 }
1605                 if (error) {
1606                         ASSERT(committed);
1607                         args->trans = NULL;
1608                         xfs_bmap_cancel(args->flist);
1609                         goto out;
1610                 }
1611
1612                 /*
1613                  * bmap_finish() may have committed the last trans and started
1614                  * a new one.  We need the inode to be in all transactions.
1615                  */
1616                 if (committed) {
1617                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1618                         xfs_trans_ihold(args->trans, dp);
1619                 }
1620
1621                 /*
1622                  * Commit the Btree join operation and start a new trans.
1623                  */
1624                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1625                         goto out;
1626         }
1627
1628         /*
1629          * If the result is small enough, push it all into the inode.
1630          */
1631         if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
1632                 /*
1633                  * Have to get rid of the copy of this dabuf in the state.
1634                  */
1635                 ASSERT(state->path.active == 1);
1636                 ASSERT(state->path.blk[0].bp);
1637                 xfs_da_buf_done(state->path.blk[0].bp);
1638                 state->path.blk[0].bp = NULL;
1639
1640                 error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
1641                                                      XFS_ATTR_FORK);
1642                 if (error)
1643                         goto out;
1644                 ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *)
1645                                       bp->data)->hdr.info.magic)
1646                                                        == XFS_ATTR_LEAF_MAGIC);
1647
1648                 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1649                         XFS_BMAP_INIT(args->flist, args->firstblock);
1650                         error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
1651                         /* bp is gone due to xfs_da_shrink_inode */
1652                         if (!error) {
1653                                 error = xfs_bmap_finish(&args->trans,
1654                                                         args->flist,
1655                                                         &committed);
1656                         }
1657                         if (error) {
1658                                 ASSERT(committed);
1659                                 args->trans = NULL;
1660                                 xfs_bmap_cancel(args->flist);
1661                                 goto out;
1662                         }
1663
1664                         /*
1665                          * bmap_finish() may have committed the last trans
1666                          * and started a new one.  We need the inode to be
1667                          * in all transactions.
1668                          */
1669                         if (committed) {
1670                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1671                                 xfs_trans_ihold(args->trans, dp);
1672                         }
1673                 } else
1674                         xfs_da_brelse(args->trans, bp);
1675         }
1676         error = 0;
1677
1678 out:
1679         xfs_da_state_free(state);
1680         return(error);
1681 }
1682
1683 /*
1684  * Fill in the disk block numbers in the state structure for the buffers
1685  * that are attached to the state structure.
1686  * This is done so that we can quickly reattach ourselves to those buffers
1687  * after some set of transaction commits have released these buffers.
1688  */
1689 STATIC int
1690 xfs_attr_fillstate(xfs_da_state_t *state)
1691 {
1692         xfs_da_state_path_t *path;
1693         xfs_da_state_blk_t *blk;
1694         int level;
1695
1696         /*
1697          * Roll down the "path" in the state structure, storing the on-disk
1698          * block number for those buffers in the "path".
1699          */
1700         path = &state->path;
1701         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1702         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1703                 if (blk->bp) {
1704                         blk->disk_blkno = xfs_da_blkno(blk->bp);
1705                         xfs_da_buf_done(blk->bp);
1706                         blk->bp = NULL;
1707                 } else {
1708                         blk->disk_blkno = 0;
1709                 }
1710         }
1711
1712         /*
1713          * Roll down the "altpath" in the state structure, storing the on-disk
1714          * block number for those buffers in the "altpath".
1715          */
1716         path = &state->altpath;
1717         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1718         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1719                 if (blk->bp) {
1720                         blk->disk_blkno = xfs_da_blkno(blk->bp);
1721                         xfs_da_buf_done(blk->bp);
1722                         blk->bp = NULL;
1723                 } else {
1724                         blk->disk_blkno = 0;
1725                 }
1726         }
1727
1728         return(0);
1729 }
1730
1731 /*
1732  * Reattach the buffers to the state structure based on the disk block
1733  * numbers stored in the state structure.
1734  * This is done after some set of transaction commits have released those
1735  * buffers from our grip.
1736  */
1737 STATIC int
1738 xfs_attr_refillstate(xfs_da_state_t *state)
1739 {
1740         xfs_da_state_path_t *path;
1741         xfs_da_state_blk_t *blk;
1742         int level, error;
1743
1744         /*
1745          * Roll down the "path" in the state structure, storing the on-disk
1746          * block number for those buffers in the "path".
1747          */
1748         path = &state->path;
1749         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1750         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1751                 if (blk->disk_blkno) {
1752                         error = xfs_da_read_buf(state->args->trans,
1753                                                 state->args->dp,
1754                                                 blk->blkno, blk->disk_blkno,
1755                                                 &blk->bp, XFS_ATTR_FORK);
1756                         if (error)
1757                                 return(error);
1758                 } else {
1759                         blk->bp = NULL;
1760                 }
1761         }
1762
1763         /*
1764          * Roll down the "altpath" in the state structure, storing the on-disk
1765          * block number for those buffers in the "altpath".
1766          */
1767         path = &state->altpath;
1768         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1769         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1770                 if (blk->disk_blkno) {
1771                         error = xfs_da_read_buf(state->args->trans,
1772                                                 state->args->dp,
1773                                                 blk->blkno, blk->disk_blkno,
1774                                                 &blk->bp, XFS_ATTR_FORK);
1775                         if (error)
1776                                 return(error);
1777                 } else {
1778                         blk->bp = NULL;
1779                 }
1780         }
1781
1782         return(0);
1783 }
1784
1785 /*
1786  * Look up a filename in a node attribute list.
1787  *
1788  * This routine gets called for any attribute fork that has more than one
1789  * block, ie: both true Btree attr lists and for single-leaf-blocks with
1790  * "remote" values taking up more blocks.
1791  */
1792 STATIC int
1793 xfs_attr_node_get(xfs_da_args_t *args)
1794 {
1795         xfs_da_state_t *state;
1796         xfs_da_state_blk_t *blk;
1797         int error, retval;
1798         int i;
1799
1800         state = xfs_da_state_alloc();
1801         state->args = args;
1802         state->mp = args->dp->i_mount;
1803         state->blocksize = state->mp->m_sb.sb_blocksize;
1804         state->node_ents = state->mp->m_attr_node_ents;
1805
1806         /*
1807          * Search to see if name exists, and get back a pointer to it.
1808          */
1809         error = xfs_da_node_lookup_int(state, &retval);
1810         if (error) {
1811                 retval = error;
1812         } else if (retval == EEXIST) {
1813                 blk = &state->path.blk[ state->path.active-1 ];
1814                 ASSERT(blk->bp != NULL);
1815                 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1816
1817                 /*
1818                  * Get the value, local or "remote"
1819                  */
1820                 retval = xfs_attr_leaf_getvalue(blk->bp, args);
1821                 if (!retval && (args->rmtblkno > 0)
1822                     && !(args->flags & ATTR_KERNOVAL)) {
1823                         retval = xfs_attr_rmtval_get(args);
1824                 }
1825         }
1826
1827         /*
1828          * If not in a transaction, we have to release all the buffers.
1829          */
1830         for (i = 0; i < state->path.active; i++) {
1831                 xfs_da_brelse(args->trans, state->path.blk[i].bp);
1832                 state->path.blk[i].bp = NULL;
1833         }
1834
1835         xfs_da_state_free(state);
1836         return(retval);
1837 }
1838
1839 STATIC int                                                      /* error */
1840 xfs_attr_node_list(xfs_attr_list_context_t *context)
1841 {
1842         attrlist_cursor_kern_t *cursor;
1843         xfs_attr_leafblock_t *leaf;
1844         xfs_da_intnode_t *node;
1845         xfs_da_node_entry_t *btree;
1846         int error, i;
1847         xfs_dabuf_t *bp;
1848
1849         cursor = context->cursor;
1850         cursor->initted = 1;
1851
1852         /*
1853          * Do all sorts of validation on the passed-in cursor structure.
1854          * If anything is amiss, ignore the cursor and look up the hashval
1855          * starting from the btree root.
1856          */
1857         bp = NULL;
1858         if (cursor->blkno > 0) {
1859                 error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
1860                                               &bp, XFS_ATTR_FORK);
1861                 if ((error != 0) && (error != EFSCORRUPTED))
1862                         return(error);
1863                 if (bp) {
1864                         node = bp->data;
1865                         switch (be16_to_cpu(node->hdr.info.magic)) {
1866                         case XFS_DA_NODE_MAGIC:
1867                                 xfs_attr_trace_l_cn("wrong blk", context, node);
1868                                 xfs_da_brelse(NULL, bp);
1869                                 bp = NULL;
1870                                 break;
1871                         case XFS_ATTR_LEAF_MAGIC:
1872                                 leaf = bp->data;
1873                                 if (cursor->hashval > be32_to_cpu(leaf->entries[
1874                                     be16_to_cpu(leaf->hdr.count)-1].hashval)) {
1875                                         xfs_attr_trace_l_cl("wrong blk",
1876                                                            context, leaf);
1877                                         xfs_da_brelse(NULL, bp);
1878                                         bp = NULL;
1879                                 } else if (cursor->hashval <=
1880                                              be32_to_cpu(leaf->entries[0].hashval)) {
1881                                         xfs_attr_trace_l_cl("maybe wrong blk",
1882                                                            context, leaf);
1883                                         xfs_da_brelse(NULL, bp);
1884                                         bp = NULL;
1885                                 }
1886                                 break;
1887                         default:
1888                                 xfs_attr_trace_l_c("wrong blk - ??", context);
1889                                 xfs_da_brelse(NULL, bp);
1890                                 bp = NULL;
1891                         }
1892                 }
1893         }
1894
1895         /*
1896          * We did not find what we expected given the cursor's contents,
1897          * so we start from the top and work down based on the hash value.
1898          * Note that start of node block is same as start of leaf block.
1899          */
1900         if (bp == NULL) {
1901                 cursor->blkno = 0;
1902                 for (;;) {
1903                         error = xfs_da_read_buf(NULL, context->dp,
1904                                                       cursor->blkno, -1, &bp,
1905                                                       XFS_ATTR_FORK);
1906                         if (error)
1907                                 return(error);
1908                         if (unlikely(bp == NULL)) {
1909                                 XFS_ERROR_REPORT("xfs_attr_node_list(2)",
1910                                                  XFS_ERRLEVEL_LOW,
1911                                                  context->dp->i_mount);
1912                                 return(XFS_ERROR(EFSCORRUPTED));
1913                         }
1914                         node = bp->data;
1915                         if (be16_to_cpu(node->hdr.info.magic)
1916                                                         == XFS_ATTR_LEAF_MAGIC)
1917                                 break;
1918                         if (unlikely(be16_to_cpu(node->hdr.info.magic)
1919                                                         != XFS_DA_NODE_MAGIC)) {
1920                                 XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
1921                                                      XFS_ERRLEVEL_LOW,
1922                                                      context->dp->i_mount,
1923                                                      node);
1924                                 xfs_da_brelse(NULL, bp);
1925                                 return(XFS_ERROR(EFSCORRUPTED));
1926                         }
1927                         btree = node->btree;
1928                         for (i = 0; i < be16_to_cpu(node->hdr.count);
1929                                                                 btree++, i++) {
1930                                 if (cursor->hashval
1931                                                 <= be32_to_cpu(btree->hashval)) {
1932                                         cursor->blkno = be32_to_cpu(btree->before);
1933                                         xfs_attr_trace_l_cb("descending",
1934                                                             context, btree);
1935                                         break;
1936                                 }
1937                         }
1938                         if (i == be16_to_cpu(node->hdr.count)) {
1939                                 xfs_da_brelse(NULL, bp);
1940                                 return(0);
1941                         }
1942                         xfs_da_brelse(NULL, bp);
1943                 }
1944         }
1945         ASSERT(bp != NULL);
1946
1947         /*
1948          * Roll upward through the blocks, processing each leaf block in
1949          * order.  As long as there is space in the result buffer, keep
1950          * adding the information.
1951          */
1952         for (;;) {
1953                 leaf = bp->data;
1954                 if (unlikely(be16_to_cpu(leaf->hdr.info.magic)
1955                                                 != XFS_ATTR_LEAF_MAGIC)) {
1956                         XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)",
1957                                              XFS_ERRLEVEL_LOW,
1958                                              context->dp->i_mount, leaf);
1959                         xfs_da_brelse(NULL, bp);
1960                         return(XFS_ERROR(EFSCORRUPTED));
1961                 }
1962                 error = xfs_attr_leaf_list_int(bp, context);
1963                 if (error) {
1964                         xfs_da_brelse(NULL, bp);
1965                         return error;
1966                 }
1967                 if (context->seen_enough || leaf->hdr.info.forw == 0)
1968                         break;
1969                 cursor->blkno = be32_to_cpu(leaf->hdr.info.forw);
1970                 xfs_da_brelse(NULL, bp);
1971                 error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
1972                                               &bp, XFS_ATTR_FORK);
1973                 if (error)
1974                         return(error);
1975                 if (unlikely((bp == NULL))) {
1976                         XFS_ERROR_REPORT("xfs_attr_node_list(5)",
1977                                          XFS_ERRLEVEL_LOW,
1978                                          context->dp->i_mount);
1979                         return(XFS_ERROR(EFSCORRUPTED));
1980                 }
1981         }
1982         xfs_da_brelse(NULL, bp);
1983         return(0);
1984 }
1985
1986
1987 /*========================================================================
1988  * External routines for manipulating out-of-line attribute values.
1989  *========================================================================*/
1990
1991 /*
1992  * Read the value associated with an attribute from the out-of-line buffer
1993  * that we stored it in.
1994  */
1995 int
1996 xfs_attr_rmtval_get(xfs_da_args_t *args)
1997 {
1998         xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE];
1999         xfs_mount_t *mp;
2000         xfs_daddr_t dblkno;
2001         xfs_caddr_t dst;
2002         xfs_buf_t *bp;
2003         int nmap, error, tmp, valuelen, blkcnt, i;
2004         xfs_dablk_t lblkno;
2005
2006         ASSERT(!(args->flags & ATTR_KERNOVAL));
2007
2008         mp = args->dp->i_mount;
2009         dst = args->value;
2010         valuelen = args->valuelen;
2011         lblkno = args->rmtblkno;
2012         while (valuelen > 0) {
2013                 nmap = ATTR_RMTVALUE_MAPSIZE;
2014                 error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno,
2015                                   args->rmtblkcnt,
2016                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2017                                   NULL, 0, map, &nmap, NULL, NULL);
2018                 if (error)
2019                         return(error);
2020                 ASSERT(nmap >= 1);
2021
2022                 for (i = 0; (i < nmap) && (valuelen > 0); i++) {
2023                         ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
2024                                (map[i].br_startblock != HOLESTARTBLOCK));
2025                         dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
2026                         blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
2027                         error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
2028                                              blkcnt, XFS_BUF_LOCK, &bp);
2029                         if (error)
2030                                 return(error);
2031
2032                         tmp = (valuelen < XFS_BUF_SIZE(bp))
2033                                 ? valuelen : XFS_BUF_SIZE(bp);
2034                         xfs_biomove(bp, 0, tmp, dst, XFS_B_READ);
2035                         xfs_buf_relse(bp);
2036                         dst += tmp;
2037                         valuelen -= tmp;
2038
2039                         lblkno += map[i].br_blockcount;
2040                 }
2041         }
2042         ASSERT(valuelen == 0);
2043         return(0);
2044 }
2045
2046 /*
2047  * Write the value associated with an attribute into the out-of-line buffer
2048  * that we have defined for it.
2049  */
2050 STATIC int
2051 xfs_attr_rmtval_set(xfs_da_args_t *args)
2052 {
2053         xfs_mount_t *mp;
2054         xfs_fileoff_t lfileoff;
2055         xfs_inode_t *dp;
2056         xfs_bmbt_irec_t map;
2057         xfs_daddr_t dblkno;
2058         xfs_caddr_t src;
2059         xfs_buf_t *bp;
2060         xfs_dablk_t lblkno;
2061         int blkcnt, valuelen, nmap, error, tmp, committed;
2062
2063         dp = args->dp;
2064         mp = dp->i_mount;
2065         src = args->value;
2066
2067         /*
2068          * Find a "hole" in the attribute address space large enough for
2069          * us to drop the new attribute's value into.
2070          */
2071         blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
2072         lfileoff = 0;
2073         error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
2074                                                    XFS_ATTR_FORK);
2075         if (error) {
2076                 return(error);
2077         }
2078         args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
2079         args->rmtblkcnt = blkcnt;
2080
2081         /*
2082          * Roll through the "value", allocating blocks on disk as required.
2083          */
2084         while (blkcnt > 0) {
2085                 /*
2086                  * Allocate a single extent, up to the size of the value.
2087                  */
2088                 XFS_BMAP_INIT(args->flist, args->firstblock);
2089                 nmap = 1;
2090                 error = xfs_bmapi(args->trans, dp, (xfs_fileoff_t)lblkno,
2091                                   blkcnt,
2092                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA |
2093                                                         XFS_BMAPI_WRITE,
2094                                   args->firstblock, args->total, &map, &nmap,
2095                                   args->flist, NULL);
2096                 if (!error) {
2097                         error = xfs_bmap_finish(&args->trans, args->flist,
2098                                                 &committed);
2099                 }
2100                 if (error) {
2101                         ASSERT(committed);
2102                         args->trans = NULL;
2103                         xfs_bmap_cancel(args->flist);
2104                         return(error);
2105                 }
2106
2107                 /*
2108                  * bmap_finish() may have committed the last trans and started
2109                  * a new one.  We need the inode to be in all transactions.
2110                  */
2111                 if (committed) {
2112                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
2113                         xfs_trans_ihold(args->trans, dp);
2114                 }
2115
2116                 ASSERT(nmap == 1);
2117                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2118                        (map.br_startblock != HOLESTARTBLOCK));
2119                 lblkno += map.br_blockcount;
2120                 blkcnt -= map.br_blockcount;
2121
2122                 /*
2123                  * Start the next trans in the chain.
2124                  */
2125                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
2126                         return (error);
2127         }
2128
2129         /*
2130          * Roll through the "value", copying the attribute value to the
2131          * already-allocated blocks.  Blocks are written synchronously
2132          * so that we can know they are all on disk before we turn off
2133          * the INCOMPLETE flag.
2134          */
2135         lblkno = args->rmtblkno;
2136         valuelen = args->valuelen;
2137         while (valuelen > 0) {
2138                 /*
2139                  * Try to remember where we decided to put the value.
2140                  */
2141                 XFS_BMAP_INIT(args->flist, args->firstblock);
2142                 nmap = 1;
2143                 error = xfs_bmapi(NULL, dp, (xfs_fileoff_t)lblkno,
2144                                   args->rmtblkcnt,
2145                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2146                                   args->firstblock, 0, &map, &nmap,
2147                                   NULL, NULL);
2148                 if (error) {
2149                         return(error);
2150                 }
2151                 ASSERT(nmap == 1);
2152                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2153                        (map.br_startblock != HOLESTARTBLOCK));
2154
2155                 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2156                 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2157
2158                 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno,
2159                                                         blkcnt, XFS_BUF_LOCK);
2160                 ASSERT(bp);
2161                 ASSERT(!XFS_BUF_GETERROR(bp));
2162
2163                 tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
2164                                                         XFS_BUF_SIZE(bp);
2165                 xfs_biomove(bp, 0, tmp, src, XFS_B_WRITE);
2166                 if (tmp < XFS_BUF_SIZE(bp))
2167                         xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
2168                 if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
2169                         return (error);
2170                 }
2171                 src += tmp;
2172                 valuelen -= tmp;
2173
2174                 lblkno += map.br_blockcount;
2175         }
2176         ASSERT(valuelen == 0);
2177         return(0);
2178 }
2179
2180 /*
2181  * Remove the value associated with an attribute by deleting the
2182  * out-of-line buffer that it is stored on.
2183  */
2184 STATIC int
2185 xfs_attr_rmtval_remove(xfs_da_args_t *args)
2186 {
2187         xfs_mount_t *mp;
2188         xfs_bmbt_irec_t map;
2189         xfs_buf_t *bp;
2190         xfs_daddr_t dblkno;
2191         xfs_dablk_t lblkno;
2192         int valuelen, blkcnt, nmap, error, done, committed;
2193
2194         mp = args->dp->i_mount;
2195
2196         /*
2197          * Roll through the "value", invalidating the attribute value's
2198          * blocks.
2199          */
2200         lblkno = args->rmtblkno;
2201         valuelen = args->rmtblkcnt;
2202         while (valuelen > 0) {
2203                 /*
2204                  * Try to remember where we decided to put the value.
2205                  */
2206                 XFS_BMAP_INIT(args->flist, args->firstblock);
2207                 nmap = 1;
2208                 error = xfs_bmapi(NULL, args->dp, (xfs_fileoff_t)lblkno,
2209                                         args->rmtblkcnt,
2210                                         XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2211                                         args->firstblock, 0, &map, &nmap,
2212                                         args->flist, NULL);
2213                 if (error) {
2214                         return(error);
2215                 }
2216                 ASSERT(nmap == 1);
2217                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2218                        (map.br_startblock != HOLESTARTBLOCK));
2219
2220                 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2221                 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2222
2223                 /*
2224                  * If the "remote" value is in the cache, remove it.
2225                  */
2226                 bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt,
2227                                 XFS_INCORE_TRYLOCK);
2228                 if (bp) {
2229                         XFS_BUF_STALE(bp);
2230                         XFS_BUF_UNDELAYWRITE(bp);
2231                         xfs_buf_relse(bp);
2232                         bp = NULL;
2233                 }
2234
2235                 valuelen -= map.br_blockcount;
2236
2237                 lblkno += map.br_blockcount;
2238         }
2239
2240         /*
2241          * Keep de-allocating extents until the remote-value region is gone.
2242          */
2243         lblkno = args->rmtblkno;
2244         blkcnt = args->rmtblkcnt;
2245         done = 0;
2246         while (!done) {
2247                 XFS_BMAP_INIT(args->flist, args->firstblock);
2248                 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
2249                                     XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2250                                     1, args->firstblock, args->flist,
2251                                     NULL, &done);
2252                 if (!error) {
2253                         error = xfs_bmap_finish(&args->trans, args->flist,
2254                                                 &committed);
2255                 }
2256                 if (error) {
2257                         ASSERT(committed);
2258                         args->trans = NULL;
2259                         xfs_bmap_cancel(args->flist);
2260                         return(error);
2261                 }
2262
2263                 /*
2264                  * bmap_finish() may have committed the last trans and started
2265                  * a new one.  We need the inode to be in all transactions.
2266                  */
2267                 if (committed) {
2268                         xfs_trans_ijoin(args->trans, args->dp, XFS_ILOCK_EXCL);
2269                         xfs_trans_ihold(args->trans, args->dp);
2270                 }
2271
2272                 /*
2273                  * Close out trans and start the next one in the chain.
2274                  */
2275                 if ((error = xfs_attr_rolltrans(&args->trans, args->dp)))
2276                         return (error);
2277         }
2278         return(0);
2279 }
2280
2281 #if defined(XFS_ATTR_TRACE)
2282 /*
2283  * Add a trace buffer entry for an attr_list context structure.
2284  */
2285 void
2286 xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context)
2287 {
2288         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where,
2289                 (__psunsigned_t)context->dp,
2290                 (__psunsigned_t)context->cursor->hashval,
2291                 (__psunsigned_t)context->cursor->blkno,
2292                 (__psunsigned_t)context->cursor->offset,
2293                 (__psunsigned_t)context->alist,
2294                 (__psunsigned_t)context->bufsize,
2295                 (__psunsigned_t)context->count,
2296                 (__psunsigned_t)context->firstu,
2297                 (__psunsigned_t)
2298                         ((context->count > 0) &&
2299                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2300                                 ? (ATTR_ENTRY(context->alist,
2301                                               context->count-1)->a_valuelen)
2302                                 : 0,
2303                 (__psunsigned_t)context->dupcnt,
2304                 (__psunsigned_t)context->flags,
2305                 (__psunsigned_t)NULL,
2306                 (__psunsigned_t)NULL,
2307                 (__psunsigned_t)NULL);
2308 }
2309
2310 /*
2311  * Add a trace buffer entry for a context structure and a Btree node.
2312  */
2313 void
2314 xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
2315                          struct xfs_da_intnode *node)
2316 {
2317         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where,
2318                 (__psunsigned_t)context->dp,
2319                 (__psunsigned_t)context->cursor->hashval,
2320                 (__psunsigned_t)context->cursor->blkno,
2321                 (__psunsigned_t)context->cursor->offset,
2322                 (__psunsigned_t)context->alist,
2323                 (__psunsigned_t)context->bufsize,
2324                 (__psunsigned_t)context->count,
2325                 (__psunsigned_t)context->firstu,
2326                 (__psunsigned_t)
2327                         ((context->count > 0) &&
2328                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2329                                 ? (ATTR_ENTRY(context->alist,
2330                                               context->count-1)->a_valuelen)
2331                                 : 0,
2332                 (__psunsigned_t)context->dupcnt,
2333                 (__psunsigned_t)context->flags,
2334                 (__psunsigned_t)be16_to_cpu(node->hdr.count),
2335                 (__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
2336                 (__psunsigned_t)be32_to_cpu(node->btree[
2337                                     be16_to_cpu(node->hdr.count)-1].hashval));
2338 }
2339
2340 /*
2341  * Add a trace buffer entry for a context structure and a Btree element.
2342  */
2343 void
2344 xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
2345                           struct xfs_da_node_entry *btree)
2346 {
2347         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where,
2348                 (__psunsigned_t)context->dp,
2349                 (__psunsigned_t)context->cursor->hashval,
2350                 (__psunsigned_t)context->cursor->blkno,
2351                 (__psunsigned_t)context->cursor->offset,
2352                 (__psunsigned_t)context->alist,
2353                 (__psunsigned_t)context->bufsize,
2354                 (__psunsigned_t)context->count,
2355                 (__psunsigned_t)context->firstu,
2356                 (__psunsigned_t)
2357                         ((context->count > 0) &&
2358                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2359                                 ? (ATTR_ENTRY(context->alist,
2360                                               context->count-1)->a_valuelen)
2361                                 : 0,
2362                 (__psunsigned_t)context->dupcnt,
2363                 (__psunsigned_t)context->flags,
2364                 (__psunsigned_t)be32_to_cpu(btree->hashval),
2365                 (__psunsigned_t)be32_to_cpu(btree->before),
2366                 (__psunsigned_t)NULL);
2367 }
2368
2369 /*
2370  * Add a trace buffer entry for a context structure and a leaf block.
2371  */
2372 void
2373 xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
2374                               struct xfs_attr_leafblock *leaf)
2375 {
2376         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where,
2377                 (__psunsigned_t)context->dp,
2378                 (__psunsigned_t)context->cursor->hashval,
2379                 (__psunsigned_t)context->cursor->blkno,
2380                 (__psunsigned_t)context->cursor->offset,
2381                 (__psunsigned_t)context->alist,
2382                 (__psunsigned_t)context->bufsize,
2383                 (__psunsigned_t)context->count,
2384                 (__psunsigned_t)context->firstu,
2385                 (__psunsigned_t)
2386                         ((context->count > 0) &&
2387                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2388                                 ? (ATTR_ENTRY(context->alist,
2389                                               context->count-1)->a_valuelen)
2390                                 : 0,
2391                 (__psunsigned_t)context->dupcnt,
2392                 (__psunsigned_t)context->flags,
2393                 (__psunsigned_t)be16_to_cpu(leaf->hdr.count),
2394                 (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
2395                 (__psunsigned_t)be32_to_cpu(leaf->entries[
2396                                 be16_to_cpu(leaf->hdr.count)-1].hashval));
2397 }
2398
2399 /*
2400  * Add a trace buffer entry for the arguments given to the routine,
2401  * generic form.
2402  */
2403 void
2404 xfs_attr_trace_enter(int type, char *where,
2405                          __psunsigned_t a2, __psunsigned_t a3,
2406                          __psunsigned_t a4, __psunsigned_t a5,
2407                          __psunsigned_t a6, __psunsigned_t a7,
2408                          __psunsigned_t a8, __psunsigned_t a9,
2409                          __psunsigned_t a10, __psunsigned_t a11,
2410                          __psunsigned_t a12, __psunsigned_t a13,
2411                          __psunsigned_t a14, __psunsigned_t a15)
2412 {
2413         ASSERT(xfs_attr_trace_buf);
2414         ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type),
2415                                          (void *)where,
2416                                          (void *)a2,  (void *)a3,  (void *)a4,
2417                                          (void *)a5,  (void *)a6,  (void *)a7,
2418                                          (void *)a8,  (void *)a9,  (void *)a10,
2419                                          (void *)a11, (void *)a12, (void *)a13,
2420                                          (void *)a14, (void *)a15);
2421 }
2422 #endif  /* XFS_ATTR_TRACE */
2423
2424
2425 /*========================================================================
2426  * System (pseudo) namespace attribute interface routines.
2427  *========================================================================*/
2428
2429 STATIC int
2430 posix_acl_access_set(
2431         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2432 {
2433         return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
2434 }
2435
2436 STATIC int
2437 posix_acl_access_remove(
2438         bhv_vnode_t *vp, char *name, int xflags)
2439 {
2440         return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
2441 }
2442
2443 STATIC int
2444 posix_acl_access_get(
2445         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2446 {
2447         return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
2448 }
2449
2450 STATIC int
2451 posix_acl_access_exists(
2452         bhv_vnode_t *vp)
2453 {
2454         return xfs_acl_vhasacl_access(vp);
2455 }
2456
2457 STATIC int
2458 posix_acl_default_set(
2459         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2460 {
2461         return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
2462 }
2463
2464 STATIC int
2465 posix_acl_default_get(
2466         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2467 {
2468         return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
2469 }
2470
2471 STATIC int
2472 posix_acl_default_remove(
2473         bhv_vnode_t *vp, char *name, int xflags)
2474 {
2475         return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT);
2476 }
2477
2478 STATIC int
2479 posix_acl_default_exists(
2480         bhv_vnode_t *vp)
2481 {
2482         return xfs_acl_vhasacl_default(vp);
2483 }
2484
2485 static struct attrnames posix_acl_access = {
2486         .attr_name      = "posix_acl_access",
2487         .attr_namelen   = sizeof("posix_acl_access") - 1,
2488         .attr_get       = posix_acl_access_get,
2489         .attr_set       = posix_acl_access_set,
2490         .attr_remove    = posix_acl_access_remove,
2491         .attr_exists    = posix_acl_access_exists,
2492 };
2493
2494 static struct attrnames posix_acl_default = {
2495         .attr_name      = "posix_acl_default",
2496         .attr_namelen   = sizeof("posix_acl_default") - 1,
2497         .attr_get       = posix_acl_default_get,
2498         .attr_set       = posix_acl_default_set,
2499         .attr_remove    = posix_acl_default_remove,
2500         .attr_exists    = posix_acl_default_exists,
2501 };
2502
2503 static struct attrnames *attr_system_names[] =
2504         { &posix_acl_access, &posix_acl_default };
2505
2506
2507 /*========================================================================
2508  * Namespace-prefix-style attribute name interface routines.
2509  *========================================================================*/
2510
2511 STATIC int
2512 attr_generic_set(
2513         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2514 {
2515         return -bhv_vop_attr_set(vp, name, data, size, xflags, NULL);
2516 }
2517
2518 STATIC int
2519 attr_generic_get(
2520         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2521 {
2522         int     error, asize = size;
2523
2524         error = bhv_vop_attr_get(vp, name, data, &asize, xflags, NULL);
2525         if (!error)
2526                 return asize;
2527         return -error;
2528 }
2529
2530 STATIC int
2531 attr_generic_remove(
2532         bhv_vnode_t *vp, char *name, int xflags)
2533 {
2534         return -bhv_vop_attr_remove(vp, name, xflags, NULL);
2535 }
2536
2537 STATIC int
2538 attr_generic_listadd(
2539         attrnames_t             *prefix,
2540         attrnames_t             *namesp,
2541         void                    *data,
2542         size_t                  size,
2543         ssize_t                 *result)
2544 {
2545         char                    *p = data + *result;
2546
2547         *result += prefix->attr_namelen;
2548         *result += namesp->attr_namelen + 1;
2549         if (!size)
2550                 return 0;
2551         if (*result > size)
2552                 return -ERANGE;
2553         strcpy(p, prefix->attr_name);
2554         p += prefix->attr_namelen;
2555         strcpy(p, namesp->attr_name);
2556         p += namesp->attr_namelen + 1;
2557         return 0;
2558 }
2559
2560 STATIC int
2561 attr_system_list(
2562         bhv_vnode_t             *vp,
2563         void                    *data,
2564         size_t                  size,
2565         ssize_t                 *result)
2566 {
2567         attrnames_t             *namesp;
2568         int                     i, error = 0;
2569
2570         for (i = 0; i < ATTR_SYSCOUNT; i++) {
2571                 namesp = attr_system_names[i];
2572                 if (!namesp->attr_exists || !namesp->attr_exists(vp))
2573                         continue;
2574                 error = attr_generic_listadd(&attr_system, namesp,
2575                                                 data, size, result);
2576                 if (error)
2577                         break;
2578         }
2579         return error;
2580 }
2581
2582 int
2583 attr_generic_list(
2584         bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result)
2585 {
2586         attrlist_cursor_kern_t  cursor = { 0 };
2587         int                     error;
2588
2589         error = bhv_vop_attr_list(vp, data, size, xflags, &cursor, NULL);
2590         if (error > 0)
2591                 return -error;
2592         *result = -error;
2593         return attr_system_list(vp, data, size, result);
2594 }
2595
2596 attrnames_t *
2597 attr_lookup_namespace(
2598         char                    *name,
2599         struct attrnames        **names,
2600         int                     nnames)
2601 {
2602         int                     i;
2603
2604         for (i = 0; i < nnames; i++)
2605                 if (!strncmp(name, names[i]->attr_name, names[i]->attr_namelen))
2606                         return names[i];
2607         return NULL;
2608 }
2609
2610 /*
2611  * Some checks to prevent people abusing EAs to get over quota:
2612  * - Don't allow modifying user EAs on devices/symlinks;
2613  * - Don't allow modifying user EAs if sticky bit set;
2614  */
2615 STATIC int
2616 attr_user_capable(
2617         bhv_vnode_t     *vp,
2618         cred_t          *cred)
2619 {
2620         struct inode    *inode = vn_to_inode(vp);
2621
2622         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2623                 return -EPERM;
2624         if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
2625             !capable(CAP_SYS_ADMIN))
2626                 return -EPERM;
2627         if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
2628             (current_fsuid(cred) != inode->i_uid) && !capable(CAP_FOWNER))
2629                 return -EPERM;
2630         return 0;
2631 }
2632
2633 STATIC int
2634 attr_trusted_capable(
2635         bhv_vnode_t     *vp,
2636         cred_t          *cred)
2637 {
2638         struct inode    *inode = vn_to_inode(vp);
2639
2640         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2641                 return -EPERM;
2642         if (!capable(CAP_SYS_ADMIN))
2643                 return -EPERM;
2644         return 0;
2645 }
2646
2647 STATIC int
2648 attr_secure_capable(
2649         bhv_vnode_t     *vp,
2650         cred_t          *cred)
2651 {
2652         return -ENOSECURITY;
2653 }
2654
2655 STATIC int
2656 attr_system_set(
2657         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2658 {
2659         attrnames_t     *namesp;
2660         int             error;
2661
2662         if (xflags & ATTR_CREATE)
2663                 return -EINVAL;
2664
2665         namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2666         if (!namesp)
2667                 return -EOPNOTSUPP;
2668         error = namesp->attr_set(vp, name, data, size, xflags);
2669         if (!error)
2670                 error = vn_revalidate(vp);
2671         return error;
2672 }
2673
2674 STATIC int
2675 attr_system_get(
2676         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2677 {
2678         attrnames_t     *namesp;
2679
2680         namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2681         if (!namesp)
2682                 return -EOPNOTSUPP;
2683         return namesp->attr_get(vp, name, data, size, xflags);
2684 }
2685
2686 STATIC int
2687 attr_system_remove(
2688         bhv_vnode_t *vp, char *name, int xflags)
2689 {
2690         attrnames_t     *namesp;
2691
2692         namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2693         if (!namesp)
2694                 return -EOPNOTSUPP;
2695         return namesp->attr_remove(vp, name, xflags);
2696 }
2697
2698 struct attrnames attr_system = {
2699         .attr_name      = "system.",
2700         .attr_namelen   = sizeof("system.") - 1,
2701         .attr_flag      = ATTR_SYSTEM,
2702         .attr_get       = attr_system_get,
2703         .attr_set       = attr_system_set,
2704         .attr_remove    = attr_system_remove,
2705         .attr_capable   = (attrcapable_t)fs_noerr,
2706 };
2707
2708 struct attrnames attr_trusted = {
2709         .attr_name      = "trusted.",
2710         .attr_namelen   = sizeof("trusted.") - 1,
2711         .attr_flag      = ATTR_ROOT,
2712         .attr_get       = attr_generic_get,
2713         .attr_set       = attr_generic_set,
2714         .attr_remove    = attr_generic_remove,
2715         .attr_capable   = attr_trusted_capable,
2716 };
2717
2718 struct attrnames attr_secure = {
2719         .attr_name      = "security.",
2720         .attr_namelen   = sizeof("security.") - 1,
2721         .attr_flag      = ATTR_SECURE,
2722         .attr_get       = attr_generic_get,
2723         .attr_set       = attr_generic_set,
2724         .attr_remove    = attr_generic_remove,
2725         .attr_capable   = attr_secure_capable,
2726 };
2727
2728 struct attrnames attr_user = {
2729         .attr_name      = "user.",
2730         .attr_namelen   = sizeof("user.") - 1,
2731         .attr_get       = attr_generic_get,
2732         .attr_set       = attr_generic_set,
2733         .attr_remove    = attr_generic_remove,
2734         .attr_capable   = attr_user_capable,
2735 };
2736
2737 struct attrnames *attr_namespaces[] =
2738         { &attr_system, &attr_trusted, &attr_secure, &attr_user };