[XFS] Radix tree based inode caching

author David Chinner <dgc@sgi.com>
Tue, 28 Aug 2007 04:00:13 +0000 (14:00 +1000)
committer Tim Shimmin <tes@chook.melbourne.sgi.com>
Mon, 15 Oct 2007 06:50:50 +0000 (16:50 +1000)

diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c

index e3a5fedac1bad6166b94e92bae3dbcb8ac10565b..f6e99fa7a683c84509316dbae93f28386f010897 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -17,10 +17,12 @@
   */
  #include "xfs.h"
  #include "xfs_types.h"
-#include "xfs_dmapi.h"
+#include "xfs_inum.h"
  #include "xfs_log.h"
  #include "xfs_trans.h"
  #include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dmapi.h"
  #include "xfs_mount.h"
  #include "xfs_export.h"
  
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h

index 51c09c114a201934c19ff657edcee15a1fcef52b..9381b0360c4b3e2da63a460dc2bcd6db30657fb5 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -197,6 +197,10 @@ typedef struct xfs_perag
  #endif
         xfs_perag_busy_t *pagb_list;    /* unstable blocks */
         atomic_t        pagf_fstrms;    /* # of filestreams active in this AG */
+
+       int             pag_ici_init;   /* incore inode cache initialised */
+       rwlock_t        pag_ici_lock;   /* incore inode lock */
+       struct radix_tree_root pag_ici_root;    /* incore inode cache root */
  } xfs_perag_t;
  
  #define        XFS_AG_MAXLEVELS(mp)            ((mp)->m_ag_maxlevels)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c

index b0667cb27d6652be3db6731edb646da03eb7584c..c8f2c2886fe4a018516708bc071eae85f42a1b57 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -23,6 +23,7 @@
  #include "xfs_inum.h"
  #include "xfs_trans.h"
  #include "xfs_sb.h"
+#include "xfs_ag.h"
  #include "xfs_dmapi.h"
  #include "xfs_mount.h"
  #include "xfs_buf_item.h"
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h

index f89196cb08d256cf5a8d288630856e7826e4482c..d16c1b971074ba0cb95403b522f1efe083771070 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -89,7 +89,6 @@ struct xfs_mount_args {
  #define XFSMNT_IDELETE         0x08000000      /* inode cluster delete */
  #define XFSMNT_SWALLOC         0x10000000      /* turn on stripe width
                                                  * allocation */
-#define XFSMNT_IHASHSIZE       0x20000000      /* inode hash table size */
  #define XFSMNT_DIRSYNC         0x40000000      /* sync creat,link,unlink,rename
                                                  * symlink,mkdir,rmdir,mknod */
  #define XFSMNT_FLAGS2          0x80000000      /* more flags set in flags2 */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c

index f6b919af7b828c468c0d7bdebac6d87fffde64b9..c171767e242ad3b3d19c8c93ea4651f0e30146ea 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -22,6 +22,7 @@
  #include "xfs_inum.h"
  #include "xfs_trans.h"
  #include "xfs_sb.h"
+#include "xfs_ag.h"
  #include "xfs_dir2.h"
  #include "xfs_dmapi.h"
  #include "xfs_mount.h"
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c

index 7ebe295bd6d3bf459836a72a0cca4b1b6fd7ef8b..d2452699e9b1aca46933da988548344694277b1b 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -22,6 +22,7 @@
  #include "xfs_inum.h"
  #include "xfs_trans.h"
  #include "xfs_sb.h"
+#include "xfs_ag.h"
  #include "xfs_dir2.h"
  #include "xfs_dmapi.h"
  #include "xfs_mount.h"
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c

index 91c61d9632c83236f402f451c6ac9ee1ad1a6fc3..eb18e399e836faf1385006c2aca180f0a808a299 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -22,6 +22,7 @@
  #include "xfs_inum.h"
  #include "xfs_trans.h"
  #include "xfs_sb.h"
+#include "xfs_ag.h"
  #include "xfs_dir2.h"
  #include "xfs_dmapi.h"
  #include "xfs_mount.h"
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c

index c67d735729051c40e91783d6f062e99384069b0a..182c70315ad175b20e1ec5192bec92d9604c5635 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -22,6 +22,7 @@
  #include "xfs_inum.h"
  #include "xfs_trans.h"
  #include "xfs_sb.h"
+#include "xfs_ag.h"
  #include "xfs_dir2.h"
  #include "xfs_dmapi.h"
  #include "xfs_mount.h"
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c

index 8c433163133734d5905741bf1226c128319e82ab..339f9d4a49cd61c807ebb078acfb92254bfec27d 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -22,6 +22,7 @@
  #include "xfs_inum.h"
  #include "xfs_trans.h"
  #include "xfs_sb.h"
+#include "xfs_ag.h"
  #include "xfs_dir2.h"
  #include "xfs_dmapi.h"
  #include "xfs_mount.h"
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c

index 3b14427ee123238d04dbc62fd6c2aa61b0eec33c..f938a51be81b64fb358d12d9284c5479459150ae 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -23,6 +23,7 @@
  #include "xfs_trans.h"
  #include "xfs_buf_item.h"
  #include "xfs_sb.h"
+#include "xfs_ag.h"
  #include "xfs_dmapi.h"
  #include "xfs_mount.h"
  #include "xfs_trans_priv.h"
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c

index 114433a22baafcfba0f51c73d15eeab21f7afe29..e07dcc1b70a6b6f9bc88bbef7e988b8c969da234 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -39,132 +39,14 @@
  #include "xfs_quota.h"
  #include "xfs_utils.h"
  
-/*
- * Initialize the inode hash table for the newly mounted file system.
- * Choose an initial table size based on user specified value, else
- * use a simple algorithm using the maximum number of inodes as an
- * indicator for table size, and clamp it between one and some large
- * number of pages.
- */
-void
-xfs_ihash_init(xfs_mount_t *mp)
-{
-       __uint64_t      icount;
-       uint            i;
-
-       if (!mp->m_ihsize) {
-               icount = mp->m_maxicount ? mp->m_maxicount :
-                        (mp->m_sb.sb_dblocks << mp->m_sb.sb_inopblog);
-               mp->m_ihsize = 1 << max_t(uint, 8,
-                                       (xfs_highbit64(icount) + 1) / 2);
-               mp->m_ihsize = min_t(uint, mp->m_ihsize,
-                                       (64 * NBPP) / sizeof(xfs_ihash_t));
-       }
-
-       mp->m_ihash = kmem_zalloc_greedy(&mp->m_ihsize,
-                                        NBPC * sizeof(xfs_ihash_t),
-                                        mp->m_ihsize * sizeof(xfs_ihash_t),
-                                        KM_SLEEP | KM_MAYFAIL | KM_LARGE);
-       mp->m_ihsize /= sizeof(xfs_ihash_t);
-       for (i = 0; i < mp->m_ihsize; i++)
-               rwlock_init(&(mp->m_ihash[i].ih_lock));
-}
-
-/*
- * Free up structures allocated by xfs_ihash_init, at unmount time.
- */
-void
-xfs_ihash_free(xfs_mount_t *mp)
-{
-       kmem_free(mp->m_ihash, mp->m_ihsize * sizeof(xfs_ihash_t));
-       mp->m_ihash = NULL;
-}
-
-/*
- * Initialize the inode cluster hash table for the newly mounted file system.
- * Its size is derived from the ihash table size.
- */
-void
-xfs_chash_init(xfs_mount_t *mp)
-{
-       uint    i;
-
-       mp->m_chsize = max_t(uint, 1, mp->m_ihsize /
-                        (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog));
-       mp->m_chsize = min_t(uint, mp->m_chsize, mp->m_ihsize);
-       mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize
-                                                * sizeof(xfs_chash_t),
-                                                KM_SLEEP | KM_LARGE);
-       for (i = 0; i < mp->m_chsize; i++) {
-               spinlock_init(&mp->m_chash[i].ch_lock,"xfshash");
-       }
-}
-
-/*
- * Free up structures allocated by xfs_chash_init, at unmount time.
- */
-void
-xfs_chash_free(xfs_mount_t *mp)
-{
-       int     i;
-
-       for (i = 0; i < mp->m_chsize; i++) {
-               spinlock_destroy(&mp->m_chash[i].ch_lock);
-       }
-
-       kmem_free(mp->m_chash, mp->m_chsize*sizeof(xfs_chash_t));
-       mp->m_chash = NULL;
-}
-
-/*
- * Try to move an inode to the front of its hash list if possible
- * (and if its not there already).  Called right after obtaining
- * the list version number and then dropping the read_lock on the
- * hash list in question (which is done right after looking up the
- * inode in question...).
- */
-STATIC void
-xfs_ihash_promote(
-       xfs_ihash_t     *ih,
-       xfs_inode_t     *ip,
-       ulong           version)
-{
-       xfs_inode_t     *iq;
-
-       if ((ip->i_prevp != &ih->ih_next) && write_trylock(&ih->ih_lock)) {
-               if (likely(version == ih->ih_version)) {
-                       /* remove from list */
-                       if ((iq = ip->i_next)) {
-                               iq->i_prevp = ip->i_prevp;
-                       }
-                       *ip->i_prevp = iq;
-
-                       /* insert at list head */
-                       iq = ih->ih_next;
-                       iq->i_prevp = &ip->i_next;
-                       ip->i_next = iq;
-                       ip->i_prevp = &ih->ih_next;
-                       ih->ih_next = ip;
-               }
-               write_unlock(&ih->ih_lock);
-       }
-}
-
  /*
   * Look up an inode by number in the given file system.
- * The inode is looked up in the hash table for the file system
- * represented by the mount point parameter mp.  Each bucket of
- * the hash table is guarded by an individual semaphore.
- *
- * If the inode is found in the hash table, its corresponding vnode
- * is obtained with a call to vn_get().  This call takes care of
- * coordination with the reclamation of the inode and vnode.  Note
- * that the vmap structure is filled in while holding the hash lock.
- * This gives us the state of the inode/vnode when we found it and
- * is used for coordination in vn_get().
+ * The inode is looked up in the cache held in each AG.
+ * If the inode is found in the cache, attach it to the provided
+ * vnode.
   *
- * If it is not in core, read it in from the file system's device and
- * add the inode into the hash table.
+ * If it is not in core, read it in from the file system's device,
+ * add it to the cache and attach the provided vnode.
   *
   * The inode is locked according to the value of the lock_flags parameter.
   * This flag parameter indicates how and if the inode's IO lock and inode lock
@@ -192,274 +74,241 @@ xfs_iget_core(
         xfs_inode_t     **ipp,
         xfs_daddr_t     bno)
  {
-       xfs_ihash_t     *ih;
         xfs_inode_t     *ip;
         xfs_inode_t     *iq;
         bhv_vnode_t     *inode_vp;
-       ulong           version;
         int             error;
-       /* REFERENCED */
-       xfs_chash_t     *ch;
-       xfs_chashlist_t *chl, *chlnew;
-       SPLDECL(s);
+       xfs_icluster_t  *icl, *new_icl = NULL;
+       unsigned long   first_index, mask;
+       xfs_perag_t     *pag;
+       xfs_agino_t     agino;
+
+       /* the radix tree exists only in inode capable AGs */
+       if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
+               return EINVAL;
+
+       /* get the perag structure and ensure that it's inode capable */
+       pag = xfs_get_perag(mp, ino);
+       if (!pag->pagi_inodeok)
+               return EINVAL;
+       ASSERT(pag->pag_ici_init);
+       agino = XFS_INO_TO_AGINO(mp, ino);
  
+again:
+       read_lock(&pag->pag_ici_lock);
+       ip = radix_tree_lookup(&pag->pag_ici_root, agino);
  
-       ih = XFS_IHASH(mp, ino);
+       if (ip != NULL) {
+               /*
+                * If INEW is set this inode is being set up
+                * we need to pause and try again.
+                */
+               if (xfs_iflags_test(ip, XFS_INEW)) {
+                       read_unlock(&pag->pag_ici_lock);
+                       delay(1);
+                       XFS_STATS_INC(xs_ig_frecycle);
  
-again:
-       read_lock(&ih->ih_lock);
+                       goto again;
+               }
  
-       for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
-               if (ip->i_ino == ino) {
+               inode_vp = XFS_ITOV_NULL(ip);
+               if (inode_vp == NULL) {
                         /*
-                        * If INEW is set this inode is being set up
+                        * If IRECLAIM is set this inode is
+                        * on its way out of the system,
                          * we need to pause and try again.
                          */
-                       if (xfs_iflags_test(ip, XFS_INEW)) {
-                               read_unlock(&ih->ih_lock);
+                       if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
+                               read_unlock(&pag->pag_ici_lock);
                                 delay(1);
                                 XFS_STATS_INC(xs_ig_frecycle);
  
                                 goto again;
                         }
+                       ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
  
-                       inode_vp = XFS_ITOV_NULL(ip);
-                       if (inode_vp == NULL) {
-                               /*
-                                * If IRECLAIM is set this inode is
-                                * on its way out of the system,
-                                * we need to pause and try again.
-                                */
-                               if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
-                                       read_unlock(&ih->ih_lock);
-                                       delay(1);
-                                       XFS_STATS_INC(xs_ig_frecycle);
-
-                                       goto again;
-                               }
-                               ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-
-                               /*
-                                * If lookup is racing with unlink, then we
-                                * should return an error immediately so we
-                                * don't remove it from the reclaim list and
-                                * potentially leak the inode.
-                                */
-                               if ((ip->i_d.di_mode == 0) &&
-                                   !(flags & XFS_IGET_CREATE)) {
-                                       read_unlock(&ih->ih_lock);
-                                       return ENOENT;
-                               }
-
-                               /*
-                                * There may be transactions sitting in the
-                                * incore log buffers or being flushed to disk
-                                * at this time.  We can't clear the
-                                * XFS_IRECLAIMABLE flag until these
-                                * transactions have hit the disk, otherwise we
-                                * will void the guarantee the flag provides
-                                * xfs_iunpin()
-                                */
-                               if (xfs_ipincount(ip)) {
-                                       read_unlock(&ih->ih_lock);
-                                       xfs_log_force(mp, 0,
-                                               XFS_LOG_FORCE|XFS_LOG_SYNC);
-                                       XFS_STATS_INC(xs_ig_frecycle);
-                                       goto again;
-                               }
-
-                               vn_trace_exit(vp, "xfs_iget.alloc",
-                                       (inst_t *)__return_address);
+                       /*
+                        * If lookup is racing with unlink, then we
+                        * should return an error immediately so we
+                        * don't remove it from the reclaim list and
+                        * potentially leak the inode.
+                        */
+                       if ((ip->i_d.di_mode == 0) &&
+                           !(flags & XFS_IGET_CREATE)) {
+                               read_unlock(&pag->pag_ici_lock);
+                               xfs_put_perag(mp, pag);
+                               return ENOENT;
+                       }
  
-                               XFS_STATS_INC(xs_ig_found);
+                       /*
+                        * There may be transactions sitting in the
+                        * incore log buffers or being flushed to disk
+                        * at this time.  We can't clear the
+                        * XFS_IRECLAIMABLE flag until these
+                        * transactions have hit the disk, otherwise we
+                        * will void the guarantee the flag provides
+                        * xfs_iunpin()
+                        */
+                       if (xfs_ipincount(ip)) {
+                               read_unlock(&pag->pag_ici_lock);
+                               xfs_log_force(mp, 0,
+                                       XFS_LOG_FORCE|XFS_LOG_SYNC);
+                               XFS_STATS_INC(xs_ig_frecycle);
+                               goto again;
+                       }
  
-                               xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
-                               version = ih->ih_version;
-                               read_unlock(&ih->ih_lock);
-                               xfs_ihash_promote(ih, ip, version);
+                       vn_trace_exit(vp, "xfs_iget.alloc",
+                               (inst_t *)__return_address);
  
-                               XFS_MOUNT_ILOCK(mp);
-                               list_del_init(&ip->i_reclaim);
-                               XFS_MOUNT_IUNLOCK(mp);
+                       XFS_STATS_INC(xs_ig_found);
  
-                               goto finish_inode;
+                       xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
+                       read_unlock(&pag->pag_ici_lock);
  
-                       } else if (vp != inode_vp) {
-                               struct inode *inode = vn_to_inode(inode_vp);
+                       XFS_MOUNT_ILOCK(mp);
+                       list_del_init(&ip->i_reclaim);
+                       XFS_MOUNT_IUNLOCK(mp);
  
-                               /* The inode is being torn down, pause and
-                                * try again.
-                                */
-                               if (inode->i_state & (I_FREEING | I_CLEAR)) {
-                                       read_unlock(&ih->ih_lock);
-                                       delay(1);
-                                       XFS_STATS_INC(xs_ig_frecycle);
+                       goto finish_inode;
  
-                                       goto again;
-                               }
-/* Chances are the other vnode (the one in the inode) is being torn
- * down right now, and we landed on top of it. Question is, what do
- * we do? Unhook the old inode and hook up the new one?
- */
-                               cmn_err(CE_PANIC,
-                       "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
-                                               inode_vp, vp);
-                       }
+               } else if (vp != inode_vp) {
+                       struct inode *inode = vn_to_inode(inode_vp);
  
-                       /*
-                        * Inode cache hit: if ip is not at the front of
-                        * its hash chain, move it there now.
-                        * Do this with the lock held for update, but
-                        * do statistics after releasing the lock.
+                       /* The inode is being torn down, pause and
+                        * try again.
                          */
-                       version = ih->ih_version;
-                       read_unlock(&ih->ih_lock);
-                       xfs_ihash_promote(ih, ip, version);
-                       XFS_STATS_INC(xs_ig_found);
+                       if (inode->i_state & (I_FREEING | I_CLEAR)) {
+                               read_unlock(&pag->pag_ici_lock);
+                               delay(1);
+                               XFS_STATS_INC(xs_ig_frecycle);
  
-finish_inode:
-                       if (ip->i_d.di_mode == 0) {
-                               if (!(flags & XFS_IGET_CREATE))
-                                       return ENOENT;
-                               xfs_iocore_inode_reinit(ip);
+                               goto again;
                         }
+/* Chances are the other vnode (the one in the inode) is being torn
+* down right now, and we landed on top of it. Question is, what do
+* we do? Unhook the old inode and hook up the new one?
+*/
+                       cmn_err(CE_PANIC,
+               "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
+                                       inode_vp, vp);
+               }
  
-                       if (lock_flags != 0)
-                               xfs_ilock(ip, lock_flags);
+               /*
+                * Inode cache hit
+                */
+               read_unlock(&pag->pag_ici_lock);
+               XFS_STATS_INC(xs_ig_found);
  
-                       xfs_iflags_clear(ip, XFS_ISTALE);
-                       vn_trace_exit(vp, "xfs_iget.found",
-                                               (inst_t *)__return_address);
-                       goto return_ip;
+finish_inode:
+               if (ip->i_d.di_mode == 0) {
+                       if (!(flags & XFS_IGET_CREATE)) {
+                               xfs_put_perag(mp, pag);
+                               return ENOENT;
+                       }
+                       xfs_iocore_inode_reinit(ip);
                 }
+
+               if (lock_flags != 0)
+                       xfs_ilock(ip, lock_flags);
+
+               xfs_iflags_clear(ip, XFS_ISTALE);
+               vn_trace_exit(vp, "xfs_iget.found",
+                                       (inst_t *)__return_address);
+               goto return_ip;
         }
  
         /*
-        * Inode cache miss: save the hash chain version stamp and unlock
-        * the chain, so we don't deadlock in vn_alloc.
+        * Inode cache miss
          */
+       read_unlock(&pag->pag_ici_lock);
         XFS_STATS_INC(xs_ig_missed);
  
-       version = ih->ih_version;
-
-       read_unlock(&ih->ih_lock);
-
         /*
          * Read the disk inode attributes into a new inode structure and get
          * a new vnode for it. This should also initialize i_ino and i_mount.
          */
         error = xfs_iread(mp, tp, ino, &ip, bno,
                           (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0);
-       if (error)
+       if (error) {
+               xfs_put_perag(mp, pag);
                 return error;
+       }
  
         vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address);
  
         xfs_inode_lock_init(ip, vp);
         xfs_iocore_inode_init(ip);
-
         if (lock_flags)
                 xfs_ilock(ip, lock_flags);
  
         if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
                 xfs_idestroy(ip);
+               xfs_put_perag(mp, pag);
                 return ENOENT;
         }
  
         /*
-        * Put ip on its hash chain, unless someone else hashed a duplicate
-        * after we released the hash lock.
+        * This is a bit messy - we preallocate everything we _might_
+        * need before we pick up the ici lock. That way we don't have to
+        * juggle locks and go all the way back to the start.
          */
-       write_lock(&ih->ih_lock);
+       new_icl = kmem_zone_alloc(xfs_icluster_zone, KM_SLEEP);
+       if (radix_tree_preload(GFP_KERNEL)) {
+               delay(1);
+               goto again;
+       }
+       mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
+       first_index = agino & mask;
+       write_lock(&pag->pag_ici_lock);
  
-       if (ih->ih_version != version) {
-               for (iq = ih->ih_next; iq != NULL; iq = iq->i_next) {
-                       if (iq->i_ino == ino) {
-                               write_unlock(&ih->ih_lock);
-                               xfs_idestroy(ip);
+       /*
+        * Find the cluster if it exists
+        */
+       icl = NULL;
+       if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq,
+                                                       first_index, 1)) {
+               if ((iq->i_ino & mask) == first_index)
+                       icl = iq->i_cluster;
+       }
  
-                               XFS_STATS_INC(xs_ig_dup);
-                               goto again;
-                       }
-               }
+       /*
+        * insert the new inode
+        */
+       error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
+       if (unlikely(error)) {
+               BUG_ON(error != -EEXIST);
+               write_unlock(&pag->pag_ici_lock);
+               radix_tree_preload_end();
+               xfs_idestroy(ip);
+               XFS_STATS_INC(xs_ig_dup);
+               goto again;
         }
  
         /*
          * These values _must_ be set before releasing ihlock!
          */
-       ip->i_hash = ih;
-       if ((iq = ih->ih_next)) {
-               iq->i_prevp = &ip->i_next;
-       }
-       ip->i_next = iq;
-       ip->i_prevp = &ih->ih_next;
-       ih->ih_next = ip;
         ip->i_udquot = ip->i_gdquot = NULL;
-       ih->ih_version++;
         xfs_iflags_set(ip, XFS_INEW);
-       write_unlock(&ih->ih_lock);
  
-       /*
-        * put ip on its cluster's hash chain
-        */
-       ASSERT(ip->i_chash == NULL && ip->i_cprev == NULL &&
-              ip->i_cnext == NULL);
-
-       chlnew = NULL;
-       ch = XFS_CHASH(mp, ip->i_blkno);
- chlredo:
-       s = mutex_spinlock(&ch->ch_lock);
-       for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) {
-               if (chl->chl_blkno == ip->i_blkno) {
-
-                       /* insert this inode into the doubly-linked list
-                        * where chl points */
-                       if ((iq = chl->chl_ip)) {
-                               ip->i_cprev = iq->i_cprev;
-                               iq->i_cprev->i_cnext = ip;
-                               iq->i_cprev = ip;
-                               ip->i_cnext = iq;
-                       } else {
-                               ip->i_cnext = ip;
-                               ip->i_cprev = ip;
-                       }
-                       chl->chl_ip = ip;
-                       ip->i_chash = chl;
-                       break;
-               }
-       }
+       ASSERT(ip->i_cluster == NULL);
  
-       /* no hash list found for this block; add a new hash list */
-       if (chl == NULL)  {
-               if (chlnew == NULL) {
-                       mutex_spinunlock(&ch->ch_lock, s);
-                       ASSERT(xfs_chashlist_zone != NULL);
-                       chlnew = (xfs_chashlist_t *)
-                                       kmem_zone_alloc(xfs_chashlist_zone,
-                                               KM_SLEEP);
-                       ASSERT(chlnew != NULL);
-                       goto chlredo;
-               } else {
-                       ip->i_cnext = ip;
-                       ip->i_cprev = ip;
-                       ip->i_chash = chlnew;
-                       chlnew->chl_ip = ip;
-                       chlnew->chl_blkno = ip->i_blkno;
-                       if (ch->ch_list)
-                               ch->ch_list->chl_prev = chlnew;
-                       chlnew->chl_next = ch->ch_list;
-                       chlnew->chl_prev = NULL;
-                       ch->ch_list = chlnew;
-                       chlnew = NULL;
-               }
+       if (!icl) {
+               spin_lock_init(&new_icl->icl_lock);
+               INIT_HLIST_HEAD(&new_icl->icl_inodes);
+               icl = new_icl;
+               new_icl = NULL;
         } else {
-               if (chlnew != NULL) {
-                       kmem_zone_free(xfs_chashlist_zone, chlnew);
-               }
+               ASSERT(!hlist_empty(&icl->icl_inodes));
         }
+       spin_lock(&icl->icl_lock);
+       hlist_add_head(&ip->i_cnode, &icl->icl_inodes);
+       ip->i_cluster = icl;
+       spin_unlock(&icl->icl_lock);
  
-       mutex_spinunlock(&ch->ch_lock, s);
-
+       write_unlock(&pag->pag_ici_lock);
+       radix_tree_preload_end();
+       if (new_icl)
+               kmem_zone_free(xfs_icluster_zone, new_icl);
  
         /*
          * Link ip to its mount and thread it on the mount's inode list.
@@ -478,6 +327,7 @@ finish_inode:
         mp->m_inodes = ip;
  
         XFS_MOUNT_IUNLOCK(mp);
+       xfs_put_perag(mp, pag);
  
   return_ip:
         ASSERT(ip->i_df.if_ext_max ==
@@ -587,32 +437,19 @@ xfs_inode_incore(xfs_mount_t      *mp,
                  xfs_ino_t      ino,
                  xfs_trans_t    *tp)
  {
-       xfs_ihash_t     *ih;
         xfs_inode_t     *ip;
-       ulong           version;
-
-       ih = XFS_IHASH(mp, ino);
-       read_lock(&ih->ih_lock);
-       for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
-               if (ip->i_ino == ino) {
-                       /*
-                        * If we find it and tp matches, return it.
-                        * Also move it to the front of the hash list
-                        * if we find it and it is not already there.
-                        * Otherwise break from the loop and return
-                        * NULL.
-                        */
-                       if (ip->i_transp == tp) {
-                               version = ih->ih_version;
-                               read_unlock(&ih->ih_lock);
-                               xfs_ihash_promote(ih, ip, version);
-                               return (ip);
-                       }
-                       break;
-               }
-       }
-       read_unlock(&ih->ih_lock);
-       return (NULL);
+       xfs_perag_t     *pag;
+
+       pag = xfs_get_perag(mp, ino);
+       read_lock(&pag->pag_ici_lock);
+       ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ino));
+       read_unlock(&pag->pag_ici_lock);
+       xfs_put_perag(mp, pag);
+
+       /* the returned inode must match the transaction */
+       if (ip && (ip->i_transp != tp))
+               return NULL;
+       return ip;
  }
  
  /*
@@ -718,58 +555,26 @@ void
  xfs_iextract(
         xfs_inode_t     *ip)
  {
-       xfs_ihash_t     *ih;
+       xfs_mount_t     *mp = ip->i_mount;
+       xfs_perag_t     *pag = xfs_get_perag(mp, ip->i_ino);
         xfs_inode_t     *iq;
-       xfs_mount_t     *mp;
-       xfs_chash_t     *ch;
-       xfs_chashlist_t *chl, *chm;
-       SPLDECL(s);
-
-       ih = ip->i_hash;
-       write_lock(&ih->ih_lock);
-       if ((iq = ip->i_next)) {
-               iq->i_prevp = ip->i_prevp;
-       }
-       *ip->i_prevp = iq;
-       ih->ih_version++;
-       write_unlock(&ih->ih_lock);
+
+       write_lock(&pag->pag_ici_lock);
+       radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
+       write_unlock(&pag->pag_ici_lock);
+       xfs_put_perag(mp, pag);
  
         /*
-        * Remove from cluster hash list
-        *   1) delete the chashlist if this is the last inode on the chashlist
-        *   2) unchain from list of inodes
-        *   3) point chashlist->chl_ip to 'chl_next' if to this inode.
+        * Remove from cluster list
          */
         mp = ip->i_mount;
-       ch = XFS_CHASH(mp, ip->i_blkno);
-       s = mutex_spinlock(&ch->ch_lock);
-
-       if (ip->i_cnext == ip) {
-               /* Last inode on chashlist */
-               ASSERT(ip->i_cnext == ip && ip->i_cprev == ip);
-               ASSERT(ip->i_chash != NULL);
-               chm=NULL;
-               chl = ip->i_chash;
-               if (chl->chl_prev)
-                       chl->chl_prev->chl_next = chl->chl_next;
-               else
-                       ch->ch_list = chl->chl_next;
-               if (chl->chl_next)
-                       chl->chl_next->chl_prev = chl->chl_prev;
-               kmem_zone_free(xfs_chashlist_zone, chl);
-       } else {
-               /* delete one inode from a non-empty list */
-               iq = ip->i_cnext;
-               iq->i_cprev = ip->i_cprev;
-               ip->i_cprev->i_cnext = iq;
-               if (ip->i_chash->chl_ip == ip) {
-                       ip->i_chash->chl_ip = iq;
-               }
-               ip->i_chash = __return_address;
-               ip->i_cprev = __return_address;
-               ip->i_cnext = __return_address;
-       }
-       mutex_spinunlock(&ch->ch_lock, s);
+       spin_lock(&ip->i_cluster->icl_lock);
+       hlist_del(&ip->i_cnode);
+       spin_unlock(&ip->i_cluster->icl_lock);
+
+       /* was last inode in cluster? */
+       if (hlist_empty(&ip->i_cluster->icl_inodes))
+               kmem_zone_free(xfs_icluster_zone, ip->i_cluster);
  
         /*
          * Remove from mount's inode list.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index 41a0c73b601a64c3b1ac88564324a3793adddf79..c1b917bd5951e421aa517beee3a2046afa17266b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -52,7 +52,7 @@
  
  kmem_zone_t *xfs_ifork_zone;
  kmem_zone_t *xfs_inode_zone;
-kmem_zone_t *xfs_chashlist_zone;
+kmem_zone_t *xfs_icluster_zone;
  
  /*
   * Used in xfs_itruncate().  This is the maximum number of extents
@@ -2182,10 +2182,10 @@ xfs_ifree_cluster(
         int                     i, j, found, pre_flushed;
         xfs_daddr_t             blkno;
         xfs_buf_t               *bp;
-       xfs_ihash_t             *ih;
         xfs_inode_t             *ip, **ip_found;
         xfs_inode_log_item_t    *iip;
         xfs_log_item_t          *lip;
+       xfs_perag_t             *pag = xfs_get_perag(mp, inum);
         SPLDECL(s);
  
         if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
@@ -2220,23 +2220,20 @@ xfs_ifree_cluster(
                  */
                 found = 0;
                 for (i = 0; i < ninodes; i++) {
-                       ih = XFS_IHASH(mp, inum + i);
-                       read_lock(&ih->ih_lock);
-                       for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
-                               if (ip->i_ino == inum + i)
-                                       break;
-                       }
+                       read_lock(&pag->pag_ici_lock);
+                       ip = radix_tree_lookup(&pag->pag_ici_root,
+                                       XFS_INO_TO_AGINO(mp, (inum + i)));
  
                         /* Inode not in memory or we found it already,
                          * nothing to do
                          */
                         if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
-                               read_unlock(&ih->ih_lock);
+                               read_unlock(&pag->pag_ici_lock);
                                 continue;
                         }
  
                         if (xfs_inode_clean(ip)) {
-                               read_unlock(&ih->ih_lock);
+                               read_unlock(&pag->pag_ici_lock);
                                 continue;
                         }
  
@@ -2259,7 +2256,7 @@ xfs_ifree_cluster(
                                                 ip_found[found++] = ip;
                                         }
                                 }
-                               read_unlock(&ih->ih_lock);
+                               read_unlock(&pag->pag_ici_lock);
                                 continue;
                         }
  
@@ -2277,8 +2274,7 @@ xfs_ifree_cluster(
                                         xfs_iunlock(ip, XFS_ILOCK_EXCL);
                                 }
                         }
-
-                       read_unlock(&ih->ih_lock);
+                       read_unlock(&pag->pag_ici_lock);
                 }
  
                 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 
@@ -2333,6 +2329,7 @@ xfs_ifree_cluster(
         }
  
         kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *));
+       xfs_put_perag(mp, pag);
  }
  
  /*
@@ -3050,12 +3047,11 @@ xfs_iflush(
         xfs_mount_t             *mp;
         int                     error;
         /* REFERENCED */
-       xfs_chash_t             *ch;
         xfs_inode_t             *iq;
         int                     clcount;        /* count of inodes clustered */
         int                     bufwasdelwri;
+       struct hlist_node       *entry;
         enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
-       SPLDECL(s);
  
         XFS_STATS_INC(xs_iflush_count);
  
@@ -3169,14 +3165,14 @@ xfs_iflush(
          * inode clustering:
          * see if other inodes can be gathered into this write
          */
-
-       ip->i_chash->chl_buf = bp;
-
-       ch = XFS_CHASH(mp, ip->i_blkno);
-       s = mutex_spinlock(&ch->ch_lock);
+       spin_lock(&ip->i_cluster->icl_lock);
+       ip->i_cluster->icl_buf = bp;
  
         clcount = 0;
-       for (iq = ip->i_cnext; iq != ip; iq = iq->i_cnext) {
+       hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) {
+               if (iq == ip)
+                       continue;
+
                 /*
                  * Do an un-protected check to see if the inode is dirty and
                  * is a candidate for flushing.  These checks will be repeated
@@ -3227,7 +3223,7 @@ xfs_iflush(
                         xfs_iunlock(iq, XFS_ILOCK_SHARED);
                 }
         }
-       mutex_spinunlock(&ch->ch_lock, s);
+       spin_unlock(&ip->i_cluster->icl_lock);
  
         if (clcount) {
                 XFS_STATS_INC(xs_icluster_flushcnt);
@@ -3264,7 +3260,7 @@ cluster_corrupt_out:
         /* Corruption detected in the clustering loop.  Invalidate the
          * inode buffer and shut down the filesystem.
          */
-       mutex_spinunlock(&ch->ch_lock, s);
+       spin_unlock(&ip->i_cluster->icl_lock);
  
         /*
          * Clean up the buffer.  If it was B_DELWRI, just release it --
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h

index 873b9f783d29403444cceac2661a35a78ba639fb..b6dd23d9b3d63e69b7a50013a4354a16aa8084ea 100644 (file)
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -172,41 +172,18 @@ typedef struct xfs_iocore {
  extern void xfs_iocore_inode_init(struct xfs_inode *);
  extern void xfs_iocore_inode_reinit(struct xfs_inode *);
  
-
-/*
- * This is the type used in the xfs inode hash table.
- * An array of these is allocated for each mounted
- * file system to hash the inodes for that file system.
- */
-typedef struct xfs_ihash {
-       struct xfs_inode        *ih_next;
-       rwlock_t                ih_lock;
-       uint                    ih_version;
-} xfs_ihash_t;
-
-#define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)(ino)) % (mp)->m_ihsize))
-
  /*
- * This is the xfs inode cluster hash.  This hash is used by xfs_iflush to
- * find inodes that share a cluster and can be flushed to disk at the same
- * time.
+ * This is the xfs inode cluster structure.  This structure is used by
+ * xfs_iflush to find inodes that share a cluster and can be flushed to disk at
+ * the same time.
   */
-typedef struct xfs_chashlist {
-       struct xfs_chashlist    *chl_next;
-       struct xfs_chashlist    *chl_prev;
-       struct xfs_inode        *chl_ip;
-       xfs_daddr_t             chl_blkno;      /* starting block number of
+typedef struct xfs_icluster {
+       struct hlist_head       icl_inodes;     /* list of inodes on cluster */
+       xfs_daddr_t             icl_blkno;      /* starting block number of
                                                  * the cluster */
-       struct xfs_buf          *chl_buf;       /* the inode buffer */
-} xfs_chashlist_t;
-
-typedef struct xfs_chash {
-       xfs_chashlist_t         *ch_list;
-       lock_t                  ch_lock;
-} xfs_chash_t;
-
-#define XFS_CHASH(mp,blk) ((mp)->m_chash + (((uint)blk) % (mp)->m_chsize))
-
+       struct xfs_buf          *icl_buf;       /* the inode buffer */
+       lock_t                  icl_lock;       /* inode list lock */
+} xfs_icluster_t;
  
  /*
   * This is the xfs in-core inode structure.
@@ -269,21 +246,15 @@ typedef struct xfs_icdinode {
  } xfs_icdinode_t;
  
  typedef struct {
-       struct xfs_ihash        *ip_hash;       /* pointer to hash header */
-       struct xfs_inode        *ip_next;       /* inode hash link forw */
         struct xfs_inode        *ip_mnext;      /* next inode in mount list */
         struct xfs_inode        *ip_mprev;      /* ptr to prev inode */
-       struct xfs_inode        **ip_prevp;     /* ptr to prev i_next */
         struct xfs_mount        *ip_mount;      /* fs mount struct ptr */
  } xfs_iptr_t;
  
  typedef struct xfs_inode {
         /* Inode linking and identification information. */
-       struct xfs_ihash        *i_hash;        /* pointer to hash header */
-       struct xfs_inode        *i_next;        /* inode hash link forw */
         struct xfs_inode        *i_mnext;       /* next inode in mount list */
         struct xfs_inode        *i_mprev;       /* ptr to prev inode */
-       struct xfs_inode        **i_prevp;      /* ptr to prev i_next */
         struct xfs_mount        *i_mount;       /* fs mount struct ptr */
         struct list_head        i_reclaim;      /* reclaim list */
         struct bhv_desc         i_bhv_desc;     /* inode behavior descriptor*/
@@ -324,9 +295,8 @@ typedef struct xfs_inode {
         unsigned int            i_delayed_blks; /* count of delay alloc blks */
  
         xfs_icdinode_t          i_d;            /* most of ondisk inode */
-       xfs_chashlist_t         *i_chash;       /* cluster hash list header */
-       struct xfs_inode        *i_cnext;       /* cluster hash link forward */
-       struct xfs_inode        *i_cprev;       /* cluster hash link backward */
+       xfs_icluster_t          *i_cluster;     /* cluster list header */
+       struct hlist_node       i_cnode;        /* cluster link node */
  
         xfs_fsize_t             i_size;         /* in-memory size */
         /* Trace buffers per inode. */
@@ -521,8 +491,6 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
   */
  void           xfs_ihash_init(struct xfs_mount *);
  void           xfs_ihash_free(struct xfs_mount *);
-void           xfs_chash_init(struct xfs_mount *);
-void           xfs_chash_free(struct xfs_mount *);
  xfs_inode_t    *xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
                                   struct xfs_trans *);
  void            xfs_inode_lock_init(xfs_inode_t *, struct bhv_vnode *);
@@ -633,7 +601,7 @@ void                xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
  #define        xfs_inobp_check(mp, bp)
  #endif /* DEBUG */
  
-extern struct kmem_zone        *xfs_chashlist_zone;
+extern struct kmem_zone        *xfs_icluster_zone;
  extern struct kmem_zone        *xfs_ifork_zone;
  extern struct kmem_zone        *xfs_inode_zone;
  extern struct kmem_zone        *xfs_ili_zone;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c

index f4daf1ec9931fee40f94ff81d0664bf67f3af518..71f25947251d81479fe984f041522624e9940732 100644 (file)
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -160,11 +160,6 @@ xfs_mount_free(
         xfs_mount_t     *mp,
         int             remove_bhv)
  {
-       if (mp->m_ihash)
-               xfs_ihash_free(mp);
-       if (mp->m_chash)
-               xfs_chash_free(mp);
-
         if (mp->m_perag) {
                 int     agno;
  
@@ -342,6 +337,17 @@ xfs_mount_validate_sb(
         return 0;
  }
  
+STATIC void
+xfs_initialize_perag_icache(
+       xfs_perag_t     *pag)           /* per-AG structure to set up */
+{
+       if (!pag->pag_ici_init) {       /* do nothing if already set up */
+               rwlock_init(&pag->pag_ici_lock);
+               INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
+               pag->pag_ici_init = 1;  /* makes any later call a no-op */
+       }
+}
+
  xfs_agnumber_t
  xfs_initialize_perag(
         bhv_vfs_t       *vfs,
@@ -396,12 +402,14 @@ xfs_initialize_perag(
                         pag->pagi_inodeok = 1;
                         if (index < max_metadata)
                                 pag->pagf_metadata = 1;
+                       xfs_initialize_perag_icache(pag);
                 }
         } else {
                 /* Setup default behavior for smaller filesystems */
                 for (index = 0; index < agcount; index++) {
                         pag = &mp->m_perag[index];
                         pag->pagi_inodeok = 1;
+                       xfs_initialize_perag_icache(pag);
                 }
         }
         return index;
@@ -1032,13 +1040,6 @@ xfs_mountfs(
          */
         xfs_trans_init(mp);
  
-       /*
-        * Allocate and initialize the inode hash table for this
-        * file system.
-        */
-       xfs_ihash_init(mp);
-       xfs_chash_init(mp);
-
         /*
          * Allocate and initialize the per-ag data.
          */
@@ -1190,8 +1191,6 @@ xfs_mountfs(
   error3:
         xfs_log_unmount_dealloc(mp);
   error2:
-       xfs_ihash_free(mp);
-       xfs_chash_free(mp);
         for (agno = 0; agno < sbp->sb_agcount; agno++)
                 if (mp->m_perag[agno].pagb_list)
                         kmem_free(mp->m_perag[agno].pagb_list,
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h

index 9ceff40326d00a29c3ff1dd1aa07081bd677cce0..bc23cb407701328c2bf33666faba1eb52d8be3f3 100644 (file)
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -57,10 +57,7 @@ struct log;
  struct bhv_vfs;
  struct bhv_vnode;
  struct xfs_mount_args;
-struct xfs_ihash;
-struct xfs_chash;
  struct xfs_inode;
-struct xfs_perag;
  struct xfs_iocore;
  struct xfs_bmbt_irec;
  struct xfs_bmap_free;
@@ -335,8 +332,6 @@ typedef struct xfs_mount {
         xfs_agnumber_t          m_agirotor;     /* last ag dir inode alloced */
         lock_t                  m_agirotor_lock;/* .. and lock protecting it */
         xfs_agnumber_t          m_maxagi;       /* highest inode alloc group */
-       size_t                  m_ihsize;       /* size of next field */
-       struct xfs_ihash        *m_ihash;       /* fs private inode hash table*/
         struct xfs_inode        *m_inodes;      /* active inode list */
         struct list_head        m_del_inodes;   /* inodes to reclaim */
         mutex_t                 m_ilock;        /* inode list mutex */
@@ -458,7 +453,7 @@ typedef struct xfs_mount {
  #define XFS_MOUNT_IDELETE      (1ULL << 18)    /* delete empty inode clusters*/
  #define XFS_MOUNT_SWALLOC      (1ULL << 19)    /* turn on stripe width
                                                  * allocation */
-#define XFS_MOUNT_IHASHSIZE    (1ULL << 20)    /* inode hash table size */
+                            /* (1ULL << 20)    -- currently unused */
  #define XFS_MOUNT_DIRSYNC      (1ULL << 21)    /* synchronous directory ops */
  #define XFS_MOUNT_COMPAT_IOSIZE        (1ULL << 22)    /* don't report large preferred
                                                  * I/O size in stat() */
@@ -571,6 +566,21 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
         return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
  }
  
+/*
+ * perag get/put wrappers for eventual ref counting
+ */
+static inline xfs_perag_t *
+xfs_get_perag(struct xfs_mount *mp, xfs_ino_t ino)
+{
+       return &mp->m_perag[XFS_INO_TO_AGNO(mp, ino)];  /* perag of ino's AG */
+}
+
+static inline void
+xfs_put_perag(struct xfs_mount *mp, xfs_perag_t *pag)
+{
+       /* intentionally empty: xfs_get_perag() takes no reference yet */
+}
+
  /*
   * Per-cpu superblock locking functions
   */
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c

index 53d9600af4a424bc2b18e0f38e96597fd8f57c4e..187318e8d2594ce63ef7e4855d2b726ed1eb8ca4 100644 (file)
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -22,6 +22,7 @@
  #include "xfs_inum.h"
  #include "xfs_trans.h"
  #include "xfs_sb.h"
+#include "xfs_ag.h"
  #include "xfs_dir2.h"
  #include "xfs_dmapi.h"
  #include "xfs_mount.h"
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c

index ceb4f6e999608ea25e3984e7d4e5100ed9a75c1b..5b2ff59f19cf9b04dfa38e8fe533d7acba5068c9 100644 (file)
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -22,6 +22,7 @@
  #include "xfs_inum.h"
  #include "xfs_trans.h"
  #include "xfs_sb.h"
+#include "xfs_ag.h"
  #include "xfs_dmapi.h"
  #include "xfs_mount.h"
  #include "xfs_trans_priv.h"
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c

index b290270dd4a69f941d18d98a16bf840ed57d25c4..27cce2a9c7e9108debaab50b96a92a2c0109cb39 100644 (file)
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -22,6 +22,7 @@
  #include "xfs_inum.h"
  #include "xfs_trans.h"
  #include "xfs_sb.h"
+#include "xfs_ag.h"
  #include "xfs_dmapi.h"
  #include "xfs_mount.h"
  #include "xfs_trans_priv.h"
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c

index 4a27648b544628b11bd3fb26000097090604d622..1644be14a144c1e6a0b77a137dccd56f06588baf 100644 (file)
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -117,8 +117,8 @@ xfs_init(void)
         xfs_ili_zone =
                 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
                                         KM_ZONE_SPREAD, NULL);
-       xfs_chashlist_zone =
-               kmem_zone_init_flags(sizeof(xfs_chashlist_t), "xfs_chashlist",
+       xfs_icluster_zone =
+               kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster",
                                         KM_ZONE_SPREAD, NULL);
  
         /*
@@ -163,7 +163,7 @@ xfs_cleanup(void)
         extern kmem_zone_t      *xfs_efd_zone;
         extern kmem_zone_t      *xfs_efi_zone;
         extern kmem_zone_t      *xfs_buf_item_zone;
-       extern kmem_zone_t      *xfs_chashlist_zone;
+       extern kmem_zone_t      *xfs_icluster_zone;
  
         xfs_cleanup_procfs();
         xfs_sysctl_unregister();
@@ -199,7 +199,7 @@ xfs_cleanup(void)
         kmem_zone_destroy(xfs_efi_zone);
         kmem_zone_destroy(xfs_ifork_zone);
         kmem_zone_destroy(xfs_ili_zone);
-       kmem_zone_destroy(xfs_chashlist_zone);
+       kmem_zone_destroy(xfs_icluster_zone);
  }
  
  /*
@@ -246,7 +246,6 @@ xfs_start_flags(
                         ap->logbufsize);
                 return XFS_ERROR(EINVAL);
         }
-       mp->m_ihsize = ap->ihashsize;
         mp->m_logbsize = ap->logbufsize;
         mp->m_fsname_len = strlen(ap->fsname) + 1;
         mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
@@ -293,8 +292,6 @@ xfs_start_flags(
                 mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
         }
  
-       if (ap->flags & XFSMNT_IHASHSIZE)
-               mp->m_flags |= XFS_MOUNT_IHASHSIZE;
         if (ap->flags & XFSMNT_IDELETE)
                 mp->m_flags |= XFS_MOUNT_IDELETE;
         if (ap->flags & XFSMNT_DIRSYNC)
@@ -1673,7 +1670,6 @@ xfs_vget(
  #define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
  #define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
  #define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
-#define MNTOPT_IHASHSIZE    "ihashsize"    /* size of inode hash table */
  #define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
  #define MNTOPT_BARRIER "barrier"       /* use writer barriers for log write and
                                          * unwritten extent conversion */
@@ -1799,15 +1795,6 @@ xfs_parseargs(
                         iosize = suffix_strtoul(value, &eov, 10);
                         args->flags |= XFSMNT_IOSIZE;
                         args->iosizelog = ffs(iosize) - 1;
-               } else if (!strcmp(this_char, MNTOPT_IHASHSIZE)) {
-                       if (!value || !*value) {
-                               cmn_err(CE_WARN,
-                                       "XFS: %s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       args->flags |= XFSMNT_IHASHSIZE;
-                       args->ihashsize = simple_strtoul(value, &eov, 10);
                 } else if (!strcmp(this_char, MNTOPT_GRPID) ||
                            !strcmp(this_char, MNTOPT_BSDGROUPS)) {
                         vfsp->vfs_flag |= VFS_GRPID;
@@ -1876,6 +1863,9 @@ xfs_parseargs(
                         args->flags &= ~XFSMNT_ATTR2;
                 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
                         args->flags2 |= XFSMNT2_FILESTREAMS;
+               } else if (!strcmp(this_char, "ihashsize")) {
+                       cmn_err(CE_WARN,
+       "XFS: ihashsize no longer used, option is deprecated.");
                 } else if (!strcmp(this_char, "osyncisdsync")) {
                         /* no-op, this is now the default */
                         cmn_err(CE_WARN,
@@ -1966,9 +1956,6 @@ xfs_showargs(
                         seq_puts(m, xfs_infop->str);
         }
  
-       if (mp->m_flags & XFS_MOUNT_IHASHSIZE)
-               seq_printf(m, "," MNTOPT_IHASHSIZE "=%d", (int)mp->m_ihsize);
-
         if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
                 seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
                                 (int)(1 << mp->m_writeio_log) >> 10);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c

index bde4a1ad90f2fb7bbc333a324f1e87131c4cc3a6..15bc01b2d6a03e58f12f4abe5ffe3e496c6a75b1 100644 (file)
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -3876,7 +3876,7 @@ xfs_finish_reclaim(
         int             locked,
         int             sync_mode)
  {
-       xfs_ihash_t     *ih = ip->i_hash;
+       xfs_perag_t     *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
         bhv_vnode_t     *vp = XFS_ITOV_NULL(ip);
         int             error;
  
@@ -3888,12 +3888,12 @@ xfs_finish_reclaim(
          * Once we have the XFS_IRECLAIM flag set it will not touch
          * us.
          */
-       write_lock(&ih->ih_lock);
+       write_lock(&pag->pag_ici_lock);
         spin_lock(&ip->i_flags_lock);
         if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
             (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) {
                 spin_unlock(&ip->i_flags_lock);
-               write_unlock(&ih->ih_lock);
+               write_unlock(&pag->pag_ici_lock);
                 if (locked) {
                         xfs_ifunlock(ip);
                         xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -3902,7 +3902,8 @@ xfs_finish_reclaim(
         }
         __xfs_iflags_set(ip, XFS_IRECLAIM);
         spin_unlock(&ip->i_flags_lock);
-       write_unlock(&ih->ih_lock);
+       write_unlock(&pag->pag_ici_lock);
+       xfs_put_perag(ip->i_mount, pag);
  
         /*
          * If the inode is still dirty, then flush it out.  If the inode
author	David Chinner <dgc@sgi.com>
	Tue, 28 Aug 2007 04:00:13 +0000 (14:00 +1000)
committer	Tim Shimmin <tes@chook.melbourne.sgi.com>
	Mon, 15 Oct 2007 06:50:50 +0000 (16:50 +1000)
fs/xfs/linux-2.6/xfs_export.c		patch \| blob \| history
fs/xfs/xfs_ag.h		patch \| blob \| history
fs/xfs/xfs_buf_item.c		patch \| blob \| history
fs/xfs/xfs_clnt.h		patch \| blob \| history
fs/xfs/xfs_dir2_block.c		patch \| blob \| history
fs/xfs/xfs_dir2_data.c		patch \| blob \| history
fs/xfs/xfs_dir2_node.c		patch \| blob \| history
fs/xfs/xfs_dir2_sf.c		patch \| blob \| history
fs/xfs/xfs_error.c		patch \| blob \| history
fs/xfs/xfs_extfree_item.c		patch \| blob \| history
fs/xfs/xfs_iget.c		patch \| blob \| history
fs/xfs/xfs_inode.c		patch \| blob \| history
fs/xfs/xfs_inode.h		patch \| blob \| history
fs/xfs/xfs_mount.c		patch \| blob \| history
fs/xfs/xfs_mount.h		patch \| blob \| history
fs/xfs/xfs_rename.c		patch \| blob \| history
fs/xfs/xfs_trans_ail.c		patch \| blob \| history
fs/xfs/xfs_trans_extfree.c		patch \| blob \| history
fs/xfs/xfs_vfsops.c		patch \| blob \| history
fs/xfs/xfs_vnodeops.c		patch \| blob \| history