[XFS] Don't block pdflush when writing back inodes
author    David Chinner <dgc@sgi.com>
          Thu, 6 Mar 2008 02:43:42 +0000 (13:43 +1100)
committer Lachlan McIlroy <lachlan@redback.melbourne.sgi.com>
          Fri, 18 Apr 2008 01:37:32 +0000 (11:37 +1000)
When pdflush is writing back inodes, it can get stuck on inode cluster
buffers that are currently under I/O. This occurs when we write data to
multiple inodes in the same inode cluster at the same time.

Effectively, delayed allocation marks the inode dirty during the data
writeback. Hence if the inode cluster was flushed during the writeback of
the first inode, the writeback of the second inode will block waiting for
the inode cluster write to complete before writing it again for the newly
dirtied inode.

We want to prevent this so that we don't block pdflush and slow down all of
writeback. Hence we introduce a non-blocking async inode flush flag that
pdflush uses. If this flag is set, we use non-blocking operations
(e.g. trylocks) wherever we can to avoid blocking or issuing extra I/O.

SGI-PV: 970925
SGI-Modid: xfs-linux-melb:xfs-kern:30501a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
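
The core idea in the message above is that a non-blocking flush attempts its
locks and returns EAGAIN instead of sleeping, so pdflush can move on to other
inodes. A minimal, self-contained sketch of that trylock-or-EAGAIN pattern
using POSIX threads (all names are hypothetical; this is not the XFS
implementation):

/*
 * Illustrative sketch only -- not XFS code. Shows the "try the lock,
 * return EAGAIN instead of sleeping" pattern the commit message describes.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct demo_inode {
	pthread_mutex_t lock;	/* stands in for the inode/cluster buffer lock */
	int		dirty;
};

/* Blocking flush: always waits for the lock (what pdflush used to do). */
static int demo_flush_sync(struct demo_inode *ip)
{
	pthread_mutex_lock(&ip->lock);
	ip->dirty = 0;		/* "write back" the inode */
	pthread_mutex_unlock(&ip->lock);
	return 0;
}

/* Non-blocking flush: if the lock is contended, give up with EAGAIN. */
static int demo_flush_async_noblock(struct demo_inode *ip)
{
	if (pthread_mutex_trylock(&ip->lock) != 0)
		return EAGAIN;	/* caller retries later; no sleeping here */
	ip->dirty = 0;
	pthread_mutex_unlock(&ip->lock);
	return 0;
}

int main(void)
{
	struct demo_inode ip = { .lock = PTHREAD_MUTEX_INITIALIZER, .dirty = 1 };

	printf("non-blocking flush returned %d\n", demo_flush_async_noblock(&ip));
	return demo_flush_sync(&ip);
}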
fs/xfs/linux-2.6/xfs_super.c
fs/xfs/linux-2.6/xfs_vnode.h
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_itable.c
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_trans_buf.c
fs/xfs/xfs_vnodeops.c

index 8831d95187904f0ecf4a1aa1ac5b77cc767aec77..cb9ce90d1deb7010d109c590999b1229b5139dcb 100644 (file)
@@ -896,7 +896,8 @@ xfs_fs_write_inode(
        struct inode            *inode,
        int                     sync)
 {
-       int                     error = 0, flags = FLUSH_INODE;
+       int                     error = 0;
+       int                     flags = 0;
 
        xfs_itrace_entry(XFS_I(inode));
        if (sync) {
index b5ea418693b1b4f1d76ef2b2dd8e6bf149733150..f200e02440827d8e4beb925b4dd18ee81560abb9 100644 (file)
@@ -73,12 +73,9 @@ typedef enum bhv_vrwlock {
 #define IO_INVIS       0x00020         /* don't update inode timestamps */
 
 /*
- * Flags for vop_iflush call
+ * Flags for xfs_inode_flush
  */
 #define FLUSH_SYNC             1       /* wait for flush to complete   */
-#define FLUSH_INODE            2       /* flush the inode itself       */
-#define FLUSH_LOG              4       /* force the last log entry for
-                                        * this inode out to disk       */
 
 /*
  * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
index 6f156faf9d46e3801e39b23bcab9d4b25e04dab2..3c3e9e3c1da81613e0b222650baab4797ddf10d9 100644 (file)
@@ -145,11 +145,16 @@ xfs_imap_to_bp(
        xfs_buf_t       *bp;
 
        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
-                                  (int)imap->im_len, XFS_BUF_LOCK, &bp);
+                                  (int)imap->im_len, buf_flags, &bp);
        if (error) {
-               cmn_err(CE_WARN, "xfs_imap_to_bp: xfs_trans_read_buf()returned "
+               if (error != EAGAIN) {
+                       cmn_err(CE_WARN,
+                               "xfs_imap_to_bp: xfs_trans_read_buf()returned "
                                "an error %d on %s.  Returning error.",
                                error, mp->m_fsname);
+               } else {
+                       ASSERT(buf_flags & XFS_BUF_TRYLOCK);
+               }
                return error;
        }
 
@@ -274,7 +279,8 @@ xfs_itobp(
        xfs_dinode_t    **dipp,
        xfs_buf_t       **bpp,
        xfs_daddr_t     bno,
-       uint            imap_flags)
+       uint            imap_flags,
+       uint            buf_flags)
 {
        xfs_imap_t      imap;
        xfs_buf_t       *bp;
@@ -305,10 +311,17 @@ xfs_itobp(
        }
        ASSERT(bno == 0 || bno == imap.im_blkno);
 
-       error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags);
+       error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags);
        if (error)
                return error;
 
+       if (!bp) {
+               ASSERT(buf_flags & XFS_BUF_TRYLOCK);
+               ASSERT(tp == NULL);
+               *bpp = NULL;
+               return EAGAIN;
+       }
+
        *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
        *bpp = bp;
        return 0;
@@ -812,7 +825,7 @@ xfs_iread(
         * return NULL as well.  Set i_blkno to 0 so that xfs_itobp() will
         * know that this is a new incore inode.
         */
-       error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags);
+       error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK);
        if (error) {
                kmem_zone_free(xfs_inode_zone, ip);
                return error;
@@ -1901,7 +1914,7 @@ xfs_iunlink(
                 * Here we put the head pointer into our next pointer,
                 * and then we fall through to point the head at us.
                 */
-               error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+               error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
                if (error)
                        return error;
 
@@ -2009,7 +2022,7 @@ xfs_iunlink_remove(
                 * of dealing with the buffer when there is no need to
                 * change it.
                 */
-               error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+               error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
                if (error) {
                        cmn_err(CE_WARN,
                                "xfs_iunlink_remove: xfs_itobp()  returned an error %d on %s.  Returning error.",
@@ -2071,7 +2084,7 @@ xfs_iunlink_remove(
                 * Now last_ibp points to the buffer previous to us on
                 * the unlinked list.  Pull us from the list.
                 */
-               error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+               error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
                if (error) {
                        cmn_err(CE_WARN,
                                "xfs_iunlink_remove: xfs_itobp()  returned an error %d on %s.  Returning error.",
@@ -2334,7 +2347,7 @@ xfs_ifree(
 
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
-       error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0);
+       error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
        if (error)
                return error;
 
@@ -2777,38 +2790,41 @@ xfs_iunpin(
 }
 
 /*
- * This is called to wait for the given inode to be unpinned.
- * It will sleep until this happens.  The caller must have the
- * inode locked in at least shared mode so that the buffer cannot
- * be subsequently pinned once someone is waiting for it to be
- * unpinned.
+ * This is called to unpin an inode. It can be directed to wait or to return
+ * immediately without waiting for the inode to be unpinned.  The caller must
+ * have the inode locked in at least shared mode so that the buffer cannot be
+ * subsequently pinned once someone is waiting for it to be unpinned.
  */
 STATIC void
-xfs_iunpin_wait(
-       xfs_inode_t     *ip)
+__xfs_iunpin_wait(
+       xfs_inode_t     *ip,
+       int             wait)
 {
-       xfs_inode_log_item_t    *iip;
-       xfs_lsn_t       lsn;
+       xfs_inode_log_item_t    *iip = ip->i_itemp;
 
        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS));
-
-       if (atomic_read(&ip->i_pincount) == 0) {
+       if (atomic_read(&ip->i_pincount) == 0)
                return;
-       }
 
-       iip = ip->i_itemp;
-       if (iip && iip->ili_last_lsn) {
-               lsn = iip->ili_last_lsn;
-       } else {
-               lsn = (xfs_lsn_t)0;
-       }
+       /* Give the log a push to start the unpinning I/O */
+       xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ?
+                               iip->ili_last_lsn : 0, XFS_LOG_FORCE);
+       if (wait)
+               wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
+}
 
-       /*
-        * Give the log a push so we don't wait here too long.
-        */
-       xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE);
+static inline void
+xfs_iunpin_wait(
+       xfs_inode_t     *ip)
+{
+       __xfs_iunpin_wait(ip, 1);
+}
 
-       wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
+static inline void
+xfs_iunpin_nowait(
+       xfs_inode_t     *ip)
+{
+       __xfs_iunpin_wait(ip, 0);
 }
 
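The hunk above folds the old blocking xfs_iunpin_wait() into a helper that
takes a wait flag, with thin inline wrappers for the blocking and non-blocking
callers. A minimal sketch of that wrapper pattern in plain C (hypothetical
names; the waiting is simulated rather than a real wait_event()):

/*
 * Illustrative sketch of the __helper(wait) + inline wrappers pattern.
 */
#include <stdio.h>

struct demo_item { int pin_count; };

static void __demo_unpin_wait(struct demo_item *item, int wait)
{
	if (item->pin_count == 0)
		return;

	/* Kick off the work that will eventually drop the pin count. */
	printf("forcing log for item\n");

	if (wait) {
		/* Blocking callers sleep until the count reaches zero;
		 * this loop is a stand-in for wait_event(). */
		while (item->pin_count > 0)
			item->pin_count--;
	}
}

static inline void demo_unpin_wait(struct demo_item *item)
{
	__demo_unpin_wait(item, 1);
}

static inline void demo_unpin_nowait(struct demo_item *item)
{
	__demo_unpin_wait(item, 0);
}

int main(void)
{
	struct demo_item item = { .pin_count = 2 };

	demo_unpin_nowait(&item);	/* returns immediately, still pinned */
	demo_unpin_wait(&item);		/* waits (here: simulated) */
	printf("pin_count = %d\n", item.pin_count);
	return 0;
}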
 
@@ -3003,6 +3019,7 @@ xfs_iflush(
        int                     bufwasdelwri;
        struct hlist_node       *entry;
        enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
+       int                     noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK);
 
        XFS_STATS_INC(xs_iflush_count);
 
@@ -3027,11 +3044,21 @@ xfs_iflush(
        }
 
        /*
-        * We can't flush the inode until it is unpinned, so
-        * wait for it.  We know noone new can pin it, because
-        * we are holding the inode lock shared and you need
-        * to hold it exclusively to pin the inode.
+        * We can't flush the inode until it is unpinned, so wait for it if we
+        * are allowed to block.  We know noone new can pin it, because we are
+        * holding the inode lock shared and you need to hold it exclusively to
+        * pin the inode.
+        *
+        * If we are not allowed to block, force the log out asynchronously so
+        * that when we come back the inode will be unpinned. If other inodes
+        * in the same cluster are dirty, they will probably write the inode
+        * out for us if they occur after the log force completes.
         */
+       if (noblock && xfs_ipincount(ip)) {
+               xfs_iunpin_nowait(ip);
+               xfs_ifunlock(ip);
+               return EAGAIN;
+       }
        xfs_iunpin_wait(ip);
 
        /*
@@ -3047,15 +3074,6 @@ xfs_iflush(
                return XFS_ERROR(EIO);
        }
 
-       /*
-        * Get the buffer containing the on-disk inode.
-        */
-       error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0);
-       if (error) {
-               xfs_ifunlock(ip);
-               return error;
-       }
-
        /*
         * Decide how buffer will be flushed out.  This is done before
         * the call to xfs_iflush_int because this field is zeroed by it.
@@ -3072,6 +3090,7 @@ xfs_iflush(
                case XFS_IFLUSH_DELWRI_ELSE_SYNC:
                        flags = 0;
                        break;
+               case XFS_IFLUSH_ASYNC_NOBLOCK:
                case XFS_IFLUSH_ASYNC:
                case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
                        flags = INT_ASYNC;
@@ -3091,6 +3110,7 @@ xfs_iflush(
                case XFS_IFLUSH_DELWRI:
                        flags = INT_DELWRI;
                        break;
+               case XFS_IFLUSH_ASYNC_NOBLOCK:
                case XFS_IFLUSH_ASYNC:
                        flags = INT_ASYNC;
                        break;
@@ -3104,6 +3124,16 @@ xfs_iflush(
                }
        }
 
+       /*
+        * Get the buffer containing the on-disk inode.
+        */
+       error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0,
+                               noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK);
+       if (error || !bp) {
+               xfs_ifunlock(ip);
+               return error;
+       }
+
        /*
         * First flush out the inode that xfs_iflush was called with.
         */
@@ -3112,6 +3142,13 @@ xfs_iflush(
                goto corrupt_out;
        }
 
+       /*
+        * If the buffer is pinned then push on the log now so we won't
+        * get stuck waiting in the write for too long.
+        */
+       if (XFS_BUF_ISPINNED(bp))
+               xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
+
        /*
         * inode clustering:
         * see if other inodes can be gathered into this write
@@ -3181,14 +3218,6 @@ xfs_iflush(
                XFS_STATS_ADD(xs_icluster_flushinode, clcount);
        }
 
-       /*
-        * If the buffer is pinned then push on the log so we won't
-        * get stuck waiting in the write for too long.
-        */
-       if (XFS_BUF_ISPINNED(bp)){
-               xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
-       }
-
        if (flags & INT_DELWRI) {
                xfs_bdwrite(mp, bp);
        } else if (flags & INT_ASYNC) {
index eaa01895ff93ed42e59efa3ca0d238ba9511255f..c3bfffca9214e0fea9c86f9cafbe760fcccff399 100644 (file)
@@ -457,6 +457,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
 #define        XFS_IFLUSH_SYNC                 3
 #define        XFS_IFLUSH_ASYNC                4
 #define        XFS_IFLUSH_DELWRI               5
+#define        XFS_IFLUSH_ASYNC_NOBLOCK        6
 
 /*
  * Flags for xfs_itruncate_start().
@@ -511,7 +512,7 @@ int         xfs_finish_reclaim_all(struct xfs_mount *, int);
  */
 int            xfs_itobp(struct xfs_mount *, struct xfs_trans *,
                          xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **,
-                         xfs_daddr_t, uint);
+                         xfs_daddr_t, uint, uint);
 int            xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
                          xfs_inode_t **, xfs_daddr_t, uint);
 int            xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
index f615e04364f47238c09f22d67a7fa74fe3548ba0..45d8776408ef2a4f0829c25caf9130adf6e57169 100644 (file)
@@ -614,7 +614,8 @@ xfs_bulkstat(
                                                        xfs_buf_relse(bp);
                                                error = xfs_itobp(mp, NULL, ip,
                                                                &dip, &bp, bno,
-                                                               XFS_IMAP_BULKSTAT);
+                                                               XFS_IMAP_BULKSTAT,
+                                                               XFS_BUF_LOCK);
                                                if (!error)
                                                        clustidx = ip->i_boffset / mp->m_sb.sb_inodesize;
                                                kmem_zone_free(xfs_inode_zone, ip);
index b2b70eba282cbaa65e9c43354a0696364d4b8e13..cd24711ae276a1d4edf0b6c4b955302e7a496288 100644 (file)
@@ -3214,7 +3214,8 @@ xlog_recover_process_iunlinks(
                                         * next inode in the bucket.
                                         */
                                        error = xfs_itobp(mp, NULL, ip, &dip,
-                                                       &ibp, 0, 0);
+                                                       &ibp, 0, 0,
+                                                       XFS_BUF_LOCK);
                                        ASSERT(error || (dip != NULL));
                                }
 
index 60b6b898022bcb1be98f96f2a21615e1bff5e131..4e5c010f50406940a5b4758e300530a6922bae63 100644 (file)
@@ -304,7 +304,8 @@ xfs_trans_read_buf(
        if (tp == NULL) {
                bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
                if (!bp)
-                       return XFS_ERROR(ENOMEM);
+                       return (flags & XFS_BUF_TRYLOCK) ?
+                                       EAGAIN : XFS_ERROR(ENOMEM);
 
                if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) {
                        xfs_ioerror_alert("xfs_trans_read_buf", mp,
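
The xfs_trans_read_buf() change above maps a failed untransactioned buffer
read to EAGAIN when the caller asked for a trylock, and to ENOMEM otherwise.
A tiny illustrative sketch of that error-mapping choice (hypothetical helper,
not the XFS code):

/*
 * Illustrative sketch: a missing buffer means "locked elsewhere, retry"
 * under a trylock, not "out of memory".
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define DEMO_TRYLOCK	0x1

static int demo_read_buf(unsigned int flags, bool got_buffer)
{
	if (!got_buffer)
		return (flags & DEMO_TRYLOCK) ? EAGAIN : ENOMEM;
	return 0;
}

int main(void)
{
	printf("blocking miss: %d (ENOMEM=%d)\n", demo_read_buf(0, false), ENOMEM);
	printf("trylock miss:  %d (EAGAIN=%d)\n",
	       demo_read_buf(DEMO_TRYLOCK, false), EAGAIN);
	return 0;
}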
index 40b95e3a88ba143c0972231eb4c49f0d842c6b56..14140f6225ba513cc0c68a38def2ea37a47aaec5 100644 (file)
@@ -3468,29 +3468,6 @@ xfs_inode_flush(
            ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)))
                return 0;
 
-       if (flags & FLUSH_LOG) {
-               if (iip && iip->ili_last_lsn) {
-                       xlog_t          *log = mp->m_log;
-                       xfs_lsn_t       sync_lsn;
-                       int             log_flags = XFS_LOG_FORCE;
-
-                       spin_lock(&log->l_grant_lock);
-                       sync_lsn = log->l_last_sync_lsn;
-                       spin_unlock(&log->l_grant_lock);
-
-                       if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
-                               if (flags & FLUSH_SYNC)
-                                       log_flags |= XFS_LOG_SYNC;
-                               error = xfs_log_force(mp, iip->ili_last_lsn, log_flags);
-                               if (error)
-                                       return error;
-                       }
-
-                       if (ip->i_update_core == 0)
-                               return 0;
-               }
-       }
-
        /*
         * We make this non-blocking if the inode is contended,
         * return EAGAIN to indicate to the caller that they
@@ -3498,30 +3475,22 @@ xfs_inode_flush(
         * blocking on inodes inside another operation right
         * now, they get caught later by xfs_sync.
         */
-       if (flags & FLUSH_INODE) {
-               int     flush_flags;
-
-               if (flags & FLUSH_SYNC) {
-                       xfs_ilock(ip, XFS_ILOCK_SHARED);
-                       xfs_iflock(ip);
-               } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
-                       if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
-                               xfs_iunlock(ip, XFS_ILOCK_SHARED);
-                               return EAGAIN;
-                       }
-               } else {
+       if (flags & FLUSH_SYNC) {
+               xfs_ilock(ip, XFS_ILOCK_SHARED);
+               xfs_iflock(ip);
+       } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+               if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
+                       xfs_iunlock(ip, XFS_ILOCK_SHARED);
                        return EAGAIN;
                }
-
-               if (flags & FLUSH_SYNC)
-                       flush_flags = XFS_IFLUSH_SYNC;
-               else
-                       flush_flags = XFS_IFLUSH_ASYNC;
-
-               error = xfs_iflush(ip, flush_flags);
-               xfs_iunlock(ip, XFS_ILOCK_SHARED);
+       } else {
+               return EAGAIN;
        }
 
+       error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? XFS_IFLUSH_SYNC
+                                                   : XFS_IFLUSH_ASYNC_NOBLOCK);
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
        return error;
 }