block: implement drain buffers

[linux-2.6-omap-h63xx.git] / block / ll_rw_blk.c
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c

index b01dee3ae7f36c1d29014bc91fee09466ed7b922..768987dc2697501921a606e22078b879a24f8fd3 100644 (file)
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -39,7 +39,7 @@
  
  static void blk_unplug_work(struct work_struct *work);
  static void blk_unplug_timeout(unsigned long data);
-static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
+static void drive_stat_acct(struct request *rq, int new_io);
  static void init_request_from_bio(struct request *req, struct bio *bio);
  static int __make_request(struct request_queue *q, struct bio *bio);
  static struct io_context *current_io_context(gfp_t gfp_flags, int node);
@@ -725,6 +725,45 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
  
  EXPORT_SYMBOL(blk_queue_stack_limits);
  
+/**
+ * blk_queue_dma_drain - Set up a drain buffer for excess dma.
+ * @q:  the request queue for the device
+ * @buf:       physically contiguous buffer
+ * @size:      size of the buffer in bytes
+ *
+ * Some devices have excess DMA problems and can't simply discard (or
+ * zero fill) the unwanted piece of the transfer.  They have to have a
+ * real area of memory to transfer it into.  The use case for this is
+ * ATAPI devices in DMA mode.  If the packet command causes a transfer
+ * bigger than the transfer size some HBAs will lock up if there
+ * aren't DMA elements to contain the excess transfer.  What this API
+ * does is adjust the queue so that the buf is always appended
+ * silently to the scatterlist.
+ *
+ * Note: This routine decrements max_hw_segments and max_phys_segments
+ * to make room for the drain buffer.  If you call
+ * blk_queue_max_hw_segments() or blk_queue_max_phys_segments() after
+ * this routine, set the limit to one fewer than your device can
+ * support, otherwise there won't be room for the drain buffer.
+ *
+ * Returns 0, or -EINVAL (queue unchanged) if either limit is below 2.
+ */
+int blk_queue_dma_drain(struct request_queue *q, void *buf,
+                               unsigned int size)
+{
+       if (q->max_hw_segments < 2 || q->max_phys_segments < 2)
+               return -EINVAL;
+       /* make room for appending the drain */
+       --q->max_hw_segments;
+       --q->max_phys_segments;
+       q->dma_drain_buffer = buf;
+       q->dma_drain_size = size;
+
+       return 0;
+}
+
+EXPORT_SYMBOL_GPL(blk_queue_dma_drain);
+
  /**
   * blk_queue_segment_boundary - set boundary rules for segment merging
   * @q:  the request queue for the device
@@ -759,6 +798,30 @@ void blk_queue_dma_alignment(struct request_queue *q, int mask)
  
  EXPORT_SYMBOL(blk_queue_dma_alignment);
  
+/**
+ * blk_queue_update_dma_alignment - update dma length and memory alignment
+ * @q:     the request queue for the device
+ * @mask:  alignment mask
+ *
+ * description:
+ *    update required memory and length alignment for direct dma
+ *    transactions.  If the requested alignment is larger than the
+ *    current alignment, the queue alignment is raised to the new
+ *    value, otherwise it is left alone.  This lets multiple objects
+ *    (driver, device, transport etc) set their respective alignments
+ *    without interfering.  A mask larger than PAGE_SIZE trips BUG_ON().
+ *
+ **/
+void blk_queue_update_dma_alignment(struct request_queue *q, int mask)
+{
+       BUG_ON(mask > PAGE_SIZE);
+
+       if (mask > q->dma_alignment)
+               q->dma_alignment = mask;
+}
+
+EXPORT_SYMBOL(blk_queue_update_dma_alignment);
+
  /**
   * blk_queue_find_tag - find a request by its tag and queue
   * @q:  The request queue for the device
@@ -791,7 +854,6 @@ static int __blk_free_tags(struct blk_queue_tag *bqt)
         retval = atomic_dec_and_test(&bqt->refcnt);
         if (retval) {
                 BUG_ON(bqt->busy);
-               BUG_ON(!list_empty(&bqt->busy_list));
  
                 kfree(bqt->tag_index);
                 bqt->tag_index = NULL;
@@ -903,7 +965,6 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
         if (init_tag_map(q, tags, depth))
                 goto fail;
  
-       INIT_LIST_HEAD(&tags->busy_list);
         tags->busy = 0;
         atomic_set(&tags->refcnt, 1);
         return tags;
@@ -954,6 +1015,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
          */
         q->queue_tags = tags;
         q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);
+       INIT_LIST_HEAD(&q->tag_busy_list);
         return 0;
  fail:
         kfree(tags);
@@ -1057,18 +1119,16 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
  
         bqt->tag_index[tag] = NULL;
  
-       /*
-        * We use test_and_clear_bit's memory ordering properties here.
-        * The tag_map bit acts as a lock for tag_index[bit], so we need
-        * a barrer before clearing the bit (precisely: release semantics).
-        * Could use clear_bit_unlock when it is merged.
-        */
-       if (unlikely(!test_and_clear_bit(tag, bqt->tag_map))) {
+       if (unlikely(!test_bit(tag, bqt->tag_map))) {
                 printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
                        __FUNCTION__, tag);
                 return;
         }
-
+       /*
+        * The tag_map bit acts as a lock for tag_index[bit], so we need
+        * unlock memory barrier semantics.
+        */
+       clear_bit_unlock(tag, bqt->tag_map);
         bqt->busy--;
  }
  
@@ -1114,17 +1174,17 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
                 if (tag >= bqt->max_depth)
                         return 1;
  
-       } while (test_and_set_bit(tag, bqt->tag_map));
+       } while (test_and_set_bit_lock(tag, bqt->tag_map));
         /*
-        * We rely on test_and_set_bit providing lock memory ordering semantics
-        * (could use test_and_set_bit_lock when it is merged).
+        * We need lock ordering semantics given by test_and_set_bit_lock.
+        * See blk_queue_end_tag for details.
          */
  
         rq->cmd_flags |= REQ_QUEUED;
         rq->tag = tag;
         bqt->tag_index[tag] = rq;
         blkdev_dequeue_request(rq);
-       list_add(&rq->queuelist, &bqt->busy_list);
+       list_add(&rq->queuelist, &q->tag_busy_list);
         bqt->busy++;
         return 0;
  }
@@ -1145,24 +1205,10 @@ EXPORT_SYMBOL(blk_queue_start_tag);
   **/
  void blk_queue_invalidate_tags(struct request_queue *q)
  {
-       struct blk_queue_tag *bqt = q->queue_tags;
         struct list_head *tmp, *n;
-       struct request *rq;
-
-       list_for_each_safe(tmp, n, &bqt->busy_list) {
-               rq = list_entry_rq(tmp);
  
-               if (rq->tag == -1) {
-                       printk(KERN_ERR
-                              "%s: bad tag found on list\n", __FUNCTION__);
-                       list_del_init(&rq->queuelist);
-                       rq->cmd_flags &= ~REQ_QUEUED;
-               } else
-                       blk_queue_end_tag(q, rq);
-
-               rq->cmd_flags &= ~REQ_STARTED;
-               __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
-       }
+       list_for_each_safe(tmp, n, &q->tag_busy_list)
+               blk_requeue_request(q, list_entry_rq(tmp));
  }
  
  EXPORT_SYMBOL(blk_queue_invalidate_tags);
@@ -1372,8 +1418,18 @@ new_segment:
                 bvprv = bvec;
         } /* segments in rq */
  
+       if (q->dma_drain_size) {
+               sg->page_link &= ~0x02;
+               sg = sg_next(sg);
+               sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
+                           q->dma_drain_size,
+                           ((unsigned long)q->dma_drain_buffer) &
+                           (PAGE_SIZE - 1));
+               nsegs++;
+       }
+
         if (sg)
-               __sg_mark_end(sg);
+               sg_mark_end(sg);
  
         return nsegs;
  }
@@ -1638,15 +1694,7 @@ static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
  {
         struct request_queue *q = bdi->unplug_io_data;
  
-       /*
-        * devices don't necessarily have an ->unplug_fn defined
-        */
-       if (q->unplug_fn) {
-               blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
-                                       q->rq.count[READ] + q->rq.count[WRITE]);
-
-               q->unplug_fn(q);
-       }
+       blk_unplug(q);
  }
  
  static void blk_unplug_work(struct work_struct *work)
@@ -1670,6 +1718,20 @@ static void blk_unplug_timeout(unsigned long data)
         kblockd_schedule_work(&q->unplug_work);
  }
  
+void blk_unplug(struct request_queue *q)
+{
+       /*
+        * ->unplug_fn is optional; when set, trace the rq counts and call it
+        */
+       if (q->unplug_fn) {
+               blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
+                                       q->rq.count[READ] + q->rq.count[WRITE]);
+
+               q->unplug_fn(q);
+       }
+}
+EXPORT_SYMBOL(blk_unplug);
+
  /**
   * blk_start_queue - restart a previously stopped queue
   * @q:    The &struct request_queue in question
@@ -1738,6 +1800,7 @@ EXPORT_SYMBOL(blk_stop_queue);
  void blk_sync_queue(struct request_queue *q)
  {
         del_timer_sync(&q->unplug_timer);
+       kblockd_flush_work(&q->unplug_work);
  }
  EXPORT_SYMBOL(blk_sync_queue);
  
@@ -1872,9 +1935,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
  
         init_timer(&q->unplug_timer);
  
-       kobject_set_name(&q->kobj, "%s", "queue");
-       q->kobj.ktype = &queue_ktype;
-       kobject_init(&q->kobj);
+       kobject_init(&q->kobj, &queue_ktype);
  
         mutex_init(&q->sysfs_lock);
  
@@ -2341,7 +2402,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
         if (blk_rq_tagged(rq))
                 blk_queue_end_tag(q, rq);
  
-       drive_stat_acct(rq, rq->nr_sectors, 1);
+       drive_stat_acct(rq, 1);
         __elv_add_request(q, rq, where, 0);
         blk_start_queueing(q);
         spin_unlock_irqrestore(q->queue_lock, flags);
@@ -2736,7 +2797,7 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
  
  EXPORT_SYMBOL(blkdev_issue_flush);
  
-static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
+static void drive_stat_acct(struct request *rq, int new_io)
  {
         int rw = rq_data_dir(rq);
  
@@ -2758,7 +2819,7 @@ static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
   */
  static inline void add_request(struct request_queue * q, struct request * req)
  {
-       drive_stat_acct(req, req->nr_sectors, 1);
+       drive_stat_acct(req, 1);
  
         /*
          * elevator indicated where it wants this request to be
@@ -3015,7 +3076,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
                         req->biotail = bio;
                         req->nr_sectors = req->hard_nr_sectors += nr_sectors;
                         req->ioprio = ioprio_best(req->ioprio, prio);
-                       drive_stat_acct(req, nr_sectors, 0);
+                       drive_stat_acct(req, 0);
                         if (!attempt_back_merge(q, req))
                                 elv_merged_request(q, req, el_ret);
                         goto out;
@@ -3042,7 +3103,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
                         req->sector = req->hard_sector = bio->bi_sector;
                         req->nr_sectors = req->hard_nr_sectors += nr_sectors;
                         req->ioprio = ioprio_best(req->ioprio, prio);
-                       drive_stat_acct(req, nr_sectors, 0);
+                       drive_stat_acct(req, 0);
                         if (!attempt_front_merge(q, req))
                                 elv_merged_request(q, req, el_ret);
                         goto out;
@@ -3224,6 +3285,7 @@ static inline void __generic_make_request(struct bio *bio)
         sector_t old_sector;
         int ret, nr_sectors = bio_sectors(bio);
         dev_t old_dev;
+       int err = -EIO;
  
         might_sleep();
  
@@ -3251,7 +3313,7 @@ static inline void __generic_make_request(struct bio *bio)
                                 bdevname(bio->bi_bdev, b),
                                 (long long) bio->bi_sector);
  end_io:
-                       bio_endio(bio, -EIO);
+                       bio_endio(bio, err);
                         break;
                 }
  
@@ -3286,6 +3348,10 @@ end_io:
  
                 if (bio_check_eod(bio, nr_sectors))
                         goto end_io;
+               if (bio_empty_barrier(bio) && !q->prepare_flush_fn) {
+                       err = -EOPNOTSUPP;
+                       goto end_io;
+               }
  
                 ret = q->make_request_fn(q, bio);
         } while (ret);
@@ -4085,23 +4151,7 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
         return queue_var_show(max_hw_sectors_kb, (page));
  }
  
-static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
-{
-       return queue_var_show(q->max_phys_segments, page);
-}
-
-static ssize_t queue_max_segments_store(struct request_queue *q,
-                                       const char *page, size_t count)
-{
-       unsigned long segments;
-       ssize_t ret = queue_var_store(&segments, page, count);
  
-       spin_lock_irq(q->queue_lock);
-       q->max_phys_segments = segments;
-       spin_unlock_irq(q->queue_lock);
-
-       return ret;
-}
  static struct queue_sysfs_entry queue_requests_entry = {
         .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
         .show = queue_requests_show,
@@ -4125,12 +4175,6 @@ static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
         .show = queue_max_hw_sectors_show,
  };
  
-static struct queue_sysfs_entry queue_max_segments_entry = {
-       .attr = {.name = "max_segments", .mode = S_IRUGO | S_IWUSR },
-       .show = queue_max_segments_show,
-       .store = queue_max_segments_store,
-};
-
  static struct queue_sysfs_entry queue_iosched_entry = {
         .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
         .show = elv_iosched_show,
@@ -4142,7 +4186,6 @@ static struct attribute *default_attrs[] = {
         &queue_ra_entry.attr,
         &queue_max_hw_sectors_entry.attr,
         &queue_max_sectors_entry.attr,
-       &queue_max_segments_entry.attr,
         &queue_iosched_entry.attr,
         NULL,
  };
@@ -4210,9 +4253,8 @@ int blk_register_queue(struct gendisk *disk)
         if (!q || !q->request_fn)
                 return -ENXIO;
  
-       q->kobj.parent = kobject_get(&disk->kobj);
-
-       ret = kobject_add(&q->kobj);
+       ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj),
+                         "%s", "queue");
         if (ret < 0)
                 return ret;
  
@@ -4237,6 +4279,6 @@ void blk_unregister_queue(struct gendisk *disk)
  
                 kobject_uevent(&q->kobj, KOBJ_REMOVE);
                 kobject_del(&q->kobj);
-               kobject_put(&disk->kobj);
+               kobject_put(&disk->dev.kobj);
         }
  }