www.pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - block/ll_rw_blk.c
block: implement drain buffers
[linux-2.6-omap-h63xx.git] / block / ll_rw_blk.c
index b01dee3ae7f36c1d29014bc91fee09466ed7b922..768987dc2697501921a606e22078b879a24f8fd3 100644 (file)
@@ -39,7 +39,7 @@
 
 static void blk_unplug_work(struct work_struct *work);
 static void blk_unplug_timeout(unsigned long data);
-static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
+static void drive_stat_acct(struct request *rq, int new_io);
 static void init_request_from_bio(struct request *req, struct bio *bio);
 static int __make_request(struct request_queue *q, struct bio *bio);
 static struct io_context *current_io_context(gfp_t gfp_flags, int node);
@@ -725,6 +725,45 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 
 EXPORT_SYMBOL(blk_queue_stack_limits);
 
+/**
+ * blk_queue_dma_drain - Set up a drain buffer for excess dma.
+ *
+ * @q:  the request queue for the device
+ * @buf:       physically contiguous buffer
+ * @size:      size of the buffer in bytes
+ *
+ * Some devices have excess DMA problems and can't simply discard (or
+ * zero fill) the unwanted piece of the transfer.  They have to have a
+ * real area of memory to transfer it into.  The use case for this is
+ * ATAPI devices in DMA mode.  If the packet command causes a transfer
+ * bigger than the transfer size, some HBAs will lock up if there
+ * aren't DMA elements to contain the excess transfer.  What this API
+ * does is adjust the queue so that the buf is always appended
+ * silently to the scatterlist.
+ *
+ * Note: This routine decrements both max_hw_segments and
+ * max_phys_segments to make room for the drain buffer.  If you call
+ * blk_queue_max_hw_segments() or blk_queue_max_phys_segments() after
+ * it, you must set the limit to one fewer than your device can
+ * support, otherwise there won't be room for the drain buffer.
+ * Returns 0 on success, or -EINVAL if either limit is below 2.
+ */
+int blk_queue_dma_drain(struct request_queue *q, void *buf,
+                               unsigned int size)
+{
+       if (q->max_hw_segments < 2 || q->max_phys_segments < 2)
+               return -EINVAL;
+       /* make room for appending the drain */
+       --q->max_hw_segments;
+       --q->max_phys_segments;
+       q->dma_drain_buffer = buf;
+       q->dma_drain_size = size;
+
+       return 0;
+}
+
+EXPORT_SYMBOL_GPL(blk_queue_dma_drain);
+
 /**
  * blk_queue_segment_boundary - set boundary rules for segment merging
  * @q:  the request queue for the device
@@ -759,6 +798,30 @@ void blk_queue_dma_alignment(struct request_queue *q, int mask)
 
 EXPORT_SYMBOL(blk_queue_dma_alignment);
 
+/**
+ * blk_queue_update_dma_alignment - update dma length and memory alignment
+ * @q:     the request queue for the device
+ * @mask:  alignment mask
+ *
+ * description:
+ *    update required memory and length alignment for direct dma transactions.
+ *    If the requested alignment is larger than the current alignment, then
+ *    the current queue alignment is updated to the new value, otherwise it
+ *    is left alone.  The design of this is to allow multiple objects
+ *    (driver, device, transport etc) to set their respective
+ *    alignments without having them interfere.  A mask larger than
+ *    PAGE_SIZE triggers BUG_ON().
+ **/
+void blk_queue_update_dma_alignment(struct request_queue *q, int mask)
+{
+       BUG_ON(mask > PAGE_SIZE);
+
+       if (mask > q->dma_alignment)
+               q->dma_alignment = mask;
+}
+
+EXPORT_SYMBOL(blk_queue_update_dma_alignment);
+
 /**
  * blk_queue_find_tag - find a request by its tag and queue
  * @q:  The request queue for the device
@@ -791,7 +854,6 @@ static int __blk_free_tags(struct blk_queue_tag *bqt)
        retval = atomic_dec_and_test(&bqt->refcnt);
        if (retval) {
                BUG_ON(bqt->busy);
-               BUG_ON(!list_empty(&bqt->busy_list));
 
                kfree(bqt->tag_index);
                bqt->tag_index = NULL;
@@ -903,7 +965,6 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
        if (init_tag_map(q, tags, depth))
                goto fail;
 
-       INIT_LIST_HEAD(&tags->busy_list);
        tags->busy = 0;
        atomic_set(&tags->refcnt, 1);
        return tags;
@@ -954,6 +1015,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
         */
        q->queue_tags = tags;
        q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);
+       INIT_LIST_HEAD(&q->tag_busy_list);
        return 0;
 fail:
        kfree(tags);
@@ -1057,18 +1119,16 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
 
        bqt->tag_index[tag] = NULL;
 
-       /*
-        * We use test_and_clear_bit's memory ordering properties here.
-        * The tag_map bit acts as a lock for tag_index[bit], so we need
-        * a barrer before clearing the bit (precisely: release semantics).
-        * Could use clear_bit_unlock when it is merged.
-        */
-       if (unlikely(!test_and_clear_bit(tag, bqt->tag_map))) {
+       if (unlikely(!test_bit(tag, bqt->tag_map))) {
                printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
                       __FUNCTION__, tag);
                return;
        }
-
+       /*
+        * The tag_map bit acts as a lock for tag_index[bit], so we need
+        * unlock memory barrier semantics.
+        */
+       clear_bit_unlock(tag, bqt->tag_map);
        bqt->busy--;
 }
 
@@ -1114,17 +1174,17 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
                if (tag >= bqt->max_depth)
                        return 1;
 
-       } while (test_and_set_bit(tag, bqt->tag_map));
+       } while (test_and_set_bit_lock(tag, bqt->tag_map));
        /*
-        * We rely on test_and_set_bit providing lock memory ordering semantics
-        * (could use test_and_set_bit_lock when it is merged).
+        * We need lock ordering semantics given by test_and_set_bit_lock.
+        * See blk_queue_end_tag for details.
         */
 
        rq->cmd_flags |= REQ_QUEUED;
        rq->tag = tag;
        bqt->tag_index[tag] = rq;
        blkdev_dequeue_request(rq);
-       list_add(&rq->queuelist, &bqt->busy_list);
+       list_add(&rq->queuelist, &q->tag_busy_list);
        bqt->busy++;
        return 0;
 }
@@ -1145,24 +1205,10 @@ EXPORT_SYMBOL(blk_queue_start_tag);
  **/
 void blk_queue_invalidate_tags(struct request_queue *q)
 {
-       struct blk_queue_tag *bqt = q->queue_tags;
        struct list_head *tmp, *n;
-       struct request *rq;
-
-       list_for_each_safe(tmp, n, &bqt->busy_list) {
-               rq = list_entry_rq(tmp);
 
-               if (rq->tag == -1) {
-                       printk(KERN_ERR
-                              "%s: bad tag found on list\n", __FUNCTION__);
-                       list_del_init(&rq->queuelist);
-                       rq->cmd_flags &= ~REQ_QUEUED;
-               } else
-                       blk_queue_end_tag(q, rq);
-
-               rq->cmd_flags &= ~REQ_STARTED;
-               __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
-       }
+       list_for_each_safe(tmp, n, &q->tag_busy_list)
+               blk_requeue_request(q, list_entry_rq(tmp));
 }
 
 EXPORT_SYMBOL(blk_queue_invalidate_tags);
@@ -1372,8 +1418,18 @@ new_segment:
                bvprv = bvec;
        } /* segments in rq */
 
+       if (q->dma_drain_size) {
+               sg->page_link &= ~0x02;
+               sg = sg_next(sg);
+               sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
+                           q->dma_drain_size,
+                           ((unsigned long)q->dma_drain_buffer) &
+                           (PAGE_SIZE - 1));
+               nsegs++;
+       }
+
        if (sg)
-               __sg_mark_end(sg);
+               sg_mark_end(sg);
 
        return nsegs;
 }
@@ -1638,15 +1694,7 @@ static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
 {
        struct request_queue *q = bdi->unplug_io_data;
 
-       /*
-        * devices don't necessarily have an ->unplug_fn defined
-        */
-       if (q->unplug_fn) {
-               blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
-                                       q->rq.count[READ] + q->rq.count[WRITE]);
-
-               q->unplug_fn(q);
-       }
+       blk_unplug(q);
 }
 
 static void blk_unplug_work(struct work_struct *work)
@@ -1670,6 +1718,20 @@ static void blk_unplug_timeout(unsigned long data)
        kblockd_schedule_work(&q->unplug_work);
 }
 
+void blk_unplug(struct request_queue *q)
+{
+       /* Devices don't necessarily have an ->unplug_fn defined; when one
+        * is set, emit a BLK_TA_UNPLUG_IO trace and then invoke it.
+        */
+       if (q->unplug_fn) {
+               blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
+                                       q->rq.count[READ] + q->rq.count[WRITE]);
+
+               q->unplug_fn(q);
+       }
+}
+EXPORT_SYMBOL(blk_unplug);
+
 /**
  * blk_start_queue - restart a previously stopped queue
  * @q:    The &struct request_queue in question
@@ -1738,6 +1800,7 @@ EXPORT_SYMBOL(blk_stop_queue);
 void blk_sync_queue(struct request_queue *q)
 {
        del_timer_sync(&q->unplug_timer);
+       kblockd_flush_work(&q->unplug_work);
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
@@ -1872,9 +1935,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 
        init_timer(&q->unplug_timer);
 
-       kobject_set_name(&q->kobj, "%s", "queue");
-       q->kobj.ktype = &queue_ktype;
-       kobject_init(&q->kobj);
+       kobject_init(&q->kobj, &queue_ktype);
 
        mutex_init(&q->sysfs_lock);
 
@@ -2341,7 +2402,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
        if (blk_rq_tagged(rq))
                blk_queue_end_tag(q, rq);
 
-       drive_stat_acct(rq, rq->nr_sectors, 1);
+       drive_stat_acct(rq, 1);
        __elv_add_request(q, rq, where, 0);
        blk_start_queueing(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
@@ -2736,7 +2797,7 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
 
 EXPORT_SYMBOL(blkdev_issue_flush);
 
-static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
+static void drive_stat_acct(struct request *rq, int new_io)
 {
        int rw = rq_data_dir(rq);
 
@@ -2758,7 +2819,7 @@ static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
  */
 static inline void add_request(struct request_queue * q, struct request * req)
 {
-       drive_stat_acct(req, req->nr_sectors, 1);
+       drive_stat_acct(req, 1);
 
        /*
         * elevator indicated where it wants this request to be
@@ -3015,7 +3076,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
                        req->biotail = bio;
                        req->nr_sectors = req->hard_nr_sectors += nr_sectors;
                        req->ioprio = ioprio_best(req->ioprio, prio);
-                       drive_stat_acct(req, nr_sectors, 0);
+                       drive_stat_acct(req, 0);
                        if (!attempt_back_merge(q, req))
                                elv_merged_request(q, req, el_ret);
                        goto out;
@@ -3042,7 +3103,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
                        req->sector = req->hard_sector = bio->bi_sector;
                        req->nr_sectors = req->hard_nr_sectors += nr_sectors;
                        req->ioprio = ioprio_best(req->ioprio, prio);
-                       drive_stat_acct(req, nr_sectors, 0);
+                       drive_stat_acct(req, 0);
                        if (!attempt_front_merge(q, req))
                                elv_merged_request(q, req, el_ret);
                        goto out;
@@ -3224,6 +3285,7 @@ static inline void __generic_make_request(struct bio *bio)
        sector_t old_sector;
        int ret, nr_sectors = bio_sectors(bio);
        dev_t old_dev;
+       int err = -EIO;
 
        might_sleep();
 
@@ -3251,7 +3313,7 @@ static inline void __generic_make_request(struct bio *bio)
                                bdevname(bio->bi_bdev, b),
                                (long long) bio->bi_sector);
 end_io:
-                       bio_endio(bio, -EIO);
+                       bio_endio(bio, err);
                        break;
                }
 
@@ -3286,6 +3348,10 @@ end_io:
 
                if (bio_check_eod(bio, nr_sectors))
                        goto end_io;
+               if (bio_empty_barrier(bio) && !q->prepare_flush_fn) {
+                       err = -EOPNOTSUPP;
+                       goto end_io;
+               }
 
                ret = q->make_request_fn(q, bio);
        } while (ret);
@@ -4085,23 +4151,7 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
        return queue_var_show(max_hw_sectors_kb, (page));
 }
 
-static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
-{
-       return queue_var_show(q->max_phys_segments, page);
-}
-
-static ssize_t queue_max_segments_store(struct request_queue *q,
-                                       const char *page, size_t count)
-{
-       unsigned long segments;
-       ssize_t ret = queue_var_store(&segments, page, count);
 
-       spin_lock_irq(q->queue_lock);
-       q->max_phys_segments = segments;
-       spin_unlock_irq(q->queue_lock);
-
-       return ret;
-}
 static struct queue_sysfs_entry queue_requests_entry = {
        .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
        .show = queue_requests_show,
@@ -4125,12 +4175,6 @@ static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
        .show = queue_max_hw_sectors_show,
 };
 
-static struct queue_sysfs_entry queue_max_segments_entry = {
-       .attr = {.name = "max_segments", .mode = S_IRUGO | S_IWUSR },
-       .show = queue_max_segments_show,
-       .store = queue_max_segments_store,
-};
-
 static struct queue_sysfs_entry queue_iosched_entry = {
        .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
        .show = elv_iosched_show,
@@ -4142,7 +4186,6 @@ static struct attribute *default_attrs[] = {
        &queue_ra_entry.attr,
        &queue_max_hw_sectors_entry.attr,
        &queue_max_sectors_entry.attr,
-       &queue_max_segments_entry.attr,
        &queue_iosched_entry.attr,
        NULL,
 };
@@ -4210,9 +4253,8 @@ int blk_register_queue(struct gendisk *disk)
        if (!q || !q->request_fn)
                return -ENXIO;
 
-       q->kobj.parent = kobject_get(&disk->kobj);
-
-       ret = kobject_add(&q->kobj);
+       ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj),
+                         "%s", "queue");
        if (ret < 0)
                return ret;
 
@@ -4237,6 +4279,6 @@ void blk_unregister_queue(struct gendisk *disk)
 
                kobject_uevent(&q->kobj, KOBJ_REMOVE);
                kobject_del(&q->kobj);
-               kobject_put(&disk->kobj);
+               kobject_put(&disk->dev.kobj);
        }
 }