]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - drivers/gpu/drm/i915/i915_gem.c
i915: use io-mapping interfaces instead of a variety of mapping kludges
[linux-2.6-omap-h63xx.git] / drivers / gpu / drm / i915 / i915_gem.c
1 /*
2  * Copyright © 2008 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include "drmP.h"
29 #include "drm.h"
30 #include "i915_drm.h"
31 #include "i915_drv.h"
32 #include <linux/swap.h>
33
34 static int
35 i915_gem_object_set_domain(struct drm_gem_object *obj,
36                             uint32_t read_domains,
37                             uint32_t write_domain);
38 static int
39 i915_gem_object_set_domain_range(struct drm_gem_object *obj,
40                                  uint64_t offset,
41                                  uint64_t size,
42                                  uint32_t read_domains,
43                                  uint32_t write_domain);
44 static int
45 i915_gem_set_domain(struct drm_gem_object *obj,
46                     struct drm_file *file_priv,
47                     uint32_t read_domains,
48                     uint32_t write_domain);
49 static int i915_gem_object_get_page_list(struct drm_gem_object *obj);
50 static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
51 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
52
53 static void
54 i915_gem_cleanup_ringbuffer(struct drm_device *dev);
55
56 int
57 i915_gem_init_ioctl(struct drm_device *dev, void *data,
58                     struct drm_file *file_priv)
59 {
60         drm_i915_private_t *dev_priv = dev->dev_private;
61         struct drm_i915_gem_init *args = data;
62
63         mutex_lock(&dev->struct_mutex);
64
65         if (args->gtt_start >= args->gtt_end ||
66             (args->gtt_start & (PAGE_SIZE - 1)) != 0 ||
67             (args->gtt_end & (PAGE_SIZE - 1)) != 0) {
68                 mutex_unlock(&dev->struct_mutex);
69                 return -EINVAL;
70         }
71
72         drm_mm_init(&dev_priv->mm.gtt_space, args->gtt_start,
73             args->gtt_end - args->gtt_start);
74
75         dev->gtt_total = (uint32_t) (args->gtt_end - args->gtt_start);
76
77         mutex_unlock(&dev->struct_mutex);
78
79         return 0;
80 }
81
82
83 /**
84  * Creates a new mm object and returns a handle to it.
85  */
86 int
87 i915_gem_create_ioctl(struct drm_device *dev, void *data,
88                       struct drm_file *file_priv)
89 {
90         struct drm_i915_gem_create *args = data;
91         struct drm_gem_object *obj;
92         int handle, ret;
93
94         args->size = roundup(args->size, PAGE_SIZE);
95
96         /* Allocate the new object */
97         obj = drm_gem_object_alloc(dev, args->size);
98         if (obj == NULL)
99                 return -ENOMEM;
100
101         ret = drm_gem_handle_create(file_priv, obj, &handle);
102         mutex_lock(&dev->struct_mutex);
103         drm_gem_object_handle_unreference(obj);
104         mutex_unlock(&dev->struct_mutex);
105
106         if (ret)
107                 return ret;
108
109         args->handle = handle;
110
111         return 0;
112 }
113
114 /**
115  * Reads data from the object referenced by handle.
116  *
117  * On error, the contents of *data are undefined.
118  */
119 int
120 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
121                      struct drm_file *file_priv)
122 {
123         struct drm_i915_gem_pread *args = data;
124         struct drm_gem_object *obj;
125         struct drm_i915_gem_object *obj_priv;
126         ssize_t read;
127         loff_t offset;
128         int ret;
129
130         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
131         if (obj == NULL)
132                 return -EBADF;
133         obj_priv = obj->driver_private;
134
135         /* Bounds check source.
136          *
137          * XXX: This could use review for overflow issues...
138          */
139         if (args->offset > obj->size || args->size > obj->size ||
140             args->offset + args->size > obj->size) {
141                 drm_gem_object_unreference(obj);
142                 return -EINVAL;
143         }
144
145         mutex_lock(&dev->struct_mutex);
146
147         ret = i915_gem_object_set_domain_range(obj, args->offset, args->size,
148                                                I915_GEM_DOMAIN_CPU, 0);
149         if (ret != 0) {
150                 drm_gem_object_unreference(obj);
151                 mutex_unlock(&dev->struct_mutex);
152                 return ret;
153         }
154
155         offset = args->offset;
156
157         read = vfs_read(obj->filp, (char __user *)(uintptr_t)args->data_ptr,
158                         args->size, &offset);
159         if (read != args->size) {
160                 drm_gem_object_unreference(obj);
161                 mutex_unlock(&dev->struct_mutex);
162                 if (read < 0)
163                         return read;
164                 else
165                         return -EINVAL;
166         }
167
168         drm_gem_object_unreference(obj);
169         mutex_unlock(&dev->struct_mutex);
170
171         return 0;
172 }
173
174 /* This is the fast write path which cannot handle
175  * page faults in the source data
176  */
177
178 static inline int
179 fast_user_write(struct io_mapping *mapping,
180                 loff_t page_base, int page_offset,
181                 char __user *user_data,
182                 int length)
183 {
184         char *vaddr_atomic;
185         unsigned long unwritten;
186
187         vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
188         unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
189                                                       user_data, length);
190         io_mapping_unmap_atomic(vaddr_atomic);
191         if (unwritten)
192                 return -EFAULT;
193         return 0;
194 }
195
196 /* Here's the write path which can sleep for
197  * page faults
198  */
199
200 static inline int
201 slow_user_write(struct io_mapping *mapping,
202                 loff_t page_base, int page_offset,
203                 char __user *user_data,
204                 int length)
205 {
206         char __iomem *vaddr;
207         unsigned long unwritten;
208
209         vaddr = io_mapping_map_wc(mapping, page_base);
210         if (vaddr == NULL)
211                 return -EFAULT;
212         unwritten = __copy_from_user(vaddr + page_offset,
213                                      user_data, length);
214         io_mapping_unmap(vaddr);
215         if (unwritten)
216                 return -EFAULT;
217         return 0;
218 }
219
220 static int
221 i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
222                     struct drm_i915_gem_pwrite *args,
223                     struct drm_file *file_priv)
224 {
225         struct drm_i915_gem_object *obj_priv = obj->driver_private;
226         drm_i915_private_t *dev_priv = dev->dev_private;
227         ssize_t remain;
228         loff_t offset, page_base;
229         char __user *user_data;
230         int page_offset, page_length;
231         int ret;
232
233         user_data = (char __user *) (uintptr_t) args->data_ptr;
234         remain = args->size;
235         if (!access_ok(VERIFY_READ, user_data, remain))
236                 return -EFAULT;
237
238
239         mutex_lock(&dev->struct_mutex);
240         ret = i915_gem_object_pin(obj, 0);
241         if (ret) {
242                 mutex_unlock(&dev->struct_mutex);
243                 return ret;
244         }
245         ret = i915_gem_set_domain(obj, file_priv,
246                                   I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
247         if (ret)
248                 goto fail;
249
250         obj_priv = obj->driver_private;
251         offset = obj_priv->gtt_offset + args->offset;
252         obj_priv->dirty = 1;
253
254         while (remain > 0) {
255                 /* Operation in this page
256                  *
257                  * page_base = page offset within aperture
258                  * page_offset = offset within page
259                  * page_length = bytes to copy for this page
260                  */
261                 page_base = (offset & ~(PAGE_SIZE-1));
262                 page_offset = offset & (PAGE_SIZE-1);
263                 page_length = remain;
264                 if ((page_offset + remain) > PAGE_SIZE)
265                         page_length = PAGE_SIZE - page_offset;
266
267                 ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
268                                        page_offset, user_data, page_length);
269
270                 /* If we get a fault while copying data, then (presumably) our
271                  * source page isn't available. In this case, use the
272                  * non-atomic function
273                  */
274                 if (ret) {
275                         ret = slow_user_write (dev_priv->mm.gtt_mapping,
276                                                page_base, page_offset,
277                                                user_data, page_length);
278                         if (ret)
279                                 goto fail;
280                 }
281
282                 remain -= page_length;
283                 user_data += page_length;
284                 offset += page_length;
285         }
286
287 fail:
288         i915_gem_object_unpin(obj);
289         mutex_unlock(&dev->struct_mutex);
290
291         return ret;
292 }
293
294 static int
295 i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
296                       struct drm_i915_gem_pwrite *args,
297                       struct drm_file *file_priv)
298 {
299         int ret;
300         loff_t offset;
301         ssize_t written;
302
303         mutex_lock(&dev->struct_mutex);
304
305         ret = i915_gem_set_domain(obj, file_priv,
306                                   I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
307         if (ret) {
308                 mutex_unlock(&dev->struct_mutex);
309                 return ret;
310         }
311
312         offset = args->offset;
313
314         written = vfs_write(obj->filp,
315                             (char __user *)(uintptr_t) args->data_ptr,
316                             args->size, &offset);
317         if (written != args->size) {
318                 mutex_unlock(&dev->struct_mutex);
319                 if (written < 0)
320                         return written;
321                 else
322                         return -EINVAL;
323         }
324
325         mutex_unlock(&dev->struct_mutex);
326
327         return 0;
328 }
329
330 /**
331  * Writes data to the object referenced by handle.
332  *
333  * On error, the contents of the buffer that were to be modified are undefined.
334  */
335 int
336 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
337                       struct drm_file *file_priv)
338 {
339         struct drm_i915_gem_pwrite *args = data;
340         struct drm_gem_object *obj;
341         struct drm_i915_gem_object *obj_priv;
342         int ret = 0;
343
344         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
345         if (obj == NULL)
346                 return -EBADF;
347         obj_priv = obj->driver_private;
348
349         /* Bounds check destination.
350          *
351          * XXX: This could use review for overflow issues...
352          */
353         if (args->offset > obj->size || args->size > obj->size ||
354             args->offset + args->size > obj->size) {
355                 drm_gem_object_unreference(obj);
356                 return -EINVAL;
357         }
358
359         /* We can only do the GTT pwrite on untiled buffers, as otherwise
360          * it would end up going through the fenced access, and we'll get
361          * different detiling behavior between reading and writing.
362          * pread/pwrite currently are reading and writing from the CPU
363          * perspective, requiring manual detiling by the client.
364          */
365         if (obj_priv->tiling_mode == I915_TILING_NONE &&
366             dev->gtt_total != 0)
367                 ret = i915_gem_gtt_pwrite(dev, obj, args, file_priv);
368         else
369                 ret = i915_gem_shmem_pwrite(dev, obj, args, file_priv);
370
371 #if WATCH_PWRITE
372         if (ret)
373                 DRM_INFO("pwrite failed %d\n", ret);
374 #endif
375
376         drm_gem_object_unreference(obj);
377
378         return ret;
379 }
380
381 /**
382  * Called when user space prepares to use an object
383  */
384 int
385 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
386                           struct drm_file *file_priv)
387 {
388         struct drm_i915_gem_set_domain *args = data;
389         struct drm_gem_object *obj;
390         int ret;
391
392         if (!(dev->driver->driver_features & DRIVER_GEM))
393                 return -ENODEV;
394
395         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
396         if (obj == NULL)
397                 return -EBADF;
398
399         mutex_lock(&dev->struct_mutex);
400 #if WATCH_BUF
401         DRM_INFO("set_domain_ioctl %p(%d), %08x %08x\n",
402                  obj, obj->size, args->read_domains, args->write_domain);
403 #endif
404         ret = i915_gem_set_domain(obj, file_priv,
405                                   args->read_domains, args->write_domain);
406         drm_gem_object_unreference(obj);
407         mutex_unlock(&dev->struct_mutex);
408         return ret;
409 }
410
411 /**
412  * Called when user space has done writes to this buffer
413  */
414 int
415 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
416                       struct drm_file *file_priv)
417 {
418         struct drm_i915_gem_sw_finish *args = data;
419         struct drm_gem_object *obj;
420         struct drm_i915_gem_object *obj_priv;
421         int ret = 0;
422
423         if (!(dev->driver->driver_features & DRIVER_GEM))
424                 return -ENODEV;
425
426         mutex_lock(&dev->struct_mutex);
427         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
428         if (obj == NULL) {
429                 mutex_unlock(&dev->struct_mutex);
430                 return -EBADF;
431         }
432
433 #if WATCH_BUF
434         DRM_INFO("%s: sw_finish %d (%p %d)\n",
435                  __func__, args->handle, obj, obj->size);
436 #endif
437         obj_priv = obj->driver_private;
438
439         /* Pinned buffers may be scanout, so flush the cache */
440         if ((obj->write_domain & I915_GEM_DOMAIN_CPU) && obj_priv->pin_count) {
441                 i915_gem_clflush_object(obj);
442                 drm_agp_chipset_flush(dev);
443         }
444         drm_gem_object_unreference(obj);
445         mutex_unlock(&dev->struct_mutex);
446         return ret;
447 }
448
449 /**
450  * Maps the contents of an object, returning the address it is mapped
451  * into.
452  *
453  * While the mapping holds a reference on the contents of the object, it doesn't
454  * imply a ref on the object itself.
455  */
456 int
457 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
458                    struct drm_file *file_priv)
459 {
460         struct drm_i915_gem_mmap *args = data;
461         struct drm_gem_object *obj;
462         loff_t offset;
463         unsigned long addr;
464
465         if (!(dev->driver->driver_features & DRIVER_GEM))
466                 return -ENODEV;
467
468         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
469         if (obj == NULL)
470                 return -EBADF;
471
472         offset = args->offset;
473
474         down_write(&current->mm->mmap_sem);
475         addr = do_mmap(obj->filp, 0, args->size,
476                        PROT_READ | PROT_WRITE, MAP_SHARED,
477                        args->offset);
478         up_write(&current->mm->mmap_sem);
479         mutex_lock(&dev->struct_mutex);
480         drm_gem_object_unreference(obj);
481         mutex_unlock(&dev->struct_mutex);
482         if (IS_ERR((void *)addr))
483                 return addr;
484
485         args->addr_ptr = (uint64_t) addr;
486
487         return 0;
488 }
489
490 static void
491 i915_gem_object_free_page_list(struct drm_gem_object *obj)
492 {
493         struct drm_i915_gem_object *obj_priv = obj->driver_private;
494         int page_count = obj->size / PAGE_SIZE;
495         int i;
496
497         if (obj_priv->page_list == NULL)
498                 return;
499
500
501         for (i = 0; i < page_count; i++)
502                 if (obj_priv->page_list[i] != NULL) {
503                         if (obj_priv->dirty)
504                                 set_page_dirty(obj_priv->page_list[i]);
505                         mark_page_accessed(obj_priv->page_list[i]);
506                         page_cache_release(obj_priv->page_list[i]);
507                 }
508         obj_priv->dirty = 0;
509
510         drm_free(obj_priv->page_list,
511                  page_count * sizeof(struct page *),
512                  DRM_MEM_DRIVER);
513         obj_priv->page_list = NULL;
514 }
515
516 static void
517 i915_gem_object_move_to_active(struct drm_gem_object *obj)
518 {
519         struct drm_device *dev = obj->dev;
520         drm_i915_private_t *dev_priv = dev->dev_private;
521         struct drm_i915_gem_object *obj_priv = obj->driver_private;
522
523         /* Add a reference if we're newly entering the active list. */
524         if (!obj_priv->active) {
525                 drm_gem_object_reference(obj);
526                 obj_priv->active = 1;
527         }
528         /* Move from whatever list we were on to the tail of execution. */
529         list_move_tail(&obj_priv->list,
530                        &dev_priv->mm.active_list);
531 }
532
533
534 static void
535 i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
536 {
537         struct drm_device *dev = obj->dev;
538         drm_i915_private_t *dev_priv = dev->dev_private;
539         struct drm_i915_gem_object *obj_priv = obj->driver_private;
540
541         i915_verify_inactive(dev, __FILE__, __LINE__);
542         if (obj_priv->pin_count != 0)
543                 list_del_init(&obj_priv->list);
544         else
545                 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
546
547         if (obj_priv->active) {
548                 obj_priv->active = 0;
549                 drm_gem_object_unreference(obj);
550         }
551         i915_verify_inactive(dev, __FILE__, __LINE__);
552 }
553
554 /**
555  * Creates a new sequence number, emitting a write of it to the status page
556  * plus an interrupt, which will trigger i915_user_interrupt_handler.
557  *
558  * Must be called with struct_lock held.
559  *
560  * Returned sequence numbers are nonzero on success.
561  */
562 static uint32_t
563 i915_add_request(struct drm_device *dev, uint32_t flush_domains)
564 {
565         drm_i915_private_t *dev_priv = dev->dev_private;
566         struct drm_i915_gem_request *request;
567         uint32_t seqno;
568         int was_empty;
569         RING_LOCALS;
570
571         request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
572         if (request == NULL)
573                 return 0;
574
575         /* Grab the seqno we're going to make this request be, and bump the
576          * next (skipping 0 so it can be the reserved no-seqno value).
577          */
578         seqno = dev_priv->mm.next_gem_seqno;
579         dev_priv->mm.next_gem_seqno++;
580         if (dev_priv->mm.next_gem_seqno == 0)
581                 dev_priv->mm.next_gem_seqno++;
582
583         BEGIN_LP_RING(4);
584         OUT_RING(MI_STORE_DWORD_INDEX);
585         OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
586         OUT_RING(seqno);
587
588         OUT_RING(MI_USER_INTERRUPT);
589         ADVANCE_LP_RING();
590
591         DRM_DEBUG("%d\n", seqno);
592
593         request->seqno = seqno;
594         request->emitted_jiffies = jiffies;
595         request->flush_domains = flush_domains;
596         was_empty = list_empty(&dev_priv->mm.request_list);
597         list_add_tail(&request->list, &dev_priv->mm.request_list);
598
599         if (was_empty && !dev_priv->mm.suspended)
600                 schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
601         return seqno;
602 }
603
604 /**
605  * Command execution barrier
606  *
607  * Ensures that all commands in the ring are finished
608  * before signalling the CPU
609  */
610 static uint32_t
611 i915_retire_commands(struct drm_device *dev)
612 {
613         drm_i915_private_t *dev_priv = dev->dev_private;
614         uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
615         uint32_t flush_domains = 0;
616         RING_LOCALS;
617
618         /* The sampler always gets flushed on i965 (sigh) */
619         if (IS_I965G(dev))
620                 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
621         BEGIN_LP_RING(2);
622         OUT_RING(cmd);
623         OUT_RING(0); /* noop */
624         ADVANCE_LP_RING();
625         return flush_domains;
626 }
627
628 /**
629  * Moves buffers associated only with the given active seqno from the active
630  * to inactive list, potentially freeing them.
631  */
632 static void
633 i915_gem_retire_request(struct drm_device *dev,
634                         struct drm_i915_gem_request *request)
635 {
636         drm_i915_private_t *dev_priv = dev->dev_private;
637
638         /* Move any buffers on the active list that are no longer referenced
639          * by the ringbuffer to the flushing/inactive lists as appropriate.
640          */
641         while (!list_empty(&dev_priv->mm.active_list)) {
642                 struct drm_gem_object *obj;
643                 struct drm_i915_gem_object *obj_priv;
644
645                 obj_priv = list_first_entry(&dev_priv->mm.active_list,
646                                             struct drm_i915_gem_object,
647                                             list);
648                 obj = obj_priv->obj;
649
650                 /* If the seqno being retired doesn't match the oldest in the
651                  * list, then the oldest in the list must still be newer than
652                  * this seqno.
653                  */
654                 if (obj_priv->last_rendering_seqno != request->seqno)
655                         return;
656 #if WATCH_LRU
657                 DRM_INFO("%s: retire %d moves to inactive list %p\n",
658                          __func__, request->seqno, obj);
659 #endif
660
661                 if (obj->write_domain != 0) {
662                         list_move_tail(&obj_priv->list,
663                                        &dev_priv->mm.flushing_list);
664                 } else {
665                         i915_gem_object_move_to_inactive(obj);
666                 }
667         }
668
669         if (request->flush_domains != 0) {
670                 struct drm_i915_gem_object *obj_priv, *next;
671
672                 /* Clear the write domain and activity from any buffers
673                  * that are just waiting for a flush matching the one retired.
674                  */
675                 list_for_each_entry_safe(obj_priv, next,
676                                          &dev_priv->mm.flushing_list, list) {
677                         struct drm_gem_object *obj = obj_priv->obj;
678
679                         if (obj->write_domain & request->flush_domains) {
680                                 obj->write_domain = 0;
681                                 i915_gem_object_move_to_inactive(obj);
682                         }
683                 }
684
685         }
686 }
687
/**
 * Returns true if seq1 is the same as or later than seq2.
 *
 * The difference is evaluated as a signed 32-bit value, so the comparison
 * remains correct when the sequence counter wraps around.
 */
static int
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	int32_t delta = (int32_t)(seq1 - seq2);

	return delta >= 0;
}
696
697 uint32_t
698 i915_get_gem_seqno(struct drm_device *dev)
699 {
700         drm_i915_private_t *dev_priv = dev->dev_private;
701
702         return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
703 }
704
705 /**
706  * This function clears the request list as sequence numbers are passed.
707  */
708 void
709 i915_gem_retire_requests(struct drm_device *dev)
710 {
711         drm_i915_private_t *dev_priv = dev->dev_private;
712         uint32_t seqno;
713
714         seqno = i915_get_gem_seqno(dev);
715
716         while (!list_empty(&dev_priv->mm.request_list)) {
717                 struct drm_i915_gem_request *request;
718                 uint32_t retiring_seqno;
719
720                 request = list_first_entry(&dev_priv->mm.request_list,
721                                            struct drm_i915_gem_request,
722                                            list);
723                 retiring_seqno = request->seqno;
724
725                 if (i915_seqno_passed(seqno, retiring_seqno) ||
726                     dev_priv->mm.wedged) {
727                         i915_gem_retire_request(dev, request);
728
729                         list_del(&request->list);
730                         drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
731                 } else
732                         break;
733         }
734 }
735
736 void
737 i915_gem_retire_work_handler(struct work_struct *work)
738 {
739         drm_i915_private_t *dev_priv;
740         struct drm_device *dev;
741
742         dev_priv = container_of(work, drm_i915_private_t,
743                                 mm.retire_work.work);
744         dev = dev_priv->dev;
745
746         mutex_lock(&dev->struct_mutex);
747         i915_gem_retire_requests(dev);
748         if (!dev_priv->mm.suspended &&
749             !list_empty(&dev_priv->mm.request_list))
750                 schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
751         mutex_unlock(&dev->struct_mutex);
752 }
753
754 /**
755  * Waits for a sequence number to be signaled, and cleans up the
756  * request and object lists appropriately for that event.
757  */
758 static int
759 i915_wait_request(struct drm_device *dev, uint32_t seqno)
760 {
761         drm_i915_private_t *dev_priv = dev->dev_private;
762         int ret = 0;
763
764         BUG_ON(seqno == 0);
765
766         if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
767                 dev_priv->mm.waiting_gem_seqno = seqno;
768                 i915_user_irq_get(dev);
769                 ret = wait_event_interruptible(dev_priv->irq_queue,
770                                                i915_seqno_passed(i915_get_gem_seqno(dev),
771                                                                  seqno) ||
772                                                dev_priv->mm.wedged);
773                 i915_user_irq_put(dev);
774                 dev_priv->mm.waiting_gem_seqno = 0;
775         }
776         if (dev_priv->mm.wedged)
777                 ret = -EIO;
778
779         if (ret && ret != -ERESTARTSYS)
780                 DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
781                           __func__, ret, seqno, i915_get_gem_seqno(dev));
782
783         /* Directly dispatch request retiring.  While we have the work queue
784          * to handle this, the waiter on a request often wants an associated
785          * buffer to have made it to the inactive list, and we would need
786          * a separate wait queue to handle that.
787          */
788         if (ret == 0)
789                 i915_gem_retire_requests(dev);
790
791         return ret;
792 }
793
794 static void
795 i915_gem_flush(struct drm_device *dev,
796                uint32_t invalidate_domains,
797                uint32_t flush_domains)
798 {
799         drm_i915_private_t *dev_priv = dev->dev_private;
800         uint32_t cmd;
801         RING_LOCALS;
802
803 #if WATCH_EXEC
804         DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
805                   invalidate_domains, flush_domains);
806 #endif
807
808         if (flush_domains & I915_GEM_DOMAIN_CPU)
809                 drm_agp_chipset_flush(dev);
810
811         if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU |
812                                                      I915_GEM_DOMAIN_GTT)) {
813                 /*
814                  * read/write caches:
815                  *
816                  * I915_GEM_DOMAIN_RENDER is always invalidated, but is
817                  * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
818                  * also flushed at 2d versus 3d pipeline switches.
819                  *
820                  * read-only caches:
821                  *
822                  * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
823                  * MI_READ_FLUSH is set, and is always flushed on 965.
824                  *
825                  * I915_GEM_DOMAIN_COMMAND may not exist?
826                  *
827                  * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
828                  * invalidated when MI_EXE_FLUSH is set.
829                  *
830                  * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
831                  * invalidated with every MI_FLUSH.
832                  *
833                  * TLBs:
834                  *
835                  * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
836                  * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
837                  * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
838                  * are flushed at any MI_FLUSH.
839                  */
840
841                 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
842                 if ((invalidate_domains|flush_domains) &
843                     I915_GEM_DOMAIN_RENDER)
844                         cmd &= ~MI_NO_WRITE_FLUSH;
845                 if (!IS_I965G(dev)) {
846                         /*
847                          * On the 965, the sampler cache always gets flushed
848                          * and this bit is reserved.
849                          */
850                         if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
851                                 cmd |= MI_READ_FLUSH;
852                 }
853                 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
854                         cmd |= MI_EXE_FLUSH;
855
856 #if WATCH_EXEC
857                 DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd);
858 #endif
859                 BEGIN_LP_RING(2);
860                 OUT_RING(cmd);
861                 OUT_RING(0); /* noop */
862                 ADVANCE_LP_RING();
863         }
864 }
865
866 /**
867  * Ensures that all rendering to the object has completed and the object is
868  * safe to unbind from the GTT or access from the CPU.
869  */
870 static int
871 i915_gem_object_wait_rendering(struct drm_gem_object *obj)
872 {
873         struct drm_device *dev = obj->dev;
874         struct drm_i915_gem_object *obj_priv = obj->driver_private;
875         int ret;
876
877         /* If there are writes queued to the buffer, flush and
878          * create a new seqno to wait for.
879          */
880         if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) {
881                 uint32_t write_domain = obj->write_domain;
882 #if WATCH_BUF
883                 DRM_INFO("%s: flushing object %p from write domain %08x\n",
884                           __func__, obj, write_domain);
885 #endif
886                 i915_gem_flush(dev, 0, write_domain);
887
888                 i915_gem_object_move_to_active(obj);
889                 obj_priv->last_rendering_seqno = i915_add_request(dev,
890                                                                   write_domain);
891                 BUG_ON(obj_priv->last_rendering_seqno == 0);
892 #if WATCH_LRU
893                 DRM_INFO("%s: flush moves to exec list %p\n", __func__, obj);
894 #endif
895         }
896
897         /* If there is rendering queued on the buffer being evicted, wait for
898          * it.
899          */
900         if (obj_priv->active) {
901 #if WATCH_BUF
902                 DRM_INFO("%s: object %p wait for seqno %08x\n",
903                           __func__, obj, obj_priv->last_rendering_seqno);
904 #endif
905                 ret = i915_wait_request(dev, obj_priv->last_rendering_seqno);
906                 if (ret != 0)
907                         return ret;
908         }
909
910         return 0;
911 }
912
913 /**
914  * Unbinds an object from the GTT aperture.
915  */
916 static int
917 i915_gem_object_unbind(struct drm_gem_object *obj)
918 {
919         struct drm_device *dev = obj->dev;
920         struct drm_i915_gem_object *obj_priv = obj->driver_private;
921         int ret = 0;
922
923 #if WATCH_BUF
924         DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
925         DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
926 #endif
927         if (obj_priv->gtt_space == NULL)
928                 return 0;
929
930         if (obj_priv->pin_count != 0) {
931                 DRM_ERROR("Attempting to unbind pinned buffer\n");
932                 return -EINVAL;
933         }
934
935         /* Wait for any rendering to complete
936          */
937         ret = i915_gem_object_wait_rendering(obj);
938         if (ret) {
939                 DRM_ERROR("wait_rendering failed: %d\n", ret);
940                 return ret;
941         }
942
943         /* Move the object to the CPU domain to ensure that
944          * any possible CPU writes while it's not in the GTT
945          * are flushed when we go to remap it. This will
946          * also ensure that all pending GPU writes are finished
947          * before we unbind.
948          */
949         ret = i915_gem_object_set_domain(obj, I915_GEM_DOMAIN_CPU,
950                                          I915_GEM_DOMAIN_CPU);
951         if (ret) {
952                 DRM_ERROR("set_domain failed: %d\n", ret);
953                 return ret;
954         }
955
956         if (obj_priv->agp_mem != NULL) {
957                 drm_unbind_agp(obj_priv->agp_mem);
958                 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
959                 obj_priv->agp_mem = NULL;
960         }
961
962         BUG_ON(obj_priv->active);
963
964         i915_gem_object_free_page_list(obj);
965
966         if (obj_priv->gtt_space) {
967                 atomic_dec(&dev->gtt_count);
968                 atomic_sub(obj->size, &dev->gtt_memory);
969
970                 drm_mm_put_block(obj_priv->gtt_space);
971                 obj_priv->gtt_space = NULL;
972         }
973
974         /* Remove ourselves from the LRU list if present. */
975         if (!list_empty(&obj_priv->list))
976                 list_del_init(&obj_priv->list);
977
978         return 0;
979 }
980
981 static int
982 i915_gem_evict_something(struct drm_device *dev)
983 {
984         drm_i915_private_t *dev_priv = dev->dev_private;
985         struct drm_gem_object *obj;
986         struct drm_i915_gem_object *obj_priv;
987         int ret = 0;
988
989         for (;;) {
990                 /* If there's an inactive buffer available now, grab it
991                  * and be done.
992                  */
993                 if (!list_empty(&dev_priv->mm.inactive_list)) {
994                         obj_priv = list_first_entry(&dev_priv->mm.inactive_list,
995                                                     struct drm_i915_gem_object,
996                                                     list);
997                         obj = obj_priv->obj;
998                         BUG_ON(obj_priv->pin_count != 0);
999 #if WATCH_LRU
1000                         DRM_INFO("%s: evicting %p\n", __func__, obj);
1001 #endif
1002                         BUG_ON(obj_priv->active);
1003
1004                         /* Wait on the rendering and unbind the buffer. */
1005                         ret = i915_gem_object_unbind(obj);
1006                         break;
1007                 }
1008
1009                 /* If we didn't get anything, but the ring is still processing
1010                  * things, wait for one of those things to finish and hopefully
1011                  * leave us a buffer to evict.
1012                  */
1013                 if (!list_empty(&dev_priv->mm.request_list)) {
1014                         struct drm_i915_gem_request *request;
1015
1016                         request = list_first_entry(&dev_priv->mm.request_list,
1017                                                    struct drm_i915_gem_request,
1018                                                    list);
1019
1020                         ret = i915_wait_request(dev, request->seqno);
1021                         if (ret)
1022                                 break;
1023
1024                         /* if waiting caused an object to become inactive,
1025                          * then loop around and wait for it. Otherwise, we
1026                          * assume that waiting freed and unbound something,
1027                          * so there should now be some space in the GTT
1028                          */
1029                         if (!list_empty(&dev_priv->mm.inactive_list))
1030                                 continue;
1031                         break;
1032                 }
1033
1034                 /* If we didn't have anything on the request list but there
1035                  * are buffers awaiting a flush, emit one and try again.
1036                  * When we wait on it, those buffers waiting for that flush
1037                  * will get moved to inactive.
1038                  */
1039                 if (!list_empty(&dev_priv->mm.flushing_list)) {
1040                         obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
1041                                                     struct drm_i915_gem_object,
1042                                                     list);
1043                         obj = obj_priv->obj;
1044
1045                         i915_gem_flush(dev,
1046                                        obj->write_domain,
1047                                        obj->write_domain);
1048                         i915_add_request(dev, obj->write_domain);
1049
1050                         obj = NULL;
1051                         continue;
1052                 }
1053
1054                 DRM_ERROR("inactive empty %d request empty %d "
1055                           "flushing empty %d\n",
1056                           list_empty(&dev_priv->mm.inactive_list),
1057                           list_empty(&dev_priv->mm.request_list),
1058                           list_empty(&dev_priv->mm.flushing_list));
1059                 /* If we didn't do any of the above, there's nothing to be done
1060                  * and we just can't fit it in.
1061                  */
1062                 return -ENOMEM;
1063         }
1064         return ret;
1065 }
1066
1067 static int
1068 i915_gem_object_get_page_list(struct drm_gem_object *obj)
1069 {
1070         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1071         int page_count, i;
1072         struct address_space *mapping;
1073         struct inode *inode;
1074         struct page *page;
1075         int ret;
1076
1077         if (obj_priv->page_list)
1078                 return 0;
1079
1080         /* Get the list of pages out of our struct file.  They'll be pinned
1081          * at this point until we release them.
1082          */
1083         page_count = obj->size / PAGE_SIZE;
1084         BUG_ON(obj_priv->page_list != NULL);
1085         obj_priv->page_list = drm_calloc(page_count, sizeof(struct page *),
1086                                          DRM_MEM_DRIVER);
1087         if (obj_priv->page_list == NULL) {
1088                 DRM_ERROR("Faled to allocate page list\n");
1089                 return -ENOMEM;
1090         }
1091
1092         inode = obj->filp->f_path.dentry->d_inode;
1093         mapping = inode->i_mapping;
1094         for (i = 0; i < page_count; i++) {
1095                 page = read_mapping_page(mapping, i, NULL);
1096                 if (IS_ERR(page)) {
1097                         ret = PTR_ERR(page);
1098                         DRM_ERROR("read_mapping_page failed: %d\n", ret);
1099                         i915_gem_object_free_page_list(obj);
1100                         return ret;
1101                 }
1102                 obj_priv->page_list[i] = page;
1103         }
1104         return 0;
1105 }
1106
1107 /**
1108  * Finds free space in the GTT aperture and binds the object there.
1109  */
1110 static int
1111 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
1112 {
1113         struct drm_device *dev = obj->dev;
1114         drm_i915_private_t *dev_priv = dev->dev_private;
1115         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1116         struct drm_mm_node *free_space;
1117         int page_count, ret;
1118
1119         if (alignment == 0)
1120                 alignment = PAGE_SIZE;
1121         if (alignment & (PAGE_SIZE - 1)) {
1122                 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
1123                 return -EINVAL;
1124         }
1125
1126  search_free:
1127         free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
1128                                         obj->size, alignment, 0);
1129         if (free_space != NULL) {
1130                 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
1131                                                        alignment);
1132                 if (obj_priv->gtt_space != NULL) {
1133                         obj_priv->gtt_space->private = obj;
1134                         obj_priv->gtt_offset = obj_priv->gtt_space->start;
1135                 }
1136         }
1137         if (obj_priv->gtt_space == NULL) {
1138                 /* If the gtt is empty and we're still having trouble
1139                  * fitting our object in, we're out of memory.
1140                  */
1141 #if WATCH_LRU
1142                 DRM_INFO("%s: GTT full, evicting something\n", __func__);
1143 #endif
1144                 if (list_empty(&dev_priv->mm.inactive_list) &&
1145                     list_empty(&dev_priv->mm.flushing_list) &&
1146                     list_empty(&dev_priv->mm.active_list)) {
1147                         DRM_ERROR("GTT full, but LRU list empty\n");
1148                         return -ENOMEM;
1149                 }
1150
1151                 ret = i915_gem_evict_something(dev);
1152                 if (ret != 0) {
1153                         DRM_ERROR("Failed to evict a buffer %d\n", ret);
1154                         return ret;
1155                 }
1156                 goto search_free;
1157         }
1158
1159 #if WATCH_BUF
1160         DRM_INFO("Binding object of size %d at 0x%08x\n",
1161                  obj->size, obj_priv->gtt_offset);
1162 #endif
1163         ret = i915_gem_object_get_page_list(obj);
1164         if (ret) {
1165                 drm_mm_put_block(obj_priv->gtt_space);
1166                 obj_priv->gtt_space = NULL;
1167                 return ret;
1168         }
1169
1170         page_count = obj->size / PAGE_SIZE;
1171         /* Create an AGP memory structure pointing at our pages, and bind it
1172          * into the GTT.
1173          */
1174         obj_priv->agp_mem = drm_agp_bind_pages(dev,
1175                                                obj_priv->page_list,
1176                                                page_count,
1177                                                obj_priv->gtt_offset,
1178                                                obj_priv->agp_type);
1179         if (obj_priv->agp_mem == NULL) {
1180                 i915_gem_object_free_page_list(obj);
1181                 drm_mm_put_block(obj_priv->gtt_space);
1182                 obj_priv->gtt_space = NULL;
1183                 return -ENOMEM;
1184         }
1185         atomic_inc(&dev->gtt_count);
1186         atomic_add(obj->size, &dev->gtt_memory);
1187
1188         /* Assert that the object is not currently in any GPU domain. As it
1189          * wasn't in the GTT, there shouldn't be any way it could have been in
1190          * a GPU cache
1191          */
1192         BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
1193         BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
1194
1195         return 0;
1196 }
1197
1198 void
1199 i915_gem_clflush_object(struct drm_gem_object *obj)
1200 {
1201         struct drm_i915_gem_object      *obj_priv = obj->driver_private;
1202
1203         /* If we don't have a page list set up, then we're not pinned
1204          * to GPU, and we can ignore the cache flush because it'll happen
1205          * again at bind time.
1206          */
1207         if (obj_priv->page_list == NULL)
1208                 return;
1209
1210         drm_clflush_pages(obj_priv->page_list, obj->size / PAGE_SIZE);
1211 }
1212
1213 /*
1214  * Set the next domain for the specified object. This
1215  * may not actually perform the necessary flushing/invalidating though,
1216  * as that may want to be batched with other set_domain operations
1217  *
1218  * This is (we hope) the only really tricky part of gem. The goal
1219  * is fairly simple -- track which caches hold bits of the object
1220  * and make sure they remain coherent. A few concrete examples may
1221  * help to explain how it works. For shorthand, we use the notation
1222  * (read_domains, write_domain), e.g. (CPU, CPU), to indicate
1223  * a pair of read and write domain masks.
1224  *
1225  * Case 1: the batch buffer
1226  *
1227  *      1. Allocated
1228  *      2. Written by CPU
1229  *      3. Mapped to GTT
1230  *      4. Read by GPU
1231  *      5. Unmapped from GTT
1232  *      6. Freed
1233  *
1234  *      Let's take these a step at a time
1235  *
1236  *      1. Allocated
1237  *              Pages allocated from the kernel may still have
1238  *              cache contents, so we set them to (CPU, CPU) always.
1239  *      2. Written by CPU (using pwrite)
1240  *              The pwrite function calls set_domain (CPU, CPU) and
1241  *              this function does nothing (as nothing changes)
1242  *      3. Mapped by GTT
1243  *              This function asserts that the object is not
1244  *              currently in any GPU-based read or write domains
1245  *      4. Read by GPU
1246  *              i915_gem_execbuffer calls set_domain (COMMAND, 0).
1247  *              As write_domain is zero, this function adds in the
1248  *              current read domains (CPU+COMMAND, 0).
1249  *              flush_domains is set to CPU.
1250  *              invalidate_domains is set to COMMAND
1251  *              clflush is run to get data out of the CPU caches
1252  *              then i915_gem_dev_set_domain calls i915_gem_flush to
1253  *              emit an MI_FLUSH and drm_agp_chipset_flush
1254  *      5. Unmapped from GTT
1255  *              i915_gem_object_unbind calls set_domain (CPU, CPU)
1256  *              flush_domains and invalidate_domains end up both zero
1257  *              so no flushing/invalidating happens
1258  *      6. Freed
1259  *              yay, done
1260  *
1261  * Case 2: The shared render buffer
1262  *
1263  *      1. Allocated
1264  *      2. Mapped to GTT
1265  *      3. Read/written by GPU
1266  *      4. set_domain to (CPU,CPU)
1267  *      5. Read/written by CPU
1268  *      6. Read/written by GPU
1269  *
1270  *      1. Allocated
1271  *              Same as last example, (CPU, CPU)
1272  *      2. Mapped to GTT
1273  *              Nothing changes (assertions find that it is not in the GPU)
1274  *      3. Read/written by GPU
1275  *              execbuffer calls set_domain (RENDER, RENDER)
1276  *              flush_domains gets CPU
1277  *              invalidate_domains gets GPU
1278  *              clflush (obj)
1279  *              MI_FLUSH and drm_agp_chipset_flush
1280  *      4. set_domain (CPU, CPU)
1281  *              flush_domains gets GPU
1282  *              invalidate_domains gets CPU
1283  *              wait_rendering (obj) to make sure all drawing is complete.
1284  *              This will include an MI_FLUSH to get the data from GPU
1285  *              to memory
1286  *              clflush (obj) to invalidate the CPU cache
1287  *              Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
1288  *      5. Read/written by CPU
1289  *              cache lines are loaded and dirtied
1290  *      6. Read/written by GPU
1291  *              Same as last GPU access
1292  *
1293  * Case 3: The constant buffer
1294  *
1295  *      1. Allocated
1296  *      2. Written by CPU
1297  *      3. Read by GPU
1298  *      4. Updated (written) by CPU again
1299  *      5. Read by GPU
1300  *
1301  *      1. Allocated
1302  *              (CPU, CPU)
1303  *      2. Written by CPU
1304  *              (CPU, CPU)
1305  *      3. Read by GPU
1306  *              (CPU+RENDER, 0)
1307  *              flush_domains = CPU
1308  *              invalidate_domains = RENDER
1309  *              clflush (obj)
1310  *              MI_FLUSH
1311  *              drm_agp_chipset_flush
1312  *      4. Updated (written) by CPU again
1313  *              (CPU, CPU)
1314  *              flush_domains = 0 (no previous write domain)
1315  *              invalidate_domains = 0 (no new read domains)
1316  *      5. Read by GPU
1317  *              (CPU+RENDER, 0)
1318  *              flush_domains = CPU
1319  *              invalidate_domains = RENDER
1320  *              clflush (obj)
1321  *              MI_FLUSH
1322  *              drm_agp_chipset_flush
1323  */
1324 static int
1325 i915_gem_object_set_domain(struct drm_gem_object *obj,
1326                             uint32_t read_domains,
1327                             uint32_t write_domain)
1328 {
1329         struct drm_device               *dev = obj->dev;
1330         struct drm_i915_gem_object      *obj_priv = obj->driver_private;
1331         uint32_t                        invalidate_domains = 0;
1332         uint32_t                        flush_domains = 0;
1333         int                             ret;
1334
1335 #if WATCH_BUF
1336         DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
1337                  __func__, obj,
1338                  obj->read_domains, read_domains,
1339                  obj->write_domain, write_domain);
1340 #endif
1341         /*
1342          * If the object isn't moving to a new write domain,
1343          * let the object stay in multiple read domains
1344          */
1345         if (write_domain == 0)
1346                 read_domains |= obj->read_domains;
1347         else
1348                 obj_priv->dirty = 1;
1349
1350         /*
1351          * Flush the current write domain if
1352          * the new read domains don't match. Invalidate
1353          * any read domains which differ from the old
1354          * write domain
1355          */
1356         if (obj->write_domain && obj->write_domain != read_domains) {
1357                 flush_domains |= obj->write_domain;
1358                 invalidate_domains |= read_domains & ~obj->write_domain;
1359         }
1360         /*
1361          * Invalidate any read caches which may have
1362          * stale data. That is, any new read domains.
1363          */
1364         invalidate_domains |= read_domains & ~obj->read_domains;
1365         if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
1366 #if WATCH_BUF
1367                 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
1368                          __func__, flush_domains, invalidate_domains);
1369 #endif
1370                 /*
1371                  * If we're invaliding the CPU cache and flushing a GPU cache,
1372                  * then pause for rendering so that the GPU caches will be
1373                  * flushed before the cpu cache is invalidated
1374                  */
1375                 if ((invalidate_domains & I915_GEM_DOMAIN_CPU) &&
1376                     (flush_domains & ~(I915_GEM_DOMAIN_CPU |
1377                                        I915_GEM_DOMAIN_GTT))) {
1378                         ret = i915_gem_object_wait_rendering(obj);
1379                         if (ret)
1380                                 return ret;
1381                 }
1382                 i915_gem_clflush_object(obj);
1383         }
1384
1385         if ((write_domain | flush_domains) != 0)
1386                 obj->write_domain = write_domain;
1387
1388         /* If we're invalidating the CPU domain, clear the per-page CPU
1389          * domain list as well.
1390          */
1391         if (obj_priv->page_cpu_valid != NULL &&
1392             (write_domain != 0 ||
1393              read_domains & I915_GEM_DOMAIN_CPU)) {
1394                 drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE,
1395                          DRM_MEM_DRIVER);
1396                 obj_priv->page_cpu_valid = NULL;
1397         }
1398         obj->read_domains = read_domains;
1399
1400         dev->invalidate_domains |= invalidate_domains;
1401         dev->flush_domains |= flush_domains;
1402 #if WATCH_BUF
1403         DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
1404                  __func__,
1405                  obj->read_domains, obj->write_domain,
1406                  dev->invalidate_domains, dev->flush_domains);
1407 #endif
1408         return 0;
1409 }
1410
1411 /**
1412  * Set the read/write domain on a range of the object.
1413  *
1414  * Currently only implemented for CPU reads, otherwise drops to normal
1415  * i915_gem_object_set_domain().
1416  */
1417 static int
1418 i915_gem_object_set_domain_range(struct drm_gem_object *obj,
1419                                  uint64_t offset,
1420                                  uint64_t size,
1421                                  uint32_t read_domains,
1422                                  uint32_t write_domain)
1423 {
1424         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1425         int ret, i;
1426
1427         if (obj->read_domains & I915_GEM_DOMAIN_CPU)
1428                 return 0;
1429
1430         if (read_domains != I915_GEM_DOMAIN_CPU ||
1431             write_domain != 0)
1432                 return i915_gem_object_set_domain(obj,
1433                                                   read_domains, write_domain);
1434
1435         /* Wait on any GPU rendering to the object to be flushed. */
1436         if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) {
1437                 ret = i915_gem_object_wait_rendering(obj);
1438                 if (ret)
1439                         return ret;
1440         }
1441
1442         if (obj_priv->page_cpu_valid == NULL) {
1443                 obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE,
1444                                                       DRM_MEM_DRIVER);
1445         }
1446
1447         /* Flush the cache on any pages that are still invalid from the CPU's
1448          * perspective.
1449          */
1450         for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; i++) {
1451                 if (obj_priv->page_cpu_valid[i])
1452                         continue;
1453
1454                 drm_clflush_pages(obj_priv->page_list + i, 1);
1455
1456                 obj_priv->page_cpu_valid[i] = 1;
1457         }
1458
1459         return 0;
1460 }
1461
1462 /**
1463  * Once all of the objects have been set in the proper domain,
1464  * perform the necessary flush and invalidate operations.
1465  *
1466  * Returns the write domains flushed, for use in flush tracking.
1467  */
1468 static uint32_t
1469 i915_gem_dev_set_domain(struct drm_device *dev)
1470 {
1471         uint32_t flush_domains = dev->flush_domains;
1472
1473         /*
1474          * Now that all the buffers are synced to the proper domains,
1475          * flush and invalidate the collected domains
1476          */
1477         if (dev->invalidate_domains | dev->flush_domains) {
1478 #if WATCH_EXEC
1479                 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
1480                           __func__,
1481                          dev->invalidate_domains,
1482                          dev->flush_domains);
1483 #endif
1484                 i915_gem_flush(dev,
1485                                dev->invalidate_domains,
1486                                dev->flush_domains);
1487                 dev->invalidate_domains = 0;
1488                 dev->flush_domains = 0;
1489         }
1490
1491         return flush_domains;
1492 }
1493
1494 /**
1495  * Pin an object to the GTT and evaluate the relocations landing in it.
1496  */
1497 static int
1498 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
1499                                  struct drm_file *file_priv,
1500                                  struct drm_i915_gem_exec_object *entry)
1501 {
1502         struct drm_device *dev = obj->dev;
1503         drm_i915_private_t *dev_priv = dev->dev_private;
1504         struct drm_i915_gem_relocation_entry reloc;
1505         struct drm_i915_gem_relocation_entry __user *relocs;
1506         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1507         int i, ret;
1508         void __iomem *reloc_page;
1509
1510         /* Choose the GTT offset for our buffer and put it there. */
1511         ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
1512         if (ret)
1513                 return ret;
1514
1515         entry->offset = obj_priv->gtt_offset;
1516
1517         relocs = (struct drm_i915_gem_relocation_entry __user *)
1518                  (uintptr_t) entry->relocs_ptr;
1519         /* Apply the relocations, using the GTT aperture to avoid cache
1520          * flushing requirements.
1521          */
1522         for (i = 0; i < entry->relocation_count; i++) {
1523                 struct drm_gem_object *target_obj;
1524                 struct drm_i915_gem_object *target_obj_priv;
1525                 uint32_t reloc_val, reloc_offset;
1526                 uint32_t __iomem *reloc_entry;
1527
1528                 ret = copy_from_user(&reloc, relocs + i, sizeof(reloc));
1529                 if (ret != 0) {
1530                         i915_gem_object_unpin(obj);
1531                         return ret;
1532                 }
1533
1534                 target_obj = drm_gem_object_lookup(obj->dev, file_priv,
1535                                                    reloc.target_handle);
1536                 if (target_obj == NULL) {
1537                         i915_gem_object_unpin(obj);
1538                         return -EBADF;
1539                 }
1540                 target_obj_priv = target_obj->driver_private;
1541
1542                 /* The target buffer should have appeared before us in the
1543                  * exec_object list, so it should have a GTT space bound by now.
1544                  */
1545                 if (target_obj_priv->gtt_space == NULL) {
1546                         DRM_ERROR("No GTT space found for object %d\n",
1547                                   reloc.target_handle);
1548                         drm_gem_object_unreference(target_obj);
1549                         i915_gem_object_unpin(obj);
1550                         return -EINVAL;
1551                 }
1552
1553                 if (reloc.offset > obj->size - 4) {
1554                         DRM_ERROR("Relocation beyond object bounds: "
1555                                   "obj %p target %d offset %d size %d.\n",
1556                                   obj, reloc.target_handle,
1557                                   (int) reloc.offset, (int) obj->size);
1558                         drm_gem_object_unreference(target_obj);
1559                         i915_gem_object_unpin(obj);
1560                         return -EINVAL;
1561                 }
1562                 if (reloc.offset & 3) {
1563                         DRM_ERROR("Relocation not 4-byte aligned: "
1564                                   "obj %p target %d offset %d.\n",
1565                                   obj, reloc.target_handle,
1566                                   (int) reloc.offset);
1567                         drm_gem_object_unreference(target_obj);
1568                         i915_gem_object_unpin(obj);
1569                         return -EINVAL;
1570                 }
1571
1572                 if (reloc.write_domain && target_obj->pending_write_domain &&
1573                     reloc.write_domain != target_obj->pending_write_domain) {
1574                         DRM_ERROR("Write domain conflict: "
1575                                   "obj %p target %d offset %d "
1576                                   "new %08x old %08x\n",
1577                                   obj, reloc.target_handle,
1578                                   (int) reloc.offset,
1579                                   reloc.write_domain,
1580                                   target_obj->pending_write_domain);
1581                         drm_gem_object_unreference(target_obj);
1582                         i915_gem_object_unpin(obj);
1583                         return -EINVAL;
1584                 }
1585
1586 #if WATCH_RELOC
1587                 DRM_INFO("%s: obj %p offset %08x target %d "
1588                          "read %08x write %08x gtt %08x "
1589                          "presumed %08x delta %08x\n",
1590                          __func__,
1591                          obj,
1592                          (int) reloc.offset,
1593                          (int) reloc.target_handle,
1594                          (int) reloc.read_domains,
1595                          (int) reloc.write_domain,
1596                          (int) target_obj_priv->gtt_offset,
1597                          (int) reloc.presumed_offset,
1598                          reloc.delta);
1599 #endif
1600
1601                 target_obj->pending_read_domains |= reloc.read_domains;
1602                 target_obj->pending_write_domain |= reloc.write_domain;
1603
1604                 /* If the relocation already has the right value in it, no
1605                  * more work needs to be done.
1606                  */
1607                 if (target_obj_priv->gtt_offset == reloc.presumed_offset) {
1608                         drm_gem_object_unreference(target_obj);
1609                         continue;
1610                 }
1611
1612                 /* Now that we're going to actually write some data in,
1613                  * make sure that any rendering using this buffer's contents
1614                  * is completed.
1615                  */
1616                 i915_gem_object_wait_rendering(obj);
1617
1618                 /* As we're writing through the gtt, flush
1619                  * any CPU writes before we write the relocations
1620                  */
1621                 if (obj->write_domain & I915_GEM_DOMAIN_CPU) {
1622                         i915_gem_clflush_object(obj);
1623                         drm_agp_chipset_flush(dev);
1624                         obj->write_domain = 0;
1625                 }
1626
1627                 /* Map the page containing the relocation we're going to
1628                  * perform.
1629                  */
1630                 reloc_offset = obj_priv->gtt_offset + reloc.offset;
1631                 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
1632                                                       (reloc_offset &
1633                                                        ~(PAGE_SIZE - 1)));
1634                 reloc_entry = (uint32_t __iomem *)(reloc_page +
1635                                                    (reloc_offset & (PAGE_SIZE - 1)));
1636                 reloc_val = target_obj_priv->gtt_offset + reloc.delta;
1637
1638 #if WATCH_BUF
1639                 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n",
1640                           obj, (unsigned int) reloc.offset,
1641                           readl(reloc_entry), reloc_val);
1642 #endif
1643                 writel(reloc_val, reloc_entry);
1644                 io_mapping_unmap_atomic(reloc_page);
1645
1646                 /* Write the updated presumed offset for this entry back out
1647                  * to the user.
1648                  */
1649                 reloc.presumed_offset = target_obj_priv->gtt_offset;
1650                 ret = copy_to_user(relocs + i, &reloc, sizeof(reloc));
1651                 if (ret != 0) {
1652                         drm_gem_object_unreference(target_obj);
1653                         i915_gem_object_unpin(obj);
1654                         return ret;
1655                 }
1656
1657                 drm_gem_object_unreference(target_obj);
1658         }
1659
1660 #if WATCH_BUF
1661         if (0)
1662                 i915_gem_dump_object(obj, 128, __func__, ~0);
1663 #endif
1664         return 0;
1665 }
1666
1667 /** Dispatch a batchbuffer to the ring
1668  */
1669 static int
1670 i915_dispatch_gem_execbuffer(struct drm_device *dev,
1671                               struct drm_i915_gem_execbuffer *exec,
1672                               uint64_t exec_offset)
1673 {
1674         drm_i915_private_t *dev_priv = dev->dev_private;
1675         struct drm_clip_rect __user *boxes = (struct drm_clip_rect __user *)
1676                                              (uintptr_t) exec->cliprects_ptr;
1677         int nbox = exec->num_cliprects;
1678         int i = 0, count;
1679         uint32_t        exec_start, exec_len;
1680         RING_LOCALS;
1681
1682         exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
1683         exec_len = (uint32_t) exec->batch_len;
1684
1685         if ((exec_start | exec_len) & 0x7) {
1686                 DRM_ERROR("alignment\n");
1687                 return -EINVAL;
1688         }
1689
1690         if (!exec_start)
1691                 return -EINVAL;
1692
1693         count = nbox ? nbox : 1;
1694
1695         for (i = 0; i < count; i++) {
1696                 if (i < nbox) {
1697                         int ret = i915_emit_box(dev, boxes, i,
1698                                                 exec->DR1, exec->DR4);
1699                         if (ret)
1700                                 return ret;
1701                 }
1702
1703                 if (IS_I830(dev) || IS_845G(dev)) {
1704                         BEGIN_LP_RING(4);
1705                         OUT_RING(MI_BATCH_BUFFER);
1706                         OUT_RING(exec_start | MI_BATCH_NON_SECURE);
1707                         OUT_RING(exec_start + exec_len - 4);
1708                         OUT_RING(0);
1709                         ADVANCE_LP_RING();
1710                 } else {
1711                         BEGIN_LP_RING(2);
1712                         if (IS_I965G(dev)) {
1713                                 OUT_RING(MI_BATCH_BUFFER_START |
1714                                          (2 << 6) |
1715                                          MI_BATCH_NON_SECURE_I965);
1716                                 OUT_RING(exec_start);
1717                         } else {
1718                                 OUT_RING(MI_BATCH_BUFFER_START |
1719                                          (2 << 6));
1720                                 OUT_RING(exec_start | MI_BATCH_NON_SECURE);
1721                         }
1722                         ADVANCE_LP_RING();
1723                 }
1724         }
1725
1726         /* XXX breadcrumb */
1727         return 0;
1728 }
1729
1730 /* Throttle our rendering by waiting until the ring has completed our requests
1731  * emitted over 20 msec ago.
1732  *
1733  * This should get us reasonable parallelism between CPU and GPU but also
1734  * relatively low latency when blocking on a particular request to finish.
1735  */
1736 static int
1737 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
1738 {
1739         struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
1740         int ret = 0;
1741         uint32_t seqno;
1742
1743         mutex_lock(&dev->struct_mutex);
1744         seqno = i915_file_priv->mm.last_gem_throttle_seqno;
1745         i915_file_priv->mm.last_gem_throttle_seqno =
1746                 i915_file_priv->mm.last_gem_seqno;
1747         if (seqno)
1748                 ret = i915_wait_request(dev, seqno);
1749         mutex_unlock(&dev->struct_mutex);
1750         return ret;
1751 }
1752
1753 int
1754 i915_gem_execbuffer(struct drm_device *dev, void *data,
1755                     struct drm_file *file_priv)
1756 {
1757         drm_i915_private_t *dev_priv = dev->dev_private;
1758         struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
1759         struct drm_i915_gem_execbuffer *args = data;
1760         struct drm_i915_gem_exec_object *exec_list = NULL;
1761         struct drm_gem_object **object_list = NULL;
1762         struct drm_gem_object *batch_obj;
1763         int ret, i, pinned = 0;
1764         uint64_t exec_offset;
1765         uint32_t seqno, flush_domains;
1766
1767 #if WATCH_EXEC
1768         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
1769                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
1770 #endif
1771
1772         if (args->buffer_count < 1) {
1773                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
1774                 return -EINVAL;
1775         }
1776         /* Copy in the exec list from userland */
1777         exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count,
1778                                DRM_MEM_DRIVER);
1779         object_list = drm_calloc(sizeof(*object_list), args->buffer_count,
1780                                  DRM_MEM_DRIVER);
1781         if (exec_list == NULL || object_list == NULL) {
1782                 DRM_ERROR("Failed to allocate exec or object list "
1783                           "for %d buffers\n",
1784                           args->buffer_count);
1785                 ret = -ENOMEM;
1786                 goto pre_mutex_err;
1787         }
1788         ret = copy_from_user(exec_list,
1789                              (struct drm_i915_relocation_entry __user *)
1790                              (uintptr_t) args->buffers_ptr,
1791                              sizeof(*exec_list) * args->buffer_count);
1792         if (ret != 0) {
1793                 DRM_ERROR("copy %d exec entries failed %d\n",
1794                           args->buffer_count, ret);
1795                 goto pre_mutex_err;
1796         }
1797
1798         mutex_lock(&dev->struct_mutex);
1799
1800         i915_verify_inactive(dev, __FILE__, __LINE__);
1801
1802         if (dev_priv->mm.wedged) {
1803                 DRM_ERROR("Execbuf while wedged\n");
1804                 mutex_unlock(&dev->struct_mutex);
1805                 return -EIO;
1806         }
1807
1808         if (dev_priv->mm.suspended) {
1809                 DRM_ERROR("Execbuf while VT-switched.\n");
1810                 mutex_unlock(&dev->struct_mutex);
1811                 return -EBUSY;
1812         }
1813
1814         /* Zero the gloabl flush/invalidate flags. These
1815          * will be modified as each object is bound to the
1816          * gtt
1817          */
1818         dev->invalidate_domains = 0;
1819         dev->flush_domains = 0;
1820
1821         /* Look up object handles and perform the relocations */
1822         for (i = 0; i < args->buffer_count; i++) {
1823                 object_list[i] = drm_gem_object_lookup(dev, file_priv,
1824                                                        exec_list[i].handle);
1825                 if (object_list[i] == NULL) {
1826                         DRM_ERROR("Invalid object handle %d at index %d\n",
1827                                    exec_list[i].handle, i);
1828                         ret = -EBADF;
1829                         goto err;
1830                 }
1831
1832                 object_list[i]->pending_read_domains = 0;
1833                 object_list[i]->pending_write_domain = 0;
1834                 ret = i915_gem_object_pin_and_relocate(object_list[i],
1835                                                        file_priv,
1836                                                        &exec_list[i]);
1837                 if (ret) {
1838                         DRM_ERROR("object bind and relocate failed %d\n", ret);
1839                         goto err;
1840                 }
1841                 pinned = i + 1;
1842         }
1843
1844         /* Set the pending read domains for the batch buffer to COMMAND */
1845         batch_obj = object_list[args->buffer_count-1];
1846         batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND;
1847         batch_obj->pending_write_domain = 0;
1848
1849         i915_verify_inactive(dev, __FILE__, __LINE__);
1850
1851         for (i = 0; i < args->buffer_count; i++) {
1852                 struct drm_gem_object *obj = object_list[i];
1853                 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1854
1855                 if (obj_priv->gtt_space == NULL) {
1856                         /* We evicted the buffer in the process of validating
1857                          * our set of buffers in.  We could try to recover by
1858                          * kicking them everything out and trying again from
1859                          * the start.
1860                          */
1861                         ret = -ENOMEM;
1862                         goto err;
1863                 }
1864
1865                 /* make sure all previous memory operations have passed */
1866                 ret = i915_gem_object_set_domain(obj,
1867                                                  obj->pending_read_domains,
1868                                                  obj->pending_write_domain);
1869                 if (ret)
1870                         goto err;
1871         }
1872
1873         i915_verify_inactive(dev, __FILE__, __LINE__);
1874
1875         /* Flush/invalidate caches and chipset buffer */
1876         flush_domains = i915_gem_dev_set_domain(dev);
1877
1878         i915_verify_inactive(dev, __FILE__, __LINE__);
1879
1880 #if WATCH_COHERENCY
1881         for (i = 0; i < args->buffer_count; i++) {
1882                 i915_gem_object_check_coherency(object_list[i],
1883                                                 exec_list[i].handle);
1884         }
1885 #endif
1886
1887         exec_offset = exec_list[args->buffer_count - 1].offset;
1888
1889 #if WATCH_EXEC
1890         i915_gem_dump_object(object_list[args->buffer_count - 1],
1891                               args->batch_len,
1892                               __func__,
1893                               ~0);
1894 #endif
1895
1896         (void)i915_add_request(dev, flush_domains);
1897
1898         /* Exec the batchbuffer */
1899         ret = i915_dispatch_gem_execbuffer(dev, args, exec_offset);
1900         if (ret) {
1901                 DRM_ERROR("dispatch failed %d\n", ret);
1902                 goto err;
1903         }
1904
1905         /*
1906          * Ensure that the commands in the batch buffer are
1907          * finished before the interrupt fires
1908          */
1909         flush_domains = i915_retire_commands(dev);
1910
1911         i915_verify_inactive(dev, __FILE__, __LINE__);
1912
1913         /*
1914          * Get a seqno representing the execution of the current buffer,
1915          * which we can wait on.  We would like to mitigate these interrupts,
1916          * likely by only creating seqnos occasionally (so that we have
1917          * *some* interrupts representing completion of buffers that we can
1918          * wait on when trying to clear up gtt space).
1919          */
1920         seqno = i915_add_request(dev, flush_domains);
1921         BUG_ON(seqno == 0);
1922         i915_file_priv->mm.last_gem_seqno = seqno;
1923         for (i = 0; i < args->buffer_count; i++) {
1924                 struct drm_gem_object *obj = object_list[i];
1925                 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1926
1927                 i915_gem_object_move_to_active(obj);
1928                 obj_priv->last_rendering_seqno = seqno;
1929 #if WATCH_LRU
1930                 DRM_INFO("%s: move to exec list %p\n", __func__, obj);
1931 #endif
1932         }
1933 #if WATCH_LRU
1934         i915_dump_lru(dev, __func__);
1935 #endif
1936
1937         i915_verify_inactive(dev, __FILE__, __LINE__);
1938
1939         /* Copy the new buffer offsets back to the user's exec list. */
1940         ret = copy_to_user((struct drm_i915_relocation_entry __user *)
1941                            (uintptr_t) args->buffers_ptr,
1942                            exec_list,
1943                            sizeof(*exec_list) * args->buffer_count);
1944         if (ret)
1945                 DRM_ERROR("failed to copy %d exec entries "
1946                           "back to user (%d)\n",
1947                            args->buffer_count, ret);
1948 err:
1949         if (object_list != NULL) {
1950                 for (i = 0; i < pinned; i++)
1951                         i915_gem_object_unpin(object_list[i]);
1952
1953                 for (i = 0; i < args->buffer_count; i++)
1954                         drm_gem_object_unreference(object_list[i]);
1955         }
1956         mutex_unlock(&dev->struct_mutex);
1957
1958 pre_mutex_err:
1959         drm_free(object_list, sizeof(*object_list) * args->buffer_count,
1960                  DRM_MEM_DRIVER);
1961         drm_free(exec_list, sizeof(*exec_list) * args->buffer_count,
1962                  DRM_MEM_DRIVER);
1963
1964         return ret;
1965 }
1966
1967 int
1968 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
1969 {
1970         struct drm_device *dev = obj->dev;
1971         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1972         int ret;
1973
1974         i915_verify_inactive(dev, __FILE__, __LINE__);
1975         if (obj_priv->gtt_space == NULL) {
1976                 ret = i915_gem_object_bind_to_gtt(obj, alignment);
1977                 if (ret != 0) {
1978                         DRM_ERROR("Failure to bind: %d", ret);
1979                         return ret;
1980                 }
1981         }
1982         obj_priv->pin_count++;
1983
1984         /* If the object is not active and not pending a flush,
1985          * remove it from the inactive list
1986          */
1987         if (obj_priv->pin_count == 1) {
1988                 atomic_inc(&dev->pin_count);
1989                 atomic_add(obj->size, &dev->pin_memory);
1990                 if (!obj_priv->active &&
1991                     (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
1992                                            I915_GEM_DOMAIN_GTT)) == 0 &&
1993                     !list_empty(&obj_priv->list))
1994                         list_del_init(&obj_priv->list);
1995         }
1996         i915_verify_inactive(dev, __FILE__, __LINE__);
1997
1998         return 0;
1999 }
2000
2001 void
2002 i915_gem_object_unpin(struct drm_gem_object *obj)
2003 {
2004         struct drm_device *dev = obj->dev;
2005         drm_i915_private_t *dev_priv = dev->dev_private;
2006         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2007
2008         i915_verify_inactive(dev, __FILE__, __LINE__);
2009         obj_priv->pin_count--;
2010         BUG_ON(obj_priv->pin_count < 0);
2011         BUG_ON(obj_priv->gtt_space == NULL);
2012
2013         /* If the object is no longer pinned, and is
2014          * neither active nor being flushed, then stick it on
2015          * the inactive list
2016          */
2017         if (obj_priv->pin_count == 0) {
2018                 if (!obj_priv->active &&
2019                     (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
2020                                            I915_GEM_DOMAIN_GTT)) == 0)
2021                         list_move_tail(&obj_priv->list,
2022                                        &dev_priv->mm.inactive_list);
2023                 atomic_dec(&dev->pin_count);
2024                 atomic_sub(obj->size, &dev->pin_memory);
2025         }
2026         i915_verify_inactive(dev, __FILE__, __LINE__);
2027 }
2028
2029 int
2030 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
2031                    struct drm_file *file_priv)
2032 {
2033         struct drm_i915_gem_pin *args = data;
2034         struct drm_gem_object *obj;
2035         struct drm_i915_gem_object *obj_priv;
2036         int ret;
2037
2038         mutex_lock(&dev->struct_mutex);
2039
2040         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
2041         if (obj == NULL) {
2042                 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
2043                           args->handle);
2044                 mutex_unlock(&dev->struct_mutex);
2045                 return -EBADF;
2046         }
2047         obj_priv = obj->driver_private;
2048
2049         ret = i915_gem_object_pin(obj, args->alignment);
2050         if (ret != 0) {
2051                 drm_gem_object_unreference(obj);
2052                 mutex_unlock(&dev->struct_mutex);
2053                 return ret;
2054         }
2055
2056         /* XXX - flush the CPU caches for pinned objects
2057          * as the X server doesn't manage domains yet
2058          */
2059         if (obj->write_domain & I915_GEM_DOMAIN_CPU) {
2060                 i915_gem_clflush_object(obj);
2061                 drm_agp_chipset_flush(dev);
2062                 obj->write_domain = 0;
2063         }
2064         args->offset = obj_priv->gtt_offset;
2065         drm_gem_object_unreference(obj);
2066         mutex_unlock(&dev->struct_mutex);
2067
2068         return 0;
2069 }
2070
2071 int
2072 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
2073                      struct drm_file *file_priv)
2074 {
2075         struct drm_i915_gem_pin *args = data;
2076         struct drm_gem_object *obj;
2077
2078         mutex_lock(&dev->struct_mutex);
2079
2080         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
2081         if (obj == NULL) {
2082                 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
2083                           args->handle);
2084                 mutex_unlock(&dev->struct_mutex);
2085                 return -EBADF;
2086         }
2087
2088         i915_gem_object_unpin(obj);
2089
2090         drm_gem_object_unreference(obj);
2091         mutex_unlock(&dev->struct_mutex);
2092         return 0;
2093 }
2094
2095 int
2096 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
2097                     struct drm_file *file_priv)
2098 {
2099         struct drm_i915_gem_busy *args = data;
2100         struct drm_gem_object *obj;
2101         struct drm_i915_gem_object *obj_priv;
2102
2103         mutex_lock(&dev->struct_mutex);
2104         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
2105         if (obj == NULL) {
2106                 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
2107                           args->handle);
2108                 mutex_unlock(&dev->struct_mutex);
2109                 return -EBADF;
2110         }
2111
2112         obj_priv = obj->driver_private;
2113         args->busy = obj_priv->active;
2114
2115         drm_gem_object_unreference(obj);
2116         mutex_unlock(&dev->struct_mutex);
2117         return 0;
2118 }
2119
/**
 * Throttle ioctl: thin wrapper around i915_gem_ring_throttle().
 *
 * @dev: DRM device
 * @data: ioctl argument block (not used)
 * @file_priv: DRM file of the caller
 *
 * Returns whatever i915_gem_ring_throttle() returns.
 */
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	int status;

	status = i915_gem_ring_throttle(dev, file_priv);

	return status;
}
2126
2127 int i915_gem_init_object(struct drm_gem_object *obj)
2128 {
2129         struct drm_i915_gem_object *obj_priv;
2130
2131         obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER);
2132         if (obj_priv == NULL)
2133                 return -ENOMEM;
2134
2135         /*
2136          * We've just allocated pages from the kernel,
2137          * so they've just been written by the CPU with
2138          * zeros. They'll need to be clflushed before we
2139          * use them with the GPU.
2140          */
2141         obj->write_domain = I915_GEM_DOMAIN_CPU;
2142         obj->read_domains = I915_GEM_DOMAIN_CPU;
2143
2144         obj_priv->agp_type = AGP_USER_MEMORY;
2145
2146         obj->driver_private = obj_priv;
2147         obj_priv->obj = obj;
2148         INIT_LIST_HEAD(&obj_priv->list);
2149         return 0;
2150 }
2151
2152 void i915_gem_free_object(struct drm_gem_object *obj)
2153 {
2154         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2155
2156         while (obj_priv->pin_count > 0)
2157                 i915_gem_object_unpin(obj);
2158
2159         i915_gem_object_unbind(obj);
2160
2161         drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
2162         drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
2163 }
2164
2165 static int
2166 i915_gem_set_domain(struct drm_gem_object *obj,
2167                     struct drm_file *file_priv,
2168                     uint32_t read_domains,
2169                     uint32_t write_domain)
2170 {
2171         struct drm_device *dev = obj->dev;
2172         int ret;
2173         uint32_t flush_domains;
2174
2175         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
2176
2177         ret = i915_gem_object_set_domain(obj, read_domains, write_domain);
2178         if (ret)
2179                 return ret;
2180         flush_domains = i915_gem_dev_set_domain(obj->dev);
2181
2182         if (flush_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT))
2183                 (void) i915_add_request(dev, flush_domains);
2184
2185         return 0;
2186 }
2187
2188 /** Unbinds all objects that are on the given buffer list. */
2189 static int
2190 i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)
2191 {
2192         struct drm_gem_object *obj;
2193         struct drm_i915_gem_object *obj_priv;
2194         int ret;
2195
2196         while (!list_empty(head)) {
2197                 obj_priv = list_first_entry(head,
2198                                             struct drm_i915_gem_object,
2199                                             list);
2200                 obj = obj_priv->obj;
2201
2202                 if (obj_priv->pin_count != 0) {
2203                         DRM_ERROR("Pinned object in unbind list\n");
2204                         mutex_unlock(&dev->struct_mutex);
2205                         return -EINVAL;
2206                 }
2207
2208                 ret = i915_gem_object_unbind(obj);
2209                 if (ret != 0) {
2210                         DRM_ERROR("Error unbinding object in LeaveVT: %d\n",
2211                                   ret);
2212                         mutex_unlock(&dev->struct_mutex);
2213                         return ret;
2214                 }
2215         }
2216
2217
2218         return 0;
2219 }
2220
2221 static int
2222 i915_gem_idle(struct drm_device *dev)
2223 {
2224         drm_i915_private_t *dev_priv = dev->dev_private;
2225         uint32_t seqno, cur_seqno, last_seqno;
2226         int stuck, ret;
2227
2228         mutex_lock(&dev->struct_mutex);
2229
2230         if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) {
2231                 mutex_unlock(&dev->struct_mutex);
2232                 return 0;
2233         }
2234
2235         /* Hack!  Don't let anybody do execbuf while we don't control the chip.
2236          * We need to replace this with a semaphore, or something.
2237          */
2238         dev_priv->mm.suspended = 1;
2239
2240         /* Cancel the retire work handler, wait for it to finish if running
2241          */
2242         mutex_unlock(&dev->struct_mutex);
2243         cancel_delayed_work_sync(&dev_priv->mm.retire_work);
2244         mutex_lock(&dev->struct_mutex);
2245
2246         i915_kernel_lost_context(dev);
2247
2248         /* Flush the GPU along with all non-CPU write domains
2249          */
2250         i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
2251                        ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
2252         seqno = i915_add_request(dev, ~(I915_GEM_DOMAIN_CPU |
2253                                         I915_GEM_DOMAIN_GTT));
2254
2255         if (seqno == 0) {
2256                 mutex_unlock(&dev->struct_mutex);
2257                 return -ENOMEM;
2258         }
2259
2260         dev_priv->mm.waiting_gem_seqno = seqno;
2261         last_seqno = 0;
2262         stuck = 0;
2263         for (;;) {
2264                 cur_seqno = i915_get_gem_seqno(dev);
2265                 if (i915_seqno_passed(cur_seqno, seqno))
2266                         break;
2267                 if (last_seqno == cur_seqno) {
2268                         if (stuck++ > 100) {
2269                                 DRM_ERROR("hardware wedged\n");
2270                                 dev_priv->mm.wedged = 1;
2271                                 DRM_WAKEUP(&dev_priv->irq_queue);
2272                                 break;
2273                         }
2274                 }
2275                 msleep(10);
2276                 last_seqno = cur_seqno;
2277         }
2278         dev_priv->mm.waiting_gem_seqno = 0;
2279
2280         i915_gem_retire_requests(dev);
2281
2282         /* Active and flushing should now be empty as we've
2283          * waited for a sequence higher than any pending execbuffer
2284          */
2285         BUG_ON(!list_empty(&dev_priv->mm.active_list));
2286         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
2287
2288         /* Request should now be empty as we've also waited
2289          * for the last request in the list
2290          */
2291         BUG_ON(!list_empty(&dev_priv->mm.request_list));
2292
2293         /* Move all buffers out of the GTT. */
2294         ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list);
2295         if (ret) {
2296                 mutex_unlock(&dev->struct_mutex);
2297                 return ret;
2298         }
2299
2300         BUG_ON(!list_empty(&dev_priv->mm.active_list));
2301         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
2302         BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
2303         BUG_ON(!list_empty(&dev_priv->mm.request_list));
2304
2305         i915_gem_cleanup_ringbuffer(dev);
2306         mutex_unlock(&dev->struct_mutex);
2307
2308         return 0;
2309 }
2310
2311 static int
2312 i915_gem_init_hws(struct drm_device *dev)
2313 {
2314         drm_i915_private_t *dev_priv = dev->dev_private;
2315         struct drm_gem_object *obj;
2316         struct drm_i915_gem_object *obj_priv;
2317         int ret;
2318
2319         /* If we need a physical address for the status page, it's already
2320          * initialized at driver load time.
2321          */
2322         if (!I915_NEED_GFX_HWS(dev))
2323                 return 0;
2324
2325         obj = drm_gem_object_alloc(dev, 4096);
2326         if (obj == NULL) {
2327                 DRM_ERROR("Failed to allocate status page\n");
2328                 return -ENOMEM;
2329         }
2330         obj_priv = obj->driver_private;
2331         obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
2332
2333         ret = i915_gem_object_pin(obj, 4096);
2334         if (ret != 0) {
2335                 drm_gem_object_unreference(obj);
2336                 return ret;
2337         }
2338
2339         dev_priv->status_gfx_addr = obj_priv->gtt_offset;
2340
2341         dev_priv->hw_status_page = kmap(obj_priv->page_list[0]);
2342         if (dev_priv->hw_status_page == NULL) {
2343                 DRM_ERROR("Failed to map status page.\n");
2344                 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
2345                 drm_gem_object_unreference(obj);
2346                 return -EINVAL;
2347         }
2348         dev_priv->hws_obj = obj;
2349         memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
2350         I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
2351         I915_READ(HWS_PGA); /* posting read */
2352         DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr);
2353
2354         return 0;
2355 }
2356
2357 static int
2358 i915_gem_init_ringbuffer(struct drm_device *dev)
2359 {
2360         drm_i915_private_t *dev_priv = dev->dev_private;
2361         struct drm_gem_object *obj;
2362         struct drm_i915_gem_object *obj_priv;
2363         int ret;
2364         u32 head;
2365
2366         ret = i915_gem_init_hws(dev);
2367         if (ret != 0)
2368                 return ret;
2369
2370         obj = drm_gem_object_alloc(dev, 128 * 1024);
2371         if (obj == NULL) {
2372                 DRM_ERROR("Failed to allocate ringbuffer\n");
2373                 return -ENOMEM;
2374         }
2375         obj_priv = obj->driver_private;
2376
2377         ret = i915_gem_object_pin(obj, 4096);
2378         if (ret != 0) {
2379                 drm_gem_object_unreference(obj);
2380                 return ret;
2381         }
2382
2383         /* Set up the kernel mapping for the ring. */
2384         dev_priv->ring.Size = obj->size;
2385         dev_priv->ring.tail_mask = obj->size - 1;
2386
2387         dev_priv->ring.map.offset = dev->agp->base + obj_priv->gtt_offset;
2388         dev_priv->ring.map.size = obj->size;
2389         dev_priv->ring.map.type = 0;
2390         dev_priv->ring.map.flags = 0;
2391         dev_priv->ring.map.mtrr = 0;
2392
2393         drm_core_ioremap_wc(&dev_priv->ring.map, dev);
2394         if (dev_priv->ring.map.handle == NULL) {
2395                 DRM_ERROR("Failed to map ringbuffer.\n");
2396                 memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
2397                 drm_gem_object_unreference(obj);
2398                 return -EINVAL;
2399         }
2400         dev_priv->ring.ring_obj = obj;
2401         dev_priv->ring.virtual_start = dev_priv->ring.map.handle;
2402
2403         /* Stop the ring if it's running. */
2404         I915_WRITE(PRB0_CTL, 0);
2405         I915_WRITE(PRB0_TAIL, 0);
2406         I915_WRITE(PRB0_HEAD, 0);
2407
2408         /* Initialize the ring. */
2409         I915_WRITE(PRB0_START, obj_priv->gtt_offset);
2410         head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
2411
2412         /* G45 ring initialization fails to reset head to zero */
2413         if (head != 0) {
2414                 DRM_ERROR("Ring head not reset to zero "
2415                           "ctl %08x head %08x tail %08x start %08x\n",
2416                           I915_READ(PRB0_CTL),
2417                           I915_READ(PRB0_HEAD),
2418                           I915_READ(PRB0_TAIL),
2419                           I915_READ(PRB0_START));
2420                 I915_WRITE(PRB0_HEAD, 0);
2421
2422                 DRM_ERROR("Ring head forced to zero "
2423                           "ctl %08x head %08x tail %08x start %08x\n",
2424                           I915_READ(PRB0_CTL),
2425                           I915_READ(PRB0_HEAD),
2426                           I915_READ(PRB0_TAIL),
2427                           I915_READ(PRB0_START));
2428         }
2429
2430         I915_WRITE(PRB0_CTL,
2431                    ((obj->size - 4096) & RING_NR_PAGES) |
2432                    RING_NO_REPORT |
2433                    RING_VALID);
2434
2435         head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
2436
2437         /* If the head is still not zero, the ring is dead */
2438         if (head != 0) {
2439                 DRM_ERROR("Ring initialization failed "
2440                           "ctl %08x head %08x tail %08x start %08x\n",
2441                           I915_READ(PRB0_CTL),
2442                           I915_READ(PRB0_HEAD),
2443                           I915_READ(PRB0_TAIL),
2444                           I915_READ(PRB0_START));
2445                 return -EIO;
2446         }
2447
2448         /* Update our cache of the ring state */
2449         i915_kernel_lost_context(dev);
2450
2451         return 0;
2452 }
2453
2454 static void
2455 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
2456 {
2457         drm_i915_private_t *dev_priv = dev->dev_private;
2458
2459         if (dev_priv->ring.ring_obj == NULL)
2460                 return;
2461
2462         drm_core_ioremapfree(&dev_priv->ring.map, dev);
2463
2464         i915_gem_object_unpin(dev_priv->ring.ring_obj);
2465         drm_gem_object_unreference(dev_priv->ring.ring_obj);
2466         dev_priv->ring.ring_obj = NULL;
2467         memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
2468
2469         if (dev_priv->hws_obj != NULL) {
2470                 struct drm_gem_object *obj = dev_priv->hws_obj;
2471                 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2472
2473                 kunmap(obj_priv->page_list[0]);
2474                 i915_gem_object_unpin(obj);
2475                 drm_gem_object_unreference(obj);
2476                 dev_priv->hws_obj = NULL;
2477                 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
2478                 dev_priv->hw_status_page = NULL;
2479
2480                 /* Write high address into HWS_PGA when disabling. */
2481                 I915_WRITE(HWS_PGA, 0x1ffff000);
2482         }
2483 }
2484
2485 int
2486 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
2487                        struct drm_file *file_priv)
2488 {
2489         drm_i915_private_t *dev_priv = dev->dev_private;
2490         int ret;
2491
2492         if (dev_priv->mm.wedged) {
2493                 DRM_ERROR("Reenabling wedged hardware, good luck\n");
2494                 dev_priv->mm.wedged = 0;
2495         }
2496
2497         ret = i915_gem_init_ringbuffer(dev);
2498         if (ret != 0)
2499                 return ret;
2500
2501         dev_priv->mm.gtt_mapping = io_mapping_create_wc(dev->agp->base,
2502                                                         dev->agp->agp_info.aper_size
2503                                                         * 1024 * 1024);
2504
2505         mutex_lock(&dev->struct_mutex);
2506         BUG_ON(!list_empty(&dev_priv->mm.active_list));
2507         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
2508         BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
2509         BUG_ON(!list_empty(&dev_priv->mm.request_list));
2510         dev_priv->mm.suspended = 0;
2511         mutex_unlock(&dev->struct_mutex);
2512
2513         drm_irq_install(dev);
2514
2515         return 0;
2516 }
2517
2518 int
2519 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
2520                        struct drm_file *file_priv)
2521 {
2522         drm_i915_private_t *dev_priv = dev->dev_private;
2523         int ret;
2524
2525         ret = i915_gem_idle(dev);
2526         drm_irq_uninstall(dev);
2527
2528         io_mapping_free(dev_priv->mm.gtt_mapping);
2529         return ret;
2530 }
2531
/*
 * Last-close hook: idle GEM and log an error if that fails.  The failure
 * is only reported, not propagated, since the function returns void.
 */
void
i915_gem_lastclose(struct drm_device *dev)
{
        int err = i915_gem_idle(dev);

        if (err != 0)
                DRM_ERROR("failed to idle hardware: %d\n", err);
}
2541
2542 void
2543 i915_gem_load(struct drm_device *dev)
2544 {
2545         drm_i915_private_t *dev_priv = dev->dev_private;
2546
2547         INIT_LIST_HEAD(&dev_priv->mm.active_list);
2548         INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
2549         INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
2550         INIT_LIST_HEAD(&dev_priv->mm.request_list);
2551         INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
2552                           i915_gem_retire_work_handler);
2553         dev_priv->mm.next_gem_seqno = 1;
2554
2555         i915_gem_detect_bit_6_swizzle(dev);
2556 }