]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - drivers/gpu/drm/i915/i915_gem.c
i915: Map status page cached for chips with GTT-based HWS location.
[linux-2.6-omap-h63xx.git] / drivers / gpu / drm / i915 / i915_gem.c
1 /*
2  * Copyright © 2008 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include "drmP.h"
29 #include "drm.h"
30 #include "i915_drm.h"
31 #include "i915_drv.h"
32 #include <linux/swap.h>
33
/*
 * Forward declarations of static helpers that are referenced before the
 * point in this file where they are defined.
 */
static int i915_gem_object_set_domain(struct drm_gem_object *obj,
				      uint32_t read_domains,
				      uint32_t write_domain);
static int i915_gem_object_set_domain_range(struct drm_gem_object *obj,
					    uint64_t offset,
					    uint64_t size,
					    uint32_t read_domains,
					    uint32_t write_domain);
static int i915_gem_set_domain(struct drm_gem_object *obj,
			       struct drm_file *file_priv,
			       uint32_t read_domains,
			       uint32_t write_domain);
static int i915_gem_object_get_page_list(struct drm_gem_object *obj);
static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
52
53 int
54 i915_gem_init_ioctl(struct drm_device *dev, void *data,
55                     struct drm_file *file_priv)
56 {
57         drm_i915_private_t *dev_priv = dev->dev_private;
58         struct drm_i915_gem_init *args = data;
59
60         mutex_lock(&dev->struct_mutex);
61
62         if (args->gtt_start >= args->gtt_end ||
63             (args->gtt_start & (PAGE_SIZE - 1)) != 0 ||
64             (args->gtt_end & (PAGE_SIZE - 1)) != 0) {
65                 mutex_unlock(&dev->struct_mutex);
66                 return -EINVAL;
67         }
68
69         drm_mm_init(&dev_priv->mm.gtt_space, args->gtt_start,
70             args->gtt_end - args->gtt_start);
71
72         dev->gtt_total = (uint32_t) (args->gtt_end - args->gtt_start);
73
74         mutex_unlock(&dev->struct_mutex);
75
76         return 0;
77 }
78
79
80 /**
81  * Creates a new mm object and returns a handle to it.
82  */
83 int
84 i915_gem_create_ioctl(struct drm_device *dev, void *data,
85                       struct drm_file *file_priv)
86 {
87         struct drm_i915_gem_create *args = data;
88         struct drm_gem_object *obj;
89         int handle, ret;
90
91         args->size = roundup(args->size, PAGE_SIZE);
92
93         /* Allocate the new object */
94         obj = drm_gem_object_alloc(dev, args->size);
95         if (obj == NULL)
96                 return -ENOMEM;
97
98         ret = drm_gem_handle_create(file_priv, obj, &handle);
99         mutex_lock(&dev->struct_mutex);
100         drm_gem_object_handle_unreference(obj);
101         mutex_unlock(&dev->struct_mutex);
102
103         if (ret)
104                 return ret;
105
106         args->handle = handle;
107
108         return 0;
109 }
110
111 /**
112  * Reads data from the object referenced by handle.
113  *
114  * On error, the contents of *data are undefined.
115  */
116 int
117 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
118                      struct drm_file *file_priv)
119 {
120         struct drm_i915_gem_pread *args = data;
121         struct drm_gem_object *obj;
122         struct drm_i915_gem_object *obj_priv;
123         ssize_t read;
124         loff_t offset;
125         int ret;
126
127         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
128         if (obj == NULL)
129                 return -EBADF;
130         obj_priv = obj->driver_private;
131
132         /* Bounds check source.
133          *
134          * XXX: This could use review for overflow issues...
135          */
136         if (args->offset > obj->size || args->size > obj->size ||
137             args->offset + args->size > obj->size) {
138                 drm_gem_object_unreference(obj);
139                 return -EINVAL;
140         }
141
142         mutex_lock(&dev->struct_mutex);
143
144         ret = i915_gem_object_set_domain_range(obj, args->offset, args->size,
145                                                I915_GEM_DOMAIN_CPU, 0);
146         if (ret != 0) {
147                 drm_gem_object_unreference(obj);
148                 mutex_unlock(&dev->struct_mutex);
149                 return ret;
150         }
151
152         offset = args->offset;
153
154         read = vfs_read(obj->filp, (char __user *)(uintptr_t)args->data_ptr,
155                         args->size, &offset);
156         if (read != args->size) {
157                 drm_gem_object_unreference(obj);
158                 mutex_unlock(&dev->struct_mutex);
159                 if (read < 0)
160                         return read;
161                 else
162                         return -EINVAL;
163         }
164
165         drm_gem_object_unreference(obj);
166         mutex_unlock(&dev->struct_mutex);
167
168         return 0;
169 }
170
171 static int
172 i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
173                     struct drm_i915_gem_pwrite *args,
174                     struct drm_file *file_priv)
175 {
176         struct drm_i915_gem_object *obj_priv = obj->driver_private;
177         ssize_t remain;
178         loff_t offset;
179         char __user *user_data;
180         char __iomem *vaddr;
181         char *vaddr_atomic;
182         int i, o, l;
183         int ret = 0;
184         unsigned long pfn;
185         unsigned long unwritten;
186
187         user_data = (char __user *) (uintptr_t) args->data_ptr;
188         remain = args->size;
189         if (!access_ok(VERIFY_READ, user_data, remain))
190                 return -EFAULT;
191
192
193         mutex_lock(&dev->struct_mutex);
194         ret = i915_gem_object_pin(obj, 0);
195         if (ret) {
196                 mutex_unlock(&dev->struct_mutex);
197                 return ret;
198         }
199         ret = i915_gem_set_domain(obj, file_priv,
200                                   I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
201         if (ret)
202                 goto fail;
203
204         obj_priv = obj->driver_private;
205         offset = obj_priv->gtt_offset + args->offset;
206         obj_priv->dirty = 1;
207
208         while (remain > 0) {
209                 /* Operation in this page
210                  *
211                  * i = page number
212                  * o = offset within page
213                  * l = bytes to copy
214                  */
215                 i = offset >> PAGE_SHIFT;
216                 o = offset & (PAGE_SIZE-1);
217                 l = remain;
218                 if ((o + l) > PAGE_SIZE)
219                         l = PAGE_SIZE - o;
220
221                 pfn = (dev->agp->base >> PAGE_SHIFT) + i;
222
223 #ifdef CONFIG_HIGHMEM
224                 /* This is a workaround for the low performance of iounmap
225                  * (approximate 10% cpu cost on normal 3D workloads).
226                  * kmap_atomic on HIGHMEM kernels happens to let us map card
227                  * memory without taking IPIs.  When the vmap rework lands
228                  * we should be able to dump this hack.
229                  */
230                 vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
231 #if WATCH_PWRITE
232                 DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
233                          i, o, l, pfn, vaddr_atomic);
234 #endif
235                 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o,
236                                                               user_data, l);
237                 kunmap_atomic(vaddr_atomic, KM_USER0);
238
239                 if (unwritten)
240 #endif /* CONFIG_HIGHMEM */
241                 {
242                         vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
243 #if WATCH_PWRITE
244                         DRM_INFO("pwrite slow i %d o %d l %d "
245                                  "pfn %ld vaddr %p\n",
246                                  i, o, l, pfn, vaddr);
247 #endif
248                         if (vaddr == NULL) {
249                                 ret = -EFAULT;
250                                 goto fail;
251                         }
252                         unwritten = __copy_from_user(vaddr + o, user_data, l);
253 #if WATCH_PWRITE
254                         DRM_INFO("unwritten %ld\n", unwritten);
255 #endif
256                         iounmap(vaddr);
257                         if (unwritten) {
258                                 ret = -EFAULT;
259                                 goto fail;
260                         }
261                 }
262
263                 remain -= l;
264                 user_data += l;
265                 offset += l;
266         }
267 #if WATCH_PWRITE && 1
268         i915_gem_clflush_object(obj);
269         i915_gem_dump_object(obj, args->offset + args->size, __func__, ~0);
270         i915_gem_clflush_object(obj);
271 #endif
272
273 fail:
274         i915_gem_object_unpin(obj);
275         mutex_unlock(&dev->struct_mutex);
276
277         return ret;
278 }
279
280 static int
281 i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
282                       struct drm_i915_gem_pwrite *args,
283                       struct drm_file *file_priv)
284 {
285         int ret;
286         loff_t offset;
287         ssize_t written;
288
289         mutex_lock(&dev->struct_mutex);
290
291         ret = i915_gem_set_domain(obj, file_priv,
292                                   I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
293         if (ret) {
294                 mutex_unlock(&dev->struct_mutex);
295                 return ret;
296         }
297
298         offset = args->offset;
299
300         written = vfs_write(obj->filp,
301                             (char __user *)(uintptr_t) args->data_ptr,
302                             args->size, &offset);
303         if (written != args->size) {
304                 mutex_unlock(&dev->struct_mutex);
305                 if (written < 0)
306                         return written;
307                 else
308                         return -EINVAL;
309         }
310
311         mutex_unlock(&dev->struct_mutex);
312
313         return 0;
314 }
315
316 /**
317  * Writes data to the object referenced by handle.
318  *
319  * On error, the contents of the buffer that were to be modified are undefined.
320  */
321 int
322 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
323                       struct drm_file *file_priv)
324 {
325         struct drm_i915_gem_pwrite *args = data;
326         struct drm_gem_object *obj;
327         struct drm_i915_gem_object *obj_priv;
328         int ret = 0;
329
330         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
331         if (obj == NULL)
332                 return -EBADF;
333         obj_priv = obj->driver_private;
334
335         /* Bounds check destination.
336          *
337          * XXX: This could use review for overflow issues...
338          */
339         if (args->offset > obj->size || args->size > obj->size ||
340             args->offset + args->size > obj->size) {
341                 drm_gem_object_unreference(obj);
342                 return -EINVAL;
343         }
344
345         /* We can only do the GTT pwrite on untiled buffers, as otherwise
346          * it would end up going through the fenced access, and we'll get
347          * different detiling behavior between reading and writing.
348          * pread/pwrite currently are reading and writing from the CPU
349          * perspective, requiring manual detiling by the client.
350          */
351         if (obj_priv->tiling_mode == I915_TILING_NONE &&
352             dev->gtt_total != 0)
353                 ret = i915_gem_gtt_pwrite(dev, obj, args, file_priv);
354         else
355                 ret = i915_gem_shmem_pwrite(dev, obj, args, file_priv);
356
357 #if WATCH_PWRITE
358         if (ret)
359                 DRM_INFO("pwrite failed %d\n", ret);
360 #endif
361
362         drm_gem_object_unreference(obj);
363
364         return ret;
365 }
366
367 /**
368  * Called when user space prepares to use an object
369  */
370 int
371 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
372                           struct drm_file *file_priv)
373 {
374         struct drm_i915_gem_set_domain *args = data;
375         struct drm_gem_object *obj;
376         int ret;
377
378         if (!(dev->driver->driver_features & DRIVER_GEM))
379                 return -ENODEV;
380
381         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
382         if (obj == NULL)
383                 return -EBADF;
384
385         mutex_lock(&dev->struct_mutex);
386 #if WATCH_BUF
387         DRM_INFO("set_domain_ioctl %p(%d), %08x %08x\n",
388                  obj, obj->size, args->read_domains, args->write_domain);
389 #endif
390         ret = i915_gem_set_domain(obj, file_priv,
391                                   args->read_domains, args->write_domain);
392         drm_gem_object_unreference(obj);
393         mutex_unlock(&dev->struct_mutex);
394         return ret;
395 }
396
397 /**
398  * Called when user space has done writes to this buffer
399  */
400 int
401 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
402                       struct drm_file *file_priv)
403 {
404         struct drm_i915_gem_sw_finish *args = data;
405         struct drm_gem_object *obj;
406         struct drm_i915_gem_object *obj_priv;
407         int ret = 0;
408
409         if (!(dev->driver->driver_features & DRIVER_GEM))
410                 return -ENODEV;
411
412         mutex_lock(&dev->struct_mutex);
413         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
414         if (obj == NULL) {
415                 mutex_unlock(&dev->struct_mutex);
416                 return -EBADF;
417         }
418
419 #if WATCH_BUF
420         DRM_INFO("%s: sw_finish %d (%p %d)\n",
421                  __func__, args->handle, obj, obj->size);
422 #endif
423         obj_priv = obj->driver_private;
424
425         /* Pinned buffers may be scanout, so flush the cache */
426         if ((obj->write_domain & I915_GEM_DOMAIN_CPU) && obj_priv->pin_count) {
427                 i915_gem_clflush_object(obj);
428                 drm_agp_chipset_flush(dev);
429         }
430         drm_gem_object_unreference(obj);
431         mutex_unlock(&dev->struct_mutex);
432         return ret;
433 }
434
435 /**
436  * Maps the contents of an object, returning the address it is mapped
437  * into.
438  *
439  * While the mapping holds a reference on the contents of the object, it doesn't
440  * imply a ref on the object itself.
441  */
442 int
443 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
444                    struct drm_file *file_priv)
445 {
446         struct drm_i915_gem_mmap *args = data;
447         struct drm_gem_object *obj;
448         loff_t offset;
449         unsigned long addr;
450
451         if (!(dev->driver->driver_features & DRIVER_GEM))
452                 return -ENODEV;
453
454         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
455         if (obj == NULL)
456                 return -EBADF;
457
458         offset = args->offset;
459
460         down_write(&current->mm->mmap_sem);
461         addr = do_mmap(obj->filp, 0, args->size,
462                        PROT_READ | PROT_WRITE, MAP_SHARED,
463                        args->offset);
464         up_write(&current->mm->mmap_sem);
465         mutex_lock(&dev->struct_mutex);
466         drm_gem_object_unreference(obj);
467         mutex_unlock(&dev->struct_mutex);
468         if (IS_ERR((void *)addr))
469                 return addr;
470
471         args->addr_ptr = (uint64_t) addr;
472
473         return 0;
474 }
475
476 static void
477 i915_gem_object_free_page_list(struct drm_gem_object *obj)
478 {
479         struct drm_i915_gem_object *obj_priv = obj->driver_private;
480         int page_count = obj->size / PAGE_SIZE;
481         int i;
482
483         if (obj_priv->page_list == NULL)
484                 return;
485
486
487         for (i = 0; i < page_count; i++)
488                 if (obj_priv->page_list[i] != NULL) {
489                         if (obj_priv->dirty)
490                                 set_page_dirty(obj_priv->page_list[i]);
491                         mark_page_accessed(obj_priv->page_list[i]);
492                         page_cache_release(obj_priv->page_list[i]);
493                 }
494         obj_priv->dirty = 0;
495
496         drm_free(obj_priv->page_list,
497                  page_count * sizeof(struct page *),
498                  DRM_MEM_DRIVER);
499         obj_priv->page_list = NULL;
500 }
501
502 static void
503 i915_gem_object_move_to_active(struct drm_gem_object *obj)
504 {
505         struct drm_device *dev = obj->dev;
506         drm_i915_private_t *dev_priv = dev->dev_private;
507         struct drm_i915_gem_object *obj_priv = obj->driver_private;
508
509         /* Add a reference if we're newly entering the active list. */
510         if (!obj_priv->active) {
511                 drm_gem_object_reference(obj);
512                 obj_priv->active = 1;
513         }
514         /* Move from whatever list we were on to the tail of execution. */
515         list_move_tail(&obj_priv->list,
516                        &dev_priv->mm.active_list);
517 }
518
519
520 static void
521 i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
522 {
523         struct drm_device *dev = obj->dev;
524         drm_i915_private_t *dev_priv = dev->dev_private;
525         struct drm_i915_gem_object *obj_priv = obj->driver_private;
526
527         i915_verify_inactive(dev, __FILE__, __LINE__);
528         if (obj_priv->pin_count != 0)
529                 list_del_init(&obj_priv->list);
530         else
531                 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
532
533         if (obj_priv->active) {
534                 obj_priv->active = 0;
535                 drm_gem_object_unreference(obj);
536         }
537         i915_verify_inactive(dev, __FILE__, __LINE__);
538 }
539
540 /**
541  * Creates a new sequence number, emitting a write of it to the status page
542  * plus an interrupt, which will trigger i915_user_interrupt_handler.
543  *
544  * Must be called with struct_lock held.
545  *
546  * Returned sequence numbers are nonzero on success.
547  */
548 static uint32_t
549 i915_add_request(struct drm_device *dev, uint32_t flush_domains)
550 {
551         drm_i915_private_t *dev_priv = dev->dev_private;
552         struct drm_i915_gem_request *request;
553         uint32_t seqno;
554         int was_empty;
555         RING_LOCALS;
556
557         request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
558         if (request == NULL)
559                 return 0;
560
561         /* Grab the seqno we're going to make this request be, and bump the
562          * next (skipping 0 so it can be the reserved no-seqno value).
563          */
564         seqno = dev_priv->mm.next_gem_seqno;
565         dev_priv->mm.next_gem_seqno++;
566         if (dev_priv->mm.next_gem_seqno == 0)
567                 dev_priv->mm.next_gem_seqno++;
568
569         BEGIN_LP_RING(4);
570         OUT_RING(MI_STORE_DWORD_INDEX);
571         OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
572         OUT_RING(seqno);
573
574         OUT_RING(MI_USER_INTERRUPT);
575         ADVANCE_LP_RING();
576
577         DRM_DEBUG("%d\n", seqno);
578
579         request->seqno = seqno;
580         request->emitted_jiffies = jiffies;
581         request->flush_domains = flush_domains;
582         was_empty = list_empty(&dev_priv->mm.request_list);
583         list_add_tail(&request->list, &dev_priv->mm.request_list);
584
585         if (was_empty)
586                 schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
587         return seqno;
588 }
589
590 /**
591  * Command execution barrier
592  *
593  * Ensures that all commands in the ring are finished
594  * before signalling the CPU
595  */
596 static uint32_t
597 i915_retire_commands(struct drm_device *dev)
598 {
599         drm_i915_private_t *dev_priv = dev->dev_private;
600         uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
601         uint32_t flush_domains = 0;
602         RING_LOCALS;
603
604         /* The sampler always gets flushed on i965 (sigh) */
605         if (IS_I965G(dev))
606                 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
607         BEGIN_LP_RING(2);
608         OUT_RING(cmd);
609         OUT_RING(0); /* noop */
610         ADVANCE_LP_RING();
611         return flush_domains;
612 }
613
614 /**
615  * Moves buffers associated only with the given active seqno from the active
616  * to inactive list, potentially freeing them.
617  */
618 static void
619 i915_gem_retire_request(struct drm_device *dev,
620                         struct drm_i915_gem_request *request)
621 {
622         drm_i915_private_t *dev_priv = dev->dev_private;
623
624         /* Move any buffers on the active list that are no longer referenced
625          * by the ringbuffer to the flushing/inactive lists as appropriate.
626          */
627         while (!list_empty(&dev_priv->mm.active_list)) {
628                 struct drm_gem_object *obj;
629                 struct drm_i915_gem_object *obj_priv;
630
631                 obj_priv = list_first_entry(&dev_priv->mm.active_list,
632                                             struct drm_i915_gem_object,
633                                             list);
634                 obj = obj_priv->obj;
635
636                 /* If the seqno being retired doesn't match the oldest in the
637                  * list, then the oldest in the list must still be newer than
638                  * this seqno.
639                  */
640                 if (obj_priv->last_rendering_seqno != request->seqno)
641                         return;
642 #if WATCH_LRU
643                 DRM_INFO("%s: retire %d moves to inactive list %p\n",
644                          __func__, request->seqno, obj);
645 #endif
646
647                 if (obj->write_domain != 0) {
648                         list_move_tail(&obj_priv->list,
649                                        &dev_priv->mm.flushing_list);
650                 } else {
651                         i915_gem_object_move_to_inactive(obj);
652                 }
653         }
654
655         if (request->flush_domains != 0) {
656                 struct drm_i915_gem_object *obj_priv, *next;
657
658                 /* Clear the write domain and activity from any buffers
659                  * that are just waiting for a flush matching the one retired.
660                  */
661                 list_for_each_entry_safe(obj_priv, next,
662                                          &dev_priv->mm.flushing_list, list) {
663                         struct drm_gem_object *obj = obj_priv->obj;
664
665                         if (obj->write_domain & request->flush_domains) {
666                                 obj->write_domain = 0;
667                                 i915_gem_object_move_to_inactive(obj);
668                         }
669                 }
670
671         }
672 }
673
/**
 * Returns true if seq1 is at or after seq2.
 *
 * The comparison is made on the signed 32-bit difference, so it stays
 * correct when the sequence counter wraps around.
 */
static int
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	int32_t distance = (int32_t)(seq1 - seq2);

	return distance >= 0;
}
682
683 uint32_t
684 i915_get_gem_seqno(struct drm_device *dev)
685 {
686         drm_i915_private_t *dev_priv = dev->dev_private;
687
688         return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
689 }
690
691 /**
692  * This function clears the request list as sequence numbers are passed.
693  */
694 void
695 i915_gem_retire_requests(struct drm_device *dev)
696 {
697         drm_i915_private_t *dev_priv = dev->dev_private;
698         uint32_t seqno;
699
700         seqno = i915_get_gem_seqno(dev);
701
702         while (!list_empty(&dev_priv->mm.request_list)) {
703                 struct drm_i915_gem_request *request;
704                 uint32_t retiring_seqno;
705
706                 request = list_first_entry(&dev_priv->mm.request_list,
707                                            struct drm_i915_gem_request,
708                                            list);
709                 retiring_seqno = request->seqno;
710
711                 if (i915_seqno_passed(seqno, retiring_seqno) ||
712                     dev_priv->mm.wedged) {
713                         i915_gem_retire_request(dev, request);
714
715                         list_del(&request->list);
716                         drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
717                 } else
718                         break;
719         }
720 }
721
722 void
723 i915_gem_retire_work_handler(struct work_struct *work)
724 {
725         drm_i915_private_t *dev_priv;
726         struct drm_device *dev;
727
728         dev_priv = container_of(work, drm_i915_private_t,
729                                 mm.retire_work.work);
730         dev = dev_priv->dev;
731
732         mutex_lock(&dev->struct_mutex);
733         i915_gem_retire_requests(dev);
734         if (!list_empty(&dev_priv->mm.request_list))
735                 schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
736         mutex_unlock(&dev->struct_mutex);
737 }
738
739 /**
740  * Waits for a sequence number to be signaled, and cleans up the
741  * request and object lists appropriately for that event.
742  */
743 static int
744 i915_wait_request(struct drm_device *dev, uint32_t seqno)
745 {
746         drm_i915_private_t *dev_priv = dev->dev_private;
747         int ret = 0;
748
749         BUG_ON(seqno == 0);
750
751         if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
752                 dev_priv->mm.waiting_gem_seqno = seqno;
753                 i915_user_irq_get(dev);
754                 ret = wait_event_interruptible(dev_priv->irq_queue,
755                                                i915_seqno_passed(i915_get_gem_seqno(dev),
756                                                                  seqno) ||
757                                                dev_priv->mm.wedged);
758                 i915_user_irq_put(dev);
759                 dev_priv->mm.waiting_gem_seqno = 0;
760         }
761         if (dev_priv->mm.wedged)
762                 ret = -EIO;
763
764         if (ret && ret != -ERESTARTSYS)
765                 DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
766                           __func__, ret, seqno, i915_get_gem_seqno(dev));
767
768         /* Directly dispatch request retiring.  While we have the work queue
769          * to handle this, the waiter on a request often wants an associated
770          * buffer to have made it to the inactive list, and we would need
771          * a separate wait queue to handle that.
772          */
773         if (ret == 0)
774                 i915_gem_retire_requests(dev);
775
776         return ret;
777 }
778
779 static void
780 i915_gem_flush(struct drm_device *dev,
781                uint32_t invalidate_domains,
782                uint32_t flush_domains)
783 {
784         drm_i915_private_t *dev_priv = dev->dev_private;
785         uint32_t cmd;
786         RING_LOCALS;
787
788 #if WATCH_EXEC
789         DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
790                   invalidate_domains, flush_domains);
791 #endif
792
793         if (flush_domains & I915_GEM_DOMAIN_CPU)
794                 drm_agp_chipset_flush(dev);
795
796         if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU |
797                                                      I915_GEM_DOMAIN_GTT)) {
798                 /*
799                  * read/write caches:
800                  *
801                  * I915_GEM_DOMAIN_RENDER is always invalidated, but is
802                  * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
803                  * also flushed at 2d versus 3d pipeline switches.
804                  *
805                  * read-only caches:
806                  *
807                  * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
808                  * MI_READ_FLUSH is set, and is always flushed on 965.
809                  *
810                  * I915_GEM_DOMAIN_COMMAND may not exist?
811                  *
812                  * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
813                  * invalidated when MI_EXE_FLUSH is set.
814                  *
815                  * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
816                  * invalidated with every MI_FLUSH.
817                  *
818                  * TLBs:
819                  *
820                  * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
821                  * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
822                  * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
823                  * are flushed at any MI_FLUSH.
824                  */
825
826                 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
827                 if ((invalidate_domains|flush_domains) &
828                     I915_GEM_DOMAIN_RENDER)
829                         cmd &= ~MI_NO_WRITE_FLUSH;
830                 if (!IS_I965G(dev)) {
831                         /*
832                          * On the 965, the sampler cache always gets flushed
833                          * and this bit is reserved.
834                          */
835                         if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
836                                 cmd |= MI_READ_FLUSH;
837                 }
838                 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
839                         cmd |= MI_EXE_FLUSH;
840
841 #if WATCH_EXEC
842                 DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd);
843 #endif
844                 BEGIN_LP_RING(2);
845                 OUT_RING(cmd);
846                 OUT_RING(0); /* noop */
847                 ADVANCE_LP_RING();
848         }
849 }
850
851 /**
852  * Ensures that all rendering to the object has completed and the object is
853  * safe to unbind from the GTT or access from the CPU.
854  */
855 static int
856 i915_gem_object_wait_rendering(struct drm_gem_object *obj)
857 {
858         struct drm_device *dev = obj->dev;
859         struct drm_i915_gem_object *obj_priv = obj->driver_private;
860         int ret;
861
862         /* If there are writes queued to the buffer, flush and
863          * create a new seqno to wait for.
864          */
865         if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) {
866                 uint32_t write_domain = obj->write_domain;
867 #if WATCH_BUF
868                 DRM_INFO("%s: flushing object %p from write domain %08x\n",
869                           __func__, obj, write_domain);
870 #endif
871                 i915_gem_flush(dev, 0, write_domain);
872
873                 i915_gem_object_move_to_active(obj);
874                 obj_priv->last_rendering_seqno = i915_add_request(dev,
875                                                                   write_domain);
876                 BUG_ON(obj_priv->last_rendering_seqno == 0);
877 #if WATCH_LRU
878                 DRM_INFO("%s: flush moves to exec list %p\n", __func__, obj);
879 #endif
880         }
881
882         /* If there is rendering queued on the buffer being evicted, wait for
883          * it.
884          */
885         if (obj_priv->active) {
886 #if WATCH_BUF
887                 DRM_INFO("%s: object %p wait for seqno %08x\n",
888                           __func__, obj, obj_priv->last_rendering_seqno);
889 #endif
890                 ret = i915_wait_request(dev, obj_priv->last_rendering_seqno);
891                 if (ret != 0)
892                         return ret;
893         }
894
895         return 0;
896 }
897
898 /**
899  * Unbinds an object from the GTT aperture.
900  */
901 static int
902 i915_gem_object_unbind(struct drm_gem_object *obj)
903 {
904         struct drm_device *dev = obj->dev;
905         struct drm_i915_gem_object *obj_priv = obj->driver_private;
906         int ret = 0;
907
908 #if WATCH_BUF
909         DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
910         DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
911 #endif
912         if (obj_priv->gtt_space == NULL)
913                 return 0;
914
915         if (obj_priv->pin_count != 0) {
916                 DRM_ERROR("Attempting to unbind pinned buffer\n");
917                 return -EINVAL;
918         }
919
920         /* Wait for any rendering to complete
921          */
922         ret = i915_gem_object_wait_rendering(obj);
923         if (ret) {
924                 DRM_ERROR("wait_rendering failed: %d\n", ret);
925                 return ret;
926         }
927
928         /* Move the object to the CPU domain to ensure that
929          * any possible CPU writes while it's not in the GTT
930          * are flushed when we go to remap it. This will
931          * also ensure that all pending GPU writes are finished
932          * before we unbind.
933          */
934         ret = i915_gem_object_set_domain(obj, I915_GEM_DOMAIN_CPU,
935                                          I915_GEM_DOMAIN_CPU);
936         if (ret) {
937                 DRM_ERROR("set_domain failed: %d\n", ret);
938                 return ret;
939         }
940
941         if (obj_priv->agp_mem != NULL) {
942                 drm_unbind_agp(obj_priv->agp_mem);
943                 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
944                 obj_priv->agp_mem = NULL;
945         }
946
947         BUG_ON(obj_priv->active);
948
949         i915_gem_object_free_page_list(obj);
950
951         if (obj_priv->gtt_space) {
952                 atomic_dec(&dev->gtt_count);
953                 atomic_sub(obj->size, &dev->gtt_memory);
954
955                 drm_mm_put_block(obj_priv->gtt_space);
956                 obj_priv->gtt_space = NULL;
957         }
958
959         /* Remove ourselves from the LRU list if present. */
960         if (!list_empty(&obj_priv->list))
961                 list_del_init(&obj_priv->list);
962
963         return 0;
964 }
965
966 static int
967 i915_gem_evict_something(struct drm_device *dev)
968 {
969         drm_i915_private_t *dev_priv = dev->dev_private;
970         struct drm_gem_object *obj;
971         struct drm_i915_gem_object *obj_priv;
972         int ret = 0;
973
974         for (;;) {
975                 /* If there's an inactive buffer available now, grab it
976                  * and be done.
977                  */
978                 if (!list_empty(&dev_priv->mm.inactive_list)) {
979                         obj_priv = list_first_entry(&dev_priv->mm.inactive_list,
980                                                     struct drm_i915_gem_object,
981                                                     list);
982                         obj = obj_priv->obj;
983                         BUG_ON(obj_priv->pin_count != 0);
984 #if WATCH_LRU
985                         DRM_INFO("%s: evicting %p\n", __func__, obj);
986 #endif
987                         BUG_ON(obj_priv->active);
988
989                         /* Wait on the rendering and unbind the buffer. */
990                         ret = i915_gem_object_unbind(obj);
991                         break;
992                 }
993
994                 /* If we didn't get anything, but the ring is still processing
995                  * things, wait for one of those things to finish and hopefully
996                  * leave us a buffer to evict.
997                  */
998                 if (!list_empty(&dev_priv->mm.request_list)) {
999                         struct drm_i915_gem_request *request;
1000
1001                         request = list_first_entry(&dev_priv->mm.request_list,
1002                                                    struct drm_i915_gem_request,
1003                                                    list);
1004
1005                         ret = i915_wait_request(dev, request->seqno);
1006                         if (ret)
1007                                 break;
1008
1009                         /* if waiting caused an object to become inactive,
1010                          * then loop around and wait for it. Otherwise, we
1011                          * assume that waiting freed and unbound something,
1012                          * so there should now be some space in the GTT
1013                          */
1014                         if (!list_empty(&dev_priv->mm.inactive_list))
1015                                 continue;
1016                         break;
1017                 }
1018
1019                 /* If we didn't have anything on the request list but there
1020                  * are buffers awaiting a flush, emit one and try again.
1021                  * When we wait on it, those buffers waiting for that flush
1022                  * will get moved to inactive.
1023                  */
1024                 if (!list_empty(&dev_priv->mm.flushing_list)) {
1025                         obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
1026                                                     struct drm_i915_gem_object,
1027                                                     list);
1028                         obj = obj_priv->obj;
1029
1030                         i915_gem_flush(dev,
1031                                        obj->write_domain,
1032                                        obj->write_domain);
1033                         i915_add_request(dev, obj->write_domain);
1034
1035                         obj = NULL;
1036                         continue;
1037                 }
1038
1039                 DRM_ERROR("inactive empty %d request empty %d "
1040                           "flushing empty %d\n",
1041                           list_empty(&dev_priv->mm.inactive_list),
1042                           list_empty(&dev_priv->mm.request_list),
1043                           list_empty(&dev_priv->mm.flushing_list));
1044                 /* If we didn't do any of the above, there's nothing to be done
1045                  * and we just can't fit it in.
1046                  */
1047                 return -ENOMEM;
1048         }
1049         return ret;
1050 }
1051
1052 static int
1053 i915_gem_object_get_page_list(struct drm_gem_object *obj)
1054 {
1055         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1056         int page_count, i;
1057         struct address_space *mapping;
1058         struct inode *inode;
1059         struct page *page;
1060         int ret;
1061
1062         if (obj_priv->page_list)
1063                 return 0;
1064
1065         /* Get the list of pages out of our struct file.  They'll be pinned
1066          * at this point until we release them.
1067          */
1068         page_count = obj->size / PAGE_SIZE;
1069         BUG_ON(obj_priv->page_list != NULL);
1070         obj_priv->page_list = drm_calloc(page_count, sizeof(struct page *),
1071                                          DRM_MEM_DRIVER);
1072         if (obj_priv->page_list == NULL) {
1073                 DRM_ERROR("Faled to allocate page list\n");
1074                 return -ENOMEM;
1075         }
1076
1077         inode = obj->filp->f_path.dentry->d_inode;
1078         mapping = inode->i_mapping;
1079         for (i = 0; i < page_count; i++) {
1080                 page = read_mapping_page(mapping, i, NULL);
1081                 if (IS_ERR(page)) {
1082                         ret = PTR_ERR(page);
1083                         DRM_ERROR("read_mapping_page failed: %d\n", ret);
1084                         i915_gem_object_free_page_list(obj);
1085                         return ret;
1086                 }
1087                 obj_priv->page_list[i] = page;
1088         }
1089         return 0;
1090 }
1091
1092 /**
1093  * Finds free space in the GTT aperture and binds the object there.
1094  */
1095 static int
1096 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
1097 {
1098         struct drm_device *dev = obj->dev;
1099         drm_i915_private_t *dev_priv = dev->dev_private;
1100         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1101         struct drm_mm_node *free_space;
1102         int page_count, ret;
1103
1104         if (alignment == 0)
1105                 alignment = PAGE_SIZE;
1106         if (alignment & (PAGE_SIZE - 1)) {
1107                 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
1108                 return -EINVAL;
1109         }
1110
1111  search_free:
1112         free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
1113                                         obj->size, alignment, 0);
1114         if (free_space != NULL) {
1115                 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
1116                                                        alignment);
1117                 if (obj_priv->gtt_space != NULL) {
1118                         obj_priv->gtt_space->private = obj;
1119                         obj_priv->gtt_offset = obj_priv->gtt_space->start;
1120                 }
1121         }
1122         if (obj_priv->gtt_space == NULL) {
1123                 /* If the gtt is empty and we're still having trouble
1124                  * fitting our object in, we're out of memory.
1125                  */
1126 #if WATCH_LRU
1127                 DRM_INFO("%s: GTT full, evicting something\n", __func__);
1128 #endif
1129                 if (list_empty(&dev_priv->mm.inactive_list) &&
1130                     list_empty(&dev_priv->mm.flushing_list) &&
1131                     list_empty(&dev_priv->mm.active_list)) {
1132                         DRM_ERROR("GTT full, but LRU list empty\n");
1133                         return -ENOMEM;
1134                 }
1135
1136                 ret = i915_gem_evict_something(dev);
1137                 if (ret != 0) {
1138                         DRM_ERROR("Failed to evict a buffer %d\n", ret);
1139                         return ret;
1140                 }
1141                 goto search_free;
1142         }
1143
1144 #if WATCH_BUF
1145         DRM_INFO("Binding object of size %d at 0x%08x\n",
1146                  obj->size, obj_priv->gtt_offset);
1147 #endif
1148         ret = i915_gem_object_get_page_list(obj);
1149         if (ret) {
1150                 drm_mm_put_block(obj_priv->gtt_space);
1151                 obj_priv->gtt_space = NULL;
1152                 return ret;
1153         }
1154
1155         page_count = obj->size / PAGE_SIZE;
1156         /* Create an AGP memory structure pointing at our pages, and bind it
1157          * into the GTT.
1158          */
1159         obj_priv->agp_mem = drm_agp_bind_pages(dev,
1160                                                obj_priv->page_list,
1161                                                page_count,
1162                                                obj_priv->gtt_offset,
1163                                                obj_priv->agp_type);
1164         if (obj_priv->agp_mem == NULL) {
1165                 i915_gem_object_free_page_list(obj);
1166                 drm_mm_put_block(obj_priv->gtt_space);
1167                 obj_priv->gtt_space = NULL;
1168                 return -ENOMEM;
1169         }
1170         atomic_inc(&dev->gtt_count);
1171         atomic_add(obj->size, &dev->gtt_memory);
1172
1173         /* Assert that the object is not currently in any GPU domain. As it
1174          * wasn't in the GTT, there shouldn't be any way it could have been in
1175          * a GPU cache
1176          */
1177         BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
1178         BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
1179
1180         return 0;
1181 }
1182
1183 void
1184 i915_gem_clflush_object(struct drm_gem_object *obj)
1185 {
1186         struct drm_i915_gem_object      *obj_priv = obj->driver_private;
1187
1188         /* If we don't have a page list set up, then we're not pinned
1189          * to GPU, and we can ignore the cache flush because it'll happen
1190          * again at bind time.
1191          */
1192         if (obj_priv->page_list == NULL)
1193                 return;
1194
1195         drm_clflush_pages(obj_priv->page_list, obj->size / PAGE_SIZE);
1196 }
1197
1198 /*
1199  * Set the next domain for the specified object. This
1200  * may not actually perform the necessary flushing/invalidating though,
1201  * as that may want to be batched with other set_domain operations
1202  *
1203  * This is (we hope) the only really tricky part of gem. The goal
1204  * is fairly simple -- track which caches hold bits of the object
1205  * and make sure they remain coherent. A few concrete examples may
1206  * help to explain how it works. For shorthand, we use the notation
1207  * (read_domains, write_domain), e.g. (CPU, CPU), to indicate a
1208  * pair of read and write domain masks.
1209  *
1210  * Case 1: the batch buffer
1211  *
1212  *      1. Allocated
1213  *      2. Written by CPU
1214  *      3. Mapped to GTT
1215  *      4. Read by GPU
1216  *      5. Unmapped from GTT
1217  *      6. Freed
1218  *
1219  *      Let's take these a step at a time
1220  *
1221  *      1. Allocated
1222  *              Pages allocated from the kernel may still have
1223  *              cache contents, so we set them to (CPU, CPU) always.
1224  *      2. Written by CPU (using pwrite)
1225  *              The pwrite function calls set_domain (CPU, CPU) and
1226  *              this function does nothing (as nothing changes)
1227  *      3. Mapped by GTT
1228  *              This function asserts that the object is not
1229  *              currently in any GPU-based read or write domains
1230  *      4. Read by GPU
1231  *              i915_gem_execbuffer calls set_domain (COMMAND, 0).
1232  *              As write_domain is zero, this function adds in the
1233  *              current read domains (CPU+COMMAND, 0).
1234  *              flush_domains is set to CPU.
1235  *              invalidate_domains is set to COMMAND
1236  *              clflush is run to get data out of the CPU caches
1237  *              then i915_dev_set_domain calls i915_gem_flush to
1238  *              emit an MI_FLUSH and drm_agp_chipset_flush
1239  *      5. Unmapped from GTT
1240  *              i915_gem_object_unbind calls set_domain (CPU, CPU)
1241  *              flush_domains and invalidate_domains end up both zero
1242  *              so no flushing/invalidating happens
1243  *      6. Freed
1244  *              yay, done
1245  *
1246  * Case 2: The shared render buffer
1247  *
1248  *      1. Allocated
1249  *      2. Mapped to GTT
1250  *      3. Read/written by GPU
1251  *      4. set_domain to (CPU,CPU)
1252  *      5. Read/written by CPU
1253  *      6. Read/written by GPU
1254  *
1255  *      1. Allocated
1256  *              Same as last example, (CPU, CPU)
1257  *      2. Mapped to GTT
1258  *              Nothing changes (assertions find that it is not in the GPU)
1259  *      3. Read/written by GPU
1260  *              execbuffer calls set_domain (RENDER, RENDER)
1261  *              flush_domains gets CPU
1262  *              invalidate_domains gets GPU
1263  *              clflush (obj)
1264  *              MI_FLUSH and drm_agp_chipset_flush
1265  *      4. set_domain (CPU, CPU)
1266  *              flush_domains gets GPU
1267  *              invalidate_domains gets CPU
1268  *              wait_rendering (obj) to make sure all drawing is complete.
1269  *              This will include an MI_FLUSH to get the data from GPU
1270  *              to memory
1271  *              clflush (obj) to invalidate the CPU cache
1272  *              Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
1273  *      5. Read/written by CPU
1274  *              cache lines are loaded and dirtied
1275  *      6. Read/written by GPU
1276  *              Same as last GPU access
1277  *
1278  * Case 3: The constant buffer
1279  *
1280  *      1. Allocated
1281  *      2. Written by CPU
1282  *      3. Read by GPU
1283  *      4. Updated (written) by CPU again
1284  *      5. Read by GPU
1285  *
1286  *      1. Allocated
1287  *              (CPU, CPU)
1288  *      2. Written by CPU
1289  *              (CPU, CPU)
1290  *      3. Read by GPU
1291  *              (CPU+RENDER, 0)
1292  *              flush_domains = CPU
1293  *              invalidate_domains = RENDER
1294  *              clflush (obj)
1295  *              MI_FLUSH
1296  *              drm_agp_chipset_flush
1297  *      4. Updated (written) by CPU again
1298  *              (CPU, CPU)
1299  *              flush_domains = 0 (no previous write domain)
1300  *              invalidate_domains = 0 (no new read domains)
1301  *      5. Read by GPU
1302  *              (CPU+RENDER, 0)
1303  *              flush_domains = CPU
1304  *              invalidate_domains = RENDER
1305  *              clflush (obj)
1306  *              MI_FLUSH
1307  *              drm_agp_chipset_flush
1308  */
1309 static int
1310 i915_gem_object_set_domain(struct drm_gem_object *obj,
1311                             uint32_t read_domains,
1312                             uint32_t write_domain)
1313 {
1314         struct drm_device               *dev = obj->dev;
1315         struct drm_i915_gem_object      *obj_priv = obj->driver_private;
1316         uint32_t                        invalidate_domains = 0;
1317         uint32_t                        flush_domains = 0;
1318         int                             ret;
1319
1320 #if WATCH_BUF
1321         DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
1322                  __func__, obj,
1323                  obj->read_domains, read_domains,
1324                  obj->write_domain, write_domain);
1325 #endif
1326         /*
1327          * If the object isn't moving to a new write domain,
1328          * let the object stay in multiple read domains
1329          */
1330         if (write_domain == 0)
1331                 read_domains |= obj->read_domains;
1332         else
1333                 obj_priv->dirty = 1;
1334
1335         /*
1336          * Flush the current write domain if
1337          * the new read domains don't match. Invalidate
1338          * any read domains which differ from the old
1339          * write domain
1340          */
1341         if (obj->write_domain && obj->write_domain != read_domains) {
1342                 flush_domains |= obj->write_domain;
1343                 invalidate_domains |= read_domains & ~obj->write_domain;
1344         }
1345         /*
1346          * Invalidate any read caches which may have
1347          * stale data. That is, any new read domains.
1348          */
1349         invalidate_domains |= read_domains & ~obj->read_domains;
1350         if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
1351 #if WATCH_BUF
1352                 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
1353                          __func__, flush_domains, invalidate_domains);
1354 #endif
1355                 /*
1356                  * If we're invaliding the CPU cache and flushing a GPU cache,
1357                  * then pause for rendering so that the GPU caches will be
1358                  * flushed before the cpu cache is invalidated
1359                  */
1360                 if ((invalidate_domains & I915_GEM_DOMAIN_CPU) &&
1361                     (flush_domains & ~(I915_GEM_DOMAIN_CPU |
1362                                        I915_GEM_DOMAIN_GTT))) {
1363                         ret = i915_gem_object_wait_rendering(obj);
1364                         if (ret)
1365                                 return ret;
1366                 }
1367                 i915_gem_clflush_object(obj);
1368         }
1369
1370         if ((write_domain | flush_domains) != 0)
1371                 obj->write_domain = write_domain;
1372
1373         /* If we're invalidating the CPU domain, clear the per-page CPU
1374          * domain list as well.
1375          */
1376         if (obj_priv->page_cpu_valid != NULL &&
1377             (write_domain != 0 ||
1378              read_domains & I915_GEM_DOMAIN_CPU)) {
1379                 drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE,
1380                          DRM_MEM_DRIVER);
1381                 obj_priv->page_cpu_valid = NULL;
1382         }
1383         obj->read_domains = read_domains;
1384
1385         dev->invalidate_domains |= invalidate_domains;
1386         dev->flush_domains |= flush_domains;
1387 #if WATCH_BUF
1388         DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
1389                  __func__,
1390                  obj->read_domains, obj->write_domain,
1391                  dev->invalidate_domains, dev->flush_domains);
1392 #endif
1393         return 0;
1394 }
1395
1396 /**
1397  * Set the read/write domain on a range of the object.
1398  *
1399  * Currently only implemented for CPU reads, otherwise drops to normal
1400  * i915_gem_object_set_domain().
1401  */
1402 static int
1403 i915_gem_object_set_domain_range(struct drm_gem_object *obj,
1404                                  uint64_t offset,
1405                                  uint64_t size,
1406                                  uint32_t read_domains,
1407                                  uint32_t write_domain)
1408 {
1409         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1410         int ret, i;
1411
1412         if (obj->read_domains & I915_GEM_DOMAIN_CPU)
1413                 return 0;
1414
1415         if (read_domains != I915_GEM_DOMAIN_CPU ||
1416             write_domain != 0)
1417                 return i915_gem_object_set_domain(obj,
1418                                                   read_domains, write_domain);
1419
1420         /* Wait on any GPU rendering to the object to be flushed. */
1421         if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) {
1422                 ret = i915_gem_object_wait_rendering(obj);
1423                 if (ret)
1424                         return ret;
1425         }
1426
1427         if (obj_priv->page_cpu_valid == NULL) {
1428                 obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE,
1429                                                       DRM_MEM_DRIVER);
1430         }
1431
1432         /* Flush the cache on any pages that are still invalid from the CPU's
1433          * perspective.
1434          */
1435         for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; i++) {
1436                 if (obj_priv->page_cpu_valid[i])
1437                         continue;
1438
1439                 drm_clflush_pages(obj_priv->page_list + i, 1);
1440
1441                 obj_priv->page_cpu_valid[i] = 1;
1442         }
1443
1444         return 0;
1445 }
1446
1447 /**
1448  * Once all of the objects have been set in the proper domain,
1449  * perform the necessary flush and invalidate operations.
1450  *
1451  * Returns the write domains flushed, for use in flush tracking.
1452  */
1453 static uint32_t
1454 i915_gem_dev_set_domain(struct drm_device *dev)
1455 {
1456         uint32_t flush_domains = dev->flush_domains;
1457
1458         /*
1459          * Now that all the buffers are synced to the proper domains,
1460          * flush and invalidate the collected domains
1461          */
1462         if (dev->invalidate_domains | dev->flush_domains) {
1463 #if WATCH_EXEC
1464                 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
1465                           __func__,
1466                          dev->invalidate_domains,
1467                          dev->flush_domains);
1468 #endif
1469                 i915_gem_flush(dev,
1470                                dev->invalidate_domains,
1471                                dev->flush_domains);
1472                 dev->invalidate_domains = 0;
1473                 dev->flush_domains = 0;
1474         }
1475
1476         return flush_domains;
1477 }
1478
1479 /**
1480  * Pin an object to the GTT and evaluate the relocations landing in it.
1481  */
1482 static int
1483 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
1484                                  struct drm_file *file_priv,
1485                                  struct drm_i915_gem_exec_object *entry)
1486 {
1487         struct drm_device *dev = obj->dev;
1488         struct drm_i915_gem_relocation_entry reloc;
1489         struct drm_i915_gem_relocation_entry __user *relocs;
1490         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1491         int i, ret;
1492         uint32_t last_reloc_offset = -1;
1493         void __iomem *reloc_page = NULL;
1494
1495         /* Choose the GTT offset for our buffer and put it there. */
1496         ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
1497         if (ret)
1498                 return ret;
1499
1500         entry->offset = obj_priv->gtt_offset;
1501
1502         relocs = (struct drm_i915_gem_relocation_entry __user *)
1503                  (uintptr_t) entry->relocs_ptr;
1504         /* Apply the relocations, using the GTT aperture to avoid cache
1505          * flushing requirements.
1506          */
1507         for (i = 0; i < entry->relocation_count; i++) {
1508                 struct drm_gem_object *target_obj;
1509                 struct drm_i915_gem_object *target_obj_priv;
1510                 uint32_t reloc_val, reloc_offset;
1511                 uint32_t __iomem *reloc_entry;
1512
1513                 ret = copy_from_user(&reloc, relocs + i, sizeof(reloc));
1514                 if (ret != 0) {
1515                         i915_gem_object_unpin(obj);
1516                         return ret;
1517                 }
1518
1519                 target_obj = drm_gem_object_lookup(obj->dev, file_priv,
1520                                                    reloc.target_handle);
1521                 if (target_obj == NULL) {
1522                         i915_gem_object_unpin(obj);
1523                         return -EBADF;
1524                 }
1525                 target_obj_priv = target_obj->driver_private;
1526
1527                 /* The target buffer should have appeared before us in the
1528                  * exec_object list, so it should have a GTT space bound by now.
1529                  */
1530                 if (target_obj_priv->gtt_space == NULL) {
1531                         DRM_ERROR("No GTT space found for object %d\n",
1532                                   reloc.target_handle);
1533                         drm_gem_object_unreference(target_obj);
1534                         i915_gem_object_unpin(obj);
1535                         return -EINVAL;
1536                 }
1537
1538                 if (reloc.offset > obj->size - 4) {
1539                         DRM_ERROR("Relocation beyond object bounds: "
1540                                   "obj %p target %d offset %d size %d.\n",
1541                                   obj, reloc.target_handle,
1542                                   (int) reloc.offset, (int) obj->size);
1543                         drm_gem_object_unreference(target_obj);
1544                         i915_gem_object_unpin(obj);
1545                         return -EINVAL;
1546                 }
1547                 if (reloc.offset & 3) {
1548                         DRM_ERROR("Relocation not 4-byte aligned: "
1549                                   "obj %p target %d offset %d.\n",
1550                                   obj, reloc.target_handle,
1551                                   (int) reloc.offset);
1552                         drm_gem_object_unreference(target_obj);
1553                         i915_gem_object_unpin(obj);
1554                         return -EINVAL;
1555                 }
1556
1557                 if (reloc.write_domain && target_obj->pending_write_domain &&
1558                     reloc.write_domain != target_obj->pending_write_domain) {
1559                         DRM_ERROR("Write domain conflict: "
1560                                   "obj %p target %d offset %d "
1561                                   "new %08x old %08x\n",
1562                                   obj, reloc.target_handle,
1563                                   (int) reloc.offset,
1564                                   reloc.write_domain,
1565                                   target_obj->pending_write_domain);
1566                         drm_gem_object_unreference(target_obj);
1567                         i915_gem_object_unpin(obj);
1568                         return -EINVAL;
1569                 }
1570
1571 #if WATCH_RELOC
1572                 DRM_INFO("%s: obj %p offset %08x target %d "
1573                          "read %08x write %08x gtt %08x "
1574                          "presumed %08x delta %08x\n",
1575                          __func__,
1576                          obj,
1577                          (int) reloc.offset,
1578                          (int) reloc.target_handle,
1579                          (int) reloc.read_domains,
1580                          (int) reloc.write_domain,
1581                          (int) target_obj_priv->gtt_offset,
1582                          (int) reloc.presumed_offset,
1583                          reloc.delta);
1584 #endif
1585
1586                 target_obj->pending_read_domains |= reloc.read_domains;
1587                 target_obj->pending_write_domain |= reloc.write_domain;
1588
1589                 /* If the relocation already has the right value in it, no
1590                  * more work needs to be done.
1591                  */
1592                 if (target_obj_priv->gtt_offset == reloc.presumed_offset) {
1593                         drm_gem_object_unreference(target_obj);
1594                         continue;
1595                 }
1596
1597                 /* Now that we're going to actually write some data in,
1598                  * make sure that any rendering using this buffer's contents
1599                  * is completed.
1600                  */
1601                 i915_gem_object_wait_rendering(obj);
1602
1603                 /* As we're writing through the gtt, flush
1604                  * any CPU writes before we write the relocations
1605                  */
1606                 if (obj->write_domain & I915_GEM_DOMAIN_CPU) {
1607                         i915_gem_clflush_object(obj);
1608                         drm_agp_chipset_flush(dev);
1609                         obj->write_domain = 0;
1610                 }
1611
1612                 /* Map the page containing the relocation we're going to
1613                  * perform.
1614                  */
1615                 reloc_offset = obj_priv->gtt_offset + reloc.offset;
1616                 if (reloc_page == NULL ||
1617                     (last_reloc_offset & ~(PAGE_SIZE - 1)) !=
1618                     (reloc_offset & ~(PAGE_SIZE - 1))) {
1619                         if (reloc_page != NULL)
1620                                 iounmap(reloc_page);
1621
1622                         reloc_page = ioremap_wc(dev->agp->base +
1623                                                 (reloc_offset &
1624                                                  ~(PAGE_SIZE - 1)),
1625                                                 PAGE_SIZE);
1626                         last_reloc_offset = reloc_offset;
1627                         if (reloc_page == NULL) {
1628                                 drm_gem_object_unreference(target_obj);
1629                                 i915_gem_object_unpin(obj);
1630                                 return -ENOMEM;
1631                         }
1632                 }
1633
1634                 reloc_entry = (uint32_t __iomem *)(reloc_page +
1635                                            (reloc_offset & (PAGE_SIZE - 1)));
1636                 reloc_val = target_obj_priv->gtt_offset + reloc.delta;
1637
1638 #if WATCH_BUF
1639                 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n",
1640                           obj, (unsigned int) reloc.offset,
1641                           readl(reloc_entry), reloc_val);
1642 #endif
1643                 writel(reloc_val, reloc_entry);
1644
1645                 /* Write the updated presumed offset for this entry back out
1646                  * to the user.
1647                  */
1648                 reloc.presumed_offset = target_obj_priv->gtt_offset;
1649                 ret = copy_to_user(relocs + i, &reloc, sizeof(reloc));
1650                 if (ret != 0) {
1651                         drm_gem_object_unreference(target_obj);
1652                         i915_gem_object_unpin(obj);
1653                         return ret;
1654                 }
1655
1656                 drm_gem_object_unreference(target_obj);
1657         }
1658
1659         if (reloc_page != NULL)
1660                 iounmap(reloc_page);
1661
1662 #if WATCH_BUF
1663         if (0)
1664                 i915_gem_dump_object(obj, 128, __func__, ~0);
1665 #endif
1666         return 0;
1667 }
1668
1669 /** Dispatch a batchbuffer to the ring
1670  */
1671 static int
1672 i915_dispatch_gem_execbuffer(struct drm_device *dev,
1673                               struct drm_i915_gem_execbuffer *exec,
1674                               uint64_t exec_offset)
1675 {
1676         drm_i915_private_t *dev_priv = dev->dev_private;
1677         struct drm_clip_rect __user *boxes = (struct drm_clip_rect __user *)
1678                                              (uintptr_t) exec->cliprects_ptr;
1679         int nbox = exec->num_cliprects;
1680         int i = 0, count;
1681         uint32_t        exec_start, exec_len;
1682         RING_LOCALS;
1683
1684         exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
1685         exec_len = (uint32_t) exec->batch_len;
1686
1687         if ((exec_start | exec_len) & 0x7) {
1688                 DRM_ERROR("alignment\n");
1689                 return -EINVAL;
1690         }
1691
1692         if (!exec_start)
1693                 return -EINVAL;
1694
1695         count = nbox ? nbox : 1;
1696
1697         for (i = 0; i < count; i++) {
1698                 if (i < nbox) {
1699                         int ret = i915_emit_box(dev, boxes, i,
1700                                                 exec->DR1, exec->DR4);
1701                         if (ret)
1702                                 return ret;
1703                 }
1704
1705                 if (IS_I830(dev) || IS_845G(dev)) {
1706                         BEGIN_LP_RING(4);
1707                         OUT_RING(MI_BATCH_BUFFER);
1708                         OUT_RING(exec_start | MI_BATCH_NON_SECURE);
1709                         OUT_RING(exec_start + exec_len - 4);
1710                         OUT_RING(0);
1711                         ADVANCE_LP_RING();
1712                 } else {
1713                         BEGIN_LP_RING(2);
1714                         if (IS_I965G(dev)) {
1715                                 OUT_RING(MI_BATCH_BUFFER_START |
1716                                          (2 << 6) |
1717                                          MI_BATCH_NON_SECURE_I965);
1718                                 OUT_RING(exec_start);
1719                         } else {
1720                                 OUT_RING(MI_BATCH_BUFFER_START |
1721                                          (2 << 6));
1722                                 OUT_RING(exec_start | MI_BATCH_NON_SECURE);
1723                         }
1724                         ADVANCE_LP_RING();
1725                 }
1726         }
1727
1728         /* XXX breadcrumb */
1729         return 0;
1730 }
1731
1732 /* Throttle our rendering by waiting until the ring has completed our requests
1733  * emitted over 20 msec ago.
1734  *
1735  * This should get us reasonable parallelism between CPU and GPU but also
1736  * relatively low latency when blocking on a particular request to finish.
1737  */
1738 static int
1739 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
1740 {
1741         struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
1742         int ret = 0;
1743         uint32_t seqno;
1744
1745         mutex_lock(&dev->struct_mutex);
1746         seqno = i915_file_priv->mm.last_gem_throttle_seqno;
1747         i915_file_priv->mm.last_gem_throttle_seqno =
1748                 i915_file_priv->mm.last_gem_seqno;
1749         if (seqno)
1750                 ret = i915_wait_request(dev, seqno);
1751         mutex_unlock(&dev->struct_mutex);
1752         return ret;
1753 }
1754
1755 int
1756 i915_gem_execbuffer(struct drm_device *dev, void *data,
1757                     struct drm_file *file_priv)
1758 {
1759         drm_i915_private_t *dev_priv = dev->dev_private;
1760         struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
1761         struct drm_i915_gem_execbuffer *args = data;
1762         struct drm_i915_gem_exec_object *exec_list = NULL;
1763         struct drm_gem_object **object_list = NULL;
1764         struct drm_gem_object *batch_obj;
1765         int ret, i, pinned = 0;
1766         uint64_t exec_offset;
1767         uint32_t seqno, flush_domains;
1768
1769 #if WATCH_EXEC
1770         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
1771                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
1772 #endif
1773
1774         if (args->buffer_count < 1) {
1775                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
1776                 return -EINVAL;
1777         }
1778         /* Copy in the exec list from userland */
1779         exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count,
1780                                DRM_MEM_DRIVER);
1781         object_list = drm_calloc(sizeof(*object_list), args->buffer_count,
1782                                  DRM_MEM_DRIVER);
1783         if (exec_list == NULL || object_list == NULL) {
1784                 DRM_ERROR("Failed to allocate exec or object list "
1785                           "for %d buffers\n",
1786                           args->buffer_count);
1787                 ret = -ENOMEM;
1788                 goto pre_mutex_err;
1789         }
1790         ret = copy_from_user(exec_list,
1791                              (struct drm_i915_relocation_entry __user *)
1792                              (uintptr_t) args->buffers_ptr,
1793                              sizeof(*exec_list) * args->buffer_count);
1794         if (ret != 0) {
1795                 DRM_ERROR("copy %d exec entries failed %d\n",
1796                           args->buffer_count, ret);
1797                 goto pre_mutex_err;
1798         }
1799
1800         mutex_lock(&dev->struct_mutex);
1801
1802         i915_verify_inactive(dev, __FILE__, __LINE__);
1803
1804         if (dev_priv->mm.wedged) {
1805                 DRM_ERROR("Execbuf while wedged\n");
1806                 mutex_unlock(&dev->struct_mutex);
1807                 return -EIO;
1808         }
1809
1810         if (dev_priv->mm.suspended) {
1811                 DRM_ERROR("Execbuf while VT-switched.\n");
1812                 mutex_unlock(&dev->struct_mutex);
1813                 return -EBUSY;
1814         }
1815
1816         /* Zero the gloabl flush/invalidate flags. These
1817          * will be modified as each object is bound to the
1818          * gtt
1819          */
1820         dev->invalidate_domains = 0;
1821         dev->flush_domains = 0;
1822
1823         /* Look up object handles and perform the relocations */
1824         for (i = 0; i < args->buffer_count; i++) {
1825                 object_list[i] = drm_gem_object_lookup(dev, file_priv,
1826                                                        exec_list[i].handle);
1827                 if (object_list[i] == NULL) {
1828                         DRM_ERROR("Invalid object handle %d at index %d\n",
1829                                    exec_list[i].handle, i);
1830                         ret = -EBADF;
1831                         goto err;
1832                 }
1833
1834                 object_list[i]->pending_read_domains = 0;
1835                 object_list[i]->pending_write_domain = 0;
1836                 ret = i915_gem_object_pin_and_relocate(object_list[i],
1837                                                        file_priv,
1838                                                        &exec_list[i]);
1839                 if (ret) {
1840                         DRM_ERROR("object bind and relocate failed %d\n", ret);
1841                         goto err;
1842                 }
1843                 pinned = i + 1;
1844         }
1845
1846         /* Set the pending read domains for the batch buffer to COMMAND */
1847         batch_obj = object_list[args->buffer_count-1];
1848         batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND;
1849         batch_obj->pending_write_domain = 0;
1850
1851         i915_verify_inactive(dev, __FILE__, __LINE__);
1852
1853         for (i = 0; i < args->buffer_count; i++) {
1854                 struct drm_gem_object *obj = object_list[i];
1855                 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1856
1857                 if (obj_priv->gtt_space == NULL) {
1858                         /* We evicted the buffer in the process of validating
1859                          * our set of buffers in.  We could try to recover by
1860                          * kicking them everything out and trying again from
1861                          * the start.
1862                          */
1863                         ret = -ENOMEM;
1864                         goto err;
1865                 }
1866
1867                 /* make sure all previous memory operations have passed */
1868                 ret = i915_gem_object_set_domain(obj,
1869                                                  obj->pending_read_domains,
1870                                                  obj->pending_write_domain);
1871                 if (ret)
1872                         goto err;
1873         }
1874
1875         i915_verify_inactive(dev, __FILE__, __LINE__);
1876
1877         /* Flush/invalidate caches and chipset buffer */
1878         flush_domains = i915_gem_dev_set_domain(dev);
1879
1880         i915_verify_inactive(dev, __FILE__, __LINE__);
1881
1882 #if WATCH_COHERENCY
1883         for (i = 0; i < args->buffer_count; i++) {
1884                 i915_gem_object_check_coherency(object_list[i],
1885                                                 exec_list[i].handle);
1886         }
1887 #endif
1888
1889         exec_offset = exec_list[args->buffer_count - 1].offset;
1890
1891 #if WATCH_EXEC
1892         i915_gem_dump_object(object_list[args->buffer_count - 1],
1893                               args->batch_len,
1894                               __func__,
1895                               ~0);
1896 #endif
1897
1898         (void)i915_add_request(dev, flush_domains);
1899
1900         /* Exec the batchbuffer */
1901         ret = i915_dispatch_gem_execbuffer(dev, args, exec_offset);
1902         if (ret) {
1903                 DRM_ERROR("dispatch failed %d\n", ret);
1904                 goto err;
1905         }
1906
1907         /*
1908          * Ensure that the commands in the batch buffer are
1909          * finished before the interrupt fires
1910          */
1911         flush_domains = i915_retire_commands(dev);
1912
1913         i915_verify_inactive(dev, __FILE__, __LINE__);
1914
1915         /*
1916          * Get a seqno representing the execution of the current buffer,
1917          * which we can wait on.  We would like to mitigate these interrupts,
1918          * likely by only creating seqnos occasionally (so that we have
1919          * *some* interrupts representing completion of buffers that we can
1920          * wait on when trying to clear up gtt space).
1921          */
1922         seqno = i915_add_request(dev, flush_domains);
1923         BUG_ON(seqno == 0);
1924         i915_file_priv->mm.last_gem_seqno = seqno;
1925         for (i = 0; i < args->buffer_count; i++) {
1926                 struct drm_gem_object *obj = object_list[i];
1927                 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1928
1929                 i915_gem_object_move_to_active(obj);
1930                 obj_priv->last_rendering_seqno = seqno;
1931 #if WATCH_LRU
1932                 DRM_INFO("%s: move to exec list %p\n", __func__, obj);
1933 #endif
1934         }
1935 #if WATCH_LRU
1936         i915_dump_lru(dev, __func__);
1937 #endif
1938
1939         i915_verify_inactive(dev, __FILE__, __LINE__);
1940
1941         /* Copy the new buffer offsets back to the user's exec list. */
1942         ret = copy_to_user((struct drm_i915_relocation_entry __user *)
1943                            (uintptr_t) args->buffers_ptr,
1944                            exec_list,
1945                            sizeof(*exec_list) * args->buffer_count);
1946         if (ret)
1947                 DRM_ERROR("failed to copy %d exec entries "
1948                           "back to user (%d)\n",
1949                            args->buffer_count, ret);
1950 err:
1951         if (object_list != NULL) {
1952                 for (i = 0; i < pinned; i++)
1953                         i915_gem_object_unpin(object_list[i]);
1954
1955                 for (i = 0; i < args->buffer_count; i++)
1956                         drm_gem_object_unreference(object_list[i]);
1957         }
1958         mutex_unlock(&dev->struct_mutex);
1959
1960 pre_mutex_err:
1961         drm_free(object_list, sizeof(*object_list) * args->buffer_count,
1962                  DRM_MEM_DRIVER);
1963         drm_free(exec_list, sizeof(*exec_list) * args->buffer_count,
1964                  DRM_MEM_DRIVER);
1965
1966         return ret;
1967 }
1968
1969 int
1970 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
1971 {
1972         struct drm_device *dev = obj->dev;
1973         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1974         int ret;
1975
1976         i915_verify_inactive(dev, __FILE__, __LINE__);
1977         if (obj_priv->gtt_space == NULL) {
1978                 ret = i915_gem_object_bind_to_gtt(obj, alignment);
1979                 if (ret != 0) {
1980                         DRM_ERROR("Failure to bind: %d", ret);
1981                         return ret;
1982                 }
1983         }
1984         obj_priv->pin_count++;
1985
1986         /* If the object is not active and not pending a flush,
1987          * remove it from the inactive list
1988          */
1989         if (obj_priv->pin_count == 1) {
1990                 atomic_inc(&dev->pin_count);
1991                 atomic_add(obj->size, &dev->pin_memory);
1992                 if (!obj_priv->active &&
1993                     (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
1994                                            I915_GEM_DOMAIN_GTT)) == 0 &&
1995                     !list_empty(&obj_priv->list))
1996                         list_del_init(&obj_priv->list);
1997         }
1998         i915_verify_inactive(dev, __FILE__, __LINE__);
1999
2000         return 0;
2001 }
2002
2003 void
2004 i915_gem_object_unpin(struct drm_gem_object *obj)
2005 {
2006         struct drm_device *dev = obj->dev;
2007         drm_i915_private_t *dev_priv = dev->dev_private;
2008         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2009
2010         i915_verify_inactive(dev, __FILE__, __LINE__);
2011         obj_priv->pin_count--;
2012         BUG_ON(obj_priv->pin_count < 0);
2013         BUG_ON(obj_priv->gtt_space == NULL);
2014
2015         /* If the object is no longer pinned, and is
2016          * neither active nor being flushed, then stick it on
2017          * the inactive list
2018          */
2019         if (obj_priv->pin_count == 0) {
2020                 if (!obj_priv->active &&
2021                     (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
2022                                            I915_GEM_DOMAIN_GTT)) == 0)
2023                         list_move_tail(&obj_priv->list,
2024                                        &dev_priv->mm.inactive_list);
2025                 atomic_dec(&dev->pin_count);
2026                 atomic_sub(obj->size, &dev->pin_memory);
2027         }
2028         i915_verify_inactive(dev, __FILE__, __LINE__);
2029 }
2030
2031 int
2032 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
2033                    struct drm_file *file_priv)
2034 {
2035         struct drm_i915_gem_pin *args = data;
2036         struct drm_gem_object *obj;
2037         struct drm_i915_gem_object *obj_priv;
2038         int ret;
2039
2040         mutex_lock(&dev->struct_mutex);
2041
2042         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
2043         if (obj == NULL) {
2044                 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
2045                           args->handle);
2046                 mutex_unlock(&dev->struct_mutex);
2047                 return -EBADF;
2048         }
2049         obj_priv = obj->driver_private;
2050
2051         ret = i915_gem_object_pin(obj, args->alignment);
2052         if (ret != 0) {
2053                 drm_gem_object_unreference(obj);
2054                 mutex_unlock(&dev->struct_mutex);
2055                 return ret;
2056         }
2057
2058         /* XXX - flush the CPU caches for pinned objects
2059          * as the X server doesn't manage domains yet
2060          */
2061         if (obj->write_domain & I915_GEM_DOMAIN_CPU) {
2062                 i915_gem_clflush_object(obj);
2063                 drm_agp_chipset_flush(dev);
2064                 obj->write_domain = 0;
2065         }
2066         args->offset = obj_priv->gtt_offset;
2067         drm_gem_object_unreference(obj);
2068         mutex_unlock(&dev->struct_mutex);
2069
2070         return 0;
2071 }
2072
2073 int
2074 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
2075                      struct drm_file *file_priv)
2076 {
2077         struct drm_i915_gem_pin *args = data;
2078         struct drm_gem_object *obj;
2079
2080         mutex_lock(&dev->struct_mutex);
2081
2082         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
2083         if (obj == NULL) {
2084                 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
2085                           args->handle);
2086                 mutex_unlock(&dev->struct_mutex);
2087                 return -EBADF;
2088         }
2089
2090         i915_gem_object_unpin(obj);
2091
2092         drm_gem_object_unreference(obj);
2093         mutex_unlock(&dev->struct_mutex);
2094         return 0;
2095 }
2096
2097 int
2098 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
2099                     struct drm_file *file_priv)
2100 {
2101         struct drm_i915_gem_busy *args = data;
2102         struct drm_gem_object *obj;
2103         struct drm_i915_gem_object *obj_priv;
2104
2105         mutex_lock(&dev->struct_mutex);
2106         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
2107         if (obj == NULL) {
2108                 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
2109                           args->handle);
2110                 mutex_unlock(&dev->struct_mutex);
2111                 return -EBADF;
2112         }
2113
2114         obj_priv = obj->driver_private;
2115         args->busy = obj_priv->active;
2116
2117         drm_gem_object_unreference(obj);
2118         mutex_unlock(&dev->struct_mutex);
2119         return 0;
2120 }
2121
/**
 * Throttle ioctl: thin wrapper around i915_gem_ring_throttle().
 *
 * The ioctl payload in @data is not read.  Returns whatever
 * i915_gem_ring_throttle() returns.
 */
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file_priv)
{
        int ret;

        ret = i915_gem_ring_throttle(dev, file_priv);

        return ret;
}
2128
2129 int i915_gem_init_object(struct drm_gem_object *obj)
2130 {
2131         struct drm_i915_gem_object *obj_priv;
2132
2133         obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER);
2134         if (obj_priv == NULL)
2135                 return -ENOMEM;
2136
2137         /*
2138          * We've just allocated pages from the kernel,
2139          * so they've just been written by the CPU with
2140          * zeros. They'll need to be clflushed before we
2141          * use them with the GPU.
2142          */
2143         obj->write_domain = I915_GEM_DOMAIN_CPU;
2144         obj->read_domains = I915_GEM_DOMAIN_CPU;
2145
2146         obj_priv->agp_type = AGP_USER_MEMORY;
2147
2148         obj->driver_private = obj_priv;
2149         obj_priv->obj = obj;
2150         INIT_LIST_HEAD(&obj_priv->list);
2151         return 0;
2152 }
2153
2154 void i915_gem_free_object(struct drm_gem_object *obj)
2155 {
2156         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2157
2158         while (obj_priv->pin_count > 0)
2159                 i915_gem_object_unpin(obj);
2160
2161         i915_gem_object_unbind(obj);
2162
2163         drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
2164         drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
2165 }
2166
2167 static int
2168 i915_gem_set_domain(struct drm_gem_object *obj,
2169                     struct drm_file *file_priv,
2170                     uint32_t read_domains,
2171                     uint32_t write_domain)
2172 {
2173         struct drm_device *dev = obj->dev;
2174         int ret;
2175         uint32_t flush_domains;
2176
2177         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
2178
2179         ret = i915_gem_object_set_domain(obj, read_domains, write_domain);
2180         if (ret)
2181                 return ret;
2182         flush_domains = i915_gem_dev_set_domain(obj->dev);
2183
2184         if (flush_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT))
2185                 (void) i915_add_request(dev, flush_domains);
2186
2187         return 0;
2188 }
2189
2190 /** Unbinds all objects that are on the given buffer list. */
2191 static int
2192 i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)
2193 {
2194         struct drm_gem_object *obj;
2195         struct drm_i915_gem_object *obj_priv;
2196         int ret;
2197
2198         while (!list_empty(head)) {
2199                 obj_priv = list_first_entry(head,
2200                                             struct drm_i915_gem_object,
2201                                             list);
2202                 obj = obj_priv->obj;
2203
2204                 if (obj_priv->pin_count != 0) {
2205                         DRM_ERROR("Pinned object in unbind list\n");
2206                         mutex_unlock(&dev->struct_mutex);
2207                         return -EINVAL;
2208                 }
2209
2210                 ret = i915_gem_object_unbind(obj);
2211                 if (ret != 0) {
2212                         DRM_ERROR("Error unbinding object in LeaveVT: %d\n",
2213                                   ret);
2214                         mutex_unlock(&dev->struct_mutex);
2215                         return ret;
2216                 }
2217         }
2218
2219
2220         return 0;
2221 }
2222
2223 static int
2224 i915_gem_idle(struct drm_device *dev)
2225 {
2226         drm_i915_private_t *dev_priv = dev->dev_private;
2227         uint32_t seqno, cur_seqno, last_seqno;
2228         int stuck, ret;
2229
2230         if (dev_priv->mm.suspended)
2231                 return 0;
2232
2233         /* Hack!  Don't let anybody do execbuf while we don't control the chip.
2234          * We need to replace this with a semaphore, or something.
2235          */
2236         dev_priv->mm.suspended = 1;
2237
2238         i915_kernel_lost_context(dev);
2239
2240         /* Flush the GPU along with all non-CPU write domains
2241          */
2242         i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
2243                        ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
2244         seqno = i915_add_request(dev, ~(I915_GEM_DOMAIN_CPU |
2245                                         I915_GEM_DOMAIN_GTT));
2246
2247         if (seqno == 0) {
2248                 mutex_unlock(&dev->struct_mutex);
2249                 return -ENOMEM;
2250         }
2251
2252         dev_priv->mm.waiting_gem_seqno = seqno;
2253         last_seqno = 0;
2254         stuck = 0;
2255         for (;;) {
2256                 cur_seqno = i915_get_gem_seqno(dev);
2257                 if (i915_seqno_passed(cur_seqno, seqno))
2258                         break;
2259                 if (last_seqno == cur_seqno) {
2260                         if (stuck++ > 100) {
2261                                 DRM_ERROR("hardware wedged\n");
2262                                 dev_priv->mm.wedged = 1;
2263                                 DRM_WAKEUP(&dev_priv->irq_queue);
2264                                 break;
2265                         }
2266                 }
2267                 msleep(10);
2268                 last_seqno = cur_seqno;
2269         }
2270         dev_priv->mm.waiting_gem_seqno = 0;
2271
2272         i915_gem_retire_requests(dev);
2273
2274         /* Active and flushing should now be empty as we've
2275          * waited for a sequence higher than any pending execbuffer
2276          */
2277         BUG_ON(!list_empty(&dev_priv->mm.active_list));
2278         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
2279
2280         /* Request should now be empty as we've also waited
2281          * for the last request in the list
2282          */
2283         BUG_ON(!list_empty(&dev_priv->mm.request_list));
2284
2285         /* Move all buffers out of the GTT. */
2286         ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list);
2287         if (ret)
2288                 return ret;
2289
2290         BUG_ON(!list_empty(&dev_priv->mm.active_list));
2291         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
2292         BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
2293         BUG_ON(!list_empty(&dev_priv->mm.request_list));
2294         return 0;
2295 }
2296
2297 static int
2298 i915_gem_init_hws(struct drm_device *dev)
2299 {
2300         drm_i915_private_t *dev_priv = dev->dev_private;
2301         struct drm_gem_object *obj;
2302         struct drm_i915_gem_object *obj_priv;
2303         int ret;
2304
2305         /* If we need a physical address for the status page, it's already
2306          * initialized at driver load time.
2307          */
2308         if (!I915_NEED_GFX_HWS(dev))
2309                 return 0;
2310
2311         obj = drm_gem_object_alloc(dev, 4096);
2312         if (obj == NULL) {
2313                 DRM_ERROR("Failed to allocate status page\n");
2314                 return -ENOMEM;
2315         }
2316         obj_priv = obj->driver_private;
2317         obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
2318
2319         ret = i915_gem_object_pin(obj, 4096);
2320         if (ret != 0) {
2321                 drm_gem_object_unreference(obj);
2322                 return ret;
2323         }
2324
2325         dev_priv->status_gfx_addr = obj_priv->gtt_offset;
2326
2327         dev_priv->hw_status_page = kmap(obj_priv->page_list[0]);
2328         if (dev_priv->hw_status_page == NULL) {
2329                 DRM_ERROR("Failed to map status page.\n");
2330                 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
2331                 drm_gem_object_unreference(obj);
2332                 return -EINVAL;
2333         }
2334         dev_priv->hws_obj = obj;
2335         memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
2336         I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
2337         I915_READ(HWS_PGA); /* posting read */
2338         DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr);
2339
2340         return 0;
2341 }
2342
2343 static int
2344 i915_gem_init_ringbuffer(struct drm_device *dev)
2345 {
2346         drm_i915_private_t *dev_priv = dev->dev_private;
2347         struct drm_gem_object *obj;
2348         struct drm_i915_gem_object *obj_priv;
2349         int ret;
2350         u32 head;
2351
2352         ret = i915_gem_init_hws(dev);
2353         if (ret != 0)
2354                 return ret;
2355
2356         obj = drm_gem_object_alloc(dev, 128 * 1024);
2357         if (obj == NULL) {
2358                 DRM_ERROR("Failed to allocate ringbuffer\n");
2359                 return -ENOMEM;
2360         }
2361         obj_priv = obj->driver_private;
2362
2363         ret = i915_gem_object_pin(obj, 4096);
2364         if (ret != 0) {
2365                 drm_gem_object_unreference(obj);
2366                 return ret;
2367         }
2368
2369         /* Set up the kernel mapping for the ring. */
2370         dev_priv->ring.Size = obj->size;
2371         dev_priv->ring.tail_mask = obj->size - 1;
2372
2373         dev_priv->ring.map.offset = dev->agp->base + obj_priv->gtt_offset;
2374         dev_priv->ring.map.size = obj->size;
2375         dev_priv->ring.map.type = 0;
2376         dev_priv->ring.map.flags = 0;
2377         dev_priv->ring.map.mtrr = 0;
2378
2379         drm_core_ioremap_wc(&dev_priv->ring.map, dev);
2380         if (dev_priv->ring.map.handle == NULL) {
2381                 DRM_ERROR("Failed to map ringbuffer.\n");
2382                 memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
2383                 drm_gem_object_unreference(obj);
2384                 return -EINVAL;
2385         }
2386         dev_priv->ring.ring_obj = obj;
2387         dev_priv->ring.virtual_start = dev_priv->ring.map.handle;
2388
2389         /* Stop the ring if it's running. */
2390         I915_WRITE(PRB0_CTL, 0);
2391         I915_WRITE(PRB0_TAIL, 0);
2392         I915_WRITE(PRB0_HEAD, 0);
2393
2394         /* Initialize the ring. */
2395         I915_WRITE(PRB0_START, obj_priv->gtt_offset);
2396         head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
2397
2398         /* G45 ring initialization fails to reset head to zero */
2399         if (head != 0) {
2400                 DRM_ERROR("Ring head not reset to zero "
2401                           "ctl %08x head %08x tail %08x start %08x\n",
2402                           I915_READ(PRB0_CTL),
2403                           I915_READ(PRB0_HEAD),
2404                           I915_READ(PRB0_TAIL),
2405                           I915_READ(PRB0_START));
2406                 I915_WRITE(PRB0_HEAD, 0);
2407
2408                 DRM_ERROR("Ring head forced to zero "
2409                           "ctl %08x head %08x tail %08x start %08x\n",
2410                           I915_READ(PRB0_CTL),
2411                           I915_READ(PRB0_HEAD),
2412                           I915_READ(PRB0_TAIL),
2413                           I915_READ(PRB0_START));
2414         }
2415
2416         I915_WRITE(PRB0_CTL,
2417                    ((obj->size - 4096) & RING_NR_PAGES) |
2418                    RING_NO_REPORT |
2419                    RING_VALID);
2420
2421         head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
2422
2423         /* If the head is still not zero, the ring is dead */
2424         if (head != 0) {
2425                 DRM_ERROR("Ring initialization failed "
2426                           "ctl %08x head %08x tail %08x start %08x\n",
2427                           I915_READ(PRB0_CTL),
2428                           I915_READ(PRB0_HEAD),
2429                           I915_READ(PRB0_TAIL),
2430                           I915_READ(PRB0_START));
2431                 return -EIO;
2432         }
2433
2434         /* Update our cache of the ring state */
2435         i915_kernel_lost_context(dev);
2436
2437         return 0;
2438 }
2439
2440 static void
2441 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
2442 {
2443         drm_i915_private_t *dev_priv = dev->dev_private;
2444
2445         if (dev_priv->ring.ring_obj == NULL)
2446                 return;
2447
2448         drm_core_ioremapfree(&dev_priv->ring.map, dev);
2449
2450         i915_gem_object_unpin(dev_priv->ring.ring_obj);
2451         drm_gem_object_unreference(dev_priv->ring.ring_obj);
2452         dev_priv->ring.ring_obj = NULL;
2453         memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
2454
2455         if (dev_priv->hws_obj != NULL) {
2456                 struct drm_gem_object *obj = dev_priv->hws_obj;
2457                 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2458
2459                 kunmap(obj_priv->page_list[0]);
2460                 i915_gem_object_unpin(obj);
2461                 drm_gem_object_unreference(obj);
2462                 dev_priv->hws_obj = NULL;
2463                 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
2464                 dev_priv->hw_status_page = NULL;
2465
2466                 /* Write high address into HWS_PGA when disabling. */
2467                 I915_WRITE(HWS_PGA, 0x1ffff000);
2468         }
2469 }
2470
2471 int
2472 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
2473                        struct drm_file *file_priv)
2474 {
2475         drm_i915_private_t *dev_priv = dev->dev_private;
2476         int ret;
2477
2478         if (dev_priv->mm.wedged) {
2479                 DRM_ERROR("Reenabling wedged hardware, good luck\n");
2480                 dev_priv->mm.wedged = 0;
2481         }
2482
2483         ret = i915_gem_init_ringbuffer(dev);
2484         if (ret != 0)
2485                 return ret;
2486
2487         mutex_lock(&dev->struct_mutex);
2488         BUG_ON(!list_empty(&dev_priv->mm.active_list));
2489         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
2490         BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
2491         BUG_ON(!list_empty(&dev_priv->mm.request_list));
2492         dev_priv->mm.suspended = 0;
2493         mutex_unlock(&dev->struct_mutex);
2494
2495         drm_irq_install(dev);
2496
2497         return 0;
2498 }
2499
2500 int
2501 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
2502                        struct drm_file *file_priv)
2503 {
2504         int ret;
2505
2506         mutex_lock(&dev->struct_mutex);
2507         ret = i915_gem_idle(dev);
2508         if (ret == 0)
2509                 i915_gem_cleanup_ringbuffer(dev);
2510         mutex_unlock(&dev->struct_mutex);
2511
2512         drm_irq_uninstall(dev);
2513
2514         return 0;
2515 }
2516
2517 void
2518 i915_gem_lastclose(struct drm_device *dev)
2519 {
2520         int ret;
2521         drm_i915_private_t *dev_priv = dev->dev_private;
2522
2523         mutex_lock(&dev->struct_mutex);
2524
2525         if (dev_priv->ring.ring_obj != NULL) {
2526                 ret = i915_gem_idle(dev);
2527                 if (ret)
2528                         DRM_ERROR("failed to idle hardware: %d\n", ret);
2529
2530                 i915_gem_cleanup_ringbuffer(dev);
2531         }
2532
2533         mutex_unlock(&dev->struct_mutex);
2534 }
2535
2536 void
2537 i915_gem_load(struct drm_device *dev)
2538 {
2539         drm_i915_private_t *dev_priv = dev->dev_private;
2540
2541         INIT_LIST_HEAD(&dev_priv->mm.active_list);
2542         INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
2543         INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
2544         INIT_LIST_HEAD(&dev_priv->mm.request_list);
2545         INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
2546                           i915_gem_retire_work_handler);
2547         INIT_WORK(&dev_priv->mm.vblank_work,
2548                   i915_gem_vblank_work_handler);
2549         dev_priv->mm.next_gem_seqno = 1;
2550
2551         i915_gem_detect_bit_6_swizzle(dev);
2552 }