2 * Copyright © 2008,2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Eric Anholt <eric@anholt.net>
25 * Chris Wilson <chris@chris-wilson.co.uk>
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <dev/drm2/drmP.h>
33 #include <dev/drm2/i915/i915_drm.h>
34 #include <dev/drm2/i915/i915_drv.h>
35 #include <dev/drm2/i915/intel_drv.h>
37 #include <sys/limits.h>
38 #include <sys/sf_buf.h>
/* Hash-bucket array of the execbuffer handle table. The eb_* helpers index it
 * with (handle & eb->and); it is declared with zero length and its real size
 * is chosen when the table is allocated. */
42 struct hlist_head buckets[0];
/*
 * Allocates a zero-filled handle table with room for `count` hash buckets
 * following the eb_objects header. `count` starts out as half the number of
 * hlist_head entries that fit in one page.
 * NOTE(review): the function name/parameter line, any later adjustment of
 * `count`, and the initialisation of the table fields are not visible in
 * this excerpt.
 */
45 static struct eb_objects *
48 struct eb_objects *eb;
49 int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
50 BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
53 eb = malloc(count*sizeof(struct hlist_head) +
54 sizeof(struct eb_objects),
55 DRM_I915_GEM, M_WAITOK | M_ZERO);
/* Empties the handle table by zeroing all (eb->and + 1) bucket heads. */
64 eb_reset(struct eb_objects *eb)
66 memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
/* Inserts obj at the head of the bucket selected by masking its exec_handle
 * with eb->and. */
70 eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
72 hlist_add_head(&obj->exec_node,
73 &eb->buckets[obj->exec_handle & eb->and]);
/*
 * Looks up an object by execbuffer handle: walks the bucket selected by
 * (handle & eb->and) and compares each entry's exec_handle with `handle`.
 * NOTE(review): the return statements are not visible in this excerpt; the
 * relocation code checks the result against NULL, so presumably NULL means
 * "not found" - confirm.
 */
76 static struct drm_i915_gem_object *
77 eb_get_object(struct eb_objects *eb, unsigned long handle)
79 struct hlist_head *head;
80 struct hlist_node *node;
81 struct drm_i915_gem_object *obj;
83 head = &eb->buckets[handle & eb->and];
84 hlist_for_each(node, head) {
85 obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
86 if (obj->exec_handle == handle)
/* Releases the handle table back to the DRM_I915_GEM malloc type. */
94 eb_destroy(struct eb_objects *eb)
96 free(eb, DRM_I915_GEM);
99 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
101 return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
102 !obj->map_and_fenceable ||
103 obj->cache_level != I915_CACHE_NONE);
/*
 * Applies a single relocation entry to obj.
 *
 * obj:   object whose contents are patched.
 * eb:    handle table used to resolve reloc->target_handle.
 * reloc: kernel copy of the relocation; delta, offset and presumed_offset
 *        are modified in place.
 *
 * Steps visible here: resolve the target object, make sure a global GTT
 * mapping exists for gen6 instruction-domain writes, validate the requested
 * read/write domains, fold them into the target's pending domains, skip the
 * write when presumed_offset already matches, check bounds and 4-byte
 * alignment of reloc->offset, then store (delta + target offset) either
 * through a CPU mapping (sf_buf) or through a write-combining mapping of the
 * GTT aperture. Finally presumed_offset is updated to the target offset.
 * NOTE(review): the return statements of the individual branches are not
 * visible in this excerpt.
 */
107 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
108 struct eb_objects *eb,
109 struct drm_i915_gem_relocation_entry *reloc)
111 struct drm_device *dev = obj->base.dev;
112 struct drm_gem_object *target_obj;
113 struct drm_i915_gem_object *target_i915_obj;
114 uint32_t target_offset;
117 /* we already hold a reference to all valid objects */
118 target_obj = &eb_get_object(eb, reloc->target_handle)->base;
119 if (unlikely(target_obj == NULL))
122 target_i915_obj = to_intel_bo(target_obj);
123 target_offset = target_i915_obj->gtt_offset;
125 /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
126 * pipe_control writes because the gpu doesn't properly redirect them
127 * through the ppgtt for non_secure batchbuffers. */
128 if (unlikely(IS_GEN6(dev) &&
129 reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
130 !target_i915_obj->has_global_gtt_mapping)) {
131 i915_gem_gtt_bind_object(target_i915_obj,
132 target_i915_obj->cache_level);
135 /* Validate that the target is in a valid r/w GPU domain */
136 if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
137 DRM_DEBUG("reloc with multiple write domains: "
138 "obj %p target %d offset %d "
139 "read %08x write %08x",
140 obj, reloc->target_handle,
143 reloc->write_domain);
146 if (unlikely((reloc->write_domain | reloc->read_domains)
147 & ~I915_GEM_GPU_DOMAINS)) {
148 DRM_DEBUG("reloc with read/write non-GPU domains: "
149 "obj %p target %d offset %d "
150 "read %08x write %08x",
151 obj, reloc->target_handle,
154 reloc->write_domain);
157 if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
158 reloc->write_domain != target_obj->pending_write_domain)) {
159 DRM_DEBUG("Write domain conflict: "
160 "obj %p target %d offset %d "
161 "new %08x old %08x\n",
162 obj, reloc->target_handle,
165 target_obj->pending_write_domain);
169 target_obj->pending_read_domains |= reloc->read_domains;
170 target_obj->pending_write_domain |= reloc->write_domain;
172 /* If the relocation already has the right value in it, no
173 * more work needs to be done.
175 if (target_offset == reloc->presumed_offset)
178 /* Check that the relocation address is valid... */
179 if (unlikely(reloc->offset > obj->base.size - 4)) {
180 DRM_DEBUG("Relocation beyond object bounds: "
181 "obj %p target %d offset %d size %d.\n",
182 obj, reloc->target_handle,
184 (int) obj->base.size);
187 if (unlikely(reloc->offset & 3)) {
188 DRM_DEBUG("Relocation not 4-byte aligned: "
189 "obj %p target %d offset %d.\n",
190 obj, reloc->target_handle,
191 (int) reloc->offset);
195 /* We can't wait for rendering with pagefaults disabled */
196 if (obj->active && (curthread->td_pflags & TDP_NOFAULTING) != 0)
199 reloc->delta += target_offset;
200 if (use_cpu_reloc(obj)) {
201 uint32_t page_offset = reloc->offset & PAGE_MASK;
205 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
209 sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)],
213 vaddr = (void *)sf_buf_kva(sf);
214 *(uint32_t *)(vaddr + page_offset) = reloc->delta;
217 struct drm_i915_private *dev_priv = dev->dev_private;
218 uint32_t __iomem *reloc_entry;
219 char __iomem *reloc_page;
221 ret = i915_gem_object_set_to_gtt_domain(obj, true);
225 ret = i915_gem_object_put_fence(obj);
229 /* Map the page containing the relocation we're going to perform. */
230 reloc->offset += obj->gtt_offset;
231 reloc_page = pmap_mapdev_attr(dev_priv->mm.gtt_base_addr + (reloc->offset &
232 ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
233 reloc_entry = (uint32_t __iomem *)
234 (reloc_page + (reloc->offset & PAGE_MASK));
235 *(volatile uint32_t *)reloc_entry = reloc->delta;
236 pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
239 /* and update the user's relocation entry */
240 reloc->presumed_offset = target_offset;
/*
 * Fast-path relocation of one object. The user's relocation array
 * (entry->relocs_ptr, entry->relocation_count entries) is processed in
 * chunks that fit a 512-byte on-stack buffer. Each chunk is fetched with the
 * "inatomic" user copy routine, every entry is applied with
 * i915_gem_execbuffer_relocate_entry(), and presumed_offset is written back
 * to user space only when the entry changed it.
 * NOTE(review): loop structure and error returns are only partly visible in
 * this excerpt.
 */
246 i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
247 struct eb_objects *eb)
249 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
250 struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
251 struct drm_i915_gem_relocation_entry __user *user_relocs;
252 struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
255 user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
257 remain = entry->relocation_count;
259 struct drm_i915_gem_relocation_entry *r = stack_reloc;
261 if (count > ARRAY_SIZE(stack_reloc))
262 count = ARRAY_SIZE(stack_reloc);
265 if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
269 u64 offset = r->presumed_offset;
271 ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
275 if (r->presumed_offset != offset &&
276 __copy_to_user_inatomic(&user_relocs->presumed_offset,
278 sizeof(r->presumed_offset))) {
/*
 * Slow-path relocation of one object: applies all relocation_count entries
 * of obj's exec entry from `relocs`, an array that the caller has already
 * copied into kernel memory, so no user-space access happens here.
 */
292 i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
293 struct eb_objects *eb,
294 struct drm_i915_gem_relocation_entry *relocs)
296 const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
299 for (i = 0; i < entry->relocation_count; i++) {
300 ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
/*
 * Fast-path relocation of every object on `objects`. Page faults are
 * disabled around the loop and re-enabled afterwards with the saved flags;
 * the reason is given in the comment inside the function.
 */
309 i915_gem_execbuffer_relocate(struct drm_device *dev,
310 struct eb_objects *eb,
311 struct list_head *objects)
313 struct drm_i915_gem_object *obj;
316 /* This is the fast path and we cannot handle a pagefault whilst
317 * holding the struct mutex lest the user pass in the relocations
318 * contained within a mmaped bo. For in such a case we, the page
319 * fault handler would call i915_gem_fault() and we would try to
320 * acquire the struct mutex again. Obviously this is bad and so
321 * lockdep complains vehemently.
323 pflags = vm_fault_disable_pagefaults();
324 list_for_each_entry(obj, objects, exec_list) {
325 ret = i915_gem_execbuffer_relocate_object(obj, eb);
329 vm_fault_enable_pagefaults(pflags);
/*
 * Driver-private bookkeeping bits kept in the exec entry's flags while an
 * object is reserved: HAS_PIN records that the object was pinned, HAS_FENCE
 * that its fence was pinned. Both are cleared again on unreserve.
 *
 * The constants are unsigned on purpose: (1 << 31) on a signed int shifts
 * into the sign bit, which is undefined behaviour, and the resulting
 * negative value would sign-extend when OR-ed into a wider flags field.
 */
#define __EXEC_OBJECT_HAS_PIN (1U<<31)
#define __EXEC_OBJECT_HAS_FENCE (1U<<30)
/* Non-zero when obj has relocations to apply and they will not go through
 * the CPU path chosen by use_cpu_reloc(). */
338 need_reloc_mappable(struct drm_i915_gem_object *obj)
340 struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
341 return entry->relocation_count && !use_cpu_reloc(obj);
/*
 * Reserves GTT space for one object of the execbuffer.
 *
 * Pins obj with the alignment requested in its exec entry (mappable when a
 * fence or GTT relocation is needed) and records that in entry->flags with
 * __EXEC_OBJECT_HAS_PIN. On hardware with fenced GPU access (gen < 4) it
 * also acquires a fence for entries flagged EXEC_OBJECT_NEEDS_FENCE and
 * records a successfully pinned fence with __EXEC_OBJECT_HAS_FENCE. When an
 * aliasing PPGTT is in use, the object is bound into it if it is not
 * already. Finally the resulting GTT offset is stored in entry->offset.
 * NOTE(review): error returns are not visible in this excerpt.
 */
345 i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
346 struct intel_ring_buffer *ring)
348 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
349 struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
350 bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
351 bool need_fence, need_mappable;
355 has_fenced_gpu_access &&
356 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
357 obj->tiling_mode != I915_TILING_NONE;
358 need_mappable = need_fence || need_reloc_mappable(obj);
360 ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
364 entry->flags |= __EXEC_OBJECT_HAS_PIN;
366 if (has_fenced_gpu_access) {
367 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
368 ret = i915_gem_object_get_fence(obj);
372 if (i915_gem_object_pin_fence(obj))
373 entry->flags |= __EXEC_OBJECT_HAS_FENCE;
375 obj->pending_fenced_gpu_access = true;
379 /* Ensure ppgtt mapping exists if needed */
380 if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
381 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
382 obj, obj->cache_level);
384 obj->has_aliasing_ppgtt_mapping = 1;
387 entry->offset = obj->gtt_offset;
/*
 * Undoes i915_gem_execbuffer_reserve_object(): unpins the fence and/or the
 * object according to the __EXEC_OBJECT_HAS_* bits recorded in the exec
 * entry's flags, then clears those bits.
 */
392 i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
394 struct drm_i915_gem_exec_object2 *entry;
399 entry = obj->exec_entry;
401 if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
402 i915_gem_object_unpin_fence(obj);
404 if (entry->flags & __EXEC_OBJECT_HAS_PIN)
405 i915_gem_object_unpin(obj);
407 entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
/*
 * Reserves GTT space for all objects of the execbuffer.
 *
 * First the list is re-ordered through a temporary list (list_move puts an
 * object at the front, list_move_tail at the back; the deciding condition
 * is not visible in this excerpt) and each object's pending domains and
 * pending fenced access are reset. Then objects are pinned in the phases
 * described by the comment inside the function. On the way out every object
 * is unreserved; if the attempt failed with -ENOSPC on the first try, all
 * objects are evicted (i915_gem_evict_everything) before trying again.
 */
411 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
412 struct drm_file *file,
413 struct list_head *objects)
415 struct drm_i915_gem_object *obj;
416 struct list_head ordered_objects;
417 bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
420 INIT_LIST_HEAD(&ordered_objects);
421 while (!list_empty(objects)) {
422 struct drm_i915_gem_exec_object2 *entry;
423 bool need_fence, need_mappable;
425 obj = list_first_entry(objects,
426 struct drm_i915_gem_object,
428 entry = obj->exec_entry;
431 has_fenced_gpu_access &&
432 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
433 obj->tiling_mode != I915_TILING_NONE;
434 need_mappable = need_fence || need_reloc_mappable(obj);
437 list_move(&obj->exec_list, &ordered_objects);
439 list_move_tail(&obj->exec_list, &ordered_objects);
441 obj->base.pending_read_domains = 0;
442 obj->base.pending_write_domain = 0;
443 obj->pending_fenced_gpu_access = false;
445 list_splice(&ordered_objects, objects);
447 /* Attempt to pin all of the buffers into the GTT.
448 * This is done in 3 phases:
450 * 1a. Unbind all objects that do not match the GTT constraints for
451 * the execbuffer (fenceable, mappable, alignment etc).
452 * 1b. Increment pin count for already bound objects.
453 * 2. Bind new objects.
454 * 3. Decrement pin count.
456 * This avoid unnecessary unbinding of later objects in order to make
457 * room for the earlier objects *unless* we need to defragment.
463 /* Unbind any ill-fitting objects or pin. */
464 list_for_each_entry(obj, objects, exec_list) {
465 struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
466 bool need_fence, need_mappable;
472 has_fenced_gpu_access &&
473 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
474 obj->tiling_mode != I915_TILING_NONE;
475 need_mappable = need_fence || need_reloc_mappable(obj);
477 if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
478 (need_mappable && !obj->map_and_fenceable))
479 ret = i915_gem_object_unbind(obj);
481 ret = i915_gem_execbuffer_reserve_object(obj, ring);
486 /* Bind fresh objects */
487 list_for_each_entry(obj, objects, exec_list) {
491 ret = i915_gem_execbuffer_reserve_object(obj, ring);
496 err: /* Decrement pin count for bound objects */
497 list_for_each_entry(obj, objects, exec_list)
498 i915_gem_execbuffer_unreserve_object(obj);
500 if (ret != -ENOSPC || retry++)
503 ret = i915_gem_evict_everything(ring->dev);
/*
 * Slow-path relocation, used by i915_gem_do_execbuffer() when the fast path
 * fails with -EFAULT.
 *
 * Drops every object reference held on `objects`, copies all relocation
 * entries of all exec entries into one kernel array (reloc_offset[i] is the
 * index of the first entry belonging to exec[i]), and overwrites each
 * presumed_offset in user space with an invalid value (-1). It then retakes
 * the device lock with i915_mutex_lock_interruptible(), looks the objects up
 * again, reserves them and applies the relocations from the kernel copy.
 * Both temporary arrays are freed before returning.
 * NOTE(review): the final parameter line, the unlock call and the error
 * returns are not visible in this excerpt.
 */
510 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
511 struct drm_file *file,
512 struct intel_ring_buffer *ring,
513 struct list_head *objects,
514 struct eb_objects *eb,
515 struct drm_i915_gem_exec_object2 *exec,
518 struct drm_i915_gem_relocation_entry *reloc;
519 struct drm_i915_gem_object *obj;
523 /* We may process another execbuffer during the unlock... */
524 while (!list_empty(objects)) {
525 obj = list_first_entry(objects,
526 struct drm_i915_gem_object,
528 list_del_init(&obj->exec_list);
529 drm_gem_object_unreference(&obj->base);
535 for (i = 0; i < count; i++)
536 total += exec[i].relocation_count;
538 reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
539 reloc = drm_malloc_ab(total, sizeof(*reloc));
540 if (reloc == NULL || reloc_offset == NULL) {
541 drm_free_large(reloc);
542 drm_free_large(reloc_offset);
548 for (i = 0; i < count; i++) {
549 struct drm_i915_gem_relocation_entry __user *user_relocs;
550 u64 invalid_offset = (u64)-1;
553 user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr;
555 if (copy_from_user(reloc+total, user_relocs,
556 exec[i].relocation_count * sizeof(*reloc))) {
562 /* As we do not update the known relocation offsets after
563 * relocating (due to the complexities in lock handling),
564 * we need to mark them as invalid now so that we force the
565 * relocation processing next time. Just in case the target
566 * object is evicted and then rebound into its old
567 * presumed_offset before the next execbuffer - if that
568 * happened we would make the mistake of assuming that the
569 * relocations were valid.
571 for (j = 0; j < exec[i].relocation_count; j++) {
572 if (copy_to_user(&user_relocs[j].presumed_offset,
574 sizeof(invalid_offset))) {
581 reloc_offset[i] = total;
582 total += exec[i].relocation_count;
585 ret = i915_mutex_lock_interruptible(dev);
591 /* reacquire the objects */
593 for (i = 0; i < count; i++) {
594 obj = to_intel_bo(drm_gem_object_lookup(dev, file,
596 if (&obj->base == NULL) {
597 DRM_DEBUG("Invalid object handle %d at index %d\n",
603 list_add_tail(&obj->exec_list, objects);
604 obj->exec_handle = exec[i].handle;
605 obj->exec_entry = &exec[i];
606 eb_add_object(eb, obj);
609 ret = i915_gem_execbuffer_reserve(ring, file, objects);
613 list_for_each_entry(obj, objects, exec_list) {
614 int offset = obj->exec_entry - exec;
615 ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
616 reloc + reloc_offset[offset]);
621 /* Leave the user relocations as are, this is the painfully slow path,
622 * and we want to avoid the complication of dropping the lock whilst
623 * having buffers reserved in the aperture and so causing spurious
624 * ENOSPC for random operations.
628 drm_free_large(reloc);
629 drm_free_large(reloc_offset);
/*
 * Makes the GPU wait for pending display flips before the batch runs.
 * `flips` is a bitmask of planes; for every set bit an MI_WAIT_FOR_EVENT
 * for that plane's flip (plane A or plane B mask) followed by an MI_NOOP is
 * emitted on the ring.
 */
634 i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
636 u32 plane, flip_mask;
639 /* Check for any pending flips. As we only maintain a flip queue depth
640 * of 1, we can simply insert a WAIT for the next display flip prior
641 * to executing the batch and avoid stalling the CPU.
644 for (plane = 0; flips >> plane; plane++) {
645 if (((flips >> plane) & 1) == 0)
649 flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
651 flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
653 ret = intel_ring_begin(ring, 2);
657 intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
658 intel_ring_emit(ring, MI_NOOP);
659 intel_ring_advance(ring);
/*
 * Prepares every object on `objects` for use by the GPU on `ring`.
 *
 * Each object is synchronised with the ring and clflushed if its write
 * domain includes the CPU. Pending flips of objects that are about to be
 * written are collected and waited for on the ring. A chipset flush is
 * issued if any object was in the CPU write domain, and finally all GPU
 * caches are invalidated.
 * NOTE(review): the action taken for the GTT write domain is not visible in
 * this excerpt.
 */
666 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
667 struct list_head *objects)
669 struct drm_i915_gem_object *obj;
670 uint32_t flush_domains = 0;
674 list_for_each_entry(obj, objects, exec_list) {
675 ret = i915_gem_object_sync(obj, ring);
679 if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
680 i915_gem_clflush_object(obj);
682 if (obj->base.pending_write_domain)
683 flips |= atomic_read(&obj->pending_flip);
685 flush_domains |= obj->base.write_domain;
689 ret = i915_gem_execbuffer_wait_for_flips(ring, flips);
694 if (flush_domains & I915_GEM_DOMAIN_CPU)
695 i915_gem_chipset_flush(ring->dev);
697 if (flush_domains & I915_GEM_DOMAIN_GTT)
700 /* Unconditionally invalidate gpu caches and ensure that we do flush
701 * any residual writes from the previous batch.
703 return intel_ring_invalidate_all_caches(ring);
/* Returns non-zero when both batch_start_offset and batch_len are multiples
 * of 8 (none of their low three bits are set). */
707 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
709 return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
/*
 * Validates the relocation lists of all exec entries and holds the user
 * pages that back them.
 *
 * exec/count: array of exec entries to check.
 * map:        receives an array of `count` page arrays; (*map)[i] holds the
 *             pages held for exec[i]'s relocation region.
 * maplen:     receives an array of `count` ints; (*maplen)[i] is the value
 *             returned by vm_fault_quick_hold_pages() for exec[i].
 *
 * The running total of relocation entries is checked against
 * INT_MAX / sizeof(relocation entry) so that a later single allocation of
 * the whole relocation tree cannot overflow.
 *
 * The two top-level arrays are sized from the element type they actually
 * store (sizeof(**map), sizeof(**maplen)) instead of from unrelated objects.
 * NOTE(review): several original lines (local declarations, error returns,
 * the zero-length case) are not visible in this excerpt and are left as is.
 */
713 validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
714 int count, vm_page_t ***map, int **maplen)
717 int relocs_total = 0;
718 int relocs_max = INT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
721 /* XXXKIB various limits checking is missing there */
722 *map = malloc(count * sizeof(**map), DRM_I915_GEM, M_WAITOK | M_ZERO);
723 *maplen = malloc(count * sizeof(**maplen), DRM_I915_GEM, M_WAITOK |
726 for (i = 0; i < count; i++) {
727 char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
728 int length; /* limited by fault_in_pages_readable() */
730 /* First check for malicious input causing overflow in
731 * the worst case where we need to allocate the entire
732 * relocation tree as a single array.
734 if (exec[i].relocation_count > relocs_max - relocs_total)
736 relocs_total += exec[i].relocation_count;
738 length = exec[i].relocation_count *
739 sizeof(struct drm_i915_gem_relocation_entry);
746 * Since both start and end of the relocation region
747 * may be not aligned on the page boundary, be
748 * conservative and request a page slot for each
749 * partial page. Thus +2.
753 page_count = howmany(length, PAGE_SIZE) + 2;
754 ma = (*map)[i] = malloc(page_count * sizeof(vm_page_t),
755 DRM_I915_GEM, M_WAITOK | M_ZERO);
756 (*maplen)[i] = vm_fault_quick_hold_pages(
757 &curproc->p_vmspace->vm_map, (vm_offset_t)ptr, length,
758 VM_PROT_READ | VM_PROT_WRITE, ma, page_count);
759 if ((*maplen)[i] == -1) {
760 free(ma, DRM_I915_GEM);
/*
 * Commits the state computed during reservation/relocation after the batch
 * has been dispatched: each object's pending read/write domains and pending
 * fenced access become current and the object is moved to the ring's active
 * state. For objects with a write domain, the ring's current seqno is
 * recorded as last_write_seqno and, if the object is pinned, the
 * framebuffer is marked busy. The previous domains are kept only for the
 * KTR trace record.
 */
770 i915_gem_execbuffer_move_to_active(struct list_head *objects,
771 struct intel_ring_buffer *ring)
773 struct drm_i915_gem_object *obj;
775 list_for_each_entry(obj, objects, exec_list) {
777 u32 old_read = obj->base.read_domains;
778 u32 old_write = obj->base.write_domain;
781 obj->base.read_domains = obj->base.pending_read_domains;
782 obj->base.write_domain = obj->base.pending_write_domain;
783 obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
785 i915_gem_object_move_to_active(obj, ring);
786 if (obj->base.write_domain) {
788 obj->last_write_seqno = intel_ring_get_seqno(ring);
789 if (obj->pin_count) /* check for potential scanout */
790 intel_mark_fb_busy(obj);
793 CTR3(KTR_DRM, "object_change_domain move_to_active %p %x %x",
794 obj, old_read, old_write);
/*
 * Finishes a submitted batch: marks the ring's GPU caches dirty so the next
 * request emits a full flush, then adds a request for the batch. The result
 * of i915_add_request() is explicitly discarded.
 */
799 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
800 struct drm_file *file,
801 struct intel_ring_buffer *ring)
803 /* Unconditionally force add_request to emit a full flush. */
804 ring->gpu_caches_dirty = true;
806 /* Add a breadcrumb for the completion of the batch buffer */
807 (void)i915_add_request(ring, file, NULL);
/*
 * Writes zero to the four GEN7_SO_WRITE_OFFSET registers by emitting one
 * MI_LOAD_REGISTER_IMM per register (3 dwords each, 12 in total) on the
 * ring. Takes the early-exit branch unless the device is gen7 and `ring` is
 * the render ring.
 * NOTE(review): the values returned on the early-exit and error paths are
 * not visible in this excerpt.
 */
811 i915_reset_gen7_sol_offsets(struct drm_device *dev,
812 struct intel_ring_buffer *ring)
814 drm_i915_private_t *dev_priv = dev->dev_private;
817 if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
820 ret = intel_ring_begin(ring, 4 * 3);
824 for (i = 0; i < 4; i++) {
825 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
826 intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
827 intel_ring_emit(ring, 0);
830 intel_ring_advance(ring);
/*
 * Common implementation behind both execbuffer ioctls.
 *
 * args: execbuffer2 request (flags, batch offset/length, cliprects, ...).
 * exec: kernel copy of the args->buffer_count exec entries; each entry's
 *       offset field is updated when the object is reserved.
 *
 * Outline of the visible steps:
 *  1. Check batch offset/length alignment and hold the user pages backing
 *     every relocation list (validate_exec_list).
 *  2. Translate args->flags into dispatch flags, pick the target ring and
 *     validate the requested constants addressing mode.
 *  3. Copy in the clip rectangles, if any (render ring, pre-gen5 only).
 *  4. Take the device lock, build the handle table and the object list; an
 *     object may appear only once. The last object in the list is the batch.
 *  5. Reserve GTT space, then relocate; a fast-path -EFAULT falls back to
 *     i915_gem_execbuffer_relocate_slow().
 *  6. Reject a batch that has a pending write domain, flush/invalidate via
 *     move_to_gpu, switch context, update INSTPM when the constants mode
 *     changes, optionally reset the gen7 SOL offsets.
 *  7. Dispatch the batch (once per clip rectangle when cliprects are given),
 *     then mark the objects active and add the request.
 *  8. Cleanup: drop object references, unhold the relocation pages and free
 *     the temporary arrays.
 *
 * NOTE(review): the cliprects allocation uses M_WAITOK, so the NULL check
 * that follows it looks unreachable - confirm against malloc(9).
 * NOTE(review): goto labels, unlock calls and return statements are not
 * visible in this excerpt.
 */
836 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
837 struct drm_file *file,
838 struct drm_i915_gem_execbuffer2 *args,
839 struct drm_i915_gem_exec_object2 *exec)
841 drm_i915_private_t *dev_priv = dev->dev_private;
842 struct list_head objects;
843 struct eb_objects *eb;
844 struct drm_i915_gem_object *batch_obj;
845 struct drm_clip_rect *cliprects = NULL;
846 struct intel_ring_buffer *ring;
847 u32 ctx_id = i915_execbuffer2_get_context_id(*args);
848 u32 exec_start, exec_len;
852 vm_page_t **relocs_ma;
855 if (!i915_gem_check_execbuffer(args)) {
856 DRM_DEBUG("execbuf with invalid offset/length\n");
860 ret = validate_exec_list(exec, args->buffer_count,
861 &relocs_ma, &relocs_len);
866 if (args->flags & I915_EXEC_SECURE) {
867 if (!file->is_master || !capable(CAP_SYS_ADMIN)) {
872 flags |= I915_DISPATCH_SECURE;
874 if (args->flags & I915_EXEC_IS_PINNED)
875 flags |= I915_DISPATCH_PINNED;
877 switch (args->flags & I915_EXEC_RING_MASK) {
878 case I915_EXEC_DEFAULT:
879 case I915_EXEC_RENDER:
880 ring = &dev_priv->ring[RCS];
883 ring = &dev_priv->ring[VCS];
885 DRM_DEBUG("Ring %s doesn't support contexts\n",
892 ring = &dev_priv->ring[BCS];
894 DRM_DEBUG("Ring %s doesn't support contexts\n",
901 DRM_DEBUG("execbuf with unknown ring: %d\n",
902 (int)(args->flags & I915_EXEC_RING_MASK));
906 if (!intel_ring_initialized(ring)) {
907 DRM_DEBUG("execbuf with invalid ring: %d\n",
908 (int)(args->flags & I915_EXEC_RING_MASK));
913 mode = args->flags & I915_EXEC_CONSTANTS_MASK;
914 mask = I915_EXEC_CONSTANTS_MASK;
916 case I915_EXEC_CONSTANTS_REL_GENERAL:
917 case I915_EXEC_CONSTANTS_ABSOLUTE:
918 case I915_EXEC_CONSTANTS_REL_SURFACE:
919 if (ring == &dev_priv->ring[RCS] &&
920 mode != dev_priv->relative_constants_mode) {
921 if (INTEL_INFO(dev)->gen < 4) {
926 if (INTEL_INFO(dev)->gen > 5 &&
927 mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
932 /* The HW changed the meaning on this bit on gen6 */
933 if (INTEL_INFO(dev)->gen >= 6)
934 mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
938 DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
943 if (args->buffer_count < 1) {
944 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
949 if (args->num_cliprects != 0) {
950 if (ring != &dev_priv->ring[RCS]) {
951 DRM_DEBUG("clip rectangles are only valid with the render ring\n");
956 if (INTEL_INFO(dev)->gen >= 5) {
957 DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
962 if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
963 DRM_DEBUG("execbuf with %u cliprects\n",
964 args->num_cliprects);
969 cliprects = malloc(args->num_cliprects * sizeof(*cliprects),
970 DRM_I915_GEM, M_WAITOK);
971 if (cliprects == NULL) {
976 if (copy_from_user(cliprects,
977 (struct drm_clip_rect __user *)(uintptr_t)
979 sizeof(*cliprects)*args->num_cliprects)) {
985 ret = i915_mutex_lock_interruptible(dev);
989 if (dev_priv->mm.suspended) {
995 eb = eb_create(args->buffer_count);
1002 /* Look up object handles */
1003 INIT_LIST_HEAD(&objects);
1004 for (i = 0; i < args->buffer_count; i++) {
1005 struct drm_i915_gem_object *obj;
1007 obj = to_intel_bo(drm_gem_object_lookup(dev, file,
1009 if (&obj->base == NULL) {
1010 DRM_DEBUG("Invalid object handle %d at index %d\n",
1012 /* prevent error path from reading uninitialized data */
1017 if (!list_empty(&obj->exec_list)) {
1018 DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
1019 obj, exec[i].handle, i);
1024 list_add_tail(&obj->exec_list, &objects);
1025 obj->exec_handle = exec[i].handle;
1026 obj->exec_entry = &exec[i];
1027 eb_add_object(eb, obj);
1030 /* take note of the batch buffer before we might reorder the lists */
1031 batch_obj = list_entry(objects.prev,
1032 struct drm_i915_gem_object,
1035 /* Move the objects en-masse into the GTT, evicting if necessary. */
1036 ret = i915_gem_execbuffer_reserve(ring, file, &objects);
1040 /* The objects are in their final locations, apply the relocations. */
1041 ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
1043 if (ret == -EFAULT) {
1044 ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
1047 args->buffer_count);
1048 DRM_LOCK_ASSERT(dev);
1054 /* Set the pending read domains for the batch buffer to COMMAND */
1055 if (batch_obj->base.pending_write_domain) {
1056 DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1060 batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1062 /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1063 * batch" bit. Hence we need to pin secure batches into the global gtt.
1064 * hsw should have this fixed, but let's be paranoid and do it
1065 * unconditionally for now. */
1066 if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
1067 i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);
1069 ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
1073 ret = i915_switch_context(ring, file, ctx_id);
1077 if (ring == &dev_priv->ring[RCS] &&
1078 mode != dev_priv->relative_constants_mode) {
1079 ret = intel_ring_begin(ring, 4);
1083 intel_ring_emit(ring, MI_NOOP);
1084 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1085 intel_ring_emit(ring, INSTPM);
1086 intel_ring_emit(ring, mask << 16 | mode);
1087 intel_ring_advance(ring);
1089 dev_priv->relative_constants_mode = mode;
1092 if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1093 ret = i915_reset_gen7_sol_offsets(dev, ring);
1098 exec_start = batch_obj->gtt_offset + args->batch_start_offset;
1099 exec_len = args->batch_len;
1101 for (i = 0; i < args->num_cliprects; i++) {
1102 ret = i915_emit_box(dev, &cliprects[i],
1103 args->DR1, args->DR4);
1107 ret = ring->dispatch_execbuffer(ring,
1108 exec_start, exec_len,
1114 ret = ring->dispatch_execbuffer(ring,
1115 exec_start, exec_len,
1121 CTR3(KTR_DRM, "ring_dispatch ring=%s seqno=%d flags=%u", ring->name,
1122 intel_ring_get_seqno(ring), flags);
1124 i915_gem_execbuffer_move_to_active(&objects, ring);
1125 i915_gem_execbuffer_retire_commands(dev, file, ring);
1129 while (!list_empty(&objects)) {
1130 struct drm_i915_gem_object *obj;
1132 obj = list_first_entry(&objects,
1133 struct drm_i915_gem_object,
1135 list_del_init(&obj->exec_list);
1136 drm_gem_object_unreference(&obj->base);
1142 for (i = 0; i < args->buffer_count; i++) {
1143 if (relocs_ma[i] != NULL) {
1144 vm_page_unhold_pages(relocs_ma[i], relocs_len[i]);
1145 free(relocs_ma[i], DRM_I915_GEM);
1148 free(relocs_len, DRM_I915_GEM);
1149 free(relocs_ma, DRM_I915_GEM);
1150 free(cliprects, DRM_I915_GEM);
1155 * Legacy execbuffer just creates an exec2 list from the original exec object
1156 * list array and passes it to the real function.
/*
 * Legacy execbuffer ioctl handler.
 *
 * Copies the user's array of old-style exec objects into the kernel,
 * converts each one to an exec_object2 (flags is EXEC_OBJECT_NEEDS_FENCE on
 * gen < 4 hardware, 0 otherwise), builds an execbuffer2 request that targets
 * the render ring with context id 0 and hands both to
 * i915_gem_do_execbuffer(). The offsets assigned to the objects are copied
 * back into the old-style array and out to user space. Both temporary
 * arrays are released with drm_free_large() on every visible path.
 * NOTE(review): return statements and the condition guarding the copy-back
 * are not visible in this excerpt.
 */
1159 i915_gem_execbuffer(struct drm_device *dev, void *data,
1160 struct drm_file *file)
1162 struct drm_i915_gem_execbuffer *args = data;
1163 struct drm_i915_gem_execbuffer2 exec2;
1164 struct drm_i915_gem_exec_object *exec_list = NULL;
1165 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1168 if (args->buffer_count < 1) {
1169 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1173 /* Copy in the exec list from userland */
1174 /* XXXKIB user-controlled malloc size */
1175 exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1176 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1177 if (exec_list == NULL || exec2_list == NULL) {
1178 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1179 args->buffer_count);
1180 drm_free_large(exec_list);
1181 drm_free_large(exec2_list);
1184 ret = copy_from_user(exec_list,
1185 (void __user *)(uintptr_t)args->buffers_ptr,
1186 sizeof(*exec_list) * args->buffer_count);
1188 DRM_DEBUG("copy %d exec entries failed %d\n",
1189 args->buffer_count, ret);
1190 drm_free_large(exec_list);
1191 drm_free_large(exec2_list);
1195 for (i = 0; i < args->buffer_count; i++) {
1196 exec2_list[i].handle = exec_list[i].handle;
1197 exec2_list[i].relocation_count = exec_list[i].relocation_count;
1198 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1199 exec2_list[i].alignment = exec_list[i].alignment;
1200 exec2_list[i].offset = exec_list[i].offset;
1201 if (INTEL_INFO(dev)->gen < 4)
1202 exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1204 exec2_list[i].flags = 0;
1207 exec2.buffers_ptr = args->buffers_ptr;
1208 exec2.buffer_count = args->buffer_count;
1209 exec2.batch_start_offset = args->batch_start_offset;
1210 exec2.batch_len = args->batch_len;
1211 exec2.DR1 = args->DR1;
1212 exec2.DR4 = args->DR4;
1213 exec2.num_cliprects = args->num_cliprects;
1214 exec2.cliprects_ptr = args->cliprects_ptr;
1215 exec2.flags = I915_EXEC_RENDER;
1216 i915_execbuffer2_set_context_id(exec2, 0);
1218 ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1220 /* Copy the new buffer offsets back to the user's exec list. */
1221 for (i = 0; i < args->buffer_count; i++)
1222 exec_list[i].offset = exec2_list[i].offset;
1223 /* ... and back out to userspace */
1224 ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
1226 sizeof(*exec_list) * args->buffer_count);
1229 DRM_DEBUG("failed to copy %d exec entries "
1230 "back to user (%d)\n",
1231 args->buffer_count, ret);
1235 drm_free_large(exec_list);
1236 drm_free_large(exec2_list);
/*
 * execbuffer2 ioctl handler.
 *
 * Rejects a buffer count of zero or one large enough to overflow the size
 * computation, copies the user's exec_object2 array into a kernel buffer,
 * runs i915_gem_do_execbuffer() on it and copies the array (with updated
 * offsets) back to user space. The kernel buffer is freed on every visible
 * path after allocation.
 * NOTE(review): the allocation uses M_WAITOK, so the NULL check after it
 * looks unreachable - confirm against malloc(9).
 * NOTE(review): return statements and the condition guarding the copy-back
 * are not visible in this excerpt.
 */
1241 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1242 struct drm_file *file)
1244 struct drm_i915_gem_execbuffer2 *args = data;
1245 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1248 if (args->buffer_count < 1 ||
1249 args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1250 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1254 /* XXXKIB user-controllable malloc size */
1255 exec2_list = malloc(sizeof(*exec2_list)*args->buffer_count,
1256 DRM_I915_GEM, M_WAITOK);
1257 if (exec2_list == NULL) {
1258 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1259 args->buffer_count);
1262 ret = copy_from_user(exec2_list,
1263 (struct drm_i915_relocation_entry __user *)
1264 (uintptr_t) args->buffers_ptr,
1265 sizeof(*exec2_list) * args->buffer_count);
1267 DRM_DEBUG("copy %d exec entries failed %d\n",
1268 args->buffer_count, ret);
1269 free(exec2_list, DRM_I915_GEM);
1273 ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1275 /* Copy the new buffer offsets back to the user's exec list. */
1276 ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
1278 sizeof(*exec2_list) * args->buffer_count);
1281 DRM_DEBUG("failed to copy %d exec entries "
1282 "back to user (%d)\n",
1283 args->buffer_count, ret);
1287 free(exec2_list, DRM_I915_GEM);