sys/dev/drm2/i915/i915_gem_execbuffer.c

   1 /*
   2  * Copyright © 2008,2010 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Eric Anholt <eric@anholt.net>
  25  *    Chris Wilson <chris@chris-wilson.co.uk>
  26  *
  27  */
  28
  29 #include <sys/cdefs.h>
  30 __FBSDID("$FreeBSD$");
  31
  32 #include <dev/drm2/drmP.h>
  33 #include <dev/drm2/drm.h>
  34 #include <dev/drm2/i915/i915_drm.h>
  35 #include <dev/drm2/i915/i915_drv.h>
  36 #include <dev/drm2/i915/intel_drv.h>
  37 #include <sys/limits.h>
  38 #include <sys/sf_buf.h>
  39
  40 struct change_domains {
  41         uint32_t invalidate_domains;
  42         uint32_t flush_domains;
  43         uint32_t flush_rings;
  44         uint32_t flips;
  45 };
  46
  47 /*
  48  * Set the next domain for the specified object. This
  49  * may not actually perform the necessary flushing/invaliding though,
  50  * as that may want to be batched with other set_domain operations
  51  *
  52  * This is (we hope) the only really tricky part of gem. The goal
  53  * is fairly simple -- track which caches hold bits of the object
  54  * and make sure they remain coherent. A few concrete examples may
  55  * help to explain how it works. For shorthand, we use the notation
  56  * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the
  57  * a pair of read and write domain masks.
  58  *
  59  * Case 1: the batch buffer
  60  *
  61  *      1. Allocated
  62  *      2. Written by CPU
  63  *      3. Mapped to GTT
  64  *      4. Read by GPU
  65  *      5. Unmapped from GTT
  66  *      6. Freed
  67  *
  68  *      Let's take these a step at a time
  69  *
  70  *      1. Allocated
  71  *              Pages allocated from the kernel may still have
  72  *              cache contents, so we set them to (CPU, CPU) always.
  73  *      2. Written by CPU (using pwrite)
  74  *              The pwrite function calls set_domain (CPU, CPU) and
  75  *              this function does nothing (as nothing changes)
  76  *      3. Mapped by GTT
  77  *              This function asserts that the object is not
  78  *              currently in any GPU-based read or write domains
  79  *      4. Read by GPU
  80  *              i915_gem_execbuffer calls set_domain (COMMAND, 0).
  81  *              As write_domain is zero, this function adds in the
  82  *              current read domains (CPU+COMMAND, 0).
  83  *              flush_domains is set to CPU.
  84  *              invalidate_domains is set to COMMAND
  85  *              clflush is run to get data out of the CPU caches
  86  *              then i915_dev_set_domain calls i915_gem_flush to
  87  *              emit an MI_FLUSH and drm_agp_chipset_flush
  88  *      5. Unmapped from GTT
  89  *              i915_gem_object_unbind calls set_domain (CPU, CPU)
  90  *              flush_domains and invalidate_domains end up both zero
  91  *              so no flushing/invalidating happens
  92  *      6. Freed
  93  *              yay, done
  94  *
  95  * Case 2: The shared render buffer
  96  *
  97  *      1. Allocated
  98  *      2. Mapped to GTT
  99  *      3. Read/written by GPU
 100  *      4. set_domain to (CPU,CPU)
 101  *      5. Read/written by CPU
 102  *      6. Read/written by GPU
 103  *
 104  *      1. Allocated
 105  *              Same as last example, (CPU, CPU)
 106  *      2. Mapped to GTT
 107  *              Nothing changes (assertions find that it is not in the GPU)
 108  *      3. Read/written by GPU
 109  *              execbuffer calls set_domain (RENDER, RENDER)
 110  *              flush_domains gets CPU
 111  *              invalidate_domains gets GPU
 112  *              clflush (obj)
 113  *              MI_FLUSH and drm_agp_chipset_flush
 114  *      4. set_domain (CPU, CPU)
 115  *              flush_domains gets GPU
 116  *              invalidate_domains gets CPU
 117  *              wait_rendering (obj) to make sure all drawing is complete.
 118  *              This will include an MI_FLUSH to get the data from GPU
 119  *              to memory
 120  *              clflush (obj) to invalidate the CPU cache
 121  *              Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 122  *      5. Read/written by CPU
 123  *              cache lines are loaded and dirtied
 124  *      6. Read written by GPU
 125  *              Same as last GPU access
 126  *
 127  * Case 3: The constant buffer
 128  *
 129  *      1. Allocated
 130  *      2. Written by CPU
 131  *      3. Read by GPU
 132  *      4. Updated (written) by CPU again
 133  *      5. Read by GPU
 134  *
 135  *      1. Allocated
 136  *              (CPU, CPU)
 137  *      2. Written by CPU
 138  *              (CPU, CPU)
 139  *      3. Read by GPU
 140  *              (CPU+RENDER, 0)
 141  *              flush_domains = CPU
 142  *              invalidate_domains = RENDER
 143  *              clflush (obj)
 144  *              MI_FLUSH
 145  *              drm_agp_chipset_flush
 146  *      4. Updated (written) by CPU again
 147  *              (CPU, CPU)
 148  *              flush_domains = 0 (no previous write domain)
 149  *              invalidate_domains = 0 (no new read domains)
 150  *      5. Read by GPU
 151  *              (CPU+RENDER, 0)
 152  *              flush_domains = CPU
 153  *              invalidate_domains = RENDER
 154  *              clflush (obj)
 155  *              MI_FLUSH
 156  *              drm_agp_chipset_flush
 157  */
 158 static void
 159 i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
 160                                   struct intel_ring_buffer *ring,
 161                                   struct change_domains *cd)
 162 {
 163         uint32_t invalidate_domains = 0, flush_domains = 0;
 164
 165         /*
 166          * If the object isn't moving to a new write domain,
 167          * let the object stay in multiple read domains
 168          */
 169         if (obj->base.pending_write_domain == 0)
 170                 obj->base.pending_read_domains |= obj->base.read_domains;
 171
 172         /*
 173          * Flush the current write domain if
 174          * the new read domains don't match. Invalidate
 175          * any read domains which differ from the old
 176          * write domain
 177          */
 178         if (obj->base.write_domain &&
 179             (((obj->base.write_domain != obj->base.pending_read_domains ||
 180                obj->ring != ring)) ||
 181              (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
 182                 flush_domains |= obj->base.write_domain;
 183                 invalidate_domains |=
 184                         obj->base.pending_read_domains & ~obj->base.write_domain;
 185         }
 186         /*
 187          * Invalidate any read caches which may have
 188          * stale data. That is, any new read domains.
 189          */
 190         invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
 191         if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
 192                 i915_gem_clflush_object(obj);
 193
 194         if (obj->base.pending_write_domain)
 195                 cd->flips |= atomic_load_acq_int(&obj->pending_flip);
 196
 197         /* The actual obj->write_domain will be updated with
 198          * pending_write_domain after we emit the accumulated flush for all
 199          * of our domain changes in execbuffers (which clears objects'
 200          * write_domains).  So if we have a current write domain that we
 201          * aren't changing, set pending_write_domain to that.
 202          */
 203         if (flush_domains == 0 && obj->base.pending_write_domain == 0)
 204                 obj->base.pending_write_domain = obj->base.write_domain;
 205
 206         cd->invalidate_domains |= invalidate_domains;
 207         cd->flush_domains |= flush_domains;
 208         if (flush_domains & I915_GEM_GPU_DOMAINS)
 209                 cd->flush_rings |= intel_ring_flag(obj->ring);
 210         if (invalidate_domains & I915_GEM_GPU_DOMAINS)
 211                 cd->flush_rings |= intel_ring_flag(ring);
 212 }
 213
 214 struct eb_objects {
 215         u_long hashmask;
 216         LIST_HEAD(, drm_i915_gem_object) *buckets;
 217 };
 218
 219 static struct eb_objects *
 220 eb_create(int size)
 221 {
 222         struct eb_objects *eb;
 223
 224         eb = malloc(sizeof(*eb), DRM_I915_GEM, M_WAITOK | M_ZERO);
 225         eb->buckets = hashinit(size, DRM_I915_GEM, &eb->hashmask);
 226         return (eb);
 227 }
 228
 229 static void
 230 eb_reset(struct eb_objects *eb)
 231 {
 232         int i;
 233
 234         for (i = 0; i <= eb->hashmask; i++)
 235                 LIST_INIT(&eb->buckets[i]);
 236 }
 237
 238 static void
 239 eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
 240 {
 241
 242         LIST_INSERT_HEAD(&eb->buckets[obj->exec_handle & eb->hashmask],
 243             obj, exec_node);
 244 }
 245
 246 static struct drm_i915_gem_object *
 247 eb_get_object(struct eb_objects *eb, unsigned long handle)
 248 {
 249         struct drm_i915_gem_object *obj;
 250
 251         LIST_FOREACH(obj, &eb->buckets[handle & eb->hashmask], exec_node) {
 252                 if (obj->exec_handle == handle)
 253                         return (obj);
 254         }
 255         return (NULL);
 256 }
 257
 258 static void
 259 eb_destroy(struct eb_objects *eb)
 260 {
 261
 262         free(eb->buckets, DRM_I915_GEM);
 263         free(eb, DRM_I915_GEM);
 264 }
 265
 266 static int
 267 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 268                                    struct eb_objects *eb,
 269                                    struct drm_i915_gem_relocation_entry *reloc)
 270 {
 271         struct drm_device *dev = obj->base.dev;
 272         struct drm_gem_object *target_obj;
 273         uint32_t target_offset;
 274         int ret = -EINVAL;
 275
 276         /* we've already hold a reference to all valid objects */
 277         target_obj = &eb_get_object(eb, reloc->target_handle)->base;
 278         if (unlikely(target_obj == NULL))
 279                 return -ENOENT;
 280
 281         target_offset = to_intel_bo(target_obj)->gtt_offset;
 282
 283 #if WATCH_RELOC
 284         DRM_INFO("%s: obj %p offset %08x target %d "
 285                  "read %08x write %08x gtt %08x "
 286                  "presumed %08x delta %08x\n",
 287                  __func__,
 288                  obj,
 289                  (int) reloc->offset,
 290                  (int) reloc->target_handle,
 291                  (int) reloc->read_domains,
 292                  (int) reloc->write_domain,
 293                  (int) target_offset,
 294                  (int) reloc->presumed_offset,
 295                  reloc->delta);
 296 #endif
 297
 298         /* The target buffer should have appeared before us in the
 299          * exec_object list, so it should have a GTT space bound by now.
 300          */
 301         if (unlikely(target_offset == 0)) {
 302                 DRM_DEBUG("No GTT space found for object %d\n",
 303                           reloc->target_handle);
 304                 return ret;
 305         }
 306
 307         /* Validate that the target is in a valid r/w GPU domain */
 308         if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
 309                 DRM_DEBUG("reloc with multiple write domains: "
 310                           "obj %p target %d offset %d "
 311                           "read %08x write %08x",
 312                           obj, reloc->target_handle,
 313                           (int) reloc->offset,
 314                           reloc->read_domains,
 315                           reloc->write_domain);
 316                 return ret;
 317         }
 318         if (unlikely((reloc->write_domain | reloc->read_domains)
 319                      & ~I915_GEM_GPU_DOMAINS)) {
 320                 DRM_DEBUG("reloc with read/write non-GPU domains: "
 321                           "obj %p target %d offset %d "
 322                           "read %08x write %08x",
 323                           obj, reloc->target_handle,
 324                           (int) reloc->offset,
 325                           reloc->read_domains,
 326                           reloc->write_domain);
 327                 return ret;
 328         }
 329         if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
 330                      reloc->write_domain != target_obj->pending_write_domain)) {
 331                 DRM_DEBUG("Write domain conflict: "
 332                           "obj %p target %d offset %d "
 333                           "new %08x old %08x\n",
 334                           obj, reloc->target_handle,
 335                           (int) reloc->offset,
 336                           reloc->write_domain,
 337                           target_obj->pending_write_domain);
 338                 return ret;
 339         }
 340
 341         target_obj->pending_read_domains |= reloc->read_domains;
 342         target_obj->pending_write_domain |= reloc->write_domain;
 343
 344         /* If the relocation already has the right value in it, no
 345          * more work needs to be done.
 346          */
 347         if (target_offset == reloc->presumed_offset)
 348                 return 0;
 349
 350         /* Check that the relocation address is valid... */
 351         if (unlikely(reloc->offset > obj->base.size - 4)) {
 352                 DRM_DEBUG("Relocation beyond object bounds: "
 353                           "obj %p target %d offset %d size %d.\n",
 354                           obj, reloc->target_handle,
 355                           (int) reloc->offset,
 356                           (int) obj->base.size);
 357                 return ret;
 358         }
 359         if (unlikely(reloc->offset & 3)) {
 360                 DRM_DEBUG("Relocation not 4-byte aligned: "
 361                           "obj %p target %d offset %d.\n",
 362                           obj, reloc->target_handle,
 363                           (int) reloc->offset);
 364                 return ret;
 365         }
 366
 367         reloc->delta += target_offset;
 368         if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
 369                 uint32_t page_offset = reloc->offset & PAGE_MASK;
 370                 char *vaddr;
 371                 struct sf_buf *sf;
 372
 373                 sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)],
 374                     SFB_NOWAIT);
 375                 if (sf == NULL)
 376                         return (-ENOMEM);
 377                 vaddr = (void *)sf_buf_kva(sf);
 378                 *(uint32_t *)(vaddr + page_offset) = reloc->delta;
 379                 sf_buf_free(sf);
 380         } else {
 381                 uint32_t *reloc_entry;
 382                 char *reloc_page;
 383
 384                 /* We can't wait for rendering with pagefaults disabled */
 385                 if (obj->active && (curthread->td_pflags & TDP_NOFAULTING) != 0)
 386                         return (-EFAULT);
 387                 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
 388                 if (ret)
 389                         return ret;
 390
 391                 /*
 392                  * Map the page containing the relocation we're going
 393                  * to perform.
 394                  */
 395                 reloc->offset += obj->gtt_offset;
 396                 reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
 397                     ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
 398                 reloc_entry = (uint32_t *)(reloc_page + (reloc->offset &
 399                     PAGE_MASK));
 400                 *(volatile uint32_t *)reloc_entry = reloc->delta;
 401                 pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
 402         }
 403
 404         /* and update the user's relocation entry */
 405         reloc->presumed_offset = target_offset;
 406
 407         return 0;
 408 }
 409
 410 static int
 411 i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
 412     struct eb_objects *eb)
 413 {
 414         struct drm_i915_gem_relocation_entry *user_relocs;
 415         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 416         struct drm_i915_gem_relocation_entry reloc;
 417         int i, ret;
 418
 419         user_relocs = (void *)(uintptr_t)entry->relocs_ptr;
 420         for (i = 0; i < entry->relocation_count; i++) {
 421                 ret = -copyin_nofault(user_relocs + i, &reloc, sizeof(reloc));
 422                 if (ret != 0)
 423                         return (ret);
 424
 425                 ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
 426                 if (ret != 0)
 427                         return (ret);
 428
 429                 ret = -copyout_nofault(&reloc.presumed_offset,
 430                     &user_relocs[i].presumed_offset,
 431                     sizeof(reloc.presumed_offset));
 432                 if (ret != 0)
 433                         return (ret);
 434         }
 435
 436         return (0);
 437 }
 438
 439 static int
 440 i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
 441     struct eb_objects *eb, struct drm_i915_gem_relocation_entry *relocs)
 442 {
 443         const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 444         int i, ret;
 445
 446         for (i = 0; i < entry->relocation_count; i++) {
 447                 ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
 448                 if (ret)
 449                         return ret;
 450         }
 451
 452         return 0;
 453 }
 454
 455 static int
 456 i915_gem_execbuffer_relocate(struct drm_device *dev,
 457                              struct eb_objects *eb,
 458                              struct list_head *objects)
 459 {
 460         struct drm_i915_gem_object *obj;
 461         int ret, pflags;
 462
 463         /* Try to move as many of the relocation targets off the active list
 464          * to avoid unnecessary fallbacks to the slow path, as we cannot wait
 465          * for the retirement with pagefaults disabled.
 466          */
 467         i915_gem_retire_requests(dev);
 468
 469         ret = 0;
 470         pflags = vm_fault_disable_pagefaults();
 471         /* This is the fast path and we cannot handle a pagefault whilst
 472          * holding the device lock lest the user pass in the relocations
 473          * contained within a mmaped bo. For in such a case we, the page
 474          * fault handler would call i915_gem_fault() and we would try to
 475          * acquire the device lock again. Obviously this is bad.
 476          */
 477
 478         list_for_each_entry(obj, objects, exec_list) {
 479                 ret = i915_gem_execbuffer_relocate_object(obj, eb);
 480                 if (ret != 0)
 481                         break;
 482         }
 483         vm_fault_enable_pagefaults(pflags);
 484         return (ret);
 485 }
 486
 487 #define  __EXEC_OBJECT_HAS_FENCE (1<<31)
 488
 489 static int
 490 pin_and_fence_object(struct drm_i915_gem_object *obj,
 491                      struct intel_ring_buffer *ring)
 492 {
 493         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 494         bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 495         bool need_fence, need_mappable;
 496         int ret;
 497
 498         need_fence =
 499                 has_fenced_gpu_access &&
 500                 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 501                 obj->tiling_mode != I915_TILING_NONE;
 502         need_mappable =
 503                 entry->relocation_count ? true : need_fence;
 504
 505         ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
 506         if (ret)
 507                 return ret;
 508
 509         if (has_fenced_gpu_access) {
 510                 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
 511                         if (obj->tiling_mode) {
 512                                 ret = i915_gem_object_get_fence(obj, ring);
 513                                 if (ret)
 514                                         goto err_unpin;
 515
 516                                 entry->flags |= __EXEC_OBJECT_HAS_FENCE;
 517                                 i915_gem_object_pin_fence(obj);
 518                         } else {
 519                                 ret = i915_gem_object_put_fence(obj);
 520                                 if (ret)
 521                                         goto err_unpin;
 522                         }
 523                         obj->pending_fenced_gpu_access = true;
 524                 }
 525         }
 526
 527         entry->offset = obj->gtt_offset;
 528         return 0;
 529
 530 err_unpin:
 531         i915_gem_object_unpin(obj);
 532         return ret;
 533 }
 534
 535 static int
 536 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 537                             struct drm_file *file,
 538                             struct list_head *objects)
 539 {
 540         drm_i915_private_t *dev_priv;
 541         struct drm_i915_gem_object *obj;
 542         int ret, retry;
 543         bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 544         struct list_head ordered_objects;
 545
 546         dev_priv = ring->dev->dev_private;
 547         INIT_LIST_HEAD(&ordered_objects);
 548         while (!list_empty(objects)) {
 549                 struct drm_i915_gem_exec_object2 *entry;
 550                 bool need_fence, need_mappable;
 551
 552                 obj = list_first_entry(objects,
 553                                        struct drm_i915_gem_object,
 554                                        exec_list);
 555                 entry = obj->exec_entry;
 556
 557                 need_fence =
 558                         has_fenced_gpu_access &&
 559                         entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 560                         obj->tiling_mode != I915_TILING_NONE;
 561                 need_mappable =
 562                         entry->relocation_count ? true : need_fence;
 563
 564                 if (need_mappable)
 565                         list_move(&obj->exec_list, &ordered_objects);
 566                 else
 567                         list_move_tail(&obj->exec_list, &ordered_objects);
 568
 569                 obj->base.pending_read_domains = 0;
 570                 obj->base.pending_write_domain = 0;
 571         }
 572         list_splice(&ordered_objects, objects);
 573
 574         /* Attempt to pin all of the buffers into the GTT.
 575          * This is done in 3 phases:
 576          *
 577          * 1a. Unbind all objects that do not match the GTT constraints for
 578          *     the execbuffer (fenceable, mappable, alignment etc).
 579          * 1b. Increment pin count for already bound objects and obtain
 580          *     a fence register if required.
 581          * 2.  Bind new objects.
 582          * 3.  Decrement pin count.
 583          *
 584          * This avoid unnecessary unbinding of later objects in order to makr
 585          * room for the earlier objects *unless* we need to defragment.
 586          */
 587         retry = 0;
 588         do {
 589                 ret = 0;
 590
 591                 /* Unbind any ill-fitting objects or pin. */
 592                 list_for_each_entry(obj, objects, exec_list) {
 593                         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 594                         bool need_fence, need_mappable;
 595
 596                         if (!obj->gtt_space)
 597                                 continue;
 598
 599                         need_fence =
 600                                 has_fenced_gpu_access &&
 601                                 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 602                                 obj->tiling_mode != I915_TILING_NONE;
 603                         need_mappable =
 604                                 entry->relocation_count ? true : need_fence;
 605
 606                         if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
 607                             (need_mappable && !obj->map_and_fenceable))
 608                                 ret = i915_gem_object_unbind(obj);
 609                         else
 610                                 ret = pin_and_fence_object(obj, ring);
 611                         if (ret)
 612                                 goto err;
 613                 }
 614
 615                 /* Bind fresh objects */
 616                 list_for_each_entry(obj, objects, exec_list) {
 617                         if (obj->gtt_space)
 618                                 continue;
 619
 620                         ret = pin_and_fence_object(obj, ring);
 621                         if (ret) {
 622                                 int ret_ignore;
 623
 624                                 /* This can potentially raise a harmless
 625                                  * -EINVAL if we failed to bind in the above
 626                                  * call. It cannot raise -EINTR since we know
 627                                  * that the bo is freshly bound and so will
 628                                  * not need to be flushed or waited upon.
 629                                  */
 630                                 ret_ignore = i915_gem_object_unbind(obj);
 631                                 (void)ret_ignore;
 632                                 if (obj->gtt_space != NULL)
 633                                         printf("%s: gtt_space\n", __func__);
 634                                 break;
 635                         }
 636                 }
 637
 638                 /* Decrement pin count for bound objects */
 639                 list_for_each_entry(obj, objects, exec_list) {
 640                         struct drm_i915_gem_exec_object2 *entry;
 641
 642                         if (!obj->gtt_space)
 643                                 continue;
 644
 645                         entry = obj->exec_entry;
 646                         if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
 647                                 i915_gem_object_unpin_fence(obj);
 648                                 entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
 649                         }
 650
 651                         i915_gem_object_unpin(obj);
 652
 653                         /* ... and ensure ppgtt mapping exist if needed. */
 654                         if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
 655                                 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
 656                                                        obj, obj->cache_level);
 657
 658                                 obj->has_aliasing_ppgtt_mapping = 1;
 659                         }
 660                 }
 661
 662                 if (ret != -ENOSPC || retry > 1)
 663                         return ret;
 664
 665                 /* First attempt, just clear anything that is purgeable.
 666                  * Second attempt, clear the entire GTT.
 667                  */
 668                 ret = i915_gem_evict_everything(ring->dev, retry == 0);
 669                 if (ret)
 670                         return ret;
 671
 672                 retry++;
 673         } while (1);
 674
 675 err:
 676         list_for_each_entry_continue_reverse(obj, objects, exec_list) {
 677                 struct drm_i915_gem_exec_object2 *entry;
 678
 679                 if (!obj->gtt_space)
 680                         continue;
 681
 682                 entry = obj->exec_entry;
 683                 if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
 684                         i915_gem_object_unpin_fence(obj);
 685                         entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
 686                 }
 687
 688                 i915_gem_object_unpin(obj);
 689         }
 690
 691         return ret;
 692 }
 693
 694 static int
 695 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 696     struct drm_file *file, struct intel_ring_buffer *ring,
 697     struct list_head *objects, struct eb_objects *eb,
 698     struct drm_i915_gem_exec_object2 *exec, int count)
 699 {
 700         struct drm_i915_gem_relocation_entry *reloc;
 701         struct drm_i915_gem_object *obj;
 702         int *reloc_offset;
 703         int i, total, ret;
 704
 705         /* We may process another execbuffer during the unlock... */
 706         while (!list_empty(objects)) {
 707                 obj = list_first_entry(objects,
 708                                        struct drm_i915_gem_object,
 709                                        exec_list);
 710                 list_del_init(&obj->exec_list);
 711                 drm_gem_object_unreference(&obj->base);
 712         }
 713
 714         DRM_UNLOCK(dev);
 715
 716         total = 0;
 717         for (i = 0; i < count; i++)
 718                 total += exec[i].relocation_count;
 719
 720         reloc_offset = malloc(count * sizeof(*reloc_offset), DRM_I915_GEM,
 721             M_WAITOK | M_ZERO);
 722         reloc = malloc(total * sizeof(*reloc), DRM_I915_GEM, M_WAITOK | M_ZERO);
 723
 724         total = 0;
 725         for (i = 0; i < count; i++) {
 726                 struct drm_i915_gem_relocation_entry *user_relocs;
 727
 728                 user_relocs = (void *)(uintptr_t)exec[i].relocs_ptr;
 729                 ret = -copyin(user_relocs, reloc + total,
 730                     exec[i].relocation_count * sizeof(*reloc));
 731                 if (ret != 0) {
 732                         DRM_LOCK(dev);
 733                         goto err;
 734                 }
 735
 736                 reloc_offset[i] = total;
 737                 total += exec[i].relocation_count;
 738         }
 739
 740         ret = i915_mutex_lock_interruptible(dev);
 741         if (ret) {
 742                 DRM_LOCK(dev);
 743                 goto err;
 744         }
 745
 746         /* reacquire the objects */
 747         eb_reset(eb);
 748         for (i = 0; i < count; i++) {
 749                 struct drm_i915_gem_object *obj;
 750
 751                 obj = to_intel_bo(drm_gem_object_lookup(dev, file,
 752                                                         exec[i].handle));
 753                 if (&obj->base == NULL) {
 754                         DRM_DEBUG("Invalid object handle %d at index %d\n",
 755                                    exec[i].handle, i);
 756                         ret = -ENOENT;
 757                         goto err;
 758                 }
 759
 760                 list_add_tail(&obj->exec_list, objects);
 761                 obj->exec_handle = exec[i].handle;
 762                 obj->exec_entry = &exec[i];
 763                 eb_add_object(eb, obj);
 764         }
 765
 766         ret = i915_gem_execbuffer_reserve(ring, file, objects);
 767         if (ret)
 768                 goto err;
 769
 770         list_for_each_entry(obj, objects, exec_list) {
 771                 int offset = obj->exec_entry - exec;
 772                 ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
 773                     reloc + reloc_offset[offset]);
 774                 if (ret)
 775                         goto err;
 776         }
 777
 778         /* Leave the user relocations as are, this is the painfully slow path,
 779          * and we want to avoid the complication of dropping the lock whilst
 780          * having buffers reserved in the aperture and so causing spurious
 781          * ENOSPC for random operations.
 782          */
 783
 784 err:
 785         free(reloc, DRM_I915_GEM);
 786         free(reloc_offset, DRM_I915_GEM);
 787         return ret;
 788 }
 789
 790 static int
 791 i915_gem_execbuffer_flush(struct drm_device *dev,
 792                           uint32_t invalidate_domains,
 793                           uint32_t flush_domains,
 794                           uint32_t flush_rings)
 795 {
 796         drm_i915_private_t *dev_priv = dev->dev_private;
 797         int i, ret;
 798
 799         if (flush_domains & I915_GEM_DOMAIN_CPU)
 800                 intel_gtt_chipset_flush();
 801
 802         if (flush_domains & I915_GEM_DOMAIN_GTT)
 803                 wmb();
 804
 805         if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
 806                 for (i = 0; i < I915_NUM_RINGS; i++)
 807                         if (flush_rings & (1 << i)) {
 808                                 ret = i915_gem_flush_ring(&dev_priv->rings[i],
 809                                     invalidate_domains, flush_domains);
 810                                 if (ret)
 811                                         return ret;
 812                         }
 813         }
 814
 815         return 0;
 816 }
 817
 818 static bool
 819 intel_enable_semaphores(struct drm_device *dev)
 820 {
 821         if (INTEL_INFO(dev)->gen < 6)
 822                 return 0;
 823
 824         if (i915_semaphores >= 0)
 825                 return i915_semaphores;
 826
 827         /* Enable semaphores on SNB when IO remapping is off */
 828         if (INTEL_INFO(dev)->gen == 6)
 829                 return !intel_iommu_enabled;
 830
 831         return 1;
 832 }
 833
 834 static int
 835 i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
 836                                struct intel_ring_buffer *to)
 837 {
 838         struct intel_ring_buffer *from = obj->ring;
 839         u32 seqno;
 840         int ret, idx;
 841
 842         if (from == NULL || to == from)
 843                 return 0;
 844
 845         /* XXX gpu semaphores are implicated in various hard hangs on SNB */
 846         if (!intel_enable_semaphores(obj->base.dev))
 847                 return i915_gem_object_wait_rendering(obj);
 848
 849         idx = intel_ring_sync_index(from, to);
 850
 851         seqno = obj->last_rendering_seqno;
 852         if (seqno <= from->sync_seqno[idx])
 853                 return 0;
 854
 855         if (seqno == from->outstanding_lazy_request) {
 856                 struct drm_i915_gem_request *request;
 857
 858                 request = malloc(sizeof(*request), DRM_I915_GEM,
 859                     M_WAITOK | M_ZERO);
 860                 ret = i915_add_request(from, NULL, request);
 861                 if (ret) {
 862                         free(request, DRM_I915_GEM);
 863                         return ret;
 864                 }
 865
 866                 seqno = request->seqno;
 867         }
 868
 869         from->sync_seqno[idx] = seqno;
 870
 871         return to->sync_to(to, from, seqno - 1);
 872 }
 873
 874 static int
 875 i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
 876 {
 877         u32 plane, flip_mask;
 878         int ret;
 879
 880         /* Check for any pending flips. As we only maintain a flip queue depth
 881          * of 1, we can simply insert a WAIT for the next display flip prior
 882          * to executing the batch and avoid stalling the CPU.
 883          */
 884
 885         for (plane = 0; flips >> plane; plane++) {
 886                 if (((flips >> plane) & 1) == 0)
 887                         continue;
 888
 889                 if (plane)
 890                         flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
 891                 else
 892                         flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
 893
 894                 ret = intel_ring_begin(ring, 2);
 895                 if (ret)
 896                         return ret;
 897
 898                 intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
 899                 intel_ring_emit(ring, MI_NOOP);
 900                 intel_ring_advance(ring);
 901         }
 902
 903         return 0;
 904 }
 905
 906 static int
 907 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 908                                 struct list_head *objects)
 909 {
 910         struct drm_i915_gem_object *obj;
 911         struct change_domains cd;
 912         int ret;
 913
 914         memset(&cd, 0, sizeof(cd));
 915         list_for_each_entry(obj, objects, exec_list)
 916                 i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
 917
 918         if (cd.invalidate_domains | cd.flush_domains) {
 919 #if WATCH_EXEC
 920                 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
 921                           __func__,
 922                          cd.invalidate_domains,
 923                          cd.flush_domains);
 924 #endif
 925                 ret = i915_gem_execbuffer_flush(ring->dev,
 926                                                 cd.invalidate_domains,
 927                                                 cd.flush_domains,
 928                                                 cd.flush_rings);
 929                 if (ret)
 930                         return ret;
 931         }
 932
 933         if (cd.flips) {
 934                 ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
 935                 if (ret)
 936                         return ret;
 937         }
 938
 939         list_for_each_entry(obj, objects, exec_list) {
 940                 ret = i915_gem_execbuffer_sync_rings(obj, ring);
 941                 if (ret)
 942                         return ret;
 943         }
 944
 945         return 0;
 946 }
 947
 948 static bool
 949 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 950 {
 951         return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
 952 }
 953
 954 static int
 955 validate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count,
 956     vm_page_t ***map)
 957 {
 958         vm_page_t *ma;
 959         int i, length, page_count;
 960
 961         /* XXXKIB various limits checking is missing there */
 962         *map = malloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO);
 963         for (i = 0; i < count; i++) {
 964                 /* First check for malicious input causing overflow */
 965                 if (exec[i].relocation_count >
 966                     INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
 967                         return -EINVAL;
 968
 969                 length = exec[i].relocation_count *
 970                     sizeof(struct drm_i915_gem_relocation_entry);
 971                 if (length == 0) {
 972                         (*map)[i] = NULL;
 973                         continue;
 974                 }
 975                 /*
 976                  * Since both start and end of the relocation region
 977                  * may be not aligned on the page boundary, be
 978                  * conservative and request a page slot for each
 979                  * partial page.  Thus +2.
 980                  */
 981                 page_count = howmany(length, PAGE_SIZE) + 2;
 982                 ma = (*map)[i] = malloc(page_count * sizeof(vm_page_t),
 983                     DRM_I915_GEM, M_WAITOK | M_ZERO);
 984                 if (vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
 985                     exec[i].relocs_ptr, length, VM_PROT_READ | VM_PROT_WRITE,
 986                     ma, page_count) == -1) {
 987                         free(ma, DRM_I915_GEM);
 988                         (*map)[i] = NULL;
 989                         return (-EFAULT);
 990                 }
 991         }
 992
 993         return 0;
 994 }
 995
 996 static void
 997 i915_gem_execbuffer_move_to_active(struct list_head *objects,
 998                                    struct intel_ring_buffer *ring,
 999                                    u32 seqno)
1000 {
1001         struct drm_i915_gem_object *obj;
1002         uint32_t old_read, old_write;
1003
1004         list_for_each_entry(obj, objects, exec_list) {
1005                 old_read = obj->base.read_domains;
1006                 old_write = obj->base.write_domain;
1007
1008                 obj->base.read_domains = obj->base.pending_read_domains;
1009                 obj->base.write_domain = obj->base.pending_write_domain;
1010                 obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
1011
1012                 i915_gem_object_move_to_active(obj, ring, seqno);
1013                 if (obj->base.write_domain) {
1014                         obj->dirty = 1;
1015                         obj->pending_gpu_write = true;
1016                         list_move_tail(&obj->gpu_write_list,
1017                                        &ring->gpu_write_list);
1018                         intel_mark_busy(ring->dev, obj);
1019                 }
1020                 CTR3(KTR_DRM, "object_change_domain move_to_active %p %x %x",
1021                     obj, old_read, old_write);
1022         }
1023 }
1024
1025 int i915_gem_sync_exec_requests;
1026
1027 static void
1028 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
1029                                     struct drm_file *file,
1030                                     struct intel_ring_buffer *ring)
1031 {
1032         struct drm_i915_gem_request *request;
1033         u32 invalidate;
1034
1035         /*
1036          * Ensure that the commands in the batch buffer are
1037          * finished before the interrupt fires.
1038          *
1039          * The sampler always gets flushed on i965 (sigh).
1040          */
1041         invalidate = I915_GEM_DOMAIN_COMMAND;
1042         if (INTEL_INFO(dev)->gen >= 4)
1043                 invalidate |= I915_GEM_DOMAIN_SAMPLER;
1044         if (ring->flush(ring, invalidate, 0)) {
1045                 i915_gem_next_request_seqno(ring);
1046                 return;
1047         }
1048
1049         /* Add a breadcrumb for the completion of the batch buffer */
1050         request = malloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
1051         if (request == NULL || i915_add_request(ring, file, request)) {
1052                 i915_gem_next_request_seqno(ring);
1053                 free(request, DRM_I915_GEM);
1054         } else if (i915_gem_sync_exec_requests)
1055                 i915_wait_request(ring, request->seqno, true);
1056 }
1057
1058 static void
1059 i915_gem_fix_mi_batchbuffer_end(struct drm_i915_gem_object *batch_obj,
1060     uint32_t batch_start_offset, uint32_t batch_len)
1061 {
1062         char *mkva;
1063         uint64_t po_r, po_w;
1064         uint32_t cmd;
1065
1066         po_r = batch_obj->base.dev->agp->base + batch_obj->gtt_offset +
1067             batch_start_offset + batch_len;
1068         if (batch_len > 0)
1069                 po_r -= 4;
1070         mkva = pmap_mapdev_attr(trunc_page(po_r), 2 * PAGE_SIZE,
1071             PAT_WRITE_COMBINING);
1072         po_r &= PAGE_MASK;
1073         cmd = *(uint32_t *)(mkva + po_r);
1074
1075         if (cmd != MI_BATCH_BUFFER_END) {
1076                 /*
1077                  * batch_len != 0 due to the check at the start of
1078                  * i915_gem_do_execbuffer
1079                  */
1080                 if (batch_obj->base.size > batch_start_offset + batch_len) {
1081                         po_w = po_r + 4;
1082 /* DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END !\n"); */
1083                 } else {
1084                         po_w = po_r;
1085 DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END, overwriting last bo cmd !\n");
1086                 }
1087                 *(uint32_t *)(mkva + po_w) = MI_BATCH_BUFFER_END;
1088         }
1089
1090         pmap_unmapdev((vm_offset_t)mkva, 2 * PAGE_SIZE);
1091 }
1092
1093 int i915_fix_mi_batchbuffer_end = 0;
1094
1095  static int
1096 i915_reset_gen7_sol_offsets(struct drm_device *dev,
1097                             struct intel_ring_buffer *ring)
1098 {
1099         drm_i915_private_t *dev_priv = dev->dev_private;
1100         int ret, i;
1101
1102         if (!IS_GEN7(dev) || ring != &dev_priv->rings[RCS])
1103                 return 0;
1104
1105         ret = intel_ring_begin(ring, 4 * 3);
1106         if (ret)
1107                 return ret;
1108
1109         for (i = 0; i < 4; i++) {
1110                 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1111                 intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
1112                 intel_ring_emit(ring, 0);
1113         }
1114
1115         intel_ring_advance(ring);
1116
1117         return 0;
1118 }
1119
1120 static int
1121 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1122                        struct drm_file *file,
1123                        struct drm_i915_gem_execbuffer2 *args,
1124                        struct drm_i915_gem_exec_object2 *exec)
1125 {
1126         drm_i915_private_t *dev_priv = dev->dev_private;
1127         struct list_head objects;
1128         struct eb_objects *eb;
1129         struct drm_i915_gem_object *batch_obj;
1130         struct drm_clip_rect *cliprects = NULL;
1131         struct intel_ring_buffer *ring;
1132         vm_page_t **relocs_ma;
1133         u32 exec_start, exec_len;
1134         u32 seqno;
1135         u32 mask;
1136         int ret, mode, i;
1137
1138         if (!i915_gem_check_execbuffer(args)) {
1139                 DRM_DEBUG("execbuf with invalid offset/length\n");
1140                 return -EINVAL;
1141         }
1142
1143         if (args->batch_len == 0)
1144                 return (0);
1145
1146         ret = validate_exec_list(exec, args->buffer_count, &relocs_ma);
1147         if (ret != 0)
1148                 goto pre_struct_lock_err;
1149
1150         switch (args->flags & I915_EXEC_RING_MASK) {
1151         case I915_EXEC_DEFAULT:
1152         case I915_EXEC_RENDER:
1153                 ring = &dev_priv->rings[RCS];
1154                 break;
1155         case I915_EXEC_BSD:
1156                 if (!HAS_BSD(dev)) {
1157                         DRM_DEBUG("execbuf with invalid ring (BSD)\n");
1158                         return -EINVAL;
1159                 }
1160                 ring = &dev_priv->rings[VCS];
1161                 break;
1162         case I915_EXEC_BLT:
1163                 if (!HAS_BLT(dev)) {
1164                         DRM_DEBUG("execbuf with invalid ring (BLT)\n");
1165                         return -EINVAL;
1166                 }
1167                 ring = &dev_priv->rings[BCS];
1168                 break;
1169         default:
1170                 DRM_DEBUG("execbuf with unknown ring: %d\n",
1171                           (int)(args->flags & I915_EXEC_RING_MASK));
1172                 ret = -EINVAL;
1173                 goto pre_struct_lock_err;
1174         }
1175
1176         mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1177         mask = I915_EXEC_CONSTANTS_MASK;
1178         switch (mode) {
1179         case I915_EXEC_CONSTANTS_REL_GENERAL:
1180         case I915_EXEC_CONSTANTS_ABSOLUTE:
1181         case I915_EXEC_CONSTANTS_REL_SURFACE:
1182                 if (ring == &dev_priv->rings[RCS] &&
1183                     mode != dev_priv->relative_constants_mode) {
1184                         if (INTEL_INFO(dev)->gen < 4) {
1185                                 ret = -EINVAL;
1186                                 goto pre_struct_lock_err;
1187                         }
1188
1189                         if (INTEL_INFO(dev)->gen > 5 &&
1190                             mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1191                                 ret = -EINVAL;
1192                                 goto pre_struct_lock_err;
1193                         }
1194
1195                         /* The HW changed the meaning on this bit on gen6 */
1196                         if (INTEL_INFO(dev)->gen >= 6)
1197                                 mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1198                 }
1199                 break;
1200         default:
1201                 DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
1202                 ret = -EINVAL;
1203                 goto pre_struct_lock_err;
1204         }
1205
1206         if (args->buffer_count < 1) {
1207                 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1208                 ret = -EINVAL;
1209                 goto pre_struct_lock_err;
1210         }
1211
1212         if (args->num_cliprects != 0) {
1213                 if (ring != &dev_priv->rings[RCS]) {
1214         DRM_DEBUG("clip rectangles are only valid with the render ring\n");
1215                         ret = -EINVAL;
1216                         goto pre_struct_lock_err;
1217                 }
1218
1219                 if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
1220                         DRM_DEBUG("execbuf with %u cliprects\n",
1221                                   args->num_cliprects);
1222                         ret = -EINVAL;
1223                         goto pre_struct_lock_err;
1224                 }
1225                 cliprects = malloc( sizeof(*cliprects) * args->num_cliprects,
1226                     DRM_I915_GEM, M_WAITOK | M_ZERO);
1227                 ret = -copyin((void *)(uintptr_t)args->cliprects_ptr, cliprects,
1228                     sizeof(*cliprects) * args->num_cliprects);
1229                 if (ret != 0)
1230                         goto pre_struct_lock_err;
1231         }
1232
1233         ret = i915_mutex_lock_interruptible(dev);
1234         if (ret)
1235                 goto pre_struct_lock_err;
1236
1237         if (dev_priv->mm.suspended) {
1238                 ret = -EBUSY;
1239                 goto struct_lock_err;
1240         }
1241
1242         eb = eb_create(args->buffer_count);
1243         if (eb == NULL) {
1244                 ret = -ENOMEM;
1245                 goto struct_lock_err;
1246         }
1247
1248         /* Look up object handles */
1249         INIT_LIST_HEAD(&objects);
1250         for (i = 0; i < args->buffer_count; i++) {
1251                 struct drm_i915_gem_object *obj;
1252                 obj = to_intel_bo(drm_gem_object_lookup(dev, file,
1253                                                         exec[i].handle));
1254                 if (&obj->base == NULL) {
1255                         DRM_DEBUG("Invalid object handle %d at index %d\n",
1256                                    exec[i].handle, i);
1257                         /* prevent error path from reading uninitialized data */
1258                         ret = -ENOENT;
1259                         goto err;
1260                 }
1261
1262                 if (!list_empty(&obj->exec_list)) {
1263                         DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
1264                                    obj, exec[i].handle, i);
1265                         ret = -EINVAL;
1266                         goto err;
1267                 }
1268
1269                 list_add_tail(&obj->exec_list, &objects);
1270                 obj->exec_handle = exec[i].handle;
1271                 obj->exec_entry = &exec[i];
1272                 eb_add_object(eb, obj);
1273         }
1274
1275         /* take note of the batch buffer before we might reorder the lists */
1276         batch_obj = list_entry(objects.prev,
1277                                struct drm_i915_gem_object,
1278                                exec_list);
1279
1280         /* Move the objects en-masse into the GTT, evicting if necessary. */
1281         ret = i915_gem_execbuffer_reserve(ring, file, &objects);
1282         if (ret)
1283                 goto err;
1284
1285         /* The objects are in their final locations, apply the relocations. */
1286         ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
1287         if (ret) {
1288                 if (ret == -EFAULT) {
1289                         ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
1290                             &objects, eb, exec, args->buffer_count);
1291                         DRM_LOCK_ASSERT(dev);
1292                 }
1293                 if (ret)
1294                         goto err;
1295         }
1296
1297         /* Set the pending read domains for the batch buffer to COMMAND */
1298         if (batch_obj->base.pending_write_domain) {
1299                 DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1300                 ret = -EINVAL;
1301                 goto err;
1302         }
1303         batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1304
1305         ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
1306         if (ret)
1307                 goto err;
1308
1309         seqno = i915_gem_next_request_seqno(ring);
1310         for (i = 0; i < I915_NUM_RINGS - 1; i++) {
1311                 if (seqno < ring->sync_seqno[i]) {
1312                         /* The GPU can not handle its semaphore value wrapping,
1313                          * so every billion or so execbuffers, we need to stall
1314                          * the GPU in order to reset the counters.
1315                          */
1316                         ret = i915_gpu_idle(dev, true);
1317                         if (ret)
1318                                 goto err;
1319
1320                         KASSERT(ring->sync_seqno[i] == 0, ("Non-zero sync_seqno"));
1321                 }
1322         }
1323
1324         if (ring == &dev_priv->rings[RCS] &&
1325             mode != dev_priv->relative_constants_mode) {
1326                 ret = intel_ring_begin(ring, 4);
1327                 if (ret)
1328                         goto err;
1329
1330                 intel_ring_emit(ring, MI_NOOP);
1331                 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1332                 intel_ring_emit(ring, INSTPM);
1333                 intel_ring_emit(ring, mask << 16 | mode);
1334                 intel_ring_advance(ring);
1335
1336                 dev_priv->relative_constants_mode = mode;
1337         }
1338
1339         if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1340                 ret = i915_reset_gen7_sol_offsets(dev, ring);
1341                 if (ret)
1342                         goto err;
1343         }
1344
1345         exec_start = batch_obj->gtt_offset + args->batch_start_offset;
1346         exec_len = args->batch_len;
1347
1348         if (i915_fix_mi_batchbuffer_end) {
1349                 i915_gem_fix_mi_batchbuffer_end(batch_obj,
1350                     args->batch_start_offset, args->batch_len);
1351         }
1352
1353         CTR4(KTR_DRM, "ring_dispatch %s %d exec %x %x", ring->name, seqno,
1354             exec_start, exec_len);
1355
1356         if (cliprects) {
1357                 for (i = 0; i < args->num_cliprects; i++) {
1358                         ret = i915_emit_box_p(dev, &cliprects[i],
1359                             args->DR1, args->DR4);
1360                         if (ret)
1361                                 goto err;
1362
1363                         ret = ring->dispatch_execbuffer(ring, exec_start,
1364                             exec_len);
1365                         if (ret)
1366                                 goto err;
1367                 }
1368         } else {
1369                 ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
1370                 if (ret)
1371                         goto err;
1372         }
1373
1374         i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
1375         i915_gem_execbuffer_retire_commands(dev, file, ring);
1376
1377 err:
1378         eb_destroy(eb);
1379         while (!list_empty(&objects)) {
1380                 struct drm_i915_gem_object *obj;
1381
1382                 obj = list_first_entry(&objects, struct drm_i915_gem_object,
1383                     exec_list);
1384                 list_del_init(&obj->exec_list);
1385                 drm_gem_object_unreference(&obj->base);
1386         }
1387 struct_lock_err:
1388         DRM_UNLOCK(dev);
1389
1390 pre_struct_lock_err:
1391         for (i = 0; i < args->buffer_count; i++) {
1392                 if (relocs_ma[i] != NULL) {
1393                         vm_page_unhold_pages(relocs_ma[i], howmany(
1394                             exec[i].relocation_count *
1395                             sizeof(struct drm_i915_gem_relocation_entry),
1396                             PAGE_SIZE));
1397                         free(relocs_ma[i], DRM_I915_GEM);
1398                 }
1399         }
1400         free(relocs_ma, DRM_I915_GEM);
1401         free(cliprects, DRM_I915_GEM);
1402         return ret;
1403 }
1404
1405 /*
1406  * Legacy execbuffer just creates an exec2 list from the original exec object
1407  * list array and passes it to the real function.
1408  */
1409 int
1410 i915_gem_execbuffer(struct drm_device *dev, void *data,
1411                     struct drm_file *file)
1412 {
1413         struct drm_i915_gem_execbuffer *args = data;
1414         struct drm_i915_gem_execbuffer2 exec2;
1415         struct drm_i915_gem_exec_object *exec_list = NULL;
1416         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1417         int ret, i;
1418
1419         DRM_DEBUG("buffers_ptr %d buffer_count %d len %08x\n",
1420             (int) args->buffers_ptr, args->buffer_count, args->batch_len);
1421
1422         if (args->buffer_count < 1) {
1423                 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1424                 return -EINVAL;
1425         }
1426
1427         /* Copy in the exec list from userland */
1428         /* XXXKIB user-controlled malloc size */
1429         exec_list = malloc(sizeof(*exec_list) * args->buffer_count,
1430             DRM_I915_GEM, M_WAITOK);
1431         exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
1432             DRM_I915_GEM, M_WAITOK);
1433         ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec_list,
1434             sizeof(*exec_list) * args->buffer_count);
1435         if (ret != 0) {
1436                 DRM_DEBUG("copy %d exec entries failed %d\n",
1437                           args->buffer_count, ret);
1438                 free(exec_list, DRM_I915_GEM);
1439                 free(exec2_list, DRM_I915_GEM);
1440                 return (ret);
1441         }
1442
1443         for (i = 0; i < args->buffer_count; i++) {
1444                 exec2_list[i].handle = exec_list[i].handle;
1445                 exec2_list[i].relocation_count = exec_list[i].relocation_count;
1446                 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1447                 exec2_list[i].alignment = exec_list[i].alignment;
1448                 exec2_list[i].offset = exec_list[i].offset;
1449                 if (INTEL_INFO(dev)->gen < 4)
1450                         exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1451                 else
1452                         exec2_list[i].flags = 0;
1453         }
1454
1455         exec2.buffers_ptr = args->buffers_ptr;
1456         exec2.buffer_count = args->buffer_count;
1457         exec2.batch_start_offset = args->batch_start_offset;
1458         exec2.batch_len = args->batch_len;
1459         exec2.DR1 = args->DR1;
1460         exec2.DR4 = args->DR4;
1461         exec2.num_cliprects = args->num_cliprects;
1462         exec2.cliprects_ptr = args->cliprects_ptr;
1463         exec2.flags = I915_EXEC_RENDER;
1464
1465         ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1466         if (!ret) {
1467                 /* Copy the new buffer offsets back to the user's exec list. */
1468                 for (i = 0; i < args->buffer_count; i++)
1469                         exec_list[i].offset = exec2_list[i].offset;
1470                 /* ... and back out to userspace */
1471                 ret = -copyout(exec_list, (void *)(uintptr_t)args->buffers_ptr,
1472                     sizeof(*exec_list) * args->buffer_count);
1473                 if (ret != 0) {
1474                         DRM_DEBUG("failed to copy %d exec entries "
1475                                   "back to user (%d)\n",
1476                                   args->buffer_count, ret);
1477                 }
1478         }
1479
1480         free(exec_list, DRM_I915_GEM);
1481         free(exec2_list, DRM_I915_GEM);
1482         return ret;
1483 }
1484
1485 int
1486 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1487                      struct drm_file *file)
1488 {
1489         struct drm_i915_gem_execbuffer2 *args = data;
1490         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1491         int ret;
1492
1493         DRM_DEBUG("buffers_ptr %jx buffer_count %d len %08x\n",
1494             (uintmax_t)args->buffers_ptr, args->buffer_count, args->batch_len);
1495
1496         if (args->buffer_count < 1 ||
1497             args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1498                 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1499                 return -EINVAL;
1500         }
1501
1502         /* XXXKIB user-controllable malloc size */
1503         exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count,
1504             DRM_I915_GEM, M_WAITOK);
1505         ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec2_list,
1506             sizeof(*exec2_list) * args->buffer_count);
1507         if (ret != 0) {
1508                 DRM_DEBUG("copy %d exec entries failed %d\n",
1509                           args->buffer_count, ret);
1510                 free(exec2_list, DRM_I915_GEM);
1511                 return (ret);
1512         }
1513
1514         ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1515         if (!ret) {
1516                 /* Copy the new buffer offsets back to the user's exec list. */
1517                 ret = -copyout(exec2_list, (void *)(uintptr_t)args->buffers_ptr,
1518                     sizeof(*exec2_list) * args->buffer_count);
1519                 if (ret) {
1520                         DRM_DEBUG("failed to copy %d exec entries "
1521                                   "back to user (%d)\n",
1522                                   args->buffer_count, ret);
1523                 }
1524         }
1525
1526         free(exec2_list, DRM_I915_GEM);
1527         return ret;
1528 }