1 /*
2  * Copyright 2009 Jerome Glisse.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 /*
27  * Authors:
28  *    Jerome Glisse <glisse@freedesktop.org>
29  *    Dave Airlie
30  */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include <dev/drm2/drmP.h>
36 #include "radeon_reg.h"
37 #include "radeon.h"
38 #ifdef DUMBBELL_WIP
39 #include "radeon_trace.h"
40 #endif /* DUMBBELL_WIP */
41
42 /*
43  * Fences
44  * Fences mark an event in the GPU's pipeline and are used
45  * for GPU/CPU synchronization.  When the fence is written,
46  * it is expected that all buffers associated with that fence
47  * are no longer in use by the associated ring on the GPU and
48  * that the relevant GPU caches have been flushed.  Whether
49  * we use a scratch register or memory location depends on the asic
50  * and whether writeback is enabled.
51  */
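/*
 * Typical fence lifecycle, as an illustrative sketch only (not part of the
 * original file; error handling and the ring locking / command submission
 * that surround the emit are elided):
 *
 *      struct radeon_fence *fence = NULL;
 *      int r;
 *
 *      r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
 *      if (r == 0) {
 *              r = radeon_fence_wait(fence, false);
 *              radeon_fence_unref(&fence);
 *      }
 */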
52
53 /**
54  * radeon_fence_write - write a fence value
55  *
56  * @rdev: radeon_device pointer
57  * @seq: sequence number to write
58  * @ring: ring index the fence is associated with
59  *
60  * Writes a fence value to memory or a scratch register (all asics).
61  */
62 static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
63 {
64         struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
65         if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
66                 *drv->cpu_addr = cpu_to_le32(seq);
67         } else {
68                 WREG32(drv->scratch_reg, seq);
69         }
70 }
71
72 /**
73  * radeon_fence_read - read a fence value
74  *
75  * @rdev: radeon_device pointer
76  * @ring: ring index the fence is associated with
77  *
78  * Reads a fence value from memory or a scratch register (all asics).
79  * Returns the value of the fence read from memory or register.
80  */
81 static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
82 {
83         struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
84         u32 seq = 0;
85
86         if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
87                 seq = le32_to_cpu(*drv->cpu_addr);
88         } else {
89                 seq = RREG32(drv->scratch_reg);
90         }
91         return seq;
92 }
93
94 /**
95  * radeon_fence_emit - emit a fence on the requested ring
96  *
97  * @rdev: radeon_device pointer
98  * @fence: radeon fence object
99  * @ring: ring index the fence is associated with
100  *
101  * Emits a fence command on the requested ring (all asics).
102  * Returns 0 on success, -ENOMEM on failure.
103  */
104 int radeon_fence_emit(struct radeon_device *rdev,
105                       struct radeon_fence **fence,
106                       int ring)
107 {
108         /* we are protected by the ring emission mutex */
109         *fence = malloc(sizeof(struct radeon_fence), DRM_MEM_DRIVER, M_WAITOK);
110         if ((*fence) == NULL) {
111                 return -ENOMEM;
112         }
113         refcount_init(&((*fence)->kref), 1);
114         (*fence)->rdev = rdev;
115         (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
116         (*fence)->ring = ring;
117         radeon_fence_ring_emit(rdev, ring, *fence);
118         CTR2(KTR_DRM, "radeon fence: emit (ring=%d, seq=%d)", ring, (*fence)->seq);
119         return 0;
120 }
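/*
 * Illustrative caller sketch (not part of the original file): a command
 * submission path typically emits the fence as the last packets of the
 * work it just wrote to the ring, while still holding the ring mutex.
 * The exact entry points vary between submission paths; this only shows
 * where the emit sits relative to the ring lock:
 *
 *      r = radeon_ring_lock(rdev, ring, 64);
 *      if (r == 0) {
 *              ... write command packets ...
 *              r = radeon_fence_emit(rdev, &fence, ring->idx);
 *              radeon_ring_unlock_commit(rdev, ring);
 *      }
 */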
121
122 /**
123  * radeon_fence_process - process a fence
124  *
125  * @rdev: radeon_device pointer
126  * @ring: ring index the fence is associated with
127  *
128  * Checks the current fence value and wakes the fence queue
129  * if the sequence number has increased (all asics).
130  */
131 void radeon_fence_process(struct radeon_device *rdev, int ring)
132 {
133         uint64_t seq, last_seq, last_emitted;
134         unsigned count_loop = 0;
135         bool wake = false;
136
137         /* Note there is a scenario here for an infinite loop, but it is
138          * very unlikely to happen. For it to happen, the current polling
139          * process needs to be interrupted by another process, and that
140          * other process needs to update last_seq between the atomic read
141          * and xchg of the current process.
142          *
143          * Moreover, for this to turn into an infinite loop, new fences
144          * need to be signaled continuously, i.e. radeon_fence_read needs
145          * to return a different value each time for both the currently
146          * polling process and the other process that updates last_seq
147          * between the atomic read and xchg of the current process. And
148          * the value the other process sets as last_seq must be higher
149          * than the seq value we just read, which means the current
150          * process needs to be interrupted after radeon_fence_read and
151          * before the atomic xchg.
152          *
153          * To be even safer we count the number of times we loop and
154          * bail out after 10 iterations, accepting the fact that we might
155          * have temporarily set last_seq not to the true last signaled
156          * seq but to an older one.
157          */
158         last_seq = atomic_load_acq_64(&rdev->fence_drv[ring].last_seq);
159         do {
160                 last_emitted = rdev->fence_drv[ring].sync_seq[ring];
161                 seq = radeon_fence_read(rdev, ring);
162                 seq |= last_seq & 0xffffffff00000000LL;
163                 if (seq < last_seq) {
164                         seq &= 0xffffffff;
165                         seq |= last_emitted & 0xffffffff00000000LL;
166                 }
167
168                 if (seq <= last_seq || seq > last_emitted) {
169                         break;
170                 }
171                 /* If we loop again we don't want to return without
172                  * checking whether a fence is signaled, as it means that
173                  * the seq we just read is different from the previous one.
174                  */
175                 wake = true;
176                 last_seq = seq;
177                 if ((count_loop++) > 10) {
178                         /* We looped too many times; leave, accepting
179                          * the fact that we might have recorded an older
180                          * fence seq than the real last seq signaled by
181                          * the hw.
182                          */
183                         break;
184                 }
185         } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
186
187         if (wake) {
188                 rdev->fence_drv[ring].last_activity = jiffies;
189                 cv_broadcast(&rdev->fence_queue);
190         }
191 }
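/*
 * The loop above extends the 32 bit value read back from the hardware into
 * the full 64 bit software sequence number.  Restated as a stand-alone
 * sketch (hypothetical helper, for illustration only; the driver does not
 * define it):
 *
 *      static uint64_t
 *      radeon_fence_extend_seq(uint32_t hw_seq, uint64_t last_seq,
 *          uint64_t last_emitted)
 *      {
 *              uint64_t seq;
 *
 *              seq = (last_seq & 0xffffffff00000000ULL) | hw_seq;
 *              if (seq < last_seq)
 *                      seq = (last_emitted & 0xffffffff00000000ULL) | hw_seq;
 *              return (seq);
 *      }
 *
 * A wrap of the 32 bit hardware counter makes the candidate smaller than
 * last_seq, in which case the upper half is taken from the last emitted
 * sequence instead.
 */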
192
193 /**
194  * radeon_fence_destroy - destroy a fence
195  *
196  * @fence: radeon fence object
197  *
198  * Frees the fence object (all asics).
199  */
200 static void radeon_fence_destroy(struct radeon_fence *fence)
201 {
202
203         free(fence, DRM_MEM_DRIVER);
204 }
205
206 /**
207  * radeon_fence_seq_signaled - check if a fence sequence number has signaled
208  *
209  * @rdev: radeon device pointer
210  * @seq: sequence number
211  * @ring: ring index the fence is associated with
212  *
213  * Check if the last signaled fence sequence number is >= the requested
214  * sequence number (all asics).
215  * Returns true if the fence has signaled (current fence value
216  * is >= requested value) or false if it has not (current fence
217  * value is < the requested value).  Helper function for
218  * radeon_fence_signaled().
219  */
220 static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
221                                       u64 seq, unsigned ring)
222 {
223         if (atomic_load_acq_64(&rdev->fence_drv[ring].last_seq) >= seq) {
224                 return true;
225         }
226         /* poll new last sequence at least once */
227         radeon_fence_process(rdev, ring);
228         if (atomic_load_acq_64(&rdev->fence_drv[ring].last_seq) >= seq) {
229                 return true;
230         }
231         return false;
232 }
233
234 /**
235  * radeon_fence_signaled - check if a fence has signaled
236  *
237  * @fence: radeon fence object
238  *
239  * Check if the requested fence has signaled (all asics).
240  * Returns true if the fence has signaled or false if it has not.
241  */
242 bool radeon_fence_signaled(struct radeon_fence *fence)
243 {
244         if (!fence) {
245                 return true;
246         }
247         if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
248                 return true;
249         }
250         if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
251                 fence->seq = RADEON_FENCE_SIGNALED_SEQ;
252                 return true;
253         }
254         return false;
255 }
256
257 /**
258  * radeon_fence_wait_seq - wait for a specific sequence number
259  *
260  * @rdev: radeon device pointer
261  * @target_seq: sequence number we want to wait for
262  * @ring: ring index the fence is associated with
263  * @intr: use interruptible sleep
264  * @lock_ring: whether the ring should be locked or not
265  *
266  * Wait for the requested sequence number to be written (all asics).
267  * @intr selects whether to use interruptible (true) or non-interruptible
268  * (false) sleep when waiting for the sequence number.  Helper function
269  * for radeon_fence_wait(), et al.
270  * Returns 0 if the sequence number has passed, error for all other cases.
271  * -EDEADLK is returned when a GPU lockup has been detected and the ring is
272  * marked as not ready so no further jobs get scheduled until a successful
273  * reset.
274  */
275 static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq,
276                                  unsigned ring, bool intr, bool lock_ring)
277 {
278         unsigned long timeout, last_activity;
279         uint64_t seq;
280         unsigned i;
281         bool signaled, fence_queue_locked;
282         int r;
283
284         while (target_seq > atomic_load_acq_64(&rdev->fence_drv[ring].last_seq)) {
285                 if (!rdev->ring[ring].ready) {
286                         return -EBUSY;
287                 }
288
289                 timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
290                 if (time_after(rdev->fence_drv[ring].last_activity, timeout)) {
291                         /* the normal case, timeout is somewhere before last_activity */
292                         timeout = rdev->fence_drv[ring].last_activity - timeout;
293                 } else {
294                         /* either jiffies wrapped around, or no fence was signaled in the last 500ms;
295                          * either way we just wait for the minimum amount and then check for a lockup
296                          */
297                         timeout = 1;
298                 }
299                 seq = atomic_load_acq_64(&rdev->fence_drv[ring].last_seq);
300                 /* Save current last activity value, used to check for GPU lockups */
301                 last_activity = rdev->fence_drv[ring].last_activity;
302
303                 CTR2(KTR_DRM, "radeon fence: wait begin (ring=%d, seq=%d)",
304                     ring, seq);
305
306                 radeon_irq_kms_sw_irq_get(rdev, ring);
307                 fence_queue_locked = false;
308                 r = 0;
309                 while (!(signaled = radeon_fence_seq_signaled(rdev,
310                     target_seq, ring))) {
311                         if (!fence_queue_locked) {
312                                 mtx_lock(&rdev->fence_queue_mtx);
313                                 fence_queue_locked = true;
314                         }
315                         if (intr) {
316                                 r = cv_timedwait_sig(&rdev->fence_queue,
317                                     &rdev->fence_queue_mtx,
318                                     timeout);
319                         } else {
320                                 r = cv_timedwait(&rdev->fence_queue,
321                                     &rdev->fence_queue_mtx,
322                                     timeout);
323                         }
324                         if (r == EINTR)
325                                 r = ERESTARTSYS;
326                         if (r != 0) {
327                                 if (r == EWOULDBLOCK) {
328                                         signaled =
329                                             radeon_fence_seq_signaled(
330                                                 rdev, target_seq, ring);
331                                 }
332                                 break;
333                         }
334                 }
335                 if (fence_queue_locked) {
336                         mtx_unlock(&rdev->fence_queue_mtx);
337                 }
338                 radeon_irq_kms_sw_irq_put(rdev, ring);
339                 if (unlikely(r == ERESTARTSYS)) {
340                         return -r;
341                 }
342                 CTR2(KTR_DRM, "radeon fence: wait end (ring=%d, seq=%d)",
343                     ring, seq);
344
345                 if (unlikely(!signaled)) {
346 #ifndef __FreeBSD__
347                         /* we were interrupted for some reason and fence
348                          * isn't signaled yet, resume waiting */
349                         if (r) {
350                                 continue;
351                         }
352 #endif
353
354                         /* check if sequence value has changed since last_activity */
355                         if (seq != atomic_load_acq_64(&rdev->fence_drv[ring].last_seq)) {
356                                 continue;
357                         }
358
359                         if (lock_ring) {
360                                 sx_xlock(&rdev->ring_lock);
361                         }
362
363                         /* test if somebody else has already decided that this is a lockup */
364                         if (last_activity != rdev->fence_drv[ring].last_activity) {
365                                 if (lock_ring) {
366                                         sx_xunlock(&rdev->ring_lock);
367                                 }
368                                 continue;
369                         }
370
371                         if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
372                                 /* good news we believe it's a lockup */
373                                 dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016jx last fence id 0x%016jx)\n",
374                                          (uintmax_t)target_seq, (uintmax_t)seq);
375
376                         /* change last activity so nobody else thinks there is a lockup */
377                                 for (i = 0; i < RADEON_NUM_RINGS; ++i) {
378                                         rdev->fence_drv[i].last_activity = jiffies;
379                                 }
380
381                                 /* mark the ring as not ready any more */
382                                 rdev->ring[ring].ready = false;
383                                 if (lock_ring) {
384                                         sx_xunlock(&rdev->ring_lock);
385                                 }
386                                 return -EDEADLK;
387                         }
388
389                         if (lock_ring) {
390                                 sx_xunlock(&rdev->ring_lock);
391                         }
392                 }
393         }
394         return 0;
395 }
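/*
 * Worked example of the timeout computation above (illustrative; assume
 * RADEON_FENCE_JIFFIES_TIMEOUT is 500 ticks, the 500ms mentioned in the
 * comments):  with jiffies == 10000 and last_activity == 9800 the cutoff
 * is 10000 - 500 == 9500, which is before last_activity, so the wait is
 * for last_activity - cutoff == 300 ticks, i.e. the remainder of the
 * 500 tick window since the last fence activity.  If last_activity is
 * older than the cutoff, or jiffies wrapped, the wait is the minimum of
 * one tick before re-checking for a lockup.
 */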
396
397 /**
398  * radeon_fence_wait - wait for a fence to signal
399  *
400  * @fence: radeon fence object
401  * @intr: use interruptible sleep
402  *
403  * Wait for the requested fence to signal (all asics).
404  * @intr selects whether to use interruptible (true) or non-interruptible
405  * (false) sleep when waiting for the fence.
406  * Returns 0 if the fence has passed, error for all other cases.
407  */
408 int radeon_fence_wait(struct radeon_fence *fence, bool intr)
409 {
410         int r;
411
412         if (fence == NULL) {
413                 DRM_ERROR("Querying an invalid fence : %p !\n", fence);
414                 return -EINVAL;
415         }
416
417         r = radeon_fence_wait_seq(fence->rdev, fence->seq,
418                                   fence->ring, intr, true);
419         if (r) {
420                 return r;
421         }
422         fence->seq = RADEON_FENCE_SIGNALED_SEQ;
423         return 0;
424 }
425
426 static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
427 {
428         unsigned i;
429
430         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
431                 if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i)) {
432                         return true;
433                 }
434         }
435         return false;
436 }
437
438 /**
439  * radeon_fence_wait_any_seq - wait for a sequence number on any ring
440  *
441  * @rdev: radeon device pointer
442  * @target_seq: sequence number(s) we want to wait for
443  * @intr: use interruptible sleep
444  *
445  * Wait for the requested sequence number(s) to be written by any ring
446  * (all asics).  Sequence number array is indexed by ring id.
447  * @intr selects whether to use interruptible (true) or non-interruptible
448  * (false) sleep when waiting for the sequence number.  Helper function
449  * for radeon_fence_wait_any(), et al.
450  * Returns 0 if the sequence number has passed, error for all other cases.
451  */
452 static int radeon_fence_wait_any_seq(struct radeon_device *rdev,
453                                      u64 *target_seq, bool intr)
454 {
455         unsigned long timeout, last_activity, tmp;
456         unsigned i, ring = RADEON_NUM_RINGS;
457         bool signaled, fence_queue_locked;
458         int r;
459
460         for (i = 0, last_activity = 0; i < RADEON_NUM_RINGS; ++i) {
461                 if (!target_seq[i]) {
462                         continue;
463                 }
464
465                 /* use the most recent one as indicator */
466                 if (time_after(rdev->fence_drv[i].last_activity, last_activity)) {
467                         last_activity = rdev->fence_drv[i].last_activity;
468                 }
469
470                 /* For lockup detection just pick the lowest ring we are
471                  * actively waiting for
472                  */
473                 if (i < ring) {
474                         ring = i;
475                 }
476         }
477
478         /* nothing to wait for? */
479         if (ring == RADEON_NUM_RINGS) {
480                 return -ENOENT;
481         }
482
483         while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
484                 timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
485                 if (time_after(last_activity, timeout)) {
486                         /* the normal case, timeout is somewhere before last_activity */
487                         timeout = last_activity - timeout;
488                 } else {
489                         /* either jiffies wrapped around, or no fence was signaled in the last 500ms;
490                          * either way we just wait for the minimum amount and then check for a lockup
491                          */
492                         timeout = 1;
493                 }
494
495                 CTR2(KTR_DRM, "radeon fence: wait begin (ring=%d, target_seq=%d)",
496                     ring, target_seq[ring]);
497                 for (i = 0; i < RADEON_NUM_RINGS; ++i) {
498                         if (target_seq[i]) {
499                                 radeon_irq_kms_sw_irq_get(rdev, i);
500                         }
501                 }
502                 fence_queue_locked = false;
503                 r = 0;
504                 while (!(signaled = radeon_fence_any_seq_signaled(rdev,
505                     target_seq))) {
506                         if (!fence_queue_locked) {
507                                 mtx_lock(&rdev->fence_queue_mtx);
508                                 fence_queue_locked = true;
509                         }
510                         if (intr) {
511                                 r = cv_timedwait_sig(&rdev->fence_queue,
512                                     &rdev->fence_queue_mtx,
513                                     timeout);
514                         } else {
515                                 r = cv_timedwait(&rdev->fence_queue,
516                                     &rdev->fence_queue_mtx,
517                                     timeout);
518                         }
519                         if (r == EINTR)
520                                 r = ERESTARTSYS;
521                         if (r != 0) {
522                                 if (r == EWOULDBLOCK) {
523                                         signaled =
524                                             radeon_fence_any_seq_signaled(
525                                                 rdev, target_seq);
526                                 }
527                                 break;
528                         }
529                 }
530                 if (fence_queue_locked) {
531                         mtx_unlock(&rdev->fence_queue_mtx);
532                 }
533                 for (i = 0; i < RADEON_NUM_RINGS; ++i) {
534                         if (target_seq[i]) {
535                                 radeon_irq_kms_sw_irq_put(rdev, i);
536                         }
537                 }
538                 if (unlikely(r == ERESTARTSYS)) {
539                         return -r;
540                 }
541                 CTR2(KTR_DRM, "radeon fence: wait end (ring=%d, target_seq=%d)",
542                     ring, target_seq[ring]);
543
544                 if (unlikely(!signaled)) {
545 #ifndef __FreeBSD__
546                         /* we were interrupted for some reason and fence
547                          * isn't signaled yet, resume waiting */
548                         if (r) {
549                                 continue;
550                         }
551 #endif
552
553                         sx_xlock(&rdev->ring_lock);
554                         for (i = 0, tmp = 0; i < RADEON_NUM_RINGS; ++i) {
555                                 if (time_after(rdev->fence_drv[i].last_activity, tmp)) {
556                                         tmp = rdev->fence_drv[i].last_activity;
557                                 }
558                         }
559                         /* test if somebody else has already decided that this is a lockup */
560                         if (last_activity != tmp) {
561                                 last_activity = tmp;
562                                 sx_xunlock(&rdev->ring_lock);
563                                 continue;
564                         }
565
566                         if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
567                                 /* good news we believe it's a lockup */
568                                 dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016jx)\n",
569                                          (uintmax_t)target_seq[ring]);
570
571                                 /* change last activity so nobody else thinks there is a lockup */
572                                 for (i = 0; i < RADEON_NUM_RINGS; ++i) {
573                                         rdev->fence_drv[i].last_activity = jiffies;
574                                 }
575
576                                 /* mark the ring as not ready any more */
577                                 rdev->ring[ring].ready = false;
578                                 sx_xunlock(&rdev->ring_lock);
579                                 return -EDEADLK;
580                         }
581                         sx_xunlock(&rdev->ring_lock);
582                 }
583         }
584         return 0;
585 }
586
587 /**
588  * radeon_fence_wait_any - wait for a fence to signal on any ring
589  *
590  * @rdev: radeon device pointer
591  * @fences: radeon fence object(s)
592  * @intr: use interruptible sleep
593  *
594  * Wait for any requested fence to signal (all asics).  Fence
595  * array is indexed by ring id.  @intr selects whether to use
596  * interruptible (true) or non-interruptible (false) sleep when
597  * waiting for the fences. Used by the suballocator.
598  * Returns 0 if any fence has passed, error for all other cases.
599  */
600 int radeon_fence_wait_any(struct radeon_device *rdev,
601                           struct radeon_fence **fences,
602                           bool intr)
603 {
604         uint64_t seq[RADEON_NUM_RINGS];
605         unsigned i;
606         int r;
607
608         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
609                 seq[i] = 0;
610
611                 if (!fences[i]) {
612                         continue;
613                 }
614
615                 if (fences[i]->seq == RADEON_FENCE_SIGNALED_SEQ) {
616                         /* something was already signaled */
617                         return 0;
618                 }
619
620                 seq[i] = fences[i]->seq;
621         }
622
623         r = radeon_fence_wait_any_seq(rdev, seq, intr);
624         if (r) {
625                 return r;
626         }
627         return 0;
628 }
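/*
 * Illustrative caller sketch (not part of the original file): the
 * suballocator builds a per-ring array of candidate fences and waits for
 * whichever signals first.  gfx_fence is a placeholder name:
 *
 *      struct radeon_fence *fences[RADEON_NUM_RINGS];
 *      unsigned i;
 *
 *      for (i = 0; i < RADEON_NUM_RINGS; ++i)
 *              fences[i] = NULL;
 *      fences[RADEON_RING_TYPE_GFX_INDEX] = gfx_fence;
 *      r = radeon_fence_wait_any(rdev, fences, false);
 *
 * Rings with a NULL entry are simply not waited on.
 */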
629
630 /**
631  * radeon_fence_wait_next_locked - wait for the next fence to signal
632  *
633  * @rdev: radeon device pointer
634  * @ring: ring index the fence is associated with
635  *
636  * Wait for the next fence on the requested ring to signal (all asics).
637  * Returns 0 if the next fence has passed, error for all other cases.
638  * Caller must hold ring lock.
639  */
640 int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
641 {
642         uint64_t seq;
643
644         seq = atomic_load_acq_64(&rdev->fence_drv[ring].last_seq) + 1ULL;
645         if (seq >= rdev->fence_drv[ring].sync_seq[ring]) {
646                 /* nothing to wait for, last_seq is
647                    already the last emitted fence */
648                 return -ENOENT;
649         }
650         return radeon_fence_wait_seq(rdev, seq, ring, false, false);
651 }
652
653 /**
654  * radeon_fence_wait_empty_locked - wait for all fences to signal
655  *
656  * @rdev: radeon device pointer
657  * @ring: ring index the fence is associated with
658  *
659  * Wait for all fences on the requested ring to signal (all asics).
660  * Returns 0 if the fences have passed, error for all other cases.
661  * Caller must hold ring lock.
662  */
663 int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
664 {
665         uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];
666         int r;
667
668         r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
669         if (r) {
670                 if (r == -EDEADLK) {
671                         return -EDEADLK;
672                 }
673                 dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n",
674                         ring, r);
675         }
676         return 0;
677 }
678
679 /**
680  * radeon_fence_ref - take a ref on a fence
681  *
682  * @fence: radeon fence object
683  *
684  * Take a reference on a fence (all asics).
685  * Returns the fence.
686  */
687 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
688 {
689         refcount_acquire(&fence->kref);
690         return fence;
691 }
692
693 /**
694  * radeon_fence_unref - remove a ref on a fence
695  *
696  * @fence: radeon fence object
697  *
698  * Remove a reference on a fence (all asics).
699  */
700 void radeon_fence_unref(struct radeon_fence **fence)
701 {
702         struct radeon_fence *tmp = *fence;
703
704         *fence = NULL;
705         if (tmp) {
706                 if (refcount_release(&tmp->kref)) {
707                         radeon_fence_destroy(tmp);
708                 }
709         }
710 }
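/*
 * Illustrative sketch (not part of the original file): a caller that keeps
 * a fence beyond the emitting context takes its own reference and drops it
 * when done; bo_fence is a placeholder name:
 *
 *      bo_fence = radeon_fence_ref(fence);
 *      ... use or store bo_fence ...
 *      radeon_fence_unref(&bo_fence);
 *
 * radeon_fence_unref() also clears the caller's pointer, so a stale pointer
 * cannot be dereferenced after the last reference is dropped.
 */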
711
712 /**
713  * radeon_fence_count_emitted - get the count of emitted fences
714  *
715  * @rdev: radeon device pointer
716  * @ring: ring index the fence is associated with
717  *
718  * Get the number of fences emitted on the requested ring (all asics).
719  * Returns the number of emitted fences on the ring.  Used by the
721  * dynpm code to track ring activity.
721  */
722 unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
723 {
724         uint64_t emitted;
725
726         /* We are not protected by ring lock when reading the last sequence
727          * but it's ok to report slightly wrong fence count here.
728          */
729         radeon_fence_process(rdev, ring);
730         emitted = rdev->fence_drv[ring].sync_seq[ring]
731                 - atomic_load_acq_64(&rdev->fence_drv[ring].last_seq);
732         /* to avoid a 32 bit wrap around */
733         if (emitted > 0x10000000) {
734                 emitted = 0x10000000;
735         }
736         return (unsigned)emitted;
737 }
738
739 /**
740  * radeon_fence_need_sync - do we need a semaphore
741  *
742  * @fence: radeon fence object
743  * @dst_ring: which ring to check against
744  *
745  * Check if the fence needs to be synced against another ring
746  * (all asics).  If so, we need to emit a semaphore.
747  * Returns true if we need to sync with another ring, false if
748  * not.
749  */
750 bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
751 {
752         struct radeon_fence_driver *fdrv;
753
754         if (!fence) {
755                 return false;
756         }
757
758         if (fence->ring == dst_ring) {
759                 return false;
760         }
761
762         /* we are protected by the ring mutex */
763         fdrv = &fence->rdev->fence_drv[dst_ring];
764         if (fence->seq <= fdrv->sync_seq[fence->ring]) {
765                 return false;
766         }
767
768         return true;
769 }
770
771 /**
772  * radeon_fence_note_sync - record the sync point
773  *
774  * @fence: radeon fence object
775  * @dst_ring: which ring to check against
776  *
777  * Note the sequence number at which point the fence will
778  * be synced with the requested ring (all asics).
779  */
780 void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
781 {
782         struct radeon_fence_driver *dst, *src;
783         unsigned i;
784
785         if (!fence) {
786                 return;
787         }
788
789         if (fence->ring == dst_ring) {
790                 return;
791         }
792
793         /* we are protected by the ring mutex */
794         src = &fence->rdev->fence_drv[fence->ring];
795         dst = &fence->rdev->fence_drv[dst_ring];
796         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
797                 if (i == dst_ring) {
798                         continue;
799                 }
800                 dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
801         }
802 }
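/*
 * Illustrative sketch (not part of the original file) of how the two
 * helpers above are typically paired when work on dst_ring depends on a
 * fence emitted on another ring; the semaphore emission itself lives
 * elsewhere in the driver:
 *
 *      if (radeon_fence_need_sync(fence, dst_ring)) {
 *              ... emit a semaphore wait on dst_ring for fence ...
 *              radeon_fence_note_sync(fence, dst_ring);
 *      }
 *
 * Recording the sync point lets later checks against dst_ring skip
 * redundant semaphores for anything at or before fence->seq.
 */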
803
804 /**
805  * radeon_fence_driver_start_ring - make the fence driver
806  * ready for use on the requested ring.
807  *
808  * @rdev: radeon device pointer
809  * @ring: ring index to start the fence driver on
810  *
811  * Make the fence driver ready for processing (all asics).
812  * Not all asics have all rings, so each asic will only
813  * start the fence driver on the rings it has.
814  * Returns 0 for success, errors for failure.
815  */
816 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
817 {
818         uint64_t index;
819         int r;
820
821         radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
822         if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
823                 rdev->fence_drv[ring].scratch_reg = 0;
824                 index = R600_WB_EVENT_OFFSET + ring * 4;
825         } else {
826                 r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
827                 if (r) {
828                         dev_err(rdev->dev, "fence failed to get scratch register\n");
829                         return r;
830                 }
831                 index = RADEON_WB_SCRATCH_OFFSET +
832                         rdev->fence_drv[ring].scratch_reg -
833                         rdev->scratch.reg_base;
834         }
835         rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
836         rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
837         radeon_fence_write(rdev, atomic_load_acq_64(&rdev->fence_drv[ring].last_seq), ring);
838         rdev->fence_drv[ring].initialized = true;
839         dev_info(rdev->dev, "fence driver on ring %d uses gpu addr 0x%016jx and cpu addr 0x%p\n",
840                  ring, (uintmax_t)rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
841         return 0;
842 }
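/*
 * Worked example of the writeback index computed above (illustrative):
 * with writeback events in use, ring 2 gets
 * index = R600_WB_EVENT_OFFSET + 2 * 4, i.e. 8 bytes past ring 0's slot,
 * so each ring owns its own 32 bit word in the writeback page; cpu_addr
 * then points at wb.wb[index / 4] and gpu_addr is the writeback buffer's
 * GPU address plus the same byte offset.
 */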
843
844 /**
845  * radeon_fence_driver_init_ring - init the fence driver
846  * for the requested ring.
847  *
848  * @rdev: radeon device pointer
849  * @ring: ring index to start the fence driver on
850  *
851  * Init the fence driver for the requested ring (all asics).
852  * Helper function for radeon_fence_driver_init().
853  */
854 static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
855 {
856         int i;
857
858         rdev->fence_drv[ring].scratch_reg = -1;
859         rdev->fence_drv[ring].cpu_addr = NULL;
860         rdev->fence_drv[ring].gpu_addr = 0;
861         for (i = 0; i < RADEON_NUM_RINGS; ++i)
862                 rdev->fence_drv[ring].sync_seq[i] = 0;
863         atomic_store_rel_64(&rdev->fence_drv[ring].last_seq, 0);
864         rdev->fence_drv[ring].last_activity = jiffies;
865         rdev->fence_drv[ring].initialized = false;
866 }
867
868 /**
869  * radeon_fence_driver_init - init the fence driver
870  * for all possible rings.
871  *
872  * @rdev: radeon device pointer
873  *
874  * Init the fence driver for all possible rings (all asics).
875  * Not all asics have all rings, so each asic will only
876  * start the fence driver on the rings it has using
877  * radeon_fence_driver_start_ring().
878  * Returns 0 for success.
879  */
880 int radeon_fence_driver_init(struct radeon_device *rdev)
881 {
882         int ring;
883
884         mtx_init(&rdev->fence_queue_mtx,
885             "drm__radeon_device__fence_queue_mtx", NULL, MTX_DEF);
886         cv_init(&rdev->fence_queue, "drm__radeon_device__fence_queue");
887         for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
888                 radeon_fence_driver_init_ring(rdev, ring);
889         }
890         if (radeon_debugfs_fence_init(rdev)) {
891                 dev_err(rdev->dev, "fence debugfs file creation failed\n");
892         }
893         return 0;
894 }
895
896 /**
897  * radeon_fence_driver_fini - tear down the fence driver
898  * for all possible rings.
899  *
900  * @rdev: radeon device pointer
901  *
902  * Tear down the fence driver for all possible rings (all asics).
903  */
904 void radeon_fence_driver_fini(struct radeon_device *rdev)
905 {
906         int ring, r;
907
908         sx_xlock(&rdev->ring_lock);
909         for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
910                 if (!rdev->fence_drv[ring].initialized)
911                         continue;
912                 r = radeon_fence_wait_empty_locked(rdev, ring);
913                 if (r) {
914                         /* no need to trigger GPU reset as we are unloading */
915                         radeon_fence_driver_force_completion(rdev);
916                 }
917                 cv_broadcast(&rdev->fence_queue);
918                 radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
919                 rdev->fence_drv[ring].initialized = false;
920                 cv_destroy(&rdev->fence_queue);
921         }
922         sx_xunlock(&rdev->ring_lock);
923 }
924
925 /**
926  * radeon_fence_driver_force_completion - force all fence waiters to complete
927  *
928  * @rdev: radeon device pointer
929  *
930  * In case of GPU reset failure make sure no process keeps waiting on a
931  * fence that will never complete.
932  */
933 void radeon_fence_driver_force_completion(struct radeon_device *rdev)
934 {
935         int ring;
936
937         for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
938                 if (!rdev->fence_drv[ring].initialized)
939                         continue;
940                 radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
941         }
942 }
943
944
945 /*
946  * Fence debugfs
947  */
948 #if defined(CONFIG_DEBUG_FS)
949 static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
950 {
951         struct drm_info_node *node = (struct drm_info_node *)m->private;
952         struct drm_device *dev = node->minor->dev;
953         struct radeon_device *rdev = dev->dev_private;
954         int i, j;
955
956         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
957                 if (!rdev->fence_drv[i].initialized)
958                         continue;
959
960                 seq_printf(m, "--- ring %d ---\n", i);
961                 seq_printf(m, "Last signaled fence 0x%016llx\n",
962                            (unsigned long long)atomic_load_acq_64(&rdev->fence_drv[i].last_seq));
963                 seq_printf(m, "Last emitted        0x%016llx\n",
964                            rdev->fence_drv[i].sync_seq[i]);
965
966                 for (j = 0; j < RADEON_NUM_RINGS; ++j) {
967                         if (i != j && rdev->fence_drv[j].initialized)
968                                 seq_printf(m, "Last sync to ring %d 0x%016llx\n",
969                                            j, rdev->fence_drv[i].sync_seq[j]);
970                 }
971         }
972         return 0;
973 }
974
975 static struct drm_info_list radeon_debugfs_fence_list[] = {
976         {"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
977 };
978 #endif
979
980 int radeon_debugfs_fence_init(struct radeon_device *rdev)
981 {
982 #if defined(CONFIG_DEBUG_FS)
983         return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 1);
984 #else
985         return 0;
986 #endif
987 }