Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 36abacae authored by Christian König, committed by Dave Airlie
Browse files

drm/radeon: rework gpu lockup detection and processing



Previously multiple rings could trigger multiple GPU
resets at the same time.

Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
parent 7bd560e8
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -255,8 +255,7 @@ struct radeon_fence_driver {
	volatile uint32_t		*cpu_addr;
	atomic_t			seq;
	uint32_t			last_seq;
	unsigned long			last_jiffies;
	unsigned long			last_timeout;
	unsigned long			last_activity;
	wait_queue_head_t		queue;
	struct list_head		created;
	struct list_head		emitted;
+76 −74
Original line number Diff line number Diff line
@@ -74,6 +74,10 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
	radeon_fence_ring_emit(rdev, fence->ring, fence);
	trace_radeon_fence_emit(rdev->ddev, fence->seq);
	fence->emitted = true;
	/* are we the first fence on a previously idle ring? */
	if (list_empty(&rdev->fence_drv[fence->ring].emitted)) {
		rdev->fence_drv[fence->ring].last_activity = jiffies;
	}
	list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted);
	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
	return 0;
@@ -85,34 +89,14 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
	struct list_head *i, *n;
	uint32_t seq;
	bool wake = false;
	unsigned long cjiffies;

	seq = radeon_fence_read(rdev, ring);
	if (seq != rdev->fence_drv[ring].last_seq) {
		rdev->fence_drv[ring].last_seq = seq;
		rdev->fence_drv[ring].last_jiffies = jiffies;
		rdev->fence_drv[ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
	} else {
		cjiffies = jiffies;
		if (time_after(cjiffies, rdev->fence_drv[ring].last_jiffies)) {
			cjiffies -= rdev->fence_drv[ring].last_jiffies;
			if (time_after(rdev->fence_drv[ring].last_timeout, cjiffies)) {
				/* update the timeout */
				rdev->fence_drv[ring].last_timeout -= cjiffies;
			} else {
				/* the 500ms timeout is elapsed we should test
				 * for GPU lockup
				 */
				rdev->fence_drv[ring].last_timeout = 1;
			}
		} else {
			/* wrap around update last jiffies, we will just wait
			 * a little longer
			 */
			rdev->fence_drv[ring].last_jiffies = cjiffies;
		}
	if (seq == rdev->fence_drv[ring].last_seq)
		return false;
	}

	rdev->fence_drv[ring].last_seq = seq;
	rdev->fence_drv[ring].last_activity = jiffies;

	n = NULL;
	list_for_each(i, &rdev->fence_drv[ring].emitted) {
		fence = list_entry(i, struct radeon_fence, list);
@@ -207,49 +191,68 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
	struct radeon_device *rdev;
	unsigned long irq_flags, timeout;
	u32 seq;
	int r;
	int i, r;
	bool signaled;

	if (fence == NULL) {
		WARN(1, "Querying an invalid fence : %p !\n", fence);
		return 0;
		return -EINVAL;
	}

	rdev = fence->rdev;
	if (radeon_fence_signaled(fence)) {
		return 0;
	signaled = radeon_fence_signaled(fence);
	while (!signaled) {
		read_lock_irqsave(&rdev->fence_lock, irq_flags);
		timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
		if (time_after(rdev->fence_drv[fence->ring].last_activity, timeout)) {
			/* the normal case, timeout is somewhere before last_activity */
			timeout = rdev->fence_drv[fence->ring].last_activity - timeout;
		} else {
			/* either jiffies wrapped around, or no fence was signaled in the last 500ms
			 * anyway we will just wait for the minimum amount and then check for a lockup */
			timeout = 1;
		}
	timeout = rdev->fence_drv[fence->ring].last_timeout;
retry:
	/* save current sequence used to check for GPU lockup */
		/* save current sequence value used to check for GPU lockups */
		seq = rdev->fence_drv[fence->ring].last_seq;
		read_unlock_irqrestore(&rdev->fence_lock, irq_flags);

		trace_radeon_fence_wait_begin(rdev->ddev, seq);
	if (intr) {
		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
		r = wait_event_interruptible_timeout(rdev->fence_drv[fence->ring].queue,
				radeon_fence_signaled(fence), timeout);
		if (intr) {
			r = wait_event_interruptible_timeout(
				rdev->fence_drv[fence->ring].queue,
				(signaled = radeon_fence_signaled(fence)), timeout);
		} else {
			r = wait_event_timeout(
				rdev->fence_drv[fence->ring].queue,
				(signaled = radeon_fence_signaled(fence)), timeout);
		}
		radeon_irq_kms_sw_irq_put(rdev, fence->ring);
		if (unlikely(r < 0)) {
			return r;
		}
	} else {
		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
		r = wait_event_timeout(rdev->fence_drv[fence->ring].queue,
			 radeon_fence_signaled(fence), timeout);
		radeon_irq_kms_sw_irq_put(rdev, fence->ring);
	}
		trace_radeon_fence_wait_end(rdev->ddev, seq);
	if (unlikely(!radeon_fence_signaled(fence))) {
		/* we were interrupted for some reason and fence isn't
		 * isn't signaled yet, resume wait
		 */

		if (unlikely(!signaled)) {
			/* we were interrupted for some reason and fence
			 * isn't signaled yet, resume waiting */
			if (r) {
			timeout = r;
			goto retry;
				continue;
			}
		/* don't protect read access to rdev->fence_drv[t].last_seq
		 * if we experiencing a lockup the value doesn't change
		 */
		if (seq == rdev->fence_drv[fence->ring].last_seq &&
		    radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {

			write_lock_irqsave(&rdev->fence_lock, irq_flags);
			/* check if sequence value has changed since last_activity */
			if (seq != rdev->fence_drv[fence->ring].last_seq) {
				write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
				continue;
			}

			/* change sequence value on all rings, so nobody else thinks there is a lockup */
			for (i = 0; i < RADEON_NUM_RINGS; ++i)
				rdev->fence_drv[i].last_seq -= 0x10000;
			write_unlock_irqrestore(&rdev->fence_lock, irq_flags);

			if (radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {

				/* good news we believe it's a lockup */
				printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
@@ -260,13 +263,12 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
				r = radeon_gpu_reset(rdev);
				if (r)
					return r;
		}
		timeout = RADEON_FENCE_JIFFIES_TIMEOUT;

				write_lock_irqsave(&rdev->fence_lock, irq_flags);
		rdev->fence_drv[fence->ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
		rdev->fence_drv[fence->ring].last_jiffies = jiffies;
				rdev->fence_drv[fence->ring].last_activity = jiffies;
				write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
		goto retry;
			}
		}
	}
	return 0;
}