Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 59bc1d89 authored by Lauri Kasanen's avatar Lauri Kasanen Committed by Alex Deucher
Browse files

drm/radeon: Inline r100_mm_rreg, -wreg, v3



This was originally un-inlined by Andi Kleen in 2011 citing size concerns.
Indeed, a first attempt at inlining it grew radeon.ko by 7%.

However, 2% of cpu is spent in this function. Simply inlining it gave 1% more fps
in Urban Terror.

v2: We know the minimum MMIO size. Adding it to the if allows the compiler to
optimize the branch out, improving both performance and size.

The v2 patch decreases radeon.ko size by 2%. I didn't re-benchmark, but common sense
says perf is now more than 1% better.

v3: Also change _wreg, make the threshold a define.

Inlining _wreg increased the size a bit compared to v2, so now radeon.ko
is only 1% smaller.

Signed-off-by: default avatarLauri Kasanen <cand@gmx.com>
Reviewed-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 3e22920f
Loading
Loading
Loading
Loading
+0 −33
Original line number Diff line number Diff line
@@ -4065,39 +4065,6 @@ int r100_init(struct radeon_device *rdev)
	return 0;
}

uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
		      bool always_indirect)
{
	if (reg < rdev->rmmio_size && !always_indirect)
		return readl(((void __iomem *)rdev->rmmio) + reg);
	else {
		unsigned long flags;
		uint32_t ret;

		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);

		return ret;
	}
}

void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
		  bool always_indirect)
{
	if (reg < rdev->rmmio_size && !always_indirect)
		writel(v, ((void __iomem *)rdev->rmmio) + reg);
	else {
		unsigned long flags;

		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
	}
}

u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
{
	if (reg < rdev->rio_mem_size)
+36 −4
Original line number Diff line number Diff line
@@ -2357,10 +2357,42 @@ int radeon_device_init(struct radeon_device *rdev,
void radeon_device_fini(struct radeon_device *rdev);
int radeon_gpu_wait_for_idle(struct radeon_device *rdev);

uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
		      bool always_indirect);
void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
		  bool always_indirect);
#define RADEON_MIN_MMIO_SIZE 0x10000

static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
				    bool always_indirect)
{
	/* The mmio size is 64kb at minimum. Allows the if to be optimized out. */
	if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
		return readl(((void __iomem *)rdev->rmmio) + reg);
	else {
		unsigned long flags;
		uint32_t ret;

		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);

		return ret;
	}
}

static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
				bool always_indirect)
{
	if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
		writel(v, ((void __iomem *)rdev->rmmio) + reg);
	else {
		unsigned long flags;

		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
	}
}

u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);