Merge "msm: kgsl: Flush TLB when mapping to IOMMU pagetable" (c5759e1e) · Commits · e / devices / android_kernel_sony_msm8994

drivers/gpu/msm/adreno-gpulist.h

+4 −4

Original line number	Diff line number	Diff line
		@@ -88,7 +88,7 @@ static const struct adreno_gpulist {
		.major = 3,
		.minor = 0,
		.patchid = ANY_ID,
		.features = ADRENO_USES_OCMEM,
		.features = ADRENO_USES_OCMEM \| IOMMU_FLUSH_TLB_ON_MAP,
		.pm4fw = "a330_pm4.fw",
		.pfpfw = "a330_pfp.fw",
		.gpudev = &adreno_a3xx_gpudev,
		@@ -112,7 +112,7 @@ static const struct adreno_gpulist {
		.patchid = 0x10,
		.pm4fw = "a330_pm4.fw",
		.pfpfw = "a330_pfp.fw",
		.features = ADRENO_USES_OCMEM,
		.features = ADRENO_USES_OCMEM \| IOMMU_FLUSH_TLB_ON_MAP,
		.gpudev = &adreno_a3xx_gpudev,
		.gmem_size = SZ_128K,
		.sync_lock_pm4_ver = NO_VER,
		@@ -129,7 +129,7 @@ static const struct adreno_gpulist {
		.major = 0,
		.minor = 5,
		.patchid = 0x12,
		.features = ADRENO_USES_OCMEM,
		.features = ADRENO_USES_OCMEM \| IOMMU_FLUSH_TLB_ON_MAP,
		.pm4fw = "a330_pm4.fw",
		.pfpfw = "a330_pfp.fw",
		.gpudev = &adreno_a3xx_gpudev,
		@@ -192,7 +192,7 @@ static const struct adreno_gpulist {
		.major = 2,
		.minor = 0,
		.patchid = ANY_ID,
		.features = ADRENO_USES_OCMEM,
		.features = ADRENO_USES_OCMEM \| IOMMU_FLUSH_TLB_ON_MAP,
		.pm4fw = "a420_pm4.fw",
		.pfpfw = "a420_pfp.fw",
		.gpudev = &adreno_a4xx_gpudev,

drivers/gpu/msm/adreno.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -43,6 +43,7 @@

		/* Adreno core features */
		/* Feature bit 0; set in the .features field of GPU list entries */
		#define ADRENO_USES_OCMEM BIT(0)
		/*
		 * Feature bit 1; when set in a core's features, kgsl_iommu_map()
		 * flushes the TLB of the pagetable after mapping, provided that
		 * pagetable is the one currently in use.
		 */
		#define IOMMU_FLUSH_TLB_ON_MAP BIT(1)

		/* Flags to control command packet settings */
		#define KGSL_CMD_FLAGS_NONE 0

drivers/gpu/msm/kgsl_iommu.c

+56 −23

Original line number	Diff line number	Diff line
		@@ -1690,16 +1690,49 @@ done:
		return status;
		}

		/*
		 * kgsl_iommu_flush_tlb_pt_current - Issue an IOMMU TLB flush when the
		 * given pagetable is the one currently in use by the GPU.
		 * @pt - Pointer to kgsl pagetable structure
		 *
		 * The device mutex is held around both the check and the flush. It is
		 * released again only when this call was the one that acquired it.
		 *
		 * Return - void
		 */
		static void kgsl_iommu_flush_tlb_pt_current(struct kgsl_pagetable *pt)
		{
			struct kgsl_iommu *iommu = pt->mmu->priv;
			struct kgsl_device *device = pt->mmu->device;
			int acquired_here;

			/*
			 * Flushing the tlb requires the device mutex. The current thread
			 * may already hold it; a zero return from kgsl_mutex_lock() is
			 * taken to mean the mutex was newly acquired by this call and
			 * therefore has to be dropped again before returning.
			 */
			acquired_here = !kgsl_mutex_lock(&device->mutex, &device->mutex_owner);

			/* Nothing to flush unless per-process pagetables are in use */
			if (!kgsl_mmu_is_perprocess(pt->mmu))
				goto unlock;

			/* Nothing to flush while the user context is not attached */
			if (!iommu->iommu_units[0].dev[KGSL_IOMMU_CONTEXT_USER].attached)
				goto unlock;

			/* Flush only if the pagetable hasn't been switched yet */
			if (kgsl_iommu_pt_equal(pt->mmu, pt,
					kgsl_iommu_get_current_ptbase(pt->mmu)))
				kgsl_iommu_default_setstate(pt->mmu, KGSL_MMUFLAGS_TLBFLUSH);

		unlock:
			if (acquired_here)
				kgsl_mutex_unlock(&device->mutex, &device->mutex_owner);
		}

		static int
		kgsl_iommu_unmap(struct kgsl_pagetable *pt,
		struct kgsl_memdesc *memdesc,
		unsigned int *tlb_flags)
		{
		int ret = 0, lock_taken = 0;
		int ret = 0;
		unsigned int range = memdesc->size;
		struct kgsl_iommu_pt *iommu_pt = pt->priv;
		struct kgsl_device *device = pt->mmu->device;
		struct kgsl_iommu *iommu = pt->mmu->priv;

		/* All GPU addresses as assigned are page aligned, but some
		functions perturb the gpuaddr with an offset, so apply the
		@@ -1721,26 +1754,7 @@ kgsl_iommu_unmap(struct kgsl_pagetable *pt,
		return ret;
		}

		/*
		* Check to see if the current thread already holds the device mutex.
		* If it does not, then take the device mutex which is required for
		* flushing the tlb
		*/
		if (!kgsl_mutex_lock(&device->mutex, &device->mutex_owner))
		lock_taken = 1;

		/*
		* Flush the tlb only if the iommu device is attached and the pagetable
		* hasn't been switched yet
		*/
		if (kgsl_mmu_is_perprocess(pt->mmu) &&
		iommu->iommu_units[0].dev[KGSL_IOMMU_CONTEXT_USER].attached &&
		kgsl_iommu_pt_equal(pt->mmu, pt,
		kgsl_iommu_get_current_ptbase(pt->mmu)))
		kgsl_iommu_default_setstate(pt->mmu, KGSL_MMUFLAGS_TLBFLUSH);

		if (lock_taken)
		kgsl_mutex_unlock(&device->mutex, &device->mutex_owner);
		kgsl_iommu_flush_tlb_pt_current(pt);

		return ret;
		}
		@@ -1755,6 +1769,8 @@ kgsl_iommu_map(struct kgsl_pagetable *pt,
		struct kgsl_iommu_pt *iommu_pt = pt->priv;
		size_t size = memdesc->size;
		unsigned int protflags;
		struct kgsl_device *device = pt->mmu->device;
		struct adreno_device *adreno_dev = ADRENO_DEVICE(device);

		BUG_ON(NULL == iommu_pt);

		@@ -1787,6 +1803,23 @@ kgsl_iommu_map(struct kgsl_pagetable *pt,
		size);
		}
		}

		/*
		* IOMMU V1 BFBs pre-fetch data beyond what is being used by the core.
		* This can include both allocated pages and un-allocated pages.
		* If an un-allocated page is cached, and later used (if it has been
		* newly dynamically allocated by SW) the SMMU HW should automatically
		* re-fetch the pages from memory (rather than using the cached
		* un-allocated page). This logic is known as the re-fetch logic.
		* In current chips we suspect this re-fetch logic is broken,
		* it can result in bad translations which can either cause downstream
		* bus errors, or upstream cores being hung (because of garbage data
		* being read) -> causing TLB sync stuck issues. As a result SW must
		* implement the invalidate+map.
		*/
		if (adreno_dev->features & IOMMU_FLUSH_TLB_ON_MAP)
		kgsl_iommu_flush_tlb_pt_current(pt);

		return ret;
		}