Loading drivers/gpu/msm/adreno-gpulist.h +4 −4 Original line number Diff line number Diff line Loading @@ -88,7 +88,7 @@ static const struct adreno_gpulist { .major = 3, .minor = 0, .patchid = ANY_ID, .features = ADRENO_USES_OCMEM, .features = ADRENO_USES_OCMEM | IOMMU_FLUSH_TLB_ON_MAP, .pm4fw = "a330_pm4.fw", .pfpfw = "a330_pfp.fw", .gpudev = &adreno_a3xx_gpudev, Loading @@ -112,7 +112,7 @@ static const struct adreno_gpulist { .patchid = 0x10, .pm4fw = "a330_pm4.fw", .pfpfw = "a330_pfp.fw", .features = ADRENO_USES_OCMEM, .features = ADRENO_USES_OCMEM | IOMMU_FLUSH_TLB_ON_MAP, .gpudev = &adreno_a3xx_gpudev, .gmem_size = SZ_128K, .sync_lock_pm4_ver = NO_VER, Loading @@ -129,7 +129,7 @@ static const struct adreno_gpulist { .major = 0, .minor = 5, .patchid = 0x12, .features = ADRENO_USES_OCMEM, .features = ADRENO_USES_OCMEM | IOMMU_FLUSH_TLB_ON_MAP, .pm4fw = "a330_pm4.fw", .pfpfw = "a330_pfp.fw", .gpudev = &adreno_a3xx_gpudev, Loading Loading @@ -192,7 +192,7 @@ static const struct adreno_gpulist { .major = 2, .minor = 0, .patchid = ANY_ID, .features = ADRENO_USES_OCMEM, .features = ADRENO_USES_OCMEM | IOMMU_FLUSH_TLB_ON_MAP, .pm4fw = "a420_pm4.fw", .pfpfw = "a420_pfp.fw", .gpudev = &adreno_a4xx_gpudev, Loading drivers/gpu/msm/adreno.h +1 −0 Original line number Diff line number Diff line Loading @@ -43,6 +43,7 @@ /* Adreno core features */ #define ADRENO_USES_OCMEM BIT(0) #define IOMMU_FLUSH_TLB_ON_MAP BIT(1) /* Flags to control command packet settings */ #define KGSL_CMD_FLAGS_NONE 0 Loading drivers/gpu/msm/kgsl_iommu.c +56 −23 Original line number Diff line number Diff line Loading @@ -1690,16 +1690,49 @@ done: return status; } /* * kgsl_iommu_flush_tlb_pt_current - Flush IOMMU TLB if pagetable is * currently used by GPU. * @pt - Pointer to kgsl pagetable structure * * Return - void */ static void kgsl_iommu_flush_tlb_pt_current(struct kgsl_pagetable *pt) { int lock_taken = 0; struct kgsl_device *device = pt->mmu->device; struct kgsl_iommu *iommu = pt->mmu->priv; /* * Check to see if the current thread already holds the device mutex. * If it does not, then take the device mutex which is required for * flushing the tlb */ if (!kgsl_mutex_lock(&device->mutex, &device->mutex_owner)) lock_taken = 1; /* * Flush the tlb only if the iommu device is attached and the pagetable * hasn't been switched yet */ if (kgsl_mmu_is_perprocess(pt->mmu) && iommu->iommu_units[0].dev[KGSL_IOMMU_CONTEXT_USER].attached && kgsl_iommu_pt_equal(pt->mmu, pt, kgsl_iommu_get_current_ptbase(pt->mmu))) kgsl_iommu_default_setstate(pt->mmu, KGSL_MMUFLAGS_TLBFLUSH); if (lock_taken) kgsl_mutex_unlock(&device->mutex, &device->mutex_owner); } static int kgsl_iommu_unmap(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc, unsigned int *tlb_flags) { int ret = 0, lock_taken = 0; int ret = 0; unsigned int range = memdesc->size; struct kgsl_iommu_pt *iommu_pt = pt->priv; struct kgsl_device *device = pt->mmu->device; struct kgsl_iommu *iommu = pt->mmu->priv; /* All GPU addresses as assigned are page aligned, but some functions purturb the gpuaddr with an offset, so apply the Loading @@ -1721,26 +1754,7 @@ kgsl_iommu_unmap(struct kgsl_pagetable *pt, return ret; } /* * Check to see if the current thread already holds the device mutex. * If it does not, then take the device mutex which is required for * flushing the tlb */ if (!kgsl_mutex_lock(&device->mutex, &device->mutex_owner)) lock_taken = 1; /* * Flush the tlb only if the iommu device is attached and the pagetable * hasn't been switched yet */ if (kgsl_mmu_is_perprocess(pt->mmu) && iommu->iommu_units[0].dev[KGSL_IOMMU_CONTEXT_USER].attached && kgsl_iommu_pt_equal(pt->mmu, pt, kgsl_iommu_get_current_ptbase(pt->mmu))) kgsl_iommu_default_setstate(pt->mmu, KGSL_MMUFLAGS_TLBFLUSH); if (lock_taken) kgsl_mutex_unlock(&device->mutex, &device->mutex_owner); kgsl_iommu_flush_tlb_pt_current(pt); return ret; } Loading @@ -1755,6 +1769,8 @@ kgsl_iommu_map(struct kgsl_pagetable *pt, struct kgsl_iommu_pt *iommu_pt = pt->priv; size_t size = memdesc->size; unsigned int protflags; struct kgsl_device *device = pt->mmu->device; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); BUG_ON(NULL == iommu_pt); Loading Loading @@ -1787,6 +1803,23 @@ kgsl_iommu_map(struct kgsl_pagetable *pt, size); } } /* * IOMMU V1 BFBs pre-fetch data beyond what is being used by the core. * This can include both allocated pages and un-allocated pages. * If an un-allocated page is cached, and later used (if it has been * newly dynamically allocated by SW) the SMMU HW should automatically * re-fetch the pages from memory (rather than using the cached * un-allocated page). This logic is known as the re-fetch logic. * In current chips we suspect this re-fetch logic is broken, * it can result in bad translations which can either cause downstream * bus errors, or upstream cores being hung (because of garbage data * being read) -> causing TLB sync stuck issues. As a result SW must * implement the invalidate+map. */ if (adreno_dev->features & IOMMU_FLUSH_TLB_ON_MAP) kgsl_iommu_flush_tlb_pt_current(pt); return ret; } Loading Loading
drivers/gpu/msm/adreno-gpulist.h +4 −4 Original line number Diff line number Diff line Loading @@ -88,7 +88,7 @@ static const struct adreno_gpulist { .major = 3, .minor = 0, .patchid = ANY_ID, .features = ADRENO_USES_OCMEM, .features = ADRENO_USES_OCMEM | IOMMU_FLUSH_TLB_ON_MAP, .pm4fw = "a330_pm4.fw", .pfpfw = "a330_pfp.fw", .gpudev = &adreno_a3xx_gpudev, Loading @@ -112,7 +112,7 @@ static const struct adreno_gpulist { .patchid = 0x10, .pm4fw = "a330_pm4.fw", .pfpfw = "a330_pfp.fw", .features = ADRENO_USES_OCMEM, .features = ADRENO_USES_OCMEM | IOMMU_FLUSH_TLB_ON_MAP, .gpudev = &adreno_a3xx_gpudev, .gmem_size = SZ_128K, .sync_lock_pm4_ver = NO_VER, Loading @@ -129,7 +129,7 @@ static const struct adreno_gpulist { .major = 0, .minor = 5, .patchid = 0x12, .features = ADRENO_USES_OCMEM, .features = ADRENO_USES_OCMEM | IOMMU_FLUSH_TLB_ON_MAP, .pm4fw = "a330_pm4.fw", .pfpfw = "a330_pfp.fw", .gpudev = &adreno_a3xx_gpudev, Loading Loading @@ -192,7 +192,7 @@ static const struct adreno_gpulist { .major = 2, .minor = 0, .patchid = ANY_ID, .features = ADRENO_USES_OCMEM, .features = ADRENO_USES_OCMEM | IOMMU_FLUSH_TLB_ON_MAP, .pm4fw = "a420_pm4.fw", .pfpfw = "a420_pfp.fw", .gpudev = &adreno_a4xx_gpudev, Loading
drivers/gpu/msm/adreno.h +1 −0 Original line number Diff line number Diff line Loading @@ -43,6 +43,7 @@ /* Adreno core features */ #define ADRENO_USES_OCMEM BIT(0) #define IOMMU_FLUSH_TLB_ON_MAP BIT(1) /* Flags to control command packet settings */ #define KGSL_CMD_FLAGS_NONE 0 Loading
drivers/gpu/msm/kgsl_iommu.c +56 −23 Original line number Diff line number Diff line Loading @@ -1690,16 +1690,49 @@ done: return status; } /* * kgsl_iommu_flush_tlb_pt_current - Flush IOMMU TLB if pagetable is * currently used by GPU. * @pt - Pointer to kgsl pagetable structure * * Return - void */ static void kgsl_iommu_flush_tlb_pt_current(struct kgsl_pagetable *pt) { int lock_taken = 0; struct kgsl_device *device = pt->mmu->device; struct kgsl_iommu *iommu = pt->mmu->priv; /* * Check to see if the current thread already holds the device mutex. * If it does not, then take the device mutex which is required for * flushing the tlb */ if (!kgsl_mutex_lock(&device->mutex, &device->mutex_owner)) lock_taken = 1; /* * Flush the tlb only if the iommu device is attached and the pagetable * hasn't been switched yet */ if (kgsl_mmu_is_perprocess(pt->mmu) && iommu->iommu_units[0].dev[KGSL_IOMMU_CONTEXT_USER].attached && kgsl_iommu_pt_equal(pt->mmu, pt, kgsl_iommu_get_current_ptbase(pt->mmu))) kgsl_iommu_default_setstate(pt->mmu, KGSL_MMUFLAGS_TLBFLUSH); if (lock_taken) kgsl_mutex_unlock(&device->mutex, &device->mutex_owner); } static int kgsl_iommu_unmap(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc, unsigned int *tlb_flags) { int ret = 0, lock_taken = 0; int ret = 0; unsigned int range = memdesc->size; struct kgsl_iommu_pt *iommu_pt = pt->priv; struct kgsl_device *device = pt->mmu->device; struct kgsl_iommu *iommu = pt->mmu->priv; /* All GPU addresses as assigned are page aligned, but some functions purturb the gpuaddr with an offset, so apply the Loading @@ -1721,26 +1754,7 @@ kgsl_iommu_unmap(struct kgsl_pagetable *pt, return ret; } /* * Check to see if the current thread already holds the device mutex. * If it does not, then take the device mutex which is required for * flushing the tlb */ if (!kgsl_mutex_lock(&device->mutex, &device->mutex_owner)) lock_taken = 1; /* * Flush the tlb only if the iommu device is attached and the pagetable * hasn't been switched yet */ if (kgsl_mmu_is_perprocess(pt->mmu) && iommu->iommu_units[0].dev[KGSL_IOMMU_CONTEXT_USER].attached && kgsl_iommu_pt_equal(pt->mmu, pt, kgsl_iommu_get_current_ptbase(pt->mmu))) kgsl_iommu_default_setstate(pt->mmu, KGSL_MMUFLAGS_TLBFLUSH); if (lock_taken) kgsl_mutex_unlock(&device->mutex, &device->mutex_owner); kgsl_iommu_flush_tlb_pt_current(pt); return ret; } Loading @@ -1755,6 +1769,8 @@ kgsl_iommu_map(struct kgsl_pagetable *pt, struct kgsl_iommu_pt *iommu_pt = pt->priv; size_t size = memdesc->size; unsigned int protflags; struct kgsl_device *device = pt->mmu->device; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); BUG_ON(NULL == iommu_pt); Loading Loading @@ -1787,6 +1803,23 @@ kgsl_iommu_map(struct kgsl_pagetable *pt, size); } } /* * IOMMU V1 BFBs pre-fetch data beyond what is being used by the core. * This can include both allocated pages and un-allocated pages. * If an un-allocated page is cached, and later used (if it has been * newly dynamically allocated by SW) the SMMU HW should automatically * re-fetch the pages from memory (rather than using the cached * un-allocated page). This logic is known as the re-fetch logic. * In current chips we suspect this re-fetch logic is broken, * it can result in bad translations which can either cause downstream * bus errors, or upstream cores being hung (because of garbage data * being read) -> causing TLB sync stuck issues. As a result SW must * implement the invalidate+map. */ if (adreno_dev->features & IOMMU_FLUSH_TLB_ON_MAP) kgsl_iommu_flush_tlb_pt_current(pt); return ret; } Loading