Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fa94cf84 authored by Joerg Roedel's avatar Joerg Roedel
Browse files

Merge branch 'core' into arm/tegra

parents ef954844 13cf0174
Loading
Loading
Loading
Loading
+9 −220
Original line number Diff line number Diff line
@@ -137,20 +137,7 @@ struct kmem_cache *amd_iommu_irq_cache;
static void update_domain(struct protection_domain *domain);
static int protection_domain_init(struct protection_domain *domain);
static void detach_device(struct device *dev);

/* Number of slots in each per-cpu flush ring-buffer */
#define FLUSH_QUEUE_SIZE 256

/* One IOVA range whose freeing is deferred until the next IOTLB flush */
struct flush_queue_entry {
	unsigned long iova_pfn;	/* first page-frame number of the range */
	unsigned long pages;	/* length of the range in pages */
	u64 counter; /* Flush counter when this entry was added to the queue */
};

/* Per-cpu ring-buffer of deferred-free IOVA ranges */
struct flush_queue {
	struct flush_queue_entry *entries;	/* FLUSH_QUEUE_SIZE slots */
	unsigned head, tail;		/* ring indices; empty when head == tail */
	spinlock_t lock;		/* protects head, tail and entries[] */
};
static void iova_domain_flush_tlb(struct iova_domain *iovad);

/*
 * Data container for a dma_ops specific protection domain
@@ -161,36 +148,6 @@ struct dma_ops_domain {

	/* IOVA RB-Tree */
	struct iova_domain iovad;

	struct flush_queue __percpu *flush_queue;

	/*
	 * We need two counter here to be race-free wrt. IOTLB flushing and
	 * adding entries to the flush queue.
	 *
	 * The flush_start_cnt is incremented _before_ the IOTLB flush starts.
	 * New entries added to the flush ring-buffer get their 'counter' value
	 * from here. This way we can make sure that entries added to the queue
	 * (or other per-cpu queues of the same domain) while the TLB is about
	 * to be flushed are not considered to be flushed already.
	 */
	atomic64_t flush_start_cnt;

	/*
	 * The flush_finish_cnt is incremented when an IOTLB flush is complete.
	 * This value is always smaller than flush_start_cnt. The queue_add
	 * function frees all IOVAs that have a counter value smaller than
	 * flush_finish_cnt. This makes sure that we only free IOVAs that are
	 * flushed out of the IOTLB of the domain.
	 */
	atomic64_t flush_finish_cnt;

	/*
	 * Timer to make sure we don't keep IOVAs around unflushed
	 * for too long
	 */
	struct timer_list flush_timer;
	atomic_t flush_timer_on;
};

static struct iova_domain reserved_iova_ranges;
@@ -1788,178 +1745,19 @@ static void free_gcr3_table(struct protection_domain *domain)
	free_page((unsigned long)domain->gcr3_tbl);
}

/*
 * Free the per-cpu flush ring-buffers of a dma_ops domain.
 * Safe to call on a partially-initialized queue: entry pointers are
 * pre-cleared at allocation time, and kfree(NULL) is a no-op.
 */
static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom)
{
	int cpu;

	/* Free each CPU's entry array first ... */
	for_each_possible_cpu(cpu) {
		struct flush_queue *queue;

		queue = per_cpu_ptr(dom->flush_queue, cpu);
		kfree(queue->entries);
	}

	/* ... then the per-cpu queue structures themselves */
	free_percpu(dom->flush_queue);

	dom->flush_queue = NULL;
}

/*
 * Allocate and initialize the per-cpu flush ring-buffers for a dma_ops
 * domain and reset its flush counters.
 *
 * Returns 0 on success or -ENOMEM on allocation failure; on failure all
 * partial allocations are released via dma_ops_domain_free_flush_queue().
 */
static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom)
{
	int cpu;

	atomic64_set(&dom->flush_start_cnt,  0);
	atomic64_set(&dom->flush_finish_cnt, 0);

	dom->flush_queue = alloc_percpu(struct flush_queue);
	if (!dom->flush_queue)
		return -ENOMEM;

	/* First make sure everything is cleared */
	for_each_possible_cpu(cpu) {
		struct flush_queue *queue;

		queue = per_cpu_ptr(dom->flush_queue, cpu);
		queue->head    = 0;
		queue->tail    = 0;
		queue->entries = NULL;
	}

	/* Now start doing the allocation */
	for_each_possible_cpu(cpu) {
		struct flush_queue *queue;

		queue = per_cpu_ptr(dom->flush_queue, cpu);
		queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries),
					 GFP_KERNEL);
		if (!queue->entries) {
			/* clearing pass above makes mid-loop teardown safe */
			dma_ops_domain_free_flush_queue(dom);
			return -ENOMEM;
		}

		spin_lock_init(&queue->lock);
	}

	return 0;
}

/*
 * Flush the domain's IOTLB and wait for completion.  flush_start_cnt is
 * bumped before the flush and flush_finish_cnt after it, so queued
 * entries stamped with flush_start_cnt can later be compared against
 * flush_finish_cnt to tell whether they are already flushed.
 */
static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
{
	atomic64_inc(&dom->flush_start_cnt);
	domain_flush_tlb(&dom->domain);
	domain_flush_complete(&dom->domain);
	atomic64_inc(&dom->flush_finish_cnt);
}

/*
 * True when the ring-buffer has no free slot left.  One slot is kept
 * unused so that a full ring (tail + 1 == head) is distinguishable from
 * an empty one (head == tail).  Caller must hold queue->lock.
 */
static inline bool queue_ring_full(struct flush_queue *queue)
{
	assert_spin_locked(&queue->lock);

	return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head);
}

/* Iterate over the occupied slots of a flush ring-buffer, head to tail */
#define queue_ring_for_each(i, q) \
	for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE)

static inline unsigned queue_ring_add(struct flush_queue *queue)
static void iova_domain_flush_tlb(struct iova_domain *iovad)
{
	unsigned idx = queue->tail;
	struct dma_ops_domain *dom;

	assert_spin_locked(&queue->lock);
	queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE;
	dom = container_of(iovad, struct dma_ops_domain, iovad);

	return idx;
}

/* Drop the oldest entry from the ring-buffer.  Caller holds queue->lock. */
static inline void queue_ring_remove_head(struct flush_queue *queue)
{
	assert_spin_locked(&queue->lock);
	queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE;
}

/*
 * Release back to the IOVA allocator all queued ranges whose flush
 * counter is below flush_finish_cnt, i.e. ranges that are guaranteed to
 * have been flushed out of the IOTLB already.  Caller holds queue->lock.
 */
static void queue_ring_free_flushed(struct dma_ops_domain *dom,
				    struct flush_queue *queue)
{
	u64 counter = atomic64_read(&dom->flush_finish_cnt);
	int idx;

	queue_ring_for_each(idx, queue) {
		/*
		 * This assumes that counter values in the ring-buffer are
		 * monotonically increasing, so everything past the first
		 * not-yet-flushed entry is not flushed either.
		 */
		if (queue->entries[idx].counter >= counter)
			break;

		free_iova_fast(&dom->iovad,
			       queue->entries[idx].iova_pfn,
			       queue->entries[idx].pages);

		queue_ring_remove_head(queue);
	}
}

/*
 * Defer the freeing of an IOVA range until the IOTLB has been flushed.
 * The range is stamped with the current flush_start_cnt and placed on
 * this CPU's ring-buffer; a 10ms timer is armed (if not already) to make
 * sure queued ranges do not linger unflushed indefinitely.
 *
 * @address: bus address of the range (converted to a pfn below)
 * @pages:   range length in pages (rounded up to a power of two)
 */
static void queue_add(struct dma_ops_domain *dom,
		      unsigned long address, unsigned long pages)
{
	struct flush_queue *queue;
	unsigned long flags;
	int idx;

	pages     = __roundup_pow_of_two(pages);
	address >>= PAGE_SHIFT;

	queue = get_cpu_ptr(dom->flush_queue);
	spin_lock_irqsave(&queue->lock, flags);

	/*
	 * First remove the entries from the ring-buffer that are already
	 * flushed to make the below queue_ring_full() check less likely
	 */
	queue_ring_free_flushed(dom, queue);

	/*
	 * When ring-queue is full, flush the entries from the IOTLB so
	 * that we can free all entries with queue_ring_free_flushed()
	 * below.
	 */
	if (queue_ring_full(queue)) {
	dma_ops_domain_flush_tlb(dom);
		queue_ring_free_flushed(dom, queue);
	}

	idx = queue_ring_add(queue);

	queue->entries[idx].iova_pfn = address;
	queue->entries[idx].pages    = pages;
	/* stamped before unlock so the entry can never look pre-flushed */
	queue->entries[idx].counter  = atomic64_read(&dom->flush_start_cnt);

	spin_unlock_irqrestore(&queue->lock, flags);

	/* arm the flush timer exactly once; cmpxchg avoids double-arming */
	if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0)
		mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10));

	put_cpu_ptr(dom->flush_queue);
}

/*
 * Flush-timer callback: flush the domain's IOTLB once, then walk every
 * CPU's ring-buffer and free all entries covered by that flush.
 * @data is the dma_ops_domain pointer cast to unsigned long (old-style
 * timer API).
 */
static void queue_flush_timeout(unsigned long data)
{
	struct dma_ops_domain *dom = (struct dma_ops_domain *)data;
	int cpu;

	/* re-enable arming before flushing; a concurrent queue_add may re-arm */
	atomic_set(&dom->flush_timer_on, 0);

	dma_ops_domain_flush_tlb(dom);

	for_each_possible_cpu(cpu) {
		struct flush_queue *queue;
		unsigned long flags;

		queue = per_cpu_ptr(dom->flush_queue, cpu);
		spin_lock_irqsave(&queue->lock, flags);
		queue_ring_free_flushed(dom, queue);
		spin_unlock_irqrestore(&queue->lock, flags);
	}
}

/*
@@ -1973,11 +1771,6 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)

	del_domain_from_list(&dom->domain);

	if (timer_pending(&dom->flush_timer))
		del_timer(&dom->flush_timer);

	dma_ops_domain_free_flush_queue(dom);

	put_iova_domain(&dom->iovad);

	free_pagetable(&dom->domain);
@@ -2013,16 +1806,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
	init_iova_domain(&dma_dom->iovad, PAGE_SIZE,
			 IOVA_START_PFN, DMA_32BIT_PFN);

	/* Initialize reserved ranges */
	copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);

	if (dma_ops_domain_alloc_flush_queue(dma_dom))
	if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL))
		goto free_dma_dom;

	setup_timer(&dma_dom->flush_timer, queue_flush_timeout,
		    (unsigned long)dma_dom);

	atomic_set(&dma_dom->flush_timer_on, 0);
	/* Initialize reserved ranges */
	copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);

	add_domain_to_list(&dma_dom->domain);

@@ -2619,7 +2407,8 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
		domain_flush_tlb(&dma_dom->domain);
		domain_flush_complete(&dma_dom->domain);
	} else {
		queue_add(dma_dom, dma_addr, pages);
		pages = __roundup_pow_of_two(pages);
		queue_iova(&dma_dom->iovad, dma_addr >> PAGE_SHIFT, pages, 0);
	}
}

+1 −1
Original line number Diff line number Diff line
@@ -1343,7 +1343,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,

	if (mask) {
		BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
		addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
		addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
		desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
	} else
		desc.high = QI_DEV_IOTLB_ADDR(addr);
+8 −12
Original line number Diff line number Diff line
@@ -530,8 +530,8 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu)
		if (node) {
			prop = of_get_property(node, "cache-stash-id", NULL);
			if (!prop) {
				pr_debug("missing cache-stash-id at %s\n",
					 node->full_name);
				pr_debug("missing cache-stash-id at %pOF\n",
					 node);
				of_node_put(node);
				return ~(u32)0;
			}
@@ -557,8 +557,8 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu)
		if (stash_dest_hint == cache_level) {
			prop = of_get_property(node, "cache-stash-id", NULL);
			if (!prop) {
				pr_debug("missing cache-stash-id at %s\n",
					 node->full_name);
				pr_debug("missing cache-stash-id at %pOF\n",
					 node);
				of_node_put(node);
				return ~(u32)0;
			}
@@ -568,8 +568,7 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu)

		prop = of_get_property(node, "next-level-cache", NULL);
		if (!prop) {
			pr_debug("can't find next-level-cache at %s\n",
				 node->full_name);
			pr_debug("can't find next-level-cache at %pOF\n", node);
			of_node_put(node);
			return ~(u32)0;  /* can't traverse any further */
		}
@@ -1063,8 +1062,7 @@ static int fsl_pamu_probe(struct platform_device *pdev)

	guts_node = of_find_matching_node(NULL, guts_device_ids);
	if (!guts_node) {
		dev_err(dev, "could not find GUTS node %s\n",
			dev->of_node->full_name);
		dev_err(dev, "could not find GUTS node %pOF\n", dev->of_node);
		ret = -ENODEV;
		goto error;
	}
@@ -1246,8 +1244,7 @@ static __init int fsl_pamu_init(void)

	pdev = platform_device_alloc("fsl-of-pamu", 0);
	if (!pdev) {
		pr_err("could not allocate device %s\n",
		       np->full_name);
		pr_err("could not allocate device %pOF\n", np);
		ret = -ENOMEM;
		goto error_device_alloc;
	}
@@ -1259,8 +1256,7 @@ static __init int fsl_pamu_init(void)

	ret = platform_device_add(pdev);
	if (ret) {
		pr_err("could not add device %s (err=%i)\n",
		       np->full_name, ret);
		pr_err("could not add device %pOF (err=%i)\n", np, ret);
		goto error_device_add;
	}

+4 −6
Original line number Diff line number Diff line
@@ -619,8 +619,8 @@ static int handle_attach_device(struct fsl_dma_domain *dma_domain,
	for (i = 0; i < num; i++) {
		/* Ensure that LIODN value is valid */
		if (liodn[i] >= PAACE_NUMBER_ENTRIES) {
			pr_debug("Invalid liodn %d, attach device failed for %s\n",
				 liodn[i], dev->of_node->full_name);
			pr_debug("Invalid liodn %d, attach device failed for %pOF\n",
				 liodn[i], dev->of_node);
			ret = -EINVAL;
			break;
		}
@@ -684,8 +684,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain,
		liodn_cnt = len / sizeof(u32);
		ret = handle_attach_device(dma_domain, dev, liodn, liodn_cnt);
	} else {
		pr_debug("missing fsl,liodn property at %s\n",
			 dev->of_node->full_name);
		pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node);
		ret = -EINVAL;
	}

@@ -720,8 +719,7 @@ static void fsl_pamu_detach_device(struct iommu_domain *domain,
	if (prop)
		detach_device(dev, dma_domain);
	else
		pr_debug("missing fsl,liodn property at %s\n",
			 dev->of_node->full_name);
		pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node);
}

static  int configure_domain_geometry(struct iommu_domain *domain, void *data)
+38 −159
Original line number Diff line number Diff line
@@ -458,31 +458,6 @@ static LIST_HEAD(dmar_rmrr_units);
#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

static void flush_unmaps_timeout(unsigned long data);

/* One unmap whose IOTLB flush and IOVA free are deferred */
struct deferred_flush_entry {
	unsigned long iova_pfn;		/* first pfn of the unmapped range */
	unsigned long nrpages;		/* range length in pages */
	struct dmar_domain *domain;	/* domain the range belongs to */
	struct page *freelist;		/* page-table pages to free after flush */
};

/* Max deferred entries per IOMMU before forcing a flush */
#define HIGH_WATER_MARK 250
/* Per-IOMMU batch of deferred unmaps */
struct deferred_flush_table {
	int next;			/* next free slot in entries[] */
	struct deferred_flush_entry entries[HIGH_WATER_MARK];
};

/* Per-cpu deferred-flush state, one table per IOMMU */
struct deferred_flush_data {
	spinlock_t lock;		/* protects tables and size */
	int timer_on;			/* non-zero while timer is armed */
	struct timer_list timer;	/* periodic flush timer */
	long size;			/* total entries queued on this CPU */
	struct deferred_flush_table *tables;	/* g_num_of_iommus tables */
};

static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

@@ -1309,6 +1284,13 @@ static void dma_free_pagelist(struct page *freelist)
	}
}

/*
 * Flush-queue entry destructor: frees the page-table page list that was
 * stashed in the entry's opaque data by queue_iova().
 */
static void iova_entry_free(unsigned long data)
{
	struct page *freelist = (struct page *)data;

	dma_free_pagelist(freelist);
}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
@@ -1622,6 +1604,25 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
				      addr, mask);
}

/*
 * Flush-queue callback for the IOVA layer: flush the IOTLB of every
 * IOMMU the containing dmar_domain is attached to (domain-selective
 * flush), plus device IOTLBs when hardware caching mode is off.
 */
static void iommu_flush_iova(struct iova_domain *iovad)
{
	struct dmar_domain *domain;
	int idx;

	/* iovad is embedded in dmar_domain, recover the container */
	domain = container_of(iovad, struct dmar_domain, iovad);

	for_each_domain_iommu(idx, domain) {
		struct intel_iommu *iommu = g_iommus[idx];
		u16 did = domain->iommu_did[iommu->seq_id];

		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

		/* device IOTLB flushes are expensive under caching mode */
		if (!cap_caching_mode(iommu->cap))
			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
					      0, MAX_AGAW_PFN_WIDTH);
	}
}

static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
@@ -1932,9 +1933,16 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
{
	int adjust_width, agaw;
	unsigned long sagaw;
	int err;

	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
			DMA_32BIT_PFN);

	err = init_iova_flush_queue(&domain->iovad,
				    iommu_flush_iova, iova_entry_free);
	if (err)
		return err;

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
@@ -1986,14 +1994,6 @@ static void domain_exit(struct dmar_domain *domain)
	if (!domain)
		return;

	/* Flush any lazy unmaps that may reference this domain */
	if (!intel_iommu_strict) {
		int cpu;

		for_each_possible_cpu(cpu)
			flush_unmaps_timeout(cpu);
	}

	/* Remove associated devices and clear attached or cached domains */
	rcu_read_lock();
	domain_remove_dev_info(domain);
@@ -3206,7 +3206,7 @@ static int __init init_dmars(void)
	bool copied_tables = false;
	struct device *dev;
	struct intel_iommu *iommu;
	int i, ret, cpu;
	int i, ret;

	/*
	 * for each drhd
@@ -3239,22 +3239,6 @@ static int __init init_dmars(void)
		goto error;
	}

	for_each_possible_cpu(cpu) {
		struct deferred_flush_data *dfd = per_cpu_ptr(&deferred_flush,
							      cpu);

		dfd->tables = kzalloc(g_num_of_iommus *
				      sizeof(struct deferred_flush_table),
				      GFP_KERNEL);
		if (!dfd->tables) {
			ret = -ENOMEM;
			goto free_g_iommus;
		}

		spin_lock_init(&dfd->lock);
		setup_timer(&dfd->timer, flush_unmaps_timeout, cpu);
	}

	for_each_active_iommu(iommu, drhd) {
		g_iommus[iommu->seq_id] = iommu;

@@ -3437,10 +3421,9 @@ static int __init init_dmars(void)
		disable_dmar_iommu(iommu);
		free_dmar_iommu(iommu);
	}
free_g_iommus:
	for_each_possible_cpu(cpu)
		kfree(per_cpu_ptr(&deferred_flush, cpu)->tables);

	kfree(g_iommus);

error:
	return ret;
}
@@ -3645,110 +3628,6 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page,
				  dir, *dev->dma_mask);
}

/*
 * Drain one CPU's deferred-flush tables: flush the IOTLB of each IOMMU
 * that has pending entries, then free the IOVAs and page-table pages of
 * every entry.  Caller holds flush_data->lock.
 */
static void flush_unmaps(struct deferred_flush_data *flush_data)
{
	int i, j;

	flush_data->timer_on = 0;

	/* just flush them all */
	for (i = 0; i < g_num_of_iommus; i++) {
		struct intel_iommu *iommu = g_iommus[i];
		struct deferred_flush_table *flush_table =
				&flush_data->tables[i];
		if (!iommu)
			continue;

		if (!flush_table->next)
			continue;

		/* In caching mode, global flushes turn emulation expensive */
		if (!cap_caching_mode(iommu->cap))
			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH);
		for (j = 0; j < flush_table->next; j++) {
			unsigned long mask;
			struct deferred_flush_entry *entry =
						&flush_table->entries[j];
			unsigned long iova_pfn = entry->iova_pfn;
			unsigned long nrpages = entry->nrpages;
			struct dmar_domain *domain = entry->domain;
			struct page *freelist = entry->freelist;

			/* On real hardware multiple invalidations are expensive */
			if (cap_caching_mode(iommu->cap))
				iommu_flush_iotlb_psi(iommu, domain,
					mm_to_dma_pfn(iova_pfn),
					nrpages, !freelist, 0);
			else {
				mask = ilog2(nrpages);
				iommu_flush_dev_iotlb(domain,
						(uint64_t)iova_pfn << PAGE_SHIFT, mask);
			}
			free_iova_fast(&domain->iovad, iova_pfn, nrpages);
			if (freelist)
				dma_free_pagelist(freelist);
		}
		flush_table->next = 0;
	}

	flush_data->size = 0;
}

/*
 * Timer callback (and direct-call helper): drain the deferred-flush
 * state of the CPU identified by @cpuid under its lock.
 */
static void flush_unmaps_timeout(unsigned long cpuid)
{
	struct deferred_flush_data *flush_data = per_cpu_ptr(&deferred_flush, cpuid);
	unsigned long flags;

	spin_lock_irqsave(&flush_data->lock, flags);
	flush_unmaps(flush_data);
	spin_unlock_irqrestore(&flush_data->lock, flags);
}

/*
 * Queue an unmapped IOVA range on this CPU's deferred-flush table for
 * the domain's IOMMU.  If the table hit HIGH_WATER_MARK, all CPUs are
 * drained first to make room.  Arms the 10ms flush timer if idle.
 *
 * @freelist: page-table pages to free once the flush happened (may be NULL)
 */
static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
		      unsigned long nrpages, struct page *freelist)
{
	unsigned long flags;
	int entry_id, iommu_id;
	struct intel_iommu *iommu;
	struct deferred_flush_entry *entry;
	struct deferred_flush_data *flush_data;

	flush_data = raw_cpu_ptr(&deferred_flush);

	/* Flush all CPUs' entries to avoid deferring too much.  If
	 * this becomes a bottleneck, can just flush us, and rely on
	 * flush timer for the rest.
	 */
	if (flush_data->size == HIGH_WATER_MARK) {
		int cpu;

		for_each_online_cpu(cpu)
			flush_unmaps_timeout(cpu);
	}

	spin_lock_irqsave(&flush_data->lock, flags);

	iommu = domain_get_iommu(dom);
	iommu_id = iommu->seq_id;

	/* claim the next free slot in this IOMMU's table */
	entry_id = flush_data->tables[iommu_id].next;
	++(flush_data->tables[iommu_id].next);

	entry = &flush_data->tables[iommu_id].entries[entry_id];
	entry->domain = dom;
	entry->iova_pfn = iova_pfn;
	entry->nrpages = nrpages;
	entry->freelist = freelist;

	if (!flush_data->timer_on) {
		mod_timer(&flush_data->timer, jiffies + msecs_to_jiffies(10));
		flush_data->timer_on = 1;
	}
	flush_data->size++;
	spin_unlock_irqrestore(&flush_data->lock, flags);
}

static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
{
	struct dmar_domain *domain;
@@ -3784,7 +3663,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
		dma_free_pagelist(freelist);
	} else {
		add_unmap(domain, iova_pfn, nrpages, freelist);
		queue_iova(&domain->iovad, iova_pfn, nrpages,
			   (unsigned long)freelist);
		/*
		 * queue up the release of the unmap to save the 1/6th of the
		 * cpu used up by the iotlb flush operation...
@@ -4721,7 +4601,6 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
static int intel_iommu_cpu_dead(unsigned int cpu)
{
	free_all_cpu_cached_iovas(cpu);
	flush_unmaps_timeout(cpu);
	return 0;
}

Loading