Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/async_tx (d9e8a3a5) · Commits · e / devices / android_kernel_xiaomi_markw

Documentation/crypto/async-tx-api.txt

+44 −52

Original line number	Diff line number	Diff line
		@@ -13,9 +13,9 @@
		3.6 Constraints
		3.7 Example

		4 DRIVER DEVELOPER NOTES
		4 DMAENGINE DRIVER DEVELOPER NOTES
		4.1 Conformance points
		4.2 "My application needs finer control of hardware channels"
		4.2 "My application needs exclusive control of hardware channels"

		5 SOURCE

		@@ -150,6 +150,7 @@ ops_run_* and ops_complete_* routines in drivers/md/raid5.c for more
		implementation examples.

		4 DRIVER DEVELOPMENT NOTES

		4.1 Conformance points:
		There are a few conformance points required in dmaengine drivers to
		accommodate assumptions made by applications using the async_tx API:
		@@ -158,58 +159,49 @@ accommodate assumptions made by applications using the async_tx API:
		3/ Use async_tx_run_dependencies() in the descriptor clean up path to
		handle submission of dependent operations

		4.2 "My application needs finer control of hardware channels"
		This requirement seems to arise from cases where a DMA engine driver is
		trying to support device-to-memory DMA. The dmaengine and async_tx
		implementations were designed for offloading memory-to-memory
		operations; however, there are some capabilities of the dmaengine layer
		that can be used for platform-specific channel management.
		Platform-specific constraints can be handled by registering the
		application as a 'dma_client' and implementing a 'dma_event_callback' to
		apply a filter to the available channels in the system. Before showing
		how to implement a custom dma_event callback some background of
		dmaengine's client support is required.

		The following routines in dmaengine support multiple clients requesting
		use of a channel:
		- dma_async_client_register(struct dma_client *client)
		- dma_async_client_chan_request(struct dma_client *client)

		dma_async_client_register takes a pointer to an initialized dma_client
		structure. It expects that the 'event_callback' and 'cap_mask' fields
		are already initialized.

		dma_async_client_chan_request triggers dmaengine to notify the client of
		all channels that satisfy the capability mask. It is up to the client's
		event_callback routine to track how many channels the client needs and
		how many it is currently using. The dma_event_callback routine returns a
		dma_state_client code to let dmaengine know the status of the
		allocation.

		Below is the example of how to extend this functionality for
		platform-specific filtering of the available channels beyond the
		standard capability mask:

		static enum dma_state_client
		my_dma_client_callback(struct dma_client *client,
		struct dma_chan *chan, enum dma_state state)
		{
		struct dma_device *dma_dev;
		struct my_platform_specific_dma *plat_dma_dev;

		dma_dev = chan->device;
		plat_dma_dev = container_of(dma_dev,
		struct my_platform_specific_dma,
		dma_dev);

		if (!plat_dma_dev->platform_specific_capability)
		return DMA_DUP;

		. . .
		}
		4.2 "My application needs exclusive control of hardware channels"
		Primarily this requirement arises from cases where a DMA engine driver
		is being used to support device-to-memory operations. A channel that is
		performing these operations cannot, for many platform specific reasons,
		be shared. For these cases the dma_request_channel() interface is
		provided.

		The interface is:
		struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
		dma_filter_fn filter_fn,
		void *filter_param);

		Where dma_filter_fn is defined as:
		typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);

		When the optional 'filter_fn' parameter is set to NULL
		dma_request_channel simply returns the first channel that satisfies the
		capability mask. Otherwise, when the mask parameter is insufficient for
		specifying the necessary channel, the filter_fn routine can be used to
		disposition the available channels in the system. The filter_fn routine
		is called once for each free channel in the system. Upon seeing a
		suitable channel filter_fn returns DMA_ACK which flags that channel to
		be the return value from dma_request_channel. A channel allocated via
		this interface is exclusive to the caller, until dma_release_channel()
		is called.

		The DMA_PRIVATE capability flag is used to tag dma devices that should
		not be used by the general-purpose allocator. It can be set at
		initialization time if it is known that a channel will always be
		private. Alternatively, it is set when dma_request_channel() finds an
		unused "public" channel.

		A couple caveats to note when implementing a driver and consumer:
		1/ Once a channel has been privately allocated it will no longer be
		considered by the general-purpose allocator even after a call to
		dma_release_channel().
		2/ Since capabilities are specified at the device level a dma_device
		with multiple channels will either have all channels public, or all
		channels private.

		5 SOURCE
		include/linux/dmaengine.h: core header file for DMA drivers and clients

		include/linux/dmaengine.h: core header file for DMA drivers and api users
		drivers/dma/dmaengine.c: offload engine channel management routines
		drivers/dma/: location for offload engine drivers
		include/linux/async_tx.h: core header file for the async_tx api

Documentation/dmaengine.txt

0 → 100644

+1 −0

Original line number	Diff line number	Diff line
		See Documentation/crypto/async-tx-api.txt

arch/avr32/mach-at32ap/at32ap700x.c

+3 −12

Original line number	Diff line number	Diff line
		@@ -1305,7 +1305,7 @@ struct platform_device *__init
		at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
		{
		struct platform_device *pdev;
		struct dw_dma_slave *dws;
		struct dw_dma_slave *dws = &data->dma_slave;
		u32 pioa_mask;
		u32 piob_mask;

		@@ -1324,22 +1324,13 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
		ARRAY_SIZE(atmel_mci0_resource)))
		goto fail;

		if (data->dma_slave)
		dws = kmemdup(to_dw_dma_slave(data->dma_slave),
		sizeof(struct dw_dma_slave), GFP_KERNEL);
		else
		dws = kzalloc(sizeof(struct dw_dma_slave), GFP_KERNEL);

		dws->slave.dev = &pdev->dev;
		dws->slave.dma_dev = &dw_dmac0_device.dev;
		dws->slave.reg_width = DMA_SLAVE_WIDTH_32BIT;
		dws->dma_dev = &dw_dmac0_device.dev;
		dws->reg_width = DW_DMA_SLAVE_WIDTH_32BIT;
		dws->cfg_hi = (DWC_CFGH_SRC_PER(0)
		| DWC_CFGH_DST_PER(1));
		dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL
		| DWC_CFGL_HS_SRC_POL);

		data->dma_slave = &dws->slave;

		if (platform_device_add_data(pdev, data,
		sizeof(struct mci_platform_data)))
		goto fail;

crypto/async_tx/async_tx.c

+5 −345

Original line number	Diff line number	Diff line
		@@ -28,351 +28,18 @@
		#include <linux/async_tx.h>

		#ifdef CONFIG_DMA_ENGINE
		/*
		 * Forward declaration: the handler is named in the async_tx_dma
		 * initializer below, ahead of its definition later in this file.
		 */
		static enum dma_state_client
		dma_channel_add_remove(struct dma_client *client,
		struct dma_chan *chan, enum dma_state state);

		/* dmaengine client for async_tx; its events go to dma_channel_add_remove() */
		static struct dma_client async_tx_dma = {
		.event_callback = dma_channel_add_remove,
		/* .cap_mask == 0 defaults to all channels */
		};

		/**
		* dma_cap_mask_all - enable iteration over all operation types
		*/
		static dma_cap_mask_t dma_cap_mask_all;

		/**
		* chan_ref_percpu - tracks channel allocations per core/operation
		* @ref: channel reference currently assigned to this slot, or NULL
		*/
		struct chan_ref_percpu {
		struct dma_chan_ref *ref;
		};

		/* non-zero once channel_table is usable; async_tx_rebalance() checks it */
		static int channel_table_initialized;
		/* one per-cpu table of chan_ref_percpu slots per operation type */
		static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END];

		/**
		* async_tx_lock - protect modification of async_tx_master_list and serialize
		* rebalance operations
		*/
		static spinlock_t async_tx_lock;

		/* every dma_chan_ref known to async_tx, linked through dma_chan_ref.node */
		static LIST_HEAD(async_tx_master_list);

		/* async_tx_issue_pending_all - start all transactions on all channels */
		void async_tx_issue_pending_all(void)
		{
		struct dma_chan_ref *ref;

		rcu_read_lock();
		list_for_each_entry_rcu(ref, &async_tx_master_list, node)
		ref->chan->device->device_issue_pending(ref->chan);
		rcu_read_unlock();
		}
		EXPORT_SYMBOL_GPL(async_tx_issue_pending_all);

		/* dma_wait_for_async_tx - spin wait for a transcation to complete
		* @tx: transaction to wait on
		*/
		enum dma_status
		dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
		{
		enum dma_status status;
		struct dma_async_tx_descriptor *iter;
		struct dma_async_tx_descriptor *parent;

		if (!tx)
		return DMA_SUCCESS;

		/* poll through the dependency chain, return when tx is complete */
		do {
		iter = tx;

		/* find the root of the unsubmitted dependency chain */
		do {
		parent = iter->parent;
		if (!parent)
		break;
		else
		iter = parent;
		} while (parent);

		/* there is a small window for ->parent == NULL and
		* ->cookie == -EBUSY
		*/
		while (iter->cookie == -EBUSY)
		cpu_relax();

		status = dma_sync_wait(iter->chan, iter->cookie);
		} while (status == DMA_IN_PROGRESS \|\| (iter != tx));

		return status;
		}
		EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);

		/* async_tx_run_dependencies - helper routine for dma drivers to process
		 * (start) dependent operations on their target channel
		 * @tx: transaction with dependencies
		 *
		 * Does nothing when @tx has no ->next descriptor.  Otherwise submits the
		 * chain of dependents for as long as they stay on the first dependent's
		 * channel, then issues pending work on that channel.
		 */
		void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
		{
			struct dma_async_tx_descriptor *cur = tx->next;
			struct dma_chan *chan;

			if (!cur)
				return;

			chan = cur->chan;

			/* keep submitting up until a channel switch is detected
			 * in that case we will be called again as a result of
			 * processing the interrupt from async_tx_channel_switch
			 */
			while (cur) {
				struct dma_async_tx_descriptor *following;

				spin_lock_bh(&cur->lock);
				cur->parent = NULL;
				following = cur->next;
				if (following && following->chan == chan) {
					/* ->next will be submitted */
					cur->next = NULL;
				} else {
					/* submit current dep and terminate */
					following = NULL;
				}
				spin_unlock_bh(&cur->lock);

				cur->tx_submit(cur);
				cur = following;
			}

			chan->device->device_issue_pending(chan);
		}
		EXPORT_SYMBOL_GPL(async_tx_run_dependencies);

		/*
		 * free_dma_chan_ref - RCU callback that releases a dma_chan_ref
		 * @rcu: the rcu_head embedded in the dma_chan_ref being freed
		 */
		static void
		free_dma_chan_ref(struct rcu_head *rcu)
		{
			kfree(container_of(rcu, struct dma_chan_ref, rcu));
		}

		static void
		init_dma_chan_ref(struct dma_chan_ref ref, struct dma_chan chan)
		{
		INIT_LIST_HEAD(&ref->node);
		INIT_RCU_HEAD(&ref->rcu);
		ref->chan = chan;
		atomic_set(&ref->count, 0);
		}

		/**
		* get_chan_ref_by_cap - returns the nth channel of the given capability
		* defaults to returning the channel with the desired capability and the
		* lowest reference count if the index can not be satisfied
		* @cap: capability to match
		* @index: nth channel desired, passing -1 has the effect of forcing the
		* default return value
		*/
		static struct dma_chan_ref *
		get_chan_ref_by_cap(enum dma_transaction_type cap, int index)
		{
		struct dma_chan_ref ret_ref = NULL, min_ref = NULL, *ref;

		rcu_read_lock();
		list_for_each_entry_rcu(ref, &async_tx_master_list, node)
		if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
		if (!min_ref)
		min_ref = ref;
		else if (atomic_read(&ref->count) <
		atomic_read(&min_ref->count))
		min_ref = ref;

		if (index-- == 0) {
		ret_ref = ref;
		break;
		}
		}
		rcu_read_unlock();

		if (!ret_ref)
		ret_ref = min_ref;

		if (ret_ref)
		atomic_inc(&ret_ref->count);

		return ret_ref;
		}

		/**
		* async_tx_rebalance - redistribute the available channels, optimize
		* for cpu isolation in the SMP case, and opertaion isolation in the
		* uniprocessor case
		*/
		static void async_tx_rebalance(void)
		{
		int cpu, cap, cpu_idx = 0;
		unsigned long flags;

		if (!channel_table_initialized)
		return;

		spin_lock_irqsave(&async_tx_lock, flags);

		/* undo the last distribution */
		for_each_dma_cap_mask(cap, dma_cap_mask_all)
		for_each_possible_cpu(cpu) {
		struct dma_chan_ref *ref =
		per_cpu_ptr(channel_table[cap], cpu)->ref;
		if (ref) {
		atomic_set(&ref->count, 0);
		per_cpu_ptr(channel_table[cap], cpu)->ref =
		NULL;
		}
		}

		for_each_dma_cap_mask(cap, dma_cap_mask_all)
		for_each_online_cpu(cpu) {
		struct dma_chan_ref *new;
		if (NR_CPUS > 1)
		new = get_chan_ref_by_cap(cap, cpu_idx++);
		else
		new = get_chan_ref_by_cap(cap, -1);

		per_cpu_ptr(channel_table[cap], cpu)->ref = new;
		}

		spin_unlock_irqrestore(&async_tx_lock, flags);
		}

		static enum dma_state_client
		dma_channel_add_remove(struct dma_client *client,
		struct dma_chan *chan, enum dma_state state)
		{
		unsigned long found, flags;
		struct dma_chan_ref master_ref, ref;
		enum dma_state_client ack = DMA_DUP; /* default: take no action */

		switch (state) {
		case DMA_RESOURCE_AVAILABLE:
		found = 0;
		rcu_read_lock();
		list_for_each_entry_rcu(ref, &async_tx_master_list, node)
		if (ref->chan == chan) {
		found = 1;
		break;
		}
		rcu_read_unlock();

		pr_debug("async_tx: dma resource available [%s]\n",
		found ? "old" : "new");

		if (!found)
		ack = DMA_ACK;
		else
		break;

		/* add the channel to the generic management list */
		master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL);
		if (master_ref) {
		/* keep a reference until async_tx is unloaded */
		dma_chan_get(chan);
		init_dma_chan_ref(master_ref, chan);
		spin_lock_irqsave(&async_tx_lock, flags);
		list_add_tail_rcu(&master_ref->node,
		&async_tx_master_list);
		spin_unlock_irqrestore(&async_tx_lock,
		flags);
		} else {
		printk(KERN_WARNING "async_tx: unable to create"
		" new master entry in response to"
		" a DMA_RESOURCE_ADDED event"
		" (-ENOMEM)\n");
		return 0;
		}

		async_tx_rebalance();
		break;
		case DMA_RESOURCE_REMOVED:
		found = 0;
		spin_lock_irqsave(&async_tx_lock, flags);
		list_for_each_entry(ref, &async_tx_master_list, node)
		if (ref->chan == chan) {
		/* permit backing devices to go away */
		dma_chan_put(ref->chan);
		list_del_rcu(&ref->node);
		call_rcu(&ref->rcu, free_dma_chan_ref);
		found = 1;
		break;
		}
		spin_unlock_irqrestore(&async_tx_lock, flags);

		pr_debug("async_tx: dma resource removed [%s]\n",
		found ? "ours" : "not ours");

		if (found)
		ack = DMA_ACK;
		else
		break;

		async_tx_rebalance();
		break;
		case DMA_RESOURCE_SUSPEND:
		case DMA_RESOURCE_RESUME:
		printk(KERN_WARNING "async_tx: does not support dma channel"
		" suspend/resume\n");
		break;
		default:
		BUG();
		}

		return ack;
		}

		static int __init
		async_tx_init(void)
		static int __init async_tx_init(void)
		{
		enum dma_transaction_type cap;

		spin_lock_init(&async_tx_lock);
		bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);

		/* an interrupt will never be an explicit operation type.
		* clearing this bit prevents allocation to a slot in 'channel_table'
		*/
		clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);

		for_each_dma_cap_mask(cap, dma_cap_mask_all) {
		channel_table[cap] = alloc_percpu(struct chan_ref_percpu);
		if (!channel_table[cap])
		goto err;
		}

		channel_table_initialized = 1;
		dma_async_client_register(&async_tx_dma);
		dma_async_client_chan_request(&async_tx_dma);
		dmaengine_get();

		printk(KERN_INFO "async_tx: api initialized (async)\n");

		return 0;
		err:
		printk(KERN_ERR "async_tx: initialization failure\n");

		while (--cap >= 0)
		free_percpu(channel_table[cap]);

		return 1;
		}

		static void __exit async_tx_exit(void)
		{
		enum dma_transaction_type cap;

		channel_table_initialized = 0;

		for_each_dma_cap_mask(cap, dma_cap_mask_all)
		if (channel_table[cap])
		free_percpu(channel_table[cap]);

		dma_async_client_unregister(&async_tx_dma);
		dmaengine_put();
		}

		/**
		@@ -389,14 +56,7 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
		if (depend_tx &&
		dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
		return depend_tx->chan;
		else if (likely(channel_table_initialized)) {
		struct dma_chan_ref *ref;
		int cpu = get_cpu();
		ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref;
		put_cpu();
		return ref ? ref->chan : NULL;
		} else
		return NULL;
		return dma_find_channel(tx_type);
		}
		EXPORT_SYMBOL_GPL(__async_tx_find_channel);
		#else

drivers/dca/dca-core.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -270,6 +270,6 @@ static void __exit dca_exit(void)
		dca_sysfs_exit();
		}

		subsys_initcall(dca_init);
		arch_initcall(dca_init);
		module_exit(dca_exit);