--- /dev/null
+From a8135d0d79e9d0ad3a4ff494fceeaae838becf38 Mon Sep 17 00:00:00 2001
+From: Peter Ujfalusi <peter.ujfalusi@ti.com>
+Date: Mon, 14 Dec 2015 22:47:40 +0200
+Subject: [PATCH 2/3] dmaengine: core: Introduce new, universal API to request
+ a channel
+
+The two API functions can cover most, if not all, of the current APIs used
+to request a channel. With minimal effort dmaengine drivers, platforms and
+dmaengine user drivers can be converted to use the two functions.
+
+struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask);
+
+Requests any channel matching the given capabilities; it can be used to
+request a channel for memcpy, memset, xor, etc, where no hardware
+synchronization is needed.
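+
+A minimal usage sketch (hypothetical client code, not part of this patch)
+for getting a memcpy capable channel:
+
+dma_cap_mask_t mask;
+struct dma_chan *chan;
+
+dma_cap_zero(mask);
+dma_cap_set(DMA_MEMCPY, mask);
+
+chan = dma_request_chan_by_mask(&mask);
+if (IS_ERR(chan))
+	return PTR_ERR(chan);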
+
+struct dma_chan *dma_request_chan(struct device *dev, const char *name);
+Requests a slave channel. dma_request_chan() will try to find the channel
+via DT or ACPI; if the kernel booted in non-DT/ACPI mode it will fall back
+to a filter lookup table and retrieve the needed information from the
+dma_slave_map provided by the DMA drivers.
+This legacy mode needs changes in platform code and in dmaengine drivers,
+after which the dmaengine user drivers can be converted:
+
+For each dmaengine driver an array of the DMA device name, slave name and
+the parameter for the filter function needs to be added:
+
+static const struct dma_slave_map da830_edma_map[] = {
+ { "davinci-mcasp.0", "rx", EDMA_FILTER_PARAM(0, 0) },
+ { "davinci-mcasp.0", "tx", EDMA_FILTER_PARAM(0, 1) },
+ { "davinci-mcasp.1", "rx", EDMA_FILTER_PARAM(0, 2) },
+ { "davinci-mcasp.1", "tx", EDMA_FILTER_PARAM(0, 3) },
+ { "davinci-mcasp.2", "rx", EDMA_FILTER_PARAM(0, 4) },
+ { "davinci-mcasp.2", "tx", EDMA_FILTER_PARAM(0, 5) },
+ { "spi_davinci.0", "rx", EDMA_FILTER_PARAM(0, 14) },
+ { "spi_davinci.0", "tx", EDMA_FILTER_PARAM(0, 15) },
+ { "da830-mmc.0", "rx", EDMA_FILTER_PARAM(0, 16) },
+ { "da830-mmc.0", "tx", EDMA_FILTER_PARAM(0, 17) },
+ { "spi_davinci.1", "rx", EDMA_FILTER_PARAM(0, 18) },
+ { "spi_davinci.1", "tx", EDMA_FILTER_PARAM(0, 19) },
+};
+
+This information is needed by the dmaengine driver, so the platform_data
+needs to be modified and the driver map added to the pdata of the DMA
+driver:
+
+da8xx_edma0_pdata.slave_map = da830_edma_map;
+da8xx_edma0_pdata.slavecnt = ARRAY_SIZE(da830_edma_map);
+
+The DMA driver then needs to configure the needed device -> filter_fn
+mapping before it registers with dma_async_device_register() :
+
+ecc->dma_slave.filter_map.map = info->slave_map;
+ecc->dma_slave.filter_map.mapcnt = info->slavecnt;
+ecc->dma_slave.filter_map.fn = edma_filter_fn;
+
+When neither DT nor ACPI lookup is available, dma_request_chan() will try
+to match the requester's device name against the filter_map's list of
+device names; when a match is found it will use the information from the
+dma_slave_map to get the channel with the dma_get_channel() internal
+function.
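+
+A converted dmaengine user driver then only needs something along these
+lines (sketch; the device pointer and the "rx" channel name are examples):
+
+chan = dma_request_chan(&pdev->dev, "rx");
+if (IS_ERR(chan))
+	return PTR_ERR(chan);	/* includes -EPROBE_DEFER */
+...
+dma_release_channel(chan);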
+
+Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
+Reviewed-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Vinod Koul <vinod.koul@intel.com>
+---
+ Documentation/dmaengine/client.txt | 23 +++-------
+ drivers/dma/dmaengine.c | 89 +++++++++++++++++++++++++++++++++-----
+ include/linux/dmaengine.h | 51 +++++++++++++++++++---
+ 3 files changed, 127 insertions(+), 36 deletions(-)
+
+diff --git a/Documentation/dmaengine/client.txt b/Documentation/dmaengine/client.txt
+index 11fb87f..4b04d89 100644
+--- a/Documentation/dmaengine/client.txt
++++ b/Documentation/dmaengine/client.txt
+@@ -22,25 +22,14 @@ The slave DMA usage consists of following steps:
+ Channel allocation is slightly different in the slave DMA context,
+ client drivers typically need a channel from a particular DMA
+ controller only and even in some cases a specific channel is desired.
+- To request a channel dma_request_channel() API is used.
++ To request a channel dma_request_chan() API is used.
+
+ Interface:
+- struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
+- dma_filter_fn filter_fn,
+- void *filter_param);
+- where dma_filter_fn is defined as:
+- typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
+-
+- The 'filter_fn' parameter is optional, but highly recommended for
+- slave and cyclic channels as they typically need to obtain a specific
+- DMA channel.
+-
+- When the optional 'filter_fn' parameter is NULL, dma_request_channel()
+- simply returns the first channel that satisfies the capability mask.
+-
+- Otherwise, the 'filter_fn' routine will be called once for each free
+- channel which has a capability in 'mask'. 'filter_fn' is expected to
+- return 'true' when the desired DMA channel is found.
++ struct dma_chan *dma_request_chan(struct device *dev, const char *name);
++
++ Which will find and return the 'name' DMA channel associated with the 'dev'
++ device. The association is done via DT, ACPI or board file based
++ dma_slave_map matching table.
+
+ A channel allocated via this interface is exclusive to the caller,
+ until dma_release_channel() is called.
+diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
+index 81a36fc..a094dbb 100644
+--- a/drivers/dma/dmaengine.c
++++ b/drivers/dma/dmaengine.c
+@@ -43,6 +43,7 @@
+
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
++#include <linux/platform_device.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/init.h>
+ #include <linux/module.h>
+@@ -665,27 +666,73 @@ struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask,
+ }
+ EXPORT_SYMBOL_GPL(__dma_request_channel);
+
++static const struct dma_slave_map *dma_filter_match(struct dma_device *device,
++ const char *name,
++ struct device *dev)
++{
++ int i;
++
++ if (!device->filter.mapcnt)
++ return NULL;
++
++ for (i = 0; i < device->filter.mapcnt; i++) {
++ const struct dma_slave_map *map = &device->filter.map[i];
++
++ if (!strcmp(map->devname, dev_name(dev)) &&
++ !strcmp(map->slave, name))
++ return map;
++ }
++
++ return NULL;
++}
++
+ /**
+- * dma_request_slave_channel_reason - try to allocate an exclusive slave channel
++ * dma_request_chan - try to allocate an exclusive slave channel
+ * @dev: pointer to client device structure
+ * @name: slave channel name
+ *
+ * Returns pointer to appropriate DMA channel on success or an error pointer.
+ */
+-struct dma_chan *dma_request_slave_channel_reason(struct device *dev,
+- const char *name)
++struct dma_chan *dma_request_chan(struct device *dev, const char *name)
+ {
++ struct dma_device *d, *_d;
++ struct dma_chan *chan = NULL;
++
+ /* If device-tree is present get slave info from here */
+ if (dev->of_node)
+- return of_dma_request_slave_channel(dev->of_node, name);
++ chan = of_dma_request_slave_channel(dev->of_node, name);
+
+ /* If device was enumerated by ACPI get slave info from here */
+- if (ACPI_HANDLE(dev))
+- return acpi_dma_request_slave_chan_by_name(dev, name);
++ if (has_acpi_companion(dev) && !chan)
++ chan = acpi_dma_request_slave_chan_by_name(dev, name);
++
++ if (chan) {
++ /* Valid channel found or requester needs to be deferred */
++ if (!IS_ERR(chan) || PTR_ERR(chan) == -EPROBE_DEFER)
++ return chan;
++ }
++
++ /* Try to find the channel via the DMA filter map(s) */
++ mutex_lock(&dma_list_mutex);
++ list_for_each_entry_safe(d, _d, &dma_device_list, global_node) {
++ dma_cap_mask_t mask;
++ const struct dma_slave_map *map = dma_filter_match(d, name, dev);
++
++ if (!map)
++ continue;
++
++ dma_cap_zero(mask);
++ dma_cap_set(DMA_SLAVE, mask);
+
+- return ERR_PTR(-ENODEV);
++ chan = find_candidate(d, &mask, d->filter.fn, map->param);
++ if (!IS_ERR(chan))
++ break;
++ }
++ mutex_unlock(&dma_list_mutex);
++
++ return chan ? chan : ERR_PTR(-EPROBE_DEFER);
+ }
+-EXPORT_SYMBOL_GPL(dma_request_slave_channel_reason);
++EXPORT_SYMBOL_GPL(dma_request_chan);
+
+ /**
+ * dma_request_slave_channel - try to allocate an exclusive slave channel
+@@ -697,17 +744,35 @@ EXPORT_SYMBOL_GPL(dma_request_slave_channel_reason);
+ struct dma_chan *dma_request_slave_channel(struct device *dev,
+ const char *name)
+ {
+- struct dma_chan *ch = dma_request_slave_channel_reason(dev, name);
++ struct dma_chan *ch = dma_request_chan(dev, name);
+ if (IS_ERR(ch))
+ return NULL;
+
+- dma_cap_set(DMA_PRIVATE, ch->device->cap_mask);
+- ch->device->privatecnt++;
+-
+ return ch;
+ }
+ EXPORT_SYMBOL_GPL(dma_request_slave_channel);
+
++/**
++ * dma_request_chan_by_mask - allocate a channel satisfying certain capabilities
++ * @mask: capabilities that the channel must satisfy
++ *
++ * Returns pointer to appropriate DMA channel on success or an error pointer.
++ */
++struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask)
++{
++ struct dma_chan *chan;
++
++ if (!mask)
++ return ERR_PTR(-ENODEV);
++
++ chan = __dma_request_channel(mask, NULL, NULL);
++ if (!chan)
++ chan = ERR_PTR(-ENODEV);
++
++ return chan;
++}
++EXPORT_SYMBOL_GPL(dma_request_chan_by_mask);
++
+ void dma_release_channel(struct dma_chan *chan)
+ {
+ mutex_lock(&dma_list_mutex);
+diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
+index c47c68e..d50a6b51 100644
+--- a/include/linux/dmaengine.h
++++ b/include/linux/dmaengine.h
+@@ -607,11 +607,38 @@ enum dmaengine_alignment {
+ };
+
+ /**
++ * struct dma_slave_map - associates slave device and it's slave channel with
++ * parameter to be used by a filter function
++ * @devname: name of the device
++ * @slave: slave channel name
++ * @param: opaque parameter to pass to struct dma_filter.fn
++ */
++struct dma_slave_map {
++ const char *devname;
++ const char *slave;
++ void *param;
++};
++
++/**
++ * struct dma_filter - information for slave device/channel to filter_fn/param
++ * mapping
++ * @fn: filter function callback
++ * @mapcnt: number of slave device/channel in the map
++ * @map: array of channel to filter mapping data
++ */
++struct dma_filter {
++ dma_filter_fn fn;
++ int mapcnt;
++ const struct dma_slave_map *map;
++};
++
++/**
+ * struct dma_device - info on the entity supplying DMA services
+ * @chancnt: how many DMA channels are supported
+ * @privatecnt: how many DMA channels are requested by dma_request_channel
+ * @channels: the list of struct dma_chan
+ * @global_node: list_head for global dma_device_list
++ * @filter: information for device/slave to filter function/param mapping
+ * @cap_mask: one or more dma_capability flags
+ * @max_xor: maximum number of xor sources, 0 if no capability
+ * @max_pq: maximum number of PQ sources and PQ-continue capability
+@@ -666,6 +693,7 @@ struct dma_device {
+ unsigned int privatecnt;
+ struct list_head channels;
+ struct list_head global_node;
++ struct dma_filter filter;
+ dma_cap_mask_t cap_mask;
+ unsigned short max_xor;
+ unsigned short max_pq;
+@@ -1140,9 +1168,11 @@ enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
+ void dma_issue_pending_all(void);
+ struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask,
+ dma_filter_fn fn, void *fn_param);
+-struct dma_chan *dma_request_slave_channel_reason(struct device *dev,
+- const char *name);
+ struct dma_chan *dma_request_slave_channel(struct device *dev, const char *name);
++
++struct dma_chan *dma_request_chan(struct device *dev, const char *name);
++struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask);
++
+ void dma_release_channel(struct dma_chan *chan);
+ int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps);
+ #else
+@@ -1166,16 +1196,21 @@ static inline struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask,
+ {
+ return NULL;
+ }
+-static inline struct dma_chan *dma_request_slave_channel_reason(
+- struct device *dev, const char *name)
+-{
+- return ERR_PTR(-ENODEV);
+-}
+ static inline struct dma_chan *dma_request_slave_channel(struct device *dev,
+ const char *name)
+ {
+ return NULL;
+ }
++static inline struct dma_chan *dma_request_chan(struct device *dev,
++ const char *name)
++{
++ return ERR_PTR(-ENODEV);
++}
++static inline struct dma_chan *dma_request_chan_by_mask(
++ const dma_cap_mask_t *mask)
++{
++ return ERR_PTR(-ENODEV);
++}
+ static inline void dma_release_channel(struct dma_chan *chan)
+ {
+ }
+@@ -1186,6 +1221,8 @@ static inline int dma_get_slave_caps(struct dma_chan *chan,
+ }
+ #endif
+
++#define dma_request_slave_channel_reason(dev, name) dma_request_chan(dev, name)
++
+ static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx)
+ {
+ struct dma_slave_caps caps;
+--
+2.8.1
+
--- /dev/null
+From b36f09c3c441a6e59eab9315032e7d546571de3f Mon Sep 17 00:00:00 2001
+From: Lars-Peter Clausen <lars@metafoo.de>
+Date: Tue, 20 Oct 2015 11:46:28 +0200
+Subject: [PATCH] dmaengine: Add transfer termination synchronization support
+
+The DMAengine API has a long standing race condition that is inherent to
+the API itself. Calling dmaengine_terminate_all() is supposed to stop and
+abort any pending or active transfers that have previously been submitted.
+Unfortunately it is possible that this operation races against a currently
+running (or with some drivers also scheduled) completion callback.
+
+Since the API allows dmaengine_terminate_all() to be called from atomic
+context as well as from within a completion callback it is not possible to
+synchronize to the execution of the completion callback from within
+dmaengine_terminate_all() itself.
+
+This means that a user of the DMAengine API does not know when it is safe
+to free resources used in the completion callback, which can result in a
+use-after-free race condition.
+
+This patch addresses the issue by introducing an explicit synchronization
+primitive to the DMAengine API called dmaengine_synchronize().
+
+The existing dmaengine_terminate_all() is deprecated in favor of
+dmaengine_terminate_sync() and dmaengine_terminate_async(). The former
+aborts all pending and active transfers and synchronizes to the current
+context, meaning it will wait until all running completion callbacks have
+finished. This means it is only possible to call this function from
+non-atomic context. The latter function does not synchronize, but can still
+be used in atomic context or from within a complete callback. It has to be
+followed up by dmaengine_synchronize() before a client can free the
+resources used in a completion callback.
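+
+In a client driver the split typically looks like this (illustrative
+sketch only; "chan" is the client's channel):
+
+/* e.g. from a completion callback or other atomic context */
+dmaengine_terminate_async(chan);
+...
+/* later, from process context, before freeing DMA buffers or resources */
+dmaengine_synchronize(chan);
+
+Callers that are already in non-atomic context can simply use
+dmaengine_terminate_sync(), which combines the two steps.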
+
+In addition to this the semantics of the device_terminate_all() callback
+are slightly relaxed by this patch. It is now OK for a driver to only
+schedule the termination of the active transfer; it does not necessarily
+have to wait until the DMA controller has completely stopped. The driver
+must ensure though that the controller has stopped and no longer accesses
+any memory when the device_synchronize() callback returns.
+
+This was done in part because most drivers do not pay attention to this at
+the moment anyway, and to emphasize that it needs to be done when the
+device_synchronize() callback is implemented. But it also helps with
+implementing support for devices where stopping the controller can require
+operations that may sleep.
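+
+As an illustration only: a provider that defers the actual stop to its
+completion tasklet could satisfy the new callback roughly like this
+(hypothetical foo_dma driver; to_foo_dma_chan() and the tasklet member
+are assumed):
+
+static void foo_dma_synchronize(struct dma_chan *chan)
+{
+	struct foo_dma_chan *fooc = to_foo_dma_chan(chan);
+
+	/* wait until the completion tasklet can no longer run */
+	tasklet_kill(&fooc->tasklet);
+}
+
+dma_dev->device_synchronize = foo_dma_synchronize;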
+
+Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
+Signed-off-by: Vinod Koul <vinod.koul@intel.com>
+---
+ Documentation/dmaengine/client.txt | 38 ++++++++++++++-
+ Documentation/dmaengine/provider.txt | 20 +++++++-
+ drivers/dma/dmaengine.c | 5 +-
+ include/linux/dmaengine.h | 90 ++++++++++++++++++++++++++++++++++++
+ 4 files changed, 148 insertions(+), 5 deletions(-)
+
+diff --git a/Documentation/dmaengine/client.txt b/Documentation/dmaengine/client.txt
+index 11fb87f..d9f9f46 100644
+--- a/Documentation/dmaengine/client.txt
++++ b/Documentation/dmaengine/client.txt
+@@ -128,7 +128,7 @@ The slave DMA usage consists of following steps:
+ transaction.
+
+ For cyclic DMA, a callback function may wish to terminate the
+- DMA via dmaengine_terminate_all().
++ DMA via dmaengine_terminate_async().
+
+ Therefore, it is important that DMA engine drivers drop any
+ locks before calling the callback function which may cause a
+@@ -166,12 +166,29 @@ The slave DMA usage consists of following steps:
+
+ Further APIs:
+
+-1. int dmaengine_terminate_all(struct dma_chan *chan)
++1. int dmaengine_terminate_sync(struct dma_chan *chan)
++ int dmaengine_terminate_async(struct dma_chan *chan)
++ int dmaengine_terminate_all(struct dma_chan *chan) /* DEPRECATED */
+
+ This causes all activity for the DMA channel to be stopped, and may
+ discard data in the DMA FIFO which hasn't been fully transferred.
+ No callback functions will be called for any incomplete transfers.
+
++ Two variants of this function are available.
++
++ dmaengine_terminate_async() might not wait until the DMA has been fully
++ stopped or until any running complete callbacks have finished. But it is
++ possible to call dmaengine_terminate_async() from atomic context or from
++ within a complete callback. dmaengine_synchronize() must be called before it
++ is safe to free the memory accessed by the DMA transfer or free resources
++ accessed from within the complete callback.
++
++ dmaengine_terminate_sync() will wait for the transfer and any running
++ complete callbacks to finish before it returns. But the function must not be
++ called from atomic context or from within a complete callback.
++
++ dmaengine_terminate_all() is deprecated and should not be used in new code.
++
+ 2. int dmaengine_pause(struct dma_chan *chan)
+
+ This pauses activity on the DMA channel without data loss.
+@@ -197,3 +214,20 @@ Further APIs:
+ a running DMA channel. It is recommended that DMA engine users
+ pause or stop (via dmaengine_terminate_all()) the channel before
+ using this API.
++
++5. void dmaengine_synchronize(struct dma_chan *chan)
++
++ Synchronize the termination of the DMA channel to the current context.
++
++ This function should be used after dmaengine_terminate_async() to synchronize
++ the termination of the DMA channel to the current context. The function will
++ wait for the transfer and any running complete callbacks to finish before it
++ returns.
++
++ If dmaengine_terminate_async() is used to stop the DMA channel this function
++ must be called before it is safe to free memory accessed by previously
++ submitted descriptors or to free any resources accessed within the complete
++ callback of previously submitted descriptors.
++
++ The behavior of this function is undefined if dma_async_issue_pending() has
++ been called between dmaengine_terminate_async() and this function.
+diff --git a/Documentation/dmaengine/provider.txt b/Documentation/dmaengine/provider.txt
+index 67d4ce4..122b7f4 100644
+--- a/Documentation/dmaengine/provider.txt
++++ b/Documentation/dmaengine/provider.txt
+@@ -327,8 +327,24 @@ supported.
+
+ * device_terminate_all
+ - Aborts all the pending and ongoing transfers on the channel
+- - This command should operate synchronously on the channel,
+- terminating right away all the channels
++ - For aborted transfers the complete callback should not be called
++ - Can be called from atomic context or from within a complete
++ callback of a descriptor. Must not sleep. Drivers must be able
++ to handle this correctly.
++ - Termination may be asynchronous. The driver does not have to
++ wait until the currently active transfer has completely stopped.
++ See device_synchronize.
++
++ * device_synchronize
++ - Must synchronize the termination of a channel to the current
++ context.
++ - Must make sure that memory for previously submitted
++ descriptors is no longer accessed by the DMA controller.
++ - Must make sure that all complete callbacks for previously
++ submitted descriptors have finished running and none are
++ scheduled to run.
++ - May sleep.
++
+
+ Misc notes (stuff that should be documented, but don't really know
+ where to put them)
+diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
+index 3ecec14..d6fc82e 100644
+--- a/drivers/dma/dmaengine.c
++++ b/drivers/dma/dmaengine.c
+@@ -265,8 +265,11 @@ static void dma_chan_put(struct dma_chan *chan)
+ module_put(dma_chan_to_owner(chan));
+
+ /* This channel is not in use anymore, free it */
+- if (!chan->client_count && chan->device->device_free_chan_resources)
++ if (!chan->client_count && chan->device->device_free_chan_resources) {
++ /* Make sure all operations have completed */
++ dmaengine_synchronize(chan);
+ chan->device->device_free_chan_resources(chan);
++ }
+
+ /* If the channel is used via a DMA request router, free the mapping */
+ if (chan->router && chan->router->route_free) {
+diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
+index c47c68e..4662d9a 100644
+--- a/include/linux/dmaengine.h
++++ b/include/linux/dmaengine.h
+@@ -654,6 +654,8 @@ enum dmaengine_alignment {
+ * paused. Returns 0 or an error code
+ * @device_terminate_all: Aborts all transfers on a channel. Returns 0
+ * or an error code
++ * @device_synchronize: Synchronizes the termination of a transfer to the
++ * current context.
+ * @device_tx_status: poll for transaction completion, the optional
+ * txstate parameter can be supplied with a pointer to get a
+ * struct with auxiliary transfer status information, otherwise the call
+@@ -737,6 +739,7 @@ struct dma_device {
+ int (*device_pause)(struct dma_chan *chan);
+ int (*device_resume)(struct dma_chan *chan);
+ int (*device_terminate_all)(struct dma_chan *chan);
++ void (*device_synchronize)(struct dma_chan *chan);
+
+ enum dma_status (*device_tx_status)(struct dma_chan *chan,
+ dma_cookie_t cookie,
+@@ -828,6 +831,13 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg(
+ src_sg, src_nents, flags);
+ }
+
++/**
++ * dmaengine_terminate_all() - Terminate all active DMA transfers
++ * @chan: The channel for which to terminate the transfers
++ *
++ * This function is DEPRECATED use either dmaengine_terminate_sync() or
++ * dmaengine_terminate_async() instead.
++ */
+ static inline int dmaengine_terminate_all(struct dma_chan *chan)
+ {
+ if (chan->device->device_terminate_all)
+@@ -836,6 +846,86 @@ static inline int dmaengine_terminate_all(struct dma_chan *chan)
+ return -ENOSYS;
+ }
+
++/**
++ * dmaengine_terminate_async() - Terminate all active DMA transfers
++ * @chan: The channel for which to terminate the transfers
++ *
++ * Calling this function will terminate all active and pending descriptors
++ * that have previously been submitted to the channel. It is not guaranteed
++ * though that the transfer for the active descriptor has stopped when the
++ * function returns. Furthermore it is possible the complete callback of a
++ * submitted transfer is still running when this function returns.
++ *
++ * dmaengine_synchronize() needs to be called before it is safe to free
++ * any memory that is accessed by previously submitted descriptors or before
++ * freeing any resources accessed from within the completion callback of any
++ * previously submitted descriptors.
++ *
++ * This function can be called from atomic context as well as from within a
++ * complete callback of a descriptor submitted on the same channel.
++ *
++ * If none of the two conditions above apply consider using
++ * dmaengine_terminate_sync() instead.
++ */
++static inline int dmaengine_terminate_async(struct dma_chan *chan)
++{
++ if (chan->device->device_terminate_all)
++ return chan->device->device_terminate_all(chan);
++
++ return -EINVAL;
++}
++
++/**
++ * dmaengine_synchronize() - Synchronize DMA channel termination
++ * @chan: The channel to synchronize
++ *
++ * Synchronizes the DMA channel termination to the current context. When this
++ * function returns it is guaranteed that all transfers for previously issued
++ * descriptors have stopped and it is safe to free the memory associated
++ * with them. Furthermore it is guaranteed that all complete callback functions
++ * for a previously submitted descriptor have finished running and it is safe to
++ * free resources accessed from within the complete callbacks.
++ *
++ * The behavior of this function is undefined if dma_async_issue_pending() has
++ * been called between dmaengine_terminate_async() and this function.
++ *
++ * This function must only be called from non-atomic context and must not be
++ * called from within a complete callback of a descriptor submitted on the same
++ * channel.
++ */
++static inline void dmaengine_synchronize(struct dma_chan *chan)
++{
++ if (chan->device->device_synchronize)
++ chan->device->device_synchronize(chan);
++}
++
++/**
++ * dmaengine_terminate_sync() - Terminate all active DMA transfers
++ * @chan: The channel for which to terminate the transfers
++ *
++ * Calling this function will terminate all active and pending transfers
++ * that have previously been submitted to the channel. It is similar to
++ * dmaengine_terminate_async() but guarantees that the DMA transfer has actually
++ * stopped and that all complete callbacks have finished running when the
++ * function returns.
++ *
++ * This function must only be called from non-atomic context and must not be
++ * called from within a complete callback of a descriptor submitted on the same
++ * channel.
++ */
++static inline int dmaengine_terminate_sync(struct dma_chan *chan)
++{
++ int ret;
++
++ ret = dmaengine_terminate_async(chan);
++ if (ret)
++ return ret;
++
++ dmaengine_synchronize(chan);
++
++ return 0;
++}
++
+ static inline int dmaengine_pause(struct dma_chan *chan)
+ {
+ if (chan->device->device_pause)
+--
+2.8.1
+
--- /dev/null
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Subject: [PATCH v6 0/4] Fixes / cleanups in dw_dmac (affects a few subsystems)
+Date: Mon, 25 Apr 2016 15:35:05 +0300
+
+This patch series (v3: http://www.spinics.net/lists/kernel/msg2215303.html)
+contains a number of mostly minor fixes and cleanups for the DW DMA driver. A
+couple of them affect the DT binding, so device tree sources may need to be
+updated to maintain compatibility (the old format is still supported, though).
+The rest should be relatively straightforward.
+
+This version has been tested on the following bare metal platforms:
+- ATNGW100 (avr32 based platform) with dmatest
+- Sam460ex (powerpc 44x based platform) with SATA
+- Intel Braswell with UART
+- Intel Galileo (Intel Quark based platform) with UART
+
+(the SATA driver and the Intel Galileo UART support are based on this series
+ and were just published recently for review)
+
+Vinod, there are a few patch sets developed on top of this one, so the idea is
+to keep this in an immutable branch / tag.
+
+Changes since v5:
+- fixed an issue found by kbuildbot
+
+Changes since v4:
+- send proper set of patches
+- add changelog
+
+Changes since v3:
+- add patch 1 to check value of dma-masters property
+- drop the upstreamed patches
+- update patch 2 to keep an array for data-width property as well
+
+Changes since v2:
+- add patch 1 to fix master selection which was broken for a long time
+- remove the "use field-by-field initialization" patch since, as Mans mentioned,
+ it has mostly no value and might even be more error prone
+- rebase on top of recent linux-next
+- wide testing on several platforms
+
+Changes since v1:
+- zeroing struct dw_dma_slave before use
+- fall back to old data_width property if data-width is not found
+- append tags for few patches
+- correct title of cover letter
+- rebase on top of recent linux-next
+
+Andy Shevchenko (4):
+ dmaengine: dw: platform: check nr_masters to be non-zero
+ dmaengine: dw: revisit data_width property
+ dmaengine: dw: keep entire platform data in struct dw_dma
+ dmaengine: dw: pass platform data via struct dw_dma_chip
+
+ Documentation/devicetree/bindings/dma/snps-dma.txt | 6 +-
+ arch/arc/boot/dts/abilis_tb10x.dtsi | 2 +-
+ arch/arm/boot/dts/spear13xx.dtsi | 4 +-
+ drivers/ata/sata_dwc_460ex.c | 2 +-
+ drivers/dma/dw/core.c | 75 ++++++++--------------
+ drivers/dma/dw/pci.c | 5 +-
+ drivers/dma/dw/platform.c | 32 +++++----
+ drivers/dma/dw/regs.h | 5 +-
+ include/linux/dma/dw.h | 5 +-
+ include/linux/platform_data/dma-dw.h | 4 +-
+ sound/soc/intel/common/sst-firmware.c | 2 +-
+ 11 files changed, 64 insertions(+), 78 deletions(-)
+
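+The effect of the last patch on glue drivers can be seen in the dw/pci.c
+hunk below; after the change a probe roughly does the following (sketch
+based on that hunk, pdata may be NULL to use hardware autoconfiguration):
+
+chip->dev = &pdev->dev;
+chip->regs = pcim_iomap_table(pdev)[0];
+chip->irq = pdev->irq;
+chip->pdata = pdata;
+
+ret = dw_dma_probe(chip);
+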
+--- a/drivers/dma/dw/core.c 2016-05-21 23:13:19.964478443 +0200
++++ b/drivers/dma/dw/core.c 2016-05-21 22:47:08.665465180 +0200
+@@ -45,22 +45,19 @@
+ DW_DMA_MSIZE_16; \
+ u8 _dmsize = _is_slave ? _sconfig->dst_maxburst : \
+ DW_DMA_MSIZE_16; \
++ u8 _dms = (_dwc->direction == DMA_MEM_TO_DEV) ? \
++ _dwc->p_master : _dwc->m_master; \
++ u8 _sms = (_dwc->direction == DMA_DEV_TO_MEM) ? \
++ _dwc->p_master : _dwc->m_master; \
+ \
+ (DWC_CTLL_DST_MSIZE(_dmsize) \
+ | DWC_CTLL_SRC_MSIZE(_smsize) \
+ | DWC_CTLL_LLP_D_EN \
+ | DWC_CTLL_LLP_S_EN \
+- | DWC_CTLL_DMS(_dwc->dst_master) \
+- | DWC_CTLL_SMS(_dwc->src_master)); \
++ | DWC_CTLL_DMS(_dms) \
++ | DWC_CTLL_SMS(_sms)); \
+ })
+
+-/*
+- * Number of descriptors to allocate for each channel. This should be
+- * made configurable somehow; preferably, the clients (at least the
+- * ones using slave transfers) should be able to give us a hint.
+- */
+-#define NR_DESCS_PER_CHANNEL 64
+-
+ /* The set of bus widths supported by the DMA controller */
+ #define DW_DMA_BUSWIDTHS \
+ BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+@@ -80,51 +77,65 @@ static struct dw_desc *dwc_first_active(
+ return to_dw_desc(dwc->active_list.next);
+ }
+
+-static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
++static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
+ {
+- struct dw_desc *desc, *_desc;
+- struct dw_desc *ret = NULL;
+- unsigned int i = 0;
+- unsigned long flags;
++ struct dw_desc *desc = txd_to_dw_desc(tx);
++ struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan);
++ dma_cookie_t cookie;
++ unsigned long flags;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+- list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) {
+- i++;
+- if (async_tx_test_ack(&desc->txd)) {
+- list_del(&desc->desc_node);
+- ret = desc;
+- break;
+- }
+- dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc);
+- }
++ cookie = dma_cookie_assign(tx);
++
++ /*
++ * REVISIT: We should attempt to chain as many descriptors as
++ * possible, perhaps even appending to those already submitted
++ * for DMA. But this is hard to do in a race-free manner.
++ */
++
++ list_add_tail(&desc->desc_node, &dwc->queue);
+ spin_unlock_irqrestore(&dwc->lock, flags);
++ dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n",
++ __func__, desc->txd.cookie);
+
+- dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i);
++ return cookie;
++}
+
+- return ret;
++static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
++{
++ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
++ struct dw_desc *desc;
++ dma_addr_t phys;
++
++ desc = dma_pool_zalloc(dw->desc_pool, GFP_ATOMIC, &phys);
++ if (!desc)
++ return NULL;
++
++ dwc->descs_allocated++;
++ INIT_LIST_HEAD(&desc->tx_list);
++ dma_async_tx_descriptor_init(&desc->txd, &dwc->chan);
++ desc->txd.tx_submit = dwc_tx_submit;
++ desc->txd.flags = DMA_CTRL_ACK;
++ desc->txd.phys = phys;
++ return desc;
+ }
+
+-/*
+- * Move a descriptor, including any children, to the free list.
+- * `desc' must not be on any lists.
+- */
+ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
+ {
+- unsigned long flags;
++ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
++ struct dw_desc *child, *_next;
+
+- if (desc) {
+- struct dw_desc *child;
++ if (unlikely(!desc))
++ return;
+
+- spin_lock_irqsave(&dwc->lock, flags);
+- list_for_each_entry(child, &desc->tx_list, desc_node)
+- dev_vdbg(chan2dev(&dwc->chan),
+- "moving child desc %p to freelist\n",
+- child);
+- list_splice_init(&desc->tx_list, &dwc->free_list);
+- dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
+- list_add(&desc->desc_node, &dwc->free_list);
+- spin_unlock_irqrestore(&dwc->lock, flags);
++ list_for_each_entry_safe(child, _next, &desc->tx_list, desc_node) {
++ list_del(&child->desc_node);
++ dma_pool_free(dw->desc_pool, child, child->txd.phys);
++ dwc->descs_allocated--;
+ }
++
++ dma_pool_free(dw->desc_pool, desc, desc->txd.phys);
++ dwc->descs_allocated--;
+ }
+
+ static void dwc_initialize(struct dw_dma_chan *dwc)
+@@ -133,7 +144,7 @@ static void dwc_initialize(struct dw_dma
+ u32 cfghi = DWC_CFGH_FIFO_MODE;
+ u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
+
+- if (dwc->initialized == true)
++ if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags))
+ return;
+
+ cfghi |= DWC_CFGH_DST_PER(dwc->dst_id);
+@@ -146,26 +157,11 @@ static void dwc_initialize(struct dw_dma
+ channel_set_bit(dw, MASK.XFER, dwc->mask);
+ channel_set_bit(dw, MASK.ERROR, dwc->mask);
+
+- dwc->initialized = true;
++ set_bit(DW_DMA_IS_INITIALIZED, &dwc->flags);
+ }
+
+ /*----------------------------------------------------------------------*/
+
+-static inline unsigned int dwc_fast_ffs(unsigned long long v)
+-{
+- /*
+- * We can be a lot more clever here, but this should take care
+- * of the most common optimization.
+- */
+- if (!(v & 7))
+- return 3;
+- else if (!(v & 3))
+- return 2;
+- else if (!(v & 1))
+- return 1;
+- return 0;
+-}
+-
+ static inline void dwc_dump_chan_regs(struct dw_dma_chan *dwc)
+ {
+ dev_err(chan2dev(&dwc->chan),
+@@ -197,12 +193,12 @@ static inline void dwc_do_single_block(s
+ * Software emulation of LLP mode relies on interrupts to continue
+ * multi block transfer.
+ */
+- ctllo = desc->lli.ctllo | DWC_CTLL_INT_EN;
++ ctllo = lli_read(desc, ctllo) | DWC_CTLL_INT_EN;
+
+- channel_writel(dwc, SAR, desc->lli.sar);
+- channel_writel(dwc, DAR, desc->lli.dar);
++ channel_writel(dwc, SAR, lli_read(desc, sar));
++ channel_writel(dwc, DAR, lli_read(desc, dar));
+ channel_writel(dwc, CTL_LO, ctllo);
+- channel_writel(dwc, CTL_HI, desc->lli.ctlhi);
++ channel_writel(dwc, CTL_HI, lli_read(desc, ctlhi));
+ channel_set_bit(dw, CH_EN, dwc->mask);
+
+ /* Move pointer to next descriptor */
+@@ -213,6 +209,7 @@ static inline void dwc_do_single_block(s
+ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
+ {
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
++ u8 lms = DWC_LLP_LMS(dwc->m_master);
+ unsigned long was_soft_llp;
+
+ /* ASSERT: channel is idle */
+@@ -237,7 +234,7 @@ static void dwc_dostart(struct dw_dma_ch
+
+ dwc_initialize(dwc);
+
+- dwc->residue = first->total_len;
++ first->residue = first->total_len;
+ dwc->tx_node_active = &first->tx_list;
+
+ /* Submit first block */
+@@ -248,9 +245,8 @@ static void dwc_dostart(struct dw_dma_ch
+
+ dwc_initialize(dwc);
+
+- channel_writel(dwc, LLP, first->txd.phys);
+- channel_writel(dwc, CTL_LO,
+- DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
++ channel_writel(dwc, LLP, first->txd.phys | lms);
++ channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ channel_writel(dwc, CTL_HI, 0);
+ channel_set_bit(dw, CH_EN, dwc->mask);
+ }
+@@ -293,11 +289,7 @@ dwc_descriptor_complete(struct dw_dma_ch
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ async_tx_ack(&child->txd);
+ async_tx_ack(&desc->txd);
+-
+- list_splice_init(&desc->tx_list, &dwc->free_list);
+- list_move(&desc->desc_node, &dwc->free_list);
+-
+- dma_descriptor_unmap(txd);
++ dwc_desc_put(dwc, desc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ if (callback)
+@@ -368,11 +360,11 @@ static void dwc_scan_descriptors(struct
+
+ head = &desc->tx_list;
+ if (active != head) {
+- /* Update desc to reflect last sent one */
+- if (active != head->next)
+- desc = to_dw_desc(active->prev);
+-
+- dwc->residue -= desc->len;
++ /* Update residue to reflect last sent descriptor */
++ if (active == head->next)
++ desc->residue -= desc->len;
++ else
++ desc->residue -= to_dw_desc(active->prev)->len;
+
+ child = to_dw_desc(active);
+
+@@ -387,8 +379,6 @@ static void dwc_scan_descriptors(struct
+ clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags);
+ }
+
+- dwc->residue = 0;
+-
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ dwc_complete_all(dw, dwc);
+@@ -396,7 +386,6 @@ static void dwc_scan_descriptors(struct
+ }
+
+ if (list_empty(&dwc->active_list)) {
+- dwc->residue = 0;
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+@@ -411,31 +400,31 @@ static void dwc_scan_descriptors(struct
+
+ list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
+ /* Initial residue value */
+- dwc->residue = desc->total_len;
++ desc->residue = desc->total_len;
+
+ /* Check first descriptors addr */
+- if (desc->txd.phys == llp) {
++ if (desc->txd.phys == DWC_LLP_LOC(llp)) {
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+
+ /* Check first descriptors llp */
+- if (desc->lli.llp == llp) {
++ if (lli_read(desc, llp) == llp) {
+ /* This one is currently in progress */
+- dwc->residue -= dwc_get_sent(dwc);
++ desc->residue -= dwc_get_sent(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+
+- dwc->residue -= desc->len;
++ desc->residue -= desc->len;
+ list_for_each_entry(child, &desc->tx_list, desc_node) {
+- if (child->lli.llp == llp) {
++ if (lli_read(child, llp) == llp) {
+ /* Currently in progress */
+- dwc->residue -= dwc_get_sent(dwc);
++ desc->residue -= dwc_get_sent(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+- dwc->residue -= child->len;
++ desc->residue -= child->len;
+ }
+
+ /*
+@@ -457,10 +446,14 @@ static void dwc_scan_descriptors(struct
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ }
+
+-static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli)
++static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_desc *desc)
+ {
+ dev_crit(chan2dev(&dwc->chan), " desc: s0x%x d0x%x l0x%x c0x%x:%x\n",
+- lli->sar, lli->dar, lli->llp, lli->ctlhi, lli->ctllo);
++ lli_read(desc, sar),
++ lli_read(desc, dar),
++ lli_read(desc, llp),
++ lli_read(desc, ctlhi),
++ lli_read(desc, ctllo));
+ }
+
+ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
+@@ -496,9 +489,9 @@ static void dwc_handle_error(struct dw_d
+ */
+ dev_WARN(chan2dev(&dwc->chan), "Bad descriptor submitted for DMA!\n"
+ " cookie: %d\n", bad_desc->txd.cookie);
+- dwc_dump_lli(dwc, &bad_desc->lli);
++ dwc_dump_lli(dwc, bad_desc);
+ list_for_each_entry(child, &bad_desc->tx_list, desc_node)
+- dwc_dump_lli(dwc, &child->lli);
++ dwc_dump_lli(dwc, child);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+@@ -549,7 +542,7 @@ static void dwc_handle_cyclic(struct dw_
+ */
+ if (unlikely(status_err & dwc->mask) ||
+ unlikely(status_xfer & dwc->mask)) {
+- int i;
++ unsigned int i;
+
+ dev_err(chan2dev(&dwc->chan),
+ "cyclic DMA unexpected %s interrupt, stopping DMA transfer\n",
+@@ -571,7 +564,7 @@ static void dwc_handle_cyclic(struct dw_
+ dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+ for (i = 0; i < dwc->cdesc->periods; i++)
+- dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli);
++ dwc_dump_lli(dwc, dwc->cdesc->desc[i]);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ }
+@@ -589,7 +582,7 @@ static void dw_dma_tasklet(unsigned long
+ u32 status_block;
+ u32 status_xfer;
+ u32 status_err;
+- int i;
++ unsigned int i;
+
+ status_block = dma_readl(dw, RAW.BLOCK);
+ status_xfer = dma_readl(dw, RAW.XFER);
+@@ -616,12 +609,17 @@ static void dw_dma_tasklet(unsigned long
+ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
+ {
+ struct dw_dma *dw = dev_id;
+- u32 status = dma_readl(dw, STATUS_INT);
++ u32 status;
++
++ /* Check if we have any interrupt from the DMAC which is not in use */
++ if (!dw->in_use)
++ return IRQ_NONE;
+
++ status = dma_readl(dw, STATUS_INT);
+ dev_vdbg(dw->dma.dev, "%s: status=0x%x\n", __func__, status);
+
+ /* Check if we have any interrupt from the DMAC */
+- if (!status || !dw->in_use)
++ if (!status)
+ return IRQ_NONE;
+
+ /*
+@@ -653,30 +651,6 @@ static irqreturn_t dw_dma_interrupt(int
+
+ /*----------------------------------------------------------------------*/
+
+-static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
+-{
+- struct dw_desc *desc = txd_to_dw_desc(tx);
+- struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan);
+- dma_cookie_t cookie;
+- unsigned long flags;
+-
+- spin_lock_irqsave(&dwc->lock, flags);
+- cookie = dma_cookie_assign(tx);
+-
+- /*
+- * REVISIT: We should attempt to chain as many descriptors as
+- * possible, perhaps even appending to those already submitted
+- * for DMA. But this is hard to do in a race-free manner.
+- */
+-
+- dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__, desc->txd.cookie);
+- list_add_tail(&desc->desc_node, &dwc->queue);
+-
+- spin_unlock_irqrestore(&dwc->lock, flags);
+-
+- return cookie;
+-}
+-
+ static struct dma_async_tx_descriptor *
+ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+@@ -688,10 +662,12 @@ dwc_prep_dma_memcpy(struct dma_chan *cha
+ struct dw_desc *prev;
+ size_t xfer_count;
+ size_t offset;
++ u8 m_master = dwc->m_master;
+ unsigned int src_width;
+ unsigned int dst_width;
+- unsigned int data_width;
++ unsigned int data_width = dw->pdata->data_width[m_master];
+ u32 ctllo;
++ u8 lms = DWC_LLP_LMS(m_master);
+
+ dev_vdbg(chan2dev(chan),
+ "%s: d%pad s%pad l0x%zx f0x%lx\n", __func__,
+@@ -704,11 +680,7 @@ dwc_prep_dma_memcpy(struct dma_chan *cha
+
+ dwc->direction = DMA_MEM_TO_MEM;
+
+- data_width = min_t(unsigned int, dw->data_width[dwc->src_master],
+- dw->data_width[dwc->dst_master]);
+-
+- src_width = dst_width = min_t(unsigned int, data_width,
+- dwc_fast_ffs(src | dest | len));
++ src_width = dst_width = __ffs(data_width | src | dest | len);
+
+ ctllo = DWC_DEFAULT_CTLLO(chan)
+ | DWC_CTLL_DST_WIDTH(dst_width)
+@@ -726,27 +698,27 @@ dwc_prep_dma_memcpy(struct dma_chan *cha
+ if (!desc)
+ goto err_desc_get;
+
+- desc->lli.sar = src + offset;
+- desc->lli.dar = dest + offset;
+- desc->lli.ctllo = ctllo;
+- desc->lli.ctlhi = xfer_count;
++ lli_write(desc, sar, src + offset);
++ lli_write(desc, dar, dest + offset);
++ lli_write(desc, ctllo, ctllo);
++ lli_write(desc, ctlhi, xfer_count);
+ desc->len = xfer_count << src_width;
+
+ if (!first) {
+ first = desc;
+ } else {
+- prev->lli.llp = desc->txd.phys;
+- list_add_tail(&desc->desc_node,
+- &first->tx_list);
++ lli_write(prev, llp, desc->txd.phys | lms);
++ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+ }
+
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Trigger interrupt after last block */
+- prev->lli.ctllo |= DWC_CTLL_INT_EN;
++ lli_set(prev, ctllo, DWC_CTLL_INT_EN);
+
+ prev->lli.llp = 0;
++ lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ first->txd.flags = flags;
+ first->total_len = len;
+
+@@ -768,10 +740,12 @@ dwc_prep_slave_sg(struct dma_chan *chan,
+ struct dw_desc *prev;
+ struct dw_desc *first;
+ u32 ctllo;
++ u8 m_master = dwc->m_master;
++ u8 lms = DWC_LLP_LMS(m_master);
+ dma_addr_t reg;
+ unsigned int reg_width;
+ unsigned int mem_width;
+- unsigned int data_width;
++ unsigned int data_width = dw->pdata->data_width[m_master];
+ unsigned int i;
+ struct scatterlist *sg;
+ size_t total_len = 0;
+@@ -797,8 +771,6 @@ dwc_prep_slave_sg(struct dma_chan *chan,
+ ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
+ DWC_CTLL_FC(DW_DMA_FC_D_M2P);
+
+- data_width = dw->data_width[dwc->src_master];
+-
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct dw_desc *desc;
+ u32 len, dlen, mem;
+@@ -806,17 +778,16 @@ dwc_prep_slave_sg(struct dma_chan *chan,
+ mem = sg_dma_address(sg);
+ len = sg_dma_len(sg);
+
+- mem_width = min_t(unsigned int,
+- data_width, dwc_fast_ffs(mem | len));
++ mem_width = __ffs(data_width | mem | len);
+
+ slave_sg_todev_fill_desc:
+ desc = dwc_desc_get(dwc);
+ if (!desc)
+ goto err_desc_get;
+
+- desc->lli.sar = mem;
+- desc->lli.dar = reg;
+- desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width);
++ lli_write(desc, sar, mem);
++ lli_write(desc, dar, reg);
++ lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width));
+ if ((len >> mem_width) > dwc->block_size) {
+ dlen = dwc->block_size << mem_width;
+ mem += dlen;
+@@ -826,15 +797,14 @@ slave_sg_todev_fill_desc:
+ len = 0;
+ }
+
+- desc->lli.ctlhi = dlen >> mem_width;
++ lli_write(desc, ctlhi, dlen >> mem_width);
+ desc->len = dlen;
+
+ if (!first) {
+ first = desc;
+ } else {
+- prev->lli.llp = desc->txd.phys;
+- list_add_tail(&desc->desc_node,
+- &first->tx_list);
++ lli_write(prev, llp, desc->txd.phys | lms);
++ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+ total_len += dlen;
+@@ -854,8 +824,6 @@ slave_sg_todev_fill_desc:
+ ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
+ DWC_CTLL_FC(DW_DMA_FC_D_P2M);
+
+- data_width = dw->data_width[dwc->dst_master];
+-
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct dw_desc *desc;
+ u32 len, dlen, mem;
+@@ -863,17 +831,16 @@ slave_sg_todev_fill_desc:
+ mem = sg_dma_address(sg);
+ len = sg_dma_len(sg);
+
+- mem_width = min_t(unsigned int,
+- data_width, dwc_fast_ffs(mem | len));
++ mem_width = __ffs(data_width | mem | len);
+
+ slave_sg_fromdev_fill_desc:
+ desc = dwc_desc_get(dwc);
+ if (!desc)
+ goto err_desc_get;
+
+- desc->lli.sar = reg;
+- desc->lli.dar = mem;
+- desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width);
++ lli_write(desc, sar, reg);
++ lli_write(desc, dar, mem);
++ lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width));
+ if ((len >> reg_width) > dwc->block_size) {
+ dlen = dwc->block_size << reg_width;
+ mem += dlen;
+@@ -882,15 +849,14 @@ slave_sg_fromdev_fill_desc:
+ dlen = len;
+ len = 0;
+ }
+- desc->lli.ctlhi = dlen >> reg_width;
++ lli_write(desc, ctlhi, dlen >> reg_width);
+ desc->len = dlen;
+
+ if (!first) {
+ first = desc;
+ } else {
+- prev->lli.llp = desc->txd.phys;
+- list_add_tail(&desc->desc_node,
+- &first->tx_list);
++ lli_write(prev, llp, desc->txd.phys | lms);
++ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+ total_len += dlen;
+@@ -905,9 +871,10 @@ slave_sg_fromdev_fill_desc:
+
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Trigger interrupt after last block */
+- prev->lli.ctllo |= DWC_CTLL_INT_EN;
++ lli_set(prev, ctllo, DWC_CTLL_INT_EN);
+
+ prev->lli.llp = 0;
++ lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ first->total_len = total_len;
+
+ return &first->txd;
+@@ -932,8 +899,8 @@ bool dw_dma_filter(struct dma_chan *chan
+ dwc->src_id = dws->src_id;
+ dwc->dst_id = dws->dst_id;
+
+- dwc->src_master = dws->src_master;
+- dwc->dst_master = dws->dst_master;
++ dwc->m_master = dws->m_master;
++ dwc->p_master = dws->p_master;
+
+ return true;
+ }
+@@ -986,7 +953,7 @@ static int dwc_pause(struct dma_chan *ch
+ while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--)
+ udelay(2);
+
+- dwc->paused = true;
++ set_bit(DW_DMA_IS_PAUSED, &dwc->flags);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+@@ -999,7 +966,7 @@ static inline void dwc_chan_resume(struc
+
+ channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP);
+
+- dwc->paused = false;
++ clear_bit(DW_DMA_IS_PAUSED, &dwc->flags);
+ }
+
+ static int dwc_resume(struct dma_chan *chan)
+@@ -1007,12 +974,10 @@ static int dwc_resume(struct dma_chan *c
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ unsigned long flags;
+
+- if (!dwc->paused)
+- return 0;
+-
+ spin_lock_irqsave(&dwc->lock, flags);
+
+- dwc_chan_resume(dwc);
++ if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags))
++ dwc_chan_resume(dwc);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+@@ -1048,16 +1013,37 @@ static int dwc_terminate_all(struct dma_
+ return 0;
+ }
+
+-static inline u32 dwc_get_residue(struct dw_dma_chan *dwc)
++static struct dw_desc *dwc_find_desc(struct dw_dma_chan *dwc, dma_cookie_t c)
++{
++ struct dw_desc *desc;
++
++ list_for_each_entry(desc, &dwc->active_list, desc_node)
++ if (desc->txd.cookie == c)
++ return desc;
++
++ return NULL;
++}
++
++static u32 dwc_get_residue(struct dw_dma_chan *dwc, dma_cookie_t cookie)
+ {
++ struct dw_desc *desc;
+ unsigned long flags;
+ u32 residue;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+
+- residue = dwc->residue;
+- if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue)
+- residue -= dwc_get_sent(dwc);
++ desc = dwc_find_desc(dwc, cookie);
++ if (desc) {
++ if (desc == dwc_first_active(dwc)) {
++ residue = desc->residue;
++ if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue)
++ residue -= dwc_get_sent(dwc);
++ } else {
++ residue = desc->total_len;
++ }
++ } else {
++ residue = 0;
++ }
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return residue;
+@@ -1078,10 +1064,12 @@ dwc_tx_status(struct dma_chan *chan,
+ dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+- if (ret != DMA_COMPLETE)
+- dma_set_residue(txstate, dwc_get_residue(dwc));
++ if (ret == DMA_COMPLETE)
++ return ret;
++
++ dma_set_residue(txstate, dwc_get_residue(dwc, cookie));
+
+- if (dwc->paused && ret == DMA_IN_PROGRESS)
++ if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags) && ret == DMA_IN_PROGRESS)
+ return DMA_PAUSED;
+
+ return ret;
+@@ -1102,7 +1090,7 @@ static void dwc_issue_pending(struct dma
+
+ static void dw_dma_off(struct dw_dma *dw)
+ {
+- int i;
++ unsigned int i;
+
+ dma_writel(dw, CFG, 0);
+
+@@ -1116,7 +1104,7 @@ static void dw_dma_off(struct dw_dma *dw
+ cpu_relax();
+
+ for (i = 0; i < dw->dma.chancnt; i++)
+- dw->chan[i].initialized = false;
++ clear_bit(DW_DMA_IS_INITIALIZED, &dw->chan[i].flags);
+ }
+
+ static void dw_dma_on(struct dw_dma *dw)
+@@ -1128,9 +1116,6 @@ static int dwc_alloc_chan_resources(stru
+ {
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+- struct dw_desc *desc;
+- int i;
+- unsigned long flags;
+
+ dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+@@ -1161,48 +1146,13 @@ static int dwc_alloc_chan_resources(stru
+ dw_dma_on(dw);
+ dw->in_use |= dwc->mask;
+
+- spin_lock_irqsave(&dwc->lock, flags);
+- i = dwc->descs_allocated;
+- while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) {
+- dma_addr_t phys;
+-
+- spin_unlock_irqrestore(&dwc->lock, flags);
+-
+- desc = dma_pool_alloc(dw->desc_pool, GFP_ATOMIC, &phys);
+- if (!desc)
+- goto err_desc_alloc;
+-
+- memset(desc, 0, sizeof(struct dw_desc));
+-
+- INIT_LIST_HEAD(&desc->tx_list);
+- dma_async_tx_descriptor_init(&desc->txd, chan);
+- desc->txd.tx_submit = dwc_tx_submit;
+- desc->txd.flags = DMA_CTRL_ACK;
+- desc->txd.phys = phys;
+-
+- dwc_desc_put(dwc, desc);
+-
+- spin_lock_irqsave(&dwc->lock, flags);
+- i = ++dwc->descs_allocated;
+- }
+-
+- spin_unlock_irqrestore(&dwc->lock, flags);
+-
+- dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i);
+-
+- return i;
+-
+-err_desc_alloc:
+- dev_info(chan2dev(chan), "only allocated %d descriptors\n", i);
+-
+- return i;
++ return 0;
+ }
+
+ static void dwc_free_chan_resources(struct dma_chan *chan)
+ {
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+- struct dw_desc *desc, *_desc;
+ unsigned long flags;
+ LIST_HEAD(list);
+
+@@ -1215,17 +1165,15 @@ static void dwc_free_chan_resources(stru
+ BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask);
+
+ spin_lock_irqsave(&dwc->lock, flags);
+- list_splice_init(&dwc->free_list, &list);
+- dwc->descs_allocated = 0;
+
+ /* Clear custom channel configuration */
+ dwc->src_id = 0;
+ dwc->dst_id = 0;
+
+- dwc->src_master = 0;
+- dwc->dst_master = 0;
++ dwc->m_master = 0;
++ dwc->p_master = 0;
+
+- dwc->initialized = false;
++ clear_bit(DW_DMA_IS_INITIALIZED, &dwc->flags);
+
+ /* Disable interrupts */
+ channel_clear_bit(dw, MASK.XFER, dwc->mask);
+@@ -1239,11 +1187,6 @@ static void dwc_free_chan_resources(stru
+ if (!dw->in_use)
+ dw_dma_off(dw);
+
+- list_for_each_entry_safe(desc, _desc, &list, desc_node) {
+- dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc);
+- dma_pool_free(dw->desc_pool, desc, desc->txd.phys);
+- }
+-
+ dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
+ }
+
+@@ -1321,6 +1264,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre
+ struct dw_cyclic_desc *retval = NULL;
+ struct dw_desc *desc;
+ struct dw_desc *last = NULL;
++ u8 lms = DWC_LLP_LMS(dwc->m_master);
+ unsigned long was_cyclic;
+ unsigned int reg_width;
+ unsigned int periods;
+@@ -1374,9 +1318,6 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre
+
+ retval = ERR_PTR(-ENOMEM);
+
+- if (periods > NR_DESCS_PER_CHANNEL)
+- goto out_err;
+-
+ cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL);
+ if (!cdesc)
+ goto out_err;
+@@ -1392,50 +1333,50 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+- desc->lli.dar = sconfig->dst_addr;
+- desc->lli.sar = buf_addr + (period_len * i);
+- desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
+- | DWC_CTLL_DST_WIDTH(reg_width)
+- | DWC_CTLL_SRC_WIDTH(reg_width)
+- | DWC_CTLL_DST_FIX
+- | DWC_CTLL_SRC_INC
+- | DWC_CTLL_INT_EN);
+-
+- desc->lli.ctllo |= sconfig->device_fc ?
+- DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
+- DWC_CTLL_FC(DW_DMA_FC_D_M2P);
++ lli_write(desc, dar, sconfig->dst_addr);
++ lli_write(desc, sar, buf_addr + period_len * i);
++ lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan)
++ | DWC_CTLL_DST_WIDTH(reg_width)
++ | DWC_CTLL_SRC_WIDTH(reg_width)
++ | DWC_CTLL_DST_FIX
++ | DWC_CTLL_SRC_INC
++ | DWC_CTLL_INT_EN));
++
++ lli_set(desc, ctllo, sconfig->device_fc ?
++ DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
++ DWC_CTLL_FC(DW_DMA_FC_D_M2P));
+
+ break;
+ case DMA_DEV_TO_MEM:
+- desc->lli.dar = buf_addr + (period_len * i);
+- desc->lli.sar = sconfig->src_addr;
+- desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
+- | DWC_CTLL_SRC_WIDTH(reg_width)
+- | DWC_CTLL_DST_WIDTH(reg_width)
+- | DWC_CTLL_DST_INC
+- | DWC_CTLL_SRC_FIX
+- | DWC_CTLL_INT_EN);
+-
+- desc->lli.ctllo |= sconfig->device_fc ?
+- DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
+- DWC_CTLL_FC(DW_DMA_FC_D_P2M);
++ lli_write(desc, dar, buf_addr + period_len * i);
++ lli_write(desc, sar, sconfig->src_addr);
++ lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan)
++ | DWC_CTLL_SRC_WIDTH(reg_width)
++ | DWC_CTLL_DST_WIDTH(reg_width)
++ | DWC_CTLL_DST_INC
++ | DWC_CTLL_SRC_FIX
++ | DWC_CTLL_INT_EN));
++
++ lli_set(desc, ctllo, sconfig->device_fc ?
++ DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
++ DWC_CTLL_FC(DW_DMA_FC_D_P2M));
+
+ break;
+ default:
+ break;
+ }
+
+- desc->lli.ctlhi = (period_len >> reg_width);
++ lli_write(desc, ctlhi, period_len >> reg_width);
+ cdesc->desc[i] = desc;
+
+ if (last)
+- last->lli.llp = desc->txd.phys;
++ lli_write(last, llp, desc->txd.phys | lms);
+
+ last = desc;
+ }
+
+ /* Let's make a cyclic list */
+- last->lli.llp = cdesc->desc[0]->txd.phys;
++ lli_write(last, llp, cdesc->desc[0]->txd.phys | lms);
+
+ dev_dbg(chan2dev(&dwc->chan),
+ "cyclic prepared buf %pad len %zu period %zu periods %d\n",
+@@ -1466,7 +1407,7 @@ void dw_dma_cyclic_free(struct dma_chan
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+ struct dw_cyclic_desc *cdesc = dwc->cdesc;
+- int i;
++ unsigned int i;
+ unsigned long flags;
+
+ dev_dbg(chan2dev(&dwc->chan), "%s\n", __func__);
+@@ -1490,32 +1431,38 @@ void dw_dma_cyclic_free(struct dma_chan
+ kfree(cdesc->desc);
+ kfree(cdesc);
+
++ dwc->cdesc = NULL;
++
+ clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
+ }
+ EXPORT_SYMBOL(dw_dma_cyclic_free);
+
+ /*----------------------------------------------------------------------*/
+
+-int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata)
++int dw_dma_probe(struct dw_dma_chip *chip)
+ {
++ struct dw_dma_platform_data *pdata;
+ struct dw_dma *dw;
+ bool autocfg = false;
+ unsigned int dw_params;
+- unsigned int max_blk_size = 0;
++ unsigned int i;
+ int err;
+- int i;
+
+ dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL);
+ if (!dw)
+ return -ENOMEM;
+
++ dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL);
++ if (!dw->pdata)
++ return -ENOMEM;
++
+ dw->regs = chip->regs;
+ chip->dw = dw;
+
+ pm_runtime_get_sync(chip->dev);
+
+- if (!pdata) {
+- dw_params = dma_read_byaddr(chip->regs, DW_PARAMS);
++ if (!chip->pdata) {
++ dw_params = dma_readl(dw, DW_PARAMS);
+ dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params);
+
+ autocfg = dw_params >> DW_PARAMS_EN & 1;
+@@ -1524,29 +1471,31 @@ int dw_dma_probe(struct dw_dma_chip *chi
+ goto err_pdata;
+ }
+
+- pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL);
+- if (!pdata) {
+- err = -ENOMEM;
+- goto err_pdata;
+- }
++ /* Reassign the platform data pointer */
++ pdata = dw->pdata;
+
+ /* Get hardware configuration parameters */
+ pdata->nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1;
+ pdata->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1;
+ for (i = 0; i < pdata->nr_masters; i++) {
+ pdata->data_width[i] =
+- (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2;
++ 4 << (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3);
+ }
+- max_blk_size = dma_readl(dw, MAX_BLK_SIZE);
++ pdata->block_size = dma_readl(dw, MAX_BLK_SIZE);
+
+ /* Fill platform data with the default values */
+ pdata->is_private = true;
+ pdata->is_memcpy = true;
+ pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING;
+ pdata->chan_priority = CHAN_PRIORITY_ASCENDING;
+- } else if (pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
++ } else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
+ err = -EINVAL;
+ goto err_pdata;
++ } else {
++ memcpy(dw->pdata, chip->pdata, sizeof(*dw->pdata));
++
++ /* Reassign the platform data pointer */
++ pdata = dw->pdata;
+ }
+
+ dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan),
+@@ -1556,11 +1505,6 @@ int dw_dma_probe(struct dw_dma_chip *chi
+ goto err_pdata;
+ }
+
+- /* Get hardware configuration parameters */
+- dw->nr_masters = pdata->nr_masters;
+- for (i = 0; i < dw->nr_masters; i++)
+- dw->data_width[i] = pdata->data_width[i];
+-
+ /* Calculate all channel mask before DMA setup */
+ dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
+
+@@ -1607,7 +1551,6 @@ int dw_dma_probe(struct dw_dma_chip *chi
+
+ INIT_LIST_HEAD(&dwc->active_list);
+ INIT_LIST_HEAD(&dwc->queue);
+- INIT_LIST_HEAD(&dwc->free_list);
+
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+
+@@ -1615,11 +1558,9 @@ int dw_dma_probe(struct dw_dma_chip *chi
+
+ /* Hardware configuration */
+ if (autocfg) {
+- unsigned int dwc_params;
+ unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1;
+- void __iomem *addr = chip->regs + r * sizeof(u32);
+-
+- dwc_params = dma_read_byaddr(addr, DWC_PARAMS);
++ void __iomem *addr = &__dw_regs(dw)->DWC_PARAMS[r];
++ unsigned int dwc_params = dma_readl_native(addr);
+
+ dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i,
+ dwc_params);
+@@ -1630,16 +1571,15 @@ int dw_dma_probe(struct dw_dma_chip *chi
+ * up to 0x0a for 4095.
+ */
+ dwc->block_size =
+- (4 << ((max_blk_size >> 4 * i) & 0xf)) - 1;
++ (4 << ((pdata->block_size >> 4 * i) & 0xf)) - 1;
+ dwc->nollp =
+ (dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0;
+ } else {
+ dwc->block_size = pdata->block_size;
+
+ /* Check if channel supports multi block transfer */
+- channel_writel(dwc, LLP, 0xfffffffc);
+- dwc->nollp =
+- (channel_readl(dwc, LLP) & 0xfffffffc) == 0;
++ channel_writel(dwc, LLP, DWC_LLP_LOC(0xffffffff));
++ dwc->nollp = DWC_LLP_LOC(channel_readl(dwc, LLP)) == 0;
+ channel_writel(dwc, LLP, 0);
+ }
+ }
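+
+A quick note on the autoconfiguration path above: nr_channels and
+nr_masters are decoded straight from the DW_PARAMS word, and data_width
+is now stored in bytes instead of the old encoded index. The following
+is a minimal, standalone sketch of that arithmetic; the register value
+is made up, and only the NR_CHAN/NR_MASTER shifts (visible in the
+regs.h hunk later in this patch) are taken from the driver:
+
+#include <stdio.h>
+
+/* shifts as listed under "Bitfields in DW_PARAMS" in drivers/dma/dw/regs.h */
+#define DW_PARAMS_NR_CHAN 8 /* number of channels */
+#define DW_PARAMS_NR_MASTER 11 /* number of AHB masters */
+
+int main(void)
+{
+        unsigned int dw_params = 0x0000af00; /* made-up example value */
+        unsigned int nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1;
+        unsigned int nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1;
+        unsigned int field;
+
+        printf("channels: %u, masters: %u\n", nr_channels, nr_masters);
+
+        /*
+         * data_width is kept in bytes now: a 2-bit field value of 0..3
+         * maps to 4, 8, 16 or 32 bytes (32..256 bits), which is what
+         * "4 << (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3)" computes above.
+         */
+        for (field = 0; field < 4; field++)
+                printf("width field %u -> %u bytes\n", field, 4 << field);
+
+        return 0;
+}
+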
+--- a/drivers/dma/dw/pci.c 2016-05-21 23:13:19.964478443 +0200
++++ b/drivers/dma/dw/pci.c 2016-05-21 22:47:08.665465180 +0200
+@@ -17,8 +17,8 @@
+
+ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
+ {
++ const struct dw_dma_platform_data *pdata = (void *)pid->driver_data;
+ struct dw_dma_chip *chip;
+- struct dw_dma_platform_data *pdata = (void *)pid->driver_data;
+ int ret;
+
+ ret = pcim_enable_device(pdev);
+@@ -49,8 +49,9 @@ static int dw_pci_probe(struct pci_dev *
+ chip->dev = &pdev->dev;
+ chip->regs = pcim_iomap_table(pdev)[0];
+ chip->irq = pdev->irq;
++ chip->pdata = pdata;
+
+- ret = dw_dma_probe(chip, pdata);
++ ret = dw_dma_probe(chip);
+ if (ret)
+ return ret;
+
+@@ -108,6 +109,10 @@ static const struct pci_device_id dw_pci
+
+ /* Haswell */
+ { PCI_VDEVICE(INTEL, 0x9c60) },
++
++ /* Broadwell */
++ { PCI_VDEVICE(INTEL, 0x9ce0) },
++
+ { }
+ };
+ MODULE_DEVICE_TABLE(pci, dw_pci_id_table);
+--- a/drivers/dma/dw/platform.c 2016-05-21 23:13:19.964478443 +0200
++++ b/drivers/dma/dw/platform.c 2016-05-21 22:47:08.665465180 +0200
+@@ -42,13 +42,13 @@ static struct dma_chan *dw_dma_of_xlate(
+
+ slave.src_id = dma_spec->args[0];
+ slave.dst_id = dma_spec->args[0];
+- slave.src_master = dma_spec->args[1];
+- slave.dst_master = dma_spec->args[2];
++ slave.m_master = dma_spec->args[1];
++ slave.p_master = dma_spec->args[2];
+
+ if (WARN_ON(slave.src_id >= DW_DMA_MAX_NR_REQUESTS ||
+ slave.dst_id >= DW_DMA_MAX_NR_REQUESTS ||
+- slave.src_master >= dw->nr_masters ||
+- slave.dst_master >= dw->nr_masters))
++ slave.m_master >= dw->pdata->nr_masters ||
++ slave.p_master >= dw->pdata->nr_masters))
+ return NULL;
+
+ dma_cap_zero(cap);
+@@ -66,8 +66,8 @@ static bool dw_dma_acpi_filter(struct dm
+ .dma_dev = dma_spec->dev,
+ .src_id = dma_spec->slave_id,
+ .dst_id = dma_spec->slave_id,
+- .src_master = 1,
+- .dst_master = 0,
++ .m_master = 0,
++ .p_master = 1,
+ };
+
+ return dw_dma_filter(chan, &slave);
+@@ -103,18 +103,28 @@ dw_dma_parse_dt(struct platform_device *
+ struct device_node *np = pdev->dev.of_node;
+ struct dw_dma_platform_data *pdata;
+ u32 tmp, arr[DW_DMA_MAX_NR_MASTERS];
++ u32 nr_masters;
++ u32 nr_channels;
+
+ if (!np) {
+ dev_err(&pdev->dev, "Missing DT data\n");
+ return NULL;
+ }
+
++ if (of_property_read_u32(np, "dma-masters", &nr_masters))
++ return NULL;
++ if (nr_masters < 1 || nr_masters > DW_DMA_MAX_NR_MASTERS)
++ return NULL;
++
++ if (of_property_read_u32(np, "dma-channels", &nr_channels))
++ return NULL;
++
+ pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return NULL;
+
+- if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels))
+- return NULL;
++ pdata->nr_masters = nr_masters;
++ pdata->nr_channels = nr_channels;
+
+ if (of_property_read_bool(np, "is_private"))
+ pdata->is_private = true;
+@@ -128,17 +138,13 @@ dw_dma_parse_dt(struct platform_device *
+ if (!of_property_read_u32(np, "block_size", &tmp))
+ pdata->block_size = tmp;
+
+- if (!of_property_read_u32(np, "dma-masters", &tmp)) {
+- if (tmp > DW_DMA_MAX_NR_MASTERS)
+- return NULL;
+-
+- pdata->nr_masters = tmp;
+- }
+-
+- if (!of_property_read_u32_array(np, "data_width", arr,
+- pdata->nr_masters))
+- for (tmp = 0; tmp < pdata->nr_masters; tmp++)
++ if (!of_property_read_u32_array(np, "data-width", arr, nr_masters)) {
++ for (tmp = 0; tmp < nr_masters; tmp++)
+ pdata->data_width[tmp] = arr[tmp];
++ } else if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) {
++ for (tmp = 0; tmp < nr_masters; tmp++)
++ pdata->data_width[tmp] = BIT(arr[tmp] & 0x07);
++ }
+
+ return pdata;
+ }
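+
+For the device tree fallback above: the binding now prefers a
+"data-width" property given in bytes and only then falls back to the
+legacy "data_width" property, whose values are exponents of two (the
+old 0 = 8 bits, 1 = 16 bits, ... encoding). A minimal sketch of the
+equivalence with made-up property values; BIT(n) stands for the
+kernel's 1 << n helper used in the conversion above:
+
+#include <stdio.h>
+
+#define BIT(n) (1U << (n)) /* same meaning as the kernel macro */
+
+int main(void)
+{
+        /* a legacy "data_width = <3 3>;" entry (two 64-bit masters) ... */
+        unsigned int legacy[2] = { 3, 3 };
+        unsigned int i;
+
+        /* ... is equivalent to the new "data-width = <8 8>;" in bytes */
+        for (i = 0; i < 2; i++)
+                printf("legacy value %u -> %u bytes\n", legacy[i],
+                       BIT(legacy[i] & 0x07));
+        return 0;
+}
+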
+@@ -155,8 +161,7 @@ static int dw_probe(struct platform_devi
+ struct dw_dma_chip *chip;
+ struct device *dev = &pdev->dev;
+ struct resource *mem;
+- const struct acpi_device_id *id;
+- struct dw_dma_platform_data *pdata;
++ const struct dw_dma_platform_data *pdata;
+ int err;
+
+ chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+@@ -179,13 +184,9 @@ static int dw_probe(struct platform_devi
+ pdata = dev_get_platdata(dev);
+ if (!pdata)
+ pdata = dw_dma_parse_dt(pdev);
+- if (!pdata && has_acpi_companion(dev)) {
+- id = acpi_match_device(dev->driver->acpi_match_table, dev);
+- if (id)
+- pdata = (struct dw_dma_platform_data *)id->driver_data;
+- }
+
+ chip->dev = dev;
++ chip->pdata = pdata;
+
+ chip->clk = devm_clk_get(chip->dev, "hclk");
+ if (IS_ERR(chip->clk))
+@@ -196,7 +197,7 @@ static int dw_probe(struct platform_devi
+
+ pm_runtime_enable(&pdev->dev);
+
+- err = dw_dma_probe(chip, pdata);
++ err = dw_dma_probe(chip);
+ if (err)
+ goto err_dw_dma_probe;
+
+@@ -239,7 +240,19 @@ static void dw_shutdown(struct platform_
+ {
+ struct dw_dma_chip *chip = platform_get_drvdata(pdev);
+
++ /*
++ * We have to call dw_dma_disable() to stop any ongoing transfer. On
++ * some platforms we can't do that since the DMA device is powered off,
++ * and we have no way to check whether a given platform is affected.
++ * That's why we call pm_runtime_get_sync() / pm_runtime_put()
++ * unconditionally. On the other hand we can't use
++ * pm_runtime_suspended(), because the runtime PM framework is not
++ * fully used by the driver.
++ */
++ pm_runtime_get_sync(chip->dev);
+ dw_dma_disable(chip);
++ pm_runtime_put_sync_suspend(chip->dev);
++
+ clk_disable_unprepare(chip->clk);
+ }
+
+@@ -252,17 +265,8 @@ MODULE_DEVICE_TABLE(of, dw_dma_of_id_tab
+ #endif
+
+ #ifdef CONFIG_ACPI
+-static struct dw_dma_platform_data dw_dma_acpi_pdata = {
+- .nr_channels = 8,
+- .is_private = true,
+- .chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
+- .chan_priority = CHAN_PRIORITY_ASCENDING,
+- .block_size = 4095,
+- .nr_masters = 2,
+-};
+-
+ static const struct acpi_device_id dw_dma_acpi_id_table[] = {
+- { "INTL9C60", (kernel_ulong_t)&dw_dma_acpi_pdata },
++ { "INTL9C60", 0 },
+ { }
+ };
+ MODULE_DEVICE_TABLE(acpi, dw_dma_acpi_id_table);
+--- a/drivers/dma/dw/regs.h 2016-05-21 23:13:19.964478443 +0200
++++ b/drivers/dma/dw/regs.h 2016-05-21 22:47:08.665465180 +0200
+@@ -114,10 +114,6 @@ struct dw_dma_regs {
+ #define dma_writel_native writel
+ #endif
+
+-/* To access the registers in early stage of probe */
+-#define dma_read_byaddr(addr, name) \
+- dma_readl_native((addr) + offsetof(struct dw_dma_regs, name))
+-
+ /* Bitfields in DW_PARAMS */
+ #define DW_PARAMS_NR_CHAN 8 /* number of channels */
+ #define DW_PARAMS_NR_MASTER 11 /* number of AHB masters */
+@@ -143,6 +139,10 @@ enum dw_dma_msize {
+ DW_DMA_MSIZE_256,
+ };
+
++/* Bitfields in LLP */
++#define DWC_LLP_LMS(x) ((x) & 3) /* list master select */
++#define DWC_LLP_LOC(x) ((x) & ~3) /* next lli */
++
+ /* Bitfields in CTL_LO */
+ #define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? */
+ #define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */
+@@ -150,7 +150,7 @@ enum dw_dma_msize {
+ #define DWC_CTLL_DST_INC (0<<7) /* DAR update/not */
+ #define DWC_CTLL_DST_DEC (1<<7)
+ #define DWC_CTLL_DST_FIX (2<<7)
+-#define DWC_CTLL_SRC_INC (0<<7) /* SAR update/not */
++#define DWC_CTLL_SRC_INC (0<<9) /* SAR update/not */
+ #define DWC_CTLL_SRC_DEC (1<<9)
+ #define DWC_CTLL_SRC_FIX (2<<9)
+ #define DWC_CTLL_DST_MSIZE(n) ((n)<<11) /* burst, #elements */
+@@ -216,6 +216,8 @@ enum dw_dma_msize {
+ enum dw_dmac_flags {
+ DW_DMA_IS_CYCLIC = 0,
+ DW_DMA_IS_SOFT_LLP = 1,
++ DW_DMA_IS_PAUSED = 2,
++ DW_DMA_IS_INITIALIZED = 3,
+ };
+
+ struct dw_dma_chan {
+@@ -224,8 +226,6 @@ struct dw_dma_chan {
+ u8 mask;
+ u8 priority;
+ enum dma_transfer_direction direction;
+- bool paused;
+- bool initialized;
+
+ /* software emulation of the LLP transfers */
+ struct list_head *tx_node_active;
+@@ -236,8 +236,6 @@ struct dw_dma_chan {
+ unsigned long flags;
+ struct list_head active_list;
+ struct list_head queue;
+- struct list_head free_list;
+- u32 residue;
+ struct dw_cyclic_desc *cdesc;
+
+ unsigned int descs_allocated;
+@@ -249,8 +247,8 @@ struct dw_dma_chan {
+ /* custom slave configuration */
+ u8 src_id;
+ u8 dst_id;
+- u8 src_master;
+- u8 dst_master;
++ u8 m_master;
++ u8 p_master;
+
+ /* configuration passed via .device_config */
+ struct dma_slave_config dma_sconfig;
+@@ -283,9 +281,8 @@ struct dw_dma {
+ u8 all_chan_mask;
+ u8 in_use;
+
+- /* hardware configuration */
+- unsigned char nr_masters;
+- unsigned char data_width[DW_DMA_MAX_NR_MASTERS];
++ /* platform data */
++ struct dw_dma_platform_data *pdata;
+ };
+
+ static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
+@@ -308,32 +305,51 @@ static inline struct dw_dma *to_dw_dma(s
+ return container_of(ddev, struct dw_dma, dma);
+ }
+
++#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO
++typedef __be32 __dw32;
++#else
++typedef __le32 __dw32;
++#endif
++
+ /* LLI == Linked List Item; a.k.a. DMA block descriptor */
+ struct dw_lli {
+ /* values that are not changed by hardware */
+- u32 sar;
+- u32 dar;
+- u32 llp; /* chain to next lli */
+- u32 ctllo;
++ __dw32 sar;
++ __dw32 dar;
++ __dw32 llp; /* chain to next lli */
++ __dw32 ctllo;
+ /* values that may get written back: */
+- u32 ctlhi;
++ __dw32 ctlhi;
+ /* sstat and dstat can snapshot peripheral register state.
+ * silicon config may discard either or both...
+ */
+- u32 sstat;
+- u32 dstat;
++ __dw32 sstat;
++ __dw32 dstat;
+ };
+
+ struct dw_desc {
+ /* FIRST values the hardware uses */
+ struct dw_lli lli;
+
++#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO
++#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_be32(v))
++#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_be32(v))
++#define lli_read(d, reg) be32_to_cpu((d)->lli.reg)
++#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_be32(v))
++#else
++#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_le32(v))
++#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_le32(v))
++#define lli_read(d, reg) le32_to_cpu((d)->lli.reg)
++#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_le32(v))
++#endif
++
+ /* THEN values for driver housekeeping */
+ struct list_head desc_node;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor txd;
+ size_t len;
+ size_t total_len;
++ u32 residue;
+ };
+
+ #define to_dw_desc(h) list_entry(h, struct dw_desc, desc_node)
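+
+The __dw32 typedef and the lli_* accessors above keep the in-memory
+descriptors in the byte order the DMA controller reads, independent of
+CPU endianness. Below is a standalone sketch of the same pattern;
+BIG_ENDIAN_IO, cpu_to_hw32()/hw32_to_cpu() and the trimmed struct are
+illustrative stand-ins for CONFIG_DW_DMAC_BIG_ENDIAN_IO, the kernel's
+cpu_to_le32()/cpu_to_be32() helpers and struct dw_lli (the byte swap
+assumes a little-endian host):
+
+#include <stdint.h>
+#include <stdio.h>
+
+uint32_t swab32(uint32_t v)
+{
+        return (v >> 24) | ((v >> 8) & 0x0000ff00) |
+               ((v << 8) & 0x00ff0000) | (v << 24);
+}
+
+#ifdef BIG_ENDIAN_IO /* stand-in for CONFIG_DW_DMAC_BIG_ENDIAN_IO */
+#define cpu_to_hw32(v) swab32(v)
+#define hw32_to_cpu(v) swab32(v)
+#else
+#define cpu_to_hw32(v) (v)
+#define hw32_to_cpu(v) (v)
+#endif
+
+/* trimmed-down stand-in for struct dw_lli */
+struct lli { uint32_t sar, dar, llp, ctllo, ctlhi; };
+
+/* same shape as lli_write()/lli_set()/lli_read() in the patch */
+#define lli_write(d, reg, v) ((d)->reg = cpu_to_hw32(v))
+#define lli_set(d, reg, v) ((d)->reg |= cpu_to_hw32(v))
+#define lli_read(d, reg) hw32_to_cpu((d)->reg)
+
+int main(void)
+{
+        struct lli desc = { 0 };
+
+        lli_write(&desc, sar, 0x12345678);
+        lli_set(&desc, ctllo, 1u << 0); /* e.g. DWC_CTLL_INT_EN */
+        printf("sar 0x%08x ctllo 0x%08x\n",
+               (unsigned int)lli_read(&desc, sar),
+               (unsigned int)lli_read(&desc, ctllo));
+        return 0;
+}
+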
+--- a/include/linux/dma/dw.h
++++ b/include/linux/dma/dw.h
+@@ -27,6 +27,7 @@ struct dw_dma;
+ * @regs: memory mapped I/O space
+ * @clk: hclk clock
+ * @dw: struct dw_dma that is filled by dw_dma_probe()
++ * @pdata: pointer to platform data
+ */
+ struct dw_dma_chip {
+ struct device *dev;
+@@ -34,10 +35,12 @@ struct dw_dma_chip {
+ void __iomem *regs;
+ struct clk *clk;
+ struct dw_dma *dw;
++
++ const struct dw_dma_platform_data *pdata;
+ };
+
+ /* Export to the platform drivers */
+-int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata);
++int dw_dma_probe(struct dw_dma_chip *chip);
+ int dw_dma_remove(struct dw_dma_chip *chip);
+
+ /* DMA API extensions */
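+
+As the pci.c hunk above shows, glue drivers now hand their (possibly
+NULL) platform data to the core through chip->pdata instead of a
+second dw_dma_probe() argument. A trimmed-down, hypothetical mock of
+that calling convention (the structures and the probe body are reduced
+to the bare minimum and are not the real driver code):
+
+#include <stdio.h>
+#include <stddef.h>
+
+struct dw_dma_platform_data { unsigned int nr_channels; };
+
+struct dw_dma_chip {
+        void *regs;
+        const struct dw_dma_platform_data *pdata; /* new in this patch */
+};
+
+/* mock: the real dw_dma_probe() reads DW_PARAMS when chip->pdata is NULL */
+static int dw_dma_probe(struct dw_dma_chip *chip)
+{
+        if (chip->pdata)
+                printf("probe: %u channels from platform data\n",
+                       chip->pdata->nr_channels);
+        else
+                printf("probe: autoconfiguration from hardware parameters\n");
+        return 0;
+}
+
+int main(void)
+{
+        static const struct dw_dma_platform_data pdata = { .nr_channels = 8 };
+        struct dw_dma_chip chip = { .regs = NULL };
+
+        chip.pdata = &pdata; /* the glue fills this in ... */
+        return dw_dma_probe(&chip); /* ... before the one-argument probe */
+}
+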
+diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
+index 03b6095..d15d8ba 100644
+--- a/include/linux/platform_data/dma-dw.h
++++ b/include/linux/platform_data/dma-dw.h
+@@ -21,15 +21,15 @@
+ * @dma_dev: required DMA master device
+ * @src_id: src request line
+ * @dst_id: dst request line
+- * @src_master: src master for transfers on allocated channel.
+- * @dst_master: dest master for transfers on allocated channel.
++ * @m_master: memory master for transfers on allocated channel
++ * @p_master: peripheral master for transfers on allocated channel
+ */
+ struct dw_dma_slave {
+ struct device *dma_dev;
+ u8 src_id;
+ u8 dst_id;
+- u8 src_master;
+- u8 dst_master;
++ u8 m_master;
++ u8 p_master;
+ };
+
+ /**
+@@ -43,7 +43,7 @@ struct dw_dma_slave {
+ * @block_size: Maximum block size supported by the controller
+ * @nr_masters: Number of AHB masters supported by the controller
+ * @data_width: Maximum data width supported by hardware per AHB master
+- * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
++ * (in bytes, power of 2)
+ */
+ struct dw_dma_platform_data {
+ unsigned int nr_channels;
+@@ -55,7 +55,7 @@ struct dw_dma_platform_data {
+ #define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */
+ #define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */
+ unsigned char chan_priority;
+- unsigned short block_size;
++ unsigned int block_size;
+ unsigned char nr_masters;
+ unsigned char data_width[DW_DMA_MAX_NR_MASTERS];
+ };
+--
+2.8.1
+