diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-19 11:47:18 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-19 11:47:18 -0700 |
commit | a0d3c7c5c07cfbe00ab89438ddf82482f5a99422 (patch) | |
tree | 560def78af776bef5d0d0202580c4be0fc6219c6 | |
parent | ec67b14c1be4ebe4cf08f06746a8d0313ab85432 (diff) | |
parent | f9114a54c1d828abbe87ac446a2da49d9720203f (diff) | |
download | op-kernel-dev-a0d3c7c5c07cfbe00ab89438ddf82482f5a99422.zip op-kernel-dev-a0d3c7c5c07cfbe00ab89438ddf82482f5a99422.tar.gz |
Merge tag 'dmaengine-4.7-rc1' of git://git.infradead.org/users/vkoul/slave-dma
Pull dmaengine updates from Vinod Koul:
"This time round the update brings in following changes:
- new tegra driver for ADMA device
- support for Xilinx AXI Direct Memory Access Engine and Xilinx AXI
Central Direct Memory Access Engine and few updates to this driver
- new cyclic capability to sun6i and few updates
- slave-sg support in bcm2835
- updates to many drivers like designware, hsu, mv_xor, pxa, edma,
qcom_hidma & bam"
* tag 'dmaengine-4.7-rc1' of git://git.infradead.org/users/vkoul/slave-dma: (84 commits)
dmaengine: ioatdma: disable relaxed ordering for ioatdma
dmaengine: of_dma: approximate an average distribution
dmaengine: core: Use IS_ENABLED() instead of checking for built-in or module
dmaengine: edma: Re-evaluate errors when ccerr is triggered w/o error event
dmaengine: qcom_hidma: add support for object hierarchy
dmaengine: qcom_hidma: add debugfs hooks
dmaengine: qcom_hidma: implement lower level hardware interface
dmaengine: vdma: Add clock support
Documentation: DT: vdma: Add clock support for dmas
dmaengine: vdma: Add config structure to differentiate dmas
MAINTAINERS: Update Tegra DMA maintainers
dmaengine: tegra-adma: Add support for Tegra210 ADMA
Documentation: DT: Add binding documentation for NVIDIA ADMA
dmaengine: vdma: Add Support for Xilinx AXI Central Direct Memory Access Engine
Documentation: DT: vdma: update binding doc for AXI CDMA
dmaengine: vdma: Add Support for Xilinx AXI Direct Memory Access Engine
Documentation: DT: vdma: update binding doc for AXI DMA
dmaengine: vdma: Rename xilinx_vdma_ prefix to xilinx_dma
dmaengine: slave means at least one of DMA_SLAVE, DMA_CYCLIC
dmaengine: mv_xor: Allow selecting mv_xor for mvebu only compatible SoC
...
53 files changed, 4959 insertions, 1114 deletions
diff --git a/Documentation/ABI/testing/sysfs-platform-hidma b/Documentation/ABI/testing/sysfs-platform-hidma new file mode 100644 index 0000000..d364415 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-platform-hidma @@ -0,0 +1,9 @@ +What: /sys/devices/platform/hidma-*/chid + /sys/devices/platform/QCOM8061:*/chid +Date: Dec 2015 +KernelVersion: 4.4 +Contact: "Sinan Kaya <okaya@cudeaurora.org>" +Description: + Contains the ID of the channel within the HIDMA instance. + It is used to associate a given HIDMA channel with the + priority and weight calls in the management interface. diff --git a/Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt b/Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt index 1396078..baf9b34 100644 --- a/Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt +++ b/Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt @@ -12,6 +12,10 @@ Required properties: - reg: Should contain DMA registers location and length. - interrupts: Should contain the DMA interrupts associated to the DMA channels in ascending order. +- interrupt-names: Should contain the names of the interrupt + in the form "dmaXX". + Use "dma-shared-all" for the common interrupt line + that is shared by all dma channels. - #dma-cells: Must be <1>, the cell in the dmas property of the client device represents the DREQ number. - brcm,dma-channel-mask: Bit mask representing the channels @@ -34,13 +38,35 @@ dma: dma@7e007000 { <1 24>, <1 25>, <1 26>, + /* dma channel 11-14 share one irq */ <1 27>, + <1 27>, + <1 27>, + <1 27>, + /* unused shared irq for all channels */ <1 28>; + interrupt-names = "dma0", + "dma1", + "dma2", + "dma3", + "dma4", + "dma5", + "dma6", + "dma7", + "dma8", + "dma9", + "dma10", + "dma11", + "dma12", + "dma13", + "dma14", + "dma-shared-all"; #dma-cells = <1>; brcm,dma-channel-mask = <0x7f35>; }; + DMA clients connected to the BCM2835 DMA controller must use the format described in the dma.txt file, using a two-cell specifier for each channel. diff --git a/Documentation/devicetree/bindings/dma/mv-xor.txt b/Documentation/devicetree/bindings/dma/mv-xor.txt index 276ef81..c075f59 100644 --- a/Documentation/devicetree/bindings/dma/mv-xor.txt +++ b/Documentation/devicetree/bindings/dma/mv-xor.txt @@ -1,7 +1,10 @@ * Marvell XOR engines Required properties: -- compatible: Should be "marvell,orion-xor" or "marvell,armada-380-xor" +- compatible: Should be one of the following: + - "marvell,orion-xor" + - "marvell,armada-380-xor" + - "marvell,armada-3700-xor". - reg: Should contain registers location and length (two sets) the first set is the low registers, the second set the high registers for the XOR engine. diff --git a/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.txt b/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.txt new file mode 100644 index 0000000..1e1dc8f9 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.txt @@ -0,0 +1,55 @@ +* NVIDIA Tegra Audio DMA (ADMA) controller + +The Tegra Audio DMA controller that is used for transferring data +between system memory and the Audio Processing Engine (APE). + +Required properties: +- compatible: Must be "nvidia,tegra210-adma". +- reg: Should contain DMA registers location and length. This should be + a single entry that includes all of the per-channel registers in one + contiguous bank. +- interrupt-parent: Phandle to the interrupt parent controller. +- interrupts: Should contain all of the per-channel DMA interrupts in + ascending order with respect to the DMA channel index. +- clocks: Must contain one entry for the ADMA module clock + (TEGRA210_CLK_D_AUDIO). +- clock-names: Must contain the name "d_audio" for the corresponding + 'clocks' entry. +- #dma-cells : Must be 1. The first cell denotes the receive/transmit + request number and should be between 1 and the maximum number of + requests supported. This value corresponds to the RX/TX_REQUEST_SELECT + fields in the ADMA_CHn_CTRL register. + + +Example: + +adma: dma@702e2000 { + compatible = "nvidia,tegra210-adma"; + reg = <0x0 0x702e2000 0x0 0x2000>; + interrupt-parent = <&tegra_agic>; + interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 43 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&tegra_car TEGRA210_CLK_D_AUDIO>; + clock-names = "d_audio"; + #dma-cells = <1>; +}; diff --git a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt index 1c9d48e..9cbf5d9 100644 --- a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt +++ b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt @@ -13,6 +13,8 @@ Required properties: - clock-names: must contain "bam_clk" entry - qcom,ee : indicates the active Execution Environment identifier (0-7) used in the secure world. +- qcom,controlled-remotely : optional, indicates that the bam is controlled by + remote proccessor i.e. execution environment. Example: diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt index c261598..0f55832 100644 --- a/Documentation/devicetree/bindings/dma/snps-dma.txt +++ b/Documentation/devicetree/bindings/dma/snps-dma.txt @@ -13,6 +13,11 @@ Required properties: - chan_priority: priority of channels. 0 (default): increase from chan 0->n, 1: increase from chan n->0 - block_size: Maximum block size supported by the controller +- data-width: Maximum data width supported by hardware per AHB master + (in bytes, power of 2) + + +Deprecated properties: - data_width: Maximum data width supported by hardware per AHB master (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) @@ -38,7 +43,7 @@ Example: chan_allocation_order = <1>; chan_priority = <1>; block_size = <0xfff>; - data_width = <3 3>; + data-width = <8 8>; }; DMA clients connected to the Designware DMA controller must use the format @@ -47,8 +52,8 @@ The four cells in order are: 1. A phandle pointing to the DMA controller 2. The DMA request line number -3. Source master for transfers on allocated channel -4. Destination master for transfers on allocated channel +3. Memory master for transfers on allocated channel +4. Peripheral master for transfers on allocated channel Example: diff --git a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt index e4c4d47..a1f2683 100644 --- a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt +++ b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt @@ -3,18 +3,44 @@ It can be configured to have one channel or two channels. If configured as two channels, one is to transmit to the video device and another is to receive from the video device. +Xilinx AXI DMA engine, it does transfers between memory and AXI4 stream +target devices. It can be configured to have one channel or two channels. +If configured as two channels, one is to transmit to the device and another +is to receive from the device. + +Xilinx AXI CDMA engine, it does transfers between memory-mapped source +address and a memory-mapped destination address. + Required properties: -- compatible: Should be "xlnx,axi-vdma-1.00.a" +- compatible: Should be "xlnx,axi-vdma-1.00.a" or "xlnx,axi-dma-1.00.a" or + "xlnx,axi-cdma-1.00.a"" - #dma-cells: Should be <1>, see "dmas" property below - reg: Should contain VDMA registers location and length. -- xlnx,num-fstores: Should be the number of framebuffers as configured in h/w. +- xlnx,addrwidth: Should be the vdma addressing size in bits(ex: 32 bits). +- dma-ranges: Should be as the following <dma_addr cpu_addr max_len>. - dma-channel child node: Should have at least one channel and can have up to two channels per device. This node specifies the properties of each DMA channel (see child node properties below). +- clocks: Input clock specifier. Refer to common clock bindings. +- clock-names: List of input clocks + For VDMA: + Required elements: "s_axi_lite_aclk" + Optional elements: "m_axi_mm2s_aclk" "m_axi_s2mm_aclk", + "m_axis_mm2s_aclk", "s_axis_s2mm_aclk" + For CDMA: + Required elements: "s_axi_lite_aclk", "m_axi_aclk" + FOR AXIDMA: + Required elements: "s_axi_lite_aclk" + Optional elements: "m_axi_mm2s_aclk", "m_axi_s2mm_aclk", + "m_axi_sg_aclk" + +Required properties for VDMA: +- xlnx,num-fstores: Should be the number of framebuffers as configured in h/w. Optional properties: - xlnx,include-sg: Tells configured for Scatter-mode in the hardware. +Optional properties for VDMA: - xlnx,flush-fsync: Tells which channel to Flush on Frame sync. It takes following values: {1}, flush both channels @@ -31,6 +57,7 @@ Required child node properties: Optional child node properties: - xlnx,include-dre: Tells hardware is configured for Data Realignment Engine. +Optional child node properties for VDMA: - xlnx,genlock-mode: Tells Genlock synchronization is enabled/disabled in hardware. @@ -41,8 +68,13 @@ axi_vdma_0: axivdma@40030000 { compatible = "xlnx,axi-vdma-1.00.a"; #dma_cells = <1>; reg = < 0x40030000 0x10000 >; + dma-ranges = <0x00000000 0x00000000 0x40000000>; xlnx,num-fstores = <0x8>; xlnx,flush-fsync = <0x1>; + xlnx,addrwidth = <0x20>; + clocks = <&clk 0>, <&clk 1>, <&clk 2>, <&clk 3>, <&clk 4>; + clock-names = "s_axi_lite_aclk", "m_axi_mm2s_aclk", "m_axi_s2mm_aclk", + "m_axis_mm2s_aclk", "s_axis_s2mm_aclk"; dma-channel@40030000 { compatible = "xlnx,axi-vdma-mm2s-channel"; interrupts = < 0 54 4 >; diff --git a/MAINTAINERS b/MAINTAINERS index ef8a56e..f825014 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11017,10 +11017,11 @@ M: Prashant Gaikwad <pgaikwad@nvidia.com> S: Supported F: drivers/clk/tegra/ -TEGRA DMA DRIVER +TEGRA DMA DRIVERS M: Laxman Dewangan <ldewangan@nvidia.com> +M: Jon Hunter <jonathanh@nvidia.com> S: Supported -F: drivers/dma/tegra20-apb-dma.c +F: drivers/dma/tegra* TEGRA I2C DRIVER M: Laxman Dewangan <ldewangan@nvidia.com> diff --git a/arch/arc/boot/dts/abilis_tb10x.dtsi b/arch/arc/boot/dts/abilis_tb10x.dtsi index 663671f2..de53f5c 100644 --- a/arch/arc/boot/dts/abilis_tb10x.dtsi +++ b/arch/arc/boot/dts/abilis_tb10x.dtsi @@ -126,7 +126,7 @@ chan_allocation_order = <0>; chan_priority = <1>; block_size = <0x7ff>; - data_width = <2>; + data-width = <4>; clocks = <&ahb_clk>; clock-names = "hclk"; }; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 31cc2f2..10b27b9 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -48,9 +48,29 @@ <1 24>, <1 25>, <1 26>, + /* dma channel 11-14 share one irq */ <1 27>, + <1 27>, + <1 27>, + <1 27>, + /* unused shared irq for all channels */ <1 28>; - + interrupt-names = "dma0", + "dma1", + "dma2", + "dma3", + "dma4", + "dma5", + "dma6", + "dma7", + "dma8", + "dma9", + "dma10", + "dma11", + "dma12", + "dma13", + "dma14", + "dma-shared-all"; #dma-cells = <1>; brcm,dma-channel-mask = <0x7f35>; }; diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi index 14594ce..449acf0 100644 --- a/arch/arm/boot/dts/spear13xx.dtsi +++ b/arch/arm/boot/dts/spear13xx.dtsi @@ -117,7 +117,7 @@ chan_priority = <1>; block_size = <0xfff>; dma-masters = <2>; - data_width = <3 3>; + data-width = <8 8>; }; dma@eb000000 { @@ -133,7 +133,7 @@ chan_allocation_order = <1>; chan_priority = <1>; block_size = <0xfff>; - data_width = <3 3>; + data-width = <8 8>; }; fsmc: flash@b0000000 { diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index bf445aa..00d6dcc 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -1365,8 +1365,8 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data) slave->dma_dev = &dw_dmac0_device.dev; slave->src_id = 0; slave->dst_id = 1; - slave->src_master = 1; - slave->dst_master = 0; + slave->m_master = 1; + slave->p_master = 0; data->dma_slave = slave; data->dma_filter = at32_mci_dma_filter; @@ -2061,16 +2061,16 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data, if (flags & AC97C_CAPTURE) { rx_dws->dma_dev = &dw_dmac0_device.dev; rx_dws->src_id = 3; - rx_dws->src_master = 0; - rx_dws->dst_master = 1; + rx_dws->m_master = 0; + rx_dws->p_master = 1; } /* Check if DMA slave interface for playback should be configured. */ if (flags & AC97C_PLAYBACK) { tx_dws->dma_dev = &dw_dmac0_device.dev; tx_dws->dst_id = 4; - tx_dws->src_master = 0; - tx_dws->dst_master = 1; + tx_dws->m_master = 0; + tx_dws->p_master = 1; } if (platform_device_add_data(pdev, data, @@ -2141,8 +2141,8 @@ at32_add_device_abdac(unsigned int id, struct atmel_abdac_pdata *data) dws->dma_dev = &dw_dmac0_device.dev; dws->dst_id = 2; - dws->src_master = 0; - dws->dst_master = 1; + dws->m_master = 0; + dws->p_master = 1; if (platform_device_add_data(pdev, data, sizeof(struct atmel_abdac_pdata))) diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 9020349..2cb6f7e 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -201,8 +201,8 @@ static struct sata_dwc_host_priv host_pvt; static struct dw_dma_slave sata_dwc_dma_dws = { .src_id = 0, .dst_id = 0, - .src_master = 0, - .dst_master = 1, + .m_master = 1, + .p_master = 0, }; /* @@ -1248,7 +1248,7 @@ static int sata_dwc_probe(struct platform_device *ofdev) hsdev->dma->dev = &ofdev->dev; /* Initialize AHB DMAC */ - err = dw_dma_probe(hsdev->dma, NULL); + err = dw_dma_probe(hsdev->dma); if (err) goto error_dma_iomap; diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index d96d87c..8c98779 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -332,7 +332,7 @@ config MPC512X_DMA config MV_XOR bool "Marvell XOR engine support" - depends on PLAT_ORION + depends on PLAT_ORION || ARCH_MVEBU || COMPILE_TEST select DMA_ENGINE select DMA_ENGINE_RAID select ASYNC_TX_ENABLE_CHANNEL_SWITCH @@ -467,6 +467,20 @@ config TEGRA20_APB_DMA This DMA controller transfers data from memory to peripheral fifo or vice versa. It does not support memory to memory data transfer. +config TEGRA210_ADMA + bool "NVIDIA Tegra210 ADMA support" + depends on ARCH_TEGRA_210_SOC + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + select PM_CLK + help + Support for the NVIDIA Tegra210 ADMA controller driver. The + DMA controller has multiple DMA channels and is used to service + various audio clients in the Tegra210 audio processing engine + (APE). This DMA controller transfers data from memory to + peripheral and vice versa. It does not support memory to + memory data transfer. + config TIMB_DMA tristate "Timberdale FPGA DMA support" depends on MFD_TIMBERDALE @@ -507,7 +521,7 @@ config XGENE_DMA config XILINX_VDMA tristate "Xilinx AXI VDMA Engine" - depends on (ARCH_ZYNQ || MICROBLAZE) + depends on (ARCH_ZYNQ || MICROBLAZE || ARM64) select DMA_ENGINE help Enable support for Xilinx AXI VDMA Soft IP. diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 6084127..614f28b 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_STM32_DMA) += stm32-dma.o obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o +obj-$(CONFIG_TEGRA210_ADMA) += tegra210-adma.o obj-$(CONFIG_TIMB_DMA) += timb_dma.o obj-$(CONFIG_TI_CPPI41) += cppi41.o obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-crossbar.o diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 9b42c05..81db1c4 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -107,16 +107,20 @@ struct pl08x_driver_data; /** * struct vendor_data - vendor-specific config parameters for PL08x derivatives * @channels: the number of channels available in this variant + * @signals: the number of request signals available from the hardware * @dualmaster: whether this version supports dual AHB masters or not. * @nomadik: whether the channels have Nomadik security extension bits * that need to be checked for permission before use and some registers are * missing * @pl080s: whether this version is a PL080S, which has separate register and * LLI word for transfer size. + * @max_transfer_size: the maximum single element transfer size for this + * PL08x variant. */ struct vendor_data { u8 config_offset; u8 channels; + u8 signals; bool dualmaster; bool nomadik; bool pl080s; @@ -235,7 +239,7 @@ struct pl08x_dma_chan { struct virt_dma_chan vc; struct pl08x_phy_chan *phychan; const char *name; - const struct pl08x_channel_data *cd; + struct pl08x_channel_data *cd; struct dma_slave_config cfg; struct pl08x_txd *at; struct pl08x_driver_data *host; @@ -1909,6 +1913,12 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, if (slave) { chan->cd = &pl08x->pd->slave_channels[i]; + /* + * Some implementations have muxed signals, whereas some + * use a mux in front of the signals and need dynamic + * assignment of signals. + */ + chan->signal = i; pl08x_dma_slave_init(chan); } else { chan->cd = &pl08x->pd->memcpy_channel; @@ -2050,40 +2060,33 @@ static struct dma_chan *pl08x_of_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma) { struct pl08x_driver_data *pl08x = ofdma->of_dma_data; - struct pl08x_channel_data *data; - struct pl08x_dma_chan *chan; struct dma_chan *dma_chan; + struct pl08x_dma_chan *plchan; if (!pl08x) return NULL; - if (dma_spec->args_count != 2) + if (dma_spec->args_count != 2) { + dev_err(&pl08x->adev->dev, + "DMA channel translation requires two cells\n"); return NULL; + } dma_chan = pl08x_find_chan_id(pl08x, dma_spec->args[0]); - if (dma_chan) - return dma_get_slave_channel(dma_chan); - - chan = devm_kzalloc(pl08x->slave.dev, sizeof(*chan) + sizeof(*data), - GFP_KERNEL); - if (!chan) + if (!dma_chan) { + dev_err(&pl08x->adev->dev, + "DMA slave channel not found\n"); return NULL; + } - data = (void *)&chan[1]; - data->bus_id = "(none)"; - data->periph_buses = dma_spec->args[1]; - - chan->cd = data; - chan->host = pl08x; - chan->slave = true; - chan->name = data->bus_id; - chan->state = PL08X_CHAN_IDLE; - chan->signal = dma_spec->args[0]; - chan->vc.desc_free = pl08x_desc_free; - - vchan_init(&chan->vc, &pl08x->slave); + plchan = to_pl08x_chan(dma_chan); + dev_dbg(&pl08x->adev->dev, + "translated channel for signal %d\n", + dma_spec->args[0]); - return dma_get_slave_channel(&chan->vc.chan); + /* Augment channel data for applicable AHB buses */ + plchan->cd->periph_buses = dma_spec->args[1]; + return dma_get_slave_channel(dma_chan); } static int pl08x_of_probe(struct amba_device *adev, @@ -2091,9 +2094,11 @@ static int pl08x_of_probe(struct amba_device *adev, struct device_node *np) { struct pl08x_platform_data *pd; + struct pl08x_channel_data *chanp = NULL; u32 cctl_memcpy = 0; u32 val; int ret; + int i; pd = devm_kzalloc(&adev->dev, sizeof(*pd), GFP_KERNEL); if (!pd) @@ -2195,6 +2200,27 @@ static int pl08x_of_probe(struct amba_device *adev, /* Use the buses that can access memory, obviously */ pd->memcpy_channel.periph_buses = pd->mem_buses; + /* + * Allocate channel data for all possible slave channels (one + * for each possible signal), channels will then be allocated + * for a device and have it's AHB interfaces set up at + * translation time. + */ + chanp = devm_kcalloc(&adev->dev, + pl08x->vd->signals, + sizeof(struct pl08x_channel_data), + GFP_KERNEL); + if (!chanp) + return -ENOMEM; + + pd->slave_channels = chanp; + for (i = 0; i < pl08x->vd->signals; i++) { + /* chanp->periph_buses will be assigned at translation */ + chanp->bus_id = kasprintf(GFP_KERNEL, "slave%d", i); + chanp++; + } + pd->num_slave_channels = pl08x->vd->signals; + pl08x->pd = pd; return of_dma_controller_register(adev->dev.of_node, pl08x_of_xlate, @@ -2234,6 +2260,10 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) goto out_no_pl08x; } + /* Assign useful pointers to the driver state */ + pl08x->adev = adev; + pl08x->vd = vd; + /* Initialize memcpy engine */ dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask); pl08x->memcpy.dev = &adev->dev; @@ -2284,10 +2314,6 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) } } - /* Assign useful pointers to the driver state */ - pl08x->adev = adev; - pl08x->vd = vd; - /* By default, AHB1 only. If dualmaster, from platform */ pl08x->lli_buses = PL08X_AHB1; pl08x->mem_buses = PL08X_AHB1; @@ -2438,6 +2464,7 @@ out_no_pl08x: static struct vendor_data vendor_pl080 = { .config_offset = PL080_CH_CONFIG, .channels = 8, + .signals = 16, .dualmaster = true, .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, }; @@ -2445,6 +2472,7 @@ static struct vendor_data vendor_pl080 = { static struct vendor_data vendor_nomadik = { .config_offset = PL080_CH_CONFIG, .channels = 8, + .signals = 32, .dualmaster = true, .nomadik = true, .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, @@ -2453,6 +2481,7 @@ static struct vendor_data vendor_nomadik = { static struct vendor_data vendor_pl080s = { .config_offset = PL080S_CH_CONFIG, .channels = 8, + .signals = 32, .pl080s = true, .max_transfer_size = PL080S_CONTROL_TRANSFER_SIZE_MASK, }; @@ -2460,6 +2489,7 @@ static struct vendor_data vendor_pl080s = { static struct vendor_data vendor_pl081 = { .config_offset = PL080_CH_CONFIG, .channels = 2, + .signals = 16, .dualmaster = false, .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, }; diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 996c4b0..6149b27 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -46,6 +46,9 @@ #include "virt-dma.h" +#define BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED 14 +#define BCM2835_DMA_CHAN_NAME_SIZE 8 + struct bcm2835_dmadev { struct dma_device ddev; spinlock_t lock; @@ -73,7 +76,6 @@ struct bcm2835_chan { struct list_head node; struct dma_slave_config cfg; - bool cyclic; unsigned int dreq; int ch; @@ -82,6 +84,9 @@ struct bcm2835_chan { void __iomem *chan_base; int irq_number; + unsigned int irq_flags; + + bool is_lite_channel; }; struct bcm2835_desc { @@ -89,47 +94,104 @@ struct bcm2835_desc { struct virt_dma_desc vd; enum dma_transfer_direction dir; - struct bcm2835_cb_entry *cb_list; - unsigned int frames; size_t size; + + bool cyclic; + + struct bcm2835_cb_entry cb_list[]; }; #define BCM2835_DMA_CS 0x00 #define BCM2835_DMA_ADDR 0x04 +#define BCM2835_DMA_TI 0x08 #define BCM2835_DMA_SOURCE_AD 0x0c #define BCM2835_DMA_DEST_AD 0x10 -#define BCM2835_DMA_NEXTCB 0x1C +#define BCM2835_DMA_LEN 0x14 +#define BCM2835_DMA_STRIDE 0x18 +#define BCM2835_DMA_NEXTCB 0x1c +#define BCM2835_DMA_DEBUG 0x20 /* DMA CS Control and Status bits */ -#define BCM2835_DMA_ACTIVE BIT(0) -#define BCM2835_DMA_INT BIT(2) +#define BCM2835_DMA_ACTIVE BIT(0) /* activate the DMA */ +#define BCM2835_DMA_END BIT(1) /* current CB has ended */ +#define BCM2835_DMA_INT BIT(2) /* interrupt status */ +#define BCM2835_DMA_DREQ BIT(3) /* DREQ state */ #define BCM2835_DMA_ISPAUSED BIT(4) /* Pause requested or not active */ #define BCM2835_DMA_ISHELD BIT(5) /* Is held by DREQ flow control */ -#define BCM2835_DMA_ERR BIT(8) +#define BCM2835_DMA_WAITING_FOR_WRITES BIT(6) /* waiting for last + * AXI-write to ack + */ +#define BCM2835_DMA_ERR BIT(8) +#define BCM2835_DMA_PRIORITY(x) ((x & 15) << 16) /* AXI priority */ +#define BCM2835_DMA_PANIC_PRIORITY(x) ((x & 15) << 20) /* panic priority */ +/* current value of TI.BCM2835_DMA_WAIT_RESP */ +#define BCM2835_DMA_WAIT_FOR_WRITES BIT(28) +#define BCM2835_DMA_DIS_DEBUG BIT(29) /* disable debug pause signal */ #define BCM2835_DMA_ABORT BIT(30) /* Stop current CB, go to next, WO */ #define BCM2835_DMA_RESET BIT(31) /* WO, self clearing */ +/* Transfer information bits - also bcm2835_cb.info field */ #define BCM2835_DMA_INT_EN BIT(0) +#define BCM2835_DMA_TDMODE BIT(1) /* 2D-Mode */ +#define BCM2835_DMA_WAIT_RESP BIT(3) /* wait for AXI-write to be acked */ #define BCM2835_DMA_D_INC BIT(4) -#define BCM2835_DMA_D_DREQ BIT(6) +#define BCM2835_DMA_D_WIDTH BIT(5) /* 128bit writes if set */ +#define BCM2835_DMA_D_DREQ BIT(6) /* enable DREQ for destination */ +#define BCM2835_DMA_D_IGNORE BIT(7) /* ignore destination writes */ #define BCM2835_DMA_S_INC BIT(8) -#define BCM2835_DMA_S_DREQ BIT(10) - -#define BCM2835_DMA_PER_MAP(x) ((x) << 16) +#define BCM2835_DMA_S_WIDTH BIT(9) /* 128bit writes if set */ +#define BCM2835_DMA_S_DREQ BIT(10) /* enable SREQ for source */ +#define BCM2835_DMA_S_IGNORE BIT(11) /* ignore source reads - read 0 */ +#define BCM2835_DMA_BURST_LENGTH(x) ((x & 15) << 12) +#define BCM2835_DMA_PER_MAP(x) ((x & 31) << 16) /* REQ source */ +#define BCM2835_DMA_WAIT(x) ((x & 31) << 21) /* add DMA-wait cycles */ +#define BCM2835_DMA_NO_WIDE_BURSTS BIT(26) /* no 2 beat write bursts */ + +/* debug register bits */ +#define BCM2835_DMA_DEBUG_LAST_NOT_SET_ERR BIT(0) +#define BCM2835_DMA_DEBUG_FIFO_ERR BIT(1) +#define BCM2835_DMA_DEBUG_READ_ERR BIT(2) +#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_SHIFT 4 +#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_BITS 4 +#define BCM2835_DMA_DEBUG_ID_SHIFT 16 +#define BCM2835_DMA_DEBUG_ID_BITS 9 +#define BCM2835_DMA_DEBUG_STATE_SHIFT 16 +#define BCM2835_DMA_DEBUG_STATE_BITS 9 +#define BCM2835_DMA_DEBUG_VERSION_SHIFT 25 +#define BCM2835_DMA_DEBUG_VERSION_BITS 3 +#define BCM2835_DMA_DEBUG_LITE BIT(28) + +/* shared registers for all dma channels */ +#define BCM2835_DMA_INT_STATUS 0xfe0 +#define BCM2835_DMA_ENABLE 0xff0 #define BCM2835_DMA_DATA_TYPE_S8 1 #define BCM2835_DMA_DATA_TYPE_S16 2 #define BCM2835_DMA_DATA_TYPE_S32 4 #define BCM2835_DMA_DATA_TYPE_S128 16 -#define BCM2835_DMA_BULK_MASK BIT(0) -#define BCM2835_DMA_FIQ_MASK (BIT(2) | BIT(3)) - /* Valid only for channels 0 - 14, 15 has its own base address */ #define BCM2835_DMA_CHAN(n) ((n) << 8) /* Base address */ #define BCM2835_DMA_CHANIO(base, n) ((base) + BCM2835_DMA_CHAN(n)) +/* the max dma length for different channels */ +#define MAX_DMA_LEN SZ_1G +#define MAX_LITE_DMA_LEN (SZ_64K - 4) + +static inline size_t bcm2835_dma_max_frame_length(struct bcm2835_chan *c) +{ + /* lite and normal channels have different max frame length */ + return c->is_lite_channel ? MAX_LITE_DMA_LEN : MAX_DMA_LEN; +} + +/* how many frames of max_len size do we need to transfer len bytes */ +static inline size_t bcm2835_dma_frames_for_length(size_t len, + size_t max_len) +{ + return DIV_ROUND_UP(len, max_len); +} + static inline struct bcm2835_dmadev *to_bcm2835_dma_dev(struct dma_device *d) { return container_of(d, struct bcm2835_dmadev, ddev); @@ -146,19 +208,209 @@ static inline struct bcm2835_desc *to_bcm2835_dma_desc( return container_of(t, struct bcm2835_desc, vd.tx); } -static void bcm2835_dma_desc_free(struct virt_dma_desc *vd) +static void bcm2835_dma_free_cb_chain(struct bcm2835_desc *desc) { - struct bcm2835_desc *desc = container_of(vd, struct bcm2835_desc, vd); - int i; + size_t i; for (i = 0; i < desc->frames; i++) dma_pool_free(desc->c->cb_pool, desc->cb_list[i].cb, desc->cb_list[i].paddr); - kfree(desc->cb_list); kfree(desc); } +static void bcm2835_dma_desc_free(struct virt_dma_desc *vd) +{ + bcm2835_dma_free_cb_chain( + container_of(vd, struct bcm2835_desc, vd)); +} + +static void bcm2835_dma_create_cb_set_length( + struct bcm2835_chan *chan, + struct bcm2835_dma_cb *control_block, + size_t len, + size_t period_len, + size_t *total_len, + u32 finalextrainfo) +{ + size_t max_len = bcm2835_dma_max_frame_length(chan); + + /* set the length taking lite-channel limitations into account */ + control_block->length = min_t(u32, len, max_len); + + /* finished if we have no period_length */ + if (!period_len) + return; + + /* + * period_len means: that we need to generate + * transfers that are terminating at every + * multiple of period_len - this is typically + * used to set the interrupt flag in info + * which is required during cyclic transfers + */ + + /* have we filled in period_length yet? */ + if (*total_len + control_block->length < period_len) + return; + + /* calculate the length that remains to reach period_length */ + control_block->length = period_len - *total_len; + + /* reset total_length for next period */ + *total_len = 0; + + /* add extrainfo bits in info */ + control_block->info |= finalextrainfo; +} + +static inline size_t bcm2835_dma_count_frames_for_sg( + struct bcm2835_chan *c, + struct scatterlist *sgl, + unsigned int sg_len) +{ + size_t frames = 0; + struct scatterlist *sgent; + unsigned int i; + size_t plength = bcm2835_dma_max_frame_length(c); + + for_each_sg(sgl, sgent, sg_len, i) + frames += bcm2835_dma_frames_for_length( + sg_dma_len(sgent), plength); + + return frames; +} + +/** + * bcm2835_dma_create_cb_chain - create a control block and fills data in + * + * @chan: the @dma_chan for which we run this + * @direction: the direction in which we transfer + * @cyclic: it is a cyclic transfer + * @info: the default info bits to apply per controlblock + * @frames: number of controlblocks to allocate + * @src: the src address to assign (if the S_INC bit is set + * in @info, then it gets incremented) + * @dst: the dst address to assign (if the D_INC bit is set + * in @info, then it gets incremented) + * @buf_len: the full buffer length (may also be 0) + * @period_len: the period length when to apply @finalextrainfo + * in addition to the last transfer + * this will also break some control-blocks early + * @finalextrainfo: additional bits in last controlblock + * (or when period_len is reached in case of cyclic) + * @gfp: the GFP flag to use for allocation + */ +static struct bcm2835_desc *bcm2835_dma_create_cb_chain( + struct dma_chan *chan, enum dma_transfer_direction direction, + bool cyclic, u32 info, u32 finalextrainfo, size_t frames, + dma_addr_t src, dma_addr_t dst, size_t buf_len, + size_t period_len, gfp_t gfp) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + size_t len = buf_len, total_len; + size_t frame; + struct bcm2835_desc *d; + struct bcm2835_cb_entry *cb_entry; + struct bcm2835_dma_cb *control_block; + + if (!frames) + return NULL; + + /* allocate and setup the descriptor. */ + d = kzalloc(sizeof(*d) + frames * sizeof(struct bcm2835_cb_entry), + gfp); + if (!d) + return NULL; + + d->c = c; + d->dir = direction; + d->cyclic = cyclic; + + /* + * Iterate over all frames, create a control block + * for each frame and link them together. + */ + for (frame = 0, total_len = 0; frame < frames; d->frames++, frame++) { + cb_entry = &d->cb_list[frame]; + cb_entry->cb = dma_pool_alloc(c->cb_pool, gfp, + &cb_entry->paddr); + if (!cb_entry->cb) + goto error_cb; + + /* fill in the control block */ + control_block = cb_entry->cb; + control_block->info = info; + control_block->src = src; + control_block->dst = dst; + control_block->stride = 0; + control_block->next = 0; + /* set up length in control_block if requested */ + if (buf_len) { + /* calculate length honoring period_length */ + bcm2835_dma_create_cb_set_length( + c, control_block, + len, period_len, &total_len, + cyclic ? finalextrainfo : 0); + + /* calculate new remaining length */ + len -= control_block->length; + } + + /* link this the last controlblock */ + if (frame) + d->cb_list[frame - 1].cb->next = cb_entry->paddr; + + /* update src and dst and length */ + if (src && (info & BCM2835_DMA_S_INC)) + src += control_block->length; + if (dst && (info & BCM2835_DMA_D_INC)) + dst += control_block->length; + + /* Length of total transfer */ + d->size += control_block->length; + } + + /* the last frame requires extra flags */ + d->cb_list[d->frames - 1].cb->info |= finalextrainfo; + + /* detect a size missmatch */ + if (buf_len && (d->size != buf_len)) + goto error_cb; + + return d; +error_cb: + bcm2835_dma_free_cb_chain(d); + + return NULL; +} + +static void bcm2835_dma_fill_cb_chain_with_sg( + struct dma_chan *chan, + enum dma_transfer_direction direction, + struct bcm2835_cb_entry *cb, + struct scatterlist *sgl, + unsigned int sg_len) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + size_t max_len = bcm2835_dma_max_frame_length(c); + unsigned int i, len; + dma_addr_t addr; + struct scatterlist *sgent; + + for_each_sg(sgl, sgent, sg_len, i) { + for (addr = sg_dma_address(sgent), len = sg_dma_len(sgent); + len > 0; + addr += cb->cb->length, len -= cb->cb->length, cb++) { + if (direction == DMA_DEV_TO_MEM) + cb->cb->dst = addr; + else + cb->cb->src = addr; + cb->cb->length = min(len, max_len); + } + } +} + static int bcm2835_dma_abort(void __iomem *chan_base) { unsigned long cs; @@ -218,6 +470,15 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) struct bcm2835_desc *d; unsigned long flags; + /* check the shared interrupt */ + if (c->irq_flags & IRQF_SHARED) { + /* check if the interrupt is enabled */ + flags = readl(c->chan_base + BCM2835_DMA_CS); + /* if not set then we are not the reason for the irq */ + if (!(flags & BCM2835_DMA_INT)) + return IRQ_NONE; + } + spin_lock_irqsave(&c->vc.lock, flags); /* Acknowledge interrupt */ @@ -226,12 +487,18 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) d = c->desc; if (d) { - /* TODO Only works for cyclic DMA */ - vchan_cyclic_callback(&d->vd); - } + if (d->cyclic) { + /* call the cyclic callback */ + vchan_cyclic_callback(&d->vd); - /* Keep the DMA engine running */ - writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS); + /* Keep the DMA engine running */ + writel(BCM2835_DMA_ACTIVE, + c->chan_base + BCM2835_DMA_CS); + } else { + vchan_cookie_complete(&c->desc->vd); + bcm2835_dma_start_desc(c); + } + } spin_unlock_irqrestore(&c->vc.lock, flags); @@ -252,8 +519,8 @@ static int bcm2835_dma_alloc_chan_resources(struct dma_chan *chan) return -ENOMEM; } - return request_irq(c->irq_number, - bcm2835_dma_callback, 0, "DMA IRQ", c); + return request_irq(c->irq_number, bcm2835_dma_callback, + c->irq_flags, "DMA IRQ", c); } static void bcm2835_dma_free_chan_resources(struct dma_chan *chan) @@ -339,8 +606,6 @@ static void bcm2835_dma_issue_pending(struct dma_chan *chan) struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); unsigned long flags; - c->cyclic = true; /* Nothing else is implemented */ - spin_lock_irqsave(&c->vc.lock, flags); if (vchan_issue_pending(&c->vc) && !c->desc) bcm2835_dma_start_desc(c); @@ -348,122 +613,160 @@ static void bcm2835_dma_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&c->vc.lock, flags); } -static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( - struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, - size_t period_len, enum dma_transfer_direction direction, - unsigned long flags) +struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) { struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); - enum dma_slave_buswidth dev_width; struct bcm2835_desc *d; - dma_addr_t dev_addr; - unsigned int es, sync_type; - unsigned int frame; - int i; + u32 info = BCM2835_DMA_D_INC | BCM2835_DMA_S_INC; + u32 extra = BCM2835_DMA_INT_EN | BCM2835_DMA_WAIT_RESP; + size_t max_len = bcm2835_dma_max_frame_length(c); + size_t frames; + + /* if src, dst or len is not given return with an error */ + if (!src || !dst || !len) + return NULL; + + /* calculate number of frames */ + frames = bcm2835_dma_frames_for_length(len, max_len); + + /* allocate the CB chain - this also fills in the pointers */ + d = bcm2835_dma_create_cb_chain(chan, DMA_MEM_TO_MEM, false, + info, extra, frames, + src, dst, len, 0, GFP_KERNEL); + if (!d) + return NULL; + + return vchan_tx_prep(&c->vc, &d->vd, flags); +} + +static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg( + struct dma_chan *chan, + struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + struct bcm2835_desc *d; + dma_addr_t src = 0, dst = 0; + u32 info = BCM2835_DMA_WAIT_RESP; + u32 extra = BCM2835_DMA_INT_EN; + size_t frames; - /* Grab configuration */ if (!is_slave_direction(direction)) { - dev_err(chan->device->dev, "%s: bad direction?\n", __func__); + dev_err(chan->device->dev, + "%s: bad direction?\n", __func__); return NULL; } + if (c->dreq != 0) + info |= BCM2835_DMA_PER_MAP(c->dreq); + if (direction == DMA_DEV_TO_MEM) { - dev_addr = c->cfg.src_addr; - dev_width = c->cfg.src_addr_width; - sync_type = BCM2835_DMA_S_DREQ; + if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + src = c->cfg.src_addr; + info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC; } else { - dev_addr = c->cfg.dst_addr; - dev_width = c->cfg.dst_addr_width; - sync_type = BCM2835_DMA_D_DREQ; + if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + dst = c->cfg.dst_addr; + info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC; } - /* Bus width translates to the element size (ES) */ - switch (dev_width) { - case DMA_SLAVE_BUSWIDTH_4_BYTES: - es = BCM2835_DMA_DATA_TYPE_S32; - break; - default: - return NULL; - } + /* count frames in sg list */ + frames = bcm2835_dma_count_frames_for_sg(c, sgl, sg_len); - /* Now allocate and setup the descriptor. */ - d = kzalloc(sizeof(*d), GFP_NOWAIT); + /* allocate the CB chain */ + d = bcm2835_dma_create_cb_chain(chan, direction, false, + info, extra, + frames, src, dst, 0, 0, + GFP_KERNEL); if (!d) return NULL; - d->c = c; - d->dir = direction; - d->frames = buf_len / period_len; + /* fill in frames with scatterlist pointers */ + bcm2835_dma_fill_cb_chain_with_sg(chan, direction, d->cb_list, + sgl, sg_len); - d->cb_list = kcalloc(d->frames, sizeof(*d->cb_list), GFP_KERNEL); - if (!d->cb_list) { - kfree(d); + return vchan_tx_prep(&c->vc, &d->vd, flags); +} + +static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + struct bcm2835_desc *d; + dma_addr_t src, dst; + u32 info = BCM2835_DMA_WAIT_RESP; + u32 extra = BCM2835_DMA_INT_EN; + size_t max_len = bcm2835_dma_max_frame_length(c); + size_t frames; + + /* Grab configuration */ + if (!is_slave_direction(direction)) { + dev_err(chan->device->dev, "%s: bad direction?\n", __func__); return NULL; } - /* Allocate memory for control blocks */ - for (i = 0; i < d->frames; i++) { - struct bcm2835_cb_entry *cb_entry = &d->cb_list[i]; - cb_entry->cb = dma_pool_zalloc(c->cb_pool, GFP_ATOMIC, - &cb_entry->paddr); - if (!cb_entry->cb) - goto error_cb; + if (!buf_len) { + dev_err(chan->device->dev, + "%s: bad buffer length (= 0)\n", __func__); + return NULL; } /* - * Iterate over all frames, create a control block - * for each frame and link them together. + * warn if buf_len is not a multiple of period_len - this may leed + * to unexpected latencies for interrupts and thus audiable clicks */ - for (frame = 0; frame < d->frames; frame++) { - struct bcm2835_dma_cb *control_block = d->cb_list[frame].cb; - - /* Setup adresses */ - if (d->dir == DMA_DEV_TO_MEM) { - control_block->info = BCM2835_DMA_D_INC; - control_block->src = dev_addr; - control_block->dst = buf_addr + frame * period_len; - } else { - control_block->info = BCM2835_DMA_S_INC; - control_block->src = buf_addr + frame * period_len; - control_block->dst = dev_addr; - } + if (buf_len % period_len) + dev_warn_once(chan->device->dev, + "%s: buffer_length (%zd) is not a multiple of period_len (%zd)\n", + __func__, buf_len, period_len); - /* Enable interrupt */ - control_block->info |= BCM2835_DMA_INT_EN; + /* Setup DREQ channel */ + if (c->dreq != 0) + info |= BCM2835_DMA_PER_MAP(c->dreq); - /* Setup synchronization */ - if (sync_type != 0) - control_block->info |= sync_type; + if (direction == DMA_DEV_TO_MEM) { + if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + src = c->cfg.src_addr; + dst = buf_addr; + info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC; + } else { + if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + dst = c->cfg.dst_addr; + src = buf_addr; + info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC; + } - /* Setup DREQ channel */ - if (c->dreq != 0) - control_block->info |= - BCM2835_DMA_PER_MAP(c->dreq); + /* calculate number of frames */ + frames = /* number of periods */ + DIV_ROUND_UP(buf_len, period_len) * + /* number of frames per period */ + bcm2835_dma_frames_for_length(period_len, max_len); - /* Length of a frame */ - control_block->length = period_len; - d->size += control_block->length; + /* + * allocate the CB chain + * note that we need to use GFP_NOWAIT, as the ALSA i2s dmaengine + * implementation calls prep_dma_cyclic with interrupts disabled. + */ + d = bcm2835_dma_create_cb_chain(chan, direction, true, + info, extra, + frames, src, dst, buf_len, + period_len, GFP_NOWAIT); + if (!d) + return NULL; - /* - * Next block is the next frame. - * This DMA engine driver currently only supports cyclic DMA. - * Therefore, wrap around at number of frames. - */ - control_block->next = d->cb_list[((frame + 1) % d->frames)].paddr; - } + /* wrap around into a loop */ + d->cb_list[d->frames - 1].cb->next = d->cb_list[0].paddr; return vchan_tx_prep(&c->vc, &d->vd, flags); -error_cb: - i--; - for (; i >= 0; i--) { - struct bcm2835_cb_entry *cb_entry = &d->cb_list[i]; - - dma_pool_free(c->cb_pool, cb_entry->cb, cb_entry->paddr); - } - - kfree(d->cb_list); - kfree(d); - return NULL; } static int bcm2835_dma_slave_config(struct dma_chan *chan, @@ -529,7 +832,8 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan) return 0; } -static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, int irq) +static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, + int irq, unsigned int irq_flags) { struct bcm2835_chan *c; @@ -544,6 +848,12 @@ static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, int irq) c->chan_base = BCM2835_DMA_CHANIO(d->base, chan_id); c->ch = chan_id; c->irq_number = irq; + c->irq_flags = irq_flags; + + /* check in DEBUG register if this is a LITE channel */ + if (readl(c->chan_base + BCM2835_DMA_DEBUG) & + BCM2835_DMA_DEBUG_LITE) + c->is_lite_channel = true; return 0; } @@ -587,9 +897,11 @@ static int bcm2835_dma_probe(struct platform_device *pdev) struct resource *res; void __iomem *base; int rc; - int i; - int irq; + int i, j; + int irq[BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED + 1]; + int irq_flags; uint32_t chans_available; + char chan_name[BCM2835_DMA_CHAN_NAME_SIZE]; if (!pdev->dev.dma_mask) pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask; @@ -615,16 +927,22 @@ static int bcm2835_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); dma_cap_set(DMA_PRIVATE, od->ddev.cap_mask); dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask); + dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); + dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask); od->ddev.device_alloc_chan_resources = bcm2835_dma_alloc_chan_resources; od->ddev.device_free_chan_resources = bcm2835_dma_free_chan_resources; od->ddev.device_tx_status = bcm2835_dma_tx_status; od->ddev.device_issue_pending = bcm2835_dma_issue_pending; od->ddev.device_prep_dma_cyclic = bcm2835_dma_prep_dma_cyclic; + od->ddev.device_prep_slave_sg = bcm2835_dma_prep_slave_sg; + od->ddev.device_prep_dma_memcpy = bcm2835_dma_prep_dma_memcpy; od->ddev.device_config = bcm2835_dma_slave_config; od->ddev.device_terminate_all = bcm2835_dma_terminate_all; od->ddev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); od->ddev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); - od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) | + BIT(DMA_MEM_TO_MEM); + od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; od->ddev.dev = &pdev->dev; INIT_LIST_HEAD(&od->ddev.channels); spin_lock_init(&od->lock); @@ -640,22 +958,48 @@ static int bcm2835_dma_probe(struct platform_device *pdev) goto err_no_dma; } - /* - * Do not use the FIQ and BULK channels, - * because they are used by the GPU. - */ - chans_available &= ~(BCM2835_DMA_FIQ_MASK | BCM2835_DMA_BULK_MASK); + /* get irqs for each channel that we support */ + for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) { + /* skip masked out channels */ + if (!(chans_available & (1 << i))) { + irq[i] = -1; + continue; + } - for (i = 0; i < pdev->num_resources; i++) { - irq = platform_get_irq(pdev, i); - if (irq < 0) - break; + /* get the named irq */ + snprintf(chan_name, sizeof(chan_name), "dma%i", i); + irq[i] = platform_get_irq_byname(pdev, chan_name); + if (irq[i] >= 0) + continue; - if (chans_available & (1 << i)) { - rc = bcm2835_dma_chan_init(od, i, irq); - if (rc) - goto err_no_dma; - } + /* legacy device tree case handling */ + dev_warn_once(&pdev->dev, + "missing interrupt-names property in device tree - legacy interpretation is used\n"); + /* + * in case of channel >= 11 + * use the 11th interrupt and that is shared + */ + irq[i] = platform_get_irq(pdev, i < 11 ? i : 11); + } + + /* get irqs for each channel */ + for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) { + /* skip channels without irq */ + if (irq[i] < 0) + continue; + + /* check if there are other channels that also use this irq */ + irq_flags = 0; + for (j = 0; j <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; j++) + if ((i != j) && (irq[j] == irq[i])) { + irq_flags = IRQF_SHARED; + break; + } + + /* initialize the channel */ + rc = bcm2835_dma_chan_init(od, i, irq[i], irq_flags); + if (rc) + goto err_no_dma; } dev_dbg(&pdev->dev, "Initialized %i DMA channels\n", i); diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 0cb259c5..8c9f45f 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -289,7 +289,7 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) do { status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); if (time_after_eq(jiffies, dma_sync_wait_timeout)) { - pr_err("%s: timeout!\n", __func__); + dev_err(chan->device->dev, "%s: timeout!\n", __func__); return DMA_ERROR; } if (status != DMA_IN_PROGRESS) @@ -482,7 +482,8 @@ int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps) device = chan->device; /* check if the channel supports slave transactions */ - if (!test_bit(DMA_SLAVE, device->cap_mask.bits)) + if (!(test_bit(DMA_SLAVE, device->cap_mask.bits) || + test_bit(DMA_CYCLIC, device->cap_mask.bits))) return -ENXIO; /* @@ -518,7 +519,7 @@ static struct dma_chan *private_candidate(const dma_cap_mask_t *mask, struct dma_chan *chan; if (mask && !__dma_device_satisfies_mask(dev, mask)) { - pr_debug("%s: wrong capabilities\n", __func__); + dev_dbg(dev->dev, "%s: wrong capabilities\n", __func__); return NULL; } /* devices with multiple channels need special handling as we need to @@ -533,12 +534,12 @@ static struct dma_chan *private_candidate(const dma_cap_mask_t *mask, list_for_each_entry(chan, &dev->channels, device_node) { if (chan->client_count) { - pr_debug("%s: %s busy\n", + dev_dbg(dev->dev, "%s: %s busy\n", __func__, dma_chan_name(chan)); continue; } if (fn && !fn(chan, fn_param)) { - pr_debug("%s: %s filter said false\n", + dev_dbg(dev->dev, "%s: %s filter said false\n", __func__, dma_chan_name(chan)); continue; } @@ -567,11 +568,12 @@ static struct dma_chan *find_candidate(struct dma_device *device, if (err) { if (err == -ENODEV) { - pr_debug("%s: %s module removed\n", __func__, - dma_chan_name(chan)); + dev_dbg(device->dev, "%s: %s module removed\n", + __func__, dma_chan_name(chan)); list_del_rcu(&device->global_node); } else - pr_debug("%s: failed to get %s: (%d)\n", + dev_dbg(device->dev, + "%s: failed to get %s: (%d)\n", __func__, dma_chan_name(chan), err); if (--device->privatecnt == 0) @@ -602,7 +604,8 @@ struct dma_chan *dma_get_slave_channel(struct dma_chan *chan) device->privatecnt++; err = dma_chan_get(chan); if (err) { - pr_debug("%s: failed to get %s: (%d)\n", + dev_dbg(chan->device->dev, + "%s: failed to get %s: (%d)\n", __func__, dma_chan_name(chan), err); chan = NULL; if (--device->privatecnt == 0) @@ -814,8 +817,9 @@ void dmaengine_get(void) list_del_rcu(&device->global_node); break; } else if (err) - pr_debug("%s: failed to get %s: (%d)\n", - __func__, dma_chan_name(chan), err); + dev_dbg(chan->device->dev, + "%s: failed to get %s: (%d)\n", + __func__, dma_chan_name(chan), err); } } @@ -862,12 +866,12 @@ static bool device_has_all_tx_types(struct dma_device *device) return false; #endif - #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE) + #if IS_ENABLED(CONFIG_ASYNC_MEMCPY) if (!dma_has_cap(DMA_MEMCPY, device->cap_mask)) return false; #endif - #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE) + #if IS_ENABLED(CONFIG_ASYNC_XOR) if (!dma_has_cap(DMA_XOR, device->cap_mask)) return false; @@ -877,7 +881,7 @@ static bool device_has_all_tx_types(struct dma_device *device) #endif #endif - #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE) + #if IS_ENABLED(CONFIG_ASYNC_PQ) if (!dma_has_cap(DMA_PQ, device->cap_mask)) return false; @@ -1222,8 +1226,9 @@ dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) while (tx->cookie == -EBUSY) { if (time_after_eq(jiffies, dma_sync_wait_timeout)) { - pr_err("%s timeout waiting for descriptor submission\n", - __func__); + dev_err(tx->chan->device->dev, + "%s timeout waiting for descriptor submission\n", + __func__); return DMA_ERROR; } cpu_relax(); diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 97199b3..edf053f 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -45,22 +45,19 @@ DW_DMA_MSIZE_16; \ u8 _dmsize = _is_slave ? _sconfig->dst_maxburst : \ DW_DMA_MSIZE_16; \ + u8 _dms = (_dwc->direction == DMA_MEM_TO_DEV) ? \ + _dwc->p_master : _dwc->m_master; \ + u8 _sms = (_dwc->direction == DMA_DEV_TO_MEM) ? \ + _dwc->p_master : _dwc->m_master; \ \ (DWC_CTLL_DST_MSIZE(_dmsize) \ | DWC_CTLL_SRC_MSIZE(_smsize) \ | DWC_CTLL_LLP_D_EN \ | DWC_CTLL_LLP_S_EN \ - | DWC_CTLL_DMS(_dwc->dst_master) \ - | DWC_CTLL_SMS(_dwc->src_master)); \ + | DWC_CTLL_DMS(_dms) \ + | DWC_CTLL_SMS(_sms)); \ }) -/* - * Number of descriptors to allocate for each channel. This should be - * made configurable somehow; preferably, the clients (at least the - * ones using slave transfers) should be able to give us a hint. - */ -#define NR_DESCS_PER_CHANNEL 64 - /* The set of bus widths supported by the DMA controller */ #define DW_DMA_BUSWIDTHS \ BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ @@ -80,51 +77,65 @@ static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc) return to_dw_desc(dwc->active_list.next); } -static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) +static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) { - struct dw_desc *desc, *_desc; - struct dw_desc *ret = NULL; - unsigned int i = 0; - unsigned long flags; + struct dw_desc *desc = txd_to_dw_desc(tx); + struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); + dma_cookie_t cookie; + unsigned long flags; spin_lock_irqsave(&dwc->lock, flags); - list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) { - i++; - if (async_tx_test_ack(&desc->txd)) { - list_del(&desc->desc_node); - ret = desc; - break; - } - dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc); - } + cookie = dma_cookie_assign(tx); + + /* + * REVISIT: We should attempt to chain as many descriptors as + * possible, perhaps even appending to those already submitted + * for DMA. But this is hard to do in a race-free manner. + */ + + list_add_tail(&desc->desc_node, &dwc->queue); spin_unlock_irqrestore(&dwc->lock, flags); + dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", + __func__, desc->txd.cookie); - dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i); + return cookie; +} - return ret; +static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + struct dw_desc *desc; + dma_addr_t phys; + + desc = dma_pool_zalloc(dw->desc_pool, GFP_ATOMIC, &phys); + if (!desc) + return NULL; + + dwc->descs_allocated++; + INIT_LIST_HEAD(&desc->tx_list); + dma_async_tx_descriptor_init(&desc->txd, &dwc->chan); + desc->txd.tx_submit = dwc_tx_submit; + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.phys = phys; + return desc; } -/* - * Move a descriptor, including any children, to the free list. - * `desc' must not be on any lists. - */ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) { - unsigned long flags; + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + struct dw_desc *child, *_next; - if (desc) { - struct dw_desc *child; + if (unlikely(!desc)) + return; - spin_lock_irqsave(&dwc->lock, flags); - list_for_each_entry(child, &desc->tx_list, desc_node) - dev_vdbg(chan2dev(&dwc->chan), - "moving child desc %p to freelist\n", - child); - list_splice_init(&desc->tx_list, &dwc->free_list); - dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc); - list_add(&desc->desc_node, &dwc->free_list); - spin_unlock_irqrestore(&dwc->lock, flags); + list_for_each_entry_safe(child, _next, &desc->tx_list, desc_node) { + list_del(&child->desc_node); + dma_pool_free(dw->desc_pool, child, child->txd.phys); + dwc->descs_allocated--; } + + dma_pool_free(dw->desc_pool, desc, desc->txd.phys); + dwc->descs_allocated--; } static void dwc_initialize(struct dw_dma_chan *dwc) @@ -133,7 +144,7 @@ static void dwc_initialize(struct dw_dma_chan *dwc) u32 cfghi = DWC_CFGH_FIFO_MODE; u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); - if (dwc->initialized == true) + if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags)) return; cfghi |= DWC_CFGH_DST_PER(dwc->dst_id); @@ -146,26 +157,11 @@ static void dwc_initialize(struct dw_dma_chan *dwc) channel_set_bit(dw, MASK.XFER, dwc->mask); channel_set_bit(dw, MASK.ERROR, dwc->mask); - dwc->initialized = true; + set_bit(DW_DMA_IS_INITIALIZED, &dwc->flags); } /*----------------------------------------------------------------------*/ -static inline unsigned int dwc_fast_ffs(unsigned long long v) -{ - /* - * We can be a lot more clever here, but this should take care - * of the most common optimization. - */ - if (!(v & 7)) - return 3; - else if (!(v & 3)) - return 2; - else if (!(v & 1)) - return 1; - return 0; -} - static inline void dwc_dump_chan_regs(struct dw_dma_chan *dwc) { dev_err(chan2dev(&dwc->chan), @@ -197,12 +193,12 @@ static inline void dwc_do_single_block(struct dw_dma_chan *dwc, * Software emulation of LLP mode relies on interrupts to continue * multi block transfer. */ - ctllo = desc->lli.ctllo | DWC_CTLL_INT_EN; + ctllo = lli_read(desc, ctllo) | DWC_CTLL_INT_EN; - channel_writel(dwc, SAR, desc->lli.sar); - channel_writel(dwc, DAR, desc->lli.dar); + channel_writel(dwc, SAR, lli_read(desc, sar)); + channel_writel(dwc, DAR, lli_read(desc, dar)); channel_writel(dwc, CTL_LO, ctllo); - channel_writel(dwc, CTL_HI, desc->lli.ctlhi); + channel_writel(dwc, CTL_HI, lli_read(desc, ctlhi)); channel_set_bit(dw, CH_EN, dwc->mask); /* Move pointer to next descriptor */ @@ -213,6 +209,7 @@ static inline void dwc_do_single_block(struct dw_dma_chan *dwc, static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) { struct dw_dma *dw = to_dw_dma(dwc->chan.device); + u8 lms = DWC_LLP_LMS(dwc->m_master); unsigned long was_soft_llp; /* ASSERT: channel is idle */ @@ -237,7 +234,7 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) dwc_initialize(dwc); - dwc->residue = first->total_len; + first->residue = first->total_len; dwc->tx_node_active = &first->tx_list; /* Submit first block */ @@ -248,9 +245,8 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) dwc_initialize(dwc); - channel_writel(dwc, LLP, first->txd.phys); - channel_writel(dwc, CTL_LO, - DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + channel_writel(dwc, LLP, first->txd.phys | lms); + channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); channel_writel(dwc, CTL_HI, 0); channel_set_bit(dw, CH_EN, dwc->mask); } @@ -293,11 +289,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc, list_for_each_entry(child, &desc->tx_list, desc_node) async_tx_ack(&child->txd); async_tx_ack(&desc->txd); - - list_splice_init(&desc->tx_list, &dwc->free_list); - list_move(&desc->desc_node, &dwc->free_list); - - dma_descriptor_unmap(txd); + dwc_desc_put(dwc, desc); spin_unlock_irqrestore(&dwc->lock, flags); if (callback) @@ -368,11 +360,11 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) head = &desc->tx_list; if (active != head) { - /* Update desc to reflect last sent one */ - if (active != head->next) - desc = to_dw_desc(active->prev); - - dwc->residue -= desc->len; + /* Update residue to reflect last sent descriptor */ + if (active == head->next) + desc->residue -= desc->len; + else + desc->residue -= to_dw_desc(active->prev)->len; child = to_dw_desc(active); @@ -387,8 +379,6 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags); } - dwc->residue = 0; - spin_unlock_irqrestore(&dwc->lock, flags); dwc_complete_all(dw, dwc); @@ -396,7 +386,6 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) } if (list_empty(&dwc->active_list)) { - dwc->residue = 0; spin_unlock_irqrestore(&dwc->lock, flags); return; } @@ -411,31 +400,31 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { /* Initial residue value */ - dwc->residue = desc->total_len; + desc->residue = desc->total_len; /* Check first descriptors addr */ - if (desc->txd.phys == llp) { + if (desc->txd.phys == DWC_LLP_LOC(llp)) { spin_unlock_irqrestore(&dwc->lock, flags); return; } /* Check first descriptors llp */ - if (desc->lli.llp == llp) { + if (lli_read(desc, llp) == llp) { /* This one is currently in progress */ - dwc->residue -= dwc_get_sent(dwc); + desc->residue -= dwc_get_sent(dwc); spin_unlock_irqrestore(&dwc->lock, flags); return; } - dwc->residue -= desc->len; + desc->residue -= desc->len; list_for_each_entry(child, &desc->tx_list, desc_node) { - if (child->lli.llp == llp) { + if (lli_read(child, llp) == llp) { /* Currently in progress */ - dwc->residue -= dwc_get_sent(dwc); + desc->residue -= dwc_get_sent(dwc); spin_unlock_irqrestore(&dwc->lock, flags); return; } - dwc->residue -= child->len; + desc->residue -= child->len; } /* @@ -457,10 +446,14 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) spin_unlock_irqrestore(&dwc->lock, flags); } -static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli) +static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_desc *desc) { dev_crit(chan2dev(&dwc->chan), " desc: s0x%x d0x%x l0x%x c0x%x:%x\n", - lli->sar, lli->dar, lli->llp, lli->ctlhi, lli->ctllo); + lli_read(desc, sar), + lli_read(desc, dar), + lli_read(desc, llp), + lli_read(desc, ctlhi), + lli_read(desc, ctllo)); } static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) @@ -496,9 +489,9 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) */ dev_WARN(chan2dev(&dwc->chan), "Bad descriptor submitted for DMA!\n" " cookie: %d\n", bad_desc->txd.cookie); - dwc_dump_lli(dwc, &bad_desc->lli); + dwc_dump_lli(dwc, bad_desc); list_for_each_entry(child, &bad_desc->tx_list, desc_node) - dwc_dump_lli(dwc, &child->lli); + dwc_dump_lli(dwc, child); spin_unlock_irqrestore(&dwc->lock, flags); @@ -549,7 +542,7 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, */ if (unlikely(status_err & dwc->mask) || unlikely(status_xfer & dwc->mask)) { - int i; + unsigned int i; dev_err(chan2dev(&dwc->chan), "cyclic DMA unexpected %s interrupt, stopping DMA transfer\n", @@ -571,7 +564,7 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, dma_writel(dw, CLEAR.XFER, dwc->mask); for (i = 0; i < dwc->cdesc->periods; i++) - dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli); + dwc_dump_lli(dwc, dwc->cdesc->desc[i]); spin_unlock_irqrestore(&dwc->lock, flags); } @@ -589,7 +582,7 @@ static void dw_dma_tasklet(unsigned long data) u32 status_block; u32 status_xfer; u32 status_err; - int i; + unsigned int i; status_block = dma_readl(dw, RAW.BLOCK); status_xfer = dma_readl(dw, RAW.XFER); @@ -658,30 +651,6 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) /*----------------------------------------------------------------------*/ -static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) -{ - struct dw_desc *desc = txd_to_dw_desc(tx); - struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); - dma_cookie_t cookie; - unsigned long flags; - - spin_lock_irqsave(&dwc->lock, flags); - cookie = dma_cookie_assign(tx); - - /* - * REVISIT: We should attempt to chain as many descriptors as - * possible, perhaps even appending to those already submitted - * for DMA. But this is hard to do in a race-free manner. - */ - - dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__, desc->txd.cookie); - list_add_tail(&desc->desc_node, &dwc->queue); - - spin_unlock_irqrestore(&dwc->lock, flags); - - return cookie; -} - static struct dma_async_tx_descriptor * dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) @@ -693,10 +662,12 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, struct dw_desc *prev; size_t xfer_count; size_t offset; + u8 m_master = dwc->m_master; unsigned int src_width; unsigned int dst_width; - unsigned int data_width; + unsigned int data_width = dw->pdata->data_width[m_master]; u32 ctllo; + u8 lms = DWC_LLP_LMS(m_master); dev_vdbg(chan2dev(chan), "%s: d%pad s%pad l0x%zx f0x%lx\n", __func__, @@ -709,11 +680,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dwc->direction = DMA_MEM_TO_MEM; - data_width = min_t(unsigned int, dw->data_width[dwc->src_master], - dw->data_width[dwc->dst_master]); - - src_width = dst_width = min_t(unsigned int, data_width, - dwc_fast_ffs(src | dest | len)); + src_width = dst_width = __ffs(data_width | src | dest | len); ctllo = DWC_DEFAULT_CTLLO(chan) | DWC_CTLL_DST_WIDTH(dst_width) @@ -731,27 +698,27 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (!desc) goto err_desc_get; - desc->lli.sar = src + offset; - desc->lli.dar = dest + offset; - desc->lli.ctllo = ctllo; - desc->lli.ctlhi = xfer_count; + lli_write(desc, sar, src + offset); + lli_write(desc, dar, dest + offset); + lli_write(desc, ctllo, ctllo); + lli_write(desc, ctlhi, xfer_count); desc->len = xfer_count << src_width; if (!first) { first = desc; } else { - prev->lli.llp = desc->txd.phys; - list_add_tail(&desc->desc_node, - &first->tx_list); + lli_write(prev, llp, desc->txd.phys | lms); + list_add_tail(&desc->desc_node, &first->tx_list); } prev = desc; } if (flags & DMA_PREP_INTERRUPT) /* Trigger interrupt after last block */ - prev->lli.ctllo |= DWC_CTLL_INT_EN; + lli_set(prev, ctllo, DWC_CTLL_INT_EN); prev->lli.llp = 0; + lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); first->txd.flags = flags; first->total_len = len; @@ -773,10 +740,12 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, struct dw_desc *prev; struct dw_desc *first; u32 ctllo; + u8 m_master = dwc->m_master; + u8 lms = DWC_LLP_LMS(m_master); dma_addr_t reg; unsigned int reg_width; unsigned int mem_width; - unsigned int data_width; + unsigned int data_width = dw->pdata->data_width[m_master]; unsigned int i; struct scatterlist *sg; size_t total_len = 0; @@ -802,8 +771,6 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) : DWC_CTLL_FC(DW_DMA_FC_D_M2P); - data_width = dw->data_width[dwc->src_master]; - for_each_sg(sgl, sg, sg_len, i) { struct dw_desc *desc; u32 len, dlen, mem; @@ -811,17 +778,16 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, mem = sg_dma_address(sg); len = sg_dma_len(sg); - mem_width = min_t(unsigned int, - data_width, dwc_fast_ffs(mem | len)); + mem_width = __ffs(data_width | mem | len); slave_sg_todev_fill_desc: desc = dwc_desc_get(dwc); if (!desc) goto err_desc_get; - desc->lli.sar = mem; - desc->lli.dar = reg; - desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width); + lli_write(desc, sar, mem); + lli_write(desc, dar, reg); + lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width)); if ((len >> mem_width) > dwc->block_size) { dlen = dwc->block_size << mem_width; mem += dlen; @@ -831,15 +797,14 @@ slave_sg_todev_fill_desc: len = 0; } - desc->lli.ctlhi = dlen >> mem_width; + lli_write(desc, ctlhi, dlen >> mem_width); desc->len = dlen; if (!first) { first = desc; } else { - prev->lli.llp = desc->txd.phys; - list_add_tail(&desc->desc_node, - &first->tx_list); + lli_write(prev, llp, desc->txd.phys | lms); + list_add_tail(&desc->desc_node, &first->tx_list); } prev = desc; total_len += dlen; @@ -859,8 +824,6 @@ slave_sg_todev_fill_desc: ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) : DWC_CTLL_FC(DW_DMA_FC_D_P2M); - data_width = dw->data_width[dwc->dst_master]; - for_each_sg(sgl, sg, sg_len, i) { struct dw_desc *desc; u32 len, dlen, mem; @@ -868,17 +831,16 @@ slave_sg_todev_fill_desc: mem = sg_dma_address(sg); len = sg_dma_len(sg); - mem_width = min_t(unsigned int, - data_width, dwc_fast_ffs(mem | len)); + mem_width = __ffs(data_width | mem | len); slave_sg_fromdev_fill_desc: desc = dwc_desc_get(dwc); if (!desc) goto err_desc_get; - desc->lli.sar = reg; - desc->lli.dar = mem; - desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width); + lli_write(desc, sar, reg); + lli_write(desc, dar, mem); + lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width)); if ((len >> reg_width) > dwc->block_size) { dlen = dwc->block_size << reg_width; mem += dlen; @@ -887,15 +849,14 @@ slave_sg_fromdev_fill_desc: dlen = len; len = 0; } - desc->lli.ctlhi = dlen >> reg_width; + lli_write(desc, ctlhi, dlen >> reg_width); desc->len = dlen; if (!first) { first = desc; } else { - prev->lli.llp = desc->txd.phys; - list_add_tail(&desc->desc_node, - &first->tx_list); + lli_write(prev, llp, desc->txd.phys | lms); + list_add_tail(&desc->desc_node, &first->tx_list); } prev = desc; total_len += dlen; @@ -910,9 +871,10 @@ slave_sg_fromdev_fill_desc: if (flags & DMA_PREP_INTERRUPT) /* Trigger interrupt after last block */ - prev->lli.ctllo |= DWC_CTLL_INT_EN; + lli_set(prev, ctllo, DWC_CTLL_INT_EN); prev->lli.llp = 0; + lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); first->total_len = total_len; return &first->txd; @@ -937,8 +899,8 @@ bool dw_dma_filter(struct dma_chan *chan, void *param) dwc->src_id = dws->src_id; dwc->dst_id = dws->dst_id; - dwc->src_master = dws->src_master; - dwc->dst_master = dws->dst_master; + dwc->m_master = dws->m_master; + dwc->p_master = dws->p_master; return true; } @@ -991,7 +953,7 @@ static int dwc_pause(struct dma_chan *chan) while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--) udelay(2); - dwc->paused = true; + set_bit(DW_DMA_IS_PAUSED, &dwc->flags); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1004,7 +966,7 @@ static inline void dwc_chan_resume(struct dw_dma_chan *dwc) channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP); - dwc->paused = false; + clear_bit(DW_DMA_IS_PAUSED, &dwc->flags); } static int dwc_resume(struct dma_chan *chan) @@ -1012,12 +974,10 @@ static int dwc_resume(struct dma_chan *chan) struct dw_dma_chan *dwc = to_dw_dma_chan(chan); unsigned long flags; - if (!dwc->paused) - return 0; - spin_lock_irqsave(&dwc->lock, flags); - dwc_chan_resume(dwc); + if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags)) + dwc_chan_resume(dwc); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1053,16 +1013,37 @@ static int dwc_terminate_all(struct dma_chan *chan) return 0; } -static inline u32 dwc_get_residue(struct dw_dma_chan *dwc) +static struct dw_desc *dwc_find_desc(struct dw_dma_chan *dwc, dma_cookie_t c) { + struct dw_desc *desc; + + list_for_each_entry(desc, &dwc->active_list, desc_node) + if (desc->txd.cookie == c) + return desc; + + return NULL; +} + +static u32 dwc_get_residue(struct dw_dma_chan *dwc, dma_cookie_t cookie) +{ + struct dw_desc *desc; unsigned long flags; u32 residue; spin_lock_irqsave(&dwc->lock, flags); - residue = dwc->residue; - if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue) - residue -= dwc_get_sent(dwc); + desc = dwc_find_desc(dwc, cookie); + if (desc) { + if (desc == dwc_first_active(dwc)) { + residue = desc->residue; + if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue) + residue -= dwc_get_sent(dwc); + } else { + residue = desc->total_len; + } + } else { + residue = 0; + } spin_unlock_irqrestore(&dwc->lock, flags); return residue; @@ -1083,10 +1064,12 @@ dwc_tx_status(struct dma_chan *chan, dwc_scan_descriptors(to_dw_dma(chan->device), dwc); ret = dma_cookie_status(chan, cookie, txstate); - if (ret != DMA_COMPLETE) - dma_set_residue(txstate, dwc_get_residue(dwc)); + if (ret == DMA_COMPLETE) + return ret; + + dma_set_residue(txstate, dwc_get_residue(dwc, cookie)); - if (dwc->paused && ret == DMA_IN_PROGRESS) + if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags) && ret == DMA_IN_PROGRESS) return DMA_PAUSED; return ret; @@ -1107,7 +1090,7 @@ static void dwc_issue_pending(struct dma_chan *chan) static void dw_dma_off(struct dw_dma *dw) { - int i; + unsigned int i; dma_writel(dw, CFG, 0); @@ -1121,7 +1104,7 @@ static void dw_dma_off(struct dw_dma *dw) cpu_relax(); for (i = 0; i < dw->dma.chancnt; i++) - dw->chan[i].initialized = false; + clear_bit(DW_DMA_IS_INITIALIZED, &dw->chan[i].flags); } static void dw_dma_on(struct dw_dma *dw) @@ -1133,9 +1116,6 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(chan->device); - struct dw_desc *desc; - int i; - unsigned long flags; dev_vdbg(chan2dev(chan), "%s\n", __func__); @@ -1166,48 +1146,13 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) dw_dma_on(dw); dw->in_use |= dwc->mask; - spin_lock_irqsave(&dwc->lock, flags); - i = dwc->descs_allocated; - while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) { - dma_addr_t phys; - - spin_unlock_irqrestore(&dwc->lock, flags); - - desc = dma_pool_alloc(dw->desc_pool, GFP_ATOMIC, &phys); - if (!desc) - goto err_desc_alloc; - - memset(desc, 0, sizeof(struct dw_desc)); - - INIT_LIST_HEAD(&desc->tx_list); - dma_async_tx_descriptor_init(&desc->txd, chan); - desc->txd.tx_submit = dwc_tx_submit; - desc->txd.flags = DMA_CTRL_ACK; - desc->txd.phys = phys; - - dwc_desc_put(dwc, desc); - - spin_lock_irqsave(&dwc->lock, flags); - i = ++dwc->descs_allocated; - } - - spin_unlock_irqrestore(&dwc->lock, flags); - - dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i); - - return i; - -err_desc_alloc: - dev_info(chan2dev(chan), "only allocated %d descriptors\n", i); - - return i; + return 0; } static void dwc_free_chan_resources(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(chan->device); - struct dw_desc *desc, *_desc; unsigned long flags; LIST_HEAD(list); @@ -1220,17 +1165,15 @@ static void dwc_free_chan_resources(struct dma_chan *chan) BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask); spin_lock_irqsave(&dwc->lock, flags); - list_splice_init(&dwc->free_list, &list); - dwc->descs_allocated = 0; /* Clear custom channel configuration */ dwc->src_id = 0; dwc->dst_id = 0; - dwc->src_master = 0; - dwc->dst_master = 0; + dwc->m_master = 0; + dwc->p_master = 0; - dwc->initialized = false; + clear_bit(DW_DMA_IS_INITIALIZED, &dwc->flags); /* Disable interrupts */ channel_clear_bit(dw, MASK.XFER, dwc->mask); @@ -1244,11 +1187,6 @@ static void dwc_free_chan_resources(struct dma_chan *chan) if (!dw->in_use) dw_dma_off(dw); - list_for_each_entry_safe(desc, _desc, &list, desc_node) { - dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); - dma_pool_free(dw->desc_pool, desc, desc->txd.phys); - } - dev_vdbg(chan2dev(chan), "%s: done\n", __func__); } @@ -1326,6 +1264,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, struct dw_cyclic_desc *retval = NULL; struct dw_desc *desc; struct dw_desc *last = NULL; + u8 lms = DWC_LLP_LMS(dwc->m_master); unsigned long was_cyclic; unsigned int reg_width; unsigned int periods; @@ -1379,9 +1318,6 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, retval = ERR_PTR(-ENOMEM); - if (periods > NR_DESCS_PER_CHANNEL) - goto out_err; - cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL); if (!cdesc) goto out_err; @@ -1397,50 +1333,50 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, switch (direction) { case DMA_MEM_TO_DEV: - desc->lli.dar = sconfig->dst_addr; - desc->lli.sar = buf_addr + (period_len * i); - desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan) - | DWC_CTLL_DST_WIDTH(reg_width) - | DWC_CTLL_SRC_WIDTH(reg_width) - | DWC_CTLL_DST_FIX - | DWC_CTLL_SRC_INC - | DWC_CTLL_INT_EN); - - desc->lli.ctllo |= sconfig->device_fc ? - DWC_CTLL_FC(DW_DMA_FC_P_M2P) : - DWC_CTLL_FC(DW_DMA_FC_D_M2P); + lli_write(desc, dar, sconfig->dst_addr); + lli_write(desc, sar, buf_addr + period_len * i); + lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan) + | DWC_CTLL_DST_WIDTH(reg_width) + | DWC_CTLL_SRC_WIDTH(reg_width) + | DWC_CTLL_DST_FIX + | DWC_CTLL_SRC_INC + | DWC_CTLL_INT_EN)); + + lli_set(desc, ctllo, sconfig->device_fc ? + DWC_CTLL_FC(DW_DMA_FC_P_M2P) : + DWC_CTLL_FC(DW_DMA_FC_D_M2P)); break; case DMA_DEV_TO_MEM: - desc->lli.dar = buf_addr + (period_len * i); - desc->lli.sar = sconfig->src_addr; - desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan) - | DWC_CTLL_SRC_WIDTH(reg_width) - | DWC_CTLL_DST_WIDTH(reg_width) - | DWC_CTLL_DST_INC - | DWC_CTLL_SRC_FIX - | DWC_CTLL_INT_EN); - - desc->lli.ctllo |= sconfig->device_fc ? - DWC_CTLL_FC(DW_DMA_FC_P_P2M) : - DWC_CTLL_FC(DW_DMA_FC_D_P2M); + lli_write(desc, dar, buf_addr + period_len * i); + lli_write(desc, sar, sconfig->src_addr); + lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan) + | DWC_CTLL_SRC_WIDTH(reg_width) + | DWC_CTLL_DST_WIDTH(reg_width) + | DWC_CTLL_DST_INC + | DWC_CTLL_SRC_FIX + | DWC_CTLL_INT_EN)); + + lli_set(desc, ctllo, sconfig->device_fc ? + DWC_CTLL_FC(DW_DMA_FC_P_P2M) : + DWC_CTLL_FC(DW_DMA_FC_D_P2M)); break; default: break; } - desc->lli.ctlhi = (period_len >> reg_width); + lli_write(desc, ctlhi, period_len >> reg_width); cdesc->desc[i] = desc; if (last) - last->lli.llp = desc->txd.phys; + lli_write(last, llp, desc->txd.phys | lms); last = desc; } /* Let's make a cyclic list */ - last->lli.llp = cdesc->desc[0]->txd.phys; + lli_write(last, llp, cdesc->desc[0]->txd.phys | lms); dev_dbg(chan2dev(&dwc->chan), "cyclic prepared buf %pad len %zu period %zu periods %d\n", @@ -1471,7 +1407,7 @@ void dw_dma_cyclic_free(struct dma_chan *chan) struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(dwc->chan.device); struct dw_cyclic_desc *cdesc = dwc->cdesc; - int i; + unsigned int i; unsigned long flags; dev_dbg(chan2dev(&dwc->chan), "%s\n", __func__); @@ -1495,32 +1431,38 @@ void dw_dma_cyclic_free(struct dma_chan *chan) kfree(cdesc->desc); kfree(cdesc); + dwc->cdesc = NULL; + clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags); } EXPORT_SYMBOL(dw_dma_cyclic_free); /*----------------------------------------------------------------------*/ -int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) +int dw_dma_probe(struct dw_dma_chip *chip) { + struct dw_dma_platform_data *pdata; struct dw_dma *dw; bool autocfg = false; unsigned int dw_params; - unsigned int max_blk_size = 0; + unsigned int i; int err; - int i; dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL); if (!dw) return -ENOMEM; + dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL); + if (!dw->pdata) + return -ENOMEM; + dw->regs = chip->regs; chip->dw = dw; pm_runtime_get_sync(chip->dev); - if (!pdata) { - dw_params = dma_read_byaddr(chip->regs, DW_PARAMS); + if (!chip->pdata) { + dw_params = dma_readl(dw, DW_PARAMS); dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params); autocfg = dw_params >> DW_PARAMS_EN & 1; @@ -1529,29 +1471,31 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) goto err_pdata; } - pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) { - err = -ENOMEM; - goto err_pdata; - } + /* Reassign the platform data pointer */ + pdata = dw->pdata; /* Get hardware configuration parameters */ pdata->nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1; pdata->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1; for (i = 0; i < pdata->nr_masters; i++) { pdata->data_width[i] = - (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2; + 4 << (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3); } - max_blk_size = dma_readl(dw, MAX_BLK_SIZE); + pdata->block_size = dma_readl(dw, MAX_BLK_SIZE); /* Fill platform data with the default values */ pdata->is_private = true; pdata->is_memcpy = true; pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING; pdata->chan_priority = CHAN_PRIORITY_ASCENDING; - } else if (pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { + } else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { err = -EINVAL; goto err_pdata; + } else { + memcpy(dw->pdata, chip->pdata, sizeof(*dw->pdata)); + + /* Reassign the platform data pointer */ + pdata = dw->pdata; } dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan), @@ -1561,11 +1505,6 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) goto err_pdata; } - /* Get hardware configuration parameters */ - dw->nr_masters = pdata->nr_masters; - for (i = 0; i < dw->nr_masters; i++) - dw->data_width[i] = pdata->data_width[i]; - /* Calculate all channel mask before DMA setup */ dw->all_chan_mask = (1 << pdata->nr_channels) - 1; @@ -1612,7 +1551,6 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) INIT_LIST_HEAD(&dwc->active_list); INIT_LIST_HEAD(&dwc->queue); - INIT_LIST_HEAD(&dwc->free_list); channel_clear_bit(dw, CH_EN, dwc->mask); @@ -1620,11 +1558,9 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* Hardware configuration */ if (autocfg) { - unsigned int dwc_params; unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1; - void __iomem *addr = chip->regs + r * sizeof(u32); - - dwc_params = dma_read_byaddr(addr, DWC_PARAMS); + void __iomem *addr = &__dw_regs(dw)->DWC_PARAMS[r]; + unsigned int dwc_params = dma_readl_native(addr); dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i, dwc_params); @@ -1635,16 +1571,15 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) * up to 0x0a for 4095. */ dwc->block_size = - (4 << ((max_blk_size >> 4 * i) & 0xf)) - 1; + (4 << ((pdata->block_size >> 4 * i) & 0xf)) - 1; dwc->nollp = (dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0; } else { dwc->block_size = pdata->block_size; /* Check if channel supports multi block transfer */ - channel_writel(dwc, LLP, 0xfffffffc); - dwc->nollp = - (channel_readl(dwc, LLP) & 0xfffffffc) == 0; + channel_writel(dwc, LLP, DWC_LLP_LOC(0xffffffff)); + dwc->nollp = DWC_LLP_LOC(channel_readl(dwc, LLP)) == 0; channel_writel(dwc, LLP, 0); } } diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c index 358f968..0ae6c3b 100644 --- a/drivers/dma/dw/pci.c +++ b/drivers/dma/dw/pci.c @@ -17,8 +17,8 @@ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) { + const struct dw_dma_platform_data *pdata = (void *)pid->driver_data; struct dw_dma_chip *chip; - struct dw_dma_platform_data *pdata = (void *)pid->driver_data; int ret; ret = pcim_enable_device(pdev); @@ -49,8 +49,9 @@ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) chip->dev = &pdev->dev; chip->regs = pcim_iomap_table(pdev)[0]; chip->irq = pdev->irq; + chip->pdata = pdata; - ret = dw_dma_probe(chip, pdata); + ret = dw_dma_probe(chip); if (ret) return ret; diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index 26edbe3..5bda0eb 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -42,13 +42,13 @@ static struct dma_chan *dw_dma_of_xlate(struct of_phandle_args *dma_spec, slave.src_id = dma_spec->args[0]; slave.dst_id = dma_spec->args[0]; - slave.src_master = dma_spec->args[1]; - slave.dst_master = dma_spec->args[2]; + slave.m_master = dma_spec->args[1]; + slave.p_master = dma_spec->args[2]; if (WARN_ON(slave.src_id >= DW_DMA_MAX_NR_REQUESTS || slave.dst_id >= DW_DMA_MAX_NR_REQUESTS || - slave.src_master >= dw->nr_masters || - slave.dst_master >= dw->nr_masters)) + slave.m_master >= dw->pdata->nr_masters || + slave.p_master >= dw->pdata->nr_masters)) return NULL; dma_cap_zero(cap); @@ -66,8 +66,8 @@ static bool dw_dma_acpi_filter(struct dma_chan *chan, void *param) .dma_dev = dma_spec->dev, .src_id = dma_spec->slave_id, .dst_id = dma_spec->slave_id, - .src_master = 1, - .dst_master = 0, + .m_master = 0, + .p_master = 1, }; return dw_dma_filter(chan, &slave); @@ -103,6 +103,7 @@ dw_dma_parse_dt(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; struct dw_dma_platform_data *pdata; u32 tmp, arr[DW_DMA_MAX_NR_MASTERS]; + u32 nr_masters; u32 nr_channels; if (!np) { @@ -110,6 +111,11 @@ dw_dma_parse_dt(struct platform_device *pdev) return NULL; } + if (of_property_read_u32(np, "dma-masters", &nr_masters)) + return NULL; + if (nr_masters < 1 || nr_masters > DW_DMA_MAX_NR_MASTERS) + return NULL; + if (of_property_read_u32(np, "dma-channels", &nr_channels)) return NULL; @@ -117,6 +123,7 @@ dw_dma_parse_dt(struct platform_device *pdev) if (!pdata) return NULL; + pdata->nr_masters = nr_masters; pdata->nr_channels = nr_channels; if (of_property_read_bool(np, "is_private")) @@ -131,17 +138,13 @@ dw_dma_parse_dt(struct platform_device *pdev) if (!of_property_read_u32(np, "block_size", &tmp)) pdata->block_size = tmp; - if (!of_property_read_u32(np, "dma-masters", &tmp)) { - if (tmp > DW_DMA_MAX_NR_MASTERS) - return NULL; - - pdata->nr_masters = tmp; - } - - if (!of_property_read_u32_array(np, "data_width", arr, - pdata->nr_masters)) - for (tmp = 0; tmp < pdata->nr_masters; tmp++) + if (!of_property_read_u32_array(np, "data-width", arr, nr_masters)) { + for (tmp = 0; tmp < nr_masters; tmp++) pdata->data_width[tmp] = arr[tmp]; + } else if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) { + for (tmp = 0; tmp < nr_masters; tmp++) + pdata->data_width[tmp] = BIT(arr[tmp] & 0x07); + } return pdata; } @@ -158,7 +161,7 @@ static int dw_probe(struct platform_device *pdev) struct dw_dma_chip *chip; struct device *dev = &pdev->dev; struct resource *mem; - struct dw_dma_platform_data *pdata; + const struct dw_dma_platform_data *pdata; int err; chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); @@ -183,6 +186,7 @@ static int dw_probe(struct platform_device *pdev) pdata = dw_dma_parse_dt(pdev); chip->dev = dev; + chip->pdata = pdata; chip->clk = devm_clk_get(chip->dev, "hclk"); if (IS_ERR(chip->clk)) @@ -193,7 +197,7 @@ static int dw_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); - err = dw_dma_probe(chip, pdata); + err = dw_dma_probe(chip); if (err) goto err_dw_dma_probe; diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index 0a50c18..4b7bd78 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -114,10 +114,6 @@ struct dw_dma_regs { #define dma_writel_native writel #endif -/* To access the registers in early stage of probe */ -#define dma_read_byaddr(addr, name) \ - dma_readl_native((addr) + offsetof(struct dw_dma_regs, name)) - /* Bitfields in DW_PARAMS */ #define DW_PARAMS_NR_CHAN 8 /* number of channels */ #define DW_PARAMS_NR_MASTER 11 /* number of AHB masters */ @@ -143,6 +139,10 @@ enum dw_dma_msize { DW_DMA_MSIZE_256, }; +/* Bitfields in LLP */ +#define DWC_LLP_LMS(x) ((x) & 3) /* list master select */ +#define DWC_LLP_LOC(x) ((x) & ~3) /* next lli */ + /* Bitfields in CTL_LO */ #define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? */ #define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */ @@ -216,6 +216,8 @@ enum dw_dma_msize { enum dw_dmac_flags { DW_DMA_IS_CYCLIC = 0, DW_DMA_IS_SOFT_LLP = 1, + DW_DMA_IS_PAUSED = 2, + DW_DMA_IS_INITIALIZED = 3, }; struct dw_dma_chan { @@ -224,8 +226,6 @@ struct dw_dma_chan { u8 mask; u8 priority; enum dma_transfer_direction direction; - bool paused; - bool initialized; /* software emulation of the LLP transfers */ struct list_head *tx_node_active; @@ -236,8 +236,6 @@ struct dw_dma_chan { unsigned long flags; struct list_head active_list; struct list_head queue; - struct list_head free_list; - u32 residue; struct dw_cyclic_desc *cdesc; unsigned int descs_allocated; @@ -249,8 +247,8 @@ struct dw_dma_chan { /* custom slave configuration */ u8 src_id; u8 dst_id; - u8 src_master; - u8 dst_master; + u8 m_master; + u8 p_master; /* configuration passed via .device_config */ struct dma_slave_config dma_sconfig; @@ -283,9 +281,8 @@ struct dw_dma { u8 all_chan_mask; u8 in_use; - /* hardware configuration */ - unsigned char nr_masters; - unsigned char data_width[DW_DMA_MAX_NR_MASTERS]; + /* platform data */ + struct dw_dma_platform_data *pdata; }; static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw) @@ -308,32 +305,51 @@ static inline struct dw_dma *to_dw_dma(struct dma_device *ddev) return container_of(ddev, struct dw_dma, dma); } +#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO +typedef __be32 __dw32; +#else +typedef __le32 __dw32; +#endif + /* LLI == Linked List Item; a.k.a. DMA block descriptor */ struct dw_lli { /* values that are not changed by hardware */ - u32 sar; - u32 dar; - u32 llp; /* chain to next lli */ - u32 ctllo; + __dw32 sar; + __dw32 dar; + __dw32 llp; /* chain to next lli */ + __dw32 ctllo; /* values that may get written back: */ - u32 ctlhi; + __dw32 ctlhi; /* sstat and dstat can snapshot peripheral register state. * silicon config may discard either or both... */ - u32 sstat; - u32 dstat; + __dw32 sstat; + __dw32 dstat; }; struct dw_desc { /* FIRST values the hardware uses */ struct dw_lli lli; +#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO +#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_be32(v)) +#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_be32(v)) +#define lli_read(d, reg) be32_to_cpu((d)->lli.reg) +#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_be32(v)) +#else +#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_le32(v)) +#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_le32(v)) +#define lli_read(d, reg) le32_to_cpu((d)->lli.reg) +#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_le32(v)) +#endif + /* THEN values for driver housekeeping */ struct list_head desc_node; struct list_head tx_list; struct dma_async_tx_descriptor txd; size_t len; size_t total_len; + u32 residue; }; #define to_dw_desc(h) list_entry(h, struct dw_desc, desc_node) diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 04070ba..8181ed1 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1537,8 +1537,17 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data) dev_vdbg(ecc->dev, "dma_ccerr_handler\n"); - if (!edma_error_pending(ecc)) + if (!edma_error_pending(ecc)) { + /* + * The registers indicate no pending error event but the irq + * handler has been called. + * Ask eDMA to re-evaluate the error registers. + */ + dev_err(ecc->dev, "%s: Error interrupt without error event!\n", + __func__); + edma_write(ecc, EDMA_EEVAL, 1); return IRQ_NONE; + } while (1) { /* Event missed register(s) */ diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index aac85c3..a8828ed 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -462,13 +462,12 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(struct fsldma_chan *chan) struct fsl_desc_sw *desc; dma_addr_t pdesc; - desc = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &pdesc); + desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &pdesc); if (!desc) { chan_dbg(chan, "out of memory for link descriptor\n"); return NULL; } - memset(desc, 0, sizeof(*desc)); INIT_LIST_HEAD(&desc->tx_list); dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); desc->async_tx.tx_submit = fsl_dma_tx_submit; diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c index ee51051..f8c5cd5 100644 --- a/drivers/dma/hsu/hsu.c +++ b/drivers/dma/hsu/hsu.c @@ -77,8 +77,8 @@ static void hsu_dma_chan_start(struct hsu_dma_chan *hsuc) hsu_chan_writel(hsuc, HSU_CH_MTSR, mtsr); /* Set descriptors */ - count = (desc->nents - desc->active) % HSU_DMA_CHAN_NR_DESC; - for (i = 0; i < count; i++) { + count = desc->nents - desc->active; + for (i = 0; i < count && i < HSU_DMA_CHAN_NR_DESC; i++) { hsu_chan_writel(hsuc, HSU_CH_DxSAR(i), desc->sg[i].addr); hsu_chan_writel(hsuc, HSU_CH_DxTSR(i), desc->sg[i].len); @@ -160,7 +160,7 @@ irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr) return IRQ_NONE; /* Timeout IRQ, need wait some time, see Errata 2 */ - if (hsuc->direction == DMA_DEV_TO_MEM && (sr & HSU_CH_SR_DESCTO_ANY)) + if (sr & HSU_CH_SR_DESCTO_ANY) udelay(2); sr &= ~HSU_CH_SR_DESCTO_ANY; @@ -420,6 +420,8 @@ int hsu_dma_probe(struct hsu_dma_chip *chip) hsu->dma.dev = chip->dev; + dma_set_max_seg_size(hsu->dma.dev, HSU_CH_DxTSR_MASK); + ret = dma_async_device_register(&hsu->dma); if (ret) return ret; diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h index 6b070c2..486b023b 100644 --- a/drivers/dma/hsu/hsu.h +++ b/drivers/dma/hsu/hsu.h @@ -58,6 +58,10 @@ #define HSU_CH_DCR_CHEI BIT(23) #define HSU_CH_DCR_CHTOI(x) BIT(24 + (x)) +/* Bits in HSU_CH_DxTSR */ +#define HSU_CH_DxTSR_MASK GENMASK(15, 0) +#define HSU_CH_DxTSR_TSR(x) ((x) & HSU_CH_DxTSR_MASK) + struct hsu_dma_sg { dma_addr_t addr; unsigned int len; diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index efdee1a..d406056 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -690,12 +690,11 @@ static int ioat_alloc_chan_resources(struct dma_chan *c) /* allocate a completion writeback area */ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ ioat_chan->completion = - dma_pool_alloc(ioat_chan->ioat_dma->completion_pool, - GFP_KERNEL, &ioat_chan->completion_dma); + dma_pool_zalloc(ioat_chan->ioat_dma->completion_pool, + GFP_KERNEL, &ioat_chan->completion_dma); if (!ioat_chan->completion) return -ENOMEM; - memset(ioat_chan->completion, 0, sizeof(*ioat_chan->completion)); writel(((u64)ioat_chan->completion_dma) & 0x00000000FFFFFFFF, ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); writel(((u64)ioat_chan->completion_dma) >> 32, @@ -1074,6 +1073,7 @@ static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) struct ioatdma_chan *ioat_chan; bool is_raid_device = false; int err; + u16 val16; dma = &ioat_dma->dma_dev; dma->device_prep_dma_memcpy = ioat_dma_prep_memcpy_lock; @@ -1173,6 +1173,17 @@ static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) if (dca) ioat_dma->dca = ioat_dca_init(pdev, ioat_dma->reg_base); + /* disable relaxed ordering */ + err = pcie_capability_read_word(pdev, IOAT_DEVCTRL_OFFSET, &val16); + if (err) + return err; + + /* clear relaxed ordering enable */ + val16 &= ~IOAT_DEVCTRL_ROE; + err = pcie_capability_write_word(pdev, IOAT_DEVCTRL_OFFSET, val16); + if (err) + return err; + return 0; } diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index 4994a36..7053498 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -26,6 +26,13 @@ #define IOAT_PCI_CHANERR_INT_OFFSET 0x180 #define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 +/* PCIe config registers */ + +/* EXPCAPID + N */ +#define IOAT_DEVCTRL_OFFSET 0x8 +/* relaxed ordering enable */ +#define IOAT_DEVCTRL_ROE 0x10 + /* MMIO Device Registers */ #define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */ diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c index e39457f..56f1fd6 100644 --- a/drivers/dma/mmp_pdma.c +++ b/drivers/dma/mmp_pdma.c @@ -364,13 +364,12 @@ mmp_pdma_alloc_descriptor(struct mmp_pdma_chan *chan) struct mmp_pdma_desc_sw *desc; dma_addr_t pdesc; - desc = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &pdesc); + desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &pdesc); if (!desc) { dev_err(chan->dev, "out of memory for link descriptor\n"); return NULL; } - memset(desc, 0, sizeof(*desc)); INIT_LIST_HEAD(&desc->tx_list); dma_async_tx_descriptor_init(&desc->async_tx, &chan->chan); /* each desc has submit */ diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c index aae76fb..ccadafa 100644 --- a/drivers/dma/mpc512x_dma.c +++ b/drivers/dma/mpc512x_dma.c @@ -3,6 +3,7 @@ * Copyright (C) Semihalf 2009 * Copyright (C) Ilya Yanok, Emcraft Systems 2010 * Copyright (C) Alexander Popov, Promcontroller 2014 + * Copyright (C) Mario Six, Guntermann & Drunck GmbH, 2016 * * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description * (defines, structures and comments) was taken from MPC5121 DMA driver @@ -26,18 +27,19 @@ */ /* - * MPC512x and MPC8308 DMA driver. It supports - * memory to memory data transfers (tested using dmatest module) and - * data transfers between memory and peripheral I/O memory - * by means of slave scatter/gather with these limitations: - * - chunked transfers (described by s/g lists with more than one item) - * are refused as long as proper support for scatter/gather is missing; - * - transfers on MPC8308 always start from software as this SoC appears - * not to have external request lines for peripheral flow control; - * - only peripheral devices with 4-byte FIFO access register are supported; - * - minimal memory <-> I/O memory transfer chunk is 4 bytes and consequently - * source and destination addresses must be 4-byte aligned - * and transfer size must be aligned on (4 * maxburst) boundary; + * MPC512x and MPC8308 DMA driver. It supports memory to memory data transfers + * (tested using dmatest module) and data transfers between memory and + * peripheral I/O memory by means of slave scatter/gather with these + * limitations: + * - chunked transfers (described by s/g lists with more than one item) are + * refused as long as proper support for scatter/gather is missing + * - transfers on MPC8308 always start from software as this SoC does not have + * external request lines for peripheral flow control + * - memory <-> I/O memory transfer chunks of sizes of 1, 2, 4, 16 (for + * MPC512x), and 32 bytes are supported, and, consequently, source + * addresses and destination addresses must be aligned accordingly; + * furthermore, for MPC512x SoCs, the transfer size must be aligned on + * (chunk size * maxburst) */ #include <linux/module.h> @@ -213,8 +215,10 @@ struct mpc_dma_chan { /* Settings for access to peripheral FIFO */ dma_addr_t src_per_paddr; u32 src_tcd_nunits; + u8 swidth; dma_addr_t dst_per_paddr; u32 dst_tcd_nunits; + u8 dwidth; /* Lock for this structure */ spinlock_t lock; @@ -247,6 +251,7 @@ static inline struct mpc_dma_chan *dma_chan_to_mpc_dma_chan(struct dma_chan *c) static inline struct mpc_dma *dma_chan_to_mpc_dma(struct dma_chan *c) { struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(c); + return container_of(mchan, struct mpc_dma, channels[c->chan_id]); } @@ -254,9 +259,9 @@ static inline struct mpc_dma *dma_chan_to_mpc_dma(struct dma_chan *c) * Execute all queued DMA descriptors. * * Following requirements must be met while calling mpc_dma_execute(): - * a) mchan->lock is acquired, - * b) mchan->active list is empty, - * c) mchan->queued list contains at least one entry. + * a) mchan->lock is acquired, + * b) mchan->active list is empty, + * c) mchan->queued list contains at least one entry. */ static void mpc_dma_execute(struct mpc_dma_chan *mchan) { @@ -446,20 +451,15 @@ static void mpc_dma_tasklet(unsigned long data) if (es & MPC_DMA_DMAES_SAE) dev_err(mdma->dma.dev, "- Source Address Error\n"); if (es & MPC_DMA_DMAES_SOE) - dev_err(mdma->dma.dev, "- Source Offset" - " Configuration Error\n"); + dev_err(mdma->dma.dev, "- Source Offset Configuration Error\n"); if (es & MPC_DMA_DMAES_DAE) - dev_err(mdma->dma.dev, "- Destination Address" - " Error\n"); + dev_err(mdma->dma.dev, "- Destination Address Error\n"); if (es & MPC_DMA_DMAES_DOE) - dev_err(mdma->dma.dev, "- Destination Offset" - " Configuration Error\n"); + dev_err(mdma->dma.dev, "- Destination Offset Configuration Error\n"); if (es & MPC_DMA_DMAES_NCE) - dev_err(mdma->dma.dev, "- NBytes/Citter" - " Configuration Error\n"); + dev_err(mdma->dma.dev, "- NBytes/Citter Configuration Error\n"); if (es & MPC_DMA_DMAES_SGE) - dev_err(mdma->dma.dev, "- Scatter/Gather" - " Configuration Error\n"); + dev_err(mdma->dma.dev, "- Scatter/Gather Configuration Error\n"); if (es & MPC_DMA_DMAES_SBE) dev_err(mdma->dma.dev, "- Source Bus Error\n"); if (es & MPC_DMA_DMAES_DBE) @@ -518,8 +518,8 @@ static int mpc_dma_alloc_chan_resources(struct dma_chan *chan) for (i = 0; i < MPC_DMA_DESCRIPTORS; i++) { mdesc = kzalloc(sizeof(struct mpc_dma_desc), GFP_KERNEL); if (!mdesc) { - dev_notice(mdma->dma.dev, "Memory allocation error. " - "Allocated only %u descriptors\n", i); + dev_notice(mdma->dma.dev, + "Memory allocation error. Allocated only %u descriptors\n", i); break; } @@ -684,6 +684,15 @@ mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, return &mdesc->desc; } +inline u8 buswidth_to_dmatsize(u8 buswidth) +{ + u8 res; + + for (res = 0; buswidth > 1; buswidth /= 2) + res++; + return res; +} + static struct dma_async_tx_descriptor * mpc_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction direction, @@ -742,39 +751,54 @@ mpc_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, memset(tcd, 0, sizeof(struct mpc_dma_tcd)); - if (!IS_ALIGNED(sg_dma_address(sg), 4)) - goto err_prep; - if (direction == DMA_DEV_TO_MEM) { tcd->saddr = per_paddr; tcd->daddr = sg_dma_address(sg); + + if (!IS_ALIGNED(sg_dma_address(sg), mchan->dwidth)) + goto err_prep; + tcd->soff = 0; - tcd->doff = 4; + tcd->doff = mchan->dwidth; } else { tcd->saddr = sg_dma_address(sg); tcd->daddr = per_paddr; - tcd->soff = 4; + + if (!IS_ALIGNED(sg_dma_address(sg), mchan->swidth)) + goto err_prep; + + tcd->soff = mchan->swidth; tcd->doff = 0; } - tcd->ssize = MPC_DMA_TSIZE_4; - tcd->dsize = MPC_DMA_TSIZE_4; + tcd->ssize = buswidth_to_dmatsize(mchan->swidth); + tcd->dsize = buswidth_to_dmatsize(mchan->dwidth); - len = sg_dma_len(sg); - tcd->nbytes = tcd_nunits * 4; - if (!IS_ALIGNED(len, tcd->nbytes)) - goto err_prep; + if (mdma->is_mpc8308) { + tcd->nbytes = sg_dma_len(sg); + if (!IS_ALIGNED(tcd->nbytes, mchan->swidth)) + goto err_prep; - iter = len / tcd->nbytes; - if (iter >= 1 << 15) { - /* len is too big */ - goto err_prep; + /* No major loops for MPC8303 */ + tcd->biter = 1; + tcd->citer = 1; + } else { + len = sg_dma_len(sg); + tcd->nbytes = tcd_nunits * tcd->ssize; + if (!IS_ALIGNED(len, tcd->nbytes)) + goto err_prep; + + iter = len / tcd->nbytes; + if (iter >= 1 << 15) { + /* len is too big */ + goto err_prep; + } + /* citer_linkch contains the high bits of iter */ + tcd->biter = iter & 0x1ff; + tcd->biter_linkch = iter >> 9; + tcd->citer = tcd->biter; + tcd->citer_linkch = tcd->biter_linkch; } - /* citer_linkch contains the high bits of iter */ - tcd->biter = iter & 0x1ff; - tcd->biter_linkch = iter >> 9; - tcd->citer = tcd->biter; - tcd->citer_linkch = tcd->biter_linkch; tcd->e_sg = 0; tcd->d_req = 1; @@ -796,40 +820,62 @@ err_prep: return NULL; } +inline bool is_buswidth_valid(u8 buswidth, bool is_mpc8308) +{ + switch (buswidth) { + case 16: + if (is_mpc8308) + return false; + case 1: + case 2: + case 4: + case 32: + break; + default: + return false; + } + + return true; +} + static int mpc_dma_device_config(struct dma_chan *chan, struct dma_slave_config *cfg) { struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); + struct mpc_dma *mdma = dma_chan_to_mpc_dma(&mchan->chan); unsigned long flags; /* * Software constraints: - * - only transfers between a peripheral device and - * memory are supported; - * - only peripheral devices with 4-byte FIFO access register - * are supported; - * - minimal transfer chunk is 4 bytes and consequently - * source and destination addresses must be 4-byte aligned - * and transfer size must be aligned on (4 * maxburst) - * boundary; - * - during the transfer RAM address is being incremented by - * the size of minimal transfer chunk; - * - peripheral port's address is constant during the transfer. + * - only transfers between a peripheral device and memory are + * supported + * - transfer chunk sizes of 1, 2, 4, 16 (for MPC512x), and 32 bytes + * are supported, and, consequently, source addresses and + * destination addresses; must be aligned accordingly; furthermore, + * for MPC512x SoCs, the transfer size must be aligned on (chunk + * size * maxburst) + * - during the transfer, the RAM address is incremented by the size + * of transfer chunk + * - the peripheral port's address is constant during the transfer. */ - if (cfg->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES || - cfg->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES || - !IS_ALIGNED(cfg->src_addr, 4) || - !IS_ALIGNED(cfg->dst_addr, 4)) { + if (!IS_ALIGNED(cfg->src_addr, cfg->src_addr_width) || + !IS_ALIGNED(cfg->dst_addr, cfg->dst_addr_width)) { return -EINVAL; } + if (!is_buswidth_valid(cfg->src_addr_width, mdma->is_mpc8308) || + !is_buswidth_valid(cfg->dst_addr_width, mdma->is_mpc8308)) + return -EINVAL; + spin_lock_irqsave(&mchan->lock, flags); mchan->src_per_paddr = cfg->src_addr; mchan->src_tcd_nunits = cfg->src_maxburst; + mchan->swidth = cfg->src_addr_width; mchan->dst_per_paddr = cfg->dst_addr; mchan->dst_tcd_nunits = cfg->dst_maxburst; + mchan->dwidth = cfg->dst_addr_width; /* Apply defaults */ if (mchan->src_tcd_nunits == 0) @@ -875,7 +921,6 @@ static int mpc_dma_probe(struct platform_device *op) mdma = devm_kzalloc(dev, sizeof(struct mpc_dma), GFP_KERNEL); if (!mdma) { - dev_err(dev, "Memory exhausted!\n"); retval = -ENOMEM; goto err; } @@ -999,7 +1044,8 @@ static int mpc_dma_probe(struct platform_device *op) out_be32(&mdma->regs->dmaerrl, 0xFFFF); } else { out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_EDCG | - MPC_DMA_DMACR_ERGA | MPC_DMA_DMACR_ERCA); + MPC_DMA_DMACR_ERGA | + MPC_DMA_DMACR_ERCA); /* Disable hardware DMA requests */ out_be32(&mdma->regs->dmaerqh, 0); diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 3922a5d..25d1dad 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -31,6 +31,12 @@ #include "dmaengine.h" #include "mv_xor.h" +enum mv_xor_type { + XOR_ORION, + XOR_ARMADA_38X, + XOR_ARMADA_37XX, +}; + enum mv_xor_mode { XOR_MODE_IN_REG, XOR_MODE_IN_DESC, @@ -477,7 +483,7 @@ mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); dev_dbg(mv_chan_to_devp(mv_chan), - "%s src_cnt: %d len: %u dest %pad flags: %ld\n", + "%s src_cnt: %d len: %zu dest %pad flags: %ld\n", __func__, src_cnt, len, &dest, flags); sw_desc = mv_chan_alloc_slot(mv_chan); @@ -933,7 +939,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan) static struct mv_xor_chan * mv_xor_channel_add(struct mv_xor_device *xordev, struct platform_device *pdev, - int idx, dma_cap_mask_t cap_mask, int irq, int op_in_desc) + int idx, dma_cap_mask_t cap_mask, int irq) { int ret = 0; struct mv_xor_chan *mv_chan; @@ -945,7 +951,10 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan->idx = idx; mv_chan->irq = irq; - mv_chan->op_in_desc = op_in_desc; + if (xordev->xor_type == XOR_ORION) + mv_chan->op_in_desc = XOR_MODE_IN_REG; + else + mv_chan->op_in_desc = XOR_MODE_IN_DESC; dma_dev = &mv_chan->dmadev; @@ -1085,6 +1094,33 @@ mv_xor_conf_mbus_windows(struct mv_xor_device *xordev, writel(0, base + WINDOW_OVERRIDE_CTRL(1)); } +static void +mv_xor_conf_mbus_windows_a3700(struct mv_xor_device *xordev) +{ + void __iomem *base = xordev->xor_high_base; + u32 win_enable = 0; + int i; + + for (i = 0; i < 8; i++) { + writel(0, base + WINDOW_BASE(i)); + writel(0, base + WINDOW_SIZE(i)); + if (i < 4) + writel(0, base + WINDOW_REMAP_HIGH(i)); + } + /* + * For Armada3700 open default 4GB Mbus window. The dram + * related configuration are done at AXIS level. + */ + writel(0xffff0000, base + WINDOW_SIZE(0)); + win_enable |= 1; + win_enable |= 3 << 16; + + writel(win_enable, base + WINDOW_BAR_ENABLE(0)); + writel(win_enable, base + WINDOW_BAR_ENABLE(1)); + writel(0, base + WINDOW_OVERRIDE_CTRL(0)); + writel(0, base + WINDOW_OVERRIDE_CTRL(1)); +} + /* * Since this XOR driver is basically used only for RAID5, we don't * need to care about synchronizing ->suspend with DMA activity, @@ -1129,6 +1165,11 @@ static int mv_xor_resume(struct platform_device *dev) XOR_INTR_MASK(mv_chan)); } + if (xordev->xor_type == XOR_ARMADA_37XX) { + mv_xor_conf_mbus_windows_a3700(xordev); + return 0; + } + dram = mv_mbus_dram_info(); if (dram) mv_xor_conf_mbus_windows(xordev, dram); @@ -1137,8 +1178,9 @@ static int mv_xor_resume(struct platform_device *dev) } static const struct of_device_id mv_xor_dt_ids[] = { - { .compatible = "marvell,orion-xor", .data = (void *)XOR_MODE_IN_REG }, - { .compatible = "marvell,armada-380-xor", .data = (void *)XOR_MODE_IN_DESC }, + { .compatible = "marvell,orion-xor", .data = (void *)XOR_ORION }, + { .compatible = "marvell,armada-380-xor", .data = (void *)XOR_ARMADA_38X }, + { .compatible = "marvell,armada-3700-xor", .data = (void *)XOR_ARMADA_37XX }, {}, }; @@ -1152,7 +1194,6 @@ static int mv_xor_probe(struct platform_device *pdev) struct resource *res; unsigned int max_engines, max_channels; int i, ret; - int op_in_desc; dev_notice(&pdev->dev, "Marvell shared XOR driver\n"); @@ -1180,12 +1221,30 @@ static int mv_xor_probe(struct platform_device *pdev) platform_set_drvdata(pdev, xordev); + + /* + * We need to know which type of XOR device we use before + * setting up. In non-dt case it can only be the legacy one. + */ + xordev->xor_type = XOR_ORION; + if (pdev->dev.of_node) { + const struct of_device_id *of_id = + of_match_device(mv_xor_dt_ids, + &pdev->dev); + + xordev->xor_type = (uintptr_t)of_id->data; + } + /* * (Re-)program MBUS remapping windows if we are asked to. */ - dram = mv_mbus_dram_info(); - if (dram) - mv_xor_conf_mbus_windows(xordev, dram); + if (xordev->xor_type == XOR_ARMADA_37XX) { + mv_xor_conf_mbus_windows_a3700(xordev); + } else { + dram = mv_mbus_dram_info(); + if (dram) + mv_xor_conf_mbus_windows(xordev, dram); + } /* Not all platforms can gate the clock, so it is not * an error if the clock does not exists. @@ -1199,12 +1258,16 @@ static int mv_xor_probe(struct platform_device *pdev) * order for async_tx to perform well. So we limit the number * of engines and channels so that we take into account this * constraint. Note that we also want to use channels from - * separate engines when possible. + * separate engines when possible. For dual-CPU Armada 3700 + * SoC with single XOR engine allow using its both channels. */ max_engines = num_present_cpus(); - max_channels = min_t(unsigned int, - MV_XOR_MAX_CHANNELS, - DIV_ROUND_UP(num_present_cpus(), 2)); + if (xordev->xor_type == XOR_ARMADA_37XX) + max_channels = num_present_cpus(); + else + max_channels = min_t(unsigned int, + MV_XOR_MAX_CHANNELS, + DIV_ROUND_UP(num_present_cpus(), 2)); if (mv_xor_engine_count >= max_engines) return 0; @@ -1212,15 +1275,11 @@ static int mv_xor_probe(struct platform_device *pdev) if (pdev->dev.of_node) { struct device_node *np; int i = 0; - const struct of_device_id *of_id = - of_match_device(mv_xor_dt_ids, - &pdev->dev); for_each_child_of_node(pdev->dev.of_node, np) { struct mv_xor_chan *chan; dma_cap_mask_t cap_mask; int irq; - op_in_desc = (int)of_id->data; if (i >= max_channels) continue; @@ -1237,7 +1296,7 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cap_mask, irq, op_in_desc); + cap_mask, irq); if (IS_ERR(chan)) { ret = PTR_ERR(chan); irq_dispose_mapping(irq); @@ -1266,8 +1325,7 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cd->cap_mask, irq, - XOR_MODE_IN_REG); + cd->cap_mask, irq); if (IS_ERR(chan)) { ret = PTR_ERR(chan); goto err_channel_add; diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index c19fe30..bf56e08 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -85,6 +85,7 @@ struct mv_xor_device { void __iomem *xor_high_base; struct clk *clk; struct mv_xor_chan *channels[MV_XOR_MAX_CHANNELS]; + int xor_type; }; /** diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index 1e1f298..faae0bf 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -240,8 +240,9 @@ struct dma_chan *of_dma_request_slave_channel(struct device_node *np, struct of_phandle_args dma_spec; struct of_dma *ofdma; struct dma_chan *chan; - int count, i; + int count, i, start; int ret_no_channel = -ENODEV; + static atomic_t last_index; if (!np || !name) { pr_err("%s: not enough information provided\n", __func__); @@ -259,8 +260,15 @@ struct dma_chan *of_dma_request_slave_channel(struct device_node *np, return ERR_PTR(-ENODEV); } + /* + * approximate an average distribution across multiple + * entries with the same name + */ + start = atomic_inc_return(&last_index); for (i = 0; i < count; i++) { - if (of_dma_match_channel(np, name, i, &dma_spec)) + if (of_dma_match_channel(np, name, + (i + start) % count, + &dma_spec)) continue; mutex_lock(&of_dma_lock); diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index 77c1c44..e756a30c 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -117,6 +117,7 @@ struct pxad_chan { /* protected by vc->lock */ struct pxad_phy *phy; struct dma_pool *desc_pool; /* Descriptors pool */ + dma_cookie_t bus_error; }; struct pxad_device { @@ -563,6 +564,7 @@ static void pxad_launch_chan(struct pxad_chan *chan, return; } } + chan->bus_error = 0; /* * Program the descriptor's address into the DMA controller, @@ -666,6 +668,7 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id) struct virt_dma_desc *vd, *tmp; unsigned int dcsr; unsigned long flags; + dma_cookie_t last_started = 0; BUG_ON(!chan); @@ -678,6 +681,7 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id) dev_dbg(&chan->vc.chan.dev->device, "%s(): checking txd %p[%x]: completed=%d\n", __func__, vd, vd->tx.cookie, is_desc_completed(vd)); + last_started = vd->tx.cookie; if (to_pxad_sw_desc(vd)->cyclic) { vchan_cyclic_callback(vd); break; @@ -690,7 +694,12 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id) } } - if (dcsr & PXA_DCSR_STOPSTATE) { + if (dcsr & PXA_DCSR_BUSERR) { + chan->bus_error = last_started; + phy_disable(phy); + } + + if (!chan->bus_error && dcsr & PXA_DCSR_STOPSTATE) { dev_dbg(&chan->vc.chan.dev->device, "%s(): channel stopped, submitted_empty=%d issued_empty=%d", __func__, @@ -1249,6 +1258,9 @@ static enum dma_status pxad_tx_status(struct dma_chan *dchan, struct pxad_chan *chan = to_pxad_chan(dchan); enum dma_status ret; + if (cookie == chan->bus_error) + return DMA_ERROR; + ret = dma_cookie_status(dchan, cookie, txstate); if (likely(txstate && (ret != DMA_ERROR))) dma_set_residue(txstate, pxad_residue(chan, cookie)); @@ -1321,7 +1333,7 @@ static int pxad_init_phys(struct platform_device *op, return 0; } -static const struct of_device_id const pxad_dt_ids[] = { +static const struct of_device_id pxad_dt_ids[] = { { .compatible = "marvell,pdma-1.0", }, {} }; diff --git a/drivers/dma/qcom/Makefile b/drivers/dma/qcom/Makefile index bfea699..4bfc38b 100644 --- a/drivers/dma/qcom/Makefile +++ b/drivers/dma/qcom/Makefile @@ -1,3 +1,5 @@ obj-$(CONFIG_QCOM_BAM_DMA) += bam_dma.o obj-$(CONFIG_QCOM_HIDMA_MGMT) += hdma_mgmt.o hdma_mgmt-objs := hidma_mgmt.o hidma_mgmt_sys.o +obj-$(CONFIG_QCOM_HIDMA) += hdma.o +hdma-objs := hidma_ll.o hidma.o hidma_dbg.o diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index d5e0a9c..969b481 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -342,7 +342,7 @@ static const struct reg_offset_data bam_v1_7_reg_info[] = { #define BAM_DESC_FIFO_SIZE SZ_32K #define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1) -#define BAM_MAX_DATA_SIZE (SZ_32K - 8) +#define BAM_FIFO_SIZE (SZ_32K - 8) struct bam_chan { struct virt_dma_chan vc; @@ -387,6 +387,7 @@ struct bam_device { /* execution environment ID, from DT */ u32 ee; + bool controlled_remotely; const struct reg_offset_data *layout; @@ -458,7 +459,7 @@ static void bam_chan_init_hw(struct bam_chan *bchan, */ writel_relaxed(ALIGN(bchan->fifo_phys, sizeof(struct bam_desc_hw)), bam_addr(bdev, bchan->id, BAM_P_DESC_FIFO_ADDR)); - writel_relaxed(BAM_DESC_FIFO_SIZE, + writel_relaxed(BAM_FIFO_SIZE, bam_addr(bdev, bchan->id, BAM_P_FIFO_SIZES)); /* enable the per pipe interrupts, enable EOT, ERR, and INT irqs */ @@ -604,7 +605,7 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan, /* calculate number of required entries */ for_each_sg(sgl, sg, sg_len, i) - num_alloc += DIV_ROUND_UP(sg_dma_len(sg), BAM_MAX_DATA_SIZE); + num_alloc += DIV_ROUND_UP(sg_dma_len(sg), BAM_FIFO_SIZE); /* allocate enough room to accomodate the number of entries */ async_desc = kzalloc(sizeof(*async_desc) + @@ -635,10 +636,10 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan, desc->addr = cpu_to_le32(sg_dma_address(sg) + curr_offset); - if (remainder > BAM_MAX_DATA_SIZE) { - desc->size = cpu_to_le16(BAM_MAX_DATA_SIZE); - remainder -= BAM_MAX_DATA_SIZE; - curr_offset += BAM_MAX_DATA_SIZE; + if (remainder > BAM_FIFO_SIZE) { + desc->size = cpu_to_le16(BAM_FIFO_SIZE); + remainder -= BAM_FIFO_SIZE; + curr_offset += BAM_FIFO_SIZE; } else { desc->size = cpu_to_le16(remainder); remainder = 0; @@ -801,13 +802,17 @@ static irqreturn_t bam_dma_irq(int irq, void *data) if (srcs & P_IRQ) tasklet_schedule(&bdev->task); - if (srcs & BAM_IRQ) + if (srcs & BAM_IRQ) { clr_mask = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_STTS)); - /* don't allow reorder of the various accesses to the BAM registers */ - mb(); + /* + * don't allow reorder of the various accesses to the BAM + * registers + */ + mb(); - writel_relaxed(clr_mask, bam_addr(bdev, 0, BAM_IRQ_CLR)); + writel_relaxed(clr_mask, bam_addr(bdev, 0, BAM_IRQ_CLR)); + } return IRQ_HANDLED; } @@ -1038,6 +1043,9 @@ static int bam_init(struct bam_device *bdev) val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES)); bdev->num_channels = val & BAM_NUM_PIPES_MASK; + if (bdev->controlled_remotely) + return 0; + /* s/w reset bam */ /* after reset all pipes are disabled and idle */ val = readl_relaxed(bam_addr(bdev, 0, BAM_CTRL)); @@ -1125,6 +1133,9 @@ static int bam_dma_probe(struct platform_device *pdev) return ret; } + bdev->controlled_remotely = of_property_read_bool(pdev->dev.of_node, + "qcom,controlled-remotely"); + bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk"); if (IS_ERR(bdev->bamclk)) return PTR_ERR(bdev->bamclk); @@ -1163,7 +1174,7 @@ static int bam_dma_probe(struct platform_device *pdev) /* set max dma segment size */ bdev->common.dev = bdev->dev; bdev->common.dev->dma_parms = &bdev->dma_parms; - ret = dma_set_max_seg_size(bdev->common.dev, BAM_MAX_DATA_SIZE); + ret = dma_set_max_seg_size(bdev->common.dev, BAM_FIFO_SIZE); if (ret) { dev_err(bdev->dev, "cannot set maximum segment size\n"); goto err_bam_channel_exit; @@ -1234,6 +1245,9 @@ static int bam_dma_remove(struct platform_device *pdev) bam_dma_terminate_all(&bdev->channels[i].vc.chan); tasklet_kill(&bdev->channels[i].vc.task); + if (!bdev->channels[i].fifo_virt) + continue; + dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE, bdev->channels[i].fifo_virt, bdev->channels[i].fifo_phys); diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c index cccc78e..41b5c6d 100644 --- a/drivers/dma/qcom/hidma.c +++ b/drivers/dma/qcom/hidma.c @@ -1,7 +1,7 @@ /* * Qualcomm Technologies HIDMA DMA engine interface * - * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -404,7 +404,7 @@ static int hidma_terminate_channel(struct dma_chan *chan) spin_unlock_irqrestore(&mchan->lock, irqflags); /* this suspends the existing transfer */ - rc = hidma_ll_pause(dmadev->lldev); + rc = hidma_ll_disable(dmadev->lldev); if (rc) { dev_err(dmadev->ddev.dev, "channel did not pause\n"); goto out; @@ -427,7 +427,7 @@ static int hidma_terminate_channel(struct dma_chan *chan) list_move(&mdesc->node, &mchan->free); } - rc = hidma_ll_resume(dmadev->lldev); + rc = hidma_ll_enable(dmadev->lldev); out: pm_runtime_mark_last_busy(dmadev->ddev.dev); pm_runtime_put_autosuspend(dmadev->ddev.dev); @@ -488,7 +488,7 @@ static int hidma_pause(struct dma_chan *chan) dmadev = to_hidma_dev(mchan->chan.device); if (!mchan->paused) { pm_runtime_get_sync(dmadev->ddev.dev); - if (hidma_ll_pause(dmadev->lldev)) + if (hidma_ll_disable(dmadev->lldev)) dev_warn(dmadev->ddev.dev, "channel did not stop\n"); mchan->paused = true; pm_runtime_mark_last_busy(dmadev->ddev.dev); @@ -507,7 +507,7 @@ static int hidma_resume(struct dma_chan *chan) dmadev = to_hidma_dev(mchan->chan.device); if (mchan->paused) { pm_runtime_get_sync(dmadev->ddev.dev); - rc = hidma_ll_resume(dmadev->lldev); + rc = hidma_ll_enable(dmadev->lldev); if (!rc) mchan->paused = false; else @@ -530,6 +530,43 @@ static irqreturn_t hidma_chirq_handler(int chirq, void *arg) return hidma_ll_inthandler(chirq, lldev); } +static ssize_t hidma_show_values(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + struct hidma_dev *mdev = platform_get_drvdata(pdev); + + buf[0] = 0; + + if (strcmp(attr->attr.name, "chid") == 0) + sprintf(buf, "%d\n", mdev->chidx); + + return strlen(buf); +} + +static int hidma_create_sysfs_entry(struct hidma_dev *dev, char *name, + int mode) +{ + struct device_attribute *attrs; + char *name_copy; + + attrs = devm_kmalloc(dev->ddev.dev, sizeof(struct device_attribute), + GFP_KERNEL); + if (!attrs) + return -ENOMEM; + + name_copy = devm_kstrdup(dev->ddev.dev, name, GFP_KERNEL); + if (!name_copy) + return -ENOMEM; + + attrs->attr.name = name_copy; + attrs->attr.mode = mode; + attrs->show = hidma_show_values; + sysfs_attr_init(&attrs->attr); + + return device_create_file(dev->ddev.dev, attrs); +} + static int hidma_probe(struct platform_device *pdev) { struct hidma_dev *dmadev; @@ -644,6 +681,8 @@ static int hidma_probe(struct platform_device *pdev) dmadev->irq = chirq; tasklet_init(&dmadev->task, hidma_issue_task, (unsigned long)dmadev); + hidma_debug_init(dmadev); + hidma_create_sysfs_entry(dmadev, "chid", S_IRUGO); dev_info(&pdev->dev, "HI-DMA engine driver registration complete\n"); platform_set_drvdata(pdev, dmadev); pm_runtime_mark_last_busy(dmadev->ddev.dev); @@ -651,6 +690,7 @@ static int hidma_probe(struct platform_device *pdev) return 0; uninit: + hidma_debug_uninit(dmadev); hidma_ll_uninit(dmadev->lldev); dmafree: if (dmadev) @@ -668,6 +708,7 @@ static int hidma_remove(struct platform_device *pdev) pm_runtime_get_sync(dmadev->ddev.dev); dma_async_device_unregister(&dmadev->ddev); devm_free_irq(dmadev->ddev.dev, dmadev->irq, dmadev->lldev); + hidma_debug_uninit(dmadev); hidma_ll_uninit(dmadev->lldev); hidma_free(dmadev); @@ -689,7 +730,6 @@ static const struct of_device_id hidma_match[] = { {.compatible = "qcom,hidma-1.0",}, {}, }; - MODULE_DEVICE_TABLE(of, hidma_match); static struct platform_driver hidma_driver = { diff --git a/drivers/dma/qcom/hidma.h b/drivers/dma/qcom/hidma.h index 231e306..db413a5 100644 --- a/drivers/dma/qcom/hidma.h +++ b/drivers/dma/qcom/hidma.h @@ -1,7 +1,7 @@ /* * Qualcomm Technologies HIDMA data structures * - * Copyright (c) 2014, The Linux Foundation. All rights reserved. + * Copyright (c) 2014-2016, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -20,32 +20,29 @@ #include <linux/interrupt.h> #include <linux/dmaengine.h> -#define TRE_SIZE 32 /* each TRE is 32 bytes */ -#define TRE_CFG_IDX 0 -#define TRE_LEN_IDX 1 -#define TRE_SRC_LOW_IDX 2 -#define TRE_SRC_HI_IDX 3 -#define TRE_DEST_LOW_IDX 4 -#define TRE_DEST_HI_IDX 5 - -struct hidma_tx_status { - u8 err_info; /* error record in this transfer */ - u8 err_code; /* completion code */ -}; +#define HIDMA_TRE_SIZE 32 /* each TRE is 32 bytes */ +#define HIDMA_TRE_CFG_IDX 0 +#define HIDMA_TRE_LEN_IDX 1 +#define HIDMA_TRE_SRC_LOW_IDX 2 +#define HIDMA_TRE_SRC_HI_IDX 3 +#define HIDMA_TRE_DEST_LOW_IDX 4 +#define HIDMA_TRE_DEST_HI_IDX 5 struct hidma_tre { atomic_t allocated; /* if this channel is allocated */ bool queued; /* flag whether this is pending */ u16 status; /* status */ - u32 chidx; /* index of the tre */ + u32 idx; /* index of the tre */ u32 dma_sig; /* signature of the tre */ const char *dev_name; /* name of the device */ void (*callback)(void *data); /* requester callback */ void *data; /* Data associated with this channel*/ struct hidma_lldev *lldev; /* lldma device pointer */ - u32 tre_local[TRE_SIZE / sizeof(u32) + 1]; /* TRE local copy */ + u32 tre_local[HIDMA_TRE_SIZE / sizeof(u32) + 1]; /* TRE local copy */ u32 tre_index; /* the offset where this was written*/ u32 int_flags; /* interrupt flags */ + u8 err_info; /* error record in this transfer */ + u8 err_code; /* completion code */ }; struct hidma_lldev { @@ -61,22 +58,21 @@ struct hidma_lldev { void __iomem *evca; /* Event Channel address */ struct hidma_tre **pending_tre_list; /* Pointers to pending TREs */ - struct hidma_tx_status - *tx_status_list; /* Pointers to pending TREs status*/ s32 pending_tre_count; /* Number of TREs pending */ void *tre_ring; /* TRE ring */ - dma_addr_t tre_ring_handle; /* TRE ring to be shared with HW */ + dma_addr_t tre_dma; /* TRE ring to be shared with HW */ u32 tre_ring_size; /* Byte size of the ring */ u32 tre_processed_off; /* last processed TRE */ void *evre_ring; /* EVRE ring */ - dma_addr_t evre_ring_handle; /* EVRE ring to be shared with HW */ + dma_addr_t evre_dma; /* EVRE ring to be shared with HW */ u32 evre_ring_size; /* Byte size of the ring */ u32 evre_processed_off; /* last processed EVRE */ u32 tre_write_offset; /* TRE write location */ struct tasklet_struct task; /* task delivering notifications */ + struct tasklet_struct rst_task; /* task to reset HW */ DECLARE_KFIFO_PTR(handoff_fifo, struct hidma_tre *); /* pending TREs FIFO */ }; @@ -145,8 +141,8 @@ enum dma_status hidma_ll_status(struct hidma_lldev *llhndl, u32 tre_ch); bool hidma_ll_isenabled(struct hidma_lldev *llhndl); void hidma_ll_queue_request(struct hidma_lldev *llhndl, u32 tre_ch); void hidma_ll_start(struct hidma_lldev *llhndl); -int hidma_ll_pause(struct hidma_lldev *llhndl); -int hidma_ll_resume(struct hidma_lldev *llhndl); +int hidma_ll_disable(struct hidma_lldev *lldev); +int hidma_ll_enable(struct hidma_lldev *llhndl); void hidma_ll_set_transfer_params(struct hidma_lldev *llhndl, u32 tre_ch, dma_addr_t src, dma_addr_t dest, u32 len, u32 flags); int hidma_ll_setup(struct hidma_lldev *lldev); @@ -157,4 +153,6 @@ int hidma_ll_uninit(struct hidma_lldev *llhndl); irqreturn_t hidma_ll_inthandler(int irq, void *arg); void hidma_cleanup_pending_tre(struct hidma_lldev *llhndl, u8 err_info, u8 err_code); +int hidma_debug_init(struct hidma_dev *dmadev); +void hidma_debug_uninit(struct hidma_dev *dmadev); #endif diff --git a/drivers/dma/qcom/hidma_dbg.c b/drivers/dma/qcom/hidma_dbg.c new file mode 100644 index 0000000..fa827e5 --- /dev/null +++ b/drivers/dma/qcom/hidma_dbg.c @@ -0,0 +1,217 @@ +/* + * Qualcomm Technologies HIDMA debug file + * + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/debugfs.h> +#include <linux/device.h> +#include <linux/list.h> +#include <linux/pm_runtime.h> + +#include "hidma.h" + +static void hidma_ll_chstats(struct seq_file *s, void *llhndl, u32 tre_ch) +{ + struct hidma_lldev *lldev = llhndl; + struct hidma_tre *tre; + u32 length; + dma_addr_t src_start; + dma_addr_t dest_start; + u32 *tre_local; + + if (tre_ch >= lldev->nr_tres) { + dev_err(lldev->dev, "invalid TRE number in chstats:%d", tre_ch); + return; + } + tre = &lldev->trepool[tre_ch]; + seq_printf(s, "------Channel %d -----\n", tre_ch); + seq_printf(s, "allocated=%d\n", atomic_read(&tre->allocated)); + seq_printf(s, "queued = 0x%x\n", tre->queued); + seq_printf(s, "err_info = 0x%x\n", tre->err_info); + seq_printf(s, "err_code = 0x%x\n", tre->err_code); + seq_printf(s, "status = 0x%x\n", tre->status); + seq_printf(s, "idx = 0x%x\n", tre->idx); + seq_printf(s, "dma_sig = 0x%x\n", tre->dma_sig); + seq_printf(s, "dev_name=%s\n", tre->dev_name); + seq_printf(s, "callback=%p\n", tre->callback); + seq_printf(s, "data=%p\n", tre->data); + seq_printf(s, "tre_index = 0x%x\n", tre->tre_index); + + tre_local = &tre->tre_local[0]; + src_start = tre_local[HIDMA_TRE_SRC_LOW_IDX]; + src_start = ((u64) (tre_local[HIDMA_TRE_SRC_HI_IDX]) << 32) + src_start; + dest_start = tre_local[HIDMA_TRE_DEST_LOW_IDX]; + dest_start += ((u64) (tre_local[HIDMA_TRE_DEST_HI_IDX]) << 32); + length = tre_local[HIDMA_TRE_LEN_IDX]; + + seq_printf(s, "src=%pap\n", &src_start); + seq_printf(s, "dest=%pap\n", &dest_start); + seq_printf(s, "length = 0x%x\n", length); +} + +static void hidma_ll_devstats(struct seq_file *s, void *llhndl) +{ + struct hidma_lldev *lldev = llhndl; + + seq_puts(s, "------Device -----\n"); + seq_printf(s, "lldev init = 0x%x\n", lldev->initialized); + seq_printf(s, "trch_state = 0x%x\n", lldev->trch_state); + seq_printf(s, "evch_state = 0x%x\n", lldev->evch_state); + seq_printf(s, "chidx = 0x%x\n", lldev->chidx); + seq_printf(s, "nr_tres = 0x%x\n", lldev->nr_tres); + seq_printf(s, "trca=%p\n", lldev->trca); + seq_printf(s, "tre_ring=%p\n", lldev->tre_ring); + seq_printf(s, "tre_ring_handle=%pap\n", &lldev->tre_dma); + seq_printf(s, "tre_ring_size = 0x%x\n", lldev->tre_ring_size); + seq_printf(s, "tre_processed_off = 0x%x\n", lldev->tre_processed_off); + seq_printf(s, "pending_tre_count=%d\n", lldev->pending_tre_count); + seq_printf(s, "evca=%p\n", lldev->evca); + seq_printf(s, "evre_ring=%p\n", lldev->evre_ring); + seq_printf(s, "evre_ring_handle=%pap\n", &lldev->evre_dma); + seq_printf(s, "evre_ring_size = 0x%x\n", lldev->evre_ring_size); + seq_printf(s, "evre_processed_off = 0x%x\n", lldev->evre_processed_off); + seq_printf(s, "tre_write_offset = 0x%x\n", lldev->tre_write_offset); +} + +/* + * hidma_chan_stats: display HIDMA channel statistics + * + * Display the statistics for the current HIDMA virtual channel device. + */ +static int hidma_chan_stats(struct seq_file *s, void *unused) +{ + struct hidma_chan *mchan = s->private; + struct hidma_desc *mdesc; + struct hidma_dev *dmadev = mchan->dmadev; + + pm_runtime_get_sync(dmadev->ddev.dev); + seq_printf(s, "paused=%u\n", mchan->paused); + seq_printf(s, "dma_sig=%u\n", mchan->dma_sig); + seq_puts(s, "prepared\n"); + list_for_each_entry(mdesc, &mchan->prepared, node) + hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch); + + seq_puts(s, "active\n"); + list_for_each_entry(mdesc, &mchan->active, node) + hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch); + + seq_puts(s, "completed\n"); + list_for_each_entry(mdesc, &mchan->completed, node) + hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch); + + hidma_ll_devstats(s, mchan->dmadev->lldev); + pm_runtime_mark_last_busy(dmadev->ddev.dev); + pm_runtime_put_autosuspend(dmadev->ddev.dev); + return 0; +} + +/* + * hidma_dma_info: display HIDMA device info + * + * Display the info for the current HIDMA device. + */ +static int hidma_dma_info(struct seq_file *s, void *unused) +{ + struct hidma_dev *dmadev = s->private; + resource_size_t sz; + + seq_printf(s, "nr_descriptors=%d\n", dmadev->nr_descriptors); + seq_printf(s, "dev_trca=%p\n", &dmadev->dev_trca); + seq_printf(s, "dev_trca_phys=%pa\n", &dmadev->trca_resource->start); + sz = resource_size(dmadev->trca_resource); + seq_printf(s, "dev_trca_size=%pa\n", &sz); + seq_printf(s, "dev_evca=%p\n", &dmadev->dev_evca); + seq_printf(s, "dev_evca_phys=%pa\n", &dmadev->evca_resource->start); + sz = resource_size(dmadev->evca_resource); + seq_printf(s, "dev_evca_size=%pa\n", &sz); + return 0; +} + +static int hidma_chan_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, hidma_chan_stats, inode->i_private); +} + +static int hidma_dma_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, hidma_dma_info, inode->i_private); +} + +static const struct file_operations hidma_chan_fops = { + .open = hidma_chan_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations hidma_dma_fops = { + .open = hidma_dma_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void hidma_debug_uninit(struct hidma_dev *dmadev) +{ + debugfs_remove_recursive(dmadev->debugfs); + debugfs_remove_recursive(dmadev->stats); +} + +int hidma_debug_init(struct hidma_dev *dmadev) +{ + int rc = 0; + int chidx = 0; + struct list_head *position = NULL; + + dmadev->debugfs = debugfs_create_dir(dev_name(dmadev->ddev.dev), NULL); + if (!dmadev->debugfs) { + rc = -ENODEV; + return rc; + } + + /* walk through the virtual channel list */ + list_for_each(position, &dmadev->ddev.channels) { + struct hidma_chan *chan; + + chan = list_entry(position, struct hidma_chan, + chan.device_node); + sprintf(chan->dbg_name, "chan%d", chidx); + chan->debugfs = debugfs_create_dir(chan->dbg_name, + dmadev->debugfs); + if (!chan->debugfs) { + rc = -ENOMEM; + goto cleanup; + } + chan->stats = debugfs_create_file("stats", S_IRUGO, + chan->debugfs, chan, + &hidma_chan_fops); + if (!chan->stats) { + rc = -ENOMEM; + goto cleanup; + } + chidx++; + } + + dmadev->stats = debugfs_create_file("stats", S_IRUGO, + dmadev->debugfs, dmadev, + &hidma_dma_fops); + if (!dmadev->stats) { + rc = -ENOMEM; + goto cleanup; + } + + return 0; +cleanup: + hidma_debug_uninit(dmadev); + return rc; +} diff --git a/drivers/dma/qcom/hidma_ll.c b/drivers/dma/qcom/hidma_ll.c new file mode 100644 index 0000000..f392900 --- /dev/null +++ b/drivers/dma/qcom/hidma_ll.c @@ -0,0 +1,872 @@ +/* + * Qualcomm Technologies HIDMA DMA engine low level code + * + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/dmaengine.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/dma-mapping.h> +#include <linux/delay.h> +#include <linux/atomic.h> +#include <linux/iopoll.h> +#include <linux/kfifo.h> +#include <linux/bitops.h> + +#include "hidma.h" + +#define HIDMA_EVRE_SIZE 16 /* each EVRE is 16 bytes */ + +#define HIDMA_TRCA_CTRLSTS_REG 0x000 +#define HIDMA_TRCA_RING_LOW_REG 0x008 +#define HIDMA_TRCA_RING_HIGH_REG 0x00C +#define HIDMA_TRCA_RING_LEN_REG 0x010 +#define HIDMA_TRCA_DOORBELL_REG 0x400 + +#define HIDMA_EVCA_CTRLSTS_REG 0x000 +#define HIDMA_EVCA_INTCTRL_REG 0x004 +#define HIDMA_EVCA_RING_LOW_REG 0x008 +#define HIDMA_EVCA_RING_HIGH_REG 0x00C +#define HIDMA_EVCA_RING_LEN_REG 0x010 +#define HIDMA_EVCA_WRITE_PTR_REG 0x020 +#define HIDMA_EVCA_DOORBELL_REG 0x400 + +#define HIDMA_EVCA_IRQ_STAT_REG 0x100 +#define HIDMA_EVCA_IRQ_CLR_REG 0x108 +#define HIDMA_EVCA_IRQ_EN_REG 0x110 + +#define HIDMA_EVRE_CFG_IDX 0 + +#define HIDMA_EVRE_ERRINFO_BIT_POS 24 +#define HIDMA_EVRE_CODE_BIT_POS 28 + +#define HIDMA_EVRE_ERRINFO_MASK GENMASK(3, 0) +#define HIDMA_EVRE_CODE_MASK GENMASK(3, 0) + +#define HIDMA_CH_CONTROL_MASK GENMASK(7, 0) +#define HIDMA_CH_STATE_MASK GENMASK(7, 0) +#define HIDMA_CH_STATE_BIT_POS 0x8 + +#define HIDMA_IRQ_EV_CH_EOB_IRQ_BIT_POS 0 +#define HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS 1 +#define HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS 9 +#define HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS 10 +#define HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS 11 +#define HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS 14 + +#define ENABLE_IRQS (BIT(HIDMA_IRQ_EV_CH_EOB_IRQ_BIT_POS) | \ + BIT(HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS)) + +#define HIDMA_INCREMENT_ITERATOR(iter, size, ring_size) \ +do { \ + iter += size; \ + if (iter >= ring_size) \ + iter -= ring_size; \ +} while (0) + +#define HIDMA_CH_STATE(val) \ + ((val >> HIDMA_CH_STATE_BIT_POS) & HIDMA_CH_STATE_MASK) + +#define HIDMA_ERR_INT_MASK \ + (BIT(HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS)) + +enum ch_command { + HIDMA_CH_DISABLE = 0, + HIDMA_CH_ENABLE = 1, + HIDMA_CH_SUSPEND = 2, + HIDMA_CH_RESET = 9, +}; + +enum ch_state { + HIDMA_CH_DISABLED = 0, + HIDMA_CH_ENABLED = 1, + HIDMA_CH_RUNNING = 2, + HIDMA_CH_SUSPENDED = 3, + HIDMA_CH_STOPPED = 4, +}; + +enum tre_type { + HIDMA_TRE_MEMCPY = 3, +}; + +enum err_code { + HIDMA_EVRE_STATUS_COMPLETE = 1, + HIDMA_EVRE_STATUS_ERROR = 4, +}; + +static int hidma_is_chan_enabled(int state) +{ + switch (state) { + case HIDMA_CH_ENABLED: + case HIDMA_CH_RUNNING: + return true; + default: + return false; + } +} + +void hidma_ll_free(struct hidma_lldev *lldev, u32 tre_ch) +{ + struct hidma_tre *tre; + + if (tre_ch >= lldev->nr_tres) { + dev_err(lldev->dev, "invalid TRE number in free:%d", tre_ch); + return; + } + + tre = &lldev->trepool[tre_ch]; + if (atomic_read(&tre->allocated) != true) { + dev_err(lldev->dev, "trying to free an unused TRE:%d", tre_ch); + return; + } + + atomic_set(&tre->allocated, 0); +} + +int hidma_ll_request(struct hidma_lldev *lldev, u32 sig, const char *dev_name, + void (*callback)(void *data), void *data, u32 *tre_ch) +{ + unsigned int i; + struct hidma_tre *tre; + u32 *tre_local; + + if (!tre_ch || !lldev) + return -EINVAL; + + /* need to have at least one empty spot in the queue */ + for (i = 0; i < lldev->nr_tres - 1; i++) { + if (atomic_add_unless(&lldev->trepool[i].allocated, 1, 1)) + break; + } + + if (i == (lldev->nr_tres - 1)) + return -ENOMEM; + + tre = &lldev->trepool[i]; + tre->dma_sig = sig; + tre->dev_name = dev_name; + tre->callback = callback; + tre->data = data; + tre->idx = i; + tre->status = 0; + tre->queued = 0; + tre->err_code = 0; + tre->err_info = 0; + tre->lldev = lldev; + tre_local = &tre->tre_local[0]; + tre_local[HIDMA_TRE_CFG_IDX] = HIDMA_TRE_MEMCPY; + tre_local[HIDMA_TRE_CFG_IDX] |= (lldev->chidx & 0xFF) << 8; + tre_local[HIDMA_TRE_CFG_IDX] |= BIT(16); /* set IEOB */ + *tre_ch = i; + if (callback) + callback(data); + return 0; +} + +/* + * Multiple TREs may be queued and waiting in the pending queue. + */ +static void hidma_ll_tre_complete(unsigned long arg) +{ + struct hidma_lldev *lldev = (struct hidma_lldev *)arg; + struct hidma_tre *tre; + + while (kfifo_out(&lldev->handoff_fifo, &tre, 1)) { + /* call the user if it has been read by the hardware */ + if (tre->callback) + tre->callback(tre->data); + } +} + +static int hidma_post_completed(struct hidma_lldev *lldev, int tre_iterator, + u8 err_info, u8 err_code) +{ + struct hidma_tre *tre; + unsigned long flags; + + spin_lock_irqsave(&lldev->lock, flags); + tre = lldev->pending_tre_list[tre_iterator / HIDMA_TRE_SIZE]; + if (!tre) { + spin_unlock_irqrestore(&lldev->lock, flags); + dev_warn(lldev->dev, "tre_index [%d] and tre out of sync\n", + tre_iterator / HIDMA_TRE_SIZE); + return -EINVAL; + } + lldev->pending_tre_list[tre->tre_index] = NULL; + + /* + * Keep track of pending TREs that SW is expecting to receive + * from HW. We got one now. Decrement our counter. + */ + lldev->pending_tre_count--; + if (lldev->pending_tre_count < 0) { + dev_warn(lldev->dev, "tre count mismatch on completion"); + lldev->pending_tre_count = 0; + } + + spin_unlock_irqrestore(&lldev->lock, flags); + + tre->err_info = err_info; + tre->err_code = err_code; + tre->queued = 0; + + kfifo_put(&lldev->handoff_fifo, tre); + tasklet_schedule(&lldev->task); + + return 0; +} + +/* + * Called to handle the interrupt for the channel. + * Return a positive number if TRE or EVRE were consumed on this run. + * Return a positive number if there are pending TREs or EVREs. + * Return 0 if there is nothing to consume or no pending TREs/EVREs found. + */ +static int hidma_handle_tre_completion(struct hidma_lldev *lldev) +{ + u32 evre_ring_size = lldev->evre_ring_size; + u32 tre_ring_size = lldev->tre_ring_size; + u32 err_info, err_code, evre_write_off; + u32 tre_iterator, evre_iterator; + u32 num_completed = 0; + + evre_write_off = readl_relaxed(lldev->evca + HIDMA_EVCA_WRITE_PTR_REG); + tre_iterator = lldev->tre_processed_off; + evre_iterator = lldev->evre_processed_off; + + if ((evre_write_off > evre_ring_size) || + (evre_write_off % HIDMA_EVRE_SIZE)) { + dev_err(lldev->dev, "HW reports invalid EVRE write offset\n"); + return 0; + } + + /* + * By the time control reaches here the number of EVREs and TREs + * may not match. Only consume the ones that hardware told us. + */ + while ((evre_iterator != evre_write_off)) { + u32 *current_evre = lldev->evre_ring + evre_iterator; + u32 cfg; + + cfg = current_evre[HIDMA_EVRE_CFG_IDX]; + err_info = cfg >> HIDMA_EVRE_ERRINFO_BIT_POS; + err_info &= HIDMA_EVRE_ERRINFO_MASK; + err_code = + (cfg >> HIDMA_EVRE_CODE_BIT_POS) & HIDMA_EVRE_CODE_MASK; + + if (hidma_post_completed(lldev, tre_iterator, err_info, + err_code)) + break; + + HIDMA_INCREMENT_ITERATOR(tre_iterator, HIDMA_TRE_SIZE, + tre_ring_size); + HIDMA_INCREMENT_ITERATOR(evre_iterator, HIDMA_EVRE_SIZE, + evre_ring_size); + + /* + * Read the new event descriptor written by the HW. + * As we are processing the delivered events, other events + * get queued to the SW for processing. + */ + evre_write_off = + readl_relaxed(lldev->evca + HIDMA_EVCA_WRITE_PTR_REG); + num_completed++; + } + + if (num_completed) { + u32 evre_read_off = (lldev->evre_processed_off + + HIDMA_EVRE_SIZE * num_completed); + u32 tre_read_off = (lldev->tre_processed_off + + HIDMA_TRE_SIZE * num_completed); + + evre_read_off = evre_read_off % evre_ring_size; + tre_read_off = tre_read_off % tre_ring_size; + + writel(evre_read_off, lldev->evca + HIDMA_EVCA_DOORBELL_REG); + + /* record the last processed tre offset */ + lldev->tre_processed_off = tre_read_off; + lldev->evre_processed_off = evre_read_off; + } + + return num_completed; +} + +void hidma_cleanup_pending_tre(struct hidma_lldev *lldev, u8 err_info, + u8 err_code) +{ + u32 tre_iterator; + u32 tre_ring_size = lldev->tre_ring_size; + int num_completed = 0; + u32 tre_read_off; + + tre_iterator = lldev->tre_processed_off; + while (lldev->pending_tre_count) { + if (hidma_post_completed(lldev, tre_iterator, err_info, + err_code)) + break; + HIDMA_INCREMENT_ITERATOR(tre_iterator, HIDMA_TRE_SIZE, + tre_ring_size); + num_completed++; + } + tre_read_off = (lldev->tre_processed_off + + HIDMA_TRE_SIZE * num_completed); + + tre_read_off = tre_read_off % tre_ring_size; + + /* record the last processed tre offset */ + lldev->tre_processed_off = tre_read_off; +} + +static int hidma_ll_reset(struct hidma_lldev *lldev) +{ + u32 val; + int ret; + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_RESET << 16; + writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + + /* + * Delay 10ms after reset to allow DMA logic to quiesce. + * Do a polled read up to 1ms and 10ms maximum. + */ + ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_DISABLED, + 1000, 10000); + if (ret) { + dev_err(lldev->dev, "transfer channel did not reset\n"); + return ret; + } + + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_RESET << 16; + writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + + /* + * Delay 10ms after reset to allow DMA logic to quiesce. + * Do a polled read up to 1ms and 10ms maximum. + */ + ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_DISABLED, + 1000, 10000); + if (ret) + return ret; + + lldev->trch_state = HIDMA_CH_DISABLED; + lldev->evch_state = HIDMA_CH_DISABLED; + return 0; +} + +/* + * Abort all transactions and perform a reset. + */ +static void hidma_ll_abort(unsigned long arg) +{ + struct hidma_lldev *lldev = (struct hidma_lldev *)arg; + u8 err_code = HIDMA_EVRE_STATUS_ERROR; + u8 err_info = 0xFF; + int rc; + + hidma_cleanup_pending_tre(lldev, err_info, err_code); + + /* reset the channel for recovery */ + rc = hidma_ll_setup(lldev); + if (rc) { + dev_err(lldev->dev, "channel reinitialize failed after error\n"); + return; + } + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); +} + +/* + * The interrupt handler for HIDMA will try to consume as many pending + * EVRE from the event queue as possible. Each EVRE has an associated + * TRE that holds the user interface parameters. EVRE reports the + * result of the transaction. Hardware guarantees ordering between EVREs + * and TREs. We use last processed offset to figure out which TRE is + * associated with which EVRE. If two TREs are consumed by HW, the EVREs + * are in order in the event ring. + * + * This handler will do a one pass for consuming EVREs. Other EVREs may + * be delivered while we are working. It will try to consume incoming + * EVREs one more time and return. + * + * For unprocessed EVREs, hardware will trigger another interrupt until + * all the interrupt bits are cleared. + * + * Hardware guarantees that by the time interrupt is observed, all data + * transactions in flight are delivered to their respective places and + * are visible to the CPU. + * + * On demand paging for IOMMU is only supported for PCIe via PRI + * (Page Request Interface) not for HIDMA. All other hardware instances + * including HIDMA work on pinned DMA addresses. + * + * HIDMA is not aware of IOMMU presence since it follows the DMA API. All + * IOMMU latency will be built into the data movement time. By the time + * interrupt happens, IOMMU lookups + data movement has already taken place. + * + * While the first read in a typical PCI endpoint ISR flushes all outstanding + * requests traditionally to the destination, this concept does not apply + * here for this HW. + */ +irqreturn_t hidma_ll_inthandler(int chirq, void *arg) +{ + struct hidma_lldev *lldev = arg; + u32 status; + u32 enable; + u32 cause; + + /* + * Fine tuned for this HW... + * + * This ISR has been designed for this particular hardware. Relaxed + * read and write accessors are used for performance reasons due to + * interrupt delivery guarantees. Do not copy this code blindly and + * expect that to work. + */ + status = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + enable = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + cause = status & enable; + + while (cause) { + if (cause & HIDMA_ERR_INT_MASK) { + dev_err(lldev->dev, "error 0x%x, resetting...\n", + cause); + + /* Clear out pending interrupts */ + writel(cause, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + + tasklet_schedule(&lldev->rst_task); + goto out; + } + + /* + * Try to consume as many EVREs as possible. + */ + hidma_handle_tre_completion(lldev); + + /* We consumed TREs or there are pending TREs or EVREs. */ + writel_relaxed(cause, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + + /* + * Another interrupt might have arrived while we are + * processing this one. Read the new cause. + */ + status = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + enable = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + cause = status & enable; + } + +out: + return IRQ_HANDLED; +} + +int hidma_ll_enable(struct hidma_lldev *lldev) +{ + u32 val; + int ret; + + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_ENABLE << 16; + writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + + ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val, + hidma_is_chan_enabled(HIDMA_CH_STATE(val)), + 1000, 10000); + if (ret) { + dev_err(lldev->dev, "event channel did not get enabled\n"); + return ret; + } + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_ENABLE << 16; + writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + + ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val, + hidma_is_chan_enabled(HIDMA_CH_STATE(val)), + 1000, 10000); + if (ret) { + dev_err(lldev->dev, "transfer channel did not get enabled\n"); + return ret; + } + + lldev->trch_state = HIDMA_CH_ENABLED; + lldev->evch_state = HIDMA_CH_ENABLED; + + return 0; +} + +void hidma_ll_start(struct hidma_lldev *lldev) +{ + unsigned long irqflags; + + spin_lock_irqsave(&lldev->lock, irqflags); + writel(lldev->tre_write_offset, lldev->trca + HIDMA_TRCA_DOORBELL_REG); + spin_unlock_irqrestore(&lldev->lock, irqflags); +} + +bool hidma_ll_isenabled(struct hidma_lldev *lldev) +{ + u32 val; + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + lldev->trch_state = HIDMA_CH_STATE(val); + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + lldev->evch_state = HIDMA_CH_STATE(val); + + /* both channels have to be enabled before calling this function */ + if (hidma_is_chan_enabled(lldev->trch_state) && + hidma_is_chan_enabled(lldev->evch_state)) + return true; + + return false; +} + +void hidma_ll_queue_request(struct hidma_lldev *lldev, u32 tre_ch) +{ + struct hidma_tre *tre; + unsigned long flags; + + tre = &lldev->trepool[tre_ch]; + + /* copy the TRE into its location in the TRE ring */ + spin_lock_irqsave(&lldev->lock, flags); + tre->tre_index = lldev->tre_write_offset / HIDMA_TRE_SIZE; + lldev->pending_tre_list[tre->tre_index] = tre; + memcpy(lldev->tre_ring + lldev->tre_write_offset, + &tre->tre_local[0], HIDMA_TRE_SIZE); + tre->err_code = 0; + tre->err_info = 0; + tre->queued = 1; + lldev->pending_tre_count++; + lldev->tre_write_offset = (lldev->tre_write_offset + HIDMA_TRE_SIZE) + % lldev->tre_ring_size; + spin_unlock_irqrestore(&lldev->lock, flags); +} + +/* + * Note that even though we stop this channel if there is a pending transaction + * in flight it will complete and follow the callback. This request will + * prevent further requests to be made. + */ +int hidma_ll_disable(struct hidma_lldev *lldev) +{ + u32 val; + int ret; + + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + lldev->evch_state = HIDMA_CH_STATE(val); + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + lldev->trch_state = HIDMA_CH_STATE(val); + + /* already suspended by this OS */ + if ((lldev->trch_state == HIDMA_CH_SUSPENDED) || + (lldev->evch_state == HIDMA_CH_SUSPENDED)) + return 0; + + /* already stopped by the manager */ + if ((lldev->trch_state == HIDMA_CH_STOPPED) || + (lldev->evch_state == HIDMA_CH_STOPPED)) + return 0; + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_SUSPEND << 16; + writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + + /* + * Start the wait right after the suspend is confirmed. + * Do a polled read up to 1ms and 10ms maximum. + */ + ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_SUSPENDED, + 1000, 10000); + if (ret) + return ret; + + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_SUSPEND << 16; + writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + + /* + * Start the wait right after the suspend is confirmed + * Delay up to 10ms after reset to allow DMA logic to quiesce. + */ + ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_SUSPENDED, + 1000, 10000); + if (ret) + return ret; + + lldev->trch_state = HIDMA_CH_SUSPENDED; + lldev->evch_state = HIDMA_CH_SUSPENDED; + return 0; +} + +void hidma_ll_set_transfer_params(struct hidma_lldev *lldev, u32 tre_ch, + dma_addr_t src, dma_addr_t dest, u32 len, + u32 flags) +{ + struct hidma_tre *tre; + u32 *tre_local; + + if (tre_ch >= lldev->nr_tres) { + dev_err(lldev->dev, "invalid TRE number in transfer params:%d", + tre_ch); + return; + } + + tre = &lldev->trepool[tre_ch]; + if (atomic_read(&tre->allocated) != true) { + dev_err(lldev->dev, "trying to set params on an unused TRE:%d", + tre_ch); + return; + } + + tre_local = &tre->tre_local[0]; + tre_local[HIDMA_TRE_LEN_IDX] = len; + tre_local[HIDMA_TRE_SRC_LOW_IDX] = lower_32_bits(src); + tre_local[HIDMA_TRE_SRC_HI_IDX] = upper_32_bits(src); + tre_local[HIDMA_TRE_DEST_LOW_IDX] = lower_32_bits(dest); + tre_local[HIDMA_TRE_DEST_HI_IDX] = upper_32_bits(dest); + tre->int_flags = flags; +} + +/* + * Called during initialization and after an error condition + * to restore hardware state. + */ +int hidma_ll_setup(struct hidma_lldev *lldev) +{ + int rc; + u64 addr; + u32 val; + u32 nr_tres = lldev->nr_tres; + + lldev->pending_tre_count = 0; + lldev->tre_processed_off = 0; + lldev->evre_processed_off = 0; + lldev->tre_write_offset = 0; + + /* disable interrupts */ + writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + + /* clear all pending interrupts */ + val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + + rc = hidma_ll_reset(lldev); + if (rc) + return rc; + + /* + * Clear all pending interrupts again. + * Otherwise, we observe reset complete interrupts. + */ + val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + + /* disable interrupts again after reset */ + writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + + addr = lldev->tre_dma; + writel(lower_32_bits(addr), lldev->trca + HIDMA_TRCA_RING_LOW_REG); + writel(upper_32_bits(addr), lldev->trca + HIDMA_TRCA_RING_HIGH_REG); + writel(lldev->tre_ring_size, lldev->trca + HIDMA_TRCA_RING_LEN_REG); + + addr = lldev->evre_dma; + writel(lower_32_bits(addr), lldev->evca + HIDMA_EVCA_RING_LOW_REG); + writel(upper_32_bits(addr), lldev->evca + HIDMA_EVCA_RING_HIGH_REG); + writel(HIDMA_EVRE_SIZE * nr_tres, + lldev->evca + HIDMA_EVCA_RING_LEN_REG); + + /* support IRQ only for now */ + val = readl(lldev->evca + HIDMA_EVCA_INTCTRL_REG); + val &= ~0xF; + val |= 0x1; + writel(val, lldev->evca + HIDMA_EVCA_INTCTRL_REG); + + /* clear all pending interrupts and enable them */ + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + + return hidma_ll_enable(lldev); +} + +struct hidma_lldev *hidma_ll_init(struct device *dev, u32 nr_tres, + void __iomem *trca, void __iomem *evca, + u8 chidx) +{ + u32 required_bytes; + struct hidma_lldev *lldev; + int rc; + size_t sz; + + if (!trca || !evca || !dev || !nr_tres) + return NULL; + + /* need at least four TREs */ + if (nr_tres < 4) + return NULL; + + /* need an extra space */ + nr_tres += 1; + + lldev = devm_kzalloc(dev, sizeof(struct hidma_lldev), GFP_KERNEL); + if (!lldev) + return NULL; + + lldev->evca = evca; + lldev->trca = trca; + lldev->dev = dev; + sz = sizeof(struct hidma_tre); + lldev->trepool = devm_kcalloc(lldev->dev, nr_tres, sz, GFP_KERNEL); + if (!lldev->trepool) + return NULL; + + required_bytes = sizeof(lldev->pending_tre_list[0]); + lldev->pending_tre_list = devm_kcalloc(dev, nr_tres, required_bytes, + GFP_KERNEL); + if (!lldev->pending_tre_list) + return NULL; + + sz = (HIDMA_TRE_SIZE + 1) * nr_tres; + lldev->tre_ring = dmam_alloc_coherent(dev, sz, &lldev->tre_dma, + GFP_KERNEL); + if (!lldev->tre_ring) + return NULL; + + memset(lldev->tre_ring, 0, (HIDMA_TRE_SIZE + 1) * nr_tres); + lldev->tre_ring_size = HIDMA_TRE_SIZE * nr_tres; + lldev->nr_tres = nr_tres; + + /* the TRE ring has to be TRE_SIZE aligned */ + if (!IS_ALIGNED(lldev->tre_dma, HIDMA_TRE_SIZE)) { + u8 tre_ring_shift; + + tre_ring_shift = lldev->tre_dma % HIDMA_TRE_SIZE; + tre_ring_shift = HIDMA_TRE_SIZE - tre_ring_shift; + lldev->tre_dma += tre_ring_shift; + lldev->tre_ring += tre_ring_shift; + } + + sz = (HIDMA_EVRE_SIZE + 1) * nr_tres; + lldev->evre_ring = dmam_alloc_coherent(dev, sz, &lldev->evre_dma, + GFP_KERNEL); + if (!lldev->evre_ring) + return NULL; + + memset(lldev->evre_ring, 0, (HIDMA_EVRE_SIZE + 1) * nr_tres); + lldev->evre_ring_size = HIDMA_EVRE_SIZE * nr_tres; + + /* the EVRE ring has to be EVRE_SIZE aligned */ + if (!IS_ALIGNED(lldev->evre_dma, HIDMA_EVRE_SIZE)) { + u8 evre_ring_shift; + + evre_ring_shift = lldev->evre_dma % HIDMA_EVRE_SIZE; + evre_ring_shift = HIDMA_EVRE_SIZE - evre_ring_shift; + lldev->evre_dma += evre_ring_shift; + lldev->evre_ring += evre_ring_shift; + } + lldev->nr_tres = nr_tres; + lldev->chidx = chidx; + + sz = nr_tres * sizeof(struct hidma_tre *); + rc = kfifo_alloc(&lldev->handoff_fifo, sz, GFP_KERNEL); + if (rc) + return NULL; + + rc = hidma_ll_setup(lldev); + if (rc) + return NULL; + + spin_lock_init(&lldev->lock); + tasklet_init(&lldev->rst_task, hidma_ll_abort, (unsigned long)lldev); + tasklet_init(&lldev->task, hidma_ll_tre_complete, (unsigned long)lldev); + lldev->initialized = 1; + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + return lldev; +} + +int hidma_ll_uninit(struct hidma_lldev *lldev) +{ + u32 required_bytes; + int rc = 0; + u32 val; + + if (!lldev) + return -ENODEV; + + if (!lldev->initialized) + return 0; + + lldev->initialized = 0; + + required_bytes = sizeof(struct hidma_tre) * lldev->nr_tres; + tasklet_kill(&lldev->task); + memset(lldev->trepool, 0, required_bytes); + lldev->trepool = NULL; + lldev->pending_tre_count = 0; + lldev->tre_write_offset = 0; + + rc = hidma_ll_reset(lldev); + + /* + * Clear all pending interrupts again. + * Otherwise, we observe reset complete interrupts. + */ + val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + return rc; +} + +enum dma_status hidma_ll_status(struct hidma_lldev *lldev, u32 tre_ch) +{ + enum dma_status ret = DMA_ERROR; + struct hidma_tre *tre; + unsigned long flags; + u8 err_code; + + spin_lock_irqsave(&lldev->lock, flags); + + tre = &lldev->trepool[tre_ch]; + err_code = tre->err_code; + + if (err_code & HIDMA_EVRE_STATUS_COMPLETE) + ret = DMA_COMPLETE; + else if (err_code & HIDMA_EVRE_STATUS_ERROR) + ret = DMA_ERROR; + else + ret = DMA_IN_PROGRESS; + spin_unlock_irqrestore(&lldev->lock, flags); + + return ret; +} diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c index ef491b8..c0e3653 100644 --- a/drivers/dma/qcom/hidma_mgmt.c +++ b/drivers/dma/qcom/hidma_mgmt.c @@ -1,7 +1,7 @@ /* * Qualcomm Technologies HIDMA DMA engine Management interface * - * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -17,13 +17,14 @@ #include <linux/acpi.h> #include <linux/of.h> #include <linux/property.h> -#include <linux/interrupt.h> -#include <linux/platform_device.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> #include <linux/module.h> #include <linux/uaccess.h> #include <linux/slab.h> #include <linux/pm_runtime.h> #include <linux/bitops.h> +#include <linux/dma-mapping.h> #include "hidma_mgmt.h" @@ -298,5 +299,109 @@ static struct platform_driver hidma_mgmt_driver = { }, }; -module_platform_driver(hidma_mgmt_driver); +#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ) +static int object_counter; + +static int __init hidma_mgmt_of_populate_channels(struct device_node *np) +{ + struct platform_device *pdev_parent = of_find_device_by_node(np); + struct platform_device_info pdevinfo; + struct of_phandle_args out_irq; + struct device_node *child; + struct resource *res; + const __be32 *cell; + int ret = 0, size, i, num; + u64 addr, addr_size; + + for_each_available_child_of_node(np, child) { + struct resource *res_iter; + struct platform_device *new_pdev; + + cell = of_get_property(child, "reg", &size); + if (!cell) { + ret = -EINVAL; + goto out; + } + + size /= sizeof(*cell); + num = size / + (of_n_addr_cells(child) + of_n_size_cells(child)) + 1; + + /* allocate a resource array */ + res = kcalloc(num, sizeof(*res), GFP_KERNEL); + if (!res) { + ret = -ENOMEM; + goto out; + } + + /* read each reg value */ + i = 0; + res_iter = res; + while (i < size) { + addr = of_read_number(&cell[i], + of_n_addr_cells(child)); + i += of_n_addr_cells(child); + + addr_size = of_read_number(&cell[i], + of_n_size_cells(child)); + i += of_n_size_cells(child); + + res_iter->start = addr; + res_iter->end = res_iter->start + addr_size - 1; + res_iter->flags = IORESOURCE_MEM; + res_iter++; + } + + ret = of_irq_parse_one(child, 0, &out_irq); + if (ret) + goto out; + + res_iter->start = irq_create_of_mapping(&out_irq); + res_iter->name = "hidma event irq"; + res_iter->flags = IORESOURCE_IRQ; + + memset(&pdevinfo, 0, sizeof(pdevinfo)); + pdevinfo.fwnode = &child->fwnode; + pdevinfo.parent = pdev_parent ? &pdev_parent->dev : NULL; + pdevinfo.name = child->name; + pdevinfo.id = object_counter++; + pdevinfo.res = res; + pdevinfo.num_res = num; + pdevinfo.data = NULL; + pdevinfo.size_data = 0; + pdevinfo.dma_mask = DMA_BIT_MASK(64); + new_pdev = platform_device_register_full(&pdevinfo); + if (!new_pdev) { + ret = -ENODEV; + goto out; + } + of_dma_configure(&new_pdev->dev, child); + + kfree(res); + res = NULL; + } +out: + kfree(res); + + return ret; +} +#endif + +static int __init hidma_mgmt_init(void) +{ +#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ) + struct device_node *child; + + for (child = of_find_matching_node(NULL, hidma_mgmt_match); child; + child = of_find_matching_node(child, hidma_mgmt_match)) { + /* device tree based firmware here */ + hidma_mgmt_of_populate_channels(child); + of_node_put(child); + } +#endif + platform_driver_register(&hidma_mgmt_driver); + + return 0; +} +module_init(hidma_mgmt_init); MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index 2db12e4..5065ca4 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -146,6 +146,8 @@ struct sun6i_vchan { struct dma_slave_config cfg; struct sun6i_pchan *phy; u8 port; + u8 irq_type; + bool cyclic; }; struct sun6i_dma_dev { @@ -254,6 +256,30 @@ static inline s8 convert_buswidth(enum dma_slave_buswidth addr_width) return addr_width >> 1; } +static size_t sun6i_get_chan_size(struct sun6i_pchan *pchan) +{ + struct sun6i_desc *txd = pchan->desc; + struct sun6i_dma_lli *lli; + size_t bytes; + dma_addr_t pos; + + pos = readl(pchan->base + DMA_CHAN_LLI_ADDR); + bytes = readl(pchan->base + DMA_CHAN_CUR_CNT); + + if (pos == LLI_LAST_ITEM) + return bytes; + + for (lli = txd->v_lli; lli; lli = lli->v_lli_next) { + if (lli->p_lli_next == pos) { + for (lli = lli->v_lli_next; lli; lli = lli->v_lli_next) + bytes += lli->len; + break; + } + } + + return bytes; +} + static void *sun6i_dma_lli_add(struct sun6i_dma_lli *prev, struct sun6i_dma_lli *next, dma_addr_t next_phy, @@ -276,45 +302,6 @@ static void *sun6i_dma_lli_add(struct sun6i_dma_lli *prev, return next; } -static inline int sun6i_dma_cfg_lli(struct sun6i_dma_lli *lli, - dma_addr_t src, - dma_addr_t dst, u32 len, - struct dma_slave_config *config) -{ - u8 src_width, dst_width, src_burst, dst_burst; - - if (!config) - return -EINVAL; - - src_burst = convert_burst(config->src_maxburst); - if (src_burst) - return src_burst; - - dst_burst = convert_burst(config->dst_maxburst); - if (dst_burst) - return dst_burst; - - src_width = convert_buswidth(config->src_addr_width); - if (src_width) - return src_width; - - dst_width = convert_buswidth(config->dst_addr_width); - if (dst_width) - return dst_width; - - lli->cfg = DMA_CHAN_CFG_SRC_BURST(src_burst) | - DMA_CHAN_CFG_SRC_WIDTH(src_width) | - DMA_CHAN_CFG_DST_BURST(dst_burst) | - DMA_CHAN_CFG_DST_WIDTH(dst_width); - - lli->src = src; - lli->dst = dst; - lli->len = len; - lli->para = NORMAL_WAIT; - - return 0; -} - static inline void sun6i_dma_dump_lli(struct sun6i_vchan *vchan, struct sun6i_dma_lli *lli) { @@ -381,9 +368,13 @@ static int sun6i_dma_start_desc(struct sun6i_vchan *vchan) irq_reg = pchan->idx / DMA_IRQ_CHAN_NR; irq_offset = pchan->idx % DMA_IRQ_CHAN_NR; - irq_val = readl(sdev->base + DMA_IRQ_EN(irq_offset)); - irq_val |= DMA_IRQ_QUEUE << (irq_offset * DMA_IRQ_CHAN_WIDTH); - writel(irq_val, sdev->base + DMA_IRQ_EN(irq_offset)); + vchan->irq_type = vchan->cyclic ? DMA_IRQ_PKG : DMA_IRQ_QUEUE; + + irq_val = readl(sdev->base + DMA_IRQ_EN(irq_reg)); + irq_val &= ~((DMA_IRQ_HALF | DMA_IRQ_PKG | DMA_IRQ_QUEUE) << + (irq_offset * DMA_IRQ_CHAN_WIDTH)); + irq_val |= vchan->irq_type << (irq_offset * DMA_IRQ_CHAN_WIDTH); + writel(irq_val, sdev->base + DMA_IRQ_EN(irq_reg)); writel(pchan->desc->p_lli, pchan->base + DMA_CHAN_LLI_ADDR); writel(DMA_CHAN_ENABLE_START, pchan->base + DMA_CHAN_ENABLE); @@ -479,11 +470,12 @@ static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id) writel(status, sdev->base + DMA_IRQ_STAT(i)); for (j = 0; (j < DMA_IRQ_CHAN_NR) && status; j++) { - if (status & DMA_IRQ_QUEUE) { - pchan = sdev->pchans + j; - vchan = pchan->vchan; - - if (vchan) { + pchan = sdev->pchans + j; + vchan = pchan->vchan; + if (vchan && (status & vchan->irq_type)) { + if (vchan->cyclic) { + vchan_cyclic_callback(&pchan->desc->vd); + } else { spin_lock(&vchan->vc.lock); vchan_cookie_complete(&pchan->desc->vd); pchan->done = pchan->desc; @@ -502,6 +494,55 @@ static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id) return ret; } +static int set_config(struct sun6i_dma_dev *sdev, + struct dma_slave_config *sconfig, + enum dma_transfer_direction direction, + u32 *p_cfg) +{ + s8 src_width, dst_width, src_burst, dst_burst; + + switch (direction) { + case DMA_MEM_TO_DEV: + src_burst = convert_burst(sconfig->src_maxburst ? + sconfig->src_maxburst : 8); + src_width = convert_buswidth(sconfig->src_addr_width != + DMA_SLAVE_BUSWIDTH_UNDEFINED ? + sconfig->src_addr_width : + DMA_SLAVE_BUSWIDTH_4_BYTES); + dst_burst = convert_burst(sconfig->dst_maxburst); + dst_width = convert_buswidth(sconfig->dst_addr_width); + break; + case DMA_DEV_TO_MEM: + src_burst = convert_burst(sconfig->src_maxburst); + src_width = convert_buswidth(sconfig->src_addr_width); + dst_burst = convert_burst(sconfig->dst_maxburst ? + sconfig->dst_maxburst : 8); + dst_width = convert_buswidth(sconfig->dst_addr_width != + DMA_SLAVE_BUSWIDTH_UNDEFINED ? + sconfig->dst_addr_width : + DMA_SLAVE_BUSWIDTH_4_BYTES); + break; + default: + return -EINVAL; + } + + if (src_burst < 0) + return src_burst; + if (src_width < 0) + return src_width; + if (dst_burst < 0) + return dst_burst; + if (dst_width < 0) + return dst_width; + + *p_cfg = DMA_CHAN_CFG_SRC_BURST(src_burst) | + DMA_CHAN_CFG_SRC_WIDTH(src_width) | + DMA_CHAN_CFG_DST_BURST(dst_burst) | + DMA_CHAN_CFG_DST_WIDTH(dst_width); + + return 0; +} + static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy( struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) @@ -569,13 +610,15 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg( struct sun6i_desc *txd; struct scatterlist *sg; dma_addr_t p_lli; + u32 lli_cfg; int i, ret; if (!sgl) return NULL; - if (!is_slave_direction(dir)) { - dev_err(chan2dev(chan), "Invalid DMA direction\n"); + ret = set_config(sdev, sconfig, dir, &lli_cfg); + if (ret) { + dev_err(chan2dev(chan), "Invalid DMA configuration\n"); return NULL; } @@ -588,14 +631,14 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg( if (!v_lli) goto err_lli_free; - if (dir == DMA_MEM_TO_DEV) { - ret = sun6i_dma_cfg_lli(v_lli, sg_dma_address(sg), - sconfig->dst_addr, sg_dma_len(sg), - sconfig); - if (ret) - goto err_cur_lli_free; + v_lli->len = sg_dma_len(sg); + v_lli->para = NORMAL_WAIT; - v_lli->cfg |= DMA_CHAN_CFG_DST_IO_MODE | + if (dir == DMA_MEM_TO_DEV) { + v_lli->src = sg_dma_address(sg); + v_lli->dst = sconfig->dst_addr; + v_lli->cfg = lli_cfg | + DMA_CHAN_CFG_DST_IO_MODE | DMA_CHAN_CFG_SRC_LINEAR_MODE | DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_DST_DRQ(vchan->port); @@ -607,13 +650,10 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg( sg_dma_len(sg), flags); } else { - ret = sun6i_dma_cfg_lli(v_lli, sconfig->src_addr, - sg_dma_address(sg), sg_dma_len(sg), - sconfig); - if (ret) - goto err_cur_lli_free; - - v_lli->cfg |= DMA_CHAN_CFG_DST_LINEAR_MODE | + v_lli->src = sconfig->src_addr; + v_lli->dst = sg_dma_address(sg); + v_lli->cfg = lli_cfg | + DMA_CHAN_CFG_DST_LINEAR_MODE | DMA_CHAN_CFG_SRC_IO_MODE | DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_SRC_DRQ(vchan->port); @@ -634,8 +674,78 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg( return vchan_tx_prep(&vchan->vc, &txd->vd, flags); -err_cur_lli_free: - dma_pool_free(sdev->pool, v_lli, p_lli); +err_lli_free: + for (prev = txd->v_lli; prev; prev = prev->v_lli_next) + dma_pool_free(sdev->pool, prev, virt_to_phys(prev)); + kfree(txd); + return NULL; +} + +static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_cyclic( + struct dma_chan *chan, + dma_addr_t buf_addr, + size_t buf_len, + size_t period_len, + enum dma_transfer_direction dir, + unsigned long flags) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun6i_dma_lli *v_lli, *prev = NULL; + struct sun6i_desc *txd; + dma_addr_t p_lli; + u32 lli_cfg; + unsigned int i, periods = buf_len / period_len; + int ret; + + ret = set_config(sdev, sconfig, dir, &lli_cfg); + if (ret) { + dev_err(chan2dev(chan), "Invalid DMA configuration\n"); + return NULL; + } + + txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + if (!txd) + return NULL; + + for (i = 0; i < periods; i++) { + v_lli = dma_pool_alloc(sdev->pool, GFP_NOWAIT, &p_lli); + if (!v_lli) { + dev_err(sdev->slave.dev, "Failed to alloc lli memory\n"); + goto err_lli_free; + } + + v_lli->len = period_len; + v_lli->para = NORMAL_WAIT; + + if (dir == DMA_MEM_TO_DEV) { + v_lli->src = buf_addr + period_len * i; + v_lli->dst = sconfig->dst_addr; + v_lli->cfg = lli_cfg | + DMA_CHAN_CFG_DST_IO_MODE | + DMA_CHAN_CFG_SRC_LINEAR_MODE | + DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | + DMA_CHAN_CFG_DST_DRQ(vchan->port); + } else { + v_lli->src = sconfig->src_addr; + v_lli->dst = buf_addr + period_len * i; + v_lli->cfg = lli_cfg | + DMA_CHAN_CFG_DST_LINEAR_MODE | + DMA_CHAN_CFG_SRC_IO_MODE | + DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) | + DMA_CHAN_CFG_SRC_DRQ(vchan->port); + } + + prev = sun6i_dma_lli_add(prev, v_lli, p_lli, txd); + } + + prev->p_lli_next = txd->p_lli; /* cyclic list */ + + vchan->cyclic = true; + + return vchan_tx_prep(&vchan->vc, &txd->vd, flags); + err_lli_free: for (prev = txd->v_lli; prev; prev = prev->v_lli_next) dma_pool_free(sdev->pool, prev, virt_to_phys(prev)); @@ -712,6 +822,16 @@ static int sun6i_dma_terminate_all(struct dma_chan *chan) spin_lock_irqsave(&vchan->vc.lock, flags); + if (vchan->cyclic) { + vchan->cyclic = false; + if (pchan && pchan->desc) { + struct virt_dma_desc *vd = &pchan->desc->vd; + struct virt_dma_chan *vc = &vchan->vc; + + list_add_tail(&vd->node, &vc->desc_completed); + } + } + vchan_get_all_descriptors(&vchan->vc, &head); if (pchan) { @@ -759,7 +879,7 @@ static enum dma_status sun6i_dma_tx_status(struct dma_chan *chan, } else if (!pchan || !pchan->desc) { bytes = 0; } else { - bytes = readl(pchan->base + DMA_CHAN_CUR_CNT); + bytes = sun6i_get_chan_size(pchan); } spin_unlock_irqrestore(&vchan->vc.lock, flags); @@ -963,6 +1083,7 @@ static int sun6i_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_PRIVATE, sdc->slave.cap_mask); dma_cap_set(DMA_MEMCPY, sdc->slave.cap_mask); dma_cap_set(DMA_SLAVE, sdc->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, sdc->slave.cap_mask); INIT_LIST_HEAD(&sdc->slave.channels); sdc->slave.device_free_chan_resources = sun6i_dma_free_chan_resources; @@ -970,6 +1091,7 @@ static int sun6i_dma_probe(struct platform_device *pdev) sdc->slave.device_issue_pending = sun6i_dma_issue_pending; sdc->slave.device_prep_slave_sg = sun6i_dma_prep_slave_sg; sdc->slave.device_prep_dma_memcpy = sun6i_dma_prep_dma_memcpy; + sdc->slave.device_prep_dma_cyclic = sun6i_dma_prep_dma_cyclic; sdc->slave.copy_align = DMAENGINE_ALIGN_4_BYTES; sdc->slave.device_config = sun6i_dma_config; sdc->slave.device_pause = sun6i_dma_pause; diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 3871f29..01e316f 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -54,6 +54,7 @@ #define TEGRA_APBDMA_CSR_ONCE BIT(27) #define TEGRA_APBDMA_CSR_FLOW BIT(21) #define TEGRA_APBDMA_CSR_REQ_SEL_SHIFT 16 +#define TEGRA_APBDMA_CSR_REQ_SEL_MASK 0x1F #define TEGRA_APBDMA_CSR_WCOUNT_MASK 0xFFFC /* STATUS register */ @@ -114,6 +115,8 @@ /* Channel base address offset from APBDMA base address */ #define TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET 0x1000 +#define TEGRA_APBDMA_SLAVE_ID_INVALID (TEGRA_APBDMA_CSR_REQ_SEL_MASK + 1) + struct tegra_dma; /* @@ -353,8 +356,11 @@ static int tegra_dma_slave_config(struct dma_chan *dc, } memcpy(&tdc->dma_sconfig, sconfig, sizeof(*sconfig)); - if (!tdc->slave_id) + if (tdc->slave_id == TEGRA_APBDMA_SLAVE_ID_INVALID) { + if (sconfig->slave_id > TEGRA_APBDMA_CSR_REQ_SEL_MASK) + return -EINVAL; tdc->slave_id = sconfig->slave_id; + } tdc->config_init = true; return 0; } @@ -1236,7 +1242,7 @@ static void tegra_dma_free_chan_resources(struct dma_chan *dc) } pm_runtime_put(tdma->dev); - tdc->slave_id = 0; + tdc->slave_id = TEGRA_APBDMA_SLAVE_ID_INVALID; } static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec, @@ -1246,6 +1252,11 @@ static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec, struct dma_chan *chan; struct tegra_dma_channel *tdc; + if (dma_spec->args[0] > TEGRA_APBDMA_CSR_REQ_SEL_MASK) { + dev_err(tdma->dev, "Invalid slave id: %d\n", dma_spec->args[0]); + return NULL; + } + chan = dma_get_any_slave_channel(&tdma->dma_dev); if (!chan) return NULL; @@ -1389,6 +1400,7 @@ static int tegra_dma_probe(struct platform_device *pdev) &tdma->dma_dev.channels); tdc->tdma = tdma; tdc->id = i; + tdc->slave_id = TEGRA_APBDMA_SLAVE_ID_INVALID; tasklet_init(&tdc->tasklet, tegra_dma_tasklet, (unsigned long)tdc); diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c new file mode 100644 index 0000000..c4b121c --- /dev/null +++ b/drivers/dma/tegra210-adma.c @@ -0,0 +1,840 @@ +/* + * ADMA driver for Nvidia's Tegra210 ADMA controller. + * + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/clk.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> +#include <linux/of_irq.h> +#include <linux/pm_clock.h> +#include <linux/pm_runtime.h> +#include <linux/slab.h> + +#include "virt-dma.h" + +#define ADMA_CH_CMD 0x00 +#define ADMA_CH_STATUS 0x0c +#define ADMA_CH_STATUS_XFER_EN BIT(0) + +#define ADMA_CH_INT_STATUS 0x10 +#define ADMA_CH_INT_STATUS_XFER_DONE BIT(0) + +#define ADMA_CH_INT_CLEAR 0x1c +#define ADMA_CH_CTRL 0x24 +#define ADMA_CH_CTRL_TX_REQ(val) (((val) & 0xf) << 28) +#define ADMA_CH_CTRL_TX_REQ_MAX 10 +#define ADMA_CH_CTRL_RX_REQ(val) (((val) & 0xf) << 24) +#define ADMA_CH_CTRL_RX_REQ_MAX 10 +#define ADMA_CH_CTRL_DIR(val) (((val) & 0xf) << 12) +#define ADMA_CH_CTRL_DIR_AHUB2MEM 2 +#define ADMA_CH_CTRL_DIR_MEM2AHUB 4 +#define ADMA_CH_CTRL_MODE_CONTINUOUS (2 << 8) +#define ADMA_CH_CTRL_FLOWCTRL_EN BIT(1) + +#define ADMA_CH_CONFIG 0x28 +#define ADMA_CH_CONFIG_SRC_BUF(val) (((val) & 0x7) << 28) +#define ADMA_CH_CONFIG_TRG_BUF(val) (((val) & 0x7) << 24) +#define ADMA_CH_CONFIG_BURST_SIZE(val) (((val) & 0x7) << 20) +#define ADMA_CH_CONFIG_BURST_16 5 +#define ADMA_CH_CONFIG_WEIGHT_FOR_WRR(val) ((val) & 0xf) +#define ADMA_CH_CONFIG_MAX_BUFS 8 + +#define ADMA_CH_FIFO_CTRL 0x2c +#define ADMA_CH_FIFO_CTRL_OVRFW_THRES(val) (((val) & 0xf) << 24) +#define ADMA_CH_FIFO_CTRL_STARV_THRES(val) (((val) & 0xf) << 16) +#define ADMA_CH_FIFO_CTRL_TX_SIZE(val) (((val) & 0xf) << 8) +#define ADMA_CH_FIFO_CTRL_RX_SIZE(val) ((val) & 0xf) + +#define ADMA_CH_LOWER_SRC_ADDR 0x34 +#define ADMA_CH_LOWER_TRG_ADDR 0x3c +#define ADMA_CH_TC 0x44 +#define ADMA_CH_TC_COUNT_MASK 0x3ffffffc + +#define ADMA_CH_XFER_STATUS 0x54 +#define ADMA_CH_XFER_STATUS_COUNT_MASK 0xffff + +#define ADMA_GLOBAL_CMD 0xc00 +#define ADMA_GLOBAL_SOFT_RESET 0xc04 +#define ADMA_GLOBAL_INT_CLEAR 0xc20 +#define ADMA_GLOBAL_CTRL 0xc24 + +#define ADMA_CH_REG_OFFSET(a) (a * 0x80) + +#define ADMA_CH_FIFO_CTRL_DEFAULT (ADMA_CH_FIFO_CTRL_OVRFW_THRES(1) | \ + ADMA_CH_FIFO_CTRL_STARV_THRES(1) | \ + ADMA_CH_FIFO_CTRL_TX_SIZE(3) | \ + ADMA_CH_FIFO_CTRL_RX_SIZE(3)) +struct tegra_adma; + +/* + * struct tegra_adma_chip_data - Tegra chip specific data + * @nr_channels: Number of DMA channels available. + */ +struct tegra_adma_chip_data { + int nr_channels; +}; + +/* + * struct tegra_adma_chan_regs - Tegra ADMA channel registers + */ +struct tegra_adma_chan_regs { + unsigned int ctrl; + unsigned int config; + unsigned int src_addr; + unsigned int trg_addr; + unsigned int fifo_ctrl; + unsigned int tc; +}; + +/* + * struct tegra_adma_desc - Tegra ADMA descriptor to manage transfer requests. + */ +struct tegra_adma_desc { + struct virt_dma_desc vd; + struct tegra_adma_chan_regs ch_regs; + size_t buf_len; + size_t period_len; + size_t num_periods; +}; + +/* + * struct tegra_adma_chan - Tegra ADMA channel information + */ +struct tegra_adma_chan { + struct virt_dma_chan vc; + struct tegra_adma_desc *desc; + struct tegra_adma *tdma; + int irq; + void __iomem *chan_addr; + + /* Slave channel configuration info */ + struct dma_slave_config sconfig; + enum dma_transfer_direction sreq_dir; + unsigned int sreq_index; + bool sreq_reserved; + + /* Transfer count and position info */ + unsigned int tx_buf_count; + unsigned int tx_buf_pos; +}; + +/* + * struct tegra_adma - Tegra ADMA controller information + */ +struct tegra_adma { + struct dma_device dma_dev; + struct device *dev; + void __iomem *base_addr; + unsigned int nr_channels; + unsigned long rx_requests_reserved; + unsigned long tx_requests_reserved; + + /* Used to store global command register state when suspending */ + unsigned int global_cmd; + + /* Last member of the structure */ + struct tegra_adma_chan channels[0]; +}; + +static inline void tdma_write(struct tegra_adma *tdma, u32 reg, u32 val) +{ + writel(val, tdma->base_addr + reg); +} + +static inline u32 tdma_read(struct tegra_adma *tdma, u32 reg) +{ + return readl(tdma->base_addr + reg); +} + +static inline void tdma_ch_write(struct tegra_adma_chan *tdc, u32 reg, u32 val) +{ + writel(val, tdc->chan_addr + reg); +} + +static inline u32 tdma_ch_read(struct tegra_adma_chan *tdc, u32 reg) +{ + return readl(tdc->chan_addr + reg); +} + +static inline struct tegra_adma_chan *to_tegra_adma_chan(struct dma_chan *dc) +{ + return container_of(dc, struct tegra_adma_chan, vc.chan); +} + +static inline struct tegra_adma_desc *to_tegra_adma_desc( + struct dma_async_tx_descriptor *td) +{ + return container_of(td, struct tegra_adma_desc, vd.tx); +} + +static inline struct device *tdc2dev(struct tegra_adma_chan *tdc) +{ + return tdc->tdma->dev; +} + +static void tegra_adma_desc_free(struct virt_dma_desc *vd) +{ + kfree(container_of(vd, struct tegra_adma_desc, vd)); +} + +static int tegra_adma_slave_config(struct dma_chan *dc, + struct dma_slave_config *sconfig) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + + memcpy(&tdc->sconfig, sconfig, sizeof(*sconfig)); + + return 0; +} + +static int tegra_adma_init(struct tegra_adma *tdma) +{ + u32 status; + int ret; + + /* Clear any interrupts */ + tdma_write(tdma, ADMA_GLOBAL_INT_CLEAR, 0x1); + + /* Assert soft reset */ + tdma_write(tdma, ADMA_GLOBAL_SOFT_RESET, 0x1); + + /* Wait for reset to clear */ + ret = readx_poll_timeout(readl, + tdma->base_addr + ADMA_GLOBAL_SOFT_RESET, + status, status == 0, 20, 10000); + if (ret) + return ret; + + /* Enable global ADMA registers */ + tdma_write(tdma, ADMA_GLOBAL_CMD, 1); + + return 0; +} + +static int tegra_adma_request_alloc(struct tegra_adma_chan *tdc, + enum dma_transfer_direction direction) +{ + struct tegra_adma *tdma = tdc->tdma; + unsigned int sreq_index = tdc->sreq_index; + + if (tdc->sreq_reserved) + return tdc->sreq_dir == direction ? 0 : -EINVAL; + + switch (direction) { + case DMA_MEM_TO_DEV: + if (sreq_index > ADMA_CH_CTRL_TX_REQ_MAX) { + dev_err(tdma->dev, "invalid DMA request\n"); + return -EINVAL; + } + + if (test_and_set_bit(sreq_index, &tdma->tx_requests_reserved)) { + dev_err(tdma->dev, "DMA request reserved\n"); + return -EINVAL; + } + break; + + case DMA_DEV_TO_MEM: + if (sreq_index > ADMA_CH_CTRL_RX_REQ_MAX) { + dev_err(tdma->dev, "invalid DMA request\n"); + return -EINVAL; + } + + if (test_and_set_bit(sreq_index, &tdma->rx_requests_reserved)) { + dev_err(tdma->dev, "DMA request reserved\n"); + return -EINVAL; + } + break; + + default: + dev_WARN(tdma->dev, "channel %s has invalid transfer type\n", + dma_chan_name(&tdc->vc.chan)); + return -EINVAL; + } + + tdc->sreq_dir = direction; + tdc->sreq_reserved = true; + + return 0; +} + +static void tegra_adma_request_free(struct tegra_adma_chan *tdc) +{ + struct tegra_adma *tdma = tdc->tdma; + + if (!tdc->sreq_reserved) + return; + + switch (tdc->sreq_dir) { + case DMA_MEM_TO_DEV: + clear_bit(tdc->sreq_index, &tdma->tx_requests_reserved); + break; + + case DMA_DEV_TO_MEM: + clear_bit(tdc->sreq_index, &tdma->rx_requests_reserved); + break; + + default: + dev_WARN(tdma->dev, "channel %s has invalid transfer type\n", + dma_chan_name(&tdc->vc.chan)); + return; + } + + tdc->sreq_reserved = false; +} + +static u32 tegra_adma_irq_status(struct tegra_adma_chan *tdc) +{ + u32 status = tdma_ch_read(tdc, ADMA_CH_INT_STATUS); + + return status & ADMA_CH_INT_STATUS_XFER_DONE; +} + +static u32 tegra_adma_irq_clear(struct tegra_adma_chan *tdc) +{ + u32 status = tegra_adma_irq_status(tdc); + + if (status) + tdma_ch_write(tdc, ADMA_CH_INT_CLEAR, status); + + return status; +} + +static void tegra_adma_stop(struct tegra_adma_chan *tdc) +{ + unsigned int status; + + /* Disable ADMA */ + tdma_ch_write(tdc, ADMA_CH_CMD, 0); + + /* Clear interrupt status */ + tegra_adma_irq_clear(tdc); + + if (readx_poll_timeout_atomic(readl, tdc->chan_addr + ADMA_CH_STATUS, + status, !(status & ADMA_CH_STATUS_XFER_EN), + 20, 10000)) { + dev_err(tdc2dev(tdc), "unable to stop DMA channel\n"); + return; + } + + kfree(tdc->desc); + tdc->desc = NULL; +} + +static void tegra_adma_start(struct tegra_adma_chan *tdc) +{ + struct virt_dma_desc *vd = vchan_next_desc(&tdc->vc); + struct tegra_adma_chan_regs *ch_regs; + struct tegra_adma_desc *desc; + + if (!vd) + return; + + list_del(&vd->node); + + desc = to_tegra_adma_desc(&vd->tx); + + if (!desc) { + dev_warn(tdc2dev(tdc), "unable to start DMA, no descriptor\n"); + return; + } + + ch_regs = &desc->ch_regs; + + tdc->tx_buf_pos = 0; + tdc->tx_buf_count = 0; + tdma_ch_write(tdc, ADMA_CH_TC, ch_regs->tc); + tdma_ch_write(tdc, ADMA_CH_CTRL, ch_regs->ctrl); + tdma_ch_write(tdc, ADMA_CH_LOWER_SRC_ADDR, ch_regs->src_addr); + tdma_ch_write(tdc, ADMA_CH_LOWER_TRG_ADDR, ch_regs->trg_addr); + tdma_ch_write(tdc, ADMA_CH_FIFO_CTRL, ch_regs->fifo_ctrl); + tdma_ch_write(tdc, ADMA_CH_CONFIG, ch_regs->config); + + /* Start ADMA */ + tdma_ch_write(tdc, ADMA_CH_CMD, 1); + + tdc->desc = desc; +} + +static unsigned int tegra_adma_get_residue(struct tegra_adma_chan *tdc) +{ + struct tegra_adma_desc *desc = tdc->desc; + unsigned int max = ADMA_CH_XFER_STATUS_COUNT_MASK + 1; + unsigned int pos = tdma_ch_read(tdc, ADMA_CH_XFER_STATUS); + unsigned int periods_remaining; + + /* + * Handle wrap around of buffer count register + */ + if (pos < tdc->tx_buf_pos) + tdc->tx_buf_count += pos + (max - tdc->tx_buf_pos); + else + tdc->tx_buf_count += pos - tdc->tx_buf_pos; + + periods_remaining = tdc->tx_buf_count % desc->num_periods; + tdc->tx_buf_pos = pos; + + return desc->buf_len - (periods_remaining * desc->period_len); +} + +static irqreturn_t tegra_adma_isr(int irq, void *dev_id) +{ + struct tegra_adma_chan *tdc = dev_id; + unsigned long status; + unsigned long flags; + + spin_lock_irqsave(&tdc->vc.lock, flags); + + status = tegra_adma_irq_clear(tdc); + if (status == 0 || !tdc->desc) { + spin_unlock_irqrestore(&tdc->vc.lock, flags); + return IRQ_NONE; + } + + vchan_cyclic_callback(&tdc->desc->vd); + + spin_unlock_irqrestore(&tdc->vc.lock, flags); + + return IRQ_HANDLED; +} + +static void tegra_adma_issue_pending(struct dma_chan *dc) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + unsigned long flags; + + spin_lock_irqsave(&tdc->vc.lock, flags); + + if (vchan_issue_pending(&tdc->vc)) { + if (!tdc->desc) + tegra_adma_start(tdc); + } + + spin_unlock_irqrestore(&tdc->vc.lock, flags); +} + +static int tegra_adma_terminate_all(struct dma_chan *dc) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&tdc->vc.lock, flags); + + if (tdc->desc) + tegra_adma_stop(tdc); + + tegra_adma_request_free(tdc); + vchan_get_all_descriptors(&tdc->vc, &head); + spin_unlock_irqrestore(&tdc->vc.lock, flags); + vchan_dma_desc_free_list(&tdc->vc, &head); + + return 0; +} + +static enum dma_status tegra_adma_tx_status(struct dma_chan *dc, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + struct tegra_adma_desc *desc; + struct virt_dma_desc *vd; + enum dma_status ret; + unsigned long flags; + unsigned int residual; + + ret = dma_cookie_status(dc, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + spin_lock_irqsave(&tdc->vc.lock, flags); + + vd = vchan_find_desc(&tdc->vc, cookie); + if (vd) { + desc = to_tegra_adma_desc(&vd->tx); + residual = desc->ch_regs.tc; + } else if (tdc->desc && tdc->desc->vd.tx.cookie == cookie) { + residual = tegra_adma_get_residue(tdc); + } else { + residual = 0; + } + + spin_unlock_irqrestore(&tdc->vc.lock, flags); + + dma_set_residue(txstate, residual); + + return ret; +} + +static int tegra_adma_set_xfer_params(struct tegra_adma_chan *tdc, + struct tegra_adma_desc *desc, + dma_addr_t buf_addr, + enum dma_transfer_direction direction) +{ + struct tegra_adma_chan_regs *ch_regs = &desc->ch_regs; + unsigned int burst_size, adma_dir; + + if (desc->num_periods > ADMA_CH_CONFIG_MAX_BUFS) + return -EINVAL; + + switch (direction) { + case DMA_MEM_TO_DEV: + adma_dir = ADMA_CH_CTRL_DIR_MEM2AHUB; + burst_size = fls(tdc->sconfig.dst_maxburst); + ch_regs->config = ADMA_CH_CONFIG_SRC_BUF(desc->num_periods - 1); + ch_regs->ctrl = ADMA_CH_CTRL_TX_REQ(tdc->sreq_index); + ch_regs->src_addr = buf_addr; + break; + + case DMA_DEV_TO_MEM: + adma_dir = ADMA_CH_CTRL_DIR_AHUB2MEM; + burst_size = fls(tdc->sconfig.src_maxburst); + ch_regs->config = ADMA_CH_CONFIG_TRG_BUF(desc->num_periods - 1); + ch_regs->ctrl = ADMA_CH_CTRL_RX_REQ(tdc->sreq_index); + ch_regs->trg_addr = buf_addr; + break; + + default: + dev_err(tdc2dev(tdc), "DMA direction is not supported\n"); + return -EINVAL; + } + + if (!burst_size || burst_size > ADMA_CH_CONFIG_BURST_16) + burst_size = ADMA_CH_CONFIG_BURST_16; + + ch_regs->ctrl |= ADMA_CH_CTRL_DIR(adma_dir) | + ADMA_CH_CTRL_MODE_CONTINUOUS | + ADMA_CH_CTRL_FLOWCTRL_EN; + ch_regs->config |= ADMA_CH_CONFIG_BURST_SIZE(burst_size); + ch_regs->config |= ADMA_CH_CONFIG_WEIGHT_FOR_WRR(1); + ch_regs->fifo_ctrl = ADMA_CH_FIFO_CTRL_DEFAULT; + ch_regs->tc = desc->period_len & ADMA_CH_TC_COUNT_MASK; + + return tegra_adma_request_alloc(tdc, direction); +} + +static struct dma_async_tx_descriptor *tegra_adma_prep_dma_cyclic( + struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + struct tegra_adma_desc *desc = NULL; + + if (!buf_len || !period_len || period_len > ADMA_CH_TC_COUNT_MASK) { + dev_err(tdc2dev(tdc), "invalid buffer/period len\n"); + return NULL; + } + + if (buf_len % period_len) { + dev_err(tdc2dev(tdc), "buf_len not a multiple of period_len\n"); + return NULL; + } + + if (!IS_ALIGNED(buf_addr, 4)) { + dev_err(tdc2dev(tdc), "invalid buffer alignment\n"); + return NULL; + } + + desc = kzalloc(sizeof(*desc), GFP_NOWAIT); + if (!desc) + return NULL; + + desc->buf_len = buf_len; + desc->period_len = period_len; + desc->num_periods = buf_len / period_len; + + if (tegra_adma_set_xfer_params(tdc, desc, buf_addr, direction)) { + kfree(desc); + return NULL; + } + + return vchan_tx_prep(&tdc->vc, &desc->vd, flags); +} + +static int tegra_adma_alloc_chan_resources(struct dma_chan *dc) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + int ret; + + ret = request_irq(tdc->irq, tegra_adma_isr, 0, dma_chan_name(dc), tdc); + if (ret) { + dev_err(tdc2dev(tdc), "failed to get interrupt for %s\n", + dma_chan_name(dc)); + return ret; + } + + ret = pm_runtime_get_sync(tdc2dev(tdc)); + if (ret < 0) { + free_irq(tdc->irq, tdc); + return ret; + } + + dma_cookie_init(&tdc->vc.chan); + + return 0; +} + +static void tegra_adma_free_chan_resources(struct dma_chan *dc) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + + tegra_adma_terminate_all(dc); + vchan_free_chan_resources(&tdc->vc); + tasklet_kill(&tdc->vc.task); + free_irq(tdc->irq, tdc); + pm_runtime_put(tdc2dev(tdc)); + + tdc->sreq_index = 0; + tdc->sreq_dir = DMA_TRANS_NONE; +} + +static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct tegra_adma *tdma = ofdma->of_dma_data; + struct tegra_adma_chan *tdc; + struct dma_chan *chan; + unsigned int sreq_index; + + if (dma_spec->args_count != 1) + return NULL; + + sreq_index = dma_spec->args[0]; + + if (sreq_index == 0) { + dev_err(tdma->dev, "DMA request must not be 0\n"); + return NULL; + } + + chan = dma_get_any_slave_channel(&tdma->dma_dev); + if (!chan) + return NULL; + + tdc = to_tegra_adma_chan(chan); + tdc->sreq_index = sreq_index; + + return chan; +} + +static int tegra_adma_runtime_suspend(struct device *dev) +{ + struct tegra_adma *tdma = dev_get_drvdata(dev); + + tdma->global_cmd = tdma_read(tdma, ADMA_GLOBAL_CMD); + + return pm_clk_suspend(dev); +} + +static int tegra_adma_runtime_resume(struct device *dev) +{ + struct tegra_adma *tdma = dev_get_drvdata(dev); + int ret; + + ret = pm_clk_resume(dev); + if (ret) + return ret; + + tdma_write(tdma, ADMA_GLOBAL_CMD, tdma->global_cmd); + + return 0; +} + +static const struct tegra_adma_chip_data tegra210_chip_data = { + .nr_channels = 22, +}; + +static const struct of_device_id tegra_adma_of_match[] = { + { .compatible = "nvidia,tegra210-adma", .data = &tegra210_chip_data }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_adma_of_match); + +static int tegra_adma_probe(struct platform_device *pdev) +{ + const struct tegra_adma_chip_data *cdata; + struct tegra_adma *tdma; + struct resource *res; + struct clk *clk; + int ret, i; + + cdata = of_device_get_match_data(&pdev->dev); + if (!cdata) { + dev_err(&pdev->dev, "device match data not found\n"); + return -ENODEV; + } + + tdma = devm_kzalloc(&pdev->dev, sizeof(*tdma) + cdata->nr_channels * + sizeof(struct tegra_adma_chan), GFP_KERNEL); + if (!tdma) + return -ENOMEM; + + tdma->dev = &pdev->dev; + tdma->nr_channels = cdata->nr_channels; + platform_set_drvdata(pdev, tdma); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + tdma->base_addr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(tdma->base_addr)) + return PTR_ERR(tdma->base_addr); + + ret = pm_clk_create(&pdev->dev); + if (ret) + return ret; + + clk = clk_get(&pdev->dev, "d_audio"); + if (IS_ERR(clk)) { + dev_err(&pdev->dev, "ADMA clock not found\n"); + ret = PTR_ERR(clk); + goto clk_destroy; + } + + ret = pm_clk_add_clk(&pdev->dev, clk); + if (ret) { + clk_put(clk); + goto clk_destroy; + } + + pm_runtime_enable(&pdev->dev); + + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) + goto rpm_disable; + + ret = tegra_adma_init(tdma); + if (ret) + goto rpm_put; + + INIT_LIST_HEAD(&tdma->dma_dev.channels); + for (i = 0; i < tdma->nr_channels; i++) { + struct tegra_adma_chan *tdc = &tdma->channels[i]; + + tdc->chan_addr = tdma->base_addr + ADMA_CH_REG_OFFSET(i); + + tdc->irq = of_irq_get(pdev->dev.of_node, i); + if (tdc->irq < 0) { + ret = tdc->irq; + goto irq_dispose; + } + + vchan_init(&tdc->vc, &tdma->dma_dev); + tdc->vc.desc_free = tegra_adma_desc_free; + tdc->tdma = tdma; + } + + dma_cap_set(DMA_SLAVE, tdma->dma_dev.cap_mask); + dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask); + dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask); + + tdma->dma_dev.dev = &pdev->dev; + tdma->dma_dev.device_alloc_chan_resources = + tegra_adma_alloc_chan_resources; + tdma->dma_dev.device_free_chan_resources = + tegra_adma_free_chan_resources; + tdma->dma_dev.device_issue_pending = tegra_adma_issue_pending; + tdma->dma_dev.device_prep_dma_cyclic = tegra_adma_prep_dma_cyclic; + tdma->dma_dev.device_config = tegra_adma_slave_config; + tdma->dma_dev.device_tx_status = tegra_adma_tx_status; + tdma->dma_dev.device_terminate_all = tegra_adma_terminate_all; + tdma->dma_dev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + tdma->dma_dev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + tdma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + + ret = dma_async_device_register(&tdma->dma_dev); + if (ret < 0) { + dev_err(&pdev->dev, "ADMA registration failed: %d\n", ret); + goto irq_dispose; + } + + ret = of_dma_controller_register(pdev->dev.of_node, + tegra_dma_of_xlate, tdma); + if (ret < 0) { + dev_err(&pdev->dev, "ADMA OF registration failed %d\n", ret); + goto dma_remove; + } + + pm_runtime_put(&pdev->dev); + + dev_info(&pdev->dev, "Tegra210 ADMA driver registered %d channels\n", + tdma->nr_channels); + + return 0; + +dma_remove: + dma_async_device_unregister(&tdma->dma_dev); +irq_dispose: + while (--i >= 0) + irq_dispose_mapping(tdma->channels[i].irq); +rpm_put: + pm_runtime_put_sync(&pdev->dev); +rpm_disable: + pm_runtime_disable(&pdev->dev); +clk_destroy: + pm_clk_destroy(&pdev->dev); + + return ret; +} + +static int tegra_adma_remove(struct platform_device *pdev) +{ + struct tegra_adma *tdma = platform_get_drvdata(pdev); + int i; + + dma_async_device_unregister(&tdma->dma_dev); + + for (i = 0; i < tdma->nr_channels; ++i) + irq_dispose_mapping(tdma->channels[i].irq); + + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + pm_clk_destroy(&pdev->dev); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int tegra_adma_pm_suspend(struct device *dev) +{ + return pm_runtime_suspended(dev) == false; +} +#endif + +static const struct dev_pm_ops tegra_adma_dev_pm_ops = { + SET_RUNTIME_PM_OPS(tegra_adma_runtime_suspend, + tegra_adma_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(tegra_adma_pm_suspend, NULL) +}; + +static struct platform_driver tegra_admac_driver = { + .driver = { + .name = "tegra-adma", + .pm = &tegra_adma_dev_pm_ops, + .of_match_table = tegra_adma_of_match, + }, + .probe = tegra_adma_probe, + .remove = tegra_adma_remove, +}; + +module_platform_driver(tegra_admac_driver); + +MODULE_ALIAS("platform:tegra210-adma"); +MODULE_DESCRIPTION("NVIDIA Tegra ADMA driver"); +MODULE_AUTHOR("Dara Ramesh <dramesh@nvidia.com>"); +MODULE_AUTHOR("Jon Hunter <jonathanh@nvidia.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/xilinx/xilinx_vdma.c b/drivers/dma/xilinx/xilinx_vdma.c index ef67f27..df91185 100644 --- a/drivers/dma/xilinx/xilinx_vdma.c +++ b/drivers/dma/xilinx/xilinx_vdma.c @@ -16,6 +16,15 @@ * video device (S2MM). Initialization, status, interrupt and management * registers are accessed through an AXI4-Lite slave interface. * + * The AXI Direct Memory Access (AXI DMA) core is a soft Xilinx IP core that + * provides high-bandwidth one dimensional direct memory access between memory + * and AXI4-Stream target peripherals. It supports one receive and one + * transmit channel, both of them optional at synthesis time. + * + * The AXI CDMA, is a soft IP, which provides high-bandwidth Direct Memory + * Access (DMA) between a memory-mapped source address and a memory-mapped + * destination address. + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or @@ -35,116 +44,138 @@ #include <linux/of_platform.h> #include <linux/of_irq.h> #include <linux/slab.h> +#include <linux/clk.h> #include "../dmaengine.h" /* Register/Descriptor Offsets */ -#define XILINX_VDMA_MM2S_CTRL_OFFSET 0x0000 -#define XILINX_VDMA_S2MM_CTRL_OFFSET 0x0030 +#define XILINX_DMA_MM2S_CTRL_OFFSET 0x0000 +#define XILINX_DMA_S2MM_CTRL_OFFSET 0x0030 #define XILINX_VDMA_MM2S_DESC_OFFSET 0x0050 #define XILINX_VDMA_S2MM_DESC_OFFSET 0x00a0 /* Control Registers */ -#define XILINX_VDMA_REG_DMACR 0x0000 -#define XILINX_VDMA_DMACR_DELAY_MAX 0xff -#define XILINX_VDMA_DMACR_DELAY_SHIFT 24 -#define XILINX_VDMA_DMACR_FRAME_COUNT_MAX 0xff -#define XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT 16 -#define XILINX_VDMA_DMACR_ERR_IRQ BIT(14) -#define XILINX_VDMA_DMACR_DLY_CNT_IRQ BIT(13) -#define XILINX_VDMA_DMACR_FRM_CNT_IRQ BIT(12) -#define XILINX_VDMA_DMACR_MASTER_SHIFT 8 -#define XILINX_VDMA_DMACR_FSYNCSRC_SHIFT 5 -#define XILINX_VDMA_DMACR_FRAMECNT_EN BIT(4) -#define XILINX_VDMA_DMACR_GENLOCK_EN BIT(3) -#define XILINX_VDMA_DMACR_RESET BIT(2) -#define XILINX_VDMA_DMACR_CIRC_EN BIT(1) -#define XILINX_VDMA_DMACR_RUNSTOP BIT(0) -#define XILINX_VDMA_DMACR_FSYNCSRC_MASK GENMASK(6, 5) - -#define XILINX_VDMA_REG_DMASR 0x0004 -#define XILINX_VDMA_DMASR_EOL_LATE_ERR BIT(15) -#define XILINX_VDMA_DMASR_ERR_IRQ BIT(14) -#define XILINX_VDMA_DMASR_DLY_CNT_IRQ BIT(13) -#define XILINX_VDMA_DMASR_FRM_CNT_IRQ BIT(12) -#define XILINX_VDMA_DMASR_SOF_LATE_ERR BIT(11) -#define XILINX_VDMA_DMASR_SG_DEC_ERR BIT(10) -#define XILINX_VDMA_DMASR_SG_SLV_ERR BIT(9) -#define XILINX_VDMA_DMASR_EOF_EARLY_ERR BIT(8) -#define XILINX_VDMA_DMASR_SOF_EARLY_ERR BIT(7) -#define XILINX_VDMA_DMASR_DMA_DEC_ERR BIT(6) -#define XILINX_VDMA_DMASR_DMA_SLAVE_ERR BIT(5) -#define XILINX_VDMA_DMASR_DMA_INT_ERR BIT(4) -#define XILINX_VDMA_DMASR_IDLE BIT(1) -#define XILINX_VDMA_DMASR_HALTED BIT(0) -#define XILINX_VDMA_DMASR_DELAY_MASK GENMASK(31, 24) -#define XILINX_VDMA_DMASR_FRAME_COUNT_MASK GENMASK(23, 16) - -#define XILINX_VDMA_REG_CURDESC 0x0008 -#define XILINX_VDMA_REG_TAILDESC 0x0010 -#define XILINX_VDMA_REG_REG_INDEX 0x0014 -#define XILINX_VDMA_REG_FRMSTORE 0x0018 -#define XILINX_VDMA_REG_THRESHOLD 0x001c -#define XILINX_VDMA_REG_FRMPTR_STS 0x0024 -#define XILINX_VDMA_REG_PARK_PTR 0x0028 -#define XILINX_VDMA_PARK_PTR_WR_REF_SHIFT 8 -#define XILINX_VDMA_PARK_PTR_RD_REF_SHIFT 0 -#define XILINX_VDMA_REG_VDMA_VERSION 0x002c +#define XILINX_DMA_REG_DMACR 0x0000 +#define XILINX_DMA_DMACR_DELAY_MAX 0xff +#define XILINX_DMA_DMACR_DELAY_SHIFT 24 +#define XILINX_DMA_DMACR_FRAME_COUNT_MAX 0xff +#define XILINX_DMA_DMACR_FRAME_COUNT_SHIFT 16 +#define XILINX_DMA_DMACR_ERR_IRQ BIT(14) +#define XILINX_DMA_DMACR_DLY_CNT_IRQ BIT(13) +#define XILINX_DMA_DMACR_FRM_CNT_IRQ BIT(12) +#define XILINX_DMA_DMACR_MASTER_SHIFT 8 +#define XILINX_DMA_DMACR_FSYNCSRC_SHIFT 5 +#define XILINX_DMA_DMACR_FRAMECNT_EN BIT(4) +#define XILINX_DMA_DMACR_GENLOCK_EN BIT(3) +#define XILINX_DMA_DMACR_RESET BIT(2) +#define XILINX_DMA_DMACR_CIRC_EN BIT(1) +#define XILINX_DMA_DMACR_RUNSTOP BIT(0) +#define XILINX_DMA_DMACR_FSYNCSRC_MASK GENMASK(6, 5) + +#define XILINX_DMA_REG_DMASR 0x0004 +#define XILINX_DMA_DMASR_EOL_LATE_ERR BIT(15) +#define XILINX_DMA_DMASR_ERR_IRQ BIT(14) +#define XILINX_DMA_DMASR_DLY_CNT_IRQ BIT(13) +#define XILINX_DMA_DMASR_FRM_CNT_IRQ BIT(12) +#define XILINX_DMA_DMASR_SOF_LATE_ERR BIT(11) +#define XILINX_DMA_DMASR_SG_DEC_ERR BIT(10) +#define XILINX_DMA_DMASR_SG_SLV_ERR BIT(9) +#define XILINX_DMA_DMASR_EOF_EARLY_ERR BIT(8) +#define XILINX_DMA_DMASR_SOF_EARLY_ERR BIT(7) +#define XILINX_DMA_DMASR_DMA_DEC_ERR BIT(6) +#define XILINX_DMA_DMASR_DMA_SLAVE_ERR BIT(5) +#define XILINX_DMA_DMASR_DMA_INT_ERR BIT(4) +#define XILINX_DMA_DMASR_IDLE BIT(1) +#define XILINX_DMA_DMASR_HALTED BIT(0) +#define XILINX_DMA_DMASR_DELAY_MASK GENMASK(31, 24) +#define XILINX_DMA_DMASR_FRAME_COUNT_MASK GENMASK(23, 16) + +#define XILINX_DMA_REG_CURDESC 0x0008 +#define XILINX_DMA_REG_TAILDESC 0x0010 +#define XILINX_DMA_REG_REG_INDEX 0x0014 +#define XILINX_DMA_REG_FRMSTORE 0x0018 +#define XILINX_DMA_REG_THRESHOLD 0x001c +#define XILINX_DMA_REG_FRMPTR_STS 0x0024 +#define XILINX_DMA_REG_PARK_PTR 0x0028 +#define XILINX_DMA_PARK_PTR_WR_REF_SHIFT 8 +#define XILINX_DMA_PARK_PTR_RD_REF_SHIFT 0 +#define XILINX_DMA_REG_VDMA_VERSION 0x002c /* Register Direct Mode Registers */ -#define XILINX_VDMA_REG_VSIZE 0x0000 -#define XILINX_VDMA_REG_HSIZE 0x0004 +#define XILINX_DMA_REG_VSIZE 0x0000 +#define XILINX_DMA_REG_HSIZE 0x0004 -#define XILINX_VDMA_REG_FRMDLY_STRIDE 0x0008 -#define XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT 24 -#define XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT 0 +#define XILINX_DMA_REG_FRMDLY_STRIDE 0x0008 +#define XILINX_DMA_FRMDLY_STRIDE_FRMDLY_SHIFT 24 +#define XILINX_DMA_FRMDLY_STRIDE_STRIDE_SHIFT 0 #define XILINX_VDMA_REG_START_ADDRESS(n) (0x000c + 4 * (n)) +#define XILINX_VDMA_REG_START_ADDRESS_64(n) (0x000c + 8 * (n)) /* HW specific definitions */ -#define XILINX_VDMA_MAX_CHANS_PER_DEVICE 0x2 - -#define XILINX_VDMA_DMAXR_ALL_IRQ_MASK \ - (XILINX_VDMA_DMASR_FRM_CNT_IRQ | \ - XILINX_VDMA_DMASR_DLY_CNT_IRQ | \ - XILINX_VDMA_DMASR_ERR_IRQ) - -#define XILINX_VDMA_DMASR_ALL_ERR_MASK \ - (XILINX_VDMA_DMASR_EOL_LATE_ERR | \ - XILINX_VDMA_DMASR_SOF_LATE_ERR | \ - XILINX_VDMA_DMASR_SG_DEC_ERR | \ - XILINX_VDMA_DMASR_SG_SLV_ERR | \ - XILINX_VDMA_DMASR_EOF_EARLY_ERR | \ - XILINX_VDMA_DMASR_SOF_EARLY_ERR | \ - XILINX_VDMA_DMASR_DMA_DEC_ERR | \ - XILINX_VDMA_DMASR_DMA_SLAVE_ERR | \ - XILINX_VDMA_DMASR_DMA_INT_ERR) +#define XILINX_DMA_MAX_CHANS_PER_DEVICE 0x2 + +#define XILINX_DMA_DMAXR_ALL_IRQ_MASK \ + (XILINX_DMA_DMASR_FRM_CNT_IRQ | \ + XILINX_DMA_DMASR_DLY_CNT_IRQ | \ + XILINX_DMA_DMASR_ERR_IRQ) + +#define XILINX_DMA_DMASR_ALL_ERR_MASK \ + (XILINX_DMA_DMASR_EOL_LATE_ERR | \ + XILINX_DMA_DMASR_SOF_LATE_ERR | \ + XILINX_DMA_DMASR_SG_DEC_ERR | \ + XILINX_DMA_DMASR_SG_SLV_ERR | \ + XILINX_DMA_DMASR_EOF_EARLY_ERR | \ + XILINX_DMA_DMASR_SOF_EARLY_ERR | \ + XILINX_DMA_DMASR_DMA_DEC_ERR | \ + XILINX_DMA_DMASR_DMA_SLAVE_ERR | \ + XILINX_DMA_DMASR_DMA_INT_ERR) /* * Recoverable errors are DMA Internal error, SOF Early, EOF Early * and SOF Late. They are only recoverable when C_FLUSH_ON_FSYNC * is enabled in the h/w system. */ -#define XILINX_VDMA_DMASR_ERR_RECOVER_MASK \ - (XILINX_VDMA_DMASR_SOF_LATE_ERR | \ - XILINX_VDMA_DMASR_EOF_EARLY_ERR | \ - XILINX_VDMA_DMASR_SOF_EARLY_ERR | \ - XILINX_VDMA_DMASR_DMA_INT_ERR) +#define XILINX_DMA_DMASR_ERR_RECOVER_MASK \ + (XILINX_DMA_DMASR_SOF_LATE_ERR | \ + XILINX_DMA_DMASR_EOF_EARLY_ERR | \ + XILINX_DMA_DMASR_SOF_EARLY_ERR | \ + XILINX_DMA_DMASR_DMA_INT_ERR) /* Axi VDMA Flush on Fsync bits */ -#define XILINX_VDMA_FLUSH_S2MM 3 -#define XILINX_VDMA_FLUSH_MM2S 2 -#define XILINX_VDMA_FLUSH_BOTH 1 +#define XILINX_DMA_FLUSH_S2MM 3 +#define XILINX_DMA_FLUSH_MM2S 2 +#define XILINX_DMA_FLUSH_BOTH 1 /* Delay loop counter to prevent hardware failure */ -#define XILINX_VDMA_LOOP_COUNT 1000000 +#define XILINX_DMA_LOOP_COUNT 1000000 + +/* AXI DMA Specific Registers/Offsets */ +#define XILINX_DMA_REG_SRCDSTADDR 0x18 +#define XILINX_DMA_REG_BTT 0x28 + +/* AXI DMA Specific Masks/Bit fields */ +#define XILINX_DMA_MAX_TRANS_LEN GENMASK(22, 0) +#define XILINX_DMA_CR_COALESCE_MAX GENMASK(23, 16) +#define XILINX_DMA_CR_COALESCE_SHIFT 16 +#define XILINX_DMA_BD_SOP BIT(27) +#define XILINX_DMA_BD_EOP BIT(26) +#define XILINX_DMA_COALESCE_MAX 255 +#define XILINX_DMA_NUM_APP_WORDS 5 + +/* AXI CDMA Specific Registers/Offsets */ +#define XILINX_CDMA_REG_SRCADDR 0x18 +#define XILINX_CDMA_REG_DSTADDR 0x20 + +/* AXI CDMA Specific Masks */ +#define XILINX_CDMA_CR_SGMODE BIT(3) /** * struct xilinx_vdma_desc_hw - Hardware Descriptor * @next_desc: Next Descriptor Pointer @0x00 * @pad1: Reserved @0x04 * @buf_addr: Buffer address @0x08 - * @pad2: Reserved @0x0C + * @buf_addr_msb: MSB of Buffer address @0x0C * @vsize: Vertical Size @0x10 * @hsize: Horizontal Size @0x14 * @stride: Number of bytes between the first @@ -154,13 +185,59 @@ struct xilinx_vdma_desc_hw { u32 next_desc; u32 pad1; u32 buf_addr; - u32 pad2; + u32 buf_addr_msb; u32 vsize; u32 hsize; u32 stride; } __aligned(64); /** + * struct xilinx_axidma_desc_hw - Hardware Descriptor for AXI DMA + * @next_desc: Next Descriptor Pointer @0x00 + * @pad1: Reserved @0x04 + * @buf_addr: Buffer address @0x08 + * @pad2: Reserved @0x0C + * @pad3: Reserved @0x10 + * @pad4: Reserved @0x14 + * @control: Control field @0x18 + * @status: Status field @0x1C + * @app: APP Fields @0x20 - 0x30 + */ +struct xilinx_axidma_desc_hw { + u32 next_desc; + u32 pad1; + u32 buf_addr; + u32 pad2; + u32 pad3; + u32 pad4; + u32 control; + u32 status; + u32 app[XILINX_DMA_NUM_APP_WORDS]; +} __aligned(64); + +/** + * struct xilinx_cdma_desc_hw - Hardware Descriptor + * @next_desc: Next Descriptor Pointer @0x00 + * @pad1: Reserved @0x04 + * @src_addr: Source address @0x08 + * @pad2: Reserved @0x0C + * @dest_addr: Destination address @0x10 + * @pad3: Reserved @0x14 + * @control: Control field @0x18 + * @status: Status field @0x1C + */ +struct xilinx_cdma_desc_hw { + u32 next_desc; + u32 pad1; + u32 src_addr; + u32 pad2; + u32 dest_addr; + u32 pad3; + u32 control; + u32 status; +} __aligned(64); + +/** * struct xilinx_vdma_tx_segment - Descriptor segment * @hw: Hardware descriptor * @node: Node in the descriptor segments list @@ -173,19 +250,43 @@ struct xilinx_vdma_tx_segment { } __aligned(64); /** - * struct xilinx_vdma_tx_descriptor - Per Transaction structure + * struct xilinx_axidma_tx_segment - Descriptor segment + * @hw: Hardware descriptor + * @node: Node in the descriptor segments list + * @phys: Physical address of segment + */ +struct xilinx_axidma_tx_segment { + struct xilinx_axidma_desc_hw hw; + struct list_head node; + dma_addr_t phys; +} __aligned(64); + +/** + * struct xilinx_cdma_tx_segment - Descriptor segment + * @hw: Hardware descriptor + * @node: Node in the descriptor segments list + * @phys: Physical address of segment + */ +struct xilinx_cdma_tx_segment { + struct xilinx_cdma_desc_hw hw; + struct list_head node; + dma_addr_t phys; +} __aligned(64); + +/** + * struct xilinx_dma_tx_descriptor - Per Transaction structure * @async_tx: Async transaction descriptor * @segments: TX segments list * @node: Node in the channel descriptors list */ -struct xilinx_vdma_tx_descriptor { +struct xilinx_dma_tx_descriptor { struct dma_async_tx_descriptor async_tx; struct list_head segments; struct list_head node; }; /** - * struct xilinx_vdma_chan - Driver specific VDMA channel structure + * struct xilinx_dma_chan - Driver specific DMA channel structure * @xdev: Driver specific device structure * @ctrl_offset: Control registers offset * @desc_offset: TX descriptor registers offset @@ -207,9 +308,14 @@ struct xilinx_vdma_tx_descriptor { * @config: Device configuration info * @flush_on_fsync: Flush on Frame sync * @desc_pendingcount: Descriptor pending count + * @ext_addr: Indicates 64 bit addressing is supported by dma channel + * @desc_submitcount: Descriptor h/w submitted count + * @residue: Residue for AXI DMA + * @seg_v: Statically allocated segments base + * @start_transfer: Differentiate b/w DMA IP's transfer */ -struct xilinx_vdma_chan { - struct xilinx_vdma_device *xdev; +struct xilinx_dma_chan { + struct xilinx_dma_device *xdev; u32 ctrl_offset; u32 desc_offset; spinlock_t lock; @@ -230,73 +336,122 @@ struct xilinx_vdma_chan { struct xilinx_vdma_config config; bool flush_on_fsync; u32 desc_pendingcount; + bool ext_addr; + u32 desc_submitcount; + u32 residue; + struct xilinx_axidma_tx_segment *seg_v; + void (*start_transfer)(struct xilinx_dma_chan *chan); +}; + +struct xilinx_dma_config { + enum xdma_ip_type dmatype; + int (*clk_init)(struct platform_device *pdev, struct clk **axi_clk, + struct clk **tx_clk, struct clk **txs_clk, + struct clk **rx_clk, struct clk **rxs_clk); }; /** - * struct xilinx_vdma_device - VDMA device structure + * struct xilinx_dma_device - DMA device structure * @regs: I/O mapped base address * @dev: Device Structure * @common: DMA device structure - * @chan: Driver specific VDMA channel + * @chan: Driver specific DMA channel * @has_sg: Specifies whether Scatter-Gather is present or not * @flush_on_fsync: Flush on frame sync + * @ext_addr: Indicates 64 bit addressing is supported by dma device + * @pdev: Platform device structure pointer + * @dma_config: DMA config structure + * @axi_clk: DMA Axi4-lite interace clock + * @tx_clk: DMA mm2s clock + * @txs_clk: DMA mm2s stream clock + * @rx_clk: DMA s2mm clock + * @rxs_clk: DMA s2mm stream clock */ -struct xilinx_vdma_device { +struct xilinx_dma_device { void __iomem *regs; struct device *dev; struct dma_device common; - struct xilinx_vdma_chan *chan[XILINX_VDMA_MAX_CHANS_PER_DEVICE]; + struct xilinx_dma_chan *chan[XILINX_DMA_MAX_CHANS_PER_DEVICE]; bool has_sg; u32 flush_on_fsync; + bool ext_addr; + struct platform_device *pdev; + const struct xilinx_dma_config *dma_config; + struct clk *axi_clk; + struct clk *tx_clk; + struct clk *txs_clk; + struct clk *rx_clk; + struct clk *rxs_clk; }; /* Macros */ #define to_xilinx_chan(chan) \ - container_of(chan, struct xilinx_vdma_chan, common) -#define to_vdma_tx_descriptor(tx) \ - container_of(tx, struct xilinx_vdma_tx_descriptor, async_tx) -#define xilinx_vdma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \ + container_of(chan, struct xilinx_dma_chan, common) +#define to_dma_tx_descriptor(tx) \ + container_of(tx, struct xilinx_dma_tx_descriptor, async_tx) +#define xilinx_dma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \ readl_poll_timeout(chan->xdev->regs + chan->ctrl_offset + reg, val, \ cond, delay_us, timeout_us) /* IO accessors */ -static inline u32 vdma_read(struct xilinx_vdma_chan *chan, u32 reg) +static inline u32 dma_read(struct xilinx_dma_chan *chan, u32 reg) { return ioread32(chan->xdev->regs + reg); } -static inline void vdma_write(struct xilinx_vdma_chan *chan, u32 reg, u32 value) +static inline void dma_write(struct xilinx_dma_chan *chan, u32 reg, u32 value) { iowrite32(value, chan->xdev->regs + reg); } -static inline void vdma_desc_write(struct xilinx_vdma_chan *chan, u32 reg, +static inline void vdma_desc_write(struct xilinx_dma_chan *chan, u32 reg, u32 value) { - vdma_write(chan, chan->desc_offset + reg, value); + dma_write(chan, chan->desc_offset + reg, value); } -static inline u32 vdma_ctrl_read(struct xilinx_vdma_chan *chan, u32 reg) +static inline u32 dma_ctrl_read(struct xilinx_dma_chan *chan, u32 reg) { - return vdma_read(chan, chan->ctrl_offset + reg); + return dma_read(chan, chan->ctrl_offset + reg); } -static inline void vdma_ctrl_write(struct xilinx_vdma_chan *chan, u32 reg, +static inline void dma_ctrl_write(struct xilinx_dma_chan *chan, u32 reg, u32 value) { - vdma_write(chan, chan->ctrl_offset + reg, value); + dma_write(chan, chan->ctrl_offset + reg, value); } -static inline void vdma_ctrl_clr(struct xilinx_vdma_chan *chan, u32 reg, +static inline void dma_ctrl_clr(struct xilinx_dma_chan *chan, u32 reg, u32 clr) { - vdma_ctrl_write(chan, reg, vdma_ctrl_read(chan, reg) & ~clr); + dma_ctrl_write(chan, reg, dma_ctrl_read(chan, reg) & ~clr); } -static inline void vdma_ctrl_set(struct xilinx_vdma_chan *chan, u32 reg, +static inline void dma_ctrl_set(struct xilinx_dma_chan *chan, u32 reg, u32 set) { - vdma_ctrl_write(chan, reg, vdma_ctrl_read(chan, reg) | set); + dma_ctrl_write(chan, reg, dma_ctrl_read(chan, reg) | set); +} + +/** + * vdma_desc_write_64 - 64-bit descriptor write + * @chan: Driver specific VDMA channel + * @reg: Register to write + * @value_lsb: lower address of the descriptor. + * @value_msb: upper address of the descriptor. + * + * Since vdma driver is trying to write to a register offset which is not a + * multiple of 64 bits(ex : 0x5c), we are writing as two separate 32 bits + * instead of a single 64 bit register write. + */ +static inline void vdma_desc_write_64(struct xilinx_dma_chan *chan, u32 reg, + u32 value_lsb, u32 value_msb) +{ + /* Write the lsb 32 bits*/ + writel(value_lsb, chan->xdev->regs + chan->desc_offset + reg); + + /* Write the msb 32 bits */ + writel(value_msb, chan->xdev->regs + chan->desc_offset + reg + 4); } /* ----------------------------------------------------------------------------- @@ -305,16 +460,59 @@ static inline void vdma_ctrl_set(struct xilinx_vdma_chan *chan, u32 reg, /** * xilinx_vdma_alloc_tx_segment - Allocate transaction segment - * @chan: Driver specific VDMA channel + * @chan: Driver specific DMA channel * * Return: The allocated segment on success and NULL on failure. */ static struct xilinx_vdma_tx_segment * -xilinx_vdma_alloc_tx_segment(struct xilinx_vdma_chan *chan) +xilinx_vdma_alloc_tx_segment(struct xilinx_dma_chan *chan) { struct xilinx_vdma_tx_segment *segment; dma_addr_t phys; + segment = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &phys); + if (!segment) + return NULL; + + segment->phys = phys; + + return segment; +} + +/** + * xilinx_cdma_alloc_tx_segment - Allocate transaction segment + * @chan: Driver specific DMA channel + * + * Return: The allocated segment on success and NULL on failure. + */ +static struct xilinx_cdma_tx_segment * +xilinx_cdma_alloc_tx_segment(struct xilinx_dma_chan *chan) +{ + struct xilinx_cdma_tx_segment *segment; + dma_addr_t phys; + + segment = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &phys); + if (!segment) + return NULL; + + memset(segment, 0, sizeof(*segment)); + segment->phys = phys; + + return segment; +} + +/** + * xilinx_axidma_alloc_tx_segment - Allocate transaction segment + * @chan: Driver specific DMA channel + * + * Return: The allocated segment on success and NULL on failure. + */ +static struct xilinx_axidma_tx_segment * +xilinx_axidma_alloc_tx_segment(struct xilinx_dma_chan *chan) +{ + struct xilinx_axidma_tx_segment *segment; + dma_addr_t phys; + segment = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &phys); if (!segment) return NULL; @@ -326,26 +524,48 @@ xilinx_vdma_alloc_tx_segment(struct xilinx_vdma_chan *chan) } /** + * xilinx_dma_free_tx_segment - Free transaction segment + * @chan: Driver specific DMA channel + * @segment: DMA transaction segment + */ +static void xilinx_dma_free_tx_segment(struct xilinx_dma_chan *chan, + struct xilinx_axidma_tx_segment *segment) +{ + dma_pool_free(chan->desc_pool, segment, segment->phys); +} + +/** + * xilinx_cdma_free_tx_segment - Free transaction segment + * @chan: Driver specific DMA channel + * @segment: DMA transaction segment + */ +static void xilinx_cdma_free_tx_segment(struct xilinx_dma_chan *chan, + struct xilinx_cdma_tx_segment *segment) +{ + dma_pool_free(chan->desc_pool, segment, segment->phys); +} + +/** * xilinx_vdma_free_tx_segment - Free transaction segment - * @chan: Driver specific VDMA channel - * @segment: VDMA transaction segment + * @chan: Driver specific DMA channel + * @segment: DMA transaction segment */ -static void xilinx_vdma_free_tx_segment(struct xilinx_vdma_chan *chan, +static void xilinx_vdma_free_tx_segment(struct xilinx_dma_chan *chan, struct xilinx_vdma_tx_segment *segment) { dma_pool_free(chan->desc_pool, segment, segment->phys); } /** - * xilinx_vdma_tx_descriptor - Allocate transaction descriptor - * @chan: Driver specific VDMA channel + * xilinx_dma_tx_descriptor - Allocate transaction descriptor + * @chan: Driver specific DMA channel * * Return: The allocated descriptor on success and NULL on failure. */ -static struct xilinx_vdma_tx_descriptor * -xilinx_vdma_alloc_tx_descriptor(struct xilinx_vdma_chan *chan) +static struct xilinx_dma_tx_descriptor * +xilinx_dma_alloc_tx_descriptor(struct xilinx_dma_chan *chan) { - struct xilinx_vdma_tx_descriptor *desc; + struct xilinx_dma_tx_descriptor *desc; desc = kzalloc(sizeof(*desc), GFP_KERNEL); if (!desc) @@ -357,22 +577,38 @@ xilinx_vdma_alloc_tx_descriptor(struct xilinx_vdma_chan *chan) } /** - * xilinx_vdma_free_tx_descriptor - Free transaction descriptor - * @chan: Driver specific VDMA channel - * @desc: VDMA transaction descriptor + * xilinx_dma_free_tx_descriptor - Free transaction descriptor + * @chan: Driver specific DMA channel + * @desc: DMA transaction descriptor */ static void -xilinx_vdma_free_tx_descriptor(struct xilinx_vdma_chan *chan, - struct xilinx_vdma_tx_descriptor *desc) +xilinx_dma_free_tx_descriptor(struct xilinx_dma_chan *chan, + struct xilinx_dma_tx_descriptor *desc) { struct xilinx_vdma_tx_segment *segment, *next; + struct xilinx_cdma_tx_segment *cdma_segment, *cdma_next; + struct xilinx_axidma_tx_segment *axidma_segment, *axidma_next; if (!desc) return; - list_for_each_entry_safe(segment, next, &desc->segments, node) { - list_del(&segment->node); - xilinx_vdma_free_tx_segment(chan, segment); + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + list_for_each_entry_safe(segment, next, &desc->segments, node) { + list_del(&segment->node); + xilinx_vdma_free_tx_segment(chan, segment); + } + } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { + list_for_each_entry_safe(cdma_segment, cdma_next, + &desc->segments, node) { + list_del(&cdma_segment->node); + xilinx_cdma_free_tx_segment(chan, cdma_segment); + } + } else { + list_for_each_entry_safe(axidma_segment, axidma_next, + &desc->segments, node) { + list_del(&axidma_segment->node); + xilinx_dma_free_tx_segment(chan, axidma_segment); + } } kfree(desc); @@ -381,60 +617,62 @@ xilinx_vdma_free_tx_descriptor(struct xilinx_vdma_chan *chan, /* Required functions */ /** - * xilinx_vdma_free_desc_list - Free descriptors list - * @chan: Driver specific VDMA channel + * xilinx_dma_free_desc_list - Free descriptors list + * @chan: Driver specific DMA channel * @list: List to parse and delete the descriptor */ -static void xilinx_vdma_free_desc_list(struct xilinx_vdma_chan *chan, +static void xilinx_dma_free_desc_list(struct xilinx_dma_chan *chan, struct list_head *list) { - struct xilinx_vdma_tx_descriptor *desc, *next; + struct xilinx_dma_tx_descriptor *desc, *next; list_for_each_entry_safe(desc, next, list, node) { list_del(&desc->node); - xilinx_vdma_free_tx_descriptor(chan, desc); + xilinx_dma_free_tx_descriptor(chan, desc); } } /** - * xilinx_vdma_free_descriptors - Free channel descriptors - * @chan: Driver specific VDMA channel + * xilinx_dma_free_descriptors - Free channel descriptors + * @chan: Driver specific DMA channel */ -static void xilinx_vdma_free_descriptors(struct xilinx_vdma_chan *chan) +static void xilinx_dma_free_descriptors(struct xilinx_dma_chan *chan) { unsigned long flags; spin_lock_irqsave(&chan->lock, flags); - xilinx_vdma_free_desc_list(chan, &chan->pending_list); - xilinx_vdma_free_desc_list(chan, &chan->done_list); - xilinx_vdma_free_desc_list(chan, &chan->active_list); + xilinx_dma_free_desc_list(chan, &chan->pending_list); + xilinx_dma_free_desc_list(chan, &chan->done_list); + xilinx_dma_free_desc_list(chan, &chan->active_list); spin_unlock_irqrestore(&chan->lock, flags); } /** - * xilinx_vdma_free_chan_resources - Free channel resources + * xilinx_dma_free_chan_resources - Free channel resources * @dchan: DMA channel */ -static void xilinx_vdma_free_chan_resources(struct dma_chan *dchan) +static void xilinx_dma_free_chan_resources(struct dma_chan *dchan) { - struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); dev_dbg(chan->dev, "Free all channel resources.\n"); - xilinx_vdma_free_descriptors(chan); + xilinx_dma_free_descriptors(chan); + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) + xilinx_dma_free_tx_segment(chan, chan->seg_v); dma_pool_destroy(chan->desc_pool); chan->desc_pool = NULL; } /** - * xilinx_vdma_chan_desc_cleanup - Clean channel descriptors - * @chan: Driver specific VDMA channel + * xilinx_dma_chan_desc_cleanup - Clean channel descriptors + * @chan: Driver specific DMA channel */ -static void xilinx_vdma_chan_desc_cleanup(struct xilinx_vdma_chan *chan) +static void xilinx_dma_chan_desc_cleanup(struct xilinx_dma_chan *chan) { - struct xilinx_vdma_tx_descriptor *desc, *next; + struct xilinx_dma_tx_descriptor *desc, *next; unsigned long flags; spin_lock_irqsave(&chan->lock, flags); @@ -457,32 +695,32 @@ static void xilinx_vdma_chan_desc_cleanup(struct xilinx_vdma_chan *chan) /* Run any dependencies, then free the descriptor */ dma_run_dependencies(&desc->async_tx); - xilinx_vdma_free_tx_descriptor(chan, desc); + xilinx_dma_free_tx_descriptor(chan, desc); } spin_unlock_irqrestore(&chan->lock, flags); } /** - * xilinx_vdma_do_tasklet - Schedule completion tasklet - * @data: Pointer to the Xilinx VDMA channel structure + * xilinx_dma_do_tasklet - Schedule completion tasklet + * @data: Pointer to the Xilinx DMA channel structure */ -static void xilinx_vdma_do_tasklet(unsigned long data) +static void xilinx_dma_do_tasklet(unsigned long data) { - struct xilinx_vdma_chan *chan = (struct xilinx_vdma_chan *)data; + struct xilinx_dma_chan *chan = (struct xilinx_dma_chan *)data; - xilinx_vdma_chan_desc_cleanup(chan); + xilinx_dma_chan_desc_cleanup(chan); } /** - * xilinx_vdma_alloc_chan_resources - Allocate channel resources + * xilinx_dma_alloc_chan_resources - Allocate channel resources * @dchan: DMA channel * * Return: '0' on success and failure value on error */ -static int xilinx_vdma_alloc_chan_resources(struct dma_chan *dchan) +static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan) { - struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); /* Has this channel already been allocated? */ if (chan->desc_pool) @@ -492,10 +730,26 @@ static int xilinx_vdma_alloc_chan_resources(struct dma_chan *dchan) * We need the descriptor to be aligned to 64bytes * for meeting Xilinx VDMA specification requirement. */ - chan->desc_pool = dma_pool_create("xilinx_vdma_desc_pool", - chan->dev, - sizeof(struct xilinx_vdma_tx_segment), - __alignof__(struct xilinx_vdma_tx_segment), 0); + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + chan->desc_pool = dma_pool_create("xilinx_dma_desc_pool", + chan->dev, + sizeof(struct xilinx_axidma_tx_segment), + __alignof__(struct xilinx_axidma_tx_segment), + 0); + } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { + chan->desc_pool = dma_pool_create("xilinx_cdma_desc_pool", + chan->dev, + sizeof(struct xilinx_cdma_tx_segment), + __alignof__(struct xilinx_cdma_tx_segment), + 0); + } else { + chan->desc_pool = dma_pool_create("xilinx_vdma_desc_pool", + chan->dev, + sizeof(struct xilinx_vdma_tx_segment), + __alignof__(struct xilinx_vdma_tx_segment), + 0); + } + if (!chan->desc_pool) { dev_err(chan->dev, "unable to allocate channel %d descriptor pool\n", @@ -503,110 +757,160 @@ static int xilinx_vdma_alloc_chan_resources(struct dma_chan *dchan) return -ENOMEM; } + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) + /* + * For AXI DMA case after submitting a pending_list, keep + * an extra segment allocated so that the "next descriptor" + * pointer on the tail descriptor always points to a + * valid descriptor, even when paused after reaching taildesc. + * This way, it is possible to issue additional + * transfers without halting and restarting the channel. + */ + chan->seg_v = xilinx_axidma_alloc_tx_segment(chan); + dma_cookie_init(dchan); + + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + /* For AXI DMA resetting once channel will reset the + * other channel as well so enable the interrupts here. + */ + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, + XILINX_DMA_DMAXR_ALL_IRQ_MASK); + } + + if ((chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) && chan->has_sg) + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, + XILINX_CDMA_CR_SGMODE); + return 0; } /** - * xilinx_vdma_tx_status - Get VDMA transaction status + * xilinx_dma_tx_status - Get DMA transaction status * @dchan: DMA channel * @cookie: Transaction identifier * @txstate: Transaction state * * Return: DMA transaction status */ -static enum dma_status xilinx_vdma_tx_status(struct dma_chan *dchan, +static enum dma_status xilinx_dma_tx_status(struct dma_chan *dchan, dma_cookie_t cookie, struct dma_tx_state *txstate) { - return dma_cookie_status(dchan, cookie, txstate); + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; + struct xilinx_axidma_tx_segment *segment; + struct xilinx_axidma_desc_hw *hw; + enum dma_status ret; + unsigned long flags; + u32 residue = 0; + + ret = dma_cookie_status(dchan, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + spin_lock_irqsave(&chan->lock, flags); + + desc = list_last_entry(&chan->active_list, + struct xilinx_dma_tx_descriptor, node); + if (chan->has_sg) { + list_for_each_entry(segment, &desc->segments, node) { + hw = &segment->hw; + residue += (hw->control - hw->status) & + XILINX_DMA_MAX_TRANS_LEN; + } + } + spin_unlock_irqrestore(&chan->lock, flags); + + chan->residue = residue; + dma_set_residue(txstate, chan->residue); + } + + return ret; } /** - * xilinx_vdma_is_running - Check if VDMA channel is running - * @chan: Driver specific VDMA channel + * xilinx_dma_is_running - Check if DMA channel is running + * @chan: Driver specific DMA channel * * Return: '1' if running, '0' if not. */ -static bool xilinx_vdma_is_running(struct xilinx_vdma_chan *chan) +static bool xilinx_dma_is_running(struct xilinx_dma_chan *chan) { - return !(vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) & - XILINX_VDMA_DMASR_HALTED) && - (vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR) & - XILINX_VDMA_DMACR_RUNSTOP); + return !(dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) & + XILINX_DMA_DMASR_HALTED) && + (dma_ctrl_read(chan, XILINX_DMA_REG_DMACR) & + XILINX_DMA_DMACR_RUNSTOP); } /** - * xilinx_vdma_is_idle - Check if VDMA channel is idle - * @chan: Driver specific VDMA channel + * xilinx_dma_is_idle - Check if DMA channel is idle + * @chan: Driver specific DMA channel * * Return: '1' if idle, '0' if not. */ -static bool xilinx_vdma_is_idle(struct xilinx_vdma_chan *chan) +static bool xilinx_dma_is_idle(struct xilinx_dma_chan *chan) { - return vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) & - XILINX_VDMA_DMASR_IDLE; + return dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) & + XILINX_DMA_DMASR_IDLE; } /** - * xilinx_vdma_halt - Halt VDMA channel - * @chan: Driver specific VDMA channel + * xilinx_dma_halt - Halt DMA channel + * @chan: Driver specific DMA channel */ -static void xilinx_vdma_halt(struct xilinx_vdma_chan *chan) +static void xilinx_dma_halt(struct xilinx_dma_chan *chan) { int err; u32 val; - vdma_ctrl_clr(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RUNSTOP); + dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RUNSTOP); /* Wait for the hardware to halt */ - err = xilinx_vdma_poll_timeout(chan, XILINX_VDMA_REG_DMASR, val, - (val & XILINX_VDMA_DMASR_HALTED), 0, - XILINX_VDMA_LOOP_COUNT); + err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val, + (val & XILINX_DMA_DMASR_HALTED), 0, + XILINX_DMA_LOOP_COUNT); if (err) { dev_err(chan->dev, "Cannot stop channel %p: %x\n", - chan, vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR)); + chan, dma_ctrl_read(chan, XILINX_DMA_REG_DMASR)); chan->err = true; } - - return; } /** - * xilinx_vdma_start - Start VDMA channel - * @chan: Driver specific VDMA channel + * xilinx_dma_start - Start DMA channel + * @chan: Driver specific DMA channel */ -static void xilinx_vdma_start(struct xilinx_vdma_chan *chan) +static void xilinx_dma_start(struct xilinx_dma_chan *chan) { int err; u32 val; - vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RUNSTOP); + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RUNSTOP); /* Wait for the hardware to start */ - err = xilinx_vdma_poll_timeout(chan, XILINX_VDMA_REG_DMASR, val, - !(val & XILINX_VDMA_DMASR_HALTED), 0, - XILINX_VDMA_LOOP_COUNT); + err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val, + !(val & XILINX_DMA_DMASR_HALTED), 0, + XILINX_DMA_LOOP_COUNT); if (err) { dev_err(chan->dev, "Cannot start channel %p: %x\n", - chan, vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR)); + chan, dma_ctrl_read(chan, XILINX_DMA_REG_DMASR)); chan->err = true; } - - return; } /** * xilinx_vdma_start_transfer - Starts VDMA transfer * @chan: Driver specific channel struct pointer */ -static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan) +static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan) { struct xilinx_vdma_config *config = &chan->config; - struct xilinx_vdma_tx_descriptor *desc, *tail_desc; + struct xilinx_dma_tx_descriptor *desc, *tail_desc; u32 reg; struct xilinx_vdma_tx_segment *tail_segment; @@ -618,16 +922,16 @@ static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan) return; desc = list_first_entry(&chan->pending_list, - struct xilinx_vdma_tx_descriptor, node); + struct xilinx_dma_tx_descriptor, node); tail_desc = list_last_entry(&chan->pending_list, - struct xilinx_vdma_tx_descriptor, node); + struct xilinx_dma_tx_descriptor, node); tail_segment = list_last_entry(&tail_desc->segments, struct xilinx_vdma_tx_segment, node); /* If it is SG mode and hardware is busy, cannot submit */ - if (chan->has_sg && xilinx_vdma_is_running(chan) && - !xilinx_vdma_is_idle(chan)) { + if (chan->has_sg && xilinx_dma_is_running(chan) && + !xilinx_dma_is_idle(chan)) { dev_dbg(chan->dev, "DMA controller still busy\n"); return; } @@ -637,19 +941,19 @@ static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan) * done, start new transfers */ if (chan->has_sg) - vdma_ctrl_write(chan, XILINX_VDMA_REG_CURDESC, + dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC, desc->async_tx.phys); /* Configure the hardware using info in the config structure */ - reg = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR); + reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR); if (config->frm_cnt_en) - reg |= XILINX_VDMA_DMACR_FRAMECNT_EN; + reg |= XILINX_DMA_DMACR_FRAMECNT_EN; else - reg &= ~XILINX_VDMA_DMACR_FRAMECNT_EN; + reg &= ~XILINX_DMA_DMACR_FRAMECNT_EN; /* Configure channel to allow number frame buffers */ - vdma_ctrl_write(chan, XILINX_VDMA_REG_FRMSTORE, + dma_ctrl_write(chan, XILINX_DMA_REG_FRMSTORE, chan->desc_pendingcount); /* @@ -657,45 +961,53 @@ static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan) * In direct register mode, if not parking, enable circular mode */ if (chan->has_sg || !config->park) - reg |= XILINX_VDMA_DMACR_CIRC_EN; + reg |= XILINX_DMA_DMACR_CIRC_EN; if (config->park) - reg &= ~XILINX_VDMA_DMACR_CIRC_EN; + reg &= ~XILINX_DMA_DMACR_CIRC_EN; - vdma_ctrl_write(chan, XILINX_VDMA_REG_DMACR, reg); + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg); if (config->park && (config->park_frm >= 0) && (config->park_frm < chan->num_frms)) { if (chan->direction == DMA_MEM_TO_DEV) - vdma_write(chan, XILINX_VDMA_REG_PARK_PTR, + dma_write(chan, XILINX_DMA_REG_PARK_PTR, config->park_frm << - XILINX_VDMA_PARK_PTR_RD_REF_SHIFT); + XILINX_DMA_PARK_PTR_RD_REF_SHIFT); else - vdma_write(chan, XILINX_VDMA_REG_PARK_PTR, + dma_write(chan, XILINX_DMA_REG_PARK_PTR, config->park_frm << - XILINX_VDMA_PARK_PTR_WR_REF_SHIFT); + XILINX_DMA_PARK_PTR_WR_REF_SHIFT); } /* Start the hardware */ - xilinx_vdma_start(chan); + xilinx_dma_start(chan); if (chan->err) return; /* Start the transfer */ if (chan->has_sg) { - vdma_ctrl_write(chan, XILINX_VDMA_REG_TAILDESC, + dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC, tail_segment->phys); } else { struct xilinx_vdma_tx_segment *segment, *last = NULL; int i = 0; - list_for_each_entry(desc, &chan->pending_list, node) { - segment = list_first_entry(&desc->segments, - struct xilinx_vdma_tx_segment, node); - vdma_desc_write(chan, + if (chan->desc_submitcount < chan->num_frms) + i = chan->desc_submitcount; + + list_for_each_entry(segment, &desc->segments, node) { + if (chan->ext_addr) + vdma_desc_write_64(chan, + XILINX_VDMA_REG_START_ADDRESS_64(i++), + segment->hw.buf_addr, + segment->hw.buf_addr_msb); + else + vdma_desc_write(chan, XILINX_VDMA_REG_START_ADDRESS(i++), segment->hw.buf_addr); + last = segment; } @@ -703,10 +1015,164 @@ static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan) return; /* HW expects these parameters to be same for one transaction */ - vdma_desc_write(chan, XILINX_VDMA_REG_HSIZE, last->hw.hsize); - vdma_desc_write(chan, XILINX_VDMA_REG_FRMDLY_STRIDE, + vdma_desc_write(chan, XILINX_DMA_REG_HSIZE, last->hw.hsize); + vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE, last->hw.stride); - vdma_desc_write(chan, XILINX_VDMA_REG_VSIZE, last->hw.vsize); + vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize); + } + + if (!chan->has_sg) { + list_del(&desc->node); + list_add_tail(&desc->node, &chan->active_list); + chan->desc_submitcount++; + chan->desc_pendingcount--; + if (chan->desc_submitcount == chan->num_frms) + chan->desc_submitcount = 0; + } else { + list_splice_tail_init(&chan->pending_list, &chan->active_list); + chan->desc_pendingcount = 0; + } +} + +/** + * xilinx_cdma_start_transfer - Starts cdma transfer + * @chan: Driver specific channel struct pointer + */ +static void xilinx_cdma_start_transfer(struct xilinx_dma_chan *chan) +{ + struct xilinx_dma_tx_descriptor *head_desc, *tail_desc; + struct xilinx_cdma_tx_segment *tail_segment; + u32 ctrl_reg = dma_read(chan, XILINX_DMA_REG_DMACR); + + if (chan->err) + return; + + if (list_empty(&chan->pending_list)) + return; + + head_desc = list_first_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + tail_desc = list_last_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_cdma_tx_segment, node); + + if (chan->desc_pendingcount <= XILINX_DMA_COALESCE_MAX) { + ctrl_reg &= ~XILINX_DMA_CR_COALESCE_MAX; + ctrl_reg |= chan->desc_pendingcount << + XILINX_DMA_CR_COALESCE_SHIFT; + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, ctrl_reg); + } + + if (chan->has_sg) { + dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC, + head_desc->async_tx.phys); + + /* Update tail ptr register which will start the transfer */ + dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC, + tail_segment->phys); + } else { + /* In simple mode */ + struct xilinx_cdma_tx_segment *segment; + struct xilinx_cdma_desc_hw *hw; + + segment = list_first_entry(&head_desc->segments, + struct xilinx_cdma_tx_segment, + node); + + hw = &segment->hw; + + dma_ctrl_write(chan, XILINX_CDMA_REG_SRCADDR, hw->src_addr); + dma_ctrl_write(chan, XILINX_CDMA_REG_DSTADDR, hw->dest_addr); + + /* Start the transfer */ + dma_ctrl_write(chan, XILINX_DMA_REG_BTT, + hw->control & XILINX_DMA_MAX_TRANS_LEN); + } + + list_splice_tail_init(&chan->pending_list, &chan->active_list); + chan->desc_pendingcount = 0; +} + +/** + * xilinx_dma_start_transfer - Starts DMA transfer + * @chan: Driver specific channel struct pointer + */ +static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan) +{ + struct xilinx_dma_tx_descriptor *head_desc, *tail_desc; + struct xilinx_axidma_tx_segment *tail_segment, *old_head, *new_head; + u32 reg; + + if (chan->err) + return; + + if (list_empty(&chan->pending_list)) + return; + + /* If it is SG mode and hardware is busy, cannot submit */ + if (chan->has_sg && xilinx_dma_is_running(chan) && + !xilinx_dma_is_idle(chan)) { + dev_dbg(chan->dev, "DMA controller still busy\n"); + return; + } + + head_desc = list_first_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + tail_desc = list_last_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_axidma_tx_segment, node); + + old_head = list_first_entry(&head_desc->segments, + struct xilinx_axidma_tx_segment, node); + new_head = chan->seg_v; + /* Copy Buffer Descriptor fields. */ + new_head->hw = old_head->hw; + + /* Swap and save new reserve */ + list_replace_init(&old_head->node, &new_head->node); + chan->seg_v = old_head; + + tail_segment->hw.next_desc = chan->seg_v->phys; + head_desc->async_tx.phys = new_head->phys; + + reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR); + + if (chan->desc_pendingcount <= XILINX_DMA_COALESCE_MAX) { + reg &= ~XILINX_DMA_CR_COALESCE_MAX; + reg |= chan->desc_pendingcount << + XILINX_DMA_CR_COALESCE_SHIFT; + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg); + } + + if (chan->has_sg) + dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC, + head_desc->async_tx.phys); + + xilinx_dma_start(chan); + + if (chan->err) + return; + + /* Start the transfer */ + if (chan->has_sg) { + dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC, + tail_segment->phys); + } else { + struct xilinx_axidma_tx_segment *segment; + struct xilinx_axidma_desc_hw *hw; + + segment = list_first_entry(&head_desc->segments, + struct xilinx_axidma_tx_segment, + node); + hw = &segment->hw; + + dma_ctrl_write(chan, XILINX_DMA_REG_SRCDSTADDR, hw->buf_addr); + + /* Start the transfer */ + dma_ctrl_write(chan, XILINX_DMA_REG_BTT, + hw->control & XILINX_DMA_MAX_TRANS_LEN); } list_splice_tail_init(&chan->pending_list, &chan->active_list); @@ -714,28 +1180,28 @@ static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan) } /** - * xilinx_vdma_issue_pending - Issue pending transactions + * xilinx_dma_issue_pending - Issue pending transactions * @dchan: DMA channel */ -static void xilinx_vdma_issue_pending(struct dma_chan *dchan) +static void xilinx_dma_issue_pending(struct dma_chan *dchan) { - struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); unsigned long flags; spin_lock_irqsave(&chan->lock, flags); - xilinx_vdma_start_transfer(chan); + chan->start_transfer(chan); spin_unlock_irqrestore(&chan->lock, flags); } /** - * xilinx_vdma_complete_descriptor - Mark the active descriptor as complete + * xilinx_dma_complete_descriptor - Mark the active descriptor as complete * @chan : xilinx DMA channel * * CONTEXT: hardirq */ -static void xilinx_vdma_complete_descriptor(struct xilinx_vdma_chan *chan) +static void xilinx_dma_complete_descriptor(struct xilinx_dma_chan *chan) { - struct xilinx_vdma_tx_descriptor *desc, *next; + struct xilinx_dma_tx_descriptor *desc, *next; /* This function was invoked with lock held */ if (list_empty(&chan->active_list)) @@ -749,27 +1215,27 @@ static void xilinx_vdma_complete_descriptor(struct xilinx_vdma_chan *chan) } /** - * xilinx_vdma_reset - Reset VDMA channel - * @chan: Driver specific VDMA channel + * xilinx_dma_reset - Reset DMA channel + * @chan: Driver specific DMA channel * * Return: '0' on success and failure value on error */ -static int xilinx_vdma_reset(struct xilinx_vdma_chan *chan) +static int xilinx_dma_reset(struct xilinx_dma_chan *chan) { int err; u32 tmp; - vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RESET); + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RESET); /* Wait for the hardware to finish reset */ - err = xilinx_vdma_poll_timeout(chan, XILINX_VDMA_REG_DMACR, tmp, - !(tmp & XILINX_VDMA_DMACR_RESET), 0, - XILINX_VDMA_LOOP_COUNT); + err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMACR, tmp, + !(tmp & XILINX_DMA_DMACR_RESET), 0, + XILINX_DMA_LOOP_COUNT); if (err) { dev_err(chan->dev, "reset timeout, cr %x, sr %x\n", - vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR), - vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR)); + dma_ctrl_read(chan, XILINX_DMA_REG_DMACR), + dma_ctrl_read(chan, XILINX_DMA_REG_DMASR)); return -ETIMEDOUT; } @@ -779,48 +1245,48 @@ static int xilinx_vdma_reset(struct xilinx_vdma_chan *chan) } /** - * xilinx_vdma_chan_reset - Reset VDMA channel and enable interrupts - * @chan: Driver specific VDMA channel + * xilinx_dma_chan_reset - Reset DMA channel and enable interrupts + * @chan: Driver specific DMA channel * * Return: '0' on success and failure value on error */ -static int xilinx_vdma_chan_reset(struct xilinx_vdma_chan *chan) +static int xilinx_dma_chan_reset(struct xilinx_dma_chan *chan) { int err; /* Reset VDMA */ - err = xilinx_vdma_reset(chan); + err = xilinx_dma_reset(chan); if (err) return err; /* Enable interrupts */ - vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, - XILINX_VDMA_DMAXR_ALL_IRQ_MASK); + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, + XILINX_DMA_DMAXR_ALL_IRQ_MASK); return 0; } /** - * xilinx_vdma_irq_handler - VDMA Interrupt handler + * xilinx_dma_irq_handler - DMA Interrupt handler * @irq: IRQ number - * @data: Pointer to the Xilinx VDMA channel structure + * @data: Pointer to the Xilinx DMA channel structure * * Return: IRQ_HANDLED/IRQ_NONE */ -static irqreturn_t xilinx_vdma_irq_handler(int irq, void *data) +static irqreturn_t xilinx_dma_irq_handler(int irq, void *data) { - struct xilinx_vdma_chan *chan = data; + struct xilinx_dma_chan *chan = data; u32 status; /* Read the status and ack the interrupts. */ - status = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR); - if (!(status & XILINX_VDMA_DMAXR_ALL_IRQ_MASK)) + status = dma_ctrl_read(chan, XILINX_DMA_REG_DMASR); + if (!(status & XILINX_DMA_DMAXR_ALL_IRQ_MASK)) return IRQ_NONE; - vdma_ctrl_write(chan, XILINX_VDMA_REG_DMASR, - status & XILINX_VDMA_DMAXR_ALL_IRQ_MASK); + dma_ctrl_write(chan, XILINX_DMA_REG_DMASR, + status & XILINX_DMA_DMAXR_ALL_IRQ_MASK); - if (status & XILINX_VDMA_DMASR_ERR_IRQ) { + if (status & XILINX_DMA_DMASR_ERR_IRQ) { /* * An error occurred. If C_FLUSH_ON_FSYNC is enabled and the * error is recoverable, ignore it. Otherwise flag the error. @@ -828,22 +1294,23 @@ static irqreturn_t xilinx_vdma_irq_handler(int irq, void *data) * Only recoverable errors can be cleared in the DMASR register, * make sure not to write to other error bits to 1. */ - u32 errors = status & XILINX_VDMA_DMASR_ALL_ERR_MASK; - vdma_ctrl_write(chan, XILINX_VDMA_REG_DMASR, - errors & XILINX_VDMA_DMASR_ERR_RECOVER_MASK); + u32 errors = status & XILINX_DMA_DMASR_ALL_ERR_MASK; + + dma_ctrl_write(chan, XILINX_DMA_REG_DMASR, + errors & XILINX_DMA_DMASR_ERR_RECOVER_MASK); if (!chan->flush_on_fsync || - (errors & ~XILINX_VDMA_DMASR_ERR_RECOVER_MASK)) { + (errors & ~XILINX_DMA_DMASR_ERR_RECOVER_MASK)) { dev_err(chan->dev, "Channel %p has errors %x, cdr %x tdr %x\n", chan, errors, - vdma_ctrl_read(chan, XILINX_VDMA_REG_CURDESC), - vdma_ctrl_read(chan, XILINX_VDMA_REG_TAILDESC)); + dma_ctrl_read(chan, XILINX_DMA_REG_CURDESC), + dma_ctrl_read(chan, XILINX_DMA_REG_TAILDESC)); chan->err = true; } } - if (status & XILINX_VDMA_DMASR_DLY_CNT_IRQ) { + if (status & XILINX_DMA_DMASR_DLY_CNT_IRQ) { /* * Device takes too long to do the transfer when user requires * responsiveness. @@ -851,10 +1318,10 @@ static irqreturn_t xilinx_vdma_irq_handler(int irq, void *data) dev_dbg(chan->dev, "Inter-packet latency too long\n"); } - if (status & XILINX_VDMA_DMASR_FRM_CNT_IRQ) { + if (status & XILINX_DMA_DMASR_FRM_CNT_IRQ) { spin_lock(&chan->lock); - xilinx_vdma_complete_descriptor(chan); - xilinx_vdma_start_transfer(chan); + xilinx_dma_complete_descriptor(chan); + chan->start_transfer(chan); spin_unlock(&chan->lock); } @@ -867,11 +1334,13 @@ static irqreturn_t xilinx_vdma_irq_handler(int irq, void *data) * @chan: Driver specific dma channel * @desc: dma transaction descriptor */ -static void append_desc_queue(struct xilinx_vdma_chan *chan, - struct xilinx_vdma_tx_descriptor *desc) +static void append_desc_queue(struct xilinx_dma_chan *chan, + struct xilinx_dma_tx_descriptor *desc) { struct xilinx_vdma_tx_segment *tail_segment; - struct xilinx_vdma_tx_descriptor *tail_desc; + struct xilinx_dma_tx_descriptor *tail_desc; + struct xilinx_axidma_tx_segment *axidma_tail_segment; + struct xilinx_cdma_tx_segment *cdma_tail_segment; if (list_empty(&chan->pending_list)) goto append; @@ -881,10 +1350,23 @@ static void append_desc_queue(struct xilinx_vdma_chan *chan, * that already exists in memory. */ tail_desc = list_last_entry(&chan->pending_list, - struct xilinx_vdma_tx_descriptor, node); - tail_segment = list_last_entry(&tail_desc->segments, - struct xilinx_vdma_tx_segment, node); - tail_segment->hw.next_desc = (u32)desc->async_tx.phys; + struct xilinx_dma_tx_descriptor, node); + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_vdma_tx_segment, + node); + tail_segment->hw.next_desc = (u32)desc->async_tx.phys; + } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { + cdma_tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_cdma_tx_segment, + node); + cdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; + } else { + axidma_tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_axidma_tx_segment, + node); + axidma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; + } /* * Add the software descriptor and all children to the list @@ -894,22 +1376,23 @@ append: list_add_tail(&desc->node, &chan->pending_list); chan->desc_pendingcount++; - if (unlikely(chan->desc_pendingcount > chan->num_frms)) { + if (chan->has_sg && (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA) + && unlikely(chan->desc_pendingcount > chan->num_frms)) { dev_dbg(chan->dev, "desc pendingcount is too high\n"); chan->desc_pendingcount = chan->num_frms; } } /** - * xilinx_vdma_tx_submit - Submit DMA transaction + * xilinx_dma_tx_submit - Submit DMA transaction * @tx: Async transaction descriptor * * Return: cookie value on success and failure value on error */ -static dma_cookie_t xilinx_vdma_tx_submit(struct dma_async_tx_descriptor *tx) +static dma_cookie_t xilinx_dma_tx_submit(struct dma_async_tx_descriptor *tx) { - struct xilinx_vdma_tx_descriptor *desc = to_vdma_tx_descriptor(tx); - struct xilinx_vdma_chan *chan = to_xilinx_chan(tx->chan); + struct xilinx_dma_tx_descriptor *desc = to_dma_tx_descriptor(tx); + struct xilinx_dma_chan *chan = to_xilinx_chan(tx->chan); dma_cookie_t cookie; unsigned long flags; int err; @@ -919,7 +1402,7 @@ static dma_cookie_t xilinx_vdma_tx_submit(struct dma_async_tx_descriptor *tx) * If reset fails, need to hard reset the system. * Channel is no longer functional */ - err = xilinx_vdma_chan_reset(chan); + err = xilinx_dma_chan_reset(chan); if (err < 0) return err; } @@ -950,8 +1433,8 @@ xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan, struct dma_interleaved_template *xt, unsigned long flags) { - struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); - struct xilinx_vdma_tx_descriptor *desc; + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; struct xilinx_vdma_tx_segment *segment, *prev = NULL; struct xilinx_vdma_desc_hw *hw; @@ -965,12 +1448,12 @@ xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan, return NULL; /* Allocate a transaction descriptor. */ - desc = xilinx_vdma_alloc_tx_descriptor(chan); + desc = xilinx_dma_alloc_tx_descriptor(chan); if (!desc) return NULL; dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); - desc->async_tx.tx_submit = xilinx_vdma_tx_submit; + desc->async_tx.tx_submit = xilinx_dma_tx_submit; async_tx_ack(&desc->async_tx); /* Allocate the link descriptor from DMA pool */ @@ -983,14 +1466,25 @@ xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan, hw->vsize = xt->numf; hw->hsize = xt->sgl[0].size; hw->stride = (xt->sgl[0].icg + xt->sgl[0].size) << - XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT; + XILINX_DMA_FRMDLY_STRIDE_STRIDE_SHIFT; hw->stride |= chan->config.frm_dly << - XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT; - - if (xt->dir != DMA_MEM_TO_DEV) - hw->buf_addr = xt->dst_start; - else - hw->buf_addr = xt->src_start; + XILINX_DMA_FRMDLY_STRIDE_FRMDLY_SHIFT; + + if (xt->dir != DMA_MEM_TO_DEV) { + if (chan->ext_addr) { + hw->buf_addr = lower_32_bits(xt->dst_start); + hw->buf_addr_msb = upper_32_bits(xt->dst_start); + } else { + hw->buf_addr = xt->dst_start; + } + } else { + if (chan->ext_addr) { + hw->buf_addr = lower_32_bits(xt->src_start); + hw->buf_addr_msb = upper_32_bits(xt->src_start); + } else { + hw->buf_addr = xt->src_start; + } + } /* Insert the segment into the descriptor segments list. */ list_add_tail(&segment->node, &desc->segments); @@ -1005,29 +1499,194 @@ xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan, return &desc->async_tx; error: - xilinx_vdma_free_tx_descriptor(chan, desc); + xilinx_dma_free_tx_descriptor(chan, desc); + return NULL; +} + +/** + * xilinx_cdma_prep_memcpy - prepare descriptors for a memcpy transaction + * @dchan: DMA channel + * @dma_dst: destination address + * @dma_src: source address + * @len: transfer length + * @flags: transfer ack flags + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor * +xilinx_cdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; + struct xilinx_cdma_tx_segment *segment, *prev; + struct xilinx_cdma_desc_hw *hw; + + if (!len || len > XILINX_DMA_MAX_TRANS_LEN) + return NULL; + + desc = xilinx_dma_alloc_tx_descriptor(chan); + if (!desc) + return NULL; + + dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); + desc->async_tx.tx_submit = xilinx_dma_tx_submit; + + /* Allocate the link descriptor from DMA pool */ + segment = xilinx_cdma_alloc_tx_segment(chan); + if (!segment) + goto error; + + hw = &segment->hw; + hw->control = len; + hw->src_addr = dma_src; + hw->dest_addr = dma_dst; + + /* Fill the previous next descriptor with current */ + prev = list_last_entry(&desc->segments, + struct xilinx_cdma_tx_segment, node); + prev->hw.next_desc = segment->phys; + + /* Insert the segment into the descriptor segments list. */ + list_add_tail(&segment->node, &desc->segments); + + prev = segment; + + /* Link the last hardware descriptor with the first. */ + segment = list_first_entry(&desc->segments, + struct xilinx_cdma_tx_segment, node); + desc->async_tx.phys = segment->phys; + prev->hw.next_desc = segment->phys; + + return &desc->async_tx; + +error: + xilinx_dma_free_tx_descriptor(chan, desc); + return NULL; +} + +/** + * xilinx_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction + * @dchan: DMA channel + * @sgl: scatterlist to transfer to/from + * @sg_len: number of entries in @scatterlist + * @direction: DMA direction + * @flags: transfer ack flags + * @context: APP words of the descriptor + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor *xilinx_dma_prep_slave_sg( + struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long flags, + void *context) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; + struct xilinx_axidma_tx_segment *segment = NULL, *prev = NULL; + u32 *app_w = (u32 *)context; + struct scatterlist *sg; + size_t copy; + size_t sg_used; + unsigned int i; + + if (!is_slave_direction(direction)) + return NULL; + + /* Allocate a transaction descriptor. */ + desc = xilinx_dma_alloc_tx_descriptor(chan); + if (!desc) + return NULL; + + dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); + desc->async_tx.tx_submit = xilinx_dma_tx_submit; + + /* Build transactions using information in the scatter gather list */ + for_each_sg(sgl, sg, sg_len, i) { + sg_used = 0; + + /* Loop until the entire scatterlist entry is used */ + while (sg_used < sg_dma_len(sg)) { + struct xilinx_axidma_desc_hw *hw; + + /* Get a free segment */ + segment = xilinx_axidma_alloc_tx_segment(chan); + if (!segment) + goto error; + + /* + * Calculate the maximum number of bytes to transfer, + * making sure it is less than the hw limit + */ + copy = min_t(size_t, sg_dma_len(sg) - sg_used, + XILINX_DMA_MAX_TRANS_LEN); + hw = &segment->hw; + + /* Fill in the descriptor */ + hw->buf_addr = sg_dma_address(sg) + sg_used; + + hw->control = copy; + + if (chan->direction == DMA_MEM_TO_DEV) { + if (app_w) + memcpy(hw->app, app_w, sizeof(u32) * + XILINX_DMA_NUM_APP_WORDS); + } + + if (prev) + prev->hw.next_desc = segment->phys; + + prev = segment; + sg_used += copy; + + /* + * Insert the segment into the descriptor segments + * list. + */ + list_add_tail(&segment->node, &desc->segments); + } + } + + segment = list_first_entry(&desc->segments, + struct xilinx_axidma_tx_segment, node); + desc->async_tx.phys = segment->phys; + prev->hw.next_desc = segment->phys; + + /* For the last DMA_MEM_TO_DEV transfer, set EOP */ + if (chan->direction == DMA_MEM_TO_DEV) { + segment->hw.control |= XILINX_DMA_BD_SOP; + segment = list_last_entry(&desc->segments, + struct xilinx_axidma_tx_segment, + node); + segment->hw.control |= XILINX_DMA_BD_EOP; + } + + return &desc->async_tx; + +error: + xilinx_dma_free_tx_descriptor(chan, desc); return NULL; } /** - * xilinx_vdma_terminate_all - Halt the channel and free descriptors - * @chan: Driver specific VDMA Channel pointer + * xilinx_dma_terminate_all - Halt the channel and free descriptors + * @chan: Driver specific DMA Channel pointer */ -static int xilinx_vdma_terminate_all(struct dma_chan *dchan) +static int xilinx_dma_terminate_all(struct dma_chan *dchan) { - struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); /* Halt the DMA engine */ - xilinx_vdma_halt(chan); + xilinx_dma_halt(chan); /* Remove and free all of the descriptors in the lists */ - xilinx_vdma_free_descriptors(chan); + xilinx_dma_free_descriptors(chan); return 0; } /** - * xilinx_vdma_channel_set_config - Configure VDMA channel + * xilinx_dma_channel_set_config - Configure VDMA channel * Run-time configuration for Axi VDMA, supports: * . halt the channel * . configure interrupt coalescing and inter-packet delay threshold @@ -1042,13 +1701,13 @@ static int xilinx_vdma_terminate_all(struct dma_chan *dchan) int xilinx_vdma_channel_set_config(struct dma_chan *dchan, struct xilinx_vdma_config *cfg) { - struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); u32 dmacr; if (cfg->reset) - return xilinx_vdma_chan_reset(chan); + return xilinx_dma_chan_reset(chan); - dmacr = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR); + dmacr = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR); chan->config.frm_dly = cfg->frm_dly; chan->config.park = cfg->park; @@ -1058,8 +1717,8 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan, chan->config.master = cfg->master; if (cfg->gen_lock && chan->genlock) { - dmacr |= XILINX_VDMA_DMACR_GENLOCK_EN; - dmacr |= cfg->master << XILINX_VDMA_DMACR_MASTER_SHIFT; + dmacr |= XILINX_DMA_DMACR_GENLOCK_EN; + dmacr |= cfg->master << XILINX_DMA_DMACR_MASTER_SHIFT; } chan->config.frm_cnt_en = cfg->frm_cnt_en; @@ -1071,21 +1730,21 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan, chan->config.coalesc = cfg->coalesc; chan->config.delay = cfg->delay; - if (cfg->coalesc <= XILINX_VDMA_DMACR_FRAME_COUNT_MAX) { - dmacr |= cfg->coalesc << XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT; + if (cfg->coalesc <= XILINX_DMA_DMACR_FRAME_COUNT_MAX) { + dmacr |= cfg->coalesc << XILINX_DMA_DMACR_FRAME_COUNT_SHIFT; chan->config.coalesc = cfg->coalesc; } - if (cfg->delay <= XILINX_VDMA_DMACR_DELAY_MAX) { - dmacr |= cfg->delay << XILINX_VDMA_DMACR_DELAY_SHIFT; + if (cfg->delay <= XILINX_DMA_DMACR_DELAY_MAX) { + dmacr |= cfg->delay << XILINX_DMA_DMACR_DELAY_SHIFT; chan->config.delay = cfg->delay; } /* FSync Source selection */ - dmacr &= ~XILINX_VDMA_DMACR_FSYNCSRC_MASK; - dmacr |= cfg->ext_fsync << XILINX_VDMA_DMACR_FSYNCSRC_SHIFT; + dmacr &= ~XILINX_DMA_DMACR_FSYNCSRC_MASK; + dmacr |= cfg->ext_fsync << XILINX_DMA_DMACR_FSYNCSRC_SHIFT; - vdma_ctrl_write(chan, XILINX_VDMA_REG_DMACR, dmacr); + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, dmacr); return 0; } @@ -1096,14 +1755,14 @@ EXPORT_SYMBOL(xilinx_vdma_channel_set_config); */ /** - * xilinx_vdma_chan_remove - Per Channel remove function - * @chan: Driver specific VDMA channel + * xilinx_dma_chan_remove - Per Channel remove function + * @chan: Driver specific DMA channel */ -static void xilinx_vdma_chan_remove(struct xilinx_vdma_chan *chan) +static void xilinx_dma_chan_remove(struct xilinx_dma_chan *chan) { /* Disable all interrupts */ - vdma_ctrl_clr(chan, XILINX_VDMA_REG_DMACR, - XILINX_VDMA_DMAXR_ALL_IRQ_MASK); + dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR, + XILINX_DMA_DMAXR_ALL_IRQ_MASK); if (chan->irq > 0) free_irq(chan->irq, chan); @@ -1113,8 +1772,197 @@ static void xilinx_vdma_chan_remove(struct xilinx_vdma_chan *chan) list_del(&chan->common.device_node); } +static int axidma_clk_init(struct platform_device *pdev, struct clk **axi_clk, + struct clk **tx_clk, struct clk **rx_clk, + struct clk **sg_clk, struct clk **tmp_clk) +{ + int err; + + *tmp_clk = NULL; + + *axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk"); + if (IS_ERR(*axi_clk)) { + err = PTR_ERR(*axi_clk); + dev_err(&pdev->dev, "failed to get axi_aclk (%u)\n", err); + return err; + } + + *tx_clk = devm_clk_get(&pdev->dev, "m_axi_mm2s_aclk"); + if (IS_ERR(*tx_clk)) + *tx_clk = NULL; + + *rx_clk = devm_clk_get(&pdev->dev, "m_axi_s2mm_aclk"); + if (IS_ERR(*rx_clk)) + *rx_clk = NULL; + + *sg_clk = devm_clk_get(&pdev->dev, "m_axi_sg_aclk"); + if (IS_ERR(*sg_clk)) + *sg_clk = NULL; + + err = clk_prepare_enable(*axi_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable axi_clk (%u)\n", err); + return err; + } + + err = clk_prepare_enable(*tx_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err); + goto err_disable_axiclk; + } + + err = clk_prepare_enable(*rx_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err); + goto err_disable_txclk; + } + + err = clk_prepare_enable(*sg_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable sg_clk (%u)\n", err); + goto err_disable_rxclk; + } + + return 0; + +err_disable_rxclk: + clk_disable_unprepare(*rx_clk); +err_disable_txclk: + clk_disable_unprepare(*tx_clk); +err_disable_axiclk: + clk_disable_unprepare(*axi_clk); + + return err; +} + +static int axicdma_clk_init(struct platform_device *pdev, struct clk **axi_clk, + struct clk **dev_clk, struct clk **tmp_clk, + struct clk **tmp1_clk, struct clk **tmp2_clk) +{ + int err; + + *tmp_clk = NULL; + *tmp1_clk = NULL; + *tmp2_clk = NULL; + + *axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk"); + if (IS_ERR(*axi_clk)) { + err = PTR_ERR(*axi_clk); + dev_err(&pdev->dev, "failed to get axi_clk (%u)\n", err); + return err; + } + + *dev_clk = devm_clk_get(&pdev->dev, "m_axi_aclk"); + if (IS_ERR(*dev_clk)) { + err = PTR_ERR(*dev_clk); + dev_err(&pdev->dev, "failed to get dev_clk (%u)\n", err); + return err; + } + + err = clk_prepare_enable(*axi_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable axi_clk (%u)\n", err); + return err; + } + + err = clk_prepare_enable(*dev_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable dev_clk (%u)\n", err); + goto err_disable_axiclk; + } + + return 0; + +err_disable_axiclk: + clk_disable_unprepare(*axi_clk); + + return err; +} + +static int axivdma_clk_init(struct platform_device *pdev, struct clk **axi_clk, + struct clk **tx_clk, struct clk **txs_clk, + struct clk **rx_clk, struct clk **rxs_clk) +{ + int err; + + *axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk"); + if (IS_ERR(*axi_clk)) { + err = PTR_ERR(*axi_clk); + dev_err(&pdev->dev, "failed to get axi_aclk (%u)\n", err); + return err; + } + + *tx_clk = devm_clk_get(&pdev->dev, "m_axi_mm2s_aclk"); + if (IS_ERR(*tx_clk)) + *tx_clk = NULL; + + *txs_clk = devm_clk_get(&pdev->dev, "m_axis_mm2s_aclk"); + if (IS_ERR(*txs_clk)) + *txs_clk = NULL; + + *rx_clk = devm_clk_get(&pdev->dev, "m_axi_s2mm_aclk"); + if (IS_ERR(*rx_clk)) + *rx_clk = NULL; + + *rxs_clk = devm_clk_get(&pdev->dev, "s_axis_s2mm_aclk"); + if (IS_ERR(*rxs_clk)) + *rxs_clk = NULL; + + err = clk_prepare_enable(*axi_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable axi_clk (%u)\n", err); + return err; + } + + err = clk_prepare_enable(*tx_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err); + goto err_disable_axiclk; + } + + err = clk_prepare_enable(*txs_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable txs_clk (%u)\n", err); + goto err_disable_txclk; + } + + err = clk_prepare_enable(*rx_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err); + goto err_disable_txsclk; + } + + err = clk_prepare_enable(*rxs_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable rxs_clk (%u)\n", err); + goto err_disable_rxclk; + } + + return 0; + +err_disable_rxclk: + clk_disable_unprepare(*rx_clk); +err_disable_txsclk: + clk_disable_unprepare(*txs_clk); +err_disable_txclk: + clk_disable_unprepare(*tx_clk); +err_disable_axiclk: + clk_disable_unprepare(*axi_clk); + + return err; +} + +static void xdma_disable_allclks(struct xilinx_dma_device *xdev) +{ + clk_disable_unprepare(xdev->rxs_clk); + clk_disable_unprepare(xdev->rx_clk); + clk_disable_unprepare(xdev->txs_clk); + clk_disable_unprepare(xdev->tx_clk); + clk_disable_unprepare(xdev->axi_clk); +} + /** - * xilinx_vdma_chan_probe - Per Channel Probing + * xilinx_dma_chan_probe - Per Channel Probing * It get channel features from the device tree entry and * initialize special channel handling routines * @@ -1123,10 +1971,10 @@ static void xilinx_vdma_chan_remove(struct xilinx_vdma_chan *chan) * * Return: '0' on success and failure value on error */ -static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev, +static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, struct device_node *node) { - struct xilinx_vdma_chan *chan; + struct xilinx_dma_chan *chan; bool has_dre = false; u32 value, width; int err; @@ -1140,6 +1988,7 @@ static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev, chan->xdev = xdev; chan->has_sg = xdev->has_sg; chan->desc_pendingcount = 0x0; + chan->ext_addr = xdev->ext_addr; spin_lock_init(&chan->lock); INIT_LIST_HEAD(&chan->pending_list); @@ -1169,23 +2018,27 @@ static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev, chan->direction = DMA_MEM_TO_DEV; chan->id = 0; - chan->ctrl_offset = XILINX_VDMA_MM2S_CTRL_OFFSET; - chan->desc_offset = XILINX_VDMA_MM2S_DESC_OFFSET; + chan->ctrl_offset = XILINX_DMA_MM2S_CTRL_OFFSET; + if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + chan->desc_offset = XILINX_VDMA_MM2S_DESC_OFFSET; - if (xdev->flush_on_fsync == XILINX_VDMA_FLUSH_BOTH || - xdev->flush_on_fsync == XILINX_VDMA_FLUSH_MM2S) - chan->flush_on_fsync = true; + if (xdev->flush_on_fsync == XILINX_DMA_FLUSH_BOTH || + xdev->flush_on_fsync == XILINX_DMA_FLUSH_MM2S) + chan->flush_on_fsync = true; + } } else if (of_device_is_compatible(node, "xlnx,axi-vdma-s2mm-channel")) { chan->direction = DMA_DEV_TO_MEM; chan->id = 1; - chan->ctrl_offset = XILINX_VDMA_S2MM_CTRL_OFFSET; - chan->desc_offset = XILINX_VDMA_S2MM_DESC_OFFSET; + chan->ctrl_offset = XILINX_DMA_S2MM_CTRL_OFFSET; + if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + chan->desc_offset = XILINX_VDMA_S2MM_DESC_OFFSET; - if (xdev->flush_on_fsync == XILINX_VDMA_FLUSH_BOTH || - xdev->flush_on_fsync == XILINX_VDMA_FLUSH_S2MM) - chan->flush_on_fsync = true; + if (xdev->flush_on_fsync == XILINX_DMA_FLUSH_BOTH || + xdev->flush_on_fsync == XILINX_DMA_FLUSH_S2MM) + chan->flush_on_fsync = true; + } } else { dev_err(xdev->dev, "Invalid channel compatible node\n"); return -EINVAL; @@ -1193,15 +2046,22 @@ static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev, /* Request the interrupt */ chan->irq = irq_of_parse_and_map(node, 0); - err = request_irq(chan->irq, xilinx_vdma_irq_handler, IRQF_SHARED, - "xilinx-vdma-controller", chan); + err = request_irq(chan->irq, xilinx_dma_irq_handler, IRQF_SHARED, + "xilinx-dma-controller", chan); if (err) { dev_err(xdev->dev, "unable to request IRQ %d\n", chan->irq); return err; } + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) + chan->start_transfer = xilinx_dma_start_transfer; + else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) + chan->start_transfer = xilinx_cdma_start_transfer; + else + chan->start_transfer = xilinx_vdma_start_transfer; + /* Initialize the tasklet */ - tasklet_init(&chan->tasklet, xilinx_vdma_do_tasklet, + tasklet_init(&chan->tasklet, xilinx_dma_do_tasklet, (unsigned long)chan); /* @@ -1214,7 +2074,7 @@ static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev, xdev->chan[chan->id] = chan; /* Reset the channel */ - err = xilinx_vdma_chan_reset(chan); + err = xilinx_dma_chan_reset(chan); if (err < 0) { dev_err(xdev->dev, "Reset channel failed\n"); return err; @@ -1233,28 +2093,54 @@ static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev, static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma) { - struct xilinx_vdma_device *xdev = ofdma->of_dma_data; + struct xilinx_dma_device *xdev = ofdma->of_dma_data; int chan_id = dma_spec->args[0]; - if (chan_id >= XILINX_VDMA_MAX_CHANS_PER_DEVICE || !xdev->chan[chan_id]) + if (chan_id >= XILINX_DMA_MAX_CHANS_PER_DEVICE || !xdev->chan[chan_id]) return NULL; return dma_get_slave_channel(&xdev->chan[chan_id]->common); } +static const struct xilinx_dma_config axidma_config = { + .dmatype = XDMA_TYPE_AXIDMA, + .clk_init = axidma_clk_init, +}; + +static const struct xilinx_dma_config axicdma_config = { + .dmatype = XDMA_TYPE_CDMA, + .clk_init = axicdma_clk_init, +}; + +static const struct xilinx_dma_config axivdma_config = { + .dmatype = XDMA_TYPE_VDMA, + .clk_init = axivdma_clk_init, +}; + +static const struct of_device_id xilinx_dma_of_ids[] = { + { .compatible = "xlnx,axi-dma-1.00.a", .data = &axidma_config }, + { .compatible = "xlnx,axi-cdma-1.00.a", .data = &axicdma_config }, + { .compatible = "xlnx,axi-vdma-1.00.a", .data = &axivdma_config }, + {} +}; +MODULE_DEVICE_TABLE(of, xilinx_dma_of_ids); + /** - * xilinx_vdma_probe - Driver probe function + * xilinx_dma_probe - Driver probe function * @pdev: Pointer to the platform_device structure * * Return: '0' on success and failure value on error */ -static int xilinx_vdma_probe(struct platform_device *pdev) +static int xilinx_dma_probe(struct platform_device *pdev) { + int (*clk_init)(struct platform_device *, struct clk **, struct clk **, + struct clk **, struct clk **, struct clk **) + = axivdma_clk_init; struct device_node *node = pdev->dev.of_node; - struct xilinx_vdma_device *xdev; - struct device_node *child; + struct xilinx_dma_device *xdev; + struct device_node *child, *np = pdev->dev.of_node; struct resource *io; - u32 num_frames; + u32 num_frames, addr_width; int i, err; /* Allocate and initialize the DMA engine structure */ @@ -1263,6 +2149,20 @@ static int xilinx_vdma_probe(struct platform_device *pdev) return -ENOMEM; xdev->dev = &pdev->dev; + if (np) { + const struct of_device_id *match; + + match = of_match_node(xilinx_dma_of_ids, np); + if (match && match->data) { + xdev->dma_config = match->data; + clk_init = xdev->dma_config->clk_init; + } + } + + err = clk_init(pdev, &xdev->axi_clk, &xdev->tx_clk, &xdev->txs_clk, + &xdev->rx_clk, &xdev->rxs_clk); + if (err) + return err; /* Request and map I/O memory */ io = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1273,46 +2173,77 @@ static int xilinx_vdma_probe(struct platform_device *pdev) /* Retrieve the DMA engine properties from the device tree */ xdev->has_sg = of_property_read_bool(node, "xlnx,include-sg"); - err = of_property_read_u32(node, "xlnx,num-fstores", &num_frames); - if (err < 0) { - dev_err(xdev->dev, "missing xlnx,num-fstores property\n"); - return err; + if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + err = of_property_read_u32(node, "xlnx,num-fstores", + &num_frames); + if (err < 0) { + dev_err(xdev->dev, + "missing xlnx,num-fstores property\n"); + return err; + } + + err = of_property_read_u32(node, "xlnx,flush-fsync", + &xdev->flush_on_fsync); + if (err < 0) + dev_warn(xdev->dev, + "missing xlnx,flush-fsync property\n"); } - err = of_property_read_u32(node, "xlnx,flush-fsync", - &xdev->flush_on_fsync); + err = of_property_read_u32(node, "xlnx,addrwidth", &addr_width); if (err < 0) - dev_warn(xdev->dev, "missing xlnx,flush-fsync property\n"); + dev_warn(xdev->dev, "missing xlnx,addrwidth property\n"); + + if (addr_width > 32) + xdev->ext_addr = true; + else + xdev->ext_addr = false; + + /* Set the dma mask bits */ + dma_set_mask(xdev->dev, DMA_BIT_MASK(addr_width)); /* Initialize the DMA engine */ xdev->common.dev = &pdev->dev; INIT_LIST_HEAD(&xdev->common.channels); - dma_cap_set(DMA_SLAVE, xdev->common.cap_mask); - dma_cap_set(DMA_PRIVATE, xdev->common.cap_mask); + if (!(xdev->dma_config->dmatype == XDMA_TYPE_CDMA)) { + dma_cap_set(DMA_SLAVE, xdev->common.cap_mask); + dma_cap_set(DMA_PRIVATE, xdev->common.cap_mask); + } xdev->common.device_alloc_chan_resources = - xilinx_vdma_alloc_chan_resources; + xilinx_dma_alloc_chan_resources; xdev->common.device_free_chan_resources = - xilinx_vdma_free_chan_resources; - xdev->common.device_prep_interleaved_dma = + xilinx_dma_free_chan_resources; + xdev->common.device_terminate_all = xilinx_dma_terminate_all; + xdev->common.device_tx_status = xilinx_dma_tx_status; + xdev->common.device_issue_pending = xilinx_dma_issue_pending; + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + xdev->common.device_prep_slave_sg = xilinx_dma_prep_slave_sg; + /* Residue calculation is supported by only AXI DMA */ + xdev->common.residue_granularity = + DMA_RESIDUE_GRANULARITY_SEGMENT; + } else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { + dma_cap_set(DMA_MEMCPY, xdev->common.cap_mask); + xdev->common.device_prep_dma_memcpy = xilinx_cdma_prep_memcpy; + } else { + xdev->common.device_prep_interleaved_dma = xilinx_vdma_dma_prep_interleaved; - xdev->common.device_terminate_all = xilinx_vdma_terminate_all; - xdev->common.device_tx_status = xilinx_vdma_tx_status; - xdev->common.device_issue_pending = xilinx_vdma_issue_pending; + } platform_set_drvdata(pdev, xdev); /* Initialize the channels */ for_each_child_of_node(node, child) { - err = xilinx_vdma_chan_probe(xdev, child); + err = xilinx_dma_chan_probe(xdev, child); if (err < 0) - goto error; + goto disable_clks; } - for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++) - if (xdev->chan[i]) - xdev->chan[i]->num_frms = num_frames; + if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + for (i = 0; i < XILINX_DMA_MAX_CHANS_PER_DEVICE; i++) + if (xdev->chan[i]) + xdev->chan[i]->num_frms = num_frames; + } /* Register the DMA engine with the core */ dma_async_device_register(&xdev->common); @@ -1329,49 +2260,47 @@ static int xilinx_vdma_probe(struct platform_device *pdev) return 0; +disable_clks: + xdma_disable_allclks(xdev); error: - for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++) + for (i = 0; i < XILINX_DMA_MAX_CHANS_PER_DEVICE; i++) if (xdev->chan[i]) - xilinx_vdma_chan_remove(xdev->chan[i]); + xilinx_dma_chan_remove(xdev->chan[i]); return err; } /** - * xilinx_vdma_remove - Driver remove function + * xilinx_dma_remove - Driver remove function * @pdev: Pointer to the platform_device structure * * Return: Always '0' */ -static int xilinx_vdma_remove(struct platform_device *pdev) +static int xilinx_dma_remove(struct platform_device *pdev) { - struct xilinx_vdma_device *xdev = platform_get_drvdata(pdev); + struct xilinx_dma_device *xdev = platform_get_drvdata(pdev); int i; of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&xdev->common); - for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++) + for (i = 0; i < XILINX_DMA_MAX_CHANS_PER_DEVICE; i++) if (xdev->chan[i]) - xilinx_vdma_chan_remove(xdev->chan[i]); + xilinx_dma_chan_remove(xdev->chan[i]); + + xdma_disable_allclks(xdev); return 0; } -static const struct of_device_id xilinx_vdma_of_ids[] = { - { .compatible = "xlnx,axi-vdma-1.00.a",}, - {} -}; -MODULE_DEVICE_TABLE(of, xilinx_vdma_of_ids); - static struct platform_driver xilinx_vdma_driver = { .driver = { .name = "xilinx-vdma", - .of_match_table = xilinx_vdma_of_ids, + .of_match_table = xilinx_dma_of_ids, }, - .probe = xilinx_vdma_probe, - .remove = xilinx_vdma_remove, + .probe = xilinx_dma_probe, + .remove = xilinx_dma_remove, }; module_platform_driver(xilinx_vdma_driver); diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 520ed1d..4fd7f98 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -144,16 +144,16 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, struct dw_dma_slave *slave = c->tx_param; slave->dma_dev = &dma_dev->dev; - slave->src_master = 1; - slave->dst_master = 0; + slave->m_master = 0; + slave->p_master = 1; } if (c->rx_param) { struct dw_dma_slave *slave = c->rx_param; slave->dma_dev = &dma_dev->dev; - slave->src_master = 1; - slave->dst_master = 0; + slave->m_master = 0; + slave->p_master = 1; } spi_pdata.dma_filter = lpss_dma_filter; diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 98862aa..5eea74d 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1454,13 +1454,13 @@ byt_serial_setup(struct serial_private *priv, return -EINVAL; } - rx_param->src_master = 1; - rx_param->dst_master = 0; + rx_param->m_master = 0; + rx_param->p_master = 1; dma->rxconf.src_maxburst = 16; - tx_param->src_master = 1; - tx_param->dst_master = 0; + tx_param->m_master = 0; + tx_param->p_master = 1; dma->txconf.dst_maxburst = 16; diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h index 10fe2a2..27e9ec8 100644 --- a/include/linux/amba/pl08x.h +++ b/include/linux/amba/pl08x.h @@ -86,7 +86,7 @@ struct pl08x_channel_data { * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2 */ struct pl08x_platform_data { - const struct pl08x_channel_data *slave_channels; + struct pl08x_channel_data *slave_channels; unsigned int num_slave_channels; struct pl08x_channel_data memcpy_channel; int (*get_xfer_signal)(const struct pl08x_channel_data *); diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h index 7145644..f2e538a 100644 --- a/include/linux/dma/dw.h +++ b/include/linux/dma/dw.h @@ -27,6 +27,7 @@ struct dw_dma; * @regs: memory mapped I/O space * @clk: hclk clock * @dw: struct dw_dma that is filed by dw_dma_probe() + * @pdata: pointer to platform data */ struct dw_dma_chip { struct device *dev; @@ -34,10 +35,12 @@ struct dw_dma_chip { void __iomem *regs; struct clk *clk; struct dw_dma *dw; + + const struct dw_dma_platform_data *pdata; }; /* Export to the platform drivers */ -int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata); +int dw_dma_probe(struct dw_dma_chip *chip); int dw_dma_remove(struct dw_dma_chip *chip); /* DMA API extensions */ diff --git a/include/linux/dma/xilinx_dma.h b/include/linux/dma/xilinx_dma.h index 34b98f2..3ae3000 100644 --- a/include/linux/dma/xilinx_dma.h +++ b/include/linux/dma/xilinx_dma.h @@ -41,6 +41,20 @@ struct xilinx_vdma_config { int ext_fsync; }; +/** + * enum xdma_ip_type: DMA IP type. + * + * XDMA_TYPE_AXIDMA: Axi dma ip. + * XDMA_TYPE_CDMA: Axi cdma ip. + * XDMA_TYPE_VDMA: Axi vdma ip. + * + */ +enum xdma_ip_type { + XDMA_TYPE_AXIDMA = 0, + XDMA_TYPE_CDMA, + XDMA_TYPE_VDMA, +}; + int xilinx_vdma_channel_set_config(struct dma_chan *dchan, struct xilinx_vdma_config *cfg); diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 0174337..30de019 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -804,6 +804,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single( sg_dma_address(&sg) = buf; sg_dma_len(&sg) = len; + if (!chan || !chan->device || !chan->device->device_prep_slave_sg) + return NULL; + return chan->device->device_prep_slave_sg(chan, &sg, 1, dir, flags, NULL); } @@ -812,6 +815,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_sg( struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction dir, unsigned long flags) { + if (!chan || !chan->device || !chan->device->device_prep_slave_sg) + return NULL; + return chan->device->device_prep_slave_sg(chan, sgl, sg_len, dir, flags, NULL); } @@ -823,6 +829,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_rio_sg( enum dma_transfer_direction dir, unsigned long flags, struct rio_dma_ext *rio_ext) { + if (!chan || !chan->device || !chan->device->device_prep_slave_sg) + return NULL; + return chan->device->device_prep_slave_sg(chan, sgl, sg_len, dir, flags, rio_ext); } @@ -833,6 +842,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_cyclic( size_t period_len, enum dma_transfer_direction dir, unsigned long flags) { + if (!chan || !chan->device || !chan->device->device_prep_dma_cyclic) + return NULL; + return chan->device->device_prep_dma_cyclic(chan, buf_addr, buf_len, period_len, dir, flags); } @@ -841,6 +853,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma( struct dma_chan *chan, struct dma_interleaved_template *xt, unsigned long flags) { + if (!chan || !chan->device || !chan->device->device_prep_interleaved_dma) + return NULL; + return chan->device->device_prep_interleaved_dma(chan, xt, flags); } @@ -848,7 +863,7 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memset( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags) { - if (!chan || !chan->device) + if (!chan || !chan->device || !chan->device->device_prep_dma_memset) return NULL; return chan->device->device_prep_dma_memset(chan, dest, value, @@ -861,6 +876,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg( struct scatterlist *src_sg, unsigned int src_nents, unsigned long flags) { + if (!chan || !chan->device || !chan->device->device_prep_dma_sg) + return NULL; + return chan->device->device_prep_dma_sg(chan, dst_sg, dst_nents, src_sg, src_nents, flags); } diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 03b6095..d15d8ba 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -21,15 +21,15 @@ * @dma_dev: required DMA master device * @src_id: src request line * @dst_id: dst request line - * @src_master: src master for transfers on allocated channel. - * @dst_master: dest master for transfers on allocated channel. + * @m_master: memory master for transfers on allocated channel + * @p_master: peripheral master for transfers on allocated channel */ struct dw_dma_slave { struct device *dma_dev; u8 src_id; u8 dst_id; - u8 src_master; - u8 dst_master; + u8 m_master; + u8 p_master; }; /** @@ -43,7 +43,7 @@ struct dw_dma_slave { * @block_size: Maximum block size supported by the controller * @nr_masters: Number of AHB masters supported by the controller * @data_width: Maximum data width supported by hardware per AHB master - * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) + * (in bytes, power of 2) */ struct dw_dma_platform_data { unsigned int nr_channels; @@ -55,7 +55,7 @@ struct dw_dma_platform_data { #define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */ #define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */ unsigned char chan_priority; - unsigned short block_size; + unsigned int block_size; unsigned char nr_masters; unsigned char data_width[DW_DMA_MAX_NR_MASTERS]; }; diff --git a/sound/soc/intel/common/sst-firmware.c b/sound/soc/intel/common/sst-firmware.c index ef4881e..2599352 100644 --- a/sound/soc/intel/common/sst-firmware.c +++ b/sound/soc/intel/common/sst-firmware.c @@ -203,7 +203,7 @@ static struct dw_dma_chip *dw_probe(struct device *dev, struct resource *mem, chip->dev = dev; - err = dw_dma_probe(chip, NULL); + err = dw_dma_probe(chip); if (err) return ERR_PTR(err); |