Diffstat (limited to 'arch/mips/cavium-octeon/dma-octeon.c')
-rw-r--r--	arch/mips/cavium-octeon/dma-octeon.c	| 581
1 file changed, 294 insertions, 287 deletions
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index d22b5a2..1abb66c 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -8,335 +8,342 @@
  * Copyright (C) 2005 Ilya A. Volynets-Evenbakh <ilya@total-knowledge.com>
  * swiped from i386, and cloned for MIPS by Geert, polished by Ralf.
  * IP32 changes by Ilya.
- * Cavium Networks: Create new dma setup for Cavium Networks Octeon based on
- * the kernels original.
+ * Copyright (C) 2010 Cavium Networks, Inc.
  */
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/string.h>
 #include <linux/dma-mapping.h>
-#include <linux/platform_device.h>
 #include <linux/scatterlist.h>
+#include <linux/bootmem.h>
+#include <linux/swiotlb.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/mm.h>
 
-#include <linux/cache.h>
-#include <linux/io.h>
+#include <asm/bootinfo.h>
 
 #include <asm/octeon/octeon.h>
+
+#ifdef CONFIG_PCI
+#include <asm/octeon/pci-octeon.h>
 #include <asm/octeon/cvmx-npi-defs.h>
 #include <asm/octeon/cvmx-pci-defs.h>
 
-#include <dma-coherence.h>
+static dma_addr_t octeon_hole_phys_to_dma(phys_addr_t paddr)
+{
+	if (paddr >= CVMX_PCIE_BAR1_PHYS_BASE && paddr < (CVMX_PCIE_BAR1_PHYS_BASE + CVMX_PCIE_BAR1_PHYS_SIZE))
+		return paddr - CVMX_PCIE_BAR1_PHYS_BASE + CVMX_PCIE_BAR1_RC_BASE;
+	else
+		return paddr;
+}
 
-#ifdef CONFIG_PCI
-#include <asm/octeon/pci-octeon.h>
-#endif
+static phys_addr_t octeon_hole_dma_to_phys(dma_addr_t daddr)
+{
+	if (daddr >= CVMX_PCIE_BAR1_RC_BASE)
+		return daddr + CVMX_PCIE_BAR1_PHYS_BASE - CVMX_PCIE_BAR1_RC_BASE;
+	else
+		return daddr;
+}
+
+static dma_addr_t octeon_gen1_phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	if (paddr >= 0x410000000ull && paddr < 0x420000000ull)
+		paddr -= 0x400000000ull;
+	return octeon_hole_phys_to_dma(paddr);
+}
 
-#define BAR2_PCI_ADDRESS 0x8000000000ul
+static phys_addr_t octeon_gen1_dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	daddr = octeon_hole_dma_to_phys(daddr);
 
-struct bar1_index_state {
-	int16_t ref_count;	/* Number of PCI mappings using this index */
-	uint16_t address_bits;	/* Upper bits of physical address. This is
-				   shifted 22 bits */
-};
+	if (daddr >= 0x10000000ull && daddr < 0x20000000ull)
+		daddr += 0x400000000ull;
 
-#ifdef CONFIG_PCI
-static DEFINE_RAW_SPINLOCK(bar1_lock);
-static struct bar1_index_state bar1_state[32];
-#endif
+	return daddr;
+}
 
-dma_addr_t octeon_map_dma_mem(struct device *dev, void *ptr, size_t size)
+static dma_addr_t octeon_big_phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
-#ifndef CONFIG_PCI
-	/* Without PCI/PCIe this function can be called for Octeon internal
-	   devices such as USB. These devices all support 64bit addressing */
+	if (paddr >= 0x410000000ull && paddr < 0x420000000ull)
+		paddr -= 0x400000000ull;
+
+	/* Anything in the BAR1 hole or above goes via BAR2 */
+	if (paddr >= 0xf0000000ull)
+		paddr = OCTEON_BAR2_PCI_ADDRESS + paddr;
+
+	return paddr;
+}
+
+static phys_addr_t octeon_big_dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	if (daddr >= OCTEON_BAR2_PCI_ADDRESS)
+		daddr -= OCTEON_BAR2_PCI_ADDRESS;
+
+	if (daddr >= 0x10000000ull && daddr < 0x20000000ull)
+		daddr += 0x400000000ull;
+	return daddr;
+}
+
+static dma_addr_t octeon_small_phys_to_dma(struct device *dev,
+					   phys_addr_t paddr)
+{
+	if (paddr >= 0x410000000ull && paddr < 0x420000000ull)
+		paddr -= 0x400000000ull;
+
+	/* Anything not in the BAR1 range goes via BAR2 */
+	if (paddr >= octeon_bar1_pci_phys && paddr < octeon_bar1_pci_phys + 0x8000000ull)
+		paddr = paddr - octeon_bar1_pci_phys;
+	else
+		paddr = OCTEON_BAR2_PCI_ADDRESS + paddr;
+
+	return paddr;
+}
+
+static phys_addr_t octeon_small_dma_to_phys(struct device *dev,
+					    dma_addr_t daddr)
+{
+	if (daddr >= OCTEON_BAR2_PCI_ADDRESS)
+		daddr -= OCTEON_BAR2_PCI_ADDRESS;
+	else
+		daddr += octeon_bar1_pci_phys;
+
+	if (daddr >= 0x10000000ull && daddr < 0x20000000ull)
+		daddr += 0x400000000ull;
+	return daddr;
+}
+
+#endif /* CONFIG_PCI */
+
+static dma_addr_t octeon_dma_map_page(struct device *dev, struct page *page,
+	unsigned long offset, size_t size, enum dma_data_direction direction,
+	struct dma_attrs *attrs)
+{
+	dma_addr_t daddr = swiotlb_map_page(dev, page, offset, size,
+					    direction, attrs);
 	mb();
-	return virt_to_phys(ptr);
-#else
-	unsigned long flags;
-	uint64_t dma_mask;
-	int64_t start_index;
-	dma_addr_t result = -1;
-	uint64_t physical = virt_to_phys(ptr);
-	int64_t index;
 
+	return daddr;
+}
+
+static int octeon_dma_map_sg(struct device *dev, struct scatterlist *sg,
+	int nents, enum dma_data_direction direction, struct dma_attrs *attrs)
+{
+	int r = swiotlb_map_sg_attrs(dev, sg, nents, direction, attrs);
 	mb();
-	/*
-	 * Use the DMA masks to determine the allowed memory
-	 * region. For us it doesn't limit the actual memory, just the
-	 * address visible over PCI. Devices with limits need to use
-	 * lower indexed Bar1 entries.
-	 */
-	if (dev) {
-		dma_mask = dev->coherent_dma_mask;
-		if (dev->dma_mask)
-			dma_mask = *dev->dma_mask;
-	} else {
-		dma_mask = 0xfffffffful;
-	}
 
+	return r;
+}
 
-	/*
-	 * Platform devices, such as the internal USB, skip all
-	 * translation and use Octeon physical addresses directly.
-	 */
-	if (!dev || dev->bus == &platform_bus_type)
-		return physical;
+static void octeon_dma_sync_single_for_device(struct device *dev,
+	dma_addr_t dma_handle, size_t size, enum dma_data_direction direction)
+{
+	swiotlb_sync_single_for_device(dev, dma_handle, size, direction);
+	mb();
+}
 
-	switch (octeon_dma_bar_type) {
-	case OCTEON_DMA_BAR_TYPE_PCIE:
-		if (unlikely(physical < (16ul << 10)))
-			panic("dma_map_single: Not allowed to map first 16KB."
-			      " It interferes with BAR0 special area\n");
-		else if ((physical + size >= (256ul << 20)) &&
-			 (physical < (512ul << 20)))
-			panic("dma_map_single: Not allowed to map bootbus\n");
-		else if ((physical + size >= 0x400000000ull) &&
-			 physical < 0x410000000ull)
-			panic("dma_map_single: "
-			      "Attempt to map illegal memory address 0x%llx\n",
-			      physical);
-		else if (physical >= 0x420000000ull)
-			panic("dma_map_single: "
-			      "Attempt to map illegal memory address 0x%llx\n",
-			      physical);
-		else if (physical >= CVMX_PCIE_BAR1_PHYS_BASE &&
-			 physical + size < (CVMX_PCIE_BAR1_PHYS_BASE + CVMX_PCIE_BAR1_PHYS_SIZE)) {
-			result = physical - CVMX_PCIE_BAR1_PHYS_BASE + CVMX_PCIE_BAR1_RC_BASE;
-
-			if (((result+size-1) & dma_mask) != result+size-1)
-				panic("dma_map_single: Attempt to map address 0x%llx-0x%llx, which can't be accessed according to the dma mask 0x%llx\n",
-				      physical, physical+size-1, dma_mask);
-			goto done;
-		}
-
-		/* The 2nd 256MB is mapped at 256<<20 instead of 0x410000000 */
-		if ((physical >= 0x410000000ull) && physical < 0x420000000ull)
-			result = physical - 0x400000000ull;
-		else
-			result = physical;
-		if (((result+size-1) & dma_mask) != result+size-1)
-			panic("dma_map_single: Attempt to map address "
-			      "0x%llx-0x%llx, which can't be accessed "
-			      "according to the dma mask 0x%llx\n",
-			      physical, physical+size-1, dma_mask);
-		goto done;
+static void octeon_dma_sync_sg_for_device(struct device *dev,
+	struct scatterlist *sg, int nelems, enum dma_data_direction direction)
+{
+	swiotlb_sync_sg_for_device(dev, sg, nelems, direction);
+	mb();
+}
 
-	case OCTEON_DMA_BAR_TYPE_BIG:
-#ifdef CONFIG_64BIT
-		/* If the device supports 64bit addressing, then use BAR2 */
-		if (dma_mask > BAR2_PCI_ADDRESS) {
-			result = physical + BAR2_PCI_ADDRESS;
-			goto done;
-		}
-#endif
-		if (unlikely(physical < (4ul << 10))) {
-			panic("dma_map_single: Not allowed to map first 4KB. "
-			      "It interferes with BAR0 special area\n");
-		} else if (physical < (256ul << 20)) {
-			if (unlikely(physical + size > (256ul << 20)))
-				panic("dma_map_single: Requested memory spans "
-				      "Bar0 0:256MB and bootbus\n");
-			result = physical;
-			goto done;
-		} else if (unlikely(physical < (512ul << 20))) {
-			panic("dma_map_single: Not allowed to map bootbus\n");
-		} else if (physical < (2ul << 30)) {
-			if (unlikely(physical + size > (2ul << 30)))
-				panic("dma_map_single: Requested memory spans "
-				      "Bar0 512MB:2GB and BAR1\n");
-			result = physical;
-			goto done;
-		} else if (physical < (2ul << 30) + (128 << 20)) {
-			/* Fall through */
-		} else if (physical <
-			   (4ul << 30) - (OCTEON_PCI_BAR1_HOLE_SIZE << 20)) {
-			if (unlikely
-			    (physical + size >
-			     (4ul << 30) - (OCTEON_PCI_BAR1_HOLE_SIZE << 20)))
-				panic("dma_map_single: Requested memory "
-				      "extends past Bar1 (4GB-%luMB)\n",
-				      OCTEON_PCI_BAR1_HOLE_SIZE);
-			result = physical;
-			goto done;
-		} else if ((physical >= 0x410000000ull) &&
-			   (physical < 0x420000000ull)) {
-			if (unlikely(physical + size > 0x420000000ull))
-				panic("dma_map_single: Requested memory spans "
-				      "non existant memory\n");
-			/* BAR0 fixed mapping 256MB:512MB ->
-			 * 16GB+256MB:16GB+512MB */
-			result = physical - 0x400000000ull;
-			goto done;
-		} else {
-			/* Continued below switch statement */
-		}
-		break;
+static void *octeon_dma_alloc_coherent(struct device *dev, size_t size,
+	dma_addr_t *dma_handle, gfp_t gfp)
+{
+	void *ret;
 
-	case OCTEON_DMA_BAR_TYPE_SMALL:
-#ifdef CONFIG_64BIT
-		/* If the device supports 64bit addressing, then use BAR2 */
-		if (dma_mask > BAR2_PCI_ADDRESS) {
-			result = physical + BAR2_PCI_ADDRESS;
-			goto done;
-		}
+	if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
+		return ret;
+
+	/* ignore region specifiers */
+	gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
+
+#ifdef CONFIG_ZONE_DMA
+	if (dev == NULL)
+		gfp |= __GFP_DMA;
+	else if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
+		gfp |= __GFP_DMA;
+	else
 #endif
-		/* Continued below switch statement */
-		break;
+#ifdef CONFIG_ZONE_DMA32
+	     if (dev->coherent_dma_mask <= DMA_BIT_MASK(32))
+		gfp |= __GFP_DMA32;
+	else
+#endif
+		;
 
-	default:
-		panic("dma_map_single: Invalid octeon_dma_bar_type\n");
-	}
+	/* Don't invoke OOM killer */
+	gfp |= __GFP_NORETRY;
 
-	/* Don't allow mapping to span multiple Bar entries. The hardware guys
-	   won't guarantee that DMA across boards work */
-	if (unlikely((physical >> 22) != ((physical + size - 1) >> 22)))
-		panic("dma_map_single: "
-		      "Requested memory spans more than one Bar1 entry\n");
+	ret = swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
 
-	if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_BIG)
-		start_index = 31;
-	else if (unlikely(dma_mask < (1ul << 27)))
-		start_index = (dma_mask >> 22);
-	else
-		start_index = 31;
-
-	/* Only one processor can access the Bar register at once */
-	raw_spin_lock_irqsave(&bar1_lock, flags);
-
-	/* Look through Bar1 for existing mapping that will work */
-	for (index = start_index; index >= 0; index--) {
-		if ((bar1_state[index].address_bits == physical >> 22) &&
-		    (bar1_state[index].ref_count)) {
-			/* An existing mapping will work, use it */
-			bar1_state[index].ref_count++;
-			if (unlikely(bar1_state[index].ref_count < 0))
-				panic("dma_map_single: "
-				      "Bar1[%d] reference count overflowed\n",
-				      (int) index);
-			result = (index << 22) | (physical & ((1 << 22) - 1));
-			/* Large BAR1 is offset at 2GB */
-			if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_BIG)
-				result += 2ul << 30;
-			goto done_unlock;
-		}
-	}
+	mb();
 
-	/* No existing mappings, look for a free entry */
-	for (index = start_index; index >= 0; index--) {
-		if (unlikely(bar1_state[index].ref_count == 0)) {
-			union cvmx_pci_bar1_indexx bar1_index;
-			/* We have a free entry, use it */
-			bar1_state[index].ref_count = 1;
-			bar1_state[index].address_bits = physical >> 22;
-			bar1_index.u32 = 0;
-			/* Address bits[35:22] sent to L2C */
-			bar1_index.s.addr_idx = physical >> 22;
-			/* Don't put PCI accesses in L2. */
-			bar1_index.s.ca = 1;
-			/* Endian Swap Mode */
-			bar1_index.s.end_swp = 1;
-			/* Set '1' when the selected address range is valid. */
-			bar1_index.s.addr_v = 1;
-			octeon_npi_write32(CVMX_NPI_PCI_BAR1_INDEXX(index),
-					   bar1_index.u32);
-			/* An existing mapping will work, use it */
-			result = (index << 22) | (physical & ((1 << 22) - 1));
-			/* Large BAR1 is offset at 2GB */
-			if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_BIG)
-				result += 2ul << 30;
-			goto done_unlock;
-		}
-	}
+	return ret;
+}
 
-	pr_err("dma_map_single: "
-	       "Can't find empty BAR1 index for physical mapping 0x%llx\n",
-	       (unsigned long long) physical);
+static void octeon_dma_free_coherent(struct device *dev, size_t size,
+	void *vaddr, dma_addr_t dma_handle)
+{
+	int order = get_order(size);
 
-done_unlock:
-	raw_spin_unlock_irqrestore(&bar1_lock, flags);
-done:
-	pr_debug("dma_map_single 0x%llx->0x%llx\n", physical, result);
-	return result;
-#endif
+	if (dma_release_from_coherent(dev, order, vaddr))
+		return;
+
+	swiotlb_free_coherent(dev, size, vaddr, dma_handle);
 }
 
-void octeon_unmap_dma_mem(struct device *dev, dma_addr_t dma_addr)
+static dma_addr_t octeon_unity_phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
-#ifndef CONFIG_PCI
-	/*
-	 * Without PCI/PCIe this function can be called for Octeon internal
-	 * devices such as USB. These devices all support 64bit addressing.
-	 */
-	return;
-#else
-	unsigned long flags;
-	uint64_t index;
+	return paddr;
+}
 
+static phys_addr_t octeon_unity_dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	return daddr;
+}
+
+struct octeon_dma_map_ops {
+	struct dma_map_ops dma_map_ops;
+	dma_addr_t (*phys_to_dma)(struct device *dev, phys_addr_t paddr);
+	phys_addr_t (*dma_to_phys)(struct device *dev, dma_addr_t daddr);
+};
+
+dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
+						      struct octeon_dma_map_ops,
+						      dma_map_ops);
+
+	return ops->phys_to_dma(dev, paddr);
+}
+EXPORT_SYMBOL(phys_to_dma);
+
+phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
+						      struct octeon_dma_map_ops,
+						      dma_map_ops);
+
+	return ops->dma_to_phys(dev, daddr);
+}
+EXPORT_SYMBOL(dma_to_phys);
+
+static struct octeon_dma_map_ops octeon_linear_dma_map_ops = {
+	.dma_map_ops = {
+		.alloc_coherent = octeon_dma_alloc_coherent,
+		.free_coherent = octeon_dma_free_coherent,
+		.map_page = octeon_dma_map_page,
+		.unmap_page = swiotlb_unmap_page,
+		.map_sg = octeon_dma_map_sg,
+		.unmap_sg = swiotlb_unmap_sg_attrs,
+		.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+		.sync_single_for_device = octeon_dma_sync_single_for_device,
+		.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+		.sync_sg_for_device = octeon_dma_sync_sg_for_device,
+		.mapping_error = swiotlb_dma_mapping_error,
+		.dma_supported = swiotlb_dma_supported
+	},
+	.phys_to_dma = octeon_unity_phys_to_dma,
+	.dma_to_phys = octeon_unity_dma_to_phys
+};
+
+char *octeon_swiotlb;
+
+void __init plat_swiotlb_setup(void)
+{
+	int i;
+	phys_t max_addr;
+	phys_t addr_size;
+	size_t swiotlbsize;
+	unsigned long swiotlb_nslabs;
+
+	max_addr = 0;
+	addr_size = 0;
+
+	for (i = 0 ; i < boot_mem_map.nr_map; i++) {
+		struct boot_mem_map_entry *e = &boot_mem_map.map[i];
+		if (e->type != BOOT_MEM_RAM)
+			continue;
+
+		/* These addresses map low for PCI. */
+		if (e->addr > 0x410000000ull)
+			continue;
+
+		addr_size += e->size;
+
+		if (max_addr < e->addr + e->size)
+			max_addr = e->addr + e->size;
+
+	}
+
+	swiotlbsize = PAGE_SIZE;
+
+#ifdef CONFIG_PCI
 	/*
-	 * Platform devices, such as the internal USB, skip all
-	 * translation and use Octeon physical addresses directly.
+	 * For OCTEON_DMA_BAR_TYPE_SMALL, size the iotlb at 1/4 memory
+	 * size to a maximum of 64MB
 	 */
-	if (dev->bus == &platform_bus_type)
-		return;
+	if (OCTEON_IS_MODEL(OCTEON_CN31XX)
+	    || OCTEON_IS_MODEL(OCTEON_CN38XX_PASS2)) {
+		swiotlbsize = addr_size / 4;
+		if (swiotlbsize > 64 * (1<<20))
+			swiotlbsize = 64 * (1<<20);
+	} else if (max_addr > 0xf0000000ul) {
+		/*
+		 * Otherwise only allocate a big iotlb if there is
+		 * memory past the BAR1 hole.
+		 */
+		swiotlbsize = 64 * (1<<20);
+	}
+#endif
+	swiotlb_nslabs = swiotlbsize >> IO_TLB_SHIFT;
+	swiotlb_nslabs = ALIGN(swiotlb_nslabs, IO_TLB_SEGSIZE);
+	swiotlbsize = swiotlb_nslabs << IO_TLB_SHIFT;
+
+	octeon_swiotlb = alloc_bootmem_low_pages(swiotlbsize);
+	swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1);
+
+	mips_dma_map_ops = &octeon_linear_dma_map_ops.dma_map_ops;
+}
+
+#ifdef CONFIG_PCI
+static struct octeon_dma_map_ops _octeon_pci_dma_map_ops = {
+	.dma_map_ops = {
+		.alloc_coherent = octeon_dma_alloc_coherent,
+		.free_coherent = octeon_dma_free_coherent,
+		.map_page = octeon_dma_map_page,
+		.unmap_page = swiotlb_unmap_page,
+		.map_sg = octeon_dma_map_sg,
+		.unmap_sg = swiotlb_unmap_sg_attrs,
+		.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+		.sync_single_for_device = octeon_dma_sync_single_for_device,
+		.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+		.sync_sg_for_device = octeon_dma_sync_sg_for_device,
+		.mapping_error = swiotlb_dma_mapping_error,
+		.dma_supported = swiotlb_dma_supported
+	},
+};
+
+struct dma_map_ops *octeon_pci_dma_map_ops;
+
+void __init octeon_pci_dma_init(void)
+{
 	switch (octeon_dma_bar_type) {
 	case OCTEON_DMA_BAR_TYPE_PCIE:
-		/* Nothing to do, all mappings are static */
-		goto done;
-
+		_octeon_pci_dma_map_ops.phys_to_dma = octeon_gen1_phys_to_dma;
+		_octeon_pci_dma_map_ops.dma_to_phys = octeon_gen1_dma_to_phys;
+		break;
 	case OCTEON_DMA_BAR_TYPE_BIG:
-#ifdef CONFIG_64BIT
-		/* Nothing to do for addresses using BAR2 */
-		if (dma_addr >= BAR2_PCI_ADDRESS)
-			goto done;
-#endif
-		if (unlikely(dma_addr < (4ul << 10)))
-			panic("dma_unmap_single: Unexpect DMA address 0x%llx\n",
-			      dma_addr);
-		else if (dma_addr < (2ul << 30))
-			/* Nothing to do for addresses using BAR0 */
-			goto done;
-		else if (dma_addr < (2ul << 30) + (128ul << 20))
-			/* Need to unmap, fall through */
-			index = (dma_addr - (2ul << 30)) >> 22;
-		else if (dma_addr <
-			 (4ul << 30) - (OCTEON_PCI_BAR1_HOLE_SIZE << 20))
-			goto done;	/* Nothing to do for the rest of BAR1 */
-		else
-			panic("dma_unmap_single: Unexpect DMA address 0x%llx\n",
-			      dma_addr);
-		/* Continued below switch statement */
+		_octeon_pci_dma_map_ops.phys_to_dma = octeon_big_phys_to_dma;
+		_octeon_pci_dma_map_ops.dma_to_phys = octeon_big_dma_to_phys;
 		break;
-
 	case OCTEON_DMA_BAR_TYPE_SMALL:
-#ifdef CONFIG_64BIT
-		/* Nothing to do for addresses using BAR2 */
-		if (dma_addr >= BAR2_PCI_ADDRESS)
-			goto done;
-#endif
-		index = dma_addr >> 22;
-		/* Continued below switch statement */
+		_octeon_pci_dma_map_ops.phys_to_dma = octeon_small_phys_to_dma;
+		_octeon_pci_dma_map_ops.dma_to_phys = octeon_small_dma_to_phys;
 		break;
-
 	default:
-		panic("dma_unmap_single: Invalid octeon_dma_bar_type\n");
+		BUG();
 	}
-
-	if (unlikely(index > 31))
-		panic("dma_unmap_single: "
-		      "Attempt to unmap an invalid address (0x%llx)\n",
-		      dma_addr);
-
-	raw_spin_lock_irqsave(&bar1_lock, flags);
-	bar1_state[index].ref_count--;
-	if (bar1_state[index].ref_count == 0)
-		octeon_npi_write32(CVMX_NPI_PCI_BAR1_INDEXX(index), 0);
-	else if (unlikely(bar1_state[index].ref_count < 0))
-		panic("dma_unmap_single: Bar1[%u] reference count < 0\n",
-		      (int) index);
-	raw_spin_unlock_irqrestore(&bar1_lock, flags);
-done:
-	pr_debug("dma_unmap_single 0x%llx\n", dma_addr);
-	return;
-#endif
+	octeon_pci_dma_map_ops = &_octeon_pci_dma_map_ops.dma_map_ops;
 }
+#endif /* CONFIG_PCI */