diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-05 16:12:10 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-05 16:12:10 -0800 |
commit | 39cf7c398122ff6d7df13d2832810933d227ac59 (patch) | |
tree | 0020e65f058e20ed35701b0645a5809ea781a390 /drivers/iommu | |
parent | ab1228e42e71f5cb687c740c4c304f1d48bcf68a (diff) | |
parent | b67ad2f7c7514f94fe6bbd0cd86add445eb4e64a (diff) | |
download | op-kernel-dev-39cf7c398122ff6d7df13d2832810933d227ac59.zip op-kernel-dev-39cf7c398122ff6d7df13d2832810933d227ac59.tar.gz |
Merge tag 'iommu-updates-v4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull iommu updates from Joerg Roedel:
"This time including:
- A new IOMMU driver for s390 pci devices
- Common dma-ops support based on iommu-api for ARM64. The plan is
to use this as a basis for ARM32 and hopefully other architectures
as well in the future.
- MSI support for ARM-SMMUv3
- Cleanups and dead code removal in the AMD IOMMU driver
- Better RMRR handling for the Intel VT-d driver
- Various other cleanups and small fixes"
* tag 'iommu-updates-v4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (41 commits)
iommu/vt-d: Fix return value check of parse_ioapics_under_ir()
iommu/vt-d: Propagate error-value from ir_parse_ioapic_hpet_scope()
iommu/vt-d: Adjust the return value of the parse_ioapics_under_ir
iommu: Move default domain allocation to iommu_group_get_for_dev()
iommu: Remove is_pci_dev() fall-back from iommu_group_get_for_dev
iommu/arm-smmu: Switch to device_group call-back
iommu/fsl: Convert to device_group call-back
iommu: Add device_group call-back to x86 iommu drivers
iommu: Add generic_device_group() function
iommu: Export and rename iommu_group_get_for_pci_dev()
iommu: Revive device_group iommu-ops call-back
iommu/amd: Remove find_last_devid_on_pci()
iommu/amd: Remove first/last_device handling
iommu/amd: Initialize amd_iommu_last_bdf for DEV_ALL
iommu/amd: Cleanup buffer allocation
iommu/amd: Remove cmd_buf_size and evt_buf_size from struct amd_iommu
iommu/amd: Align DTE flag definitions
iommu/amd: Remove old alias handling code
iommu/amd: Set alias DTE in do_attach/do_detach
iommu/amd: WARN when __[attach|detach]_device are called with irqs enabled
...
Diffstat (limited to 'drivers/iommu')
-rw-r--r-- | drivers/iommu/Kconfig | 15 | ||||
-rw-r--r-- | drivers/iommu/Makefile | 2 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu.c | 173 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_init.c | 120 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_types.h | 13 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu-v3.c | 155 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu.c | 132 | ||||
-rw-r--r-- | drivers/iommu/dma-iommu.c | 524 | ||||
-rw-r--r-- | drivers/iommu/fsl_pamu_domain.c | 41 | ||||
-rw-r--r-- | drivers/iommu/intel-iommu.c | 83 | ||||
-rw-r--r-- | drivers/iommu/intel_irq_remapping.c | 64 | ||||
-rw-r--r-- | drivers/iommu/iommu.c | 46 | ||||
-rw-r--r-- | drivers/iommu/omap-iommu.c | 58 | ||||
-rw-r--r-- | drivers/iommu/omap-iommu.h | 9 | ||||
-rw-r--r-- | drivers/iommu/s390-iommu.c | 337 |
15 files changed, 1351 insertions, 421 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index e1738f6..b9094e9 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -48,6 +48,13 @@ config OF_IOMMU def_bool y depends on OF && IOMMU_API +# IOMMU-agnostic DMA-mapping layer +config IOMMU_DMA + bool + depends on NEED_SG_DMA_LENGTH + select IOMMU_API + select IOMMU_IOVA + config FSL_PAMU bool "Freescale IOMMU support" depends on PPC32 @@ -371,6 +378,7 @@ config ARM_SMMU_V3 depends on ARM64 && PCI select IOMMU_API select IOMMU_IO_PGTABLE_LPAE + select GENERIC_MSI_IRQ_DOMAIN help Support for implementations of the ARM System MMU architecture version 3 providing translation support to a PCIe root complex. @@ -378,4 +386,11 @@ config ARM_SMMU_V3 Say Y here if your system includes an IOMMU device implementing the ARM SMMUv3 architecture. +config S390_IOMMU + def_bool y if S390 && PCI + depends on S390 && PCI + select IOMMU_API + help + Support for the IOMMU API for s390 PCI devices. + endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index dc6f511..68faca02 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o +obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o obj-$(CONFIG_IOMMU_IOVA) += iova.o @@ -24,3 +25,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o +obj-$(CONFIG_S390_IOMMU) += s390-iommu.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 532e2a2..0d533bb 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -89,8 +89,6 @@ static struct dma_map_ops amd_iommu_dma_ops; struct iommu_dev_data { struct list_head list; /* For 
domain->dev_list */ struct list_head dev_data_list; /* For global dev_data_list */ - struct list_head alias_list; /* Link alias-groups together */ - struct iommu_dev_data *alias_data;/* The alias dev_data */ struct protection_domain *domain; /* Domain the device is bound to */ u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ @@ -136,8 +134,6 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) if (!dev_data) return NULL; - INIT_LIST_HEAD(&dev_data->alias_list); - dev_data->devid = devid; spin_lock_irqsave(&dev_data_list_lock, flags); @@ -147,17 +143,6 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) return dev_data; } -static void free_dev_data(struct iommu_dev_data *dev_data) -{ - unsigned long flags; - - spin_lock_irqsave(&dev_data_list_lock, flags); - list_del(&dev_data->dev_data_list); - spin_unlock_irqrestore(&dev_data_list_lock, flags); - - kfree(dev_data); -} - static struct iommu_dev_data *search_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -311,73 +296,10 @@ out: iommu_group_put(group); } -static int __last_alias(struct pci_dev *pdev, u16 alias, void *data) -{ - *(u16 *)data = alias; - return 0; -} - -static u16 get_alias(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - u16 devid, ivrs_alias, pci_alias; - - devid = get_device_id(dev); - ivrs_alias = amd_iommu_alias_table[devid]; - pci_for_each_dma_alias(pdev, __last_alias, &pci_alias); - - if (ivrs_alias == pci_alias) - return ivrs_alias; - - /* - * DMA alias showdown - * - * The IVRS is fairly reliable in telling us about aliases, but it - * can't know about every screwy device. If we don't have an IVRS - * reported alias, use the PCI reported alias. In that case we may - * still need to initialize the rlookup and dev_table entries if the - * alias is to a non-existent device. 
- */ - if (ivrs_alias == devid) { - if (!amd_iommu_rlookup_table[pci_alias]) { - amd_iommu_rlookup_table[pci_alias] = - amd_iommu_rlookup_table[devid]; - memcpy(amd_iommu_dev_table[pci_alias].data, - amd_iommu_dev_table[devid].data, - sizeof(amd_iommu_dev_table[pci_alias].data)); - } - - return pci_alias; - } - - pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d " - "for device %s[%04x:%04x], kernel reported alias " - "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), - PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device, - PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias), - PCI_FUNC(pci_alias)); - - /* - * If we don't have a PCI DMA alias and the IVRS alias is on the same - * bus, then the IVRS table may know about a quirk that we don't. - */ - if (pci_alias == devid && - PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { - pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; - pdev->dma_alias_devfn = ivrs_alias & 0xff; - pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n", - PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), - dev_name(dev)); - } - - return ivrs_alias; -} - static int iommu_init_device(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct iommu_dev_data *dev_data; - u16 alias; if (dev->archdata.iommu) return 0; @@ -386,24 +308,6 @@ static int iommu_init_device(struct device *dev) if (!dev_data) return -ENOMEM; - alias = get_alias(dev); - - if (alias != dev_data->devid) { - struct iommu_dev_data *alias_data; - - alias_data = find_dev_data(alias); - if (alias_data == NULL) { - pr_err("AMD-Vi: Warning: Unhandled device %s\n", - dev_name(dev)); - free_dev_data(dev_data); - return -ENOTSUPP; - } - dev_data->alias_data = alias_data; - - /* Add device to the alias_list */ - list_add(&dev_data->alias_list, &alias_data->alias_list); - } - if (pci_iommuv2_capable(pdev)) { struct amd_iommu *iommu; @@ -445,9 +349,6 @@ static void iommu_uninit_device(struct device *dev) iommu_group_remove_device(dev); - /* Unlink from alias, it 
may change if another device is re-plugged */ - dev_data->alias_data = NULL; - /* Remove dma-ops */ dev->archdata.dma_ops = NULL; @@ -633,7 +534,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) while (head != tail) { iommu_print_event(iommu, iommu->evt_buf + head); - head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; + head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE; } writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); @@ -783,7 +684,7 @@ static void copy_cmd_to_buffer(struct amd_iommu *iommu, u8 *target; target = iommu->cmd_buf + tail; - tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; + tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; /* Copy command to buffer */ memcpy(target, cmd, sizeof(*cmd)); @@ -950,15 +851,13 @@ static int iommu_queue_command_sync(struct amd_iommu *iommu, u32 left, tail, head, next_tail; unsigned long flags; - WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); - again: spin_lock_irqsave(&iommu->lock, flags); head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; - left = (head - next_tail) % iommu->cmd_buf_size; + next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; + left = (head - next_tail) % CMD_BUFFER_SIZE; if (left <= 2) { struct iommu_cmd sync_cmd; @@ -1114,11 +1013,15 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data, static int device_flush_dte(struct iommu_dev_data *dev_data) { struct amd_iommu *iommu; + u16 alias; int ret; iommu = amd_iommu_rlookup_table[dev_data->devid]; + alias = amd_iommu_alias_table[dev_data->devid]; ret = iommu_flush_dte(iommu, dev_data->devid); + if (!ret && alias != dev_data->devid) + ret = iommu_flush_dte(iommu, alias); if (ret) return ret; @@ -1984,27 +1887,33 @@ static void do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { struct amd_iommu *iommu; + u16 alias; bool ats; iommu = 
amd_iommu_rlookup_table[dev_data->devid]; + alias = amd_iommu_alias_table[dev_data->devid]; ats = dev_data->ats.enabled; /* Update data structures */ dev_data->domain = domain; list_add(&dev_data->list, &domain->dev_list); - set_dte_entry(dev_data->devid, domain, ats); /* Do reference counting */ domain->dev_iommu[iommu->index] += 1; domain->dev_cnt += 1; - /* Flush the DTE entry */ + /* Update device table */ + set_dte_entry(dev_data->devid, domain, ats); + if (alias != dev_data->devid) + set_dte_entry(dev_data->devid, domain, ats); + device_flush_dte(dev_data); } static void do_detach(struct iommu_dev_data *dev_data) { struct amd_iommu *iommu; + u16 alias; /* * First check if the device is still attached. It might already @@ -2016,6 +1925,7 @@ static void do_detach(struct iommu_dev_data *dev_data) return; iommu = amd_iommu_rlookup_table[dev_data->devid]; + alias = amd_iommu_alias_table[dev_data->devid]; /* decrease reference counters */ dev_data->domain->dev_iommu[iommu->index] -= 1; @@ -2025,6 +1935,8 @@ static void do_detach(struct iommu_dev_data *dev_data) dev_data->domain = NULL; list_del(&dev_data->list); clear_dte_entry(dev_data->devid); + if (alias != dev_data->devid) + clear_dte_entry(alias); /* Flush the DTE entry */ device_flush_dte(dev_data); @@ -2037,29 +1949,23 @@ static void do_detach(struct iommu_dev_data *dev_data) static int __attach_device(struct iommu_dev_data *dev_data, struct protection_domain *domain) { - struct iommu_dev_data *head, *entry; int ret; + /* + * Must be called with IRQs disabled. Warn here to detect early + * when its not. 
+ */ + WARN_ON(!irqs_disabled()); + /* lock domain */ spin_lock(&domain->lock); - head = dev_data; - - if (head->alias_data != NULL) - head = head->alias_data; - - /* Now we have the root of the alias group, if any */ - ret = -EBUSY; - if (head->domain != NULL) + if (dev_data->domain != NULL) goto out_unlock; /* Attach alias group root */ - do_attach(head, domain); - - /* Attach other devices in the alias group */ - list_for_each_entry(entry, &head->alias_list, alias_list) - do_attach(entry, domain); + do_attach(dev_data, domain); ret = 0; @@ -2209,26 +2115,24 @@ static int attach_device(struct device *dev, */ static void __detach_device(struct iommu_dev_data *dev_data) { - struct iommu_dev_data *head, *entry; struct protection_domain *domain; - unsigned long flags; - BUG_ON(!dev_data->domain); - - domain = dev_data->domain; + /* + * Must be called with IRQs disabled. Warn here to detect early + * when its not. + */ + WARN_ON(!irqs_disabled()); - spin_lock_irqsave(&domain->lock, flags); + if (WARN_ON(!dev_data->domain)) + return; - head = dev_data; - if (head->alias_data != NULL) - head = head->alias_data; + domain = dev_data->domain; - list_for_each_entry(entry, &head->alias_list, alias_list) - do_detach(entry); + spin_lock(&domain->lock); - do_detach(head); + do_detach(dev_data); - spin_unlock_irqrestore(&domain->lock, flags); + spin_unlock(&domain->lock); } /* @@ -3198,6 +3102,7 @@ static const struct iommu_ops amd_iommu_ops = { .iova_to_phys = amd_iommu_iova_to_phys, .add_device = amd_iommu_add_device, .remove_device = amd_iommu_remove_device, + .device_group = pci_device_group, .get_dm_regions = amd_iommu_get_dm_regions, .put_dm_regions = amd_iommu_put_dm_regions, .pgsize_bitmap = AMD_IOMMU_PGSIZES, diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 9f86ecf..013bdff 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -408,20 +408,6 @@ static inline int ivhd_entry_length(u8 *ivhd) } /* - * This 
function reads the last device id the IOMMU has to handle from the PCI - * capability header for this IOMMU - */ -static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) -{ - u32 cap; - - cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); - update_last_devid(PCI_DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); - - return 0; -} - -/* * After reading the highest device id from the IOMMU PCI capability header * this function looks if there is a higher device id defined in the ACPI table */ @@ -433,14 +419,13 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) p += sizeof(*h); end += h->length; - find_last_devid_on_pci(PCI_BUS_NUM(h->devid), - PCI_SLOT(h->devid), - PCI_FUNC(h->devid), - h->cap_ptr); - while (p < end) { dev = (struct ivhd_entry *)p; switch (dev->type) { + case IVHD_DEV_ALL: + /* Use maximum BDF value for DEV_ALL */ + update_last_devid(0xffff); + break; case IVHD_DEV_SELECT: case IVHD_DEV_RANGE_END: case IVHD_DEV_ALIAS: @@ -513,17 +498,12 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) * write commands to that buffer later and the IOMMU will execute them * asynchronously */ -static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) +static int __init alloc_command_buffer(struct amd_iommu *iommu) { - u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(CMD_BUFFER_SIZE)); - - if (cmd_buf == NULL) - return NULL; - - iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED; + iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(CMD_BUFFER_SIZE)); - return cmd_buf; + return iommu->cmd_buf ? 
0 : -ENOMEM; } /* @@ -557,27 +537,20 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) &entry, sizeof(entry)); amd_iommu_reset_cmd_buffer(iommu); - iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED); } static void __init free_command_buffer(struct amd_iommu *iommu) { - free_pages((unsigned long)iommu->cmd_buf, - get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED))); + free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); } /* allocates the memory where the IOMMU will log its events to */ -static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) +static int __init alloc_event_buffer(struct amd_iommu *iommu) { - iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(EVT_BUFFER_SIZE)); + iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(EVT_BUFFER_SIZE)); - if (iommu->evt_buf == NULL) - return NULL; - - iommu->evt_buf_size = EVT_BUFFER_SIZE; - - return iommu->evt_buf; + return iommu->evt_buf ? 0 : -ENOMEM; } static void iommu_enable_event_buffer(struct amd_iommu *iommu) @@ -604,15 +577,12 @@ static void __init free_event_buffer(struct amd_iommu *iommu) } /* allocates the memory where the IOMMU will log its events to */ -static u8 * __init alloc_ppr_log(struct amd_iommu *iommu) +static int __init alloc_ppr_log(struct amd_iommu *iommu) { - iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(PPR_LOG_SIZE)); - - if (iommu->ppr_log == NULL) - return NULL; + iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(PPR_LOG_SIZE)); - return iommu->ppr_log; + return iommu->ppr_log ? 
0 : -ENOMEM; } static void iommu_enable_ppr_log(struct amd_iommu *iommu) @@ -835,20 +805,10 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, switch (e->type) { case IVHD_DEV_ALL: - DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x" - " last device %02x:%02x.%x flags: %02x\n", - PCI_BUS_NUM(iommu->first_device), - PCI_SLOT(iommu->first_device), - PCI_FUNC(iommu->first_device), - PCI_BUS_NUM(iommu->last_device), - PCI_SLOT(iommu->last_device), - PCI_FUNC(iommu->last_device), - e->flags); + DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags); - for (dev_i = iommu->first_device; - dev_i <= iommu->last_device; ++dev_i) - set_dev_entry_from_acpi(iommu, dev_i, - e->flags, 0); + for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i) + set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); break; case IVHD_DEV_SELECT: @@ -1004,17 +964,6 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, return 0; } -/* Initializes the device->iommu mapping for the driver */ -static int __init init_iommu_devices(struct amd_iommu *iommu) -{ - u32 i; - - for (i = iommu->first_device; i <= iommu->last_device; ++i) - set_iommu_for_device(iommu, i); - - return 0; -} - static void __init free_iommu_one(struct amd_iommu *iommu) { free_command_buffer(iommu); @@ -1111,12 +1060,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (!iommu->mmio_base) return -ENOMEM; - iommu->cmd_buf = alloc_command_buffer(iommu); - if (!iommu->cmd_buf) + if (alloc_command_buffer(iommu)) return -ENOMEM; - iommu->evt_buf = alloc_event_buffer(iommu); - if (!iommu->evt_buf) + if (alloc_event_buffer(iommu)) return -ENOMEM; iommu->int_enabled = false; @@ -1135,8 +1082,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) */ amd_iommu_rlookup_table[iommu->devid] = NULL; - init_iommu_devices(iommu); - return 0; } @@ -1266,11 +1211,6 @@ static int iommu_init_pci(struct amd_iommu *iommu) pci_read_config_dword(iommu->dev, 
cap_ptr + MMIO_MISC_OFFSET, &misc); - iommu->first_device = PCI_DEVID(MMIO_GET_BUS(range), - MMIO_GET_FD(range)); - iommu->last_device = PCI_DEVID(MMIO_GET_BUS(range), - MMIO_GET_LD(range)); - if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) amd_iommu_iotlb_sup = false; @@ -1308,11 +1248,8 @@ static int iommu_init_pci(struct amd_iommu *iommu) amd_iommu_v2_present = true; } - if (iommu_feature(iommu, FEATURE_PPR)) { - iommu->ppr_log = alloc_ppr_log(iommu); - if (!iommu->ppr_log) - return -ENOMEM; - } + if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu)) + return -ENOMEM; if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) amd_iommu_np_cache = true; @@ -1758,11 +1695,8 @@ static void __init free_on_init_error(void) free_pages((unsigned long)irq_lookup_table, get_order(rlookup_table_size)); - if (amd_iommu_irq_cache) { - kmem_cache_destroy(amd_iommu_irq_cache); - amd_iommu_irq_cache = NULL; - - } + kmem_cache_destroy(amd_iommu_irq_cache); + amd_iommu_irq_cache = NULL; free_pages((unsigned long)amd_iommu_rlookup_table, get_order(rlookup_table_size)); @@ -2201,7 +2135,7 @@ int __init amd_iommu_detect(void) iommu_detected = 1; x86_init.iommu.iommu_init = amd_iommu_init; - return 0; + return 1; } /**************************************************************************** diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 6a0bf1a..b08cf57 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -295,9 +295,9 @@ #define IOMMU_PTE_IR (1ULL << 61) #define IOMMU_PTE_IW (1ULL << 62) +#define DTE_FLAG_IOTLB (1ULL << 32) +#define DTE_FLAG_GV (1ULL << 55) #define DTE_FLAG_MASK (0x3ffULL << 32) -#define DTE_FLAG_IOTLB (0x01UL << 32) -#define DTE_FLAG_GV (0x01ULL << 55) #define DTE_GLX_SHIFT (56) #define DTE_GLX_MASK (3) @@ -517,11 +517,6 @@ struct amd_iommu { /* pci domain of this IOMMU */ u16 pci_seg; - /* first device this IOMMU handles. read from PCI */ - u16 first_device; - /* last device this IOMMU handles. 
read from PCI */ - u16 last_device; - /* start of exclusion range of that IOMMU */ u64 exclusion_start; /* length of exclusion range of that IOMMU */ @@ -529,11 +524,7 @@ struct amd_iommu { /* command buffer virtual address */ u8 *cmd_buf; - /* size of command buffer */ - u32 cmd_buf_size; - /* size of event buffer */ - u32 evt_buf_size; /* event buffer virtual address */ u8 *evt_buf; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 286e890..4e5118a 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -26,8 +26,10 @@ #include <linux/iommu.h> #include <linux/iopoll.h> #include <linux/module.h> +#include <linux/msi.h> #include <linux/of.h> #include <linux/of_address.h> +#include <linux/of_platform.h> #include <linux/pci.h> #include <linux/platform_device.h> @@ -403,6 +405,31 @@ enum pri_resp { PRI_RESP_SUCC, }; +enum arm_smmu_msi_index { + EVTQ_MSI_INDEX, + GERROR_MSI_INDEX, + PRIQ_MSI_INDEX, + ARM_SMMU_MAX_MSIS, +}; + +static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = { + [EVTQ_MSI_INDEX] = { + ARM_SMMU_EVTQ_IRQ_CFG0, + ARM_SMMU_EVTQ_IRQ_CFG1, + ARM_SMMU_EVTQ_IRQ_CFG2, + }, + [GERROR_MSI_INDEX] = { + ARM_SMMU_GERROR_IRQ_CFG0, + ARM_SMMU_GERROR_IRQ_CFG1, + ARM_SMMU_GERROR_IRQ_CFG2, + }, + [PRIQ_MSI_INDEX] = { + ARM_SMMU_PRIQ_IRQ_CFG0, + ARM_SMMU_PRIQ_IRQ_CFG1, + ARM_SMMU_PRIQ_IRQ_CFG2, + }, +}; + struct arm_smmu_cmdq_ent { /* Common fields */ u8 opcode; @@ -570,7 +597,6 @@ struct arm_smmu_device { unsigned int sid_bits; struct arm_smmu_strtab_cfg strtab_cfg; - struct list_head list; }; /* SMMU private data for an IOMMU group */ @@ -605,10 +631,6 @@ struct arm_smmu_domain { struct iommu_domain domain; }; -/* Our list of SMMU instances */ -static DEFINE_SPINLOCK(arm_smmu_devices_lock); -static LIST_HEAD(arm_smmu_devices); - struct arm_smmu_option_prop { u32 opt; const char *prop; @@ -1427,7 +1449,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg 
*pgtbl_cfg) { int ret; - u16 asid; + int asid; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; @@ -1439,10 +1461,11 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, &cfg->cdptr_dma, GFP_KERNEL); if (!cfg->cdptr) { dev_warn(smmu->dev, "failed to allocate context descriptor\n"); + ret = -ENOMEM; goto out_free_asid; } - cfg->cd.asid = asid; + cfg->cd.asid = (u16)asid; cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr; cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0]; @@ -1456,7 +1479,7 @@ out_free_asid: static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg) { - u16 vmid; + int vmid; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; @@ -1464,7 +1487,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, if (IS_ERR_VALUE(vmid)) return vmid; - cfg->vmid = vmid; + cfg->vmid = (u16)vmid; cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; return 0; @@ -1726,7 +1749,8 @@ static void __arm_smmu_release_pci_iommudata(void *data) static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev) { struct device_node *of_node; - struct arm_smmu_device *curr, *smmu = NULL; + struct platform_device *smmu_pdev; + struct arm_smmu_device *smmu = NULL; struct pci_bus *bus = pdev->bus; /* Walk up to the root bus */ @@ -1739,14 +1763,10 @@ static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev) return NULL; /* See if we can find an SMMU corresponding to the phandle */ - spin_lock(&arm_smmu_devices_lock); - list_for_each_entry(curr, &arm_smmu_devices, list) { - if (curr->dev->of_node == of_node) { - smmu = curr; - break; - } - } - spin_unlock(&arm_smmu_devices_lock); + smmu_pdev = of_find_device_by_node(of_node); + if (smmu_pdev) + smmu = 
platform_get_drvdata(smmu_pdev); + of_node_put(of_node); return smmu; } @@ -1902,6 +1922,7 @@ static struct iommu_ops arm_smmu_ops = { .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, .remove_device = arm_smmu_remove_device, + .device_group = pci_device_group, .domain_get_attr = arm_smmu_domain_get_attr, .domain_set_attr = arm_smmu_domain_set_attr, .pgsize_bitmap = -1UL, /* Restricted during device attach */ @@ -2186,6 +2207,72 @@ static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, 1, ARM_SMMU_POLL_TIMEOUT_US); } +static void arm_smmu_free_msis(void *data) +{ + struct device *dev = data; + platform_msi_domain_free_irqs(dev); +} + +static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) +{ + phys_addr_t doorbell; + struct device *dev = msi_desc_to_dev(desc); + struct arm_smmu_device *smmu = dev_get_drvdata(dev); + phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index]; + + doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo; + doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT; + + writeq_relaxed(doorbell, smmu->base + cfg[0]); + writel_relaxed(msg->data, smmu->base + cfg[1]); + writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]); +} + +static void arm_smmu_setup_msis(struct arm_smmu_device *smmu) +{ + struct msi_desc *desc; + int ret, nvec = ARM_SMMU_MAX_MSIS; + struct device *dev = smmu->dev; + + /* Clear the MSI address regs */ + writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); + writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); + + if (smmu->features & ARM_SMMU_FEAT_PRI) + writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); + else + nvec--; + + if (!(smmu->features & ARM_SMMU_FEAT_MSI)) + return; + + /* Allocate MSIs for evtq, gerror and priq. 
Ignore cmdq */ + ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg); + if (ret) { + dev_warn(dev, "failed to allocate MSIs\n"); + return; + } + + for_each_msi_entry(desc, dev) { + switch (desc->platform.msi_index) { + case EVTQ_MSI_INDEX: + smmu->evtq.q.irq = desc->irq; + break; + case GERROR_MSI_INDEX: + smmu->gerr_irq = desc->irq; + break; + case PRIQ_MSI_INDEX: + smmu->priq.q.irq = desc->irq; + break; + default: /* Unknown */ + continue; + } + } + + /* Add callback to free MSIs on teardown */ + devm_add_action(dev, arm_smmu_free_msis, dev); +} + static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) { int ret, irq; @@ -2199,11 +2286,9 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) return ret; } - /* Clear the MSI address regs */ - writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); - writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); + arm_smmu_setup_msis(smmu); - /* Request wired interrupt lines */ + /* Request interrupt lines */ irq = smmu->evtq.q.irq; if (irq) { ret = devm_request_threaded_irq(smmu->dev, irq, @@ -2232,8 +2317,6 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) } if (smmu->features & ARM_SMMU_FEAT_PRI) { - writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); - irq = smmu->priq.q.irq; if (irq) { ret = devm_request_threaded_irq(smmu->dev, irq, @@ -2612,16 +2695,14 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) if (ret) return ret; + /* Record our private device structure */ + platform_set_drvdata(pdev, smmu); + /* Reset the device */ ret = arm_smmu_device_reset(smmu); if (ret) goto out_free_structures; - /* Record our private device structure */ - INIT_LIST_HEAD(&smmu->list); - spin_lock(&arm_smmu_devices_lock); - list_add(&smmu->list, &arm_smmu_devices); - spin_unlock(&arm_smmu_devices_lock); return 0; out_free_structures: @@ -2631,21 +2712,7 @@ out_free_structures: static int arm_smmu_device_remove(struct platform_device *pdev) { - struct 
arm_smmu_device *curr, *smmu = NULL; - struct device *dev = &pdev->dev; - - spin_lock(&arm_smmu_devices_lock); - list_for_each_entry(curr, &arm_smmu_devices, list) { - if (curr->dev == dev) { - smmu = curr; - list_del(&smmu->list); - break; - } - } - spin_unlock(&arm_smmu_devices_lock); - - if (!smmu) - return -ENODEV; + struct arm_smmu_device *smmu = platform_get_drvdata(pdev); arm_smmu_device_disable(smmu); arm_smmu_free_structures(smmu); diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 48a39df..47dc7a7 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -70,6 +70,18 @@ ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \ ? 0x400 : 0)) +#ifdef CONFIG_64BIT +#define smmu_writeq writeq_relaxed +#else +#define smmu_writeq(reg64, addr) \ + do { \ + u64 __val = (reg64); \ + void __iomem *__addr = (addr); \ + writel_relaxed(__val >> 32, __addr + 4); \ + writel_relaxed(__val, __addr); \ + } while (0) +#endif + /* Configuration registers */ #define ARM_SMMU_GR0_sCR0 0x0 #define sCR0_CLIENTPD (1 << 0) @@ -185,10 +197,8 @@ #define ARM_SMMU_CB_SCTLR 0x0 #define ARM_SMMU_CB_RESUME 0x8 #define ARM_SMMU_CB_TTBCR2 0x10 -#define ARM_SMMU_CB_TTBR0_LO 0x20 -#define ARM_SMMU_CB_TTBR0_HI 0x24 -#define ARM_SMMU_CB_TTBR1_LO 0x28 -#define ARM_SMMU_CB_TTBR1_HI 0x2c +#define ARM_SMMU_CB_TTBR0 0x20 +#define ARM_SMMU_CB_TTBR1 0x28 #define ARM_SMMU_CB_TTBCR 0x30 #define ARM_SMMU_CB_S1_MAIR0 0x38 #define ARM_SMMU_CB_S1_MAIR1 0x3c @@ -226,7 +236,7 @@ #define TTBCR2_SEP_SHIFT 15 #define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT) -#define TTBRn_HI_ASID_SHIFT 16 +#define TTBRn_ASID_SHIFT 48 #define FSR_MULTI (1 << 31) #define FSR_SS (1 << 30) @@ -695,12 +705,12 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg) { u32 reg; + u64 reg64; bool stage1; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; struct arm_smmu_device *smmu = smmu_domain->smmu; - void __iomem *cb_base, *gr0_base, *gr1_base; + 
void __iomem *cb_base, *gr1_base; - gr0_base = ARM_SMMU_GR0(smmu); gr1_base = ARM_SMMU_GR1(smmu); stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); @@ -738,22 +748,17 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, /* TTBRs */ if (stage1) { - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32; - reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); - - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO); - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32; - reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI); + reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; + + reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT; + smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0); + + reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; + reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT; + smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR1); } else { - reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); - reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr >> 32; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); + reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; + smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0); } /* TTBCR */ @@ -1212,17 +1217,15 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, /* ATS1 registers can only be written atomically */ va = iova & ~0xfffUL; -#ifdef CONFIG_64BIT if (smmu->version == ARM_SMMU_V2) - writeq_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR); + smmu_writeq(va, cb_base + ARM_SMMU_CB_ATS1PR); else -#endif writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR); if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) { dev_err(dev, - "iova to 
phys timed out on 0x%pad. Falling back to software table walk.\n", + "iova to phys timed out on %pad. Falling back to software table walk.\n", &iova); return ops->iova_to_phys(ops, iova); } @@ -1292,33 +1295,25 @@ static void __arm_smmu_release_pci_iommudata(void *data) kfree(data); } -static int arm_smmu_add_pci_device(struct pci_dev *pdev) +static int arm_smmu_init_pci_device(struct pci_dev *pdev, + struct iommu_group *group) { - int i, ret; - u16 sid; - struct iommu_group *group; struct arm_smmu_master_cfg *cfg; - - group = iommu_group_get_for_dev(&pdev->dev); - if (IS_ERR(group)) - return PTR_ERR(group); + u16 sid; + int i; cfg = iommu_group_get_iommudata(group); if (!cfg) { cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); - if (!cfg) { - ret = -ENOMEM; - goto out_put_group; - } + if (!cfg) + return -ENOMEM; iommu_group_set_iommudata(group, cfg, __arm_smmu_release_pci_iommudata); } - if (cfg->num_streamids >= MAX_MASTER_STREAMIDS) { - ret = -ENOSPC; - goto out_put_group; - } + if (cfg->num_streamids >= MAX_MASTER_STREAMIDS) + return -ENOSPC; /* * Assume Stream ID == Requester ID for now. 
@@ -1334,16 +1329,13 @@ static int arm_smmu_add_pci_device(struct pci_dev *pdev) cfg->streamids[cfg->num_streamids++] = sid; return 0; -out_put_group: - iommu_group_put(group); - return ret; } -static int arm_smmu_add_platform_device(struct device *dev) +static int arm_smmu_init_platform_device(struct device *dev, + struct iommu_group *group) { - struct iommu_group *group; - struct arm_smmu_master *master; struct arm_smmu_device *smmu = find_smmu_for_device(dev); + struct arm_smmu_master *master; if (!smmu) return -ENODEV; @@ -1352,21 +1344,20 @@ static int arm_smmu_add_platform_device(struct device *dev) if (!master) return -ENODEV; - /* No automatic group creation for platform devices */ - group = iommu_group_alloc(); - if (IS_ERR(group)) - return PTR_ERR(group); - iommu_group_set_iommudata(group, &master->cfg, NULL); - return iommu_group_add_device(group, dev); + + return 0; } static int arm_smmu_add_device(struct device *dev) { - if (dev_is_pci(dev)) - return arm_smmu_add_pci_device(to_pci_dev(dev)); + struct iommu_group *group; + + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); - return arm_smmu_add_platform_device(dev); + return 0; } static void arm_smmu_remove_device(struct device *dev) @@ -1374,6 +1365,32 @@ static void arm_smmu_remove_device(struct device *dev) iommu_group_remove_device(dev); } +static struct iommu_group *arm_smmu_device_group(struct device *dev) +{ + struct iommu_group *group; + int ret; + + if (dev_is_pci(dev)) + group = pci_device_group(dev); + else + group = generic_device_group(dev); + + if (IS_ERR(group)) + return group; + + if (dev_is_pci(dev)) + ret = arm_smmu_init_pci_device(to_pci_dev(dev), group); + else + ret = arm_smmu_init_platform_device(dev, group); + + if (ret) { + iommu_group_put(group); + group = ERR_PTR(ret); + } + + return group; +} + static int arm_smmu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr attr, void *data) { @@ -1430,6 +1447,7 @@ static struct iommu_ops 
arm_smmu_ops = { .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, .remove_device = arm_smmu_remove_device, + .device_group = arm_smmu_device_group, .domain_get_attr = arm_smmu_domain_get_attr, .domain_set_attr = arm_smmu_domain_set_attr, .pgsize_bitmap = -1UL, /* Restricted during device attach */ diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c new file mode 100644 index 0000000..3a20db4 --- /dev/null +++ b/drivers/iommu/dma-iommu.c @@ -0,0 +1,524 @@ +/* + * A fairly generic DMA-API to IOMMU-API glue layer. + * + * Copyright (C) 2014-2015 ARM Ltd. + * + * based in part on arch/arm/mm/dma-mapping.c: + * Copyright (C) 2000-2004 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/device.h> +#include <linux/dma-iommu.h> +#include <linux/huge_mm.h> +#include <linux/iommu.h> +#include <linux/iova.h> +#include <linux/mm.h> + +int iommu_dma_init(void) +{ + return iova_cache_get(); +} + +/** + * iommu_get_dma_cookie - Acquire DMA-API resources for a domain + * @domain: IOMMU domain to prepare for DMA-API usage + * + * IOMMU drivers should normally call this from their domain_alloc + * callback when domain->type == IOMMU_DOMAIN_DMA. 
+ */ +int iommu_get_dma_cookie(struct iommu_domain *domain) +{ + struct iova_domain *iovad; + + if (domain->iova_cookie) + return -EEXIST; + + iovad = kzalloc(sizeof(*iovad), GFP_KERNEL); + domain->iova_cookie = iovad; + + return iovad ? 0 : -ENOMEM; +} +EXPORT_SYMBOL(iommu_get_dma_cookie); + +/** + * iommu_put_dma_cookie - Release a domain's DMA mapping resources + * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() + * + * IOMMU drivers should normally call this from their domain_free callback. + */ +void iommu_put_dma_cookie(struct iommu_domain *domain) +{ + struct iova_domain *iovad = domain->iova_cookie; + + if (!iovad) + return; + + put_iova_domain(iovad); + kfree(iovad); + domain->iova_cookie = NULL; +} +EXPORT_SYMBOL(iommu_put_dma_cookie); + +/** + * iommu_dma_init_domain - Initialise a DMA mapping domain + * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() + * @base: IOVA at which the mappable address space starts + * @size: Size of IOVA space + * + * @base and @size should be exact multiples of IOMMU page granularity to + * avoid rounding surprises. If necessary, we reserve the page at address 0 + * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but + * any change which could make prior IOVAs invalid will fail. + */ +int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size) +{ + struct iova_domain *iovad = domain->iova_cookie; + unsigned long order, base_pfn, end_pfn; + + if (!iovad) + return -ENODEV; + + /* Use the smallest supported page size for IOVA granularity */ + order = __ffs(domain->ops->pgsize_bitmap); + base_pfn = max_t(unsigned long, 1, base >> order); + end_pfn = (base + size - 1) >> order; + + /* Check the domain allows at least some access to the device... 
*/ + if (domain->geometry.force_aperture) { + if (base > domain->geometry.aperture_end || + base + size <= domain->geometry.aperture_start) { + pr_warn("specified DMA range outside IOMMU capability\n"); + return -EFAULT; + } + /* ...then finally give it a kicking to make sure it fits */ + base_pfn = max_t(unsigned long, base_pfn, + domain->geometry.aperture_start >> order); + end_pfn = min_t(unsigned long, end_pfn, + domain->geometry.aperture_end >> order); + } + + /* All we can safely do with an existing domain is enlarge it */ + if (iovad->start_pfn) { + if (1UL << order != iovad->granule || + base_pfn != iovad->start_pfn || + end_pfn < iovad->dma_32bit_pfn) { + pr_warn("Incompatible range for DMA domain\n"); + return -EFAULT; + } + iovad->dma_32bit_pfn = end_pfn; + } else { + init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn); + } + return 0; +} +EXPORT_SYMBOL(iommu_dma_init_domain); + +/** + * dma_direction_to_prot - Translate DMA API directions to IOMMU API page flags + * @dir: Direction of DMA transfer + * @coherent: Is the DMA master cache-coherent? + * + * Return: corresponding IOMMU API page protection flags + */ +int dma_direction_to_prot(enum dma_data_direction dir, bool coherent) +{ + int prot = coherent ? IOMMU_CACHE : 0; + + switch (dir) { + case DMA_BIDIRECTIONAL: + return prot | IOMMU_READ | IOMMU_WRITE; + case DMA_TO_DEVICE: + return prot | IOMMU_READ; + case DMA_FROM_DEVICE: + return prot | IOMMU_WRITE; + default: + return 0; + } +} + +static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size, + dma_addr_t dma_limit) +{ + unsigned long shift = iova_shift(iovad); + unsigned long length = iova_align(iovad, size) >> shift; + + /* + * Enforce size-alignment to be safe - there could perhaps be an + * attribute to control this per-device, or at least per-domain... 
+ */ + return alloc_iova(iovad, length, dma_limit >> shift, true); +} + +/* The IOVA allocator knows what we mapped, so just unmap whatever that was */ +static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr) +{ + struct iova_domain *iovad = domain->iova_cookie; + unsigned long shift = iova_shift(iovad); + unsigned long pfn = dma_addr >> shift; + struct iova *iova = find_iova(iovad, pfn); + size_t size; + + if (WARN_ON(!iova)) + return; + + size = iova_size(iova) << shift; + size -= iommu_unmap(domain, pfn << shift, size); + /* ...and if we can't, then something is horribly, horribly wrong */ + WARN_ON(size > 0); + __free_iova(iovad, iova); +} + +static void __iommu_dma_free_pages(struct page **pages, int count) +{ + while (count--) + __free_page(pages[count]); + kvfree(pages); +} + +static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp) +{ + struct page **pages; + unsigned int i = 0, array_size = count * sizeof(*pages); + + if (array_size <= PAGE_SIZE) + pages = kzalloc(array_size, GFP_KERNEL); + else + pages = vzalloc(array_size); + if (!pages) + return NULL; + + /* IOMMU can map any pages, so highmem can also be used here */ + gfp |= __GFP_NOWARN | __GFP_HIGHMEM; + + while (count) { + struct page *page = NULL; + int j, order = __fls(count); + + /* + * Higher-order allocations are a convenience rather + * than a necessity, hence using __GFP_NORETRY until + * falling back to single-page allocations.
+ */ + for (order = min(order, MAX_ORDER); order > 0; order--) { + page = alloc_pages(gfp | __GFP_NORETRY, order); + if (!page) + continue; + if (PageCompound(page)) { + if (!split_huge_page(page)) + break; + __free_pages(page, order); + } else { + split_page(page, order); + break; + } + } + if (!page) + page = alloc_page(gfp); + if (!page) { + __iommu_dma_free_pages(pages, i); + return NULL; + } + j = 1 << order; + count -= j; + while (j--) + pages[i++] = page++; + } + return pages; +} + +/** + * iommu_dma_free - Free a buffer allocated by iommu_dma_alloc() + * @dev: Device which owns this buffer + * @pages: Array of buffer pages as returned by iommu_dma_alloc() + * @size: Size of buffer in bytes + * @handle: DMA address of buffer + * + * Frees both the pages associated with the buffer, and the array + * describing them + */ +void iommu_dma_free(struct device *dev, struct page **pages, size_t size, + dma_addr_t *handle) +{ + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle); + __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); + *handle = DMA_ERROR_CODE; +} + +/** + * iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space + * @dev: Device to allocate memory for. Must be a real device + * attached to an iommu_dma_domain + * @size: Size of buffer in bytes + * @gfp: Allocation flags + * @prot: IOMMU mapping flags + * @handle: Out argument for allocated DMA handle + * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the + * given VA/PA are visible to the given non-coherent device. + * + * If @size is less than PAGE_SIZE, then a full CPU page will be allocated, + * but an IOMMU which supports smaller pages might not map the whole thing. + * + * Return: Array of struct page pointers describing the buffer, + * or NULL on failure. 
+ */ +struct page **iommu_dma_alloc(struct device *dev, size_t size, + gfp_t gfp, int prot, dma_addr_t *handle, + void (*flush_page)(struct device *, const void *, phys_addr_t)) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iova_domain *iovad = domain->iova_cookie; + struct iova *iova; + struct page **pages; + struct sg_table sgt; + dma_addr_t dma_addr; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + *handle = DMA_ERROR_CODE; + + pages = __iommu_dma_alloc_pages(count, gfp); + if (!pages) + return NULL; + + iova = __alloc_iova(iovad, size, dev->coherent_dma_mask); + if (!iova) + goto out_free_pages; + + size = iova_align(iovad, size); + if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL)) + goto out_free_iova; + + if (!(prot & IOMMU_CACHE)) { + struct sg_mapping_iter miter; + /* + * The CPU-centric flushing implied by SG_MITER_TO_SG isn't + * sufficient here, so skip it by using the "wrong" direction. + */ + sg_miter_start(&miter, sgt.sgl, sgt.orig_nents, SG_MITER_FROM_SG); + while (sg_miter_next(&miter)) + flush_page(dev, miter.addr, page_to_phys(miter.page)); + sg_miter_stop(&miter); + } + + dma_addr = iova_dma_addr(iovad, iova); + if (iommu_map_sg(domain, dma_addr, sgt.sgl, sgt.orig_nents, prot) + < size) + goto out_free_sg; + + *handle = dma_addr; + sg_free_table(&sgt); + return pages; + +out_free_sg: + sg_free_table(&sgt); +out_free_iova: + __free_iova(iovad, iova); +out_free_pages: + __iommu_dma_free_pages(pages, count); + return NULL; +} + +/** + * iommu_dma_mmap - Map a buffer into provided user VMA + * @pages: Array representing buffer from iommu_dma_alloc() + * @size: Size of buffer in bytes + * @vma: VMA describing requested userspace mapping + * + * Maps the pages of the buffer in @pages into @vma. The caller is responsible + * for verifying the correct size and protection of @vma beforehand. 
+ */ + +int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma) +{ + unsigned long uaddr = vma->vm_start; + unsigned int i, count = PAGE_ALIGN(size) >> PAGE_SHIFT; + int ret = -ENXIO; + + for (i = vma->vm_pgoff; i < count && uaddr < vma->vm_end; i++) { + ret = vm_insert_page(vma, uaddr, pages[i]); + if (ret) + break; + uaddr += PAGE_SIZE; + } + return ret; +} + +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, int prot) +{ + dma_addr_t dma_addr; + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iova_domain *iovad = domain->iova_cookie; + phys_addr_t phys = page_to_phys(page) + offset; + size_t iova_off = iova_offset(iovad, phys); + size_t len = iova_align(iovad, size + iova_off); + struct iova *iova = __alloc_iova(iovad, len, dma_get_mask(dev)); + + if (!iova) + return DMA_ERROR_CODE; + + dma_addr = iova_dma_addr(iovad, iova); + if (iommu_map(domain, dma_addr, phys - iova_off, len, prot)) { + __free_iova(iovad, iova); + return DMA_ERROR_CODE; + } + return dma_addr + iova_off; +} + +void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle); +} + +/* + * Prepare a successfully-mapped scatterlist to give back to the caller. 
+ * Handling IOVA concatenation can come later, if needed + */ +static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, + dma_addr_t dma_addr) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + /* Un-swizzling the fields here, hence the naming mismatch */ + unsigned int s_offset = sg_dma_address(s); + unsigned int s_length = sg_dma_len(s); + unsigned int s_dma_len = s->length; + + s->offset = s_offset; + s->length = s_length; + sg_dma_address(s) = dma_addr + s_offset; + dma_addr += s_dma_len; + } + return i; +} + +/* + * If mapping failed, then just restore the original list, + * but making sure the DMA fields are invalidated. + */ +static void __invalidate_sg(struct scatterlist *sg, int nents) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + if (sg_dma_address(s) != DMA_ERROR_CODE) + s->offset = sg_dma_address(s); + if (sg_dma_len(s)) + s->length = sg_dma_len(s); + sg_dma_address(s) = DMA_ERROR_CODE; + sg_dma_len(s) = 0; + } +} + +/* + * The DMA API client is passing in a scatterlist which could describe + * any old buffer layout, but the IOMMU API requires everything to be + * aligned to IOMMU pages. Hence the need for this complicated bit of + * impedance-matching, to be able to hand off a suitably-aligned list, + * but still preserve the original offsets and sizes for the caller. + */ +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int prot) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iova_domain *iovad = domain->iova_cookie; + struct iova *iova; + struct scatterlist *s, *prev = NULL; + dma_addr_t dma_addr; + size_t iova_len = 0; + int i; + + /* + * Work out how much IOVA space we need, and align the segments to + * IOVA granules for the IOMMU driver to handle. With some clever + * trickery we can modify the list in-place, but reversibly, by + * hiding the original data in the as-yet-unused DMA fields. 
+ */ + for_each_sg(sg, s, nents, i) { + size_t s_offset = iova_offset(iovad, s->offset); + size_t s_length = s->length; + + sg_dma_address(s) = s->offset; + sg_dma_len(s) = s_length; + s->offset -= s_offset; + s_length = iova_align(iovad, s_length + s_offset); + s->length = s_length; + + /* + * The simple way to avoid the rare case of a segment + * crossing the boundary mask is to pad the previous one + * to end at a naturally-aligned IOVA for this one's size, + * at the cost of potentially over-allocating a little. + */ + if (prev) { + size_t pad_len = roundup_pow_of_two(s_length); + + pad_len = (pad_len - iova_len) & (pad_len - 1); + prev->length += pad_len; + iova_len += pad_len; + } + + iova_len += s_length; + prev = s; + } + + iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev)); + if (!iova) + goto out_restore_sg; + + /* + * We'll leave any physical concatenation to the IOMMU driver's + * implementation - it knows better than we do. + */ + dma_addr = iova_dma_addr(iovad, iova); + if (iommu_map_sg(domain, dma_addr, sg, nents, prot) < iova_len) + goto out_free_iova; + + return __finalise_sg(dev, sg, nents, dma_addr); + +out_free_iova: + __free_iova(iovad, iova); +out_restore_sg: + __invalidate_sg(sg, nents); + return 0; +} + +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + /* + * The scatterlist segments are mapped into a single + * contiguous IOVA allocation, so this is incredibly easy. + */ + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), sg_dma_address(sg)); +} + +int iommu_dma_supported(struct device *dev, u64 mask) +{ + /* + * 'Special' IOMMUs which don't have the same addressing capability + * as the CPU will have to wait until we have some way to query that + * before they'll be able to use this framework. 
+ */ + return 1; +} + +int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == DMA_ERROR_CODE; +} diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 1d45293..da0e1e3 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -923,7 +923,7 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl); /* We can partition PCIe devices so assign device group to the device */ if (pci_endpt_partioning) { - group = iommu_group_get_for_dev(&pdev->dev); + group = pci_device_group(&pdev->dev); /* * PCIe controller is not a paritionable entity @@ -956,44 +956,34 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) return group; } -static int fsl_pamu_add_device(struct device *dev) +static struct iommu_group *fsl_pamu_device_group(struct device *dev) { struct iommu_group *group = ERR_PTR(-ENODEV); - struct pci_dev *pdev; - const u32 *prop; - int ret = 0, len; + int len; /* * For platform devices we allocate a separate group for * each of the devices. */ - if (dev_is_pci(dev)) { - pdev = to_pci_dev(dev); - /* Don't create device groups for virtual PCI bridges */ - if (pdev->subordinate) - return 0; + if (dev_is_pci(dev)) + group = get_pci_device_group(to_pci_dev(dev)); + else if (of_get_property(dev->of_node, "fsl,liodn", &len)) + group = get_device_iommu_group(dev); - group = get_pci_device_group(pdev); + return group; +} - } else { - prop = of_get_property(dev->of_node, "fsl,liodn", &len); - if (prop) - group = get_device_iommu_group(dev); - } +static int fsl_pamu_add_device(struct device *dev) +{ + struct iommu_group *group; + group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) return PTR_ERR(group); - /* - * Check if device has already been added to an iommu group. - * Group could have already been created for a PCI device in - * the iommu_group_get_for_dev path. 
- */ - if (!dev->iommu_group) - ret = iommu_group_add_device(group, dev); - iommu_group_put(group); - return ret; + + return 0; } static void fsl_pamu_remove_device(struct device *dev) @@ -1072,6 +1062,7 @@ static const struct iommu_ops fsl_pamu_ops = { .domain_get_attr = fsl_pamu_get_domain_attr, .add_device = fsl_pamu_add_device, .remove_device = fsl_pamu_remove_device, + .device_group = fsl_pamu_device_group, }; int __init pamu_domain_init(void) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 6a10d97..7cf80c1 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -34,6 +34,7 @@ #include <linux/mempool.h> #include <linux/memory.h> #include <linux/timer.h> +#include <linux/io.h> #include <linux/iova.h> #include <linux/iommu.h> #include <linux/intel-iommu.h> @@ -2506,17 +2507,11 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, DMA_PTE_READ|DMA_PTE_WRITE); } -static int iommu_prepare_identity_map(struct device *dev, - unsigned long long start, - unsigned long long end) +static int domain_prepare_identity_map(struct device *dev, + struct dmar_domain *domain, + unsigned long long start, + unsigned long long end) { - struct dmar_domain *domain; - int ret; - - domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); - if (!domain) - return -ENOMEM; - /* For _hardware_ passthrough, don't bother. 
But for software passthrough, we do it anyway -- it may indicate a memory range which is reserved in E820, so which didn't get set @@ -2536,8 +2531,7 @@ static int iommu_prepare_identity_map(struct device *dev, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); - ret = -EIO; - goto error; + return -EIO; } if (end >> agaw_to_width(domain->agaw)) { @@ -2547,18 +2541,27 @@ static int iommu_prepare_identity_map(struct device *dev, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); - ret = -EIO; - goto error; + return -EIO; } - ret = iommu_domain_identity_map(domain, start, end); - if (ret) - goto error; + return iommu_domain_identity_map(domain, start, end); +} - return 0; +static int iommu_prepare_identity_map(struct device *dev, + unsigned long long start, + unsigned long long end) +{ + struct dmar_domain *domain; + int ret; + + domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + if (!domain) + return -ENOMEM; + + ret = domain_prepare_identity_map(dev, domain, start, end); + if (ret) + domain_exit(domain); - error: - domain_exit(domain); return ret; } @@ -2884,18 +2887,18 @@ static void intel_iommu_init_qi(struct intel_iommu *iommu) } static int copy_context_table(struct intel_iommu *iommu, - struct root_entry __iomem *old_re, + struct root_entry *old_re, struct context_entry **tbl, int bus, bool ext) { int tbl_idx, pos = 0, idx, devfn, ret = 0, did; - struct context_entry __iomem *old_ce = NULL; struct context_entry *new_ce = NULL, ce; + struct context_entry *old_ce = NULL; struct root_entry re; phys_addr_t old_ce_phys; tbl_idx = ext ? 
bus * 2 : bus; - memcpy_fromio(&re, old_re, sizeof(re)); + memcpy(&re, old_re, sizeof(re)); for (devfn = 0; devfn < 256; devfn++) { /* First calculate the correct index */ @@ -2930,7 +2933,8 @@ static int copy_context_table(struct intel_iommu *iommu, } ret = -ENOMEM; - old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE); + old_ce = memremap(old_ce_phys, PAGE_SIZE, + MEMREMAP_WB); if (!old_ce) goto out; @@ -2942,7 +2946,7 @@ static int copy_context_table(struct intel_iommu *iommu, } /* Now copy the context entry */ - memcpy_fromio(&ce, old_ce + idx, sizeof(ce)); + memcpy(&ce, old_ce + idx, sizeof(ce)); if (!__context_present(&ce)) continue; @@ -2978,7 +2982,7 @@ static int copy_context_table(struct intel_iommu *iommu, __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE); out_unmap: - iounmap(old_ce); + memunmap(old_ce); out: return ret; @@ -2986,8 +2990,8 @@ out: static int copy_translation_tables(struct intel_iommu *iommu) { - struct root_entry __iomem *old_rt; struct context_entry **ctxt_tbls; + struct root_entry *old_rt; phys_addr_t old_rt_phys; int ctxt_table_entries; unsigned long flags; @@ -3012,7 +3016,7 @@ static int copy_translation_tables(struct intel_iommu *iommu) if (!old_rt_phys) return -EINVAL; - old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE); + old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB); if (!old_rt) return -ENOMEM; @@ -3061,7 +3065,7 @@ static int copy_translation_tables(struct intel_iommu *iommu) ret = 0; out_unmap: - iounmap(old_rt); + memunmap(old_rt); return ret; } @@ -3329,7 +3333,10 @@ static struct iova *intel_alloc_iova(struct device *dev, static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) { + struct dmar_rmrr_unit *rmrr; struct dmar_domain *domain; + struct device *i_dev; + int i, ret; domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) { @@ -3338,6 +3345,23 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) return NULL; } + /* We have a new domain - setup possible 
RMRRs for the device */ + rcu_read_lock(); + for_each_rmrr_units(rmrr) { + for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, + i, i_dev) { + if (i_dev != dev) + continue; + + ret = domain_prepare_identity_map(dev, domain, + rmrr->base_address, + rmrr->end_address); + if (ret) + dev_err(dev, "Mapping reserved region failed\n"); + } + } + rcu_read_unlock(); + return domain; } @@ -5084,6 +5108,7 @@ static const struct iommu_ops intel_iommu_ops = { .iova_to_phys = intel_iommu_iova_to_phys, .add_device = intel_iommu_add_device, .remove_device = intel_iommu_remove_device, + .device_group = pci_device_group, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, }; diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 9ec4e0d..1fae188 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -169,8 +169,26 @@ static int modify_irte(struct irq_2_iommu *irq_iommu, index = irq_iommu->irte_index + irq_iommu->sub_handle; irte = &iommu->ir_table->base[index]; - set_64bit(&irte->low, irte_modified->low); - set_64bit(&irte->high, irte_modified->high); +#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) + if ((irte->pst == 1) || (irte_modified->pst == 1)) { + bool ret; + + ret = cmpxchg_double(&irte->low, &irte->high, + irte->low, irte->high, + irte_modified->low, irte_modified->high); + /* + * We use cmpxchg16 to atomically update the 128-bit IRTE, + * and it cannot be updated by the hardware or other processors + * behind us, so the return value of cmpxchg16 should be the + * same as the old value. 
+ */ + WARN_ON(!ret); + } else +#endif + { + set_64bit(&irte->low, irte_modified->low); + set_64bit(&irte->high, irte_modified->high); + } __iommu_flush_cache(iommu, irte, sizeof(*irte)); rc = qi_flush_iec(iommu, index, 0); @@ -384,7 +402,7 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev) static int iommu_load_old_irte(struct intel_iommu *iommu) { - struct irte __iomem *old_ir_table; + struct irte *old_ir_table; phys_addr_t irt_phys; unsigned int i; size_t size; @@ -408,12 +426,12 @@ static int iommu_load_old_irte(struct intel_iommu *iommu) size = INTR_REMAP_TABLE_ENTRIES*sizeof(struct irte); /* Map the old IR table */ - old_ir_table = ioremap_cache(irt_phys, size); + old_ir_table = memremap(irt_phys, size, MEMREMAP_WB); if (!old_ir_table) return -ENOMEM; /* Copy data over */ - memcpy_fromio(iommu->ir_table->base, old_ir_table, size); + memcpy(iommu->ir_table->base, old_ir_table, size); __iommu_flush_cache(iommu, iommu->ir_table->base, size); @@ -426,7 +444,7 @@ static int iommu_load_old_irte(struct intel_iommu *iommu) bitmap_set(iommu->ir_table->bitmap, i, 1); } - iounmap(old_ir_table); + memunmap(old_ir_table); return 0; } @@ -672,7 +690,7 @@ static int __init intel_prepare_irq_remapping(void) if (!dmar_ir_support()) return -ENODEV; - if (parse_ioapics_under_ir() != 1) { + if (parse_ioapics_under_ir()) { pr_info("Not enabling interrupt remapping\n"); goto error; } @@ -727,7 +745,16 @@ static inline void set_irq_posting_cap(void) struct intel_iommu *iommu; if (!disable_irq_post) { - intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP; + /* + * If IRTE is in posted format, the 'pda' field goes across the + * 64-bit boundary, we need to use cmpxchg16b to atomically update + * it. We only expose posted-interrupt when X86_FEATURE_CX16 + * is supported. Actually, hardware platforms supporting PI + * should have X86_FEATURE_CX16 support, this has been confirmed + * with Intel hardware guys.
+ */ + if ( cpu_has_cx16 ) + intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP; for_each_iommu(iommu, drhd) if (!cap_pi_support(iommu->cap)) { @@ -907,16 +934,21 @@ static int __init parse_ioapics_under_ir(void) bool ir_supported = false; int ioapic_idx; - for_each_iommu(iommu, drhd) - if (ecap_ir_support(iommu->ecap)) { - if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu)) - return -1; + for_each_iommu(iommu, drhd) { + int ret; - ir_supported = true; - } + if (!ecap_ir_support(iommu->ecap)) + continue; + + ret = ir_parse_ioapic_hpet_scope(drhd->hdr, iommu); + if (ret) + return ret; + + ir_supported = true; + } if (!ir_supported) - return 0; + return -ENODEV; for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { int ioapic_id = mpc_ioapic_id(ioapic_idx); @@ -928,7 +960,7 @@ static int __init parse_ioapics_under_ir(void) } } - return 1; + return 0; } static int __init ir_dev_scope_init(void) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 049df49..abae363 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -728,16 +728,35 @@ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) } /* + * Generic device_group call-back function. It just allocates one + * iommu-group per device. + */ +struct iommu_group *generic_device_group(struct device *dev) +{ + struct iommu_group *group; + + group = iommu_group_alloc(); + if (IS_ERR(group)) + return NULL; + + return group; +} + +/* * Use standard PCI bus topology, isolation features, and DMA alias quirks * to find or create an IOMMU group for a device. */ -static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) +struct iommu_group *pci_device_group(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct group_for_pci_data data; struct pci_bus *bus; struct iommu_group *group = NULL; u64 devfns[4] = { 0 }; + if (WARN_ON(!dev_is_pci(dev))) + return ERR_PTR(-EINVAL); + /* * Find the upstream DMA alias for the device. 
A device must not * be aliased due to topology in order to have its own IOMMU group. @@ -791,14 +810,6 @@ static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) if (IS_ERR(group)) return NULL; - /* - * Try to allocate a default domain - needs support from the - * IOMMU driver. - */ - group->default_domain = __iommu_domain_alloc(pdev->dev.bus, - IOMMU_DOMAIN_DMA); - group->domain = group->default_domain; - return group; } @@ -814,6 +825,7 @@ static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) */ struct iommu_group *iommu_group_get_for_dev(struct device *dev) { + const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_group *group; int ret; @@ -821,14 +833,24 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (group) return group; - if (!dev_is_pci(dev)) - return ERR_PTR(-EINVAL); + group = ERR_PTR(-EINVAL); - group = iommu_group_get_for_pci_dev(to_pci_dev(dev)); + if (ops && ops->device_group) + group = ops->device_group(dev); if (IS_ERR(group)) return group; + /* + * Try to allocate a default domain - needs support from the + * IOMMU driver. 
+ */ + if (!group->default_domain) { + group->default_domain = __iommu_domain_alloc(dev->bus, + IOMMU_DOMAIN_DMA); + group->domain = group->default_domain; + } + ret = iommu_group_add_device(group, dev); if (ret) { iommu_group_put(group); diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 36d0033..3dc5b65 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -26,6 +26,8 @@ #include <linux/of_iommu.h> #include <linux/of_irq.h> #include <linux/of_platform.h> +#include <linux/regmap.h> +#include <linux/mfd/syscon.h> #include <asm/cacheflush.h> @@ -112,6 +114,18 @@ void omap_iommu_restore_ctx(struct device *dev) } EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx); +static void dra7_cfg_dspsys_mmu(struct omap_iommu *obj, bool enable) +{ + u32 val, mask; + + if (!obj->syscfg) + return; + + mask = (1 << (obj->id * DSP_SYS_MMU_CONFIG_EN_SHIFT)); + val = enable ? mask : 0; + regmap_update_bits(obj->syscfg, DSP_SYS_MMU_CONFIG, mask, val); +} + static void __iommu_set_twl(struct omap_iommu *obj, bool on) { u32 l = iommu_read_reg(obj, MMU_CNTL); @@ -147,6 +161,8 @@ static int omap2_iommu_enable(struct omap_iommu *obj) iommu_write_reg(obj, pa, MMU_TTB); + dra7_cfg_dspsys_mmu(obj, true); + if (obj->has_bus_err_back) iommu_write_reg(obj, MMU_GP_REG_BUS_ERR_BACK_EN, MMU_GP_REG); @@ -161,6 +177,7 @@ static void omap2_iommu_disable(struct omap_iommu *obj) l &= ~MMU_CNTL_MASK; iommu_write_reg(obj, l, MMU_CNTL); + dra7_cfg_dspsys_mmu(obj, false); dev_dbg(obj->dev, "%s is shutting down\n", obj->name); } @@ -864,6 +881,42 @@ static void omap_iommu_detach(struct omap_iommu *obj) dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); } +static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev, + struct omap_iommu *obj) +{ + struct device_node *np = pdev->dev.of_node; + int ret; + + if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu")) + return 0; + + if (!of_property_read_bool(np, "ti,syscon-mmuconfig")) { + dev_err(&pdev->dev, 
"ti,syscon-mmuconfig property is missing\n"); + return -EINVAL; + } + + obj->syscfg = + syscon_regmap_lookup_by_phandle(np, "ti,syscon-mmuconfig"); + if (IS_ERR(obj->syscfg)) { + /* can fail with -EPROBE_DEFER */ + ret = PTR_ERR(obj->syscfg); + return ret; + } + + if (of_property_read_u32_index(np, "ti,syscon-mmuconfig", 1, + &obj->id)) { + dev_err(&pdev->dev, "couldn't get the IOMMU instance id within subsystem\n"); + return -EINVAL; + } + + if (obj->id != 0 && obj->id != 1) { + dev_err(&pdev->dev, "invalid IOMMU instance id\n"); + return -EINVAL; + } + + return 0; +} + /* * OMAP Device MMU(IOMMU) detection */ @@ -907,6 +960,10 @@ static int omap_iommu_probe(struct platform_device *pdev) if (IS_ERR(obj->regbase)) return PTR_ERR(obj->regbase); + err = omap_iommu_dra7_get_dsp_system_cfg(pdev, obj); + if (err) + return err; + irq = platform_get_irq(pdev, 0); if (irq < 0) return -ENODEV; @@ -943,6 +1000,7 @@ static const struct of_device_id omap_iommu_of_match[] = { { .compatible = "ti,omap2-iommu" }, { .compatible = "ti,omap4-iommu" }, { .compatible = "ti,dra7-iommu" }, + { .compatible = "ti,dra7-dsp-iommu" }, {}, }; diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index a656df2..59628e5 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -30,6 +30,7 @@ struct iotlb_entry { struct omap_iommu { const char *name; void __iomem *regbase; + struct regmap *syscfg; struct device *dev; struct iommu_domain *domain; struct dentry *debug_dir; @@ -48,6 +49,7 @@ struct omap_iommu { void *ctx; /* iommu context: registres saved area */ int has_bus_err_back; + u32 id; }; struct cr_regs { @@ -159,6 +161,13 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) ((pgsz) == MMU_CAM_PGSZ_4K) ? 
0xfffff000 : 0) /* + * DSP_SYSTEM registers and bit definitions (applicable only for DRA7xx DSP) + */ +#define DSP_SYS_REVISION 0x00 +#define DSP_SYS_MMU_CONFIG 0x18 +#define DSP_SYS_MMU_CONFIG_EN_SHIFT 4 + +/* * utilities for super page(16MB, 1MB, 64KB and 4KB) */ diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c new file mode 100644 index 0000000..cbe198c --- /dev/null +++ b/drivers/iommu/s390-iommu.c @@ -0,0 +1,337 @@ +/* + * IOMMU API for s390 PCI devices + * + * Copyright IBM Corp. 2015 + * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#include <linux/pci.h> +#include <linux/iommu.h> +#include <linux/iommu-helper.h> +#include <linux/pci.h> +#include <linux/sizes.h> +#include <asm/pci_dma.h> + +/* + * Physically contiguous memory regions can be mapped with 4 KiB alignment, + * we allow all page sizes that are an order of 4KiB (no special large page + * support so far). + */ +#define S390_IOMMU_PGSIZES (~0xFFFUL) + +struct s390_domain { + struct iommu_domain domain; + struct list_head devices; + unsigned long *dma_table; + spinlock_t dma_table_lock; + spinlock_t list_lock; +}; + +struct s390_domain_device { + struct list_head list; + struct zpci_dev *zdev; +}; + +static struct s390_domain *to_s390_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct s390_domain, domain); +} + +static bool s390_iommu_capable(enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + case IOMMU_CAP_INTR_REMAP: + return true; + default: + return false; + } +} + +struct iommu_domain *s390_domain_alloc(unsigned domain_type) +{ + struct s390_domain *s390_domain; + + if (domain_type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL); + if (!s390_domain) + return NULL; + + s390_domain->dma_table = dma_alloc_cpu_table(); + if (!s390_domain->dma_table) { + kfree(s390_domain); + return NULL; + } + + spin_lock_init(&s390_domain->dma_table_lock); + 
spin_lock_init(&s390_domain->list_lock); + INIT_LIST_HEAD(&s390_domain->devices); + + return &s390_domain->domain; +} + +void s390_domain_free(struct iommu_domain *domain) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + + dma_cleanup_tables(s390_domain->dma_table); + kfree(s390_domain); +} + +static int s390_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; + struct s390_domain_device *domain_device; + unsigned long flags; + int rc; + + if (!zdev) + return -ENODEV; + + domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL); + if (!domain_device) + return -ENOMEM; + + if (zdev->dma_table) + zpci_dma_exit_device(zdev); + + zdev->dma_table = s390_domain->dma_table; + rc = zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET, + zdev->start_dma + zdev->iommu_size - 1, + (u64) zdev->dma_table); + if (rc) + goto out_restore; + + spin_lock_irqsave(&s390_domain->list_lock, flags); + /* First device defines the DMA range limits */ + if (list_empty(&s390_domain->devices)) { + domain->geometry.aperture_start = zdev->start_dma; + domain->geometry.aperture_end = zdev->end_dma; + domain->geometry.force_aperture = true; + /* Allow only devices with identical DMA range limits */ + } else if (domain->geometry.aperture_start != zdev->start_dma || + domain->geometry.aperture_end != zdev->end_dma) { + rc = -EINVAL; + spin_unlock_irqrestore(&s390_domain->list_lock, flags); + goto out_restore; + } + domain_device->zdev = zdev; + zdev->s390_domain = s390_domain; + list_add(&domain_device->list, &s390_domain->devices); + spin_unlock_irqrestore(&s390_domain->list_lock, flags); + + return 0; + +out_restore: + zpci_dma_init_device(zdev); + kfree(domain_device); + + return rc; +} + +static void s390_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + 
struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; + struct s390_domain_device *domain_device, *tmp; + unsigned long flags; + int found = 0; + + if (!zdev) + return; + + spin_lock_irqsave(&s390_domain->list_lock, flags); + list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices, + list) { + if (domain_device->zdev == zdev) { + list_del(&domain_device->list); + kfree(domain_device); + found = 1; + break; + } + } + spin_unlock_irqrestore(&s390_domain->list_lock, flags); + + if (found) { + zdev->s390_domain = NULL; + zpci_unregister_ioat(zdev, 0); + zpci_dma_init_device(zdev); + } +} + +static int s390_iommu_add_device(struct device *dev) +{ + struct iommu_group *group; + int rc; + + group = iommu_group_get(dev); + if (!group) { + group = iommu_group_alloc(); + if (IS_ERR(group)) + return PTR_ERR(group); + } + + rc = iommu_group_add_device(group, dev); + iommu_group_put(group); + + return rc; +} + +static void s390_iommu_remove_device(struct device *dev) +{ + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; + struct iommu_domain *domain; + + /* + * This is a workaround for a scenario where the IOMMU API common code + * "forgets" to call the detach_dev callback: After binding a device + * to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers + * the attach_dev), removing the device via + * "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev, + * only remove_device will be called via the BUS_NOTIFY_REMOVED_DEVICE + * notifier. + * + * So let's call detach_dev from here if it hasn't been called before. 
+ */ + if (zdev && zdev->s390_domain) { + domain = iommu_get_domain_for_dev(dev); + if (domain) + s390_iommu_detach_device(domain, dev); + } + + iommu_group_remove_device(dev); +} + +static int s390_iommu_update_trans(struct s390_domain *s390_domain, + unsigned long pa, dma_addr_t dma_addr, + size_t size, int flags) +{ + struct s390_domain_device *domain_device; + u8 *page_addr = (u8 *) (pa & PAGE_MASK); + dma_addr_t start_dma_addr = dma_addr; + unsigned long irq_flags, nr_pages, i; + int rc = 0; + + if (dma_addr < s390_domain->domain.geometry.aperture_start || + dma_addr + size > s390_domain->domain.geometry.aperture_end) + return -EINVAL; + + nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + if (!nr_pages) + return 0; + + spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags); + for (i = 0; i < nr_pages; i++) { + dma_update_cpu_trans(s390_domain->dma_table, page_addr, + dma_addr, flags); + page_addr += PAGE_SIZE; + dma_addr += PAGE_SIZE; + } + + spin_lock(&s390_domain->list_lock); + list_for_each_entry(domain_device, &s390_domain->devices, list) { + rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32, + start_dma_addr, nr_pages * PAGE_SIZE); + if (rc) + break; + } + spin_unlock(&s390_domain->list_lock); + spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); + + return rc; +} + +static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + int flags = ZPCI_PTE_VALID, rc = 0; + + if (!(prot & IOMMU_READ)) + return -EINVAL; + + if (!(prot & IOMMU_WRITE)) + flags |= ZPCI_TABLE_PROTECTED; + + rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova, + size, flags); + + return rc; +} + +static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + unsigned long *sto, *pto, *rto, flags; + unsigned int rtx, sx, px; + 
phys_addr_t phys = 0; + + if (iova < domain->geometry.aperture_start || + iova > domain->geometry.aperture_end) + return 0; + + rtx = calc_rtx(iova); + sx = calc_sx(iova); + px = calc_px(iova); + rto = s390_domain->dma_table; + + spin_lock_irqsave(&s390_domain->dma_table_lock, flags); + if (rto && reg_entry_isvalid(rto[rtx])) { + sto = get_rt_sto(rto[rtx]); + if (sto && reg_entry_isvalid(sto[sx])) { + pto = get_st_pto(sto[sx]); + if (pto && pt_entry_isvalid(pto[px])) + phys = pto[px] & ZPCI_PTE_ADDR_MASK; + } + } + spin_unlock_irqrestore(&s390_domain->dma_table_lock, flags); + + return phys; +} + +static size_t s390_iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + int flags = ZPCI_PTE_INVALID; + phys_addr_t paddr; + int rc; + + paddr = s390_iommu_iova_to_phys(domain, iova); + if (!paddr) + return 0; + + rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova, + size, flags); + if (rc) + return 0; + + return size; +} + +static struct iommu_ops s390_iommu_ops = { + .capable = s390_iommu_capable, + .domain_alloc = s390_domain_alloc, + .domain_free = s390_domain_free, + .attach_dev = s390_iommu_attach_device, + .detach_dev = s390_iommu_detach_device, + .map = s390_iommu_map, + .unmap = s390_iommu_unmap, + .iova_to_phys = s390_iommu_iova_to_phys, + .add_device = s390_iommu_add_device, + .remove_device = s390_iommu_remove_device, + .pgsize_bitmap = S390_IOMMU_PGSIZES, +}; + +static int __init s390_iommu_init(void) +{ + return bus_set_iommu(&pci_bus_type, &s390_iommu_ops); +} +subsys_initcall(s390_iommu_init); |