summaryrefslogtreecommitdiffstats
path: root/drivers/iommu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu')
-rw-r--r--drivers/iommu/Kconfig13
-rw-r--r--drivers/iommu/Makefile2
-rw-r--r--drivers/iommu/amd_iommu.c224
-rw-r--r--drivers/iommu/amd_iommu_init.c38
-rw-r--r--drivers/iommu/amd_iommu_types.h10
-rw-r--r--drivers/iommu/amd_iommu_v2.c107
-rw-r--r--drivers/iommu/arm-smmu.c2
-rw-r--r--drivers/iommu/dmar.c12
-rw-r--r--drivers/iommu/exynos-iommu.c2
-rw-r--r--drivers/iommu/fsl_pamu.c10
-rw-r--r--drivers/iommu/fsl_pamu_domain.c91
-rw-r--r--drivers/iommu/intel-iommu.c874
-rw-r--r--drivers/iommu/intel_irq_remapping.c60
-rw-r--r--drivers/iommu/iommu-sysfs.c134
-rw-r--r--drivers/iommu/iommu.c201
-rw-r--r--drivers/iommu/ipmmu-vmsa.c2
-rw-r--r--drivers/iommu/msm_iommu.c2
-rw-r--r--drivers/iommu/omap-iommu-debug.c114
-rw-r--r--drivers/iommu/omap-iommu.c15
-rw-r--r--drivers/iommu/omap-iommu.h8
-rw-r--r--drivers/iommu/omap-iovmm.c791
-rw-r--r--drivers/iommu/pci.h29
-rw-r--r--drivers/iommu/shmobile-iommu.c2
-rw-r--r--drivers/iommu/tegra-gart.c2
-rw-r--r--drivers/iommu/tegra-smmu.c2
25 files changed, 1028 insertions, 1719 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index d260605..dd51122 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -76,7 +76,7 @@ config AMD_IOMMU_STATS
config AMD_IOMMU_V2
tristate "AMD IOMMU Version 2 driver"
- depends on AMD_IOMMU && PROFILING
+ depends on AMD_IOMMU
select MMU_NOTIFIER
---help---
This option enables support for the AMD IOMMUv2 features of the IOMMU
@@ -143,16 +143,12 @@ config OMAP_IOMMU
depends on ARCH_OMAP2PLUS
select IOMMU_API
-config OMAP_IOVMM
- tristate "OMAP IO Virtual Memory Manager Support"
- depends on OMAP_IOMMU
-
config OMAP_IOMMU_DEBUG
- tristate "Export OMAP IOMMU/IOVMM internals in DebugFS"
- depends on OMAP_IOVMM && DEBUG_FS
+ tristate "Export OMAP IOMMU internals in DebugFS"
+ depends on OMAP_IOMMU && DEBUG_FS
help
Select this to see extensive information about
- the internal state of OMAP IOMMU/IOVMM in debugfs.
+ the internal state of OMAP IOMMU in debugfs.
Say N unless you know you need this.
@@ -180,6 +176,7 @@ config EXYNOS_IOMMU
bool "Exynos IOMMU Support"
depends on ARCH_EXYNOS
select IOMMU_API
+ select ARM_DMA_USE_IOMMU
help
Support for the IOMMU (System MMU) of Samsung Exynos application
processor family. This enables H/W multimedia accelerators to see
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 8893bad..16edef7 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,5 +1,6 @@
obj-$(CONFIG_IOMMU_API) += iommu.o
obj-$(CONFIG_IOMMU_API) += iommu-traces.o
+obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
obj-$(CONFIG_OF_IOMMU) += of_iommu.o
obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
@@ -11,7 +12,6 @@ obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
obj-$(CONFIG_OMAP_IOMMU) += omap-iommu2.o
-obj-$(CONFIG_OMAP_IOVMM) += omap-iovmm.o
obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 4aec6a2..1840531 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -46,7 +46,6 @@
#include "amd_iommu_proto.h"
#include "amd_iommu_types.h"
#include "irq_remapping.h"
-#include "pci.h"
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
@@ -81,7 +80,7 @@ LIST_HEAD(hpet_map);
*/
static struct protection_domain *pt_domain;
-static struct iommu_ops amd_iommu_ops;
+static const struct iommu_ops amd_iommu_ops;
static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
int amd_iommu_max_glx_val = -1;
@@ -133,9 +132,6 @@ static void free_dev_data(struct iommu_dev_data *dev_data)
list_del(&dev_data->dev_data_list);
spin_unlock_irqrestore(&dev_data_list_lock, flags);
- if (dev_data->group)
- iommu_group_put(dev_data->group);
-
kfree(dev_data);
}
@@ -264,167 +260,79 @@ static bool check_device(struct device *dev)
return true;
}
-static struct pci_bus *find_hosted_bus(struct pci_bus *bus)
-{
- while (!bus->self) {
- if (!pci_is_root_bus(bus))
- bus = bus->parent;
- else
- return ERR_PTR(-ENODEV);
- }
-
- return bus;
-}
-
-#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
-
-static struct pci_dev *get_isolation_root(struct pci_dev *pdev)
-{
- struct pci_dev *dma_pdev = pdev;
-
- /* Account for quirked devices */
- swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
-
- /*
- * If it's a multifunction device that does not support our
- * required ACS flags, add to the same group as lowest numbered
- * function that also does not suport the required ACS flags.
- */
- if (dma_pdev->multifunction &&
- !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
- u8 i, slot = PCI_SLOT(dma_pdev->devfn);
-
- for (i = 0; i < 8; i++) {
- struct pci_dev *tmp;
-
- tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
- if (!tmp)
- continue;
-
- if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
- swap_pci_ref(&dma_pdev, tmp);
- break;
- }
- pci_dev_put(tmp);
- }
- }
-
- /*
- * Devices on the root bus go through the iommu. If that's not us,
- * find the next upstream device and test ACS up to the root bus.
- * Finding the next device may require skipping virtual buses.
- */
- while (!pci_is_root_bus(dma_pdev->bus)) {
- struct pci_bus *bus = find_hosted_bus(dma_pdev->bus);
- if (IS_ERR(bus))
- break;
-
- if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
- break;
-
- swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
- }
-
- return dma_pdev;
-}
-
-static int use_pdev_iommu_group(struct pci_dev *pdev, struct device *dev)
+static int init_iommu_group(struct device *dev)
{
- struct iommu_group *group = iommu_group_get(&pdev->dev);
- int ret;
+ struct iommu_group *group;
- if (!group) {
- group = iommu_group_alloc();
- if (IS_ERR(group))
- return PTR_ERR(group);
+ group = iommu_group_get_for_dev(dev);
- WARN_ON(&pdev->dev != dev);
- }
+ if (IS_ERR(group))
+ return PTR_ERR(group);
- ret = iommu_group_add_device(group, dev);
iommu_group_put(group);
- return ret;
+ return 0;
}
-static int use_dev_data_iommu_group(struct iommu_dev_data *dev_data,
- struct device *dev)
+static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
{
- if (!dev_data->group) {
- struct iommu_group *group = iommu_group_alloc();
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- dev_data->group = group;
- }
-
- return iommu_group_add_device(dev_data->group, dev);
+ *(u16 *)data = alias;
+ return 0;
}
-static int init_iommu_group(struct device *dev)
+static u16 get_alias(struct device *dev)
{
- struct iommu_dev_data *dev_data;
- struct iommu_group *group;
- struct pci_dev *dma_pdev;
- int ret;
-
- group = iommu_group_get(dev);
- if (group) {
- iommu_group_put(group);
- return 0;
- }
-
- dev_data = find_dev_data(get_device_id(dev));
- if (!dev_data)
- return -ENOMEM;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ u16 devid, ivrs_alias, pci_alias;
- if (dev_data->alias_data) {
- u16 alias;
- struct pci_bus *bus;
+ devid = get_device_id(dev);
+ ivrs_alias = amd_iommu_alias_table[devid];
+ pci_for_each_dma_alias(pdev, __last_alias, &pci_alias);
- if (dev_data->alias_data->group)
- goto use_group;
+ if (ivrs_alias == pci_alias)
+ return ivrs_alias;
- /*
- * If the alias device exists, it's effectively just a first
- * level quirk for finding the DMA source.
- */
- alias = amd_iommu_alias_table[dev_data->devid];
- dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff);
- if (dma_pdev) {
- dma_pdev = get_isolation_root(dma_pdev);
- goto use_pdev;
+ /*
+ * DMA alias showdown
+ *
+ * The IVRS is fairly reliable in telling us about aliases, but it
+ * can't know about every screwy device. If we don't have an IVRS
+ * reported alias, use the PCI reported alias. In that case we may
+ * still need to initialize the rlookup and dev_table entries if the
+ * alias is to a non-existent device.
+ */
+ if (ivrs_alias == devid) {
+ if (!amd_iommu_rlookup_table[pci_alias]) {
+ amd_iommu_rlookup_table[pci_alias] =
+ amd_iommu_rlookup_table[devid];
+ memcpy(amd_iommu_dev_table[pci_alias].data,
+ amd_iommu_dev_table[devid].data,
+ sizeof(amd_iommu_dev_table[pci_alias].data));
}
- /*
- * If the alias is virtual, try to find a parent device
- * and test whether the IOMMU group is actualy rooted above
- * the alias. Be careful to also test the parent device if
- * we think the alias is the root of the group.
- */
- bus = pci_find_bus(0, alias >> 8);
- if (!bus)
- goto use_group;
-
- bus = find_hosted_bus(bus);
- if (IS_ERR(bus) || !bus->self)
- goto use_group;
+ return pci_alias;
+ }
- dma_pdev = get_isolation_root(pci_dev_get(bus->self));
- if (dma_pdev != bus->self || (dma_pdev->multifunction &&
- !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)))
- goto use_pdev;
+ pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d "
+ "for device %s[%04x:%04x], kernel reported alias "
+ "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias),
+ PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device,
+ PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias),
+ PCI_FUNC(pci_alias));
- pci_dev_put(dma_pdev);
- goto use_group;
+ /*
+ * If we don't have a PCI DMA alias and the IVRS alias is on the same
+ * bus, then the IVRS table may know about a quirk that we don't.
+ */
+ if (pci_alias == devid &&
+ PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) {
+ pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
+ pdev->dma_alias_devfn = ivrs_alias & 0xff;
+ pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n",
+ PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias),
+ dev_name(dev));
}
- dma_pdev = get_isolation_root(pci_dev_get(to_pci_dev(dev)));
-use_pdev:
- ret = use_pdev_iommu_group(dma_pdev, dev);
- pci_dev_put(dma_pdev);
- return ret;
-use_group:
- return use_dev_data_iommu_group(dev_data->alias_data, dev);
+ return ivrs_alias;
}
static int iommu_init_device(struct device *dev)
@@ -441,7 +349,8 @@ static int iommu_init_device(struct device *dev)
if (!dev_data)
return -ENOMEM;
- alias = amd_iommu_alias_table[dev_data->devid];
+ alias = get_alias(dev);
+
if (alias != dev_data->devid) {
struct iommu_dev_data *alias_data;
@@ -470,6 +379,9 @@ static int iommu_init_device(struct device *dev)
dev->archdata.iommu = dev_data;
+ iommu_device_link(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev,
+ dev);
+
return 0;
}
@@ -489,12 +401,22 @@ static void iommu_ignore_device(struct device *dev)
static void iommu_uninit_device(struct device *dev)
{
+ struct iommu_dev_data *dev_data = search_dev_data(get_device_id(dev));
+
+ if (!dev_data)
+ return;
+
+ iommu_device_unlink(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev,
+ dev);
+
iommu_group_remove_device(dev);
+ /* Unlink from alias, it may change if another device is re-plugged */
+ dev_data->alias_data = NULL;
+
/*
- * Nothing to do here - we keep dev_data around for unplugged devices
- * and reuse it when the device is re-plugged - not doing so would
- * introduce a ton of races.
+ * We keep dev_data around for unplugged devices and reuse it when the
+ * device is re-plugged - not doing so would introduce a ton of races.
*/
}
@@ -3473,7 +3395,7 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
return 0;
}
-static struct iommu_ops amd_iommu_ops = {
+static const struct iommu_ops amd_iommu_ops = {
.domain_init = amd_iommu_domain_init,
.domain_destroy = amd_iommu_domain_destroy,
.attach_dev = amd_iommu_attach_device,
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 0e08545..3783e0b 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -26,6 +26,7 @@
#include <linux/msi.h>
#include <linux/amd-iommu.h>
#include <linux/export.h>
+#include <linux/iommu.h>
#include <asm/pci-direct.h>
#include <asm/iommu.h>
#include <asm/gart.h>
@@ -1197,6 +1198,39 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
iommu->max_counters = (u8) ((val >> 7) & 0xf);
}
+static ssize_t amd_iommu_show_cap(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amd_iommu *iommu = dev_get_drvdata(dev);
+ return sprintf(buf, "%x\n", iommu->cap);
+}
+static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
+
+static ssize_t amd_iommu_show_features(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amd_iommu *iommu = dev_get_drvdata(dev);
+ return sprintf(buf, "%llx\n", iommu->features);
+}
+static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
+
+static struct attribute *amd_iommu_attrs[] = {
+ &dev_attr_cap.attr,
+ &dev_attr_features.attr,
+ NULL,
+};
+
+static struct attribute_group amd_iommu_group = {
+ .name = "amd-iommu",
+ .attrs = amd_iommu_attrs,
+};
+
+static const struct attribute_group *amd_iommu_groups[] = {
+ &amd_iommu_group,
+ NULL,
+};
static int iommu_init_pci(struct amd_iommu *iommu)
{
@@ -1297,6 +1331,10 @@ static int iommu_init_pci(struct amd_iommu *iommu)
amd_iommu_erratum_746_workaround(iommu);
+ iommu->iommu_dev = iommu_device_create(&iommu->dev->dev, iommu,
+ amd_iommu_groups, "ivhd%d",
+ iommu->index);
+
return pci_enable_device(iommu->dev);
}
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index f1a5abf..8e43b7c 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -390,12 +390,6 @@ struct amd_iommu_fault {
};
-#define PPR_FAULT_EXEC (1 << 1)
-#define PPR_FAULT_READ (1 << 2)
-#define PPR_FAULT_WRITE (1 << 5)
-#define PPR_FAULT_USER (1 << 6)
-#define PPR_FAULT_RSVD (1 << 7)
-#define PPR_FAULT_GN (1 << 8)
struct iommu_domain;
@@ -432,7 +426,6 @@ struct iommu_dev_data {
struct iommu_dev_data *alias_data;/* The alias dev_data */
struct protection_domain *domain; /* Domain the device is bound to */
atomic_t bind; /* Domain attach reference count */
- struct iommu_group *group; /* IOMMU group for virtual aliases */
u16 devid; /* PCI Device ID */
bool iommu_v2; /* Device can make use of IOMMUv2 */
bool passthrough; /* Default for device is pt_domain */
@@ -578,6 +571,9 @@ struct amd_iommu {
/* default dma_ops domain for that IOMMU */
struct dma_ops_domain *default_dom;
+ /* IOMMU sysfs device */
+ struct device *iommu_dev;
+
/*
* We can't rely on the BIOS to restore all values on reinit, so we
* need to stash them
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 499b436..5f578e8 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -47,12 +47,13 @@ struct pasid_state {
atomic_t count; /* Reference count */
unsigned mmu_notifier_count; /* Counting nested mmu_notifier
calls */
- struct task_struct *task; /* Task bound to this PASID */
struct mm_struct *mm; /* mm_struct for the faults */
- struct mmu_notifier mn; /* mmu_otifier handle */
+ struct mmu_notifier mn; /* mmu_notifier handle */
struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */
struct device_state *device_state; /* Link to our device_state */
int pasid; /* PASID index */
+ bool invalid; /* Used during setup and
+ teardown of the pasid */
spinlock_t lock; /* Protect pri_queues and
mmu_notifer_count */
wait_queue_head_t wq; /* To wait for count == 0 */
@@ -99,7 +100,6 @@ static struct workqueue_struct *iommu_wq;
static u64 *empty_page_table;
static void free_pasid_states(struct device_state *dev_state);
-static void unbind_pasid(struct device_state *dev_state, int pasid);
static u16 device_id(struct pci_dev *pdev)
{
@@ -297,37 +297,29 @@ static void put_pasid_state_wait(struct pasid_state *pasid_state)
schedule();
finish_wait(&pasid_state->wq, &wait);
- mmput(pasid_state->mm);
free_pasid_state(pasid_state);
}
-static void __unbind_pasid(struct pasid_state *pasid_state)
+static void unbind_pasid(struct pasid_state *pasid_state)
{
struct iommu_domain *domain;
domain = pasid_state->device_state->domain;
+ /*
+ * Mark pasid_state as invalid, no more faults will we added to the
+ * work queue after this is visible everywhere.
+ */
+ pasid_state->invalid = true;
+
+ /* Make sure this is visible */
+ smp_wmb();
+
+ /* After this the device/pasid can't access the mm anymore */
amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid);
- clear_pasid_state(pasid_state->device_state, pasid_state->pasid);
/* Make sure no more pending faults are in the queue */
flush_workqueue(iommu_wq);
-
- mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
-
- put_pasid_state(pasid_state); /* Reference taken in bind() function */
-}
-
-static void unbind_pasid(struct device_state *dev_state, int pasid)
-{
- struct pasid_state *pasid_state;
-
- pasid_state = get_pasid_state(dev_state, pasid);
- if (pasid_state == NULL)
- return;
-
- __unbind_pasid(pasid_state);
- put_pasid_state_wait(pasid_state); /* Reference taken in this function */
}
static void free_pasid_states_level1(struct pasid_state **tbl)
@@ -373,6 +365,12 @@ static void free_pasid_states(struct device_state *dev_state)
* unbind the PASID
*/
mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
+
+ put_pasid_state_wait(pasid_state); /* Reference taken in
+ amd_iommu_bind_pasid */
+
+ /* Drop reference taken in amd_iommu_bind_pasid */
+ put_device_state(dev_state);
}
if (dev_state->pasid_levels == 2)
@@ -411,14 +409,6 @@ static int mn_clear_flush_young(struct mmu_notifier *mn,
return 0;
}
-static void mn_change_pte(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long address,
- pte_t pte)
-{
- __mn_flush_page(mn, address);
-}
-
static void mn_invalidate_page(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long address)
@@ -472,22 +462,23 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
struct pasid_state *pasid_state;
struct device_state *dev_state;
+ bool run_inv_ctx_cb;
might_sleep();
- pasid_state = mn_to_state(mn);
- dev_state = pasid_state->device_state;
+ pasid_state = mn_to_state(mn);
+ dev_state = pasid_state->device_state;
+ run_inv_ctx_cb = !pasid_state->invalid;
- if (pasid_state->device_state->inv_ctx_cb)
+ if (run_inv_ctx_cb && pasid_state->device_state->inv_ctx_cb)
dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid);
- unbind_pasid(dev_state, pasid_state->pasid);
+ unbind_pasid(pasid_state);
}
static struct mmu_notifier_ops iommu_mn = {
.release = mn_release,
.clear_flush_young = mn_clear_flush_young,
- .change_pte = mn_change_pte,
.invalidate_page = mn_invalidate_page,
.invalidate_range_start = mn_invalidate_range_start,
.invalidate_range_end = mn_invalidate_range_end,
@@ -529,7 +520,7 @@ static void do_fault(struct work_struct *work)
write = !!(fault->flags & PPR_FAULT_WRITE);
down_read(&fault->state->mm->mmap_sem);
- npages = get_user_pages(fault->state->task, fault->state->mm,
+ npages = get_user_pages(NULL, fault->state->mm,
fault->address, 1, write, 0, &page, NULL);
up_read(&fault->state->mm->mmap_sem);
@@ -587,7 +578,7 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
goto out;
pasid_state = get_pasid_state(dev_state, iommu_fault->pasid);
- if (pasid_state == NULL) {
+ if (pasid_state == NULL || pasid_state->invalid) {
/* We know the device but not the PASID -> send INVALID */
amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid,
PPR_INVALID, tag);
@@ -612,6 +603,7 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
fault->state = pasid_state;
fault->tag = tag;
fault->finish = finish;
+ fault->pasid = iommu_fault->pasid;
fault->flags = iommu_fault->flags;
INIT_WORK(&fault->work, do_fault);
@@ -620,6 +612,10 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
ret = NOTIFY_OK;
out_drop_state:
+
+ if (ret != NOTIFY_OK && pasid_state)
+ put_pasid_state(pasid_state);
+
put_device_state(dev_state);
out:
@@ -635,6 +631,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
{
struct pasid_state *pasid_state;
struct device_state *dev_state;
+ struct mm_struct *mm;
u16 devid;
int ret;
@@ -658,20 +655,23 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
if (pasid_state == NULL)
goto out;
+
atomic_set(&pasid_state->count, 1);
init_waitqueue_head(&pasid_state->wq);
spin_lock_init(&pasid_state->lock);
- pasid_state->task = task;
- pasid_state->mm = get_task_mm(task);
+ mm = get_task_mm(task);
+ pasid_state->mm = mm;
pasid_state->device_state = dev_state;
pasid_state->pasid = pasid;
+ pasid_state->invalid = true; /* Mark as valid only if we are
+ done with setting up the pasid */
pasid_state->mn.ops = &iommu_mn;
if (pasid_state->mm == NULL)
goto out_free;
- mmu_notifier_register(&pasid_state->mn, pasid_state->mm);
+ mmu_notifier_register(&pasid_state->mn, mm);
ret = set_pasid_state(dev_state, pasid_state, pasid);
if (ret)
@@ -682,15 +682,26 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
if (ret)
goto out_clear_state;
+ /* Now we are ready to handle faults */
+ pasid_state->invalid = false;
+
+ /*
+ * Drop the reference to the mm_struct here. We rely on the
+ * mmu_notifier release call-back to inform us when the mm
+ * is going away.
+ */
+ mmput(mm);
+
return 0;
out_clear_state:
clear_pasid_state(dev_state, pasid);
out_unregister:
- mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
+ mmu_notifier_unregister(&pasid_state->mn, mm);
out_free:
+ mmput(mm);
free_pasid_state(pasid_state);
out:
@@ -728,10 +739,22 @@ void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
*/
put_pasid_state(pasid_state);
- /* This will call the mn_release function and unbind the PASID */
+ /* Clear the pasid state so that the pasid can be re-used */
+ clear_pasid_state(dev_state, pasid_state->pasid);
+
+ /*
+ * Call mmu_notifier_unregister to drop our reference
+ * to pasid_state->mm
+ */
mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
+ put_pasid_state_wait(pasid_state); /* Reference taken in
+ amd_iommu_bind_pasid */
out:
+ /* Drop reference taken in this function */
+ put_device_state(dev_state);
+
+ /* Drop reference taken in amd_iommu_bind_pasid */
put_device_state(dev_state);
}
EXPORT_SYMBOL(amd_iommu_unbind_pasid);
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index f3f6641..ca18d6d 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1597,7 +1597,7 @@ static void arm_smmu_remove_device(struct device *dev)
iommu_group_remove_device(dev);
}
-static struct iommu_ops arm_smmu_ops = {
+static const struct iommu_ops arm_smmu_ops = {
.domain_init = arm_smmu_domain_init,
.domain_destroy = arm_smmu_domain_destroy,
.attach_dev = arm_smmu_attach_dev,
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 9a4f05e..4306885 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -38,6 +38,7 @@
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/slab.h>
+#include <linux/iommu.h>
#include <asm/irq_remapping.h>
#include <asm/iommu_table.h>
@@ -980,6 +981,12 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
raw_spin_lock_init(&iommu->register_lock);
drhd->iommu = iommu;
+
+ if (intel_iommu_enabled)
+ iommu->iommu_dev = iommu_device_create(NULL, iommu,
+ intel_iommu_groups,
+ iommu->name);
+
return 0;
err_unmap:
@@ -991,6 +998,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
static void free_iommu(struct intel_iommu *iommu)
{
+ iommu_device_destroy(iommu->iommu_dev);
+
if (iommu->irq) {
free_irq(iommu->irq, iommu);
irq_set_handler_data(iommu->irq, NULL);
@@ -1339,9 +1348,6 @@ int dmar_enable_qi(struct intel_iommu *iommu)
return -ENOMEM;
}
- qi->free_head = qi->free_tail = 0;
- qi->free_cnt = QI_LENGTH;
-
raw_spin_lock_init(&qi->q_lock);
__dmar_enable_qi(iommu);
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 99054d2..d037e87a 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1170,7 +1170,7 @@ static void exynos_iommu_remove_device(struct device *dev)
iommu_group_remove_device(dev);
}
-static struct iommu_ops exynos_iommu_ops = {
+static const struct iommu_ops exynos_iommu_ops = {
.domain_init = exynos_iommu_domain_init,
.domain_destroy = exynos_iommu_domain_destroy,
.attach_dev = exynos_iommu_attach_device,
diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
index b99dd88..2b6ce93 100644
--- a/drivers/iommu/fsl_pamu.c
+++ b/drivers/iommu/fsl_pamu.c
@@ -92,7 +92,7 @@ struct gen_pool *spaace_pool;
* subwindow count per liodn.
*
*/
-u32 pamu_get_max_subwin_cnt()
+u32 pamu_get_max_subwin_cnt(void)
{
return max_subwindow_count;
}
@@ -170,10 +170,10 @@ int pamu_disable_liodn(int liodn)
static unsigned int map_addrspace_size_to_wse(phys_addr_t addrspace_size)
{
/* Bug if not a power of 2 */
- BUG_ON(!is_power_of_2(addrspace_size));
+ BUG_ON((addrspace_size & (addrspace_size - 1)));
/* window size is 2^(WSE+1) bytes */
- return __ffs(addrspace_size) - 1;
+ return fls64(addrspace_size) - 2;
}
/* Derive the PAACE window count encoding for the subwindow count */
@@ -351,7 +351,7 @@ int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size,
struct paace *ppaace;
unsigned long fspi;
- if (!is_power_of_2(win_size) || win_size < PAMU_PAGE_SIZE) {
+ if ((win_size & (win_size - 1)) || win_size < PAMU_PAGE_SIZE) {
pr_debug("window size too small or not a power of two %llx\n", win_size);
return -EINVAL;
}
@@ -464,7 +464,7 @@ int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin,
return -ENOENT;
}
- if (!is_power_of_2(subwin_size) || subwin_size < PAMU_PAGE_SIZE) {
+ if ((subwin_size & (subwin_size - 1)) || subwin_size < PAMU_PAGE_SIZE) {
pr_debug("subwindow size out of range, or not a power of 2\n");
return -EINVAL;
}
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index 93072ba..61d1daf 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -38,7 +38,6 @@
#include <sysdev/fsl_pci.h>
#include "fsl_pamu_domain.h"
-#include "pci.h"
/*
* Global spinlock that needs to be held while
@@ -301,7 +300,7 @@ static int check_size(u64 size, dma_addr_t iova)
* Size must be a power of two and at least be equal
* to PAMU page size.
*/
- if (!is_power_of_2(size) || size < PAMU_PAGE_SIZE) {
+ if ((size & (size - 1)) || size < PAMU_PAGE_SIZE) {
pr_debug("%s: size too small or not a power of two\n", __func__);
return -EINVAL;
}
@@ -335,11 +334,6 @@ static struct fsl_dma_domain *iommu_alloc_dma_domain(void)
return domain;
}
-static inline struct device_domain_info *find_domain(struct device *dev)
-{
- return dev->archdata.iommu_domain;
-}
-
static void remove_device_ref(struct device_domain_info *info, u32 win_cnt)
{
unsigned long flags;
@@ -380,7 +374,7 @@ static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct d
* Check here if the device is already attached to domain or not.
* If the device is already attached to a domain detach it.
*/
- old_domain_info = find_domain(dev);
+ old_domain_info = dev->archdata.iommu_domain;
if (old_domain_info && old_domain_info->domain != dma_domain) {
spin_unlock_irqrestore(&device_domain_lock, flags);
detach_device(dev, old_domain_info->domain);
@@ -399,7 +393,7 @@ static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct d
* the info for the first LIODN as all
* LIODNs share the same domain
*/
- if (!old_domain_info)
+ if (!dev->archdata.iommu_domain)
dev->archdata.iommu_domain = info;
spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -892,8 +886,6 @@ static int fsl_pamu_get_domain_attr(struct iommu_domain *domain,
return ret;
}
-#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
-
static struct iommu_group *get_device_iommu_group(struct device *dev)
{
struct iommu_group *group;
@@ -950,75 +942,14 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
struct pci_controller *pci_ctl;
bool pci_endpt_partioning;
struct iommu_group *group = NULL;
- struct pci_dev *bridge, *dma_pdev = NULL;
pci_ctl = pci_bus_to_host(pdev->bus);
pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl);
/* We can partition PCIe devices so assign device group to the device */
if (pci_endpt_partioning) {
- bridge = pci_find_upstream_pcie_bridge(pdev);
- if (bridge) {
- if (pci_is_pcie(bridge))
- dma_pdev = pci_get_domain_bus_and_slot(
- pci_domain_nr(pdev->bus),
- bridge->subordinate->number, 0);
- if (!dma_pdev)
- dma_pdev = pci_dev_get(bridge);
- } else
- dma_pdev = pci_dev_get(pdev);
-
- /* Account for quirked devices */
- swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
-
- /*
- * If it's a multifunction device that does not support our
- * required ACS flags, add to the same group as lowest numbered
- * function that also does not suport the required ACS flags.
- */
- if (dma_pdev->multifunction &&
- !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
- u8 i, slot = PCI_SLOT(dma_pdev->devfn);
-
- for (i = 0; i < 8; i++) {
- struct pci_dev *tmp;
-
- tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
- if (!tmp)
- continue;
-
- if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
- swap_pci_ref(&dma_pdev, tmp);
- break;
- }
- pci_dev_put(tmp);
- }
- }
+ group = iommu_group_get_for_dev(&pdev->dev);
/*
- * Devices on the root bus go through the iommu. If that's not us,
- * find the next upstream device and test ACS up to the root bus.
- * Finding the next device may require skipping virtual buses.
- */
- while (!pci_is_root_bus(dma_pdev->bus)) {
- struct pci_bus *bus = dma_pdev->bus;
-
- while (!bus->self) {
- if (!pci_is_root_bus(bus))
- bus = bus->parent;
- else
- goto root_bus;
- }
-
- if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
- break;
-
- swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
- }
-
-root_bus:
- group = get_device_iommu_group(&dma_pdev->dev);
- pci_dev_put(dma_pdev);
- /*
* PCIe controller is not a paritionable entity
* free the controller device iommu_group.
*/
@@ -1042,12 +973,15 @@ root_bus:
group = get_shared_pci_device_group(pdev);
}
+ if (!group)
+ group = ERR_PTR(-ENODEV);
+
return group;
}
static int fsl_pamu_add_device(struct device *dev)
{
- struct iommu_group *group = NULL;
+ struct iommu_group *group = ERR_PTR(-ENODEV);
struct pci_dev *pdev;
const u32 *prop;
int ret, len;
@@ -1070,7 +1004,7 @@ static int fsl_pamu_add_device(struct device *dev)
group = get_device_iommu_group(dev);
}
- if (!group || IS_ERR(group))
+ if (IS_ERR(group))
return PTR_ERR(group);
ret = iommu_group_add_device(group, dev);
@@ -1118,8 +1052,7 @@ static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count)
ret = pamu_set_domain_geometry(dma_domain, &domain->geometry,
((w_count > 1) ? w_count : 0));
if (!ret) {
- if (dma_domain->win_arr)
- kfree(dma_domain->win_arr);
+ kfree(dma_domain->win_arr);
dma_domain->win_arr = kzalloc(sizeof(struct dma_window) *
w_count, GFP_ATOMIC);
if (!dma_domain->win_arr) {
@@ -1140,7 +1073,7 @@ static u32 fsl_pamu_get_windows(struct iommu_domain *domain)
return dma_domain->win_cnt;
}
-static struct iommu_ops fsl_pamu_ops = {
+static const struct iommu_ops fsl_pamu_ops = {
.domain_init = fsl_pamu_domain_init,
.domain_destroy = fsl_pamu_domain_destroy,
.attach_dev = fsl_pamu_attach_device,
@@ -1157,7 +1090,7 @@ static struct iommu_ops fsl_pamu_ops = {
.remove_device = fsl_pamu_remove_device,
};
-int pamu_domain_init()
+int pamu_domain_init(void)
{
int ret = 0;
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 51b6b77..d1f5caa 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -45,7 +45,6 @@
#include <asm/iommu.h>
#include "irq_remapping.h"
-#include "pci.h"
#define ROOT_SIZE VTD_PAGE_SIZE
#define CONTEXT_SIZE VTD_PAGE_SIZE
@@ -304,7 +303,7 @@ static inline bool dma_pte_present(struct dma_pte *pte)
static inline bool dma_pte_superpage(struct dma_pte *pte)
{
- return (pte->val & (1 << 7));
+ return (pte->val & DMA_PTE_LARGE_PAGE);
}
static inline int first_pte_in_page(struct dma_pte *pte)
@@ -321,16 +320,13 @@ static inline int first_pte_in_page(struct dma_pte *pte)
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;
-/* devices under the same p2p bridge are owned in one domain */
-#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
-
/* domain represents a virtual machine, more than one devices
* across iommus may be owned in one domain, e.g. kvm guest.
*/
-#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
+#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
/* si_domain contains mulitple devices */
-#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
+#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
/* define the limit of IOMMUs supported in each domain */
#ifdef CONFIG_X86
@@ -429,6 +425,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
struct device *dev);
static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
struct device *dev);
+static int domain_detach_iommu(struct dmar_domain *domain,
+ struct intel_iommu *iommu);
#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
@@ -451,7 +449,7 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
-static struct iommu_ops intel_iommu_ops;
+static const struct iommu_ops intel_iommu_ops;
static int __init intel_iommu_setup(char *str)
{
@@ -540,6 +538,24 @@ void free_iova_mem(struct iova *iova)
kmem_cache_free(iommu_iova_cache, iova);
}
+static inline int domain_type_is_vm(struct dmar_domain *domain)
+{
+ return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
+}
+
+static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
+{
+ return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
+ DOMAIN_FLAG_STATIC_IDENTITY);
+}
+
+static inline int domain_pfn_supported(struct dmar_domain *domain,
+ unsigned long pfn)
+{
+ int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+
+ return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
+}
static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
@@ -580,9 +596,7 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
int iommu_id;
/* si_domain and vm domain should not get here. */
- BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
- BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
-
+ BUG_ON(domain_type_is_vm_or_si(domain));
iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
return NULL;
@@ -619,50 +633,56 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
rcu_read_unlock();
}
-static void domain_update_iommu_snooping(struct dmar_domain *domain)
+static int domain_update_iommu_snooping(struct intel_iommu *skip)
{
- int i;
-
- domain->iommu_snooping = 1;
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ int ret = 1;
- for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
- if (!ecap_sc_support(g_iommus[i]->ecap)) {
- domain->iommu_snooping = 0;
- break;
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ if (iommu != skip) {
+ if (!ecap_sc_support(iommu->ecap)) {
+ ret = 0;
+ break;
+ }
}
}
+ rcu_read_unlock();
+
+ return ret;
}
-static void domain_update_iommu_superpage(struct dmar_domain *domain)
+static int domain_update_iommu_superpage(struct intel_iommu *skip)
{
struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu = NULL;
+ struct intel_iommu *iommu;
int mask = 0xf;
if (!intel_iommu_superpage) {
- domain->iommu_superpage = 0;
- return;
+ return 0;
}
/* set iommu_superpage to the smallest common denominator */
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
- mask &= cap_super_page_val(iommu->cap);
- if (!mask) {
- break;
+ if (iommu != skip) {
+ mask &= cap_super_page_val(iommu->cap);
+ if (!mask)
+ break;
}
}
rcu_read_unlock();
- domain->iommu_superpage = fls(mask);
+ return fls(mask);
}
/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
domain_update_iommu_coherency(domain);
- domain_update_iommu_snooping(domain);
- domain_update_iommu_superpage(domain);
+ domain->iommu_snooping = domain_update_iommu_snooping(NULL);
+ domain->iommu_superpage = domain_update_iommu_superpage(NULL);
}
static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
@@ -671,7 +691,7 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
struct intel_iommu *iommu;
struct device *tmp;
struct pci_dev *ptmp, *pdev = NULL;
- u16 segment;
+ u16 segment = 0;
int i;
if (dev_is_pci(dev)) {
@@ -816,14 +836,13 @@ out:
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
unsigned long pfn, int *target_level)
{
- int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
struct dma_pte *parent, *pte = NULL;
int level = agaw_to_level(domain->agaw);
int offset;
BUG_ON(!domain->pgd);
- if (addr_width < BITS_PER_LONG && pfn >> addr_width)
+ if (!domain_pfn_supported(domain, pfn))
/* Address beyond IOMMU's addressing capabilities. */
return NULL;
@@ -849,13 +868,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
- if (cmpxchg64(&pte->val, 0ULL, pteval)) {
+ if (cmpxchg64(&pte->val, 0ULL, pteval))
/* Someone else set it while we were thinking; use theirs. */
free_pgtable_page(tmp_page);
- } else {
- dma_pte_addr(pte);
+ else
domain_flush_cache(domain, pte, sizeof(*pte));
- }
}
if (level == 1)
break;
@@ -892,7 +909,7 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
break;
}
- if (pte->val & DMA_PTE_LARGE_PAGE) {
+ if (dma_pte_superpage(pte)) {
*large_page = total;
return pte;
}
@@ -908,12 +925,11 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
unsigned long start_pfn,
unsigned long last_pfn)
{
- int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
unsigned int large_page = 1;
struct dma_pte *first_pte, *pte;
- BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
- BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
+ BUG_ON(!domain_pfn_supported(domain, start_pfn));
+ BUG_ON(!domain_pfn_supported(domain, last_pfn));
BUG_ON(start_pfn > last_pfn);
/* we don't need lock here; nobody else touches the iova range */
@@ -974,12 +990,12 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
unsigned long start_pfn,
unsigned long last_pfn)
{
- int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
-
- BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
- BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
+ BUG_ON(!domain_pfn_supported(domain, start_pfn));
+ BUG_ON(!domain_pfn_supported(domain, last_pfn));
BUG_ON(start_pfn > last_pfn);
+ dma_pte_clear_range(domain, start_pfn, last_pfn);
+
/* We don't need lock here; nobody else touches the iova range */
dma_pte_free_level(domain, agaw_to_level(domain->agaw),
domain->pgd, 0, start_pfn, last_pfn);
@@ -1077,11 +1093,10 @@ struct page *domain_unmap(struct dmar_domain *domain,
unsigned long start_pfn,
unsigned long last_pfn)
{
- int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
struct page *freelist = NULL;
- BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
- BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
+ BUG_ON(!domain_pfn_supported(domain, start_pfn));
+ BUG_ON(!domain_pfn_supported(domain, last_pfn));
BUG_ON(start_pfn > last_pfn);
/* we don't need lock here; nobody else touches the iova range */
@@ -1275,7 +1290,8 @@ iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_entry(info, &domain->devices, link)
- if (info->bus == bus && info->devfn == devfn) {
+ if (info->iommu == iommu && info->bus == bus &&
+ info->devfn == devfn) {
found = 1;
break;
}
@@ -1384,7 +1400,7 @@ static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}
-static int iommu_enable_translation(struct intel_iommu *iommu)
+static void iommu_enable_translation(struct intel_iommu *iommu)
{
u32 sts;
unsigned long flags;
@@ -1398,10 +1414,9 @@ static int iommu_enable_translation(struct intel_iommu *iommu)
readl, (sts & DMA_GSTS_TES), sts);
raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
- return 0;
}
-static int iommu_disable_translation(struct intel_iommu *iommu)
+static void iommu_disable_translation(struct intel_iommu *iommu)
{
u32 sts;
unsigned long flag;
@@ -1415,7 +1430,6 @@ static int iommu_disable_translation(struct intel_iommu *iommu)
readl, (!(sts & DMA_GSTS_TES)), sts);
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
- return 0;
}
@@ -1462,8 +1476,7 @@ static int iommu_init_domains(struct intel_iommu *iommu)
static void free_dmar_iommu(struct intel_iommu *iommu)
{
struct dmar_domain *domain;
- int i, count;
- unsigned long flags;
+ int i;
if ((iommu->domains) && (iommu->domain_ids)) {
for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
@@ -1476,11 +1489,8 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
domain = iommu->domains[i];
clear_bit(i, iommu->domain_ids);
-
- spin_lock_irqsave(&domain->iommu_lock, flags);
- count = --domain->iommu_count;
- spin_unlock_irqrestore(&domain->iommu_lock, flags);
- if (count == 0)
+ if (domain_detach_iommu(domain, iommu) == 0 &&
+ !domain_type_is_vm(domain))
domain_exit(domain);
}
}
@@ -1499,7 +1509,7 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
free_context_table(iommu);
}
-static struct dmar_domain *alloc_domain(bool vm)
+static struct dmar_domain *alloc_domain(int flags)
{
/* domain id for virtual machine, it won't be set in context */
static atomic_t vm_domid = ATOMIC_INIT(0);
@@ -1509,46 +1519,62 @@ static struct dmar_domain *alloc_domain(bool vm)
if (!domain)
return NULL;
+ memset(domain, 0, sizeof(*domain));
domain->nid = -1;
- domain->iommu_count = 0;
- memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
- domain->flags = 0;
+ domain->flags = flags;
spin_lock_init(&domain->iommu_lock);
INIT_LIST_HEAD(&domain->devices);
- if (vm) {
+ if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
domain->id = atomic_inc_return(&vm_domid);
- domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
- }
return domain;
}
-static int iommu_attach_domain(struct dmar_domain *domain,
- struct intel_iommu *iommu)
+static int __iommu_attach_domain(struct dmar_domain *domain,
+ struct intel_iommu *iommu)
{
int num;
unsigned long ndomains;
- unsigned long flags;
ndomains = cap_ndoms(iommu->cap);
-
- spin_lock_irqsave(&iommu->lock, flags);
-
num = find_first_zero_bit(iommu->domain_ids, ndomains);
- if (num >= ndomains) {
- spin_unlock_irqrestore(&iommu->lock, flags);
- printk(KERN_ERR "IOMMU: no free domain ids\n");
- return -ENOMEM;
+ if (num < ndomains) {
+ set_bit(num, iommu->domain_ids);
+ iommu->domains[num] = domain;
+ } else {
+ num = -ENOSPC;
}
- domain->id = num;
- domain->iommu_count++;
- set_bit(num, iommu->domain_ids);
- set_bit(iommu->seq_id, domain->iommu_bmp);
- iommu->domains[num] = domain;
+ return num;
+}
+
+static int iommu_attach_domain(struct dmar_domain *domain,
+ struct intel_iommu *iommu)
+{
+ int num;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ num = __iommu_attach_domain(domain, iommu);
spin_unlock_irqrestore(&iommu->lock, flags);
+ if (num < 0)
+ pr_err("IOMMU: no free domain ids\n");
- return 0;
+ return num;
+}
+
+static int iommu_attach_vm_domain(struct dmar_domain *domain,
+ struct intel_iommu *iommu)
+{
+ int num;
+ unsigned long ndomains;
+
+ ndomains = cap_ndoms(iommu->cap);
+ for_each_set_bit(num, iommu->domain_ids, ndomains)
+ if (iommu->domains[num] == domain)
+ return num;
+
+ return __iommu_attach_domain(domain, iommu);
}
static void iommu_detach_domain(struct dmar_domain *domain,
@@ -1558,17 +1584,53 @@ static void iommu_detach_domain(struct dmar_domain *domain,
int num, ndomains;
spin_lock_irqsave(&iommu->lock, flags);
- ndomains = cap_ndoms(iommu->cap);
- for_each_set_bit(num, iommu->domain_ids, ndomains) {
- if (iommu->domains[num] == domain) {
- clear_bit(num, iommu->domain_ids);
- iommu->domains[num] = NULL;
- break;
+ if (domain_type_is_vm_or_si(domain)) {
+ ndomains = cap_ndoms(iommu->cap);
+ for_each_set_bit(num, iommu->domain_ids, ndomains) {
+ if (iommu->domains[num] == domain) {
+ clear_bit(num, iommu->domain_ids);
+ iommu->domains[num] = NULL;
+ break;
+ }
}
+ } else {
+ clear_bit(domain->id, iommu->domain_ids);
+ iommu->domains[domain->id] = NULL;
}
spin_unlock_irqrestore(&iommu->lock, flags);
}
+static void domain_attach_iommu(struct dmar_domain *domain,
+ struct intel_iommu *iommu)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&domain->iommu_lock, flags);
+ if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
+ domain->iommu_count++;
+ if (domain->iommu_count == 1)
+ domain->nid = iommu->node;
+ domain_update_iommu_cap(domain);
+ }
+ spin_unlock_irqrestore(&domain->iommu_lock, flags);
+}
+
+static int domain_detach_iommu(struct dmar_domain *domain,
+ struct intel_iommu *iommu)
+{
+ unsigned long flags;
+ int count = INT_MAX;
+
+ spin_lock_irqsave(&domain->iommu_lock, flags);
+ if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
+ count = --domain->iommu_count;
+ domain_update_iommu_cap(domain);
+ }
+ spin_unlock_irqrestore(&domain->iommu_lock, flags);
+
+ return count;
+}
+
static struct iova_domain reserved_iova_list;
static struct lock_class_key reserved_rbtree_key;
@@ -1706,9 +1768,7 @@ static void domain_exit(struct dmar_domain *domain)
/* clear attached or cached domains */
rcu_read_lock();
for_each_active_iommu(iommu, drhd)
- if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
- test_bit(iommu->seq_id, domain->iommu_bmp))
- iommu_detach_domain(domain, iommu);
+ iommu_detach_domain(domain, iommu);
rcu_read_unlock();
dma_free_pagelist(freelist);
@@ -1723,8 +1783,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
struct context_entry *context;
unsigned long flags;
struct dma_pte *pgd;
- unsigned long num;
- unsigned long ndomains;
int id;
int agaw;
struct device_domain_info *info = NULL;
@@ -1748,31 +1806,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
id = domain->id;
pgd = domain->pgd;
- if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
- domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
- int found = 0;
-
- /* find an available domain id for this device in iommu */
- ndomains = cap_ndoms(iommu->cap);
- for_each_set_bit(num, iommu->domain_ids, ndomains) {
- if (iommu->domains[num] == domain) {
- id = num;
- found = 1;
- break;
- }
- }
-
- if (found == 0) {
- num = find_first_zero_bit(iommu->domain_ids, ndomains);
- if (num >= ndomains) {
+ if (domain_type_is_vm_or_si(domain)) {
+ if (domain_type_is_vm(domain)) {
+ id = iommu_attach_vm_domain(domain, iommu);
+ if (id < 0) {
spin_unlock_irqrestore(&iommu->lock, flags);
- printk(KERN_ERR "IOMMU: no free domain ids\n");
+ pr_err("IOMMU: no free domain ids\n");
return -EFAULT;
}
-
- set_bit(num, iommu->domain_ids);
- iommu->domains[num] = domain;
- id = num;
}
/* Skip top levels of page tables for
@@ -1824,72 +1865,68 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
(((u16)bus) << 8) | devfn,
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
- iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
+ iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
} else {
iommu_flush_write_buffer(iommu);
}
iommu_enable_dev_iotlb(info);
spin_unlock_irqrestore(&iommu->lock, flags);
- spin_lock_irqsave(&domain->iommu_lock, flags);
- if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
- domain->iommu_count++;
- if (domain->iommu_count == 1)
- domain->nid = iommu->node;
- domain_update_iommu_cap(domain);
- }
- spin_unlock_irqrestore(&domain->iommu_lock, flags);
+ domain_attach_iommu(domain, iommu);
+
return 0;
}
+struct domain_context_mapping_data {
+ struct dmar_domain *domain;
+ struct intel_iommu *iommu;
+ int translation;
+};
+
+static int domain_context_mapping_cb(struct pci_dev *pdev,
+ u16 alias, void *opaque)
+{
+ struct domain_context_mapping_data *data = opaque;
+
+ return domain_context_mapping_one(data->domain, data->iommu,
+ PCI_BUS_NUM(alias), alias & 0xff,
+ data->translation);
+}
+
static int
domain_context_mapping(struct dmar_domain *domain, struct device *dev,
int translation)
{
- int ret;
- struct pci_dev *pdev, *tmp, *parent;
struct intel_iommu *iommu;
u8 bus, devfn;
+ struct domain_context_mapping_data data;
iommu = device_to_iommu(dev, &bus, &devfn);
if (!iommu)
return -ENODEV;
- ret = domain_context_mapping_one(domain, iommu, bus, devfn,
- translation);
- if (ret || !dev_is_pci(dev))
- return ret;
-
- /* dependent device mapping */
- pdev = to_pci_dev(dev);
- tmp = pci_find_upstream_pcie_bridge(pdev);
- if (!tmp)
- return 0;
- /* Secondary interface's bus number and devfn 0 */
- parent = pdev->bus->self;
- while (parent != tmp) {
- ret = domain_context_mapping_one(domain, iommu,
- parent->bus->number,
- parent->devfn, translation);
- if (ret)
- return ret;
- parent = parent->bus->self;
- }
- if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
- return domain_context_mapping_one(domain, iommu,
- tmp->subordinate->number, 0,
- translation);
- else /* this is a legacy PCI bridge */
- return domain_context_mapping_one(domain, iommu,
- tmp->bus->number,
- tmp->devfn,
+ if (!dev_is_pci(dev))
+ return domain_context_mapping_one(domain, iommu, bus, devfn,
translation);
+
+ data.domain = domain;
+ data.iommu = iommu;
+ data.translation = translation;
+
+ return pci_for_each_dma_alias(to_pci_dev(dev),
+ &domain_context_mapping_cb, &data);
+}
+
+static int domain_context_mapped_cb(struct pci_dev *pdev,
+ u16 alias, void *opaque)
+{
+ struct intel_iommu *iommu = opaque;
+
+ return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
}
static int domain_context_mapped(struct device *dev)
{
- int ret;
- struct pci_dev *pdev, *tmp, *parent;
struct intel_iommu *iommu;
u8 bus, devfn;
@@ -1897,30 +1934,11 @@ static int domain_context_mapped(struct device *dev)
if (!iommu)
return -ENODEV;
- ret = device_context_mapped(iommu, bus, devfn);
- if (!ret || !dev_is_pci(dev))
- return ret;
+ if (!dev_is_pci(dev))
+ return device_context_mapped(iommu, bus, devfn);
- /* dependent device mapping */
- pdev = to_pci_dev(dev);
- tmp = pci_find_upstream_pcie_bridge(pdev);
- if (!tmp)
- return ret;
- /* Secondary interface's bus number and devfn 0 */
- parent = pdev->bus->self;
- while (parent != tmp) {
- ret = device_context_mapped(iommu, parent->bus->number,
- parent->devfn);
- if (!ret)
- return ret;
- parent = parent->bus->self;
- }
- if (pci_is_pcie(tmp))
- return device_context_mapped(iommu, tmp->subordinate->number,
- 0);
- else
- return device_context_mapped(iommu, tmp->bus->number,
- tmp->devfn);
+ return !pci_for_each_dma_alias(to_pci_dev(dev),
+ domain_context_mapped_cb, iommu);
}
/* Returns a number of VTD pages, but aligned to MM page size */
@@ -1965,12 +1983,11 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
{
struct dma_pte *first_pte = NULL, *pte = NULL;
phys_addr_t uninitialized_var(pteval);
- int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
unsigned long sg_res;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
- BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
+ BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
return -EINVAL;
@@ -2004,12 +2021,14 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
/* It is large page*/
if (largepage_lvl > 1) {
pteval |= DMA_PTE_LARGE_PAGE;
- /* Ensure that old small page tables are removed to make room
- for superpage, if they exist. */
- dma_pte_clear_range(domain, iov_pfn,
- iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
+ lvl_pages = lvl_to_nr_pages(largepage_lvl);
+ /*
+ * Ensure that old small page tables are
+ * removed to make room for superpage,
+ * if they exist.
+ */
dma_pte_free_pagetable(domain, iov_pfn,
- iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
+ iov_pfn + lvl_pages - 1);
} else {
pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
}
@@ -2102,31 +2121,20 @@ static inline void unlink_domain_info(struct device_domain_info *info)
static void domain_remove_dev_info(struct dmar_domain *domain)
{
- struct device_domain_info *info;
- unsigned long flags, flags2;
+ struct device_domain_info *info, *tmp;
+ unsigned long flags;
spin_lock_irqsave(&device_domain_lock, flags);
- while (!list_empty(&domain->devices)) {
- info = list_entry(domain->devices.next,
- struct device_domain_info, link);
+ list_for_each_entry_safe(info, tmp, &domain->devices, link) {
unlink_domain_info(info);
spin_unlock_irqrestore(&device_domain_lock, flags);
iommu_disable_dev_iotlb(info);
iommu_detach_dev(info->iommu, info->bus, info->devfn);
- if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+ if (domain_type_is_vm(domain)) {
iommu_detach_dependent_devices(info->iommu, info->dev);
- /* clear this iommu in iommu_bmp, update iommu count
- * and capabilities
- */
- spin_lock_irqsave(&domain->iommu_lock, flags2);
- if (test_and_clear_bit(info->iommu->seq_id,
- domain->iommu_bmp)) {
- domain->iommu_count--;
- domain_update_iommu_cap(domain);
- }
- spin_unlock_irqrestore(&domain->iommu_lock, flags2);
+ domain_detach_iommu(domain, info->iommu);
}
free_devinfo_mem(info);
@@ -2181,8 +2189,6 @@ static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
info->dev = dev;
info->domain = domain;
info->iommu = iommu;
- if (!dev)
- domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
spin_lock_irqsave(&device_domain_lock, flags);
if (dev)
@@ -2209,79 +2215,86 @@ static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
return domain;
}
+static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
+{
+ *(u16 *)opaque = alias;
+ return 0;
+}
+
/* domain is initialized */
static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
{
- struct dmar_domain *domain, *free = NULL;
- struct intel_iommu *iommu = NULL;
+ struct dmar_domain *domain, *tmp;
+ struct intel_iommu *iommu;
struct device_domain_info *info;
- struct pci_dev *dev_tmp = NULL;
+ u16 dma_alias;
unsigned long flags;
- u8 bus, devfn, bridge_bus, bridge_devfn;
+ u8 bus, devfn;
domain = find_domain(dev);
if (domain)
return domain;
+ iommu = device_to_iommu(dev, &bus, &devfn);
+ if (!iommu)
+ return NULL;
+
if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
- u16 segment;
- segment = pci_domain_nr(pdev->bus);
- dev_tmp = pci_find_upstream_pcie_bridge(pdev);
- if (dev_tmp) {
- if (pci_is_pcie(dev_tmp)) {
- bridge_bus = dev_tmp->subordinate->number;
- bridge_devfn = 0;
- } else {
- bridge_bus = dev_tmp->bus->number;
- bridge_devfn = dev_tmp->devfn;
- }
- spin_lock_irqsave(&device_domain_lock, flags);
- info = dmar_search_domain_by_dev_info(segment,
- bridge_bus,
- bridge_devfn);
- if (info) {
- iommu = info->iommu;
- domain = info->domain;
- }
- spin_unlock_irqrestore(&device_domain_lock, flags);
- /* pcie-pci bridge already has a domain, uses it */
- if (info)
- goto found_domain;
+ pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
+ PCI_BUS_NUM(dma_alias),
+ dma_alias & 0xff);
+ if (info) {
+ iommu = info->iommu;
+ domain = info->domain;
}
- }
+ spin_unlock_irqrestore(&device_domain_lock, flags);
- iommu = device_to_iommu(dev, &bus, &devfn);
- if (!iommu)
- goto error;
+ /* DMA alias already has a domain, uses it */
+ if (info)
+ goto found_domain;
+ }
/* Allocate and initialize new domain for the device */
- domain = alloc_domain(false);
+ domain = alloc_domain(0);
if (!domain)
- goto error;
- if (iommu_attach_domain(domain, iommu)) {
+ return NULL;
+ domain->id = iommu_attach_domain(domain, iommu);
+ if (domain->id < 0) {
free_domain_mem(domain);
- domain = NULL;
- goto error;
+ return NULL;
}
- free = domain;
- if (domain_init(domain, gaw))
- goto error;
+ domain_attach_iommu(domain, iommu);
+ if (domain_init(domain, gaw)) {
+ domain_exit(domain);
+ return NULL;
+ }
+
+ /* register PCI DMA alias device */
+ if (dev_is_pci(dev)) {
+ tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
+ dma_alias & 0xff, NULL, domain);
+
+ if (!tmp || tmp != domain) {
+ domain_exit(domain);
+ domain = tmp;
+ }
- /* register pcie-to-pci device */
- if (dev_tmp) {
- domain = dmar_insert_dev_info(iommu, bridge_bus, bridge_devfn,
- NULL, domain);
if (!domain)
- goto error;
+ return NULL;
}
found_domain:
- domain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
-error:
- if (free != domain)
- domain_exit(free);
+ tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
+
+ if (!tmp || tmp != domain) {
+ domain_exit(domain);
+ domain = tmp;
+ }
return domain;
}
@@ -2405,6 +2418,7 @@ static inline void iommu_prepare_isa(void)
printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
"floppy might not work\n");
+ pci_dev_put(pdev);
}
#else
static inline void iommu_prepare_isa(void)
@@ -2420,19 +2434,25 @@ static int __init si_domain_init(int hw)
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
int nid, ret = 0;
+ bool first = true;
- si_domain = alloc_domain(false);
+ si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
if (!si_domain)
return -EFAULT;
- si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
-
for_each_active_iommu(iommu, drhd) {
ret = iommu_attach_domain(si_domain, iommu);
- if (ret) {
+ if (ret < 0) {
+ domain_exit(si_domain);
+ return -EFAULT;
+ } else if (first) {
+ si_domain->id = ret;
+ first = false;
+ } else if (si_domain->id != ret) {
domain_exit(si_domain);
return -EFAULT;
}
+ domain_attach_iommu(si_domain, iommu);
}
if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
@@ -2523,22 +2543,46 @@ static bool device_has_rmrr(struct device *dev)
return false;
}
+/*
+ * There are a couple cases where we need to restrict the functionality of
+ * devices associated with RMRRs. The first is when evaluating a device for
+ * identity mapping because problems exist when devices are moved in and out
+ * of domains and their respective RMRR information is lost. This means that
+ * a device with associated RMRRs will never be in a "passthrough" domain.
+ * The second is use of the device through the IOMMU API. This interface
+ * expects to have full control of the IOVA space for the device. We cannot
+ * satisfy both the requirement that RMRR access is maintained and have an
+ * unencumbered IOVA space. We also have no ability to quiesce the device's
+ * use of the RMRR space or even inform the IOMMU API user of the restriction.
+ * We therefore prevent devices associated with an RMRR from participating in
+ * the IOMMU API, which eliminates them from device assignment.
+ *
+ * In both cases we assume that PCI USB devices with RMRRs have them largely
+ * for historical reasons and that the RMRR space is not actively used post
+ * boot. This exclusion may change if vendors begin to abuse it.
+ */
+static bool device_is_rmrr_locked(struct device *dev)
+{
+ if (!device_has_rmrr(dev))
+ return false;
+
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
+ return false;
+ }
+
+ return true;
+}
+
static int iommu_should_identity_map(struct device *dev, int startup)
{
if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
- /*
- * We want to prevent any device associated with an RMRR from
- * getting placed into the SI Domain. This is done because
- * problems exist when devices are moved in and out of domains
- * and their respective RMRR info is lost. We exempt USB devices
- * from this process due to their usage of RMRRs that are known
- * to not be needed after BIOS hand-off to OS.
- */
- if (device_has_rmrr(dev) &&
- (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
+ if (device_is_rmrr_locked(dev))
return 0;
if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
@@ -2850,11 +2894,7 @@ static int __init init_dmars(void)
iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
-
- ret = iommu_enable_translation(iommu);
- if (ret)
- goto free_iommu;
-
+ iommu_enable_translation(iommu);
iommu_disable_protect_mem_regions(iommu);
}
@@ -3091,10 +3131,10 @@ static void flush_unmaps(void)
/* On real hardware multiple invalidations are expensive */
if (cap_caching_mode(iommu->cap))
iommu_flush_iotlb_psi(iommu, domain->id,
- iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1,
+ iova->pfn_lo, iova_size(iova),
!deferred_flush[i].freelist[j], 0);
else {
- mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
+ mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
(uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
}
@@ -3144,9 +3184,7 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *f
spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
-static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir,
- struct dma_attrs *attrs)
+static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
{
struct dmar_domain *domain;
unsigned long start_pfn, last_pfn;
@@ -3190,6 +3228,13 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
}
}
+static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ intel_unmap(dev, dev_addr);
+}
+
static void *intel_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
struct dma_attrs *attrs)
@@ -3246,7 +3291,7 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
size = PAGE_ALIGN(size);
order = get_order(size);
- intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
+ intel_unmap(dev, dma_handle);
if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
__free_pages(page, order);
}
@@ -3255,43 +3300,7 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- struct dmar_domain *domain;
- unsigned long start_pfn, last_pfn;
- struct iova *iova;
- struct intel_iommu *iommu;
- struct page *freelist;
-
- if (iommu_no_mapping(dev))
- return;
-
- domain = find_domain(dev);
- BUG_ON(!domain);
-
- iommu = domain_get_iommu(domain);
-
- iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
- if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
- (unsigned long long)sglist[0].dma_address))
- return;
-
- start_pfn = mm_to_dma_pfn(iova->pfn_lo);
- last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
-
- freelist = domain_unmap(domain, start_pfn, last_pfn);
-
- if (intel_iommu_strict) {
- iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
- last_pfn - start_pfn + 1, !freelist, 0);
- /* free iova */
- __free_iova(&domain->iovad, iova);
- dma_free_pagelist(freelist);
- } else {
- add_unmap(domain, iova, freelist);
- /*
- * queue up the release of the unmap to save the 1/6th of the
- * cpu used up by the iotlb flush operation...
- */
- }
+ intel_unmap(dev, sglist[0].dma_address);
}
static int intel_nontranslate_map_sg(struct device *hddev,
@@ -3355,13 +3364,8 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
if (unlikely(ret)) {
- /* clear the page */
- dma_pte_clear_range(domain, start_vpfn,
- start_vpfn + size - 1);
- /* free page tables */
dma_pte_free_pagetable(domain, start_vpfn,
start_vpfn + size - 1);
- /* free iova */
__free_iova(&domain->iovad, iova);
return 0;
}
@@ -3568,10 +3572,8 @@ static int init_iommu_hw(void)
iommu->flush.flush_context(iommu, 0, 0, 0,
DMA_CCMD_GLOBAL_INVL);
- iommu->flush.flush_iotlb(iommu, 0, 0, 0,
- DMA_TLB_GLOBAL_FLUSH);
- if (iommu_enable_translation(iommu))
- return 1;
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+ iommu_enable_translation(iommu);
iommu_disable_protect_mem_regions(iommu);
}
@@ -3873,9 +3875,7 @@ static int device_notifier(struct notifier_block *nb,
down_read(&dmar_global_lock);
domain_remove_one_dev_info(domain, dev);
- if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
- !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
- list_empty(&domain->devices))
+ if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
domain_exit(domain);
up_read(&dmar_global_lock);
@@ -3935,8 +3935,7 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
rcu_read_lock();
for_each_active_iommu(iommu, drhd)
iommu_flush_iotlb_psi(iommu, si_domain->id,
- iova->pfn_lo,
- iova->pfn_hi - iova->pfn_lo + 1,
+ iova->pfn_lo, iova_size(iova),
!freelist, 0);
rcu_read_unlock();
dma_free_pagelist(freelist);
@@ -3955,6 +3954,63 @@ static struct notifier_block intel_iommu_memory_nb = {
.priority = 0
};
+
+static ssize_t intel_iommu_show_version(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct intel_iommu *iommu = dev_get_drvdata(dev);
+ u32 ver = readl(iommu->reg + DMAR_VER_REG);
+ return sprintf(buf, "%d:%d\n",
+ DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
+}
+static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
+
+static ssize_t intel_iommu_show_address(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct intel_iommu *iommu = dev_get_drvdata(dev);
+ return sprintf(buf, "%llx\n", iommu->reg_phys);
+}
+static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
+
+static ssize_t intel_iommu_show_cap(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct intel_iommu *iommu = dev_get_drvdata(dev);
+ return sprintf(buf, "%llx\n", iommu->cap);
+}
+static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
+
+static ssize_t intel_iommu_show_ecap(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct intel_iommu *iommu = dev_get_drvdata(dev);
+ return sprintf(buf, "%llx\n", iommu->ecap);
+}
+static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
+
+static struct attribute *intel_iommu_attrs[] = {
+ &dev_attr_version.attr,
+ &dev_attr_address.attr,
+ &dev_attr_cap.attr,
+ &dev_attr_ecap.attr,
+ NULL,
+};
+
+static struct attribute_group intel_iommu_group = {
+ .name = "intel-iommu",
+ .attrs = intel_iommu_attrs,
+};
+
+const struct attribute_group *intel_iommu_groups[] = {
+ &intel_iommu_group,
+ NULL,
+};
+
int __init intel_iommu_init(void)
{
int ret = -ENODEV;
@@ -4026,6 +4082,11 @@ int __init intel_iommu_init(void)
init_iommu_pm_ops();
+ for_each_active_iommu(iommu, drhd)
+ iommu->iommu_dev = iommu_device_create(NULL, iommu,
+ intel_iommu_groups,
+ iommu->name);
+
bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
bus_register_notifier(&pci_bus_type, &device_nb);
if (si_domain && !hw_pass_through)
@@ -4044,33 +4105,27 @@ out_free_dmar:
return ret;
}
+static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
+{
+ struct intel_iommu *iommu = opaque;
+
+ iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
+ return 0;
+}
+
+/*
+ * NB - intel-iommu lacks any sort of reference counting for the users of
+ * dependent devices. If multiple endpoints have intersecting dependent
+ * devices, unbinding the driver from any one of them will possibly leave
+ * the others unable to operate.
+ */
static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
struct device *dev)
{
- struct pci_dev *tmp, *parent, *pdev;
-
if (!iommu || !dev || !dev_is_pci(dev))
return;
- pdev = to_pci_dev(dev);
-
- /* dependent device detach */
- tmp = pci_find_upstream_pcie_bridge(pdev);
- /* Secondary interface's bus number and devfn 0 */
- if (tmp) {
- parent = pdev->bus->self;
- while (parent != tmp) {
- iommu_detach_dev(iommu, parent->bus->number,
- parent->devfn);
- parent = parent->bus->self;
- }
- if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
- iommu_detach_dev(iommu,
- tmp->subordinate->number, 0);
- else /* this is a legacy PCI bridge */
- iommu_detach_dev(iommu, tmp->bus->number,
- tmp->devfn);
- }
+ pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
}
static void domain_remove_one_dev_info(struct dmar_domain *domain,
@@ -4117,20 +4172,9 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
spin_unlock_irqrestore(&device_domain_lock, flags);
if (found == 0) {
- unsigned long tmp_flags;
- spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
- clear_bit(iommu->seq_id, domain->iommu_bmp);
- domain->iommu_count--;
- domain_update_iommu_cap(domain);
- spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
-
- if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
- !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
- spin_lock_irqsave(&iommu->lock, tmp_flags);
- clear_bit(domain->id, iommu->domain_ids);
- iommu->domains[domain->id] = NULL;
- spin_unlock_irqrestore(&iommu->lock, tmp_flags);
- }
+ domain_detach_iommu(domain, iommu);
+ if (!domain_type_is_vm_or_si(domain))
+ iommu_detach_domain(domain, iommu);
}
}
@@ -4150,7 +4194,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
domain->iommu_snooping = 0;
domain->iommu_superpage = 0;
domain->max_addr = 0;
- domain->nid = -1;
/* always allocate the top pgd */
domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
@@ -4164,7 +4207,7 @@ static int intel_iommu_domain_init(struct iommu_domain *domain)
{
struct dmar_domain *dmar_domain;
- dmar_domain = alloc_domain(true);
+ dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
if (!dmar_domain) {
printk(KERN_ERR
"intel_iommu_domain_init: dmar_domain == NULL\n");
@@ -4202,14 +4245,18 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
int addr_width;
u8 bus, devfn;
+ if (device_is_rmrr_locked(dev)) {
+ dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
+ return -EPERM;
+ }
+
/* normally dev is not mapped */
if (unlikely(domain_context_mapped(dev))) {
struct dmar_domain *old_domain;
old_domain = find_domain(dev);
if (old_domain) {
- if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
- dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
+ if (domain_type_is_vm_or_si(dmar_domain))
domain_remove_one_dev_info(old_domain, dev);
else
domain_remove_dev_info(old_domain);
@@ -4373,99 +4420,42 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
return 0;
}
-#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
-
static int intel_iommu_add_device(struct device *dev)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct pci_dev *bridge, *dma_pdev = NULL;
+ struct intel_iommu *iommu;
struct iommu_group *group;
- int ret;
u8 bus, devfn;
- if (!device_to_iommu(dev, &bus, &devfn))
+ iommu = device_to_iommu(dev, &bus, &devfn);
+ if (!iommu)
return -ENODEV;
- bridge = pci_find_upstream_pcie_bridge(pdev);
- if (bridge) {
- if (pci_is_pcie(bridge))
- dma_pdev = pci_get_domain_bus_and_slot(
- pci_domain_nr(pdev->bus),
- bridge->subordinate->number, 0);
- if (!dma_pdev)
- dma_pdev = pci_dev_get(bridge);
- } else
- dma_pdev = pci_dev_get(pdev);
-
- /* Account for quirked devices */
- swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
+ iommu_device_link(iommu->iommu_dev, dev);
- /*
- * If it's a multifunction device that does not support our
- * required ACS flags, add to the same group as lowest numbered
- * function that also does not suport the required ACS flags.
- */
- if (dma_pdev->multifunction &&
- !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
- u8 i, slot = PCI_SLOT(dma_pdev->devfn);
-
- for (i = 0; i < 8; i++) {
- struct pci_dev *tmp;
+ group = iommu_group_get_for_dev(dev);
- tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
- if (!tmp)
- continue;
-
- if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
- swap_pci_ref(&dma_pdev, tmp);
- break;
- }
- pci_dev_put(tmp);
- }
- }
-
- /*
- * Devices on the root bus go through the iommu. If that's not us,
- * find the next upstream device and test ACS up to the root bus.
- * Finding the next device may require skipping virtual buses.
- */
- while (!pci_is_root_bus(dma_pdev->bus)) {
- struct pci_bus *bus = dma_pdev->bus;
-
- while (!bus->self) {
- if (!pci_is_root_bus(bus))
- bus = bus->parent;
- else
- goto root_bus;
- }
-
- if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
- break;
-
- swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
- }
-
-root_bus:
- group = iommu_group_get(&dma_pdev->dev);
- pci_dev_put(dma_pdev);
- if (!group) {
- group = iommu_group_alloc();
- if (IS_ERR(group))
- return PTR_ERR(group);
- }
-
- ret = iommu_group_add_device(group, dev);
+ if (IS_ERR(group))
+ return PTR_ERR(group);
iommu_group_put(group);
- return ret;
+ return 0;
}
static void intel_iommu_remove_device(struct device *dev)
{
+ struct intel_iommu *iommu;
+ u8 bus, devfn;
+
+ iommu = device_to_iommu(dev, &bus, &devfn);
+ if (!iommu)
+ return;
+
iommu_group_remove_device(dev);
+
+ iommu_device_unlink(iommu->iommu_dev, dev);
}
-static struct iommu_ops intel_iommu_ops = {
+static const struct iommu_ops intel_iommu_ops = {
.domain_init = intel_iommu_domain_init,
.domain_destroy = intel_iommu_domain_destroy,
.attach_dev = intel_iommu_attach_device,
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 9b17489..0df41f6 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -70,6 +70,11 @@ static int get_irte(int irq, struct irte *entry)
raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
+ if (unlikely(!irq_iommu->iommu)) {
+ raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+ return -1;
+ }
+
index = irq_iommu->irte_index + irq_iommu->sub_handle;
*entry = *(irq_iommu->iommu->ir_table->base + index);
@@ -369,29 +374,52 @@ static int set_hpet_sid(struct irte *irte, u8 id)
return 0;
}
+struct set_msi_sid_data {
+ struct pci_dev *pdev;
+ u16 alias;
+};
+
+static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque)
+{
+ struct set_msi_sid_data *data = opaque;
+
+ data->pdev = pdev;
+ data->alias = alias;
+
+ return 0;
+}
+
static int set_msi_sid(struct irte *irte, struct pci_dev *dev)
{
- struct pci_dev *bridge;
+ struct set_msi_sid_data data;
if (!irte || !dev)
return -1;
- /* PCIe device or Root Complex integrated PCI device */
- if (pci_is_pcie(dev) || !dev->bus->parent) {
- set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
- (dev->bus->number << 8) | dev->devfn);
- return 0;
- }
+ pci_for_each_dma_alias(dev, set_msi_sid_cb, &data);
- bridge = pci_find_upstream_pcie_bridge(dev);
- if (bridge) {
- if (pci_is_pcie(bridge))/* this is a PCIe-to-PCI/PCIX bridge */
- set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
- (bridge->bus->number << 8) | dev->bus->number);
- else /* this is a legacy PCI bridge */
- set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
- (bridge->bus->number << 8) | bridge->devfn);
- }
+ /*
+ * DMA alias provides us with a PCI device and alias. The only case
+ * where the it will return an alias on a different bus than the
+ * device is the case of a PCIe-to-PCI bridge, where the alias is for
+ * the subordinate bus. In this case we can only verify the bus.
+ *
+ * If the alias device is on a different bus than our source device
+ * then we have a topology based alias, use it.
+ *
+ * Otherwise, the alias is for a device DMA quirk and we cannot
+ * assume that MSI uses the same requester ID. Therefore use the
+ * original device.
+ */
+ if (PCI_BUS_NUM(data.alias) != data.pdev->bus->number)
+ set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
+ PCI_DEVID(PCI_BUS_NUM(data.alias),
+ dev->bus->number));
+ else if (data.pdev->bus->number != dev->bus->number)
+ set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias);
+ else
+ set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
+ PCI_DEVID(dev->bus->number, dev->devfn));
return 0;
}
diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c
new file mode 100644
index 0000000..39b2d91
--- /dev/null
+++ b/drivers/iommu/iommu-sysfs.c
@@ -0,0 +1,134 @@
+/*
+ * IOMMU sysfs class support
+ *
+ * Copyright (C) 2014 Red Hat, Inc. All rights reserved.
+ * Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+/*
+ * We provide a common class "devices" group which initially has no attributes.
+ * As devices are added to the IOMMU, we'll add links to the group.
+ */
+static struct attribute *devices_attr[] = {
+ NULL,
+};
+
+static const struct attribute_group iommu_devices_attr_group = {
+ .name = "devices",
+ .attrs = devices_attr,
+};
+
+static const struct attribute_group *iommu_dev_groups[] = {
+ &iommu_devices_attr_group,
+ NULL,
+};
+
+static void iommu_release_device(struct device *dev)
+{
+ kfree(dev);
+}
+
+static struct class iommu_class = {
+ .name = "iommu",
+ .dev_release = iommu_release_device,
+ .dev_groups = iommu_dev_groups,
+};
+
+static int __init iommu_dev_init(void)
+{
+ return class_register(&iommu_class);
+}
+postcore_initcall(iommu_dev_init);
+
+/*
+ * Create an IOMMU device and return a pointer to it. IOMMU specific
+ * attributes can be provided as an attribute group, allowing a unique
+ * namespace per IOMMU type.
+ */
+struct device *iommu_device_create(struct device *parent, void *drvdata,
+ const struct attribute_group **groups,
+ const char *fmt, ...)
+{
+ struct device *dev;
+ va_list vargs;
+ int ret;
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return ERR_PTR(-ENOMEM);
+
+ device_initialize(dev);
+
+ dev->class = &iommu_class;
+ dev->parent = parent;
+ dev->groups = groups;
+ dev_set_drvdata(dev, drvdata);
+
+ va_start(vargs, fmt);
+ ret = kobject_set_name_vargs(&dev->kobj, fmt, vargs);
+ va_end(vargs);
+ if (ret)
+ goto error;
+
+ ret = device_add(dev);
+ if (ret)
+ goto error;
+
+ return dev;
+
+error:
+ put_device(dev);
+ return ERR_PTR(ret);
+}
+
+void iommu_device_destroy(struct device *dev)
+{
+ if (!dev || IS_ERR(dev))
+ return;
+
+ device_unregister(dev);
+}
+
+/*
+ * IOMMU drivers can indicate a device is managed by a given IOMMU using
+ * this interface. A link to the device will be created in the "devices"
+ * directory of the IOMMU device in sysfs and an "iommu" link will be
+ * created under the linked device, pointing back at the IOMMU device.
+ */
+int iommu_device_link(struct device *dev, struct device *link)
+{
+ int ret;
+
+ if (!dev || IS_ERR(dev))
+ return -ENODEV;
+
+ ret = sysfs_add_link_to_group(&dev->kobj, "devices",
+ &link->kobj, dev_name(link));
+ if (ret)
+ return ret;
+
+ ret = sysfs_create_link_nowarn(&link->kobj, &dev->kobj, "iommu");
+ if (ret)
+ sysfs_remove_link_from_group(&dev->kobj, "devices",
+ dev_name(link));
+
+ return ret;
+}
+
+void iommu_device_unlink(struct device *dev, struct device *link)
+{
+ if (!dev || IS_ERR(dev))
+ return;
+
+ sysfs_remove_link(&link->kobj, "iommu");
+ sysfs_remove_link_from_group(&dev->kobj, "devices", dev_name(link));
+}
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index e5555fc..1698360 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -29,12 +29,17 @@
#include <linux/idr.h>
#include <linux/notifier.h>
#include <linux/err.h>
+#include <linux/pci.h>
#include <trace/events/iommu.h>
static struct kset *iommu_group_kset;
static struct ida iommu_group_ida;
static struct mutex iommu_group_mutex;
+struct iommu_callback_data {
+ const struct iommu_ops *ops;
+};
+
struct iommu_group {
struct kobject kobj;
struct kobject *devices_kobj;
@@ -514,9 +519,191 @@ int iommu_group_id(struct iommu_group *group)
}
EXPORT_SYMBOL_GPL(iommu_group_id);
+/*
+ * To consider a PCI device isolated, we require ACS to support Source
+ * Validation, Request Redirection, Completer Redirection, and Upstream
+ * Forwarding. This effectively means that devices cannot spoof their
+ * requester ID, requests and completions cannot be redirected, and all
+ * transactions are forwarded upstream, even as it passes through a
+ * bridge where the target device is downstream.
+ */
+#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
+
+struct group_for_pci_data {
+ struct pci_dev *pdev;
+ struct iommu_group *group;
+};
+
+/*
+ * DMA alias iterator callback, return the last seen device. Stop and return
+ * the IOMMU group if we find one along the way.
+ */
+static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
+{
+ struct group_for_pci_data *data = opaque;
+
+ data->pdev = pdev;
+ data->group = iommu_group_get(&pdev->dev);
+
+ return data->group != NULL;
+}
+
+/*
+ * Use standard PCI bus topology, isolation features, and DMA alias quirks
+ * to find or create an IOMMU group for a device.
+ */
+static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev)
+{
+ struct group_for_pci_data data;
+ struct pci_bus *bus;
+ struct iommu_group *group = NULL;
+ struct pci_dev *tmp;
+
+ /*
+ * Find the upstream DMA alias for the device. A device must not
+ * be aliased due to topology in order to have its own IOMMU group.
+ * If we find an alias along the way that already belongs to a
+ * group, use it.
+ */
+ if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data))
+ return data.group;
+
+ pdev = data.pdev;
+
+ /*
+ * Continue upstream from the point of minimum IOMMU granularity
+ * due to aliases to the point where devices are protected from
+ * peer-to-peer DMA by PCI ACS. Again, if we find an existing
+ * group, use it.
+ */
+ for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
+ if (!bus->self)
+ continue;
+
+ if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
+ break;
+
+ pdev = bus->self;
+
+ group = iommu_group_get(&pdev->dev);
+ if (group)
+ return group;
+ }
+
+ /*
+ * Next we need to consider DMA alias quirks. If one device aliases
+ * to another, they should be grouped together. It's theoretically
+ * possible that aliases could create chains of devices where each
+ * device aliases another device. If we then factor in multifunction
+ * ACS grouping requirements, each alias could incorporate a new slot
+ * with multiple functions, each with aliases. This is all extremely
+ * unlikely as DMA alias quirks are typically only used for PCIe
+ * devices where we usually have a single slot per bus. Furthermore,
+ * the alias quirk is usually to another function within the slot
+ * (and ACS multifunction is not supported) or to a different slot
+ * that doesn't physically exist. The likely scenario is therefore
+ * that everything on the bus gets grouped together. To reduce the
+ * problem space, share the IOMMU group for all devices on the bus
+ * if a DMA alias quirk is present on the bus.
+ */
+ tmp = NULL;
+ for_each_pci_dev(tmp) {
+ if (tmp->bus != pdev->bus ||
+ !(tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN))
+ continue;
+
+ pci_dev_put(tmp);
+ tmp = NULL;
+
+ /* We have an alias quirk, search for an existing group */
+ for_each_pci_dev(tmp) {
+ struct iommu_group *group_tmp;
+
+ if (tmp->bus != pdev->bus)
+ continue;
+
+ group_tmp = iommu_group_get(&tmp->dev);
+ if (!group) {
+ group = group_tmp;
+ continue;
+ }
+
+ if (group_tmp) {
+ WARN_ON(group != group_tmp);
+ iommu_group_put(group_tmp);
+ }
+ }
+
+ return group ? group : iommu_group_alloc();
+ }
+
+ /*
+ * Non-multifunction devices or multifunction devices supporting
+ * ACS get their own group.
+ */
+ if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
+ return iommu_group_alloc();
+
+ /*
+ * Multifunction devices not supporting ACS share a group with other
+ * similar devices in the same slot.
+ */
+ tmp = NULL;
+ for_each_pci_dev(tmp) {
+ if (tmp == pdev || tmp->bus != pdev->bus ||
+ PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) ||
+ pci_acs_enabled(tmp, REQ_ACS_FLAGS))
+ continue;
+
+ group = iommu_group_get(&tmp->dev);
+ if (group) {
+ pci_dev_put(tmp);
+ return group;
+ }
+ }
+
+ /* No shared group found, allocate new */
+ return iommu_group_alloc();
+}
+
+/**
+ * iommu_group_get_for_dev - Find or create the IOMMU group for a device
+ * @dev: target device
+ *
+ * This function is intended to be called by IOMMU drivers and extended to
+ * support common, bus-defined algorithms when determining or creating the
+ * IOMMU group for a device. On success, the caller will hold a reference
+ * to the returned IOMMU group, which will already include the provided
+ * device. The reference should be released with iommu_group_put().
+ */
+struct iommu_group *iommu_group_get_for_dev(struct device *dev)
+{
+ struct iommu_group *group = ERR_PTR(-EIO);
+ int ret;
+
+ group = iommu_group_get(dev);
+ if (group)
+ return group;
+
+ if (dev_is_pci(dev))
+ group = iommu_group_get_for_pci_dev(to_pci_dev(dev));
+
+ if (IS_ERR(group))
+ return group;
+
+ ret = iommu_group_add_device(group, dev);
+ if (ret) {
+ iommu_group_put(group);
+ return ERR_PTR(ret);
+ }
+
+ return group;
+}
+
static int add_iommu_group(struct device *dev, void *data)
{
- struct iommu_ops *ops = data;
+ struct iommu_callback_data *cb = data;
+ const struct iommu_ops *ops = cb->ops;
if (!ops->add_device)
return -ENODEV;
@@ -532,7 +719,7 @@ static int iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;
- struct iommu_ops *ops = dev->bus->iommu_ops;
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
struct iommu_group *group;
unsigned long group_action = 0;
@@ -585,10 +772,14 @@ static struct notifier_block iommu_bus_nb = {
.notifier_call = iommu_bus_notifier,
};
-static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops)
+static void iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
{
+ struct iommu_callback_data cb = {
+ .ops = ops,
+ };
+
bus_register_notifier(bus, &iommu_bus_nb);
- bus_for_each_dev(bus, NULL, ops, add_iommu_group);
+ bus_for_each_dev(bus, NULL, &cb, add_iommu_group);
}
/**
@@ -604,7 +795,7 @@ static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops)
* is set up. With this function the iommu-driver can set the iommu-ops
* afterwards.
*/
-int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops)
+int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops)
{
if (bus->iommu_ops != NULL)
return -EBUSY;
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 53cde08..7dab5cb 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1120,7 +1120,7 @@ static void ipmmu_remove_device(struct device *dev)
dev->archdata.iommu = NULL;
}
-static struct iommu_ops ipmmu_ops = {
+static const struct iommu_ops ipmmu_ops = {
.domain_init = ipmmu_domain_init,
.domain_destroy = ipmmu_domain_destroy,
.attach_dev = ipmmu_attach_device,
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index f5ff657..49f41d6 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -674,7 +674,7 @@ fail:
return 0;
}
-static struct iommu_ops msm_iommu_ops = {
+static const struct iommu_ops msm_iommu_ops = {
.domain_init = msm_iommu_domain_init,
.domain_destroy = msm_iommu_domain_destroy,
.attach_dev = msm_iommu_attach_dev,
diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c
index 80fffba..531658d 100644
--- a/drivers/iommu/omap-iommu-debug.c
+++ b/drivers/iommu/omap-iommu-debug.c
@@ -213,116 +213,6 @@ static ssize_t debug_read_pagetable(struct file *file, char __user *userbuf,
return bytes;
}
-static ssize_t debug_read_mmap(struct file *file, char __user *userbuf,
- size_t count, loff_t *ppos)
-{
- struct device *dev = file->private_data;
- struct omap_iommu *obj = dev_to_omap_iommu(dev);
- char *p, *buf;
- struct iovm_struct *tmp;
- int uninitialized_var(i);
- ssize_t bytes;
-
- buf = (char *)__get_free_page(GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
- p = buf;
-
- p += sprintf(p, "%-3s %-8s %-8s %6s %8s\n",
- "No", "start", "end", "size", "flags");
- p += sprintf(p, "-------------------------------------------------\n");
-
- mutex_lock(&iommu_debug_lock);
-
- list_for_each_entry(tmp, &obj->mmap, list) {
- size_t len;
- const char *str = "%3d %08x-%08x %6x %8x\n";
- const int maxcol = 39;
-
- len = tmp->da_end - tmp->da_start;
- p += snprintf(p, maxcol, str,
- i, tmp->da_start, tmp->da_end, len, tmp->flags);
-
- if (PAGE_SIZE - (p - buf) < maxcol)
- break;
- i++;
- }
-
- bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
-
- mutex_unlock(&iommu_debug_lock);
- free_page((unsigned long)buf);
-
- return bytes;
-}
-
-static ssize_t debug_read_mem(struct file *file, char __user *userbuf,
- size_t count, loff_t *ppos)
-{
- struct device *dev = file->private_data;
- char *p, *buf;
- struct iovm_struct *area;
- ssize_t bytes;
-
- count = min_t(ssize_t, count, PAGE_SIZE);
-
- buf = (char *)__get_free_page(GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
- p = buf;
-
- mutex_lock(&iommu_debug_lock);
-
- area = omap_find_iovm_area(dev, (u32)ppos);
- if (!area) {
- bytes = -EINVAL;
- goto err_out;
- }
- memcpy(p, area->va, count);
- p += count;
-
- bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
-err_out:
- mutex_unlock(&iommu_debug_lock);
- free_page((unsigned long)buf);
-
- return bytes;
-}
-
-static ssize_t debug_write_mem(struct file *file, const char __user *userbuf,
- size_t count, loff_t *ppos)
-{
- struct device *dev = file->private_data;
- struct iovm_struct *area;
- char *p, *buf;
-
- count = min_t(size_t, count, PAGE_SIZE);
-
- buf = (char *)__get_free_page(GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
- p = buf;
-
- mutex_lock(&iommu_debug_lock);
-
- if (copy_from_user(p, userbuf, count)) {
- count = -EFAULT;
- goto err_out;
- }
-
- area = omap_find_iovm_area(dev, (u32)ppos);
- if (!area) {
- count = -EINVAL;
- goto err_out;
- }
- memcpy(area->va, p, count);
-err_out:
- mutex_unlock(&iommu_debug_lock);
- free_page((unsigned long)buf);
-
- return count;
-}
-
#define DEBUG_FOPS(name) \
static const struct file_operations debug_##name##_fops = { \
.open = simple_open, \
@@ -342,8 +232,6 @@ DEBUG_FOPS_RO(ver);
DEBUG_FOPS_RO(regs);
DEBUG_FOPS_RO(tlb);
DEBUG_FOPS(pagetable);
-DEBUG_FOPS_RO(mmap);
-DEBUG_FOPS(mem);
#define __DEBUG_ADD_FILE(attr, mode) \
{ \
@@ -389,8 +277,6 @@ static int iommu_debug_register(struct device *dev, void *data)
DEBUG_ADD_FILE_RO(regs);
DEBUG_ADD_FILE_RO(tlb);
DEBUG_ADD_FILE(pagetable);
- DEBUG_ADD_FILE_RO(mmap);
- DEBUG_ADD_FILE(mem);
return 0;
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 895af06..e202b0c 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -959,31 +959,18 @@ static int omap_iommu_probe(struct platform_device *pdev)
return err;
if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8)
return -EINVAL;
- /*
- * da_start and da_end are needed for omap-iovmm, so hardcode
- * these values as used by OMAP3 ISP - the only user for
- * omap-iovmm
- */
- obj->da_start = 0;
- obj->da_end = 0xfffff000;
if (of_find_property(of, "ti,iommu-bus-err-back", NULL))
obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN;
} else {
obj->nr_tlb_entries = pdata->nr_tlb_entries;
obj->name = pdata->name;
- obj->da_start = pdata->da_start;
- obj->da_end = pdata->da_end;
}
- if (obj->da_end <= obj->da_start)
- return -EINVAL;
obj->dev = &pdev->dev;
obj->ctx = (void *)obj + sizeof(*obj);
spin_lock_init(&obj->iommu_lock);
- mutex_init(&obj->mmap_lock);
spin_lock_init(&obj->page_table_lock);
- INIT_LIST_HEAD(&obj->mmap);
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
obj->regbase = devm_ioremap_resource(obj->dev, res);
@@ -1291,7 +1278,7 @@ static void omap_iommu_remove_device(struct device *dev)
kfree(arch_data);
}
-static struct iommu_ops omap_iommu_ops = {
+static const struct iommu_ops omap_iommu_ops = {
.domain_init = omap_iommu_domain_init,
.domain_destroy = omap_iommu_domain_destroy,
.attach_dev = omap_iommu_attach_dev,
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index ea920c3..1275a82 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -46,12 +46,7 @@ struct omap_iommu {
int nr_tlb_entries;
- struct list_head mmap;
- struct mutex mmap_lock; /* protect mmap */
-
void *ctx; /* iommu context: registres saved area */
- u32 da_start;
- u32 da_end;
int has_bus_err_back;
};
@@ -154,9 +149,12 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)
#define MMU_RAM_PADDR_MASK \
((~0UL >> MMU_RAM_PADDR_SHIFT) << MMU_RAM_PADDR_SHIFT)
+#define MMU_RAM_ENDIAN_SHIFT 9
#define MMU_RAM_ENDIAN_MASK (1 << MMU_RAM_ENDIAN_SHIFT)
+#define MMU_RAM_ENDIAN_LITTLE (0 << MMU_RAM_ENDIAN_SHIFT)
#define MMU_RAM_ENDIAN_BIG (1 << MMU_RAM_ENDIAN_SHIFT)
+#define MMU_RAM_ELSZ_SHIFT 7
#define MMU_RAM_ELSZ_MASK (3 << MMU_RAM_ELSZ_SHIFT)
#define MMU_RAM_ELSZ_8 (0 << MMU_RAM_ELSZ_SHIFT)
#define MMU_RAM_ELSZ_16 (1 << MMU_RAM_ELSZ_SHIFT)
diff --git a/drivers/iommu/omap-iovmm.c b/drivers/iommu/omap-iovmm.c
deleted file mode 100644
index d147259..0000000
--- a/drivers/iommu/omap-iovmm.c
+++ /dev/null
@@ -1,791 +0,0 @@
-/*
- * omap iommu: simple virtual address space management
- *
- * Copyright (C) 2008-2009 Nokia Corporation
- *
- * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/device.h>
-#include <linux/scatterlist.h>
-#include <linux/iommu.h>
-#include <linux/omap-iommu.h>
-#include <linux/platform_data/iommu-omap.h>
-
-#include <asm/cacheflush.h>
-#include <asm/mach/map.h>
-
-#include "omap-iopgtable.h"
-#include "omap-iommu.h"
-
-/*
- * IOVMF_FLAGS: attribute for iommu virtual memory area(iovma)
- *
- * lower 16 bit is used for h/w and upper 16 bit is for s/w.
- */
-#define IOVMF_SW_SHIFT 16
-
-/*
- * iovma: h/w flags derived from cam and ram attribute
- */
-#define IOVMF_CAM_MASK (~((1 << 10) - 1))
-#define IOVMF_RAM_MASK (~IOVMF_CAM_MASK)
-
-#define IOVMF_PGSZ_MASK (3 << 0)
-#define IOVMF_PGSZ_1M MMU_CAM_PGSZ_1M
-#define IOVMF_PGSZ_64K MMU_CAM_PGSZ_64K
-#define IOVMF_PGSZ_4K MMU_CAM_PGSZ_4K
-#define IOVMF_PGSZ_16M MMU_CAM_PGSZ_16M
-
-#define IOVMF_ENDIAN_MASK (1 << 9)
-#define IOVMF_ENDIAN_BIG MMU_RAM_ENDIAN_BIG
-
-#define IOVMF_ELSZ_MASK (3 << 7)
-#define IOVMF_ELSZ_16 MMU_RAM_ELSZ_16
-#define IOVMF_ELSZ_32 MMU_RAM_ELSZ_32
-#define IOVMF_ELSZ_NONE MMU_RAM_ELSZ_NONE
-
-#define IOVMF_MIXED_MASK (1 << 6)
-#define IOVMF_MIXED MMU_RAM_MIXED
-
-/*
- * iovma: s/w flags, used for mapping and umapping internally.
- */
-#define IOVMF_MMIO (1 << IOVMF_SW_SHIFT)
-#define IOVMF_ALLOC (2 << IOVMF_SW_SHIFT)
-#define IOVMF_ALLOC_MASK (3 << IOVMF_SW_SHIFT)
-
-/* "superpages" is supported just with physically linear pages */
-#define IOVMF_DISCONT (1 << (2 + IOVMF_SW_SHIFT))
-#define IOVMF_LINEAR (2 << (2 + IOVMF_SW_SHIFT))
-#define IOVMF_LINEAR_MASK (3 << (2 + IOVMF_SW_SHIFT))
-
-#define IOVMF_DA_FIXED (1 << (4 + IOVMF_SW_SHIFT))
-
-static struct kmem_cache *iovm_area_cachep;
-
-/* return the offset of the first scatterlist entry in a sg table */
-static unsigned int sgtable_offset(const struct sg_table *sgt)
-{
- if (!sgt || !sgt->nents)
- return 0;
-
- return sgt->sgl->offset;
-}
-
-/* return total bytes of sg buffers */
-static size_t sgtable_len(const struct sg_table *sgt)
-{
- unsigned int i, total = 0;
- struct scatterlist *sg;
-
- if (!sgt)
- return 0;
-
- for_each_sg(sgt->sgl, sg, sgt->nents, i) {
- size_t bytes;
-
- bytes = sg->length + sg->offset;
-
- if (!iopgsz_ok(bytes)) {
- pr_err("%s: sg[%d] not iommu pagesize(%u %u)\n",
- __func__, i, bytes, sg->offset);
- return 0;
- }
-
- if (i && sg->offset) {
- pr_err("%s: sg[%d] offset not allowed in internal entries\n",
- __func__, i);
- return 0;
- }
-
- total += bytes;
- }
-
- return total;
-}
-#define sgtable_ok(x) (!!sgtable_len(x))
-
-static unsigned max_alignment(u32 addr)
-{
- int i;
- unsigned pagesize[] = { SZ_16M, SZ_1M, SZ_64K, SZ_4K, };
- for (i = 0; i < ARRAY_SIZE(pagesize) && addr & (pagesize[i] - 1); i++)
- ;
- return (i < ARRAY_SIZE(pagesize)) ? pagesize[i] : 0;
-}
-
-/*
- * calculate the optimal number sg elements from total bytes based on
- * iommu superpages
- */
-static unsigned sgtable_nents(size_t bytes, u32 da, u32 pa)
-{
- unsigned nr_entries = 0, ent_sz;
-
- if (!IS_ALIGNED(bytes, PAGE_SIZE)) {
- pr_err("%s: wrong size %08x\n", __func__, bytes);
- return 0;
- }
-
- while (bytes) {
- ent_sz = max_alignment(da | pa);
- ent_sz = min_t(unsigned, ent_sz, iopgsz_max(bytes));
- nr_entries++;
- da += ent_sz;
- pa += ent_sz;
- bytes -= ent_sz;
- }
-
- return nr_entries;
-}
-
-/* allocate and initialize sg_table header(a kind of 'superblock') */
-static struct sg_table *sgtable_alloc(const size_t bytes, u32 flags,
- u32 da, u32 pa)
-{
- unsigned int nr_entries;
- int err;
- struct sg_table *sgt;
-
- if (!bytes)
- return ERR_PTR(-EINVAL);
-
- if (!IS_ALIGNED(bytes, PAGE_SIZE))
- return ERR_PTR(-EINVAL);
-
- if (flags & IOVMF_LINEAR) {
- nr_entries = sgtable_nents(bytes, da, pa);
- if (!nr_entries)
- return ERR_PTR(-EINVAL);
- } else
- nr_entries = bytes / PAGE_SIZE;
-
- sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
- if (!sgt)
- return ERR_PTR(-ENOMEM);
-
- err = sg_alloc_table(sgt, nr_entries, GFP_KERNEL);
- if (err) {
- kfree(sgt);
- return ERR_PTR(err);
- }
-
- pr_debug("%s: sgt:%p(%d entries)\n", __func__, sgt, nr_entries);
-
- return sgt;
-}
-
-/* free sg_table header(a kind of superblock) */
-static void sgtable_free(struct sg_table *sgt)
-{
- if (!sgt)
- return;
-
- sg_free_table(sgt);
- kfree(sgt);
-
- pr_debug("%s: sgt:%p\n", __func__, sgt);
-}
-
-/* map 'sglist' to a contiguous mpu virtual area and return 'va' */
-static void *vmap_sg(const struct sg_table *sgt)
-{
- u32 va;
- size_t total;
- unsigned int i;
- struct scatterlist *sg;
- struct vm_struct *new;
- const struct mem_type *mtype;
-
- mtype = get_mem_type(MT_DEVICE);
- if (!mtype)
- return ERR_PTR(-EINVAL);
-
- total = sgtable_len(sgt);
- if (!total)
- return ERR_PTR(-EINVAL);
-
- new = __get_vm_area(total, VM_IOREMAP, VMALLOC_START, VMALLOC_END);
- if (!new)
- return ERR_PTR(-ENOMEM);
- va = (u32)new->addr;
-
- for_each_sg(sgt->sgl, sg, sgt->nents, i) {
- size_t bytes;
- u32 pa;
- int err;
-
- pa = sg_phys(sg) - sg->offset;
- bytes = sg->length + sg->offset;
-
- BUG_ON(bytes != PAGE_SIZE);
-
- err = ioremap_page(va, pa, mtype);
- if (err)
- goto err_out;
-
- va += bytes;
- }
-
- flush_cache_vmap((unsigned long)new->addr,
- (unsigned long)(new->addr + total));
- return new->addr;
-
-err_out:
- WARN_ON(1); /* FIXME: cleanup some mpu mappings */
- vunmap(new->addr);
- return ERR_PTR(-EAGAIN);
-}
-
-static inline void vunmap_sg(const void *va)
-{
- vunmap(va);
-}
-
-static struct iovm_struct *__find_iovm_area(struct omap_iommu *obj,
- const u32 da)
-{
- struct iovm_struct *tmp;
-
- list_for_each_entry(tmp, &obj->mmap, list) {
- if ((da >= tmp->da_start) && (da < tmp->da_end)) {
- size_t len;
-
- len = tmp->da_end - tmp->da_start;
-
- dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n",
- __func__, tmp->da_start, da, tmp->da_end, len,
- tmp->flags);
-
- return tmp;
- }
- }
-
- return NULL;
-}
-
-/**
- * omap_find_iovm_area - find iovma which includes @da
- * @dev: client device
- * @da: iommu device virtual address
- *
- * Find the existing iovma starting at @da
- */
-struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da)
-{
- struct omap_iommu *obj = dev_to_omap_iommu(dev);
- struct iovm_struct *area;
-
- mutex_lock(&obj->mmap_lock);
- area = __find_iovm_area(obj, da);
- mutex_unlock(&obj->mmap_lock);
-
- return area;
-}
-EXPORT_SYMBOL_GPL(omap_find_iovm_area);
-
-/*
- * This finds the hole(area) which fits the requested address and len
- * in iovmas mmap, and returns the new allocated iovma.
- */
-static struct iovm_struct *alloc_iovm_area(struct omap_iommu *obj, u32 da,
- size_t bytes, u32 flags)
-{
- struct iovm_struct *new, *tmp;
- u32 start, prev_end, alignment;
-
- if (!obj || !bytes)
- return ERR_PTR(-EINVAL);
-
- start = da;
- alignment = PAGE_SIZE;
-
- if (~flags & IOVMF_DA_FIXED) {
- /* Don't map address 0 */
- start = obj->da_start ? obj->da_start : alignment;
-
- if (flags & IOVMF_LINEAR)
- alignment = iopgsz_max(bytes);
- start = roundup(start, alignment);
- } else if (start < obj->da_start || start > obj->da_end ||
- obj->da_end - start < bytes) {
- return ERR_PTR(-EINVAL);
- }
-
- tmp = NULL;
- if (list_empty(&obj->mmap))
- goto found;
-
- prev_end = 0;
- list_for_each_entry(tmp, &obj->mmap, list) {
-
- if (prev_end > start)
- break;
-
- if (tmp->da_start > start && (tmp->da_start - start) >= bytes)
- goto found;
-
- if (tmp->da_end >= start && ~flags & IOVMF_DA_FIXED)
- start = roundup(tmp->da_end + 1, alignment);
-
- prev_end = tmp->da_end;
- }
-
- if ((start >= prev_end) && (obj->da_end - start >= bytes))
- goto found;
-
- dev_dbg(obj->dev, "%s: no space to fit %08x(%x) flags: %08x\n",
- __func__, da, bytes, flags);
-
- return ERR_PTR(-EINVAL);
-
-found:
- new = kmem_cache_zalloc(iovm_area_cachep, GFP_KERNEL);
- if (!new)
- return ERR_PTR(-ENOMEM);
-
- new->iommu = obj;
- new->da_start = start;
- new->da_end = start + bytes;
- new->flags = flags;
-
- /*
- * keep ascending order of iovmas
- */
- if (tmp)
- list_add_tail(&new->list, &tmp->list);
- else
- list_add(&new->list, &obj->mmap);
-
- dev_dbg(obj->dev, "%s: found %08x-%08x-%08x(%x) %08x\n",
- __func__, new->da_start, start, new->da_end, bytes, flags);
-
- return new;
-}
-
-static void free_iovm_area(struct omap_iommu *obj, struct iovm_struct *area)
-{
- size_t bytes;
-
- BUG_ON(!obj || !area);
-
- bytes = area->da_end - area->da_start;
-
- dev_dbg(obj->dev, "%s: %08x-%08x(%x) %08x\n",
- __func__, area->da_start, area->da_end, bytes, area->flags);
-
- list_del(&area->list);
- kmem_cache_free(iovm_area_cachep, area);
-}
-
-/**
- * omap_da_to_va - convert (d) to (v)
- * @dev: client device
- * @da: iommu device virtual address
- * @va: mpu virtual address
- *
- * Returns mpu virtual addr which corresponds to a given device virtual addr
- */
-void *omap_da_to_va(struct device *dev, u32 da)
-{
- struct omap_iommu *obj = dev_to_omap_iommu(dev);
- void *va = NULL;
- struct iovm_struct *area;
-
- mutex_lock(&obj->mmap_lock);
-
- area = __find_iovm_area(obj, da);
- if (!area) {
- dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da);
- goto out;
- }
- va = area->va;
-out:
- mutex_unlock(&obj->mmap_lock);
-
- return va;
-}
-EXPORT_SYMBOL_GPL(omap_da_to_va);
-
-static void sgtable_fill_vmalloc(struct sg_table *sgt, void *_va)
-{
- unsigned int i;
- struct scatterlist *sg;
- void *va = _va;
- void *va_end;
-
- for_each_sg(sgt->sgl, sg, sgt->nents, i) {
- struct page *pg;
- const size_t bytes = PAGE_SIZE;
-
- /*
- * iommu 'superpage' isn't supported with 'omap_iommu_vmalloc()'
- */
- pg = vmalloc_to_page(va);
- BUG_ON(!pg);
- sg_set_page(sg, pg, bytes, 0);
-
- va += bytes;
- }
-
- va_end = _va + PAGE_SIZE * i;
-}
-
-static inline void sgtable_drain_vmalloc(struct sg_table *sgt)
-{
- /*
- * Actually this is not necessary at all, just exists for
- * consistency of the code readability.
- */
- BUG_ON(!sgt);
-}
-
-/* create 'da' <-> 'pa' mapping from 'sgt' */
-static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
- const struct sg_table *sgt, u32 flags)
-{
- int err;
- unsigned int i, j;
- struct scatterlist *sg;
- u32 da = new->da_start;
-
- if (!domain || !sgt)
- return -EINVAL;
-
- BUG_ON(!sgtable_ok(sgt));
-
- for_each_sg(sgt->sgl, sg, sgt->nents, i) {
- u32 pa;
- size_t bytes;
-
- pa = sg_phys(sg) - sg->offset;
- bytes = sg->length + sg->offset;
-
- flags &= ~IOVMF_PGSZ_MASK;
-
- if (bytes_to_iopgsz(bytes) < 0)
- goto err_out;
-
- pr_debug("%s: [%d] %08x %08x(%x)\n", __func__,
- i, da, pa, bytes);
-
- err = iommu_map(domain, da, pa, bytes, flags);
- if (err)
- goto err_out;
-
- da += bytes;
- }
- return 0;
-
-err_out:
- da = new->da_start;
-
- for_each_sg(sgt->sgl, sg, i, j) {
- size_t bytes;
-
- bytes = sg->length + sg->offset;
-
- /* ignore failures.. we're already handling one */
- iommu_unmap(domain, da, bytes);
-
- da += bytes;
- }
- return err;
-}
-
-/* release 'da' <-> 'pa' mapping */
-static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj,
- struct iovm_struct *area)
-{
- u32 start;
- size_t total = area->da_end - area->da_start;
- const struct sg_table *sgt = area->sgt;
- struct scatterlist *sg;
- int i;
- size_t unmapped;
-
- BUG_ON(!sgtable_ok(sgt));
- BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE));
-
- start = area->da_start;
- for_each_sg(sgt->sgl, sg, sgt->nents, i) {
- size_t bytes;
-
- bytes = sg->length + sg->offset;
-
- unmapped = iommu_unmap(domain, start, bytes);
- if (unmapped < bytes)
- break;
-
- dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n",
- __func__, start, bytes, area->flags);
-
- BUG_ON(!IS_ALIGNED(bytes, PAGE_SIZE));
-
- total -= bytes;
- start += bytes;
- }
- BUG_ON(total);
-}
-
-/* template function for all unmapping */
-static struct sg_table *unmap_vm_area(struct iommu_domain *domain,
- struct omap_iommu *obj, const u32 da,
- void (*fn)(const void *), u32 flags)
-{
- struct sg_table *sgt = NULL;
- struct iovm_struct *area;
-
- if (!IS_ALIGNED(da, PAGE_SIZE)) {
- dev_err(obj->dev, "%s: alignment err(%08x)\n", __func__, da);
- return NULL;
- }
-
- mutex_lock(&obj->mmap_lock);
-
- area = __find_iovm_area(obj, da);
- if (!area) {
- dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da);
- goto out;
- }
-
- if ((area->flags & flags) != flags) {
- dev_err(obj->dev, "%s: wrong flags(%08x)\n", __func__,
- area->flags);
- goto out;
- }
- sgt = (struct sg_table *)area->sgt;
-
- unmap_iovm_area(domain, obj, area);
-
- fn(area->va);
-
- dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n", __func__,
- area->da_start, da, area->da_end,
- area->da_end - area->da_start, area->flags);
-
- free_iovm_area(obj, area);
-out:
- mutex_unlock(&obj->mmap_lock);
-
- return sgt;
-}
-
-static u32 map_iommu_region(struct iommu_domain *domain, struct omap_iommu *obj,
- u32 da, const struct sg_table *sgt, void *va,
- size_t bytes, u32 flags)
-{
- int err = -ENOMEM;
- struct iovm_struct *new;
-
- mutex_lock(&obj->mmap_lock);
-
- new = alloc_iovm_area(obj, da, bytes, flags);
- if (IS_ERR(new)) {
- err = PTR_ERR(new);
- goto err_alloc_iovma;
- }
- new->va = va;
- new->sgt = sgt;
-
- if (map_iovm_area(domain, new, sgt, new->flags))
- goto err_map;
-
- mutex_unlock(&obj->mmap_lock);
-
- dev_dbg(obj->dev, "%s: da:%08x(%x) flags:%08x va:%p\n",
- __func__, new->da_start, bytes, new->flags, va);
-
- return new->da_start;
-
-err_map:
- free_iovm_area(obj, new);
-err_alloc_iovma:
- mutex_unlock(&obj->mmap_lock);
- return err;
-}
-
-static inline u32
-__iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj,
- u32 da, const struct sg_table *sgt,
- void *va, size_t bytes, u32 flags)
-{
- return map_iommu_region(domain, obj, da, sgt, va, bytes, flags);
-}
-
-/**
- * omap_iommu_vmap - (d)-(p)-(v) address mapper
- * @domain: iommu domain
- * @dev: client device
- * @sgt: address of scatter gather table
- * @flags: iovma and page property
- *
- * Creates 1-n-1 mapping with given @sgt and returns @da.
- * All @sgt element must be io page size aligned.
- */
-u32 omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da,
- const struct sg_table *sgt, u32 flags)
-{
- struct omap_iommu *obj = dev_to_omap_iommu(dev);
- size_t bytes;
- void *va = NULL;
-
- if (!obj || !obj->dev || !sgt)
- return -EINVAL;
-
- bytes = sgtable_len(sgt);
- if (!bytes)
- return -EINVAL;
- bytes = PAGE_ALIGN(bytes);
-
- if (flags & IOVMF_MMIO) {
- va = vmap_sg(sgt);
- if (IS_ERR(va))
- return PTR_ERR(va);
- }
-
- flags |= IOVMF_DISCONT;
- flags |= IOVMF_MMIO;
-
- da = __iommu_vmap(domain, obj, da, sgt, va, bytes, flags);
- if (IS_ERR_VALUE(da))
- vunmap_sg(va);
-
- return da + sgtable_offset(sgt);
-}
-EXPORT_SYMBOL_GPL(omap_iommu_vmap);
-
-/**
- * omap_iommu_vunmap - release virtual mapping obtained by 'omap_iommu_vmap()'
- * @domain: iommu domain
- * @dev: client device
- * @da: iommu device virtual address
- *
- * Free the iommu virtually contiguous memory area starting at
- * @da, which was returned by 'omap_iommu_vmap()'.
- */
-struct sg_table *
-omap_iommu_vunmap(struct iommu_domain *domain, struct device *dev, u32 da)
-{
- struct omap_iommu *obj = dev_to_omap_iommu(dev);
- struct sg_table *sgt;
- /*
- * 'sgt' is allocated before 'omap_iommu_vmalloc()' is called.
- * Just returns 'sgt' to the caller to free
- */
- da &= PAGE_MASK;
- sgt = unmap_vm_area(domain, obj, da, vunmap_sg,
- IOVMF_DISCONT | IOVMF_MMIO);
- if (!sgt)
- dev_dbg(obj->dev, "%s: No sgt\n", __func__);
- return sgt;
-}
-EXPORT_SYMBOL_GPL(omap_iommu_vunmap);
-
-/**
- * omap_iommu_vmalloc - (d)-(p)-(v) address allocator and mapper
- * @dev: client device
- * @da: contiguous iommu virtual memory
- * @bytes: allocation size
- * @flags: iovma and page property
- *
- * Allocate @bytes linearly and creates 1-n-1 mapping and returns
- * @da again, which might be adjusted if 'IOVMF_DA_FIXED' is not set.
- */
-u32
-omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev, u32 da,
- size_t bytes, u32 flags)
-{
- struct omap_iommu *obj = dev_to_omap_iommu(dev);
- void *va;
- struct sg_table *sgt;
-
- if (!obj || !obj->dev || !bytes)
- return -EINVAL;
-
- bytes = PAGE_ALIGN(bytes);
-
- va = vmalloc(bytes);
- if (!va)
- return -ENOMEM;
-
- flags |= IOVMF_DISCONT;
- flags |= IOVMF_ALLOC;
-
- sgt = sgtable_alloc(bytes, flags, da, 0);
- if (IS_ERR(sgt)) {
- da = PTR_ERR(sgt);
- goto err_sgt_alloc;
- }
- sgtable_fill_vmalloc(sgt, va);
-
- da = __iommu_vmap(domain, obj, da, sgt, va, bytes, flags);
- if (IS_ERR_VALUE(da))
- goto err_iommu_vmap;
-
- return da;
-
-err_iommu_vmap:
- sgtable_drain_vmalloc(sgt);
- sgtable_free(sgt);
-err_sgt_alloc:
- vfree(va);
- return da;
-}
-EXPORT_SYMBOL_GPL(omap_iommu_vmalloc);
-
-/**
- * omap_iommu_vfree - release memory allocated by 'omap_iommu_vmalloc()'
- * @dev: client device
- * @da: iommu device virtual address
- *
- * Frees the iommu virtually continuous memory area starting at
- * @da, as obtained from 'omap_iommu_vmalloc()'.
- */
-void omap_iommu_vfree(struct iommu_domain *domain, struct device *dev,
- const u32 da)
-{
- struct omap_iommu *obj = dev_to_omap_iommu(dev);
- struct sg_table *sgt;
-
- sgt = unmap_vm_area(domain, obj, da, vfree,
- IOVMF_DISCONT | IOVMF_ALLOC);
- if (!sgt)
- dev_dbg(obj->dev, "%s: No sgt\n", __func__);
- sgtable_free(sgt);
-}
-EXPORT_SYMBOL_GPL(omap_iommu_vfree);
-
-static int __init iovmm_init(void)
-{
- const unsigned long flags = SLAB_HWCACHE_ALIGN;
- struct kmem_cache *p;
-
- p = kmem_cache_create("iovm_area_cache", sizeof(struct iovm_struct), 0,
- flags, NULL);
- if (!p)
- return -ENOMEM;
- iovm_area_cachep = p;
-
- return 0;
-}
-module_init(iovmm_init);
-
-static void __exit iovmm_exit(void)
-{
- kmem_cache_destroy(iovm_area_cachep);
-}
-module_exit(iovmm_exit);
-
-MODULE_DESCRIPTION("omap iommu: simple virtual address space management");
-MODULE_AUTHOR("Hiroshi DOYU <Hiroshi.DOYU@nokia.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/pci.h b/drivers/iommu/pci.h
deleted file mode 100644
index 352d80a..0000000
--- a/drivers/iommu/pci.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright (C) 2013 Red Hat, Inc.
- * Copyright (C) 2013 Freescale Semiconductor, Inc.
- *
- */
-#ifndef __IOMMU_PCI_H
-#define __IOMMU_PCI_H
-
-/* Helper function for swapping pci device reference */
-static inline void swap_pci_ref(struct pci_dev **from, struct pci_dev *to)
-{
- pci_dev_put(*from);
- *from = to;
-}
-
-#endif /* __IOMMU_PCI_H */
diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c
index 464acda..1333e6fb 100644
--- a/drivers/iommu/shmobile-iommu.c
+++ b/drivers/iommu/shmobile-iommu.c
@@ -354,7 +354,7 @@ static int shmobile_iommu_add_device(struct device *dev)
return 0;
}
-static struct iommu_ops shmobile_iommu_ops = {
+static const struct iommu_ops shmobile_iommu_ops = {
.domain_init = shmobile_iommu_domain_init,
.domain_destroy = shmobile_iommu_domain_destroy,
.attach_dev = shmobile_iommu_attach_device,
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index dba1a9f..b10a8ec 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -309,7 +309,7 @@ static int gart_iommu_domain_has_cap(struct iommu_domain *domain,
return 0;
}
-static struct iommu_ops gart_iommu_ops = {
+static const struct iommu_ops gart_iommu_ops = {
.domain_init = gart_iommu_domain_init,
.domain_destroy = gart_iommu_domain_destroy,
.attach_dev = gart_iommu_attach_dev,
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 605b5b4..792da5e 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -947,7 +947,7 @@ static void smmu_iommu_domain_destroy(struct iommu_domain *domain)
dev_dbg(smmu->dev, "smmu_as@%p\n", as);
}
-static struct iommu_ops smmu_iommu_ops = {
+static const struct iommu_ops smmu_iommu_ops = {
.domain_init = smmu_iommu_domain_init,
.domain_destroy = smmu_iommu_domain_destroy,
.attach_dev = smmu_iommu_attach_dev,
OpenPOWER on IntegriCloud