diff options
Diffstat (limited to 'drivers/iommu/intel/iommu.c')
-rw-r--r-- | drivers/iommu/intel/iommu.c | 609 |
1 files changed, 237 insertions, 372 deletions
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 5c0dce78586a..31bc50e538a3 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -17,7 +17,6 @@ #include <linux/dma-direct.h> #include <linux/dma-iommu.h> #include <linux/dmi.h> -#include <linux/intel-iommu.h> #include <linux/intel-svm.h> #include <linux/memory.h> #include <linux/pci.h> @@ -26,6 +25,7 @@ #include <linux/syscore_ops.h> #include <linux/tboot.h> +#include "iommu.h" #include "../irq_remapping.h" #include "../iommu-sva-lib.h" #include "pasid.h" @@ -126,13 +126,8 @@ static inline unsigned long virt_to_dma_pfn(void *p) return page_to_dma_pfn(virt_to_page(p)); } -/* global iommu list, set NULL for ignored DMAR units */ -static struct intel_iommu **g_iommus; - static void __init check_tylersburg_isoch(void); static int rwbf_quirk; -static inline struct device_domain_info * -dmar_search_domain_by_dev_info(int segment, int bus, int devfn); /* * set to 1 to panic kernel if can't successfully enable VT-d @@ -168,38 +163,6 @@ static phys_addr_t root_entry_uctp(struct root_entry *re) return re->hi & VTD_PAGE_MASK; } -static inline void context_clear_pasid_enable(struct context_entry *context) -{ - context->lo &= ~(1ULL << 11); -} - -static inline bool context_pasid_enabled(struct context_entry *context) -{ - return !!(context->lo & (1ULL << 11)); -} - -static inline void context_set_copied(struct context_entry *context) -{ - context->hi |= (1ull << 3); -} - -static inline bool context_copied(struct context_entry *context) -{ - return !!(context->hi & (1ULL << 3)); -} - -static inline bool __context_present(struct context_entry *context) -{ - return (context->lo & 1); -} - -bool context_present(struct context_entry *context) -{ - return context_pasid_enabled(context) ? - __context_present(context) : - __context_present(context) && !context_copied(context); -} - static inline void context_set_present(struct context_entry *context) { context->lo |= 1; @@ -247,6 +210,26 @@ static inline void context_clear_entry(struct context_entry *context) context->hi = 0; } +static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) +{ + if (!iommu->copied_tables) + return false; + + return test_bit(((long)bus << 8) | devfn, iommu->copied_tables); +} + +static inline void +set_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) +{ + set_bit(((long)bus << 8) | devfn, iommu->copied_tables); +} + +static inline void +clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) +{ + clear_bit(((long)bus << 8) | devfn, iommu->copied_tables); +} + /* * This domain is a statically identity mapping domain. * 1. This domain creats a static 1:1 mapping to all usable memory. @@ -256,10 +239,6 @@ static inline void context_clear_entry(struct context_entry *context) static struct dmar_domain *si_domain; static int hw_pass_through = 1; -#define for_each_domain_iommu(idx, domain) \ - for (idx = 0; idx < g_num_of_iommus; idx++) \ - if (domain->iommu_refcnt[idx]) - struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -293,12 +272,7 @@ static LIST_HEAD(dmar_satc_units); #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) -/* bitmap for indexing intel_iommus */ -static int g_num_of_iommus; - -static void domain_remove_dev_info(struct dmar_domain *domain); static void dmar_remove_one_dev_info(struct device *dev); -static void __dmar_remove_one_dev_info(struct device_domain_info *info); int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON); int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON); @@ -314,12 +288,6 @@ static int iommu_skip_te_disable; #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 -int intel_iommu_gfx_mapped; -EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); - -DEFINE_SPINLOCK(device_domain_lock); -static LIST_HEAD(device_domain_list); - const struct iommu_ops intel_iommu_ops; static bool translation_pre_enabled(struct intel_iommu *iommu) @@ -422,14 +390,36 @@ static inline int domain_pfn_supported(struct dmar_domain *domain, return !(addr_width < BITS_PER_LONG && pfn >> addr_width); } +/* + * Calculate the Supported Adjusted Guest Address Widths of an IOMMU. + * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of + * the returned SAGAW. + */ +static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu) +{ + unsigned long fl_sagaw, sl_sagaw; + + fl_sagaw = BIT(2) | (cap_5lp_support(iommu->cap) ? BIT(3) : 0); + sl_sagaw = cap_sagaw(iommu->cap); + + /* Second level only. */ + if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) + return sl_sagaw; + + /* First level only. */ + if (!ecap_slts(iommu->ecap)) + return fl_sagaw; + + return fl_sagaw & sl_sagaw; +} + static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) { unsigned long sagaw; int agaw; - sagaw = cap_sagaw(iommu->cap); - for (agaw = width_to_agaw(max_gaw); - agaw >= 0; agaw--) { + sagaw = __iommu_calculate_sagaw(iommu); + for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) { if (test_bit(agaw, &sagaw)) break; } @@ -455,24 +445,6 @@ int iommu_calculate_agaw(struct intel_iommu *iommu) return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH); } -/* This functionin only returns single iommu in a domain */ -struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) -{ - int iommu_id; - - /* si_domain and vm domain should not get here. */ - if (WARN_ON(!iommu_is_dma_domain(&domain->domain))) - return NULL; - - for_each_domain_iommu(iommu_id, domain) - break; - - if (iommu_id < 0 || iommu_id >= g_num_of_iommus) - return NULL; - - return g_iommus[iommu_id]; -} - static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu) { return sm_supported(iommu) ? @@ -481,16 +453,16 @@ static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu) static void domain_update_iommu_coherency(struct dmar_domain *domain) { + struct iommu_domain_info *info; struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; bool found = false; - int i; + unsigned long i; domain->iommu_coherency = true; - - for_each_domain_iommu(i, domain) { + xa_for_each(&domain->iommu_array, i, info) { found = true; - if (!iommu_paging_structure_coherency(g_iommus[i])) { + if (!iommu_paging_structure_coherency(info->iommu)) { domain->iommu_coherency = false; break; } @@ -543,16 +515,10 @@ static int domain_update_device_node(struct dmar_domain *domain) { struct device_domain_info *info; int nid = NUMA_NO_NODE; + unsigned long flags; - assert_spin_locked(&device_domain_lock); - - if (list_empty(&domain->devices)) - return NUMA_NO_NODE; - + spin_lock_irqsave(&domain->lock, flags); list_for_each_entry(info, &domain->devices, link) { - if (!info->dev) - continue; - /* * There could possibly be multiple device numa nodes as devices * within the same domain may sit behind different IOMMUs. There @@ -563,6 +529,7 @@ static int domain_update_device_node(struct dmar_domain *domain) if (nid != NUMA_NO_NODE) break; } + spin_unlock_irqrestore(&domain->lock, flags); return nid; } @@ -622,6 +589,13 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, struct context_entry *context; u64 *entry; + /* + * Except that the caller requested to allocate a new entry, + * returning a copied context entry makes no sense. + */ + if (!alloc && context_copied(iommu, bus, devfn)) + return NULL; + entry = &root->lo; if (sm_supported(iommu)) { if (devfn >= 0x80) { @@ -804,26 +778,23 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) { struct context_entry *context; int ret = 0; - unsigned long flags; - spin_lock_irqsave(&iommu->lock, flags); + spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, 0); if (context) ret = context_present(context); - spin_unlock_irqrestore(&iommu->lock, flags); + spin_unlock(&iommu->lock); return ret; } static void free_context_table(struct intel_iommu *iommu) { - int i; - unsigned long flags; struct context_entry *context; + int i; + + if (!iommu->root_entry) + return; - spin_lock_irqsave(&iommu->lock, flags); - if (!iommu->root_entry) { - goto out; - } for (i = 0; i < ROOT_ENTRY_NR; i++) { context = iommu_context_addr(iommu, i, 0, 0); if (context) @@ -835,36 +806,18 @@ static void free_context_table(struct intel_iommu *iommu) context = iommu_context_addr(iommu, i, 0x80, 0); if (context) free_pgtable_page(context); - } + free_pgtable_page(iommu->root_entry); iommu->root_entry = NULL; -out: - spin_unlock_irqrestore(&iommu->lock, flags); } #ifdef CONFIG_DMAR_DEBUG -static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, u8 bus, u8 devfn) +static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, + u8 bus, u8 devfn, struct dma_pte *parent, int level) { - struct device_domain_info *info; - struct dma_pte *parent, *pte; - struct dmar_domain *domain; - int offset, level; - - info = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn); - if (!info || !info->domain) { - pr_info("device [%02x:%02x.%d] not probed\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); - return; - } - - domain = info->domain; - level = agaw_to_level(domain->agaw); - parent = domain->pgd; - if (!parent) { - pr_info("no page table setup\n"); - return; - } + struct dma_pte *pte; + int offset; while (1) { offset = pfn_level_offset(pfn, level); @@ -891,9 +844,10 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, struct pasid_entry *entries, *pte; struct context_entry *ctx_entry; struct root_entry *rt_entry; + int i, dir_index, index, level; u8 devfn = source_id & 0xff; u8 bus = source_id >> 8; - int i, dir_index, index; + struct dma_pte *pgtable; pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr); @@ -921,8 +875,11 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, ctx_entry->hi, ctx_entry->lo); /* legacy mode does not require PASID entries */ - if (!sm_supported(iommu)) + if (!sm_supported(iommu)) { + level = agaw_to_level(ctx_entry->hi & 7); + pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK); goto pgtable_walk; + } /* get the pointer to pasid directory entry */ dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK); @@ -949,8 +906,16 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, for (i = 0; i < ARRAY_SIZE(pte->val); i++) pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]); + if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) { + level = pte->val[2] & BIT_ULL(2) ? 5 : 4; + pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK); + } else { + level = agaw_to_level((pte->val[0] >> 2) & 0x7); + pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK); + } + pgtable_walk: - pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn); + pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level); } #endif @@ -1234,7 +1199,6 @@ static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn, static int iommu_alloc_root_entry(struct intel_iommu *iommu) { struct root_entry *root; - unsigned long flags; root = (struct root_entry *)alloc_pgtable_page(iommu->node); if (!root) { @@ -1244,10 +1208,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) } __iommu_flush_cache(iommu, root, ROOT_SIZE); - - spin_lock_irqsave(&iommu->lock, flags); iommu->root_entry = root; - spin_unlock_irqrestore(&iommu->lock, flags); return 0; } @@ -1389,23 +1350,24 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, } static struct device_domain_info * -iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu, - u8 bus, u8 devfn) +iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu, + u8 bus, u8 devfn) { struct device_domain_info *info; - - assert_spin_locked(&device_domain_lock); + unsigned long flags; if (!iommu->qi) return NULL; - list_for_each_entry(info, &domain->devices, link) + spin_lock_irqsave(&domain->lock, flags); + list_for_each_entry(info, &domain->devices, link) { if (info->iommu == iommu && info->bus == bus && info->devfn == devfn) { - if (info->ats_supported && info->dev) - return info; - break; + spin_unlock_irqrestore(&domain->lock, flags); + return info->ats_supported ? info : NULL; } + } + spin_unlock_irqrestore(&domain->lock, flags); return NULL; } @@ -1414,24 +1376,23 @@ static void domain_update_iotlb(struct dmar_domain *domain) { struct device_domain_info *info; bool has_iotlb_device = false; + unsigned long flags; - assert_spin_locked(&device_domain_lock); - - list_for_each_entry(info, &domain->devices, link) + spin_lock_irqsave(&domain->lock, flags); + list_for_each_entry(info, &domain->devices, link) { if (info->ats_enabled) { has_iotlb_device = true; break; } - + } domain->has_iotlb_device = has_iotlb_device; + spin_unlock_irqrestore(&domain->lock, flags); } static void iommu_enable_dev_iotlb(struct device_domain_info *info) { struct pci_dev *pdev; - assert_spin_locked(&device_domain_lock); - if (!info || !dev_is_pci(info->dev)) return; @@ -1477,8 +1438,6 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info) { struct pci_dev *pdev; - assert_spin_locked(&device_domain_lock); - if (!dev_is_pci(info->dev)) return; @@ -1518,17 +1477,16 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info, static void iommu_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, unsigned mask) { - unsigned long flags; struct device_domain_info *info; + unsigned long flags; if (!domain->has_iotlb_device) return; - spin_lock_irqsave(&device_domain_lock, flags); + spin_lock_irqsave(&domain->lock, flags); list_for_each_entry(info, &domain->devices, link) __iommu_flush_dev_iotlb(info, addr, mask); - - spin_unlock_irqrestore(&device_domain_lock, flags); + spin_unlock_irqrestore(&domain->lock, flags); } static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, @@ -1539,7 +1497,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, unsigned int aligned_pages = __roundup_pow_of_two(pages); unsigned int mask = ilog2(aligned_pages); uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; - u16 did = domain->iommu_did[iommu->seq_id]; + u16 did = domain_id_iommu(domain, iommu); BUG_ON(pages == 0); @@ -1609,11 +1567,12 @@ static inline void __mapping_notify_one(struct intel_iommu *iommu, static void intel_flush_iotlb_all(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); - int idx; + struct iommu_domain_info *info; + unsigned long idx; - for_each_domain_iommu(idx, dmar_domain) { - struct intel_iommu *iommu = g_iommus[idx]; - u16 did = dmar_domain->iommu_did[iommu->seq_id]; + xa_for_each(&dmar_domain->iommu_array, idx, info) { + struct intel_iommu *iommu = info->iommu; + u16 did = domain_id_iommu(dmar_domain, iommu); if (domain_use_first_level(dmar_domain)) qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0); @@ -1719,23 +1678,16 @@ static int iommu_init_domains(struct intel_iommu *iommu) static void disable_dmar_iommu(struct intel_iommu *iommu) { - struct device_domain_info *info, *tmp; - unsigned long flags; - if (!iommu->domain_ids) return; - spin_lock_irqsave(&device_domain_lock, flags); - list_for_each_entry_safe(info, tmp, &device_domain_list, global) { - if (info->iommu != iommu) - continue; - - if (!info->dev || !info->domain) - continue; - - __dmar_remove_one_dev_info(info); - } - spin_unlock_irqrestore(&device_domain_lock, flags); + /* + * All iommu domains must have been detached from the devices, + * hence there should be no domain IDs in use. + */ + if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap)) + > NUM_RESERVED_DID)) + return; if (iommu->gcmd & DMA_GCMD_TE) iommu_disable_translation(iommu); @@ -1748,7 +1700,10 @@ static void free_dmar_iommu(struct intel_iommu *iommu) iommu->domain_ids = NULL; } - g_iommus[iommu->seq_id] = NULL; + if (iommu->copied_tables) { + bitmap_free(iommu->copied_tables); + iommu->copied_tables = NULL; + } /* free context mapping */ free_context_table(iommu); @@ -1795,55 +1750,77 @@ static struct dmar_domain *alloc_domain(unsigned int type) domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); + spin_lock_init(&domain->lock); + xa_init(&domain->iommu_array); return domain; } -/* Must be called with iommu->lock */ static int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) { + struct iommu_domain_info *info, *curr; unsigned long ndomains; - int num; + int num, ret = -ENOSPC; - assert_spin_locked(&device_domain_lock); - assert_spin_locked(&iommu->lock); + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; - domain->iommu_refcnt[iommu->seq_id] += 1; - if (domain->iommu_refcnt[iommu->seq_id] == 1) { - ndomains = cap_ndoms(iommu->cap); - num = find_first_zero_bit(iommu->domain_ids, ndomains); + spin_lock(&iommu->lock); + curr = xa_load(&domain->iommu_array, iommu->seq_id); + if (curr) { + curr->refcnt++; + spin_unlock(&iommu->lock); + kfree(info); + return 0; + } - if (num >= ndomains) { - pr_err("%s: No free domain ids\n", iommu->name); - domain->iommu_refcnt[iommu->seq_id] -= 1; - return -ENOSPC; - } + ndomains = cap_ndoms(iommu->cap); + num = find_first_zero_bit(iommu->domain_ids, ndomains); + if (num >= ndomains) { + pr_err("%s: No free domain ids\n", iommu->name); + goto err_unlock; + } - set_bit(num, iommu->domain_ids); - domain->iommu_did[iommu->seq_id] = num; - domain->nid = iommu->node; - domain_update_iommu_cap(domain); + set_bit(num, iommu->domain_ids); + info->refcnt = 1; + info->did = num; + info->iommu = iommu; + curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id, + NULL, info, GFP_ATOMIC); + if (curr) { + ret = xa_err(curr) ? : -EBUSY; + goto err_clear; } + domain_update_iommu_cap(domain); + spin_unlock(&iommu->lock); return 0; + +err_clear: + clear_bit(info->did, iommu->domain_ids); +err_unlock: + spin_unlock(&iommu->lock); + kfree(info); + return ret; } static void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) { - int num; + struct iommu_domain_info *info; - assert_spin_locked(&device_domain_lock); - assert_spin_locked(&iommu->lock); - - domain->iommu_refcnt[iommu->seq_id] -= 1; - if (domain->iommu_refcnt[iommu->seq_id] == 0) { - num = domain->iommu_did[iommu->seq_id]; - clear_bit(num, iommu->domain_ids); + spin_lock(&iommu->lock); + info = xa_load(&domain->iommu_array, iommu->seq_id); + if (--info->refcnt == 0) { + clear_bit(info->did, iommu->domain_ids); + xa_erase(&domain->iommu_array, iommu->seq_id); + domain->nid = NUMA_NO_NODE; domain_update_iommu_cap(domain); - domain->iommu_did[iommu->seq_id] = 0; + kfree(info); } + spin_unlock(&iommu->lock); } static inline int guestwidth_to_adjustwidth(int gaw) @@ -1862,10 +1839,6 @@ static inline int guestwidth_to_adjustwidth(int gaw) static void domain_exit(struct dmar_domain *domain) { - - /* Remove associated devices and clear attached or cached domains */ - domain_remove_dev_info(domain); - if (domain->pgd) { LIST_HEAD(freelist); @@ -1873,6 +1846,9 @@ static void domain_exit(struct dmar_domain *domain) put_pages_list(&freelist); } + if (WARN_ON(!list_empty(&domain->devices))) + return; + kfree(domain); } @@ -1930,11 +1906,11 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct pasid_table *table, u8 bus, u8 devfn) { - u16 did = domain->iommu_did[iommu->seq_id]; + struct device_domain_info *info = + iommu_support_dev_iotlb(domain, iommu, bus, devfn); + u16 did = domain_id_iommu(domain, iommu); int translation = CONTEXT_TT_MULTI_LEVEL; - struct device_domain_info *info = NULL; struct context_entry *context; - unsigned long flags; int ret; WARN_ON(did == 0); @@ -1947,16 +1923,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain, BUG_ON(!domain->pgd); - spin_lock_irqsave(&device_domain_lock, flags); spin_lock(&iommu->lock); - ret = -ENOMEM; context = iommu_context_addr(iommu, bus, devfn, 1); if (!context) goto out_unlock; ret = 0; - if (context_present(context)) + if (context_present(context) && !context_copied(iommu, bus, devfn)) goto out_unlock; /* @@ -1968,7 +1942,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, * in-flight DMA will exist, and we don't need to worry anymore * hereafter. */ - if (context_copied(context)) { + if (context_copied(iommu, bus, devfn)) { u16 did_old = context_domain_id(context); if (did_old < cap_ndoms(iommu->cap)) { @@ -1979,6 +1953,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, iommu->flush.flush_iotlb(iommu, did_old, 0, 0, DMA_TLB_DSI_FLUSH); } + + clear_context_copied(iommu, bus, devfn); } context_clear_entry(context); @@ -2000,7 +1976,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, * Setup the Device-TLB enable bit and Page request * Enable bit: */ - info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); if (info && info->ats_supported) context_set_sm_dte(context); if (info && info->pri_supported) @@ -2023,7 +1998,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, goto out_unlock; } - info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); if (info && info->ats_supported) translation = CONTEXT_TT_DEV_IOTLB; else @@ -2069,7 +2043,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, out_unlock: spin_unlock(&iommu->lock); - spin_unlock_irqrestore(&device_domain_lock, flags); return ret; } @@ -2186,8 +2159,9 @@ static void switch_to_super_page(struct dmar_domain *domain, unsigned long end_pfn, int level) { unsigned long lvl_pages = lvl_to_nr_pages(level); + struct iommu_domain_info *info; struct dma_pte *pte = NULL; - int i; + unsigned long i; while (start_pfn <= end_pfn) { if (!pte) @@ -2198,8 +2172,8 @@ static void switch_to_super_page(struct dmar_domain *domain, start_pfn + lvl_pages - 1, level + 1); - for_each_domain_iommu(i, domain) - iommu_flush_iotlb_psi(g_iommus[i], domain, + xa_for_each(&domain->iommu_array, i, info) + iommu_flush_iotlb_psi(info->iommu, domain, start_pfn, lvl_pages, 0, 0); } @@ -2313,16 +2287,15 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 { struct intel_iommu *iommu = info->iommu; struct context_entry *context; - unsigned long flags; u16 did_old; if (!iommu) return; - spin_lock_irqsave(&iommu->lock, flags); + spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, 0); if (!context) { - spin_unlock_irqrestore(&iommu->lock, flags); + spin_unlock(&iommu->lock); return; } @@ -2330,14 +2303,14 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 if (hw_pass_through && domain_type_is_si(info->domain)) did_old = FLPT_DEFAULT_DID; else - did_old = info->domain->iommu_did[iommu->seq_id]; + did_old = domain_id_iommu(info->domain, iommu); } else { did_old = context_domain_id(context); } context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); - spin_unlock_irqrestore(&iommu->lock, flags); + spin_unlock(&iommu->lock); iommu->flush.flush_context(iommu, did_old, (((u16)bus) << 8) | devfn, @@ -2356,30 +2329,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH); } -static void domain_remove_dev_info(struct dmar_domain *domain) -{ - struct device_domain_info *info, *tmp; - unsigned long flags; - - spin_lock_irqsave(&device_domain_lock, flags); - list_for_each_entry_safe(info, tmp, &domain->devices, link) - __dmar_remove_one_dev_info(info); - spin_unlock_irqrestore(&device_domain_lock, flags); -} - -static inline struct device_domain_info * -dmar_search_domain_by_dev_info(int segment, int bus, int devfn) -{ - struct device_domain_info *info; - - list_for_each_entry(info, &device_domain_list, global) - if (info->segment == segment && info->bus == bus && - info->devfn == devfn) - return info; - - return NULL; -} - static int domain_setup_first_level(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, @@ -2412,7 +2361,7 @@ static int domain_setup_first_level(struct intel_iommu *iommu, flags |= PASID_FLAG_PAGE_SNOOP; return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid, - domain->iommu_did[iommu->seq_id], + domain_id_iommu(domain, iommu), flags); } @@ -2507,17 +2456,13 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) if (!iommu) return -ENODEV; - spin_lock_irqsave(&device_domain_lock, flags); - info->domain = domain; - spin_lock(&iommu->lock); ret = domain_attach_iommu(domain, iommu); - spin_unlock(&iommu->lock); - if (ret) { - spin_unlock_irqrestore(&device_domain_lock, flags); + if (ret) return ret; - } + info->domain = domain; + spin_lock_irqsave(&domain->lock, flags); list_add(&info->link, &domain->devices); - spin_unlock_irqrestore(&device_domain_lock, flags); + spin_unlock_irqrestore(&domain->lock, flags); /* PASID table is mandatory for a PCI device in scalable mode. */ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { @@ -2529,7 +2474,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) } /* Setup the PASID entry for requests without PASID: */ - spin_lock_irqsave(&iommu->lock, flags); if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, dev, PASID_RID2PASID); @@ -2539,7 +2483,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) else ret = intel_pasid_setup_second_level(iommu, domain, dev, PASID_RID2PASID); - spin_unlock_irqrestore(&iommu->lock, flags); if (ret) { dev_err(dev, "Setup RID2PASID failed\n"); dmar_remove_one_dev_info(dev); @@ -2761,32 +2704,14 @@ static int copy_context_table(struct intel_iommu *iommu, /* Now copy the context entry */ memcpy(&ce, old_ce + idx, sizeof(ce)); - if (!__context_present(&ce)) + if (!context_present(&ce)) continue; did = context_domain_id(&ce); if (did >= 0 && did < cap_ndoms(iommu->cap)) set_bit(did, iommu->domain_ids); - /* - * We need a marker for copied context entries. This - * marker needs to work for the old format as well as - * for extended context entries. - * - * Bit 67 of the context entry is used. In the old - * format this bit is available to software, in the - * extended format it is the PGE bit, but PGE is ignored - * by HW if PASIDs are disabled (and thus still - * available). - * - * So disable PASIDs first and then mark the entry - * copied. This means that we don't copy PASID - * translations from the old kernel, but this is fine as - * faults there are not fatal. - */ - context_clear_pasid_enable(&ce); - context_set_copied(&ce); - + set_context_copied(iommu, bus, devfn); new_ce[idx] = ce; } @@ -2807,14 +2732,13 @@ static int copy_translation_tables(struct intel_iommu *iommu) struct root_entry *old_rt; phys_addr_t old_rt_phys; int ctxt_table_entries; - unsigned long flags; u64 rtaddr_reg; int bus, ret; bool new_ext, ext; rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG); - ext = !!(rtaddr_reg & DMA_RTADDR_RTT); - new_ext = !!ecap_ecs(iommu->ecap); + ext = !!(rtaddr_reg & DMA_RTADDR_SMT); + new_ext = !!sm_supported(iommu); /* * The RTT bit can only be changed when translation is disabled, @@ -2825,6 +2749,10 @@ static int copy_translation_tables(struct intel_iommu *iommu) if (new_ext != ext) return -EINVAL; + iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL); + if (!iommu->copied_tables) + return -ENOMEM; + old_rt_phys = rtaddr_reg & VTD_PAGE_MASK; if (!old_rt_phys) return -EINVAL; @@ -2850,7 +2778,7 @@ static int copy_translation_tables(struct intel_iommu *iommu) } } - spin_lock_irqsave(&iommu->lock, flags); + spin_lock(&iommu->lock); /* Context tables are copied, now write them to the root_entry table */ for (bus = 0; bus < 256; bus++) { @@ -2869,7 +2797,7 @@ static int copy_translation_tables(struct intel_iommu *iommu) iommu->root_entry[bus].hi = val; } - spin_unlock_irqrestore(&iommu->lock, flags); + spin_unlock(&iommu->lock); kfree(ctxt_tbls); @@ -2968,36 +2896,6 @@ static int __init init_dmars(void) struct intel_iommu *iommu; int ret; - /* - * for each drhd - * allocate root - * initialize and program root entry to not present - * endfor - */ - for_each_drhd_unit(drhd) { - /* - * lock not needed as this is only incremented in the single - * threaded kernel __init code path all other access are read - * only - */ - if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) { - g_num_of_iommus++; - continue; - } - pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED); - } - - /* Preallocate enough resources for IOMMU hot-addition */ - if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) - g_num_of_iommus = DMAR_UNITS_SUPPORTED; - - g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *), - GFP_KERNEL); - if (!g_iommus) { - ret = -ENOMEM; - goto error; - } - ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL); if (ret) goto free_iommu; @@ -3020,8 +2918,6 @@ static int __init init_dmars(void) intel_pasid_max_id); } - g_iommus[iommu->seq_id] = iommu; - intel_iommu_init_qi(iommu); ret = iommu_init_domains(iommu); @@ -3147,9 +3043,6 @@ free_iommu: free_dmar_iommu(iommu); } - kfree(g_iommus); - -error: return ret; } @@ -3530,9 +3423,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) int sp, ret; struct intel_iommu *iommu = dmaru->iommu; - if (g_iommus[iommu->seq_id]) - return 0; - ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu); if (ret) goto out; @@ -3556,7 +3446,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) if (iommu->gcmd & DMA_GCMD_TE) iommu_disable_translation(iommu); - g_iommus[iommu->seq_id] = iommu; ret = iommu_init_domains(iommu); if (ret == 0) ret = iommu_alloc_root_entry(iommu); @@ -4022,6 +3911,20 @@ static int __init probe_acpi_namespace_devices(void) return 0; } +static __init int tboot_force_iommu(void) +{ + if (!tboot_enabled()) + return 0; + + if (no_iommu || dmar_disabled) + pr_warn("Forcing Intel-IOMMU to enabled\n"); + + dmar_disabled = 0; + no_iommu = 0; + + return 1; +} + int __init intel_iommu_init(void) { int ret = -ENODEV; @@ -4093,9 +3996,6 @@ int __init intel_iommu_init(void) if (list_empty(&dmar_satc_units)) pr_info("No SATC found\n"); - if (dmar_map_gfx) - intel_iommu_gfx_mapped = 1; - init_no_remapping_devices(); ret = init_dmars(); @@ -4181,21 +4081,14 @@ static void domain_context_clear(struct device_domain_info *info) &domain_context_clear_one_cb, info); } -static void __dmar_remove_one_dev_info(struct device_domain_info *info) +static void dmar_remove_one_dev_info(struct device *dev) { - struct dmar_domain *domain; - struct intel_iommu *iommu; + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct dmar_domain *domain = info->domain; + struct intel_iommu *iommu = info->iommu; unsigned long flags; - assert_spin_locked(&device_domain_lock); - - if (WARN_ON(!info)) - return; - - iommu = info->iommu; - domain = info->domain; - - if (info->dev && !dev_is_real_dma_subdevice(info->dev)) { + if (!dev_is_real_dma_subdevice(info->dev)) { if (dev_is_pci(info->dev) && sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, info->dev, PASID_RID2PASID, false); @@ -4205,23 +4098,12 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) intel_pasid_free_table(info->dev); } + spin_lock_irqsave(&domain->lock, flags); list_del(&info->link); + spin_unlock_irqrestore(&domain->lock, flags); - spin_lock_irqsave(&iommu->lock, flags); domain_detach_iommu(domain, iommu); - spin_unlock_irqrestore(&iommu->lock, flags); -} - -static void dmar_remove_one_dev_info(struct device *dev) -{ - struct device_domain_info *info; - unsigned long flags; - - spin_lock_irqsave(&device_domain_lock, flags); - info = dev_iommu_priv_get(dev); - if (info) - __dmar_remove_one_dev_info(info); - spin_unlock_irqrestore(&device_domain_lock, flags); + info->domain = NULL; } static int md_domain_init(struct dmar_domain *domain, int guest_width) @@ -4466,15 +4348,16 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain, struct dmar_domain *dmar_domain = to_dmar_domain(domain); unsigned long iova_pfn = IOVA_PFN(gather->start); size_t size = gather->end - gather->start; + struct iommu_domain_info *info; unsigned long start_pfn; unsigned long nrpages; - int iommu_id; + unsigned long i; nrpages = aligned_nrpages(gather->start, size); start_pfn = mm_to_dma_pfn(iova_pfn); - for_each_domain_iommu(iommu_id, dmar_domain) - iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain, + xa_for_each(&dmar_domain->iommu_array, i, info) + iommu_flush_iotlb_psi(info->iommu, dmar_domain, start_pfn, nrpages, list_empty(&gather->freelist), 0); @@ -4503,7 +4386,7 @@ static bool domain_support_force_snooping(struct dmar_domain *domain) struct device_domain_info *info; bool support = true; - assert_spin_locked(&device_domain_lock); + assert_spin_locked(&domain->lock); list_for_each_entry(info, &domain->devices, link) { if (!ecap_sc_support(info->iommu->ecap)) { support = false; @@ -4518,8 +4401,7 @@ static void domain_set_force_snooping(struct dmar_domain *domain) { struct device_domain_info *info; - assert_spin_locked(&device_domain_lock); - + assert_spin_locked(&domain->lock); /* * Second level page table supports per-PTE snoop control. The * iommu_map() interface will handle this by setting SNP bit. @@ -4542,15 +4424,15 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) if (dmar_domain->force_snooping) return true; - spin_lock_irqsave(&device_domain_lock, flags); + spin_lock_irqsave(&dmar_domain->lock, flags); if (!domain_support_force_snooping(dmar_domain)) { - spin_unlock_irqrestore(&device_domain_lock, flags); + spin_unlock_irqrestore(&dmar_domain->lock, flags); return false; } domain_set_force_snooping(dmar_domain); dmar_domain->force_snooping = true; - spin_unlock_irqrestore(&device_domain_lock, flags); + spin_unlock_irqrestore(&dmar_domain->lock, flags); return true; } @@ -4572,7 +4454,6 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; struct device_domain_info *info; struct intel_iommu *iommu; - unsigned long flags; u8 bus, devfn; iommu = device_to_iommu(dev, &bus, &devfn); @@ -4615,10 +4496,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) } } - spin_lock_irqsave(&device_domain_lock, flags); - list_add(&info->global, &device_domain_list); dev_iommu_priv_set(dev, info); - spin_unlock_irqrestore(&device_domain_lock, flags); return &iommu->iommu; } @@ -4626,15 +4504,9 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) static void intel_iommu_release_device(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); - unsigned long flags; dmar_remove_one_dev_info(dev); - - spin_lock_irqsave(&device_domain_lock, flags); dev_iommu_priv_set(dev, NULL); - list_del(&info->global); - spin_unlock_irqrestore(&device_domain_lock, flags); - kfree(info); set_dma_ops(dev, NULL); } @@ -4707,7 +4579,6 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) struct device_domain_info *info = dev_iommu_priv_get(dev); struct context_entry *context; struct dmar_domain *domain; - unsigned long flags; u64 ctx_lo; int ret; @@ -4715,9 +4586,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) if (!domain) return -EINVAL; - spin_lock_irqsave(&device_domain_lock, flags); spin_lock(&iommu->lock); - ret = -EINVAL; if (!info->pasid_supported) goto out; @@ -4733,7 +4602,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) context[0].lo = ctx_lo; wmb(); iommu->flush.flush_context(iommu, - domain->iommu_did[iommu->seq_id], + domain_id_iommu(domain, iommu), PCI_DEVID(info->bus, info->devfn), DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); @@ -4747,7 +4616,6 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) out: spin_unlock(&iommu->lock); - spin_unlock_irqrestore(&device_domain_lock, flags); return ret; } @@ -4871,13 +4739,11 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, struct dmar_domain *dmar_domain = to_dmar_domain(domain); unsigned long pages = aligned_nrpages(iova, size); unsigned long pfn = iova >> VTD_PAGE_SHIFT; - struct intel_iommu *iommu; - int iommu_id; + struct iommu_domain_info *info; + unsigned long i; - for_each_domain_iommu(iommu_id, dmar_domain) { - iommu = g_iommus[iommu_id]; - __mapping_notify_one(iommu, dmar_domain, pfn, pages); - } + xa_for_each(&dmar_domain->iommu_array, i, info) + __mapping_notify_one(info->iommu, dmar_domain, pfn, pages); } const struct iommu_ops intel_iommu_ops = { @@ -4887,7 +4753,6 @@ const struct iommu_ops intel_iommu_ops = { .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, - .put_resv_regions = generic_iommu_put_resv_regions, .device_group = intel_iommu_device_group, .dev_enable_feat = intel_iommu_dev_enable_feat, .dev_disable_feat = intel_iommu_dev_disable_feat, |