summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig22
-rw-r--r--arch/x86/Kconfig.cpu1
-rw-r--r--arch/x86/boot/tty.c2
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h24
-rw-r--r--arch/x86/include/asm/dma-mapping.h6
-rw-r--r--arch/x86/include/asm/ds.h6
-rw-r--r--arch/x86/include/asm/io_apic.h9
-rw-r--r--arch/x86/include/asm/irq_vectors.h11
-rw-r--r--arch/x86/include/asm/pci_64.h14
-rw-r--r--arch/x86/include/asm/ptrace.h2
-rw-r--r--arch/x86/include/asm/topology.h2
-rw-r--r--arch/x86/include/asm/vmi.h8
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/amd_iommu.c54
-rw-r--r--arch/x86/kernel/amd_iommu_init.c7
-rw-r--r--arch/x86/kernel/apic.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c18
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.h17
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c3
-rw-r--r--arch/x86/kernel/ds.c88
-rw-r--r--arch/x86/kernel/i387.c2
-rw-r--r--arch/x86/kernel/io_apic.c789
-rw-r--r--arch/x86/kernel/irq.c3
-rw-r--r--arch/x86/kernel/irq_32.c2
-rw-r--r--arch/x86/kernel/irq_64.c2
-rw-r--r--arch/x86/kernel/irqinit_32.c3
-rw-r--r--arch/x86/kernel/irqinit_64.c3
-rw-r--r--arch/x86/kernel/kvmclock.c2
-rw-r--r--arch/x86/kernel/microcode_core.c19
-rw-r--r--arch/x86/kernel/microcode_intel.c6
-rw-r--r--arch/x86/kernel/mpparse.c3
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c3
-rw-r--r--arch/x86/kernel/pci-calgary_64.c2
-rw-r--r--arch/x86/kernel/pci-gart_64.c6
-rw-r--r--arch/x86/kernel/setup.c14
-rw-r--r--arch/x86/kernel/smpboot.c2
-rw-r--r--arch/x86/kernel/vmi_32.c16
-rw-r--r--arch/x86/kernel/xsave.c2
-rw-r--r--arch/x86/kvm/mmu.c2
-rw-r--r--arch/x86/kvm/paging_tmpl.h1
-rw-r--r--arch/x86/kvm/vmx.c4
-rw-r--r--arch/x86/oprofile/nmi_int.c5
-rw-r--r--arch/x86/oprofile/op_model_ppro.c6
-rw-r--r--arch/x86/pci/fixup.c25
-rw-r--r--arch/x86/xen/mmu.c21
-rw-r--r--arch/x86/xen/smp.c2
-rw-r--r--arch/x86/xen/xen-ops.h2
47 files changed, 806 insertions, 439 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7..e4c038a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -238,6 +238,28 @@ config X86_HAS_BOOT_CPU_ID
def_bool y
depends on X86_VOYAGER
+config SPARSE_IRQ
+ bool "Support sparse irq numbering"
+ depends on PCI_MSI || HT_IRQ
+ help
+ This enables support for sparse irqs. This is useful for distro
+ kernels that want to define a high CONFIG_NR_CPUS value but still
+ want to have low kernel memory footprint on smaller machines.
+
+ ( Sparse IRQs can also be beneficial on NUMA boxes, as they spread
+ out the irq_desc[] array in a more NUMA-friendly way. )
+
+ If you don't know what to do here, say N.
+
+config NUMA_MIGRATE_IRQ_DESC
+ bool "Move irq desc when changing irq smp_affinity"
+ depends on SPARSE_IRQ && NUMA
+ default n
+ help
+ This enables moving irq_desc to cpu/node that irq will use handled.
+
+ If you don't know what to do here, say N.
+
config X86_FIND_SMP_CONFIG
def_bool y
depends on X86_MPPARSE || X86_VOYAGER
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b815664..8e99073 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -520,6 +520,7 @@ config X86_PTRACE_BTS
bool "Branch Trace Store"
default y
depends on X86_DEBUGCTLMSR
+ depends on BROKEN
help
This adds a ptrace interface to the hardware's branch trace store.
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index 0be77b3..7e8e8b2 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -74,7 +74,7 @@ static int kbd_pending(void)
{
u8 pending;
asm volatile("int $0x16; setnz %0"
- : "=rm" (pending)
+ : "=qm" (pending)
: "a" (0x0100));
return pending;
}
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 1a30c04..ac302a2 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -251,13 +251,6 @@ struct amd_iommu {
/* Pointer to PCI device of this IOMMU */
struct pci_dev *dev;
- /*
- * Capability pointer. There could be more than one IOMMU per PCI
- * device function if there are more than one AMD IOMMU capability
- * pointers.
- */
- u16 cap_ptr;
-
/* physical address of MMIO space */
u64 mmio_phys;
/* virtual address of MMIO space */
@@ -266,6 +259,13 @@ struct amd_iommu {
/* capabilities of that IOMMU read from ACPI */
u32 cap;
+ /*
+ * Capability pointer. There could be more than one IOMMU per PCI
+ * device function if there are more than one AMD IOMMU capability
+ * pointers.
+ */
+ u16 cap_ptr;
+
/* pci domain of this IOMMU */
u16 pci_seg;
@@ -284,19 +284,19 @@ struct amd_iommu {
/* size of command buffer */
u32 cmd_buf_size;
- /* event buffer virtual address */
- u8 *evt_buf;
/* size of event buffer */
u32 evt_buf_size;
+ /* event buffer virtual address */
+ u8 *evt_buf;
/* MSI number for event interrupt */
u16 evt_msi_num;
- /* if one, we need to send a completion wait command */
- int need_sync;
-
/* true if interrupts for this IOMMU are already enabled */
bool int_enabled;
+ /* if one, we need to send a completion wait command */
+ int need_sync;
+
/* default dma_ops domain for that IOMMU */
struct dma_ops_domain *default_dom;
};
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 7f225a4..097794f 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -71,15 +71,13 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
/* Make sure we keep the same behaviour */
static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
-#ifdef CONFIG_X86_32
- return 0;
-#else
+#ifdef CONFIG_X86_64
struct dma_mapping_ops *ops = get_dma_ops(dev);
if (ops->mapping_error)
return ops->mapping_error(dev, dma_addr);
- return (dma_addr == bad_dma_address);
#endif
+ return (dma_addr == bad_dma_address);
}
#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index 72c5a19..a950084 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -23,12 +23,13 @@
#ifndef _ASM_X86_DS_H
#define _ASM_X86_DS_H
-#ifdef CONFIG_X86_DS
#include <linux/types.h>
#include <linux/init.h>
+#ifdef CONFIG_X86_DS
+
struct task_struct;
/*
@@ -232,7 +233,8 @@ extern void ds_free(struct ds_context *context);
#else /* CONFIG_X86_DS */
-#define ds_init_intel(config) do {} while (0)
+struct cpuinfo_x86;
+static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
#endif /* CONFIG_X86_DS */
#endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 6afd993..25d527c 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -188,17 +188,14 @@ extern void restore_IO_APIC_setup(void);
extern void reinit_intr_remapped_IO_APIC(int);
#endif
-extern int probe_nr_irqs(void);
+extern void probe_nr_irqs_gsi(void);
#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
static const int timer_through_8259 = 0;
-static inline void ioapic_init_mappings(void) { }
+static inline void ioapic_init_mappings(void) { }
-static inline int probe_nr_irqs(void)
-{
- return NR_IRQS;
-}
+static inline void probe_nr_irqs_gsi(void) { }
#endif
#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 0005adb..f7ff650 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -101,12 +101,23 @@
#define LAST_VM86_IRQ 15
#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
+#define NR_IRQS_LEGACY 16
+
#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
+
+#ifndef CONFIG_SPARSE_IRQ
# if NR_CPUS < MAX_IO_APICS
# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
# else
# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
# endif
+#else
+# if (8 * NR_CPUS) > (32 * MAX_IO_APICS)
+# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS))
+# else
+# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
+# endif
+#endif
#elif defined(CONFIG_X86_VOYAGER)
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
index 5b28995..d02d936 100644
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -34,8 +34,6 @@ extern void pci_iommu_alloc(void);
*/
#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
-#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU)
-
#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
dma_addr_t ADDR_NAME;
#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
@@ -49,18 +47,6 @@ extern void pci_iommu_alloc(void);
#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
(((PTR)->LEN_NAME) = (VAL))
-#else
-/* No IOMMU */
-
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
-#define pci_unmap_addr(PTR, ADDR_NAME) (0)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0)
-#define pci_unmap_len(PTR, LEN_NAME) (0)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0)
-
-#endif
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_PCI_64_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index d1531c8..eefb059 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -271,8 +271,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
extern int do_set_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info, int can_allocate);
-#define __ARCH_WANT_COMPAT_SYS_PTRACE
-
#endif /* __KERNEL__ */
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 4850e4b..ff386ff 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -239,7 +239,7 @@ struct pci_bus;
void set_pci_bus_resources_arch_default(struct pci_bus *b);
#ifdef CONFIG_SMP
-#define mc_capable() (boot_cpu_data.x86_max_cores > 1)
+#define mc_capable() (cpus_weight(per_cpu(cpu_core_map, 0)) != nr_cpu_ids)
#define smt_capable() (smp_num_siblings > 1)
#endif
diff --git a/arch/x86/include/asm/vmi.h b/arch/x86/include/asm/vmi.h
index b7c0dea..61e08c0 100644
--- a/arch/x86/include/asm/vmi.h
+++ b/arch/x86/include/asm/vmi.h
@@ -223,9 +223,15 @@ struct pci_header {
} __attribute__((packed));
/* Function prototypes for bootstrapping */
+#ifdef CONFIG_VMI
extern void vmi_init(void);
+extern void vmi_activate(void);
extern void vmi_bringup(void);
-extern void vmi_apply_boot_page_allocations(void);
+#else
+static inline void vmi_init(void) {}
+static inline void vmi_activate(void) {}
+static inline void vmi_bringup(void) {}
+#endif
/* State needed to start an application processor in an SMP system. */
struct vmi_ap_state {
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e489ff9..b62a766 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -41,7 +41,7 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-y += process.o
obj-y += i387.o xsave.o
obj-y += ptrace.o
-obj-y += ds.o
+obj-$(CONFIG_X86_DS) += ds.o
obj-$(CONFIG_X86_32) += tls.o
obj-$(CONFIG_IA32_EMULATION) += tls.o
obj-y += step.o
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index e4899e0..0a60d60 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -187,6 +187,8 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
spin_lock_irqsave(&iommu->lock, flags);
ret = __iommu_queue_command(iommu, cmd);
+ if (!ret)
+ iommu->need_sync = 1;
spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
@@ -210,10 +212,13 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
- iommu->need_sync = 0;
-
spin_lock_irqsave(&iommu->lock, flags);
+ if (!iommu->need_sync)
+ goto out;
+
+ iommu->need_sync = 0;
+
ret = __iommu_queue_command(iommu, &cmd);
if (ret)
@@ -230,8 +235,9 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
- if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
- printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
+ if (unlikely(i == EXIT_LOOP_COUNT))
+ panic("AMD IOMMU: Completion wait loop failed\n");
+
out:
spin_unlock_irqrestore(&iommu->lock, flags);
@@ -254,8 +260,6 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
ret = iommu_queue_command(iommu, &cmd);
- iommu->need_sync = 1;
-
return ret;
}
@@ -281,8 +285,6 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
ret = iommu_queue_command(iommu, &cmd);
- iommu->need_sync = 1;
-
return ret;
}
@@ -343,7 +345,7 @@ static int iommu_map(struct protection_domain *dom,
u64 __pte, *pte, *page;
bus_addr = PAGE_ALIGN(bus_addr);
- phys_addr = PAGE_ALIGN(bus_addr);
+ phys_addr = PAGE_ALIGN(phys_addr);
/* only support 512GB address spaces for now */
if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
@@ -599,7 +601,7 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
continue;
p2 = IOMMU_PTE_PAGE(p1[i]);
- for (j = 0; j < 512; ++i) {
+ for (j = 0; j < 512; ++j) {
if (!IOMMU_PTE_PRESENT(p2[j]))
continue;
p3 = IOMMU_PTE_PAGE(p2[j]);
@@ -762,8 +764,6 @@ static void set_device_domain(struct amd_iommu *iommu,
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
iommu_queue_inv_dev_entry(iommu, devid);
-
- iommu->need_sync = 1;
}
/*****************************************************************************
@@ -858,6 +858,9 @@ static int get_device_resources(struct device *dev,
print_devid(_bdf, 1);
}
+ if (domain_for_device(_bdf) == NULL)
+ set_device_domain(*iommu, *domain, _bdf);
+
return 1;
}
@@ -908,7 +911,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
if (address >= dom->aperture_size)
return;
- WARN_ON(address & 0xfffULL || address > dom->aperture_size);
+ WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size);
pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
pte += IOMMU_PTE_L0_INDEX(address);
@@ -920,8 +923,8 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
/*
* This function contains common code for mapping of a physically
- * contiguous memory region into DMA address space. It is uses by all
- * mapping functions provided by this IOMMU driver.
+ * contiguous memory region into DMA address space. It is used by all
+ * mapping functions provided with this IOMMU driver.
* Must be called with the domain lock held.
*/
static dma_addr_t __map_single(struct device *dev,
@@ -981,7 +984,8 @@ static void __unmap_single(struct amd_iommu *iommu,
dma_addr_t i, start;
unsigned int pages;
- if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
+ if ((dma_addr == bad_dma_address) ||
+ (dma_addr + size > dma_dom->aperture_size))
return;
pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
@@ -1031,8 +1035,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
if (addr == bad_dma_address)
goto out;
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1060,8 +1063,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
__unmap_single(iommu, domain->priv, dma_addr, size, dir);
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -1127,8 +1129,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
goto unmap;
}
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1173,8 +1174,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
s->dma_address = s->dma_length = 0;
}
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -1225,8 +1225,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
goto out;
}
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1257,8 +1256,7 @@ static void free_coherent(struct device *dev, size_t size,
__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 30ae270..c6cc228 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -427,6 +427,10 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
&entry, sizeof(entry));
+ /* set head and tail to zero manually */
+ writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
+ writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+
iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
return cmd_buf;
@@ -1074,7 +1078,8 @@ int __init amd_iommu_init(void)
goto free;
/* IOMMU rlookup table - find the IOMMU for a specific device */
- amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL,
+ amd_iommu_rlookup_table = (void *)__get_free_pages(
+ GFP_KERNEL | __GFP_ZERO,
get_order(rlookup_table_size));
if (amd_iommu_rlookup_table == NULL)
goto free;
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 04a7f96..16f9487 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1315,7 +1315,7 @@ void enable_x2apic(void)
}
}
-void enable_IR_x2apic(void)
+void __init enable_IR_x2apic(void)
{
#ifdef CONFIG_INTR_REMAP
int ret;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index d3dcd58..7f05f44 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -115,9 +115,20 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
u32 i = 0;
if (cpu_family == CPU_HW_PSTATE) {
- rdmsr(MSR_PSTATE_STATUS, lo, hi);
- i = lo & HW_PSTATE_MASK;
- data->currpstate = i;
+ if (data->currpstate == HW_PSTATE_INVALID) {
+ /* read (initial) hw pstate if not yet set */
+ rdmsr(MSR_PSTATE_STATUS, lo, hi);
+ i = lo & HW_PSTATE_MASK;
+
+ /*
+ * a workaround for family 11h erratum 311 might cause
+ * an "out-of-range Pstate if the core is in Pstate-0
+ */
+ if (i >= data->numps)
+ data->currpstate = HW_PSTATE_0;
+ else
+ data->currpstate = i;
+ }
return 0;
}
do {
@@ -1121,6 +1132,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
}
data->cpu = pol->cpu;
+ data->currpstate = HW_PSTATE_INVALID;
if (powernow_k8_cpu_init_acpi(data)) {
/*
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index ab48cfe..65cfb5d 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -5,6 +5,19 @@
* http://www.gnu.org/licenses/gpl.html
*/
+
+enum pstate {
+ HW_PSTATE_INVALID = 0xff,
+ HW_PSTATE_0 = 0,
+ HW_PSTATE_1 = 1,
+ HW_PSTATE_2 = 2,
+ HW_PSTATE_3 = 3,
+ HW_PSTATE_4 = 4,
+ HW_PSTATE_5 = 5,
+ HW_PSTATE_6 = 6,
+ HW_PSTATE_7 = 7,
+};
+
struct powernow_k8_data {
unsigned int cpu;
@@ -23,7 +36,9 @@ struct powernow_k8_data {
u32 exttype; /* extended interface = 1 */
/* keep track of the current fid / vid or pstate */
- u32 currvid, currfid, currpstate;
+ u32 currvid;
+ u32 currfid;
+ enum pstate currpstate;
/* the powernow_table includes all frequency and vid/fid pairings:
* fid are the lower 8 bits of the index, vid are the upper 8 bits.
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 4b031a4..1c83803 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -510,12 +510,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
*/
void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
{
- static cpumask_t mce_cpus = CPU_MASK_NONE;
-
mce_cpu_quirks(c);
if (mce_dont_init ||
- cpu_test_and_set(smp_processor_id(), mce_cpus) ||
!mce_available(c))
return;
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index d1a1214..a2d1176 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -21,8 +21,6 @@
*/
-#ifdef CONFIG_X86_DS
-
#include <asm/ds.h>
#include <linux/errno.h>
@@ -211,14 +209,15 @@ static DEFINE_PER_CPU(struct ds_context *, system_context);
static inline struct ds_context *ds_get_context(struct task_struct *task)
{
struct ds_context *context;
+ unsigned long irq;
- spin_lock(&ds_lock);
+ spin_lock_irqsave(&ds_lock, irq);
context = (task ? task->thread.ds_ctx : this_system_context);
if (context)
context->count++;
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
return context;
}
@@ -226,55 +225,46 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
/*
* Same as ds_get_context, but allocates the context and it's DS
* structure, if necessary; returns NULL; if out of memory.
- *
- * pre: requires ds_lock to be held
*/
static inline struct ds_context *ds_alloc_context(struct task_struct *task)
{
struct ds_context **p_context =
(task ? &task->thread.ds_ctx : &this_system_context);
struct ds_context *context = *p_context;
+ unsigned long irq;
if (!context) {
- spin_unlock(&ds_lock);
-
context = kzalloc(sizeof(*context), GFP_KERNEL);
-
- if (!context) {
- spin_lock(&ds_lock);
+ if (!context)
return NULL;
- }
context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
if (!context->ds) {
kfree(context);
- spin_lock(&ds_lock);
return NULL;
}
- spin_lock(&ds_lock);
- /*
- * Check for race - another CPU could have allocated
- * it meanwhile:
- */
+ spin_lock_irqsave(&ds_lock, irq);
+
if (*p_context) {
kfree(context->ds);
kfree(context);
- return *p_context;
- }
- *p_context = context;
+ context = *p_context;
+ } else {
+ *p_context = context;
- context->this = p_context;
- context->task = task;
+ context->this = p_context;
+ context->task = task;
- if (task)
- set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
+ if (task)
+ set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
- if (!task || (task == current))
- wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
-
- get_tracer(task);
+ if (!task || (task == current))
+ wrmsrl(MSR_IA32_DS_AREA,
+ (unsigned long)context->ds);
+ }
+ spin_unlock_irqrestore(&ds_lock, irq);
}
context->count++;
@@ -288,10 +278,12 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
*/
static inline void ds_put_context(struct ds_context *context)
{
+ unsigned long irq;
+
if (!context)
return;
- spin_lock(&ds_lock);
+ spin_lock_irqsave(&ds_lock, irq);
if (--context->count)
goto out;
@@ -313,7 +305,7 @@ static inline void ds_put_context(struct ds_context *context)
kfree(context->ds);
kfree(context);
out:
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
}
@@ -384,6 +376,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
struct ds_context *context;
unsigned long buffer, adj;
const unsigned long alignment = (1 << 3);
+ unsigned long irq;
int error = 0;
if (!ds_cfg.sizeof_ds)
@@ -398,26 +391,27 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
return -EOPNOTSUPP;
- spin_lock(&ds_lock);
-
- error = -ENOMEM;
context = ds_alloc_context(task);
if (!context)
- goto out_unlock;
+ return -ENOMEM;
+
+ spin_lock_irqsave(&ds_lock, irq);
error = -EPERM;
if (!check_tracer(task))
goto out_unlock;
+ get_tracer(task);
+
error = -EALREADY;
if (context->owner[qual] == current)
- goto out_unlock;
+ goto out_put_tracer;
error = -EPERM;
if (context->owner[qual] != NULL)
- goto out_unlock;
+ goto out_put_tracer;
context->owner[qual] = current;
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
error = -ENOMEM;
@@ -465,10 +459,17 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
out_release:
context->owner[qual] = NULL;
ds_put_context(context);
+ put_tracer(task);
+ return error;
+
+ out_put_tracer:
+ spin_unlock_irqrestore(&ds_lock, irq);
+ ds_put_context(context);
+ put_tracer(task);
return error;
out_unlock:
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
ds_put_context(context);
return error;
}
@@ -818,13 +819,21 @@ static const struct ds_configuration ds_cfg_var = {
.sizeof_ds = sizeof(long) * 12,
.sizeof_field = sizeof(long),
.sizeof_rec[ds_bts] = sizeof(long) * 3,
+#ifdef __i386__
.sizeof_rec[ds_pebs] = sizeof(long) * 10
+#else
+ .sizeof_rec[ds_pebs] = sizeof(long) * 18
+#endif
};
static const struct ds_configuration ds_cfg_64 = {
.sizeof_ds = 8 * 12,
.sizeof_field = 8,
.sizeof_rec[ds_bts] = 8 * 3,
+#ifdef __i386__
.sizeof_rec[ds_pebs] = 8 * 10
+#else
+ .sizeof_rec[ds_pebs] = 8 * 18
+#endif
};
static inline void
@@ -878,4 +887,3 @@ void ds_free(struct ds_context *context)
while (leftovers--)
ds_put_context(context);
}
-#endif /* CONFIG_X86_DS */
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 1f20608..b0f61f0 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -58,7 +58,7 @@ void __cpuinit mxcsr_feature_mask_init(void)
stts();
}
-void __init init_thread_xstate(void)
+void __cpuinit init_thread_xstate(void)
{
if (!HAVE_HWFP) {
xstate_size = sizeof(struct i387_soft_struct);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index c9513e1..a74887b 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -108,94 +108,253 @@ static int __init parse_noapic(char *str)
early_param("noapic", parse_noapic);
struct irq_pin_list;
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+struct irq_pin_list {
+ int apic, pin;
+ struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+{
+ struct irq_pin_list *pin;
+ int node;
+
+ node = cpu_to_node(cpu);
+
+ pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
+ printk(KERN_DEBUG " alloc irq_2_pin on cpu %d node %d\n", cpu, node);
+
+ return pin;
+}
+
struct irq_cfg {
- unsigned int irq;
struct irq_pin_list *irq_2_pin;
cpumask_t domain;
cpumask_t old_domain;
unsigned move_cleanup_count;
u8 vector;
u8 move_in_progress : 1;
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+ u8 move_desc_pending : 1;
+#endif
};
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_cfg irq_cfgx[] = {
+#else
static struct irq_cfg irq_cfgx[NR_IRQS] = {
- [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
- [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
- [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
- [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
- [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
- [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
- [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
- [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
- [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
- [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
- [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
- [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
- [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
- [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
- [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
- [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+#endif
+ [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
+ [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
+ [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
+ [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
+ [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
+ [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
+ [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
+ [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
+ [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
+ [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
+ [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+ [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+ [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+ [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+ [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+ [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
};
-#define for_each_irq_cfg(irq, cfg) \
- for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
+void __init arch_early_irq_init(void)
+{
+ struct irq_cfg *cfg;
+ struct irq_desc *desc;
+ int count;
+ int i;
+
+ cfg = irq_cfgx;
+ count = ARRAY_SIZE(irq_cfgx);
+ for (i = 0; i < count; i++) {
+ desc = irq_to_desc(i);
+ desc->chip_data = &cfg[i];
+ }
+}
+
+#ifdef CONFIG_SPARSE_IRQ
static struct irq_cfg *irq_cfg(unsigned int irq)
{
- return irq < nr_irqs ? irq_cfgx + irq : NULL;
+ struct irq_cfg *cfg = NULL;
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+ if (desc)
+ cfg = desc->chip_data;
+
+ return cfg;
}
-static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+static struct irq_cfg *get_one_free_irq_cfg(int cpu)
{
- return irq_cfg(irq);
+ struct irq_cfg *cfg;
+ int node;
+
+ node = cpu_to_node(cpu);
+
+ cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+ printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node);
+
+ return cfg;
}
-/*
- * Rough estimation of how many shared IRQs there are, can be changed
- * anytime.
- */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+void arch_init_chip_data(struct irq_desc *desc, int cpu)
+{
+ struct irq_cfg *cfg;
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
+ cfg = desc->chip_data;
+ if (!cfg) {
+ desc->chip_data = get_one_free_irq_cfg(cpu);
+ if (!desc->chip_data) {
+ printk(KERN_ERR "can not alloc irq_cfg\n");
+ BUG_ON(1);
+ }
+ }
+}
-struct irq_pin_list {
- int apic, pin;
- struct irq_pin_list *next;
-};
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+
+static void
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+{
+ struct irq_pin_list *old_entry, *head, *tail, *entry;
+
+ cfg->irq_2_pin = NULL;
+ old_entry = old_cfg->irq_2_pin;
+ if (!old_entry)
+ return;
+
+ entry = get_one_free_irq_2_pin(cpu);
+ if (!entry)
+ return;
+
+ entry->apic = old_entry->apic;
+ entry->pin = old_entry->pin;
+ head = entry;
+ tail = entry;
+ old_entry = old_entry->next;
+ while (old_entry) {
+ entry = get_one_free_irq_2_pin(cpu);
+ if (!entry) {
+ entry = head;
+ while (entry) {
+ head = entry->next;
+ kfree(entry);
+ entry = head;
+ }
+ /* still use the old one */
+ return;
+ }
+ entry->apic = old_entry->apic;
+ entry->pin = old_entry->pin;
+ tail->next = entry;
+ tail = entry;
+ old_entry = old_entry->next;
+ }
-static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
-static struct irq_pin_list *irq_2_pin_ptr;
+ tail->next = NULL;
+ cfg->irq_2_pin = head;
+}
-static void __init irq_2_pin_init(void)
+static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
{
- struct irq_pin_list *pin = irq_2_pin_head;
- int i;
+ struct irq_pin_list *entry, *next;
+
+ if (old_cfg->irq_2_pin == cfg->irq_2_pin)
+ return;
- for (i = 1; i < PIN_MAP_SIZE; i++)
- pin[i-1].next = &pin[i];
+ entry = old_cfg->irq_2_pin;
- irq_2_pin_ptr = &pin[0];
+ while (entry) {
+ next = entry->next;
+ kfree(entry);
+ entry = next;
+ }
+ old_cfg->irq_2_pin = NULL;
}
-static struct irq_pin_list *get_one_free_irq_2_pin(void)
+void arch_init_copy_chip_data(struct irq_desc *old_desc,
+ struct irq_desc *desc, int cpu)
{
- struct irq_pin_list *pin = irq_2_pin_ptr;
+ struct irq_cfg *cfg;
+ struct irq_cfg *old_cfg;
- if (!pin)
- panic("can not get more irq_2_pin\n");
+ cfg = get_one_free_irq_cfg(cpu);
- irq_2_pin_ptr = pin->next;
- pin->next = NULL;
- return pin;
+ if (!cfg)
+ return;
+
+ desc->chip_data = cfg;
+
+ old_cfg = old_desc->chip_data;
+
+ memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+
+ init_copy_irq_2_pin(old_cfg, cfg, cpu);
+}
+
+static void free_irq_cfg(struct irq_cfg *old_cfg)
+{
+ kfree(old_cfg);
+}
+
+void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+ struct irq_cfg *old_cfg, *cfg;
+
+ old_cfg = old_desc->chip_data;
+ cfg = desc->chip_data;
+
+ if (old_cfg == cfg)
+ return;
+
+ if (old_cfg) {
+ free_irq_2_pin(old_cfg, cfg);
+ free_irq_cfg(old_cfg);
+ old_desc->chip_data = NULL;
+ }
}
+static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+{
+ struct irq_cfg *cfg = desc->chip_data;
+
+ if (!cfg->move_in_progress) {
+ /* it means that domain is not changed */
+ if (!cpus_intersects(desc->affinity, mask))
+ cfg->move_desc_pending = 1;
+ }
+}
+#endif
+
+#else
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+ return irq < nr_irqs ? irq_cfgx + irq : NULL;
+}
+
+#endif
+
+#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
+static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+{
+}
+#endif
+
struct io_apic {
unsigned int index;
unsigned int unused[3];
@@ -237,11 +396,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
writel(value, &io_apic->data);
}
-static bool io_apic_level_ack_pending(unsigned int irq)
+static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
{
struct irq_pin_list *entry;
unsigned long flags;
- struct irq_cfg *cfg = irq_cfg(irq);
spin_lock_irqsave(&ioapic_lock, flags);
entry = cfg->irq_2_pin;
@@ -323,13 +481,12 @@ static void ioapic_mask_entry(int apic, int pin)
}
#ifdef CONFIG_SMP
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
{
int apic, pin;
- struct irq_cfg *cfg;
struct irq_pin_list *entry;
+ u8 vector = cfg->vector;
- cfg = irq_cfg(irq);
entry = cfg->irq_2_pin;
for (;;) {
unsigned int reg;
@@ -359,24 +516,27 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
}
}
-static int assign_irq_vector(int irq, cpumask_t mask);
+static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
{
struct irq_cfg *cfg;
unsigned long flags;
unsigned int dest;
cpumask_t tmp;
- struct irq_desc *desc;
+ unsigned int irq;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
return;
- cfg = irq_cfg(irq);
- if (assign_irq_vector(irq, mask))
+ irq = desc->irq;
+ cfg = desc->chip_data;
+ if (assign_irq_vector(irq, cfg, mask))
return;
+ set_extra_move_desc(desc, mask);
+
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
/*
@@ -384,12 +544,20 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
*/
dest = SET_APIC_LOGICAL_ID(dest);
- desc = irq_to_desc(irq);
spin_lock_irqsave(&ioapic_lock, flags);
- __target_IO_APIC_irq(irq, dest, cfg->vector);
+ __target_IO_APIC_irq(irq, dest, cfg);
desc->affinity = mask;
spin_unlock_irqrestore(&ioapic_lock, flags);
}
+
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+
+ set_ioapic_affinity_irq_desc(desc, mask);
+}
#endif /* CONFIG_SMP */
/*
@@ -397,16 +565,18 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
{
- struct irq_cfg *cfg;
struct irq_pin_list *entry;
- /* first time to refer irq_cfg, so with new */
- cfg = irq_cfg_alloc(irq);
entry = cfg->irq_2_pin;
if (!entry) {
- entry = get_one_free_irq_2_pin();
+ entry = get_one_free_irq_2_pin(cpu);
+ if (!entry) {
+ printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
+ apic, pin);
+ return;
+ }
cfg->irq_2_pin = entry;
entry->apic = apic;
entry->pin = pin;
@@ -421,7 +591,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
entry = entry->next;
}
- entry->next = get_one_free_irq_2_pin();
+ entry->next = get_one_free_irq_2_pin(cpu);
entry = entry->next;
entry->apic = apic;
entry->pin = pin;
@@ -430,11 +600,10 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
/*
* Reroute an IRQ to a different pin.
*/
-static void __init replace_pin_at_irq(unsigned int irq,
+static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
int oldapic, int oldpin,
int newapic, int newpin)
{
- struct irq_cfg *cfg = irq_cfg(irq);
struct irq_pin_list *entry = cfg->irq_2_pin;
int replaced = 0;
@@ -451,18 +620,16 @@ static void __init replace_pin_at_irq(unsigned int irq,
/* why? call replace before add? */
if (!replaced)
- add_pin_to_irq(irq, newapic, newpin);
+ add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
}
-static inline void io_apic_modify_irq(unsigned int irq,
+static inline void io_apic_modify_irq(struct irq_cfg *cfg,
int mask_and, int mask_or,
void (*final)(struct irq_pin_list *entry))
{
int pin;
- struct irq_cfg *cfg;
struct irq_pin_list *entry;
- cfg = irq_cfg(irq);
for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
unsigned int reg;
pin = entry->pin;
@@ -475,9 +642,9 @@ static inline void io_apic_modify_irq(unsigned int irq,
}
}
-static void __unmask_IO_APIC_irq(unsigned int irq)
+static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
{
- io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
}
#ifdef CONFIG_X86_64
@@ -492,47 +659,64 @@ void io_apic_sync(struct irq_pin_list *entry)
readl(&io_apic->data);
}
-static void __mask_IO_APIC_irq(unsigned int irq)
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
{
- io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
}
#else /* CONFIG_X86_32 */
-static void __mask_IO_APIC_irq(unsigned int irq)
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
{
- io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
}
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
{
- io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
IO_APIC_REDIR_MASKED, NULL);
}
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
{
- io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}
#endif /* CONFIG_X86_32 */
-static void mask_IO_APIC_irq (unsigned int irq)
+static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
{
+ struct irq_cfg *cfg = desc->chip_data;
unsigned long flags;
+ BUG_ON(!cfg);
+
spin_lock_irqsave(&ioapic_lock, flags);
- __mask_IO_APIC_irq(irq);
+ __mask_IO_APIC_irq(cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-static void unmask_IO_APIC_irq (unsigned int irq)
+static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
{
+ struct irq_cfg *cfg = desc->chip_data;
unsigned long flags;
spin_lock_irqsave(&ioapic_lock, flags);
- __unmask_IO_APIC_irq(irq);
+ __unmask_IO_APIC_irq(cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
+static void mask_IO_APIC_irq(unsigned int irq)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ mask_IO_APIC_irq_desc(desc);
+}
+static void unmask_IO_APIC_irq(unsigned int irq)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ unmask_IO_APIC_irq_desc(desc);
+}
+
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
struct IO_APIC_route_entry entry;
@@ -809,7 +993,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
*/
static int EISA_ELCR(unsigned int irq)
{
- if (irq < 16) {
+ if (irq < NR_IRQS_LEGACY) {
unsigned int port = 0x4d0 + (irq >> 3);
return (inb(port) >> (irq & 7)) & 1;
}
@@ -1034,7 +1218,7 @@ void unlock_vector_lock(void)
spin_unlock(&vector_lock);
}
-static int __assign_irq_vector(int irq, cpumask_t mask)
+static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
{
/*
* NOTE! The local APIC isn't very good at handling
@@ -1050,16 +1234,13 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
unsigned int old_vector;
int cpu;
- struct irq_cfg *cfg;
- cfg = irq_cfg(irq);
+ if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+ return -EBUSY;
/* Only try and allocate irqs on cpus that are present */
cpus_and(mask, mask, cpu_online_map);
- if ((cfg->move_in_progress) || cfg->move_cleanup_count)
- return -EBUSY;
-
old_vector = cfg->vector;
if (old_vector) {
cpumask_t tmp;
@@ -1113,24 +1294,22 @@ next:
return -ENOSPC;
}
-static int assign_irq_vector(int irq, cpumask_t mask)
+static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
{
int err;
unsigned long flags;
spin_lock_irqsave(&vector_lock, flags);
- err = __assign_irq_vector(irq, mask);
+ err = __assign_irq_vector(irq, cfg, mask);
spin_unlock_irqrestore(&vector_lock, flags);
return err;
}
-static void __clear_irq_vector(int irq)
+static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
{
- struct irq_cfg *cfg;
cpumask_t mask;
int cpu, vector;
- cfg = irq_cfg(irq);
BUG_ON(!cfg->vector);
vector = cfg->vector;
@@ -1162,9 +1341,13 @@ void __setup_vector_irq(int cpu)
/* This function must be called with vector_lock held */
int irq, vector;
struct irq_cfg *cfg;
+ struct irq_desc *desc;
/* Mark the inuse vectors */
- for_each_irq_cfg(irq, cfg) {
+ for_each_irq_desc(irq, desc) {
+ if (!desc)
+ continue;
+ cfg = desc->chip_data;
if (!cpu_isset(cpu, cfg->domain))
continue;
vector = cfg->vector;
@@ -1215,11 +1398,8 @@ static inline int IO_APIC_irq_trigger(int irq)
}
#endif
-static void ioapic_register_intr(int irq, unsigned long trigger)
+static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
trigger == IOAPIC_LEVEL)
@@ -1311,7 +1491,7 @@ static int setup_ioapic_entry(int apic, int irq,
return 0;
}
-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
int trigger, int polarity)
{
struct irq_cfg *cfg;
@@ -1321,10 +1501,10 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
if (!IO_APIC_IRQ(irq))
return;
- cfg = irq_cfg(irq);
+ cfg = desc->chip_data;
mask = TARGET_CPUS;
- if (assign_irq_vector(irq, mask))
+ if (assign_irq_vector(irq, cfg, mask))
return;
cpus_and(mask, cfg->domain, mask);
@@ -1341,12 +1521,12 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
cfg->vector)) {
printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
mp_ioapics[apic].mp_apicid, pin);
- __clear_irq_vector(irq);
+ __clear_irq_vector(irq, cfg);
return;
}
- ioapic_register_intr(irq, trigger);
- if (irq < 16)
+ ioapic_register_intr(irq, desc, trigger);
+ if (irq < NR_IRQS_LEGACY)
disable_8259A_irq(irq);
ioapic_write_entry(apic, pin, entry);
@@ -1356,6 +1536,9 @@ static void __init setup_IO_APIC_irqs(void)
{
int apic, pin, idx, irq;
int notcon = 0;
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ int cpu = boot_cpu_id;
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
@@ -1387,9 +1570,15 @@ static void __init setup_IO_APIC_irqs(void)
if (multi_timer_check(apic, irq))
continue;
#endif
- add_pin_to_irq(irq, apic, pin);
+ desc = irq_to_desc_alloc_cpu(irq, cpu);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+ continue;
+ }
+ cfg = desc->chip_data;
+ add_pin_to_irq_cpu(cfg, cpu, apic, pin);
- setup_IO_APIC_irq(apic, pin, irq,
+ setup_IO_APIC_irq(apic, pin, irq, desc,
irq_trigger(idx), irq_polarity(idx));
}
}
@@ -1448,6 +1637,7 @@ __apicdebuginit(void) print_IO_APIC(void)
union IO_APIC_reg_03 reg_03;
unsigned long flags;
struct irq_cfg *cfg;
+ struct irq_desc *desc;
unsigned int irq;
if (apic_verbosity == APIC_QUIET)
@@ -1537,8 +1727,13 @@ __apicdebuginit(void) print_IO_APIC(void)
}
}
printk(KERN_DEBUG "IRQ to pin mappings:\n");
- for_each_irq_cfg(irq, cfg) {
- struct irq_pin_list *entry = cfg->irq_2_pin;
+ for_each_irq_desc(irq, desc) {
+ struct irq_pin_list *entry;
+
+ if (!desc)
+ continue;
+ cfg = desc->chip_data;
+ entry = cfg->irq_2_pin;
if (!entry)
continue;
printk(KERN_DEBUG "IRQ%d ", irq);
@@ -2022,14 +2217,16 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
{
int was_pending = 0;
unsigned long flags;
+ struct irq_cfg *cfg;
spin_lock_irqsave(&ioapic_lock, flags);
- if (irq < 16) {
+ if (irq < NR_IRQS_LEGACY) {
disable_8259A_irq(irq);
if (i8259A_irq_pending(irq))
was_pending = 1;
}
- __unmask_IO_APIC_irq(irq);
+ cfg = irq_cfg(irq);
+ __unmask_IO_APIC_irq(cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
return was_pending;
@@ -2092,35 +2289,37 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
* as simple as edge triggered migration and we can do the irq migration
* with a simple atomic update to IO-APIC RTE.
*/
-static void migrate_ioapic_irq(int irq, cpumask_t mask)
+static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
{
struct irq_cfg *cfg;
- struct irq_desc *desc;
cpumask_t tmp, cleanup_mask;
struct irte irte;
int modify_ioapic_rte;
unsigned int dest;
unsigned long flags;
+ unsigned int irq;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
return;
+ irq = desc->irq;
if (get_irte(irq, &irte))
return;
- if (assign_irq_vector(irq, mask))
+ cfg = desc->chip_data;
+ if (assign_irq_vector(irq, cfg, mask))
return;
- cfg = irq_cfg(irq);
+ set_extra_move_desc(desc, mask);
+
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
- desc = irq_to_desc(irq);
modify_ioapic_rte = desc->status & IRQ_LEVEL;
if (modify_ioapic_rte) {
spin_lock_irqsave(&ioapic_lock, flags);
- __target_IO_APIC_irq(irq, dest, cfg->vector);
+ __target_IO_APIC_irq(irq, dest, cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -2142,14 +2341,14 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
desc->affinity = mask;
}
-static int migrate_irq_remapped_level(int irq)
+static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
{
int ret = -1;
- struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg = desc->chip_data;
- mask_IO_APIC_irq(irq);
+ mask_IO_APIC_irq_desc(desc);
- if (io_apic_level_ack_pending(irq)) {
+ if (io_apic_level_ack_pending(cfg)) {
/*
* Interrupt in progress. Migrating irq now will change the
* vector information in the IO-APIC RTE and that will confuse
@@ -2161,14 +2360,15 @@ static int migrate_irq_remapped_level(int irq)
}
/* everthing is clear. we have right of way */
- migrate_ioapic_irq(irq, desc->pending_mask);
+ migrate_ioapic_irq_desc(desc, desc->pending_mask);
ret = 0;
desc->status &= ~IRQ_MOVE_PENDING;
cpus_clear(desc->pending_mask);
unmask:
- unmask_IO_APIC_irq(irq);
+ unmask_IO_APIC_irq_desc(desc);
+
return ret;
}
@@ -2178,6 +2378,9 @@ static void ir_irq_migration(struct work_struct *work)
struct irq_desc *desc;
for_each_irq_desc(irq, desc) {
+ if (!desc)
+ continue;
+
if (desc->status & IRQ_MOVE_PENDING) {
unsigned long flags;
@@ -2198,18 +2401,22 @@ static void ir_irq_migration(struct work_struct *work)
/*
* Migrates the IRQ destination in the process context.
*/
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
{
- struct irq_desc *desc = irq_to_desc(irq);
-
if (desc->status & IRQ_LEVEL) {
desc->status |= IRQ_MOVE_PENDING;
desc->pending_mask = mask;
- migrate_irq_remapped_level(irq);
+ migrate_irq_remapped_level_desc(desc);
return;
}
- migrate_ioapic_irq(irq, mask);
+ migrate_ioapic_irq_desc(desc, mask);
+}
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ set_ir_ioapic_affinity_irq_desc(desc, mask);
}
#endif
@@ -2229,6 +2436,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
struct irq_cfg *cfg;
irq = __get_cpu_var(vector_irq)[vector];
+ if (irq == -1)
+ continue;
+
desc = irq_to_desc(irq);
if (!desc)
continue;
@@ -2250,19 +2460,40 @@ unlock:
irq_exit();
}
-static void irq_complete_move(unsigned int irq)
+static void irq_complete_move(struct irq_desc **descp)
{
- struct irq_cfg *cfg = irq_cfg(irq);
+ struct irq_desc *desc = *descp;
+ struct irq_cfg *cfg = desc->chip_data;
unsigned vector, me;
- if (likely(!cfg->move_in_progress))
+ if (likely(!cfg->move_in_progress)) {
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+ if (likely(!cfg->move_desc_pending))
+ return;
+
+ /* domain has not changed, but affinity did */
+ me = smp_processor_id();
+ if (cpu_isset(me, desc->affinity)) {
+ *descp = desc = move_irq_desc(desc, me);
+ /* get the new one */
+ cfg = desc->chip_data;
+ cfg->move_desc_pending = 0;
+ }
+#endif
return;
+ }
vector = ~get_irq_regs()->orig_ax;
me = smp_processor_id();
if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
cpumask_t cleanup_mask;
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+ *descp = desc = move_irq_desc(desc, me);
+ /* get the new one */
+ cfg = desc->chip_data;
+#endif
+
cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
cfg->move_cleanup_count = cpus_weight(cleanup_mask);
send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
@@ -2270,8 +2501,9 @@ static void irq_complete_move(unsigned int irq)
}
}
#else
-static inline void irq_complete_move(unsigned int irq) {}
+static inline void irq_complete_move(struct irq_desc **descp) {}
#endif
+
#ifdef CONFIG_INTR_REMAP
static void ack_x2apic_level(unsigned int irq)
{
@@ -2282,11 +2514,14 @@ static void ack_x2apic_edge(unsigned int irq)
{
ack_x2APIC_irq();
}
+
#endif
static void ack_apic_edge(unsigned int irq)
{
- irq_complete_move(irq);
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ irq_complete_move(&desc);
move_native_irq(irq);
ack_APIC_irq();
}
@@ -2295,18 +2530,21 @@ atomic_t irq_mis_count;
static void ack_apic_level(unsigned int irq)
{
+ struct irq_desc *desc = irq_to_desc(irq);
+
#ifdef CONFIG_X86_32
unsigned long v;
int i;
#endif
+ struct irq_cfg *cfg;
int do_unmask_irq = 0;
- irq_complete_move(irq);
+ irq_complete_move(&desc);
#ifdef CONFIG_GENERIC_PENDING_IRQ
/* If we are moving the irq we need to mask it */
- if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+ if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
do_unmask_irq = 1;
- mask_IO_APIC_irq(irq);
+ mask_IO_APIC_irq_desc(desc);
}
#endif
@@ -2330,7 +2568,8 @@ static void ack_apic_level(unsigned int irq)
* operation to prevent an edge-triggered interrupt escaping meanwhile.
* The idea is from Manfred Spraul. --macro
*/
- i = irq_cfg(irq)->vector;
+ cfg = desc->chip_data;
+ i = cfg->vector;
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
#endif
@@ -2369,17 +2608,18 @@ static void ack_apic_level(unsigned int irq)
* accurate and is causing problems then it is a hardware bug
* and you can go talk to the chipset vendor about it.
*/
- if (!io_apic_level_ack_pending(irq))
+ cfg = desc->chip_data;
+ if (!io_apic_level_ack_pending(cfg))
move_masked_irq(irq);
- unmask_IO_APIC_irq(irq);
+ unmask_IO_APIC_irq_desc(desc);
}
#ifdef CONFIG_X86_32
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
spin_lock(&ioapic_lock);
- __mask_and_edge_IO_APIC_irq(irq);
- __unmask_and_level_IO_APIC_irq(irq);
+ __mask_and_edge_IO_APIC_irq(cfg);
+ __unmask_and_level_IO_APIC_irq(cfg);
spin_unlock(&ioapic_lock);
}
#endif
@@ -2430,20 +2670,22 @@ static inline void init_IO_APIC_traps(void)
* Also, we've got to be careful not to trash gate
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
- for_each_irq_cfg(irq, cfg) {
- if (IO_APIC_IRQ(irq) && !cfg->vector) {
+ for_each_irq_desc(irq, desc) {
+ if (!desc)
+ continue;
+
+ cfg = desc->chip_data;
+ if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
/*
* Hmm.. We don't have an entry for this,
* so default to an old-fashioned 8259
* interrupt if we can..
*/
- if (irq < 16)
+ if (irq < NR_IRQS_LEGACY)
make_8259A_irq(irq);
- else {
- desc = irq_to_desc(irq);
+ else
/* Strange. Oh, well.. */
desc->chip = &no_irq_chip;
- }
}
}
}
@@ -2468,7 +2710,7 @@ static void unmask_lapic_irq(unsigned int irq)
apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
}
-static void ack_lapic_irq (unsigned int irq)
+static void ack_lapic_irq(unsigned int irq)
{
ack_APIC_irq();
}
@@ -2480,11 +2722,8 @@ static struct irq_chip lapic_chip __read_mostly = {
.ack = ack_lapic_irq,
};
-static void lapic_register_intr(int irq)
+static void lapic_register_intr(int irq, struct irq_desc *desc)
{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
desc->status &= ~IRQ_LEVEL;
set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
"edge");
@@ -2588,7 +2827,9 @@ int timer_through_8259 __initdata;
*/
static inline void __init check_timer(void)
{
- struct irq_cfg *cfg = irq_cfg(0);
+ struct irq_desc *desc = irq_to_desc(0);
+ struct irq_cfg *cfg = desc->chip_data;
+ int cpu = boot_cpu_id;
int apic1, pin1, apic2, pin2;
unsigned long flags;
unsigned int ver;
@@ -2603,7 +2844,7 @@ static inline void __init check_timer(void)
* get/set the timer IRQ vector:
*/
disable_8259A_irq(0);
- assign_irq_vector(0, TARGET_CPUS);
+ assign_irq_vector(0, cfg, TARGET_CPUS);
/*
* As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2654,10 +2895,10 @@ static inline void __init check_timer(void)
* Ok, does IRQ0 through the IOAPIC work?
*/
if (no_pin1) {
- add_pin_to_irq(0, apic1, pin1);
+ add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
}
- unmask_IO_APIC_irq(0);
+ unmask_IO_APIC_irq_desc(desc);
if (timer_irq_works()) {
if (nmi_watchdog == NMI_IO_APIC) {
setup_nmi();
@@ -2683,9 +2924,9 @@ static inline void __init check_timer(void)
/*
* legacy devices should be connected to IO APIC #0
*/
- replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+ replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
- unmask_IO_APIC_irq(0);
+ unmask_IO_APIC_irq_desc(desc);
enable_8259A_irq(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2717,7 +2958,7 @@ static inline void __init check_timer(void)
apic_printk(APIC_QUIET, KERN_INFO
"...trying to set up timer as Virtual Wire IRQ...\n");
- lapic_register_intr(0);
+ lapic_register_intr(0, desc);
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
enable_8259A_irq(0);
@@ -2902,22 +3143,26 @@ unsigned int create_irq_nr(unsigned int irq_want)
unsigned int irq;
unsigned int new;
unsigned long flags;
- struct irq_cfg *cfg_new;
-
- irq_want = nr_irqs - 1;
+ struct irq_cfg *cfg_new = NULL;
+ int cpu = boot_cpu_id;
+ struct irq_desc *desc_new = NULL;
irq = 0;
spin_lock_irqsave(&vector_lock, flags);
- for (new = irq_want; new > 0; new--) {
+ for (new = irq_want; new < NR_IRQS; new++) {
if (platform_legacy_irq(new))
continue;
- cfg_new = irq_cfg(new);
- if (cfg_new && cfg_new->vector != 0)
+
+ desc_new = irq_to_desc_alloc_cpu(new, cpu);
+ if (!desc_new) {
+ printk(KERN_INFO "can not get irq_desc for %d\n", new);
continue;
- /* check if need to create one */
- if (!cfg_new)
- cfg_new = irq_cfg_alloc(new);
- if (__assign_irq_vector(new, TARGET_CPUS) == 0)
+ }
+ cfg_new = desc_new->chip_data;
+
+ if (cfg_new->vector != 0)
+ continue;
+ if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
irq = new;
break;
}
@@ -2925,15 +3170,21 @@ unsigned int create_irq_nr(unsigned int irq_want)
if (irq > 0) {
dynamic_irq_init(irq);
+ /* restore it, in case dynamic_irq_init clear it */
+ if (desc_new)
+ desc_new->chip_data = cfg_new;
}
return irq;
}
+static int nr_irqs_gsi = NR_IRQS_LEGACY;
int create_irq(void)
{
+ unsigned int irq_want;
int irq;
- irq = create_irq_nr(nr_irqs - 1);
+ irq_want = nr_irqs_gsi;
+ irq = create_irq_nr(irq_want);
if (irq == 0)
irq = -1;
@@ -2944,14 +3195,22 @@ int create_irq(void)
void destroy_irq(unsigned int irq)
{
unsigned long flags;
+ struct irq_cfg *cfg;
+ struct irq_desc *desc;
+ /* store it, in case dynamic_irq_cleanup clear it */
+ desc = irq_to_desc(irq);
+ cfg = desc->chip_data;
dynamic_irq_cleanup(irq);
+ /* connect back irq_cfg */
+ if (desc)
+ desc->chip_data = cfg;
#ifdef CONFIG_INTR_REMAP
free_irte(irq);
#endif
spin_lock_irqsave(&vector_lock, flags);
- __clear_irq_vector(irq);
+ __clear_irq_vector(irq, cfg);
spin_unlock_irqrestore(&vector_lock, flags);
}
@@ -2966,12 +3225,12 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
unsigned dest;
cpumask_t tmp;
+ cfg = irq_cfg(irq);
tmp = TARGET_CPUS;
- err = assign_irq_vector(irq, tmp);
+ err = assign_irq_vector(irq, cfg, tmp);
if (err)
return err;
- cfg = irq_cfg(irq);
cpus_and(tmp, cfg->domain, tmp);
dest = cpu_mask_to_apicid(tmp);
@@ -3029,35 +3288,35 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
#ifdef CONFIG_SMP
static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
{
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
struct msi_msg msg;
unsigned int dest;
cpumask_t tmp;
- struct irq_desc *desc;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
return;
- if (assign_irq_vector(irq, mask))
+ cfg = desc->chip_data;
+ if (assign_irq_vector(irq, cfg, mask))
return;
- cfg = irq_cfg(irq);
+ set_extra_move_desc(desc, mask);
+
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
- read_msi_msg(irq, &msg);
+ read_msi_msg_desc(desc, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
- write_msi_msg(irq, &msg);
- desc = irq_to_desc(irq);
+ write_msi_msg_desc(desc, &msg);
desc->affinity = mask;
}
-
#ifdef CONFIG_INTR_REMAP
/*
* Migrate the MSI irq to another cpumask. This migration is
@@ -3065,11 +3324,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
*/
static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
{
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
unsigned int dest;
cpumask_t tmp, cleanup_mask;
struct irte irte;
- struct irq_desc *desc;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
@@ -3078,10 +3337,12 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
if (get_irte(irq, &irte))
return;
- if (assign_irq_vector(irq, mask))
+ cfg = desc->chip_data;
+ if (assign_irq_vector(irq, cfg, mask))
return;
- cfg = irq_cfg(irq);
+ set_extra_move_desc(desc, mask);
+
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
@@ -3105,9 +3366,9 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
cfg->move_in_progress = 0;
}
- desc = irq_to_desc(irq);
desc->affinity = mask;
}
+
#endif
#endif /* CONFIG_SMP */
@@ -3166,7 +3427,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
}
#endif
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
{
int ret;
struct msi_msg msg;
@@ -3175,7 +3436,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
if (ret < 0)
return ret;
- set_irq_msi(irq, desc);
+ set_irq_msi(irq, msidesc);
write_msi_msg(irq, &msg);
#ifdef CONFIG_INTR_REMAP
@@ -3195,26 +3456,13 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
return 0;
}
-static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
-{
- unsigned int irq;
-
- irq = dev->bus->number;
- irq <<= 8;
- irq |= dev->devfn;
- irq <<= 12;
-
- return irq;
-}
-
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
{
unsigned int irq;
int ret;
unsigned int irq_want;
- irq_want = build_irq_for_pci_dev(dev) + 0x100;
-
+ irq_want = nr_irqs_gsi;
irq = create_irq_nr(irq_want);
if (irq == 0)
return -1;
@@ -3228,7 +3476,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
goto error;
no_ir:
#endif
- ret = setup_msi_irq(dev, desc, irq);
+ ret = setup_msi_irq(dev, msidesc, irq);
if (ret < 0) {
destroy_irq(irq);
return ret;
@@ -3246,7 +3494,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
unsigned int irq;
int ret, sub_handle;
- struct msi_desc *desc;
+ struct msi_desc *msidesc;
unsigned int irq_want;
#ifdef CONFIG_INTR_REMAP
@@ -3254,10 +3502,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
int index = 0;
#endif
- irq_want = build_irq_for_pci_dev(dev) + 0x100;
+ irq_want = nr_irqs_gsi;
sub_handle = 0;
- list_for_each_entry(desc, &dev->msi_list, list) {
- irq = create_irq_nr(irq_want--);
+ list_for_each_entry(msidesc, &dev->msi_list, list) {
+ irq = create_irq_nr(irq_want);
+ irq_want++;
if (irq == 0)
return -1;
#ifdef CONFIG_INTR_REMAP
@@ -3289,7 +3538,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
}
no_ir:
#endif
- ret = setup_msi_irq(dev, desc, irq);
+ ret = setup_msi_irq(dev, msidesc, irq);
if (ret < 0)
goto error;
sub_handle++;
@@ -3310,20 +3559,22 @@ void arch_teardown_msi_irq(unsigned int irq)
#ifdef CONFIG_SMP
static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
{
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
struct msi_msg msg;
unsigned int dest;
cpumask_t tmp;
- struct irq_desc *desc;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
return;
- if (assign_irq_vector(irq, mask))
+ cfg = desc->chip_data;
+ if (assign_irq_vector(irq, cfg, mask))
return;
- cfg = irq_cfg(irq);
+ set_extra_move_desc(desc, mask);
+
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
@@ -3335,9 +3586,9 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
dmar_msi_write(irq, &msg);
- desc = irq_to_desc(irq);
desc->affinity = mask;
}
+
#endif /* CONFIG_SMP */
struct irq_chip dmar_msi_type = {
@@ -3371,8 +3622,8 @@ int arch_setup_dmar_msi(unsigned int irq)
#ifdef CONFIG_SMP
static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
{
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
- struct irq_desc *desc;
struct msi_msg msg;
unsigned int dest;
cpumask_t tmp;
@@ -3381,10 +3632,12 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
if (cpus_empty(tmp))
return;
- if (assign_irq_vector(irq, mask))
+ cfg = desc->chip_data;
+ if (assign_irq_vector(irq, cfg, mask))
return;
- cfg = irq_cfg(irq);
+ set_extra_move_desc(desc, mask);
+
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
@@ -3396,9 +3649,9 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
hpet_msi_write(irq, &msg);
- desc = irq_to_desc(irq);
desc->affinity = mask;
}
+
#endif /* CONFIG_SMP */
struct irq_chip hpet_msi_type = {
@@ -3453,26 +3706,28 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
{
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
unsigned int dest;
cpumask_t tmp;
- struct irq_desc *desc;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
return;
- if (assign_irq_vector(irq, mask))
+ cfg = desc->chip_data;
+ if (assign_irq_vector(irq, cfg, mask))
return;
- cfg = irq_cfg(irq);
+ set_extra_move_desc(desc, mask);
+
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
target_ht_irq(irq, dest, cfg->vector);
- desc = irq_to_desc(irq);
desc->affinity = mask;
}
+
#endif
static struct irq_chip ht_irq_chip = {
@@ -3492,13 +3747,13 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
int err;
cpumask_t tmp;
+ cfg = irq_cfg(irq);
tmp = TARGET_CPUS;
- err = assign_irq_vector(irq, tmp);
+ err = assign_irq_vector(irq, cfg, tmp);
if (!err) {
struct ht_irq_msg msg;
unsigned dest;
- cfg = irq_cfg(irq);
cpus_and(tmp, cfg->domain, tmp);
dest = cpu_mask_to_apicid(tmp);
@@ -3544,7 +3799,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
unsigned long flags;
int err;
- err = assign_irq_vector(irq, *eligible_cpu);
+ cfg = irq_cfg(irq);
+
+ err = assign_irq_vector(irq, cfg, *eligible_cpu);
if (err != 0)
return err;
@@ -3553,8 +3810,6 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
irq_name);
spin_unlock_irqrestore(&vector_lock, flags);
- cfg = irq_cfg(irq);
-
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
@@ -3606,29 +3861,16 @@ int __init io_apic_get_redir_entries (int ioapic)
return reg_01.bits.entries;
}
-int __init probe_nr_irqs(void)
+void __init probe_nr_irqs_gsi(void)
{
int idx;
int nr = 0;
-#ifndef CONFIG_XEN
- int nr_min = 32;
-#else
- int nr_min = NR_IRQS;
-#endif
for (idx = 0; idx < nr_ioapics; idx++)
nr += io_apic_get_redir_entries(idx) + 1;
- /* double it for hotplug and msi and nmi */
- nr <<= 1;
-
- /* something wrong ? */
- if (nr < nr_min)
- nr = nr_min;
- if (WARN_ON(nr > NR_IRQS))
- nr = NR_IRQS;
-
- return nr;
+ if (nr > nr_irqs_gsi)
+ nr_irqs_gsi = nr;
}
/* --------------------------------------------------------------------------
@@ -3727,19 +3969,31 @@ int __init io_apic_get_version(int ioapic)
int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
{
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ int cpu = boot_cpu_id;
+
if (!IO_APIC_IRQ(irq)) {
apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
ioapic);
return -EINVAL;
}
+ desc = irq_to_desc_alloc_cpu(irq, cpu);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc %d\n", irq);
+ return 0;
+ }
+
/*
* IRQs < 16 are already in the irq_2_pin[] map
*/
- if (irq >= 16)
- add_pin_to_irq(irq, ioapic, pin);
+ if (irq >= NR_IRQS_LEGACY) {
+ cfg = desc->chip_data;
+ add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
+ }
- setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
+ setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
return 0;
}
@@ -3775,7 +4029,9 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
void __init setup_ioapic_dest(void)
{
int pin, ioapic, irq, irq_entry;
+ struct irq_desc *desc;
struct irq_cfg *cfg;
+ cpumask_t mask;
if (skip_ioapic_setup == 1)
return;
@@ -3791,17 +4047,31 @@ void __init setup_ioapic_dest(void)
* when you have too many devices, because at that time only boot
* cpu is online.
*/
- cfg = irq_cfg(irq);
- if (!cfg->vector)
- setup_IO_APIC_irq(ioapic, pin, irq,
+ desc = irq_to_desc(irq);
+ cfg = desc->chip_data;
+ if (!cfg->vector) {
+ setup_IO_APIC_irq(ioapic, pin, irq, desc,
irq_trigger(irq_entry),
irq_polarity(irq_entry));
+ continue;
+
+ }
+
+ /*
+ * Honour affinities which have been set in early boot
+ */
+ if (desc->status &
+ (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+ mask = desc->affinity;
+ else
+ mask = TARGET_CPUS;
+
#ifdef CONFIG_INTR_REMAP
- else if (intr_remapping_enabled)
- set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
-#endif
+ if (intr_remapping_enabled)
+ set_ir_ioapic_affinity_irq_desc(desc, mask);
else
- set_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
+ set_ioapic_affinity_irq_desc(desc, mask);
}
}
@@ -3850,7 +4120,6 @@ void __init ioapic_init_mappings(void)
struct resource *ioapic_res;
int i;
- irq_2_pin_init();
ioapic_res = ioapic_setup_resources();
for (i = 0; i < nr_ioapics; i++) {
if (smp_found_config) {
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d1d4dc5..3f1d9d1 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -118,6 +118,9 @@ int show_interrupts(struct seq_file *p, void *v)
}
desc = irq_to_desc(i);
+ if (!desc)
+ return 0;
+
spin_lock_irqsave(&desc->lock, flags);
#ifndef CONFIG_SMP
any_count = kstat_irqs(i);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a513826..119fc9c 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -242,6 +242,8 @@ void fixup_irqs(cpumask_t map)
for_each_irq_desc(irq, desc) {
cpumask_t mask;
+ if (!desc)
+ continue;
if (irq == 2)
continue;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84e..900009c 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -94,6 +94,8 @@ void fixup_irqs(cpumask_t map)
int break_affinity = 0;
int set_affinity = 1;
+ if (!desc)
+ continue;
if (irq == 2)
continue;
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 845aa98..6a92f47 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -68,8 +68,7 @@ void __init init_ISA_irqs (void)
/*
* 16 old-style INTA-cycle interrupts:
*/
- for (i = 0; i < 16; i++) {
- /* first time call this irq_desc */
+ for (i = 0; i < NR_IRQS_LEGACY; i++) {
struct irq_desc *desc = irq_to_desc(i);
desc->status = IRQ_DISABLED;
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ff02353..40c1e62 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -142,8 +142,7 @@ void __init init_ISA_irqs(void)
init_bsp_APIC();
init_8259A(0);
- for (i = 0; i < 16; i++) {
- /* first time call this irq_desc */
+ for (i = 0; i < NR_IRQS_LEGACY; i++) {
struct irq_desc *desc = irq_to_desc(i);
desc->status = IRQ_DISABLED;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1c9cc43..e169ae9 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt)
}
#ifdef CONFIG_X86_LOCAL_APIC
-static void __devinit kvm_setup_secondary_clock(void)
+static void __cpuinit kvm_setup_secondary_clock(void)
{
/*
* Now that the first cpu already had this clocksource initialized,
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 82fb280..c4b5b24 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -272,13 +272,18 @@ static struct attribute_group mc_attr_group = {
.name = "microcode",
};
-static void microcode_fini_cpu(int cpu)
+static void __microcode_fini_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- mutex_lock(&microcode_mutex);
microcode_ops->microcode_fini_cpu(cpu);
uci->valid = 0;
+}
+
+static void microcode_fini_cpu(int cpu)
+{
+ mutex_lock(&microcode_mutex);
+ __microcode_fini_cpu(cpu);
mutex_unlock(&microcode_mutex);
}
@@ -306,12 +311,16 @@ static int microcode_resume_cpu(int cpu)
* to this cpu (a bit of paranoia):
*/
if (microcode_ops->collect_cpu_info(cpu, &nsig)) {
- microcode_fini_cpu(cpu);
+ __microcode_fini_cpu(cpu);
+ printk(KERN_ERR "failed to collect_cpu_info for resuming cpu #%d\n",
+ cpu);
return -1;
}
- if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) {
- microcode_fini_cpu(cpu);
+ if ((nsig.sig != uci->cpu_sig.sig) || (nsig.pf != uci->cpu_sig.pf)) {
+ __microcode_fini_cpu(cpu);
+ printk(KERN_ERR "cached ucode doesn't match the resuming cpu #%d\n",
+ cpu);
/* Should we look for a new ucode here? */
return 1;
}
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 622dc4a..a8e6279 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -155,6 +155,7 @@ static DEFINE_SPINLOCK(microcode_update_lock);
static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu_num);
+ unsigned long flags;
unsigned int val[2];
memset(csig, 0, sizeof(*csig));
@@ -174,11 +175,16 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
csig->pf = 1 << ((val[1] >> 18) & 7);
}
+ /* serialize access to the physical write to MSR 0x79 */
+ spin_lock_irqsave(&microcode_update_lock, flags);
+
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* see notes above for revision 1.07. Apparent chip bug */
sync_core();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
+ spin_unlock_irqrestore(&microcode_update_lock, flags);
+
pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
csig->sig, csig->pf, csig->rev);
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index f98f4e1..0f4c1fd 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -604,6 +604,9 @@ static void __init __get_smp_config(unsigned int early)
printk(KERN_INFO "Using ACPI for processor (LAPIC) "
"configuration information\n");
+ if (!mpf)
+ return;
+
printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
mpf->mpf_specification);
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 0e9f198..95777b0 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -7,7 +7,8 @@
#include <asm/paravirt.h>
-static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
+static inline void
+default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
{
__raw_spin_lock(lock);
}
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index e1e731d..d28bbdc 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1567,7 +1567,7 @@ static int __init calgary_parse_options(char *p)
++p;
if (*p == '\0')
break;
- bridge = simple_strtol(p, &endp, 0);
+ bridge = simple_strtoul(p, &endp, 0);
if (p == endp)
break;
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a42b02b..a35eaa3 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -123,6 +123,8 @@ static void free_iommu(unsigned long offset, int size)
spin_lock_irqsave(&iommu_bitmap_lock, flags);
iommu_area_free(iommu_gart_bitmap, offset, size);
+ if (offset >= next_bit)
+ next_bit = offset + size;
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
@@ -743,10 +745,8 @@ void __init gart_iommu_init(void)
unsigned long scratch;
long i;
- if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
- printk(KERN_INFO "PCI-GART: No AMD GART found.\n");
+ if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0)
return;
- }
#ifndef CONFIG_AGP_AMD64
no_agp = 1;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9d5674f..5e028e1 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -794,6 +794,9 @@ void __init setup_arch(char **cmdline_p)
printk(KERN_INFO "Command line: %s\n", boot_command_line);
#endif
+ /* VMI may relocate the fixmap; do this before touching ioremap area */
+ vmi_init();
+
early_cpu_init();
early_ioremap_init();
@@ -880,13 +883,8 @@ void __init setup_arch(char **cmdline_p)
check_efer();
#endif
-#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
- /*
- * Must be before kernel pagetables are setup
- * or fixmap area is touched.
- */
- vmi_init();
-#endif
+ /* Must be before kernel pagetables are setup */
+ vmi_activate();
/* after early param, so could get panic from serial */
reserve_early_setup_data();
@@ -1082,7 +1080,7 @@ void __init setup_arch(char **cmdline_p)
ioapic_init_mappings();
/* need to wait for io_apic is mapped */
- nr_irqs = probe_nr_irqs();
+ probe_nr_irqs_gsi();
kvm_guest_init();
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7b10933..f71f96f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -294,9 +294,7 @@ static void __cpuinit start_secondary(void *unused)
* fragile that we want to limit the things done here to the
* most necessary things.
*/
-#ifdef CONFIG_VMI
vmi_bringup();
-#endif
cpu_init();
preempt_disable();
smp_callin();
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 8b6c393..22fd657 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -960,8 +960,6 @@ static inline int __init activate_vmi(void)
void __init vmi_init(void)
{
- unsigned long flags;
-
if (!vmi_rom)
probe_vmi_rom();
else
@@ -973,13 +971,21 @@ void __init vmi_init(void)
reserve_top_address(-vmi_rom->virtual_top);
- local_irq_save(flags);
- activate_vmi();
-
#ifdef CONFIG_X86_IO_APIC
/* This is virtual hardware; timer routing is wired correctly */
no_timer_check = 1;
#endif
+}
+
+void vmi_activate(void)
+{
+ unsigned long flags;
+
+ if (!vmi_rom)
+ return;
+
+ local_irq_save(flags);
+ activate_vmi();
local_irq_restore(flags & X86_EFLAGS_IF);
}
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index b13acb7..15c3e69 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -310,7 +310,7 @@ static void __init setup_xstate_init(void)
/*
* Enable and initialize the xsave feature.
*/
-void __init xsave_cntxt_init(void)
+void __ref xsave_cntxt_init(void)
{
unsigned int eax, ebx, ecx, edx;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f1983d9..410ddbc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1038,13 +1038,13 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
}
rmap_write_protect(vcpu->kvm, sp->gfn);
+ kvm_unlink_unsync_page(vcpu->kvm, sp);
if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
kvm_mmu_zap_page(vcpu->kvm, sp);
return 1;
}
kvm_mmu_flush_tlb(vcpu);
- kvm_unlink_unsync_page(vcpu->kvm, sp);
return 0;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 613ec9a..84eee43 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -331,6 +331,7 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
&curr_pte, sizeof(curr_pte));
if (r || curr_pte != gw->ptes[level - 2]) {
+ kvm_mmu_put_page(shadow_page, sptep);
kvm_release_pfn_clean(sw->pfn);
sw->sptep = NULL;
return 1;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d06b4dc..a4018b0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3149,7 +3149,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
if (cpu_has_virtual_nmis()) {
if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
- if (vmx_nmi_enabled(vcpu)) {
+ if (vcpu->arch.interrupt.pending) {
+ enable_nmi_window(vcpu);
+ } else if (vmx_nmi_enabled(vcpu)) {
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = true;
} else {
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 022cd41..202864a 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -401,14 +401,13 @@ static int __init ppro_init(char **cpu_type)
*cpu_type = "i386/pii";
break;
case 6 ... 8:
+ case 10 ... 11:
*cpu_type = "i386/piii";
break;
case 9:
+ case 13:
*cpu_type = "i386/p6_mobile";
break;
- case 10 ... 13:
- *cpu_type = "i386/p6";
- break;
case 14:
*cpu_type = "i386/core";
break;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 3f1b81a..e9f80c7 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -69,7 +69,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
int i;
if (!reset_value) {
- reset_value = kmalloc(sizeof(unsigned) * num_counters,
+ reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
GFP_ATOMIC);
if (!reset_value)
return;
@@ -156,6 +156,8 @@ static void ppro_start(struct op_msrs const * const msrs)
unsigned int low, high;
int i;
+ if (!reset_value)
+ return;
for (i = 0; i < num_counters; ++i) {
if (reset_value[i]) {
CTRL_READ(low, high, msrs, i);
@@ -171,6 +173,8 @@ static void ppro_stop(struct op_msrs const * const msrs)
unsigned int low, high;
int i;
+ if (!reset_value)
+ return;
for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 3c27a80..2051dc9 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -496,21 +496,24 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
pci_siemens_interrupt_controller);
/*
- * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config
- * have 4096 bytes. Even if the device is capable, that doesn't mean we can
- * access it. Maybe we don't have a way to generate extended config space
- * accesses. So check it
+ * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have
+ * 4096 bytes configuration space for each function of their processor
+ * configuration space.
*/
-static void fam10h_pci_cfg_space_size(struct pci_dev *dev)
+static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev)
{
dev->cfg_size = pci_cfg_space_size_ext(dev);
}
-
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size);
/*
* SB600: Disable BAR1 on device 14.0 to avoid HPET resources from
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 6889360..636ef4c 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -661,12 +661,11 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
* For 64-bit, we must skip the Xen hole in the middle of the address
* space, just after the big x86-64 virtual hole.
*/
-static int xen_pgd_walk(struct mm_struct *mm,
- int (*func)(struct mm_struct *mm, struct page *,
- enum pt_level),
- unsigned long limit)
+static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
+ int (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ unsigned long limit)
{
- pgd_t *pgd = mm->pgd;
int flush = 0;
unsigned hole_low, hole_high;
unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
@@ -753,6 +752,14 @@ out:
return flush;
}
+static int xen_pgd_walk(struct mm_struct *mm,
+ int (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ unsigned long limit)
+{
+ return __xen_pgd_walk(mm, mm->pgd, func, limit);
+}
+
/* If we're using split pte locks, then take the page's lock and
return a pointer to it. Otherwise return NULL. */
static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
@@ -854,7 +861,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
xen_mc_batch();
- if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
+ if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
/* re-enable interrupts for flushing */
xen_mc_issue(0);
@@ -998,7 +1005,7 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
PT_PMD);
#endif
- xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT);
+ __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
xen_mc_issue(0);
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d77da61..acd9b67 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -362,7 +362,7 @@ static void xen_cpu_die(unsigned int cpu)
alternatives_smp_switch(0);
}
-static void xen_play_dead(void)
+static void __cpuinit xen_play_dead(void) /* used only with CPU_HOTPLUG */
{
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index d7422dc..9e1afae 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -49,7 +49,7 @@ bool xen_vcpu_stolen(int vcpu);
void xen_mark_init_mm_pinned(void);
-void __init xen_setup_vcpu_info_placement(void);
+void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP
void xen_smp_init(void);
OpenPOWER on IntegriCloud