diff options
Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r-- | arch/x86_64/kernel/entry.S | 18 | ||||
-rw-r--r-- | arch/x86_64/kernel/machine_kexec.c | 13 | ||||
-rw-r--r-- | arch/x86_64/kernel/mce.c | 12 | ||||
-rw-r--r-- | arch/x86_64/kernel/mce_amd.c | 19 | ||||
-rw-r--r-- | arch/x86_64/kernel/pci-calgary.c | 77 | ||||
-rw-r--r-- | arch/x86_64/kernel/pci-nommu.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/pci-swiotlb.c | 5 | ||||
-rw-r--r-- | arch/x86_64/kernel/smp.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/tce.c | 4 | ||||
-rw-r--r-- | arch/x86_64/kernel/time.c | 18 | ||||
-rw-r--r-- | arch/x86_64/kernel/traps.c | 24 |
11 files changed, 109 insertions, 85 deletions
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index d464dde..6f81042 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -513,6 +513,7 @@ END(stub_rt_sigreturn) swapgs 1: incl %gs:pda_irqcount # RED-PEN should check preempt count cmoveq %gs:pda_irqstackptr,%rsp + push %rbp # backlink for old unwinder /* * We entered an interrupt context - irqs are off: */ @@ -1139,18 +1140,21 @@ ENTRY(machine_check) END(machine_check) #endif +/* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(call_softirq) CFI_STARTPROC - movq %gs:pda_irqstackptr,%rax - movq %rsp,%rdx - CFI_DEF_CFA_REGISTER rdx + push %rbp + CFI_ADJUST_CFA_OFFSET 8 + CFI_REL_OFFSET rbp,0 + mov %rsp,%rbp + CFI_DEF_CFA_REGISTER rbp incl %gs:pda_irqcount - cmove %rax,%rsp - pushq %rdx - /*todo CFI_DEF_CFA_EXPRESSION ...*/ + cmove %gs:pda_irqstackptr,%rsp + push %rbp # backlink for old unwinder call __do_softirq - popq %rsp + leaveq CFI_DEF_CFA_REGISTER rsp + CFI_ADJUST_CFA_OFFSET -8 decl %gs:pda_irqcount ret CFI_ENDPROC diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c index 83fb24a..106076b 100644 --- a/arch/x86_64/kernel/machine_kexec.c +++ b/arch/x86_64/kernel/machine_kexec.c @@ -207,14 +207,11 @@ NORET_TYPE void machine_kexec(struct kimage *image) __flush_tlb(); - /* The segment registers are funny things, they are - * automatically loaded from a table, in memory wherever you - * set them to a specific selector, but this table is never - * accessed again unless you set the segment to a different selector. - * - * The more common model are caches where the behide - * the scenes work is done, but is also dropped at arbitrary - * times. + /* The segment registers are funny things, they have both a + * visible and an invisible part. Whenever the visible part is + * set to a specific selector, the invisible part is loaded + * with from a table in memory. At no other time is the + * descriptor table in memory accessed. * * I take advantage of this here by force loading the * segments, before I zap the gdt with an invalid value. diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 8884567..4e017fb 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -615,7 +615,7 @@ static __cpuinit int mce_create_device(unsigned int cpu) } #ifdef CONFIG_HOTPLUG_CPU -static __cpuinit void mce_remove_device(unsigned int cpu) +static void mce_remove_device(unsigned int cpu) { int i; @@ -626,10 +626,9 @@ static __cpuinit void mce_remove_device(unsigned int cpu) sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); sysdev_unregister(&per_cpu(device_mce,cpu)); } -#endif /* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static __cpuinit int +static int mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; @@ -638,18 +637,17 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) case CPU_ONLINE: mce_create_device(cpu); break; -#ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: mce_remove_device(cpu); break; -#endif } return NOTIFY_OK; } -static struct notifier_block __cpuinitdata mce_cpu_notifier = { +static struct notifier_block mce_cpu_notifier = { .notifier_call = mce_cpu_callback, }; +#endif static __init int mce_init_device(void) { @@ -664,7 +662,7 @@ static __init int mce_init_device(void) mce_create_device(i); } - register_cpu_notifier(&mce_cpu_notifier); + register_hotcpu_notifier(&mce_cpu_notifier); misc_register(&mce_log_device); return err; } diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c index db2acbf..883fe74 100644 --- a/arch/x86_64/kernel/mce_amd.c +++ b/arch/x86_64/kernel/mce_amd.c @@ -558,7 +558,7 @@ out: * of shared sysfs dir/files, and rest of the cores will be symlinked to it. */ -static __cpuinit void deallocate_threshold_block(unsigned int cpu, +static void deallocate_threshold_block(unsigned int cpu, unsigned int bank) { struct threshold_block *pos = NULL; @@ -578,7 +578,7 @@ static __cpuinit void deallocate_threshold_block(unsigned int cpu, per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; } -static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank) +static void threshold_remove_bank(unsigned int cpu, int bank) { int i = 0; struct threshold_bank *b; @@ -618,7 +618,7 @@ free_out: per_cpu(threshold_banks, cpu)[bank] = NULL; } -static __cpuinit void threshold_remove_device(unsigned int cpu) +static void threshold_remove_device(unsigned int cpu) { unsigned int bank; @@ -629,14 +629,8 @@ static __cpuinit void threshold_remove_device(unsigned int cpu) } } -#else /* !CONFIG_HOTPLUG_CPU */ -static void threshold_remove_device(unsigned int cpu) -{ -} -#endif - /* get notified when a cpu comes on/off */ -static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, +static int threshold_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { /* cpu was unsigned int to begin with */ @@ -659,9 +653,10 @@ static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block threshold_cpu_notifier __cpuinitdata = { +static struct notifier_block threshold_cpu_notifier = { .notifier_call = threshold_cpu_callback, }; +#endif /* CONFIG_HOTPLUG_CPU */ static __init int threshold_init_device(void) { @@ -673,7 +668,7 @@ static __init int threshold_init_device(void) if (err) return err; } - register_cpu_notifier(&threshold_cpu_notifier); + register_hotcpu_notifier(&threshold_cpu_notifier); return 0; } diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index e71ed53..146924b 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -85,7 +85,8 @@ #define CSR_AGENT_MASK 0xffe0ffff #define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */ -#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * 2) /* max dev->bus->number */ +#define MAX_NUM_CHASSIS 8 /* max number of chassis */ +#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * MAX_NUM_CHASSIS * 2) /* max dev->bus->number */ #define PHBS_PER_CALGARY 4 /* register offsets in Calgary's internal register space */ @@ -110,7 +111,8 @@ static const unsigned long phb_offsets[] = { 0xB000 /* PHB3 */ }; -void* tce_table_kva[MAX_NUM_OF_PHBS * MAX_NUMNODES]; +static char bus_to_phb[MAX_PHB_BUS_NUM]; +void* tce_table_kva[MAX_PHB_BUS_NUM]; unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; static int translate_empty_slots __read_mostly = 0; static int calgary_detected __read_mostly = 0; @@ -119,7 +121,7 @@ static int calgary_detected __read_mostly = 0; * the bitmap of PHBs the user requested that we disable * translation on. */ -static DECLARE_BITMAP(translation_disabled, MAX_NUMNODES * MAX_PHB_BUS_NUM); +static DECLARE_BITMAP(translation_disabled, MAX_PHB_BUS_NUM); static void tce_cache_blast(struct iommu_table *tbl); @@ -452,7 +454,7 @@ static struct dma_mapping_ops calgary_dma_ops = { static inline int busno_to_phbid(unsigned char num) { - return bus_to_phb(num) % PHBS_PER_CALGARY; + return bus_to_phb[num]; } static inline unsigned long split_queue_offset(unsigned char num) @@ -812,7 +814,7 @@ static int __init calgary_init(void) int i, ret = -ENODEV; struct pci_dev *dev = NULL; - for (i = 0; i <= num_online_nodes() * MAX_NUM_OF_PHBS; i++) { + for (i = 0; i < MAX_PHB_BUS_NUM; i++) { dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CALGARY, dev); @@ -822,7 +824,7 @@ static int __init calgary_init(void) calgary_init_one_nontraslated(dev); continue; } - if (!tce_table_kva[i] && !translate_empty_slots) { + if (!tce_table_kva[dev->bus->number] && !translate_empty_slots) { pci_dev_put(dev); continue; } @@ -842,7 +844,7 @@ error: pci_dev_put(dev); continue; } - if (!tce_table_kva[i] && !translate_empty_slots) + if (!tce_table_kva[dev->bus->number] && !translate_empty_slots) continue; calgary_disable_translation(dev); calgary_free_tar(dev); @@ -876,9 +878,10 @@ static inline int __init determine_tce_table_size(u64 ram) void __init detect_calgary(void) { u32 val; - int bus, table_idx; + int bus; void *tbl; - int detected = 0; + int calgary_found = 0; + int phb = -1; /* * if the user specified iommu=off or iommu=soft or we found @@ -889,38 +892,46 @@ void __init detect_calgary(void) specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); - for (bus = 0, table_idx = 0; - bus <= num_online_nodes() * MAX_PHB_BUS_NUM; - bus++) { - BUG_ON(bus > MAX_NUMNODES * MAX_PHB_BUS_NUM); + for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { + int dev; + + tce_table_kva[bus] = NULL; + bus_to_phb[bus] = -1; + if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY) continue; + + /* + * There are 4 PHBs per Calgary chip. Set phb to which phb (0-3) + * it is connected to releative to the clagary chip. + */ + phb = (phb + 1) % PHBS_PER_CALGARY; + if (test_bit(bus, translation_disabled)) { printk(KERN_INFO "Calgary: translation is disabled for " "PHB 0x%x\n", bus); /* skip this phb, don't allocate a tbl for it */ - tce_table_kva[table_idx] = NULL; - table_idx++; continue; } /* - * scan the first slot of the PCI bus to see if there - * are any devices present + * Scan the slots of the PCI bus to see if there is a device present. + * The parent bus will be the zero-ith device, so start at 1. */ - val = read_pci_config(bus, 1, 0, 0); - if (val != 0xffffffff || translate_empty_slots) { - tbl = alloc_tce_table(); - if (!tbl) - goto cleanup; - detected = 1; - } else - tbl = NULL; - - tce_table_kva[table_idx] = tbl; - table_idx++; + for (dev = 1; dev < 8; dev++) { + val = read_pci_config(bus, dev, 0, 0); + if (val != 0xffffffff || translate_empty_slots) { + tbl = alloc_tce_table(); + if (!tbl) + goto cleanup; + tce_table_kva[bus] = tbl; + bus_to_phb[bus] = phb; + calgary_found = 1; + break; + } + } } - if (detected) { + if (calgary_found) { iommu_detected = 1; calgary_detected = 1; printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected. " @@ -929,9 +940,9 @@ void __init detect_calgary(void) return; cleanup: - for (--table_idx; table_idx >= 0; --table_idx) - if (tce_table_kva[table_idx]) - free_tce_table(tce_table_kva[table_idx]); + for (--bus; bus >= 0; --bus) + if (tce_table_kva[bus]) + free_tce_table(tce_table_kva[bus]); } int __init calgary_iommu_init(void) @@ -1002,7 +1013,7 @@ static int __init calgary_parse_options(char *p) if (p == endp) break; - if (bridge <= (num_online_nodes() * MAX_PHB_BUS_NUM)) { + if (bridge < MAX_PHB_BUS_NUM) { printk(KERN_INFO "Calgary: disabling " "translation for PHB 0x%x\n", bridge); set_bit(bridge, translation_disabled); diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c index c4c3cc3..aad7609 100644 --- a/arch/x86_64/kernel/pci-nommu.c +++ b/arch/x86_64/kernel/pci-nommu.c @@ -92,5 +92,7 @@ void __init no_iommu_init(void) { if (dma_ops) return; + + force_iommu = 0; /* no HW IOMMU */ dma_ops = &nommu_dma_ops; } diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c index ebdb77f..6a55f87 100644 --- a/arch/x86_64/kernel/pci-swiotlb.c +++ b/arch/x86_64/kernel/pci-swiotlb.c @@ -31,9 +31,10 @@ struct dma_mapping_ops swiotlb_dma_ops = { void pci_swiotlb_init(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ - if (!iommu_detected && !no_iommu && - (end_pfn > MAX_DMA32_PFN || force_iommu)) + if (!iommu_detected && !no_iommu && end_pfn > MAX_DMA32_PFN) swiotlb = 1; + if (swiotlb_force) + swiotlb = 1; if (swiotlb) { printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); swiotlb_init(); diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 5a1c0a3..06af6ca 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -203,7 +203,7 @@ int __cpuinit init_smp_flush(void) { int i; for_each_cpu_mask(i, cpu_possible_map) { - spin_lock_init(&per_cpu(flush_state.tlbstate_lock, i)); + spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); } return 0; } diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c index d3a9e79..5530dda 100644 --- a/arch/x86_64/kernel/tce.c +++ b/arch/x86_64/kernel/tce.c @@ -96,7 +96,6 @@ static inline unsigned int table_size_to_number_of_entries(unsigned char size) static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl) { unsigned int bitmapsz; - unsigned int tce_table_index; unsigned long bmppages; int ret; @@ -105,8 +104,7 @@ static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl) /* set the tce table size - measured in entries */ tbl->it_size = table_size_to_number_of_entries(specified_table_size); - tce_table_index = bus_to_phb(tbl->it_busno); - tbl->it_base = (unsigned long)tce_table_kva[tce_table_index]; + tbl->it_base = (unsigned long)tce_table_kva[dev->bus->number]; if (!tbl->it_base) { printk(KERN_ERR "Calgary: iommu_table_setparms: " "no table allocated?!\n"); diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index b9ff759..7a9b182 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -28,6 +28,7 @@ #include <linux/acpi.h> #ifdef CONFIG_ACPI #include <acpi/achware.h> /* for PM timer frequency */ +#include <acpi/acpi_bus.h> #endif #include <asm/8253pit.h> #include <asm/pgtable.h> @@ -193,7 +194,7 @@ unsigned long profile_pc(struct pt_regs *regs) is just accounted to the spinlock function. Better would be to write these functions in assembler again and check exactly. */ - if (in_lock_functions(pc)) { + if (!user_mode(regs) && in_lock_functions(pc)) { char *v = *(char **)regs->rsp; if ((v >= _stext && v <= _etext) || (v >= _sinittext && v <= _einittext) || @@ -953,11 +954,18 @@ __cpuinit int unsynchronized_tsc(void) #ifdef CONFIG_SMP if (apic_is_clustered_box()) return 1; - /* Intel systems are normally all synchronized. Exceptions - are handled in the check above. */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - return 0; #endif + /* Most intel systems have synchronized TSCs except for + multi node systems */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { +#ifdef CONFIG_ACPI + /* But TSC doesn't tick in C3 so don't use it there */ + if (acpi_fadt.length > 0 && acpi_fadt.plvl3_lat < 100) + return 1; +#endif + return 0; + } + /* Assume multi socket systems are not synchronized */ return num_present_cpus() > 1; } diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index eb39a27..4e9938d 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -254,7 +254,6 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s { const unsigned cpu = safe_smp_processor_id(); unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; - int i = 11; unsigned used = 0; printk("\nCall Trace:\n"); @@ -275,11 +274,20 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s if (unwind_init_blocked(&info, tsk) == 0) unw_ret = show_trace_unwind(&info, NULL); } - if (unw_ret > 0) { - if (call_trace > 0) + if (unw_ret > 0 && !arch_unw_user_mode(&info)) { +#ifdef CONFIG_STACK_UNWIND + unsigned long rip = info.regs.rip; + print_symbol("DWARF2 unwinder stuck at %s\n", rip); + if (call_trace == 1) { + printk("Leftover inexact backtrace:\n"); + stack = (unsigned long *)info.regs.rsp; + } else if (call_trace > 1) return; - printk("Legacy call trace:"); - i = 18; + else + printk("Full inexact backtrace again:\n"); +#else + printk("Inexact backtrace:\n"); +#endif } } @@ -521,7 +529,7 @@ void __kprobes oops_end(unsigned long flags) /* Nest count reaches zero, release the lock. */ spin_unlock_irqrestore(&die_lock, flags); if (panic_on_oops) - panic("Oops"); + panic("Fatal exception: panic_on_oops"); } void __kprobes __die(const char * str, struct pt_regs * regs, long err) @@ -1118,8 +1126,10 @@ static int __init call_trace_setup(char *s) call_trace = -1; else if (strcmp(s, "both") == 0) call_trace = 0; - else if (strcmp(s, "new") == 0) + else if (strcmp(s, "newfallback") == 0) call_trace = 1; + else if (strcmp(s, "new") == 0) + call_trace = 2; return 1; } __setup("call_trace=", call_trace_setup); |