diff options
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r-- | arch/ia64/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/ia64/kernel/domain.c | 78 | ||||
-rw-r--r-- | arch/ia64/kernel/efi.c | 32 | ||||
-rw-r--r-- | arch/ia64/kernel/entry.S | 6 | ||||
-rw-r--r-- | arch/ia64/kernel/entry.h | 2 | ||||
-rw-r--r-- | arch/ia64/kernel/fsys.S | 4 | ||||
-rw-r--r-- | arch/ia64/kernel/ivt.S | 13 | ||||
-rw-r--r-- | arch/ia64/kernel/jprobes.S | 61 | ||||
-rw-r--r-- | arch/ia64/kernel/kprobes.c | 601 | ||||
-rw-r--r-- | arch/ia64/kernel/mca.c | 8 | ||||
-rw-r--r-- | arch/ia64/kernel/minstate.h | 3 | ||||
-rw-r--r-- | arch/ia64/kernel/module.c | 10 | ||||
-rw-r--r-- | arch/ia64/kernel/perfmon.c | 175 | ||||
-rw-r--r-- | arch/ia64/kernel/ptrace.c | 41 | ||||
-rw-r--r-- | arch/ia64/kernel/setup.c | 3 | ||||
-rw-r--r-- | arch/ia64/kernel/signal.c | 5 | ||||
-rw-r--r-- | arch/ia64/kernel/smp.c | 2 | ||||
-rw-r--r-- | arch/ia64/kernel/smpboot.c | 6 | ||||
-rw-r--r-- | arch/ia64/kernel/sys_ia64.c | 7 | ||||
-rw-r--r-- | arch/ia64/kernel/traps.c | 62 | ||||
-rw-r--r-- | arch/ia64/kernel/uncached.c | 246 |
21 files changed, 1278 insertions, 89 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index c1a02bb..b2e2f65 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -20,6 +20,8 @@ obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o obj-$(CONFIG_IA64_CYCLONE) += cyclone.o obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o +obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o +obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o mca_recovery-y += mca_drv.o mca_drv_asm.o # The gate DSO image is built using a special linker script. diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c index fe532c9..d65e87b 100644 --- a/arch/ia64/kernel/domain.c +++ b/arch/ia64/kernel/domain.c @@ -14,7 +14,7 @@ #include <linux/topology.h> #include <linux/nodemask.h> -#define SD_NODES_PER_DOMAIN 6 +#define SD_NODES_PER_DOMAIN 16 #ifdef CONFIG_NUMA /** @@ -27,7 +27,7 @@ * * Should use nodemask_t. */ -static int __devinit find_next_best_node(int node, unsigned long *used_nodes) +static int find_next_best_node(int node, unsigned long *used_nodes) { int i, n, val, min_val, best_node = 0; @@ -66,7 +66,7 @@ static int __devinit find_next_best_node(int node, unsigned long *used_nodes) * should be one that prevents unnecessary balancing, but also spreads tasks * out optimally. */ -static cpumask_t __devinit sched_domain_node_span(int node) +static cpumask_t sched_domain_node_span(int node) { int i; cpumask_t span, nodemask; @@ -96,7 +96,7 @@ static cpumask_t __devinit sched_domain_node_span(int node) #ifdef CONFIG_SCHED_SMT static DEFINE_PER_CPU(struct sched_domain, cpu_domains); static struct sched_group sched_group_cpus[NR_CPUS]; -static int __devinit cpu_to_cpu_group(int cpu) +static int cpu_to_cpu_group(int cpu) { return cpu; } @@ -104,7 +104,7 @@ static int __devinit cpu_to_cpu_group(int cpu) static DEFINE_PER_CPU(struct sched_domain, phys_domains); static struct sched_group sched_group_phys[NR_CPUS]; -static int __devinit cpu_to_phys_group(int cpu) +static int cpu_to_phys_group(int cpu) { #ifdef CONFIG_SCHED_SMT return first_cpu(cpu_sibling_map[cpu]); @@ -125,44 +125,36 @@ static struct sched_group *sched_group_nodes[MAX_NUMNODES]; static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); static struct sched_group sched_group_allnodes[MAX_NUMNODES]; -static int __devinit cpu_to_allnodes_group(int cpu) +static int cpu_to_allnodes_group(int cpu) { return cpu_to_node(cpu); } #endif /* - * Set up scheduler domains and groups. Callers must hold the hotplug lock. + * Build sched domains for a given set of cpus and attach the sched domains + * to the individual cpus */ -void __devinit arch_init_sched_domains(void) +void build_sched_domains(const cpumask_t *cpu_map) { int i; - cpumask_t cpu_default_map; /* - * Setup mask for cpus without special case scheduling requirements. - * For now this just excludes isolated cpus, but could be used to - * exclude other special cases in the future. + * Set up domains for cpus specified by the cpu_map. */ - cpus_complement(cpu_default_map, cpu_isolated_map); - cpus_and(cpu_default_map, cpu_default_map, cpu_online_map); - - /* - * Set up domains. Isolated domains just stay on the dummy domain. - */ - for_each_cpu_mask(i, cpu_default_map) { + for_each_cpu_mask(i, *cpu_map) { int group; struct sched_domain *sd = NULL, *p; cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); - cpus_and(nodemask, nodemask, cpu_default_map); + cpus_and(nodemask, nodemask, *cpu_map); #ifdef CONFIG_NUMA if (num_online_cpus() > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { sd = &per_cpu(allnodes_domains, i); *sd = SD_ALLNODES_INIT; - sd->span = cpu_default_map; + sd->span = *cpu_map; group = cpu_to_allnodes_group(i); sd->groups = &sched_group_allnodes[group]; p = sd; @@ -173,7 +165,7 @@ void __devinit arch_init_sched_domains(void) *sd = SD_NODE_INIT; sd->span = sched_domain_node_span(cpu_to_node(i)); sd->parent = p; - cpus_and(sd->span, sd->span, cpu_default_map); + cpus_and(sd->span, sd->span, *cpu_map); #endif p = sd; @@ -190,7 +182,7 @@ void __devinit arch_init_sched_domains(void) group = cpu_to_cpu_group(i); *sd = SD_SIBLING_INIT; sd->span = cpu_sibling_map[i]; - cpus_and(sd->span, sd->span, cpu_default_map); + cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; sd->groups = &sched_group_cpus[group]; #endif @@ -198,9 +190,9 @@ void __devinit arch_init_sched_domains(void) #ifdef CONFIG_SCHED_SMT /* Set up CPU (sibling) groups */ - for_each_cpu_mask(i, cpu_default_map) { + for_each_cpu_mask(i, *cpu_map) { cpumask_t this_sibling_map = cpu_sibling_map[i]; - cpus_and(this_sibling_map, this_sibling_map, cpu_default_map); + cpus_and(this_sibling_map, this_sibling_map, *cpu_map); if (i != first_cpu(this_sibling_map)) continue; @@ -213,7 +205,7 @@ void __devinit arch_init_sched_domains(void) for (i = 0; i < MAX_NUMNODES; i++) { cpumask_t nodemask = node_to_cpumask(i); - cpus_and(nodemask, nodemask, cpu_default_map); + cpus_and(nodemask, nodemask, *cpu_map); if (cpus_empty(nodemask)) continue; @@ -222,7 +214,7 @@ void __devinit arch_init_sched_domains(void) } #ifdef CONFIG_NUMA - init_sched_build_groups(sched_group_allnodes, cpu_default_map, + init_sched_build_groups(sched_group_allnodes, *cpu_map, &cpu_to_allnodes_group); for (i = 0; i < MAX_NUMNODES; i++) { @@ -233,12 +225,12 @@ void __devinit arch_init_sched_domains(void) cpumask_t covered = CPU_MASK_NONE; int j; - cpus_and(nodemask, nodemask, cpu_default_map); + cpus_and(nodemask, nodemask, *cpu_map); if (cpus_empty(nodemask)) continue; domainspan = sched_domain_node_span(i); - cpus_and(domainspan, domainspan, cpu_default_map); + cpus_and(domainspan, domainspan, *cpu_map); sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); sched_group_nodes[i] = sg; @@ -266,7 +258,7 @@ void __devinit arch_init_sched_domains(void) int n = (i + j) % MAX_NUMNODES; cpus_complement(notcovered, covered); - cpus_and(tmp, notcovered, cpu_default_map); + cpus_and(tmp, notcovered, *cpu_map); cpus_and(tmp, tmp, domainspan); if (cpus_empty(tmp)) break; @@ -293,7 +285,7 @@ void __devinit arch_init_sched_domains(void) #endif /* Calculate CPU power for physical packages and nodes */ - for_each_cpu_mask(i, cpu_default_map) { + for_each_cpu_mask(i, *cpu_map) { int power; struct sched_domain *sd; #ifdef CONFIG_SCHED_SMT @@ -359,13 +351,35 @@ next_sg: cpu_attach_domain(sd, i); } } +/* + * Set up scheduler domains and groups. Callers must hold the hotplug lock. + */ +void arch_init_sched_domains(const cpumask_t *cpu_map) +{ + cpumask_t cpu_default_map; + + /* + * Setup mask for cpus without special case scheduling requirements. + * For now this just excludes isolated cpus, but could be used to + * exclude other special cases in the future. + */ + cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map); + + build_sched_domains(&cpu_default_map); +} -void __devinit arch_destroy_sched_domains(void) +void arch_destroy_sched_domains(const cpumask_t *cpu_map) { #ifdef CONFIG_NUMA int i; for (i = 0; i < MAX_NUMNODES; i++) { + cpumask_t nodemask = node_to_cpumask(i); struct sched_group *oldsg, *sg = sched_group_nodes[i]; + + cpus_and(nodemask, nodemask, *cpu_map); + if (cpus_empty(nodemask)) + continue; + if (sg == NULL) continue; sg = sg->next; diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 4a3b1aa..179f230 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -410,6 +410,38 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg) } /* + * Walk the EFI memory map to pull out leftover pages in the lower + * memory regions which do not end up in the regular memory map and + * stick them into the uncached allocator + * + * The regular walk function is significantly more complex than the + * uncached walk which means it really doesn't make sense to try and + * marge the two. + */ +void __init +efi_memmap_walk_uc (efi_freemem_callback_t callback) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size, start, end; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->attribute == EFI_MEMORY_UC) { + start = PAGE_ALIGN(md->phys_addr); + end = PAGE_ALIGN((md->phys_addr+(md->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK); + if ((*callback)(start, end, NULL) < 0) + return; + } + } +} + + +/* * Look for the PAL_CODE region reported by EFI and maps it using an * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor * Abstraction Layer chapter 11 in ADAG diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 81c45d4..b1d5d3d 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1182,7 +1182,7 @@ ENTRY(notify_resume_user) ;; (pNonSys) mov out2=0 // out2==0 => not a syscall .fframe 16 - .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!) + .spillsp ar.unat, 16 st8 [sp]=r9,-16 // allocate space for ar.unat and save it st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch .body @@ -1208,7 +1208,7 @@ GLOBAL_ENTRY(sys_rt_sigsuspend) adds out2=8,sp // out2=&sigscratch->ar_pfs ;; .fframe 16 - .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!) + .spillsp ar.unat, 16 st8 [sp]=r9,-16 // allocate space for ar.unat and save it st8 [out2]=loc1,-8 // save ar.pfs, out2=&sigscratch .body @@ -1579,7 +1579,7 @@ sys_call_table: data8 sys_keyctl data8 sys_ni_syscall data8 sys_ni_syscall // 1275 - data8 sys_ni_syscall + data8 sys_set_zone_reclaim data8 sys_ni_syscall data8 sys_ni_syscall data8 sys_ni_syscall diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h index 6d4ecec..78eeb07 100644 --- a/arch/ia64/kernel/entry.h +++ b/arch/ia64/kernel/entry.h @@ -60,7 +60,7 @@ .spillsp @priunat,SW(AR_UNAT)+16+(off); \ .spillsp ar.rnat,SW(AR_RNAT)+16+(off); \ .spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off); \ - .spillsp pr,SW(PR)+16+(off)) + .spillsp pr,SW(PR)+16+(off) #define DO_SAVE_SWITCH_STACK \ movl r28=1f; \ diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index 4f3cdef..962b6c4 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -460,9 +460,9 @@ EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set ;; st8 [r2]=r14 // update current->blocked with new mask - cmpxchg4.acq r14=[r9],r18,ar.ccv // current->thread_info->flags <- r18 + cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18 ;; - cmp.ne p6,p0=r17,r14 // update failed? + cmp.ne p6,p0=r17,r8 // update failed? (p6) br.cond.spnt.few 1b // yes -> retry #ifdef CONFIG_SMP diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index d9c05d5..2bc085a 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -405,17 +405,22 @@ ENTRY(nested_dtlb_miss) * r30: continuation address * r31: saved pr * - * Clobbered: b0, r18, r19, r21, psr.dt (cleared) + * Clobbered: b0, r18, r19, r21, r22, psr.dt (cleared) */ rsm psr.dt // switch to using physical data addressing mov r19=IA64_KR(PT_BASE) // get the page table base address shl r21=r16,3 // shift bit 60 into sign bit + mov r18=cr.itir ;; shr.u r17=r16,61 // get the region number into r17 + extr.u r18=r18,2,6 // get the faulting page size ;; cmp.eq p6,p7=5,r17 // is faulting address in region 5? - shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address + add r22=-PAGE_SHIFT,r18 // adjustment for hugetlb address + add r18=PGDIR_SHIFT-PAGE_SHIFT,r18 ;; + shr.u r22=r16,r22 + shr.u r18=r16,r18 (p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place srlz.d @@ -428,7 +433,7 @@ ENTRY(nested_dtlb_miss) (p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 (p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) cmp.eq p7,p6=0,r21 // unused address bits all zeroes? - shr.u r18=r16,PMD_SHIFT // shift L2 index into position + shr.u r18=r22,PMD_SHIFT // shift L2 index into position ;; ld8 r17=[r17] // fetch the L1 entry (may be 0) ;; @@ -436,7 +441,7 @@ ENTRY(nested_dtlb_miss) dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry ;; (p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) - shr.u r19=r16,PAGE_SHIFT // shift L3 index into position + shr.u r19=r22,PAGE_SHIFT // shift L3 index into position ;; (p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S new file mode 100644 index 0000000..b7fa3cc --- /dev/null +++ b/arch/ia64/kernel/jprobes.S @@ -0,0 +1,61 @@ +/* + * Jprobe specific operations + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) Intel Corporation, 2005 + * + * 2005-May Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy + * <anil.s.keshavamurthy@intel.com> initial implementation + * + * Jprobes (a.k.a. "jump probes" which is built on-top of kprobes) allow a + * probe to be inserted into the beginning of a function call. The fundamental + * difference between a jprobe and a kprobe is the jprobe handler is executed + * in the same context as the target function, while the kprobe handlers + * are executed in interrupt context. + * + * For jprobes we initially gain control by placing a break point in the + * first instruction of the targeted function. When we catch that specific + * break, we: + * * set the return address to our jprobe_inst_return() function + * * jump to the jprobe handler function + * + * Since we fixed up the return address, the jprobe handler will return to our + * jprobe_inst_return() function, giving us control again. At this point we + * are back in the parents frame marker, so we do yet another call to our + * jprobe_break() function to fix up the frame marker as it would normally + * exist in the target function. + * + * Our jprobe_return function then transfers control back to kprobes.c by + * executing a break instruction using one of our reserved numbers. When we + * catch that break in kprobes.c, we continue like we do for a normal kprobe + * by single stepping the emulated instruction, and then returning execution + * to the correct location. + */ +#include <asm/asmmacro.h> + + /* + * void jprobe_break(void) + */ +ENTRY(jprobe_break) + break.m 0x80300 +END(jprobe_break) + + /* + * void jprobe_inst_return(void) + */ +GLOBAL_ENTRY(jprobe_inst_return) + br.call.sptk.many b0=jprobe_break +END(jprobe_inst_return) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c new file mode 100644 index 0000000..5978823 --- /dev/null +++ b/arch/ia64/kernel/kprobes.c @@ -0,0 +1,601 @@ +/* + * Kernel Probes (KProbes) + * arch/ia64/kernel/kprobes.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * Copyright (C) Intel Corporation, 2005 + * + * 2005-Apr Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy + * <anil.s.keshavamurthy@intel.com> adapted from i386 + */ + +#include <linux/config.h> +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/preempt.h> +#include <linux/moduleloader.h> + +#include <asm/pgtable.h> +#include <asm/kdebug.h> + +extern void jprobe_inst_return(void); + +/* kprobe_status settings */ +#define KPROBE_HIT_ACTIVE 0x00000001 +#define KPROBE_HIT_SS 0x00000002 + +static struct kprobe *current_kprobe, *kprobe_prev; +static unsigned long kprobe_status, kprobe_status_prev; +static struct pt_regs jprobe_saved_regs; + +enum instruction_type {A, I, M, F, B, L, X, u}; +static enum instruction_type bundle_encoding[32][3] = { + { M, I, I }, /* 00 */ + { M, I, I }, /* 01 */ + { M, I, I }, /* 02 */ + { M, I, I }, /* 03 */ + { M, L, X }, /* 04 */ + { M, L, X }, /* 05 */ + { u, u, u }, /* 06 */ + { u, u, u }, /* 07 */ + { M, M, I }, /* 08 */ + { M, M, I }, /* 09 */ + { M, M, I }, /* 0A */ + { M, M, I }, /* 0B */ + { M, F, I }, /* 0C */ + { M, F, I }, /* 0D */ + { M, M, F }, /* 0E */ + { M, M, F }, /* 0F */ + { M, I, B }, /* 10 */ + { M, I, B }, /* 11 */ + { M, B, B }, /* 12 */ + { M, B, B }, /* 13 */ + { u, u, u }, /* 14 */ + { u, u, u }, /* 15 */ + { B, B, B }, /* 16 */ + { B, B, B }, /* 17 */ + { M, M, B }, /* 18 */ + { M, M, B }, /* 19 */ + { u, u, u }, /* 1A */ + { u, u, u }, /* 1B */ + { M, F, B }, /* 1C */ + { M, F, B }, /* 1D */ + { u, u, u }, /* 1E */ + { u, u, u }, /* 1F */ +}; + +/* + * In this function we check to see if the instruction + * is IP relative instruction and update the kprobe + * inst flag accordingly + */ +static void update_kprobe_inst_flag(uint template, uint slot, uint major_opcode, + unsigned long kprobe_inst, struct kprobe *p) +{ + p->ainsn.inst_flag = 0; + p->ainsn.target_br_reg = 0; + + if (bundle_encoding[template][slot] == B) { + switch (major_opcode) { + case INDIRECT_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + case IP_RELATIVE_PREDICT_OPCODE: + case IP_RELATIVE_BRANCH_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; + break; + case IP_RELATIVE_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + } + } else if (bundle_encoding[template][slot] == X) { + switch (major_opcode) { + case LONG_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + } + } + return; +} + +/* + * In this function we check to see if the instruction + * on which we are inserting kprobe is supported. + * Returns 0 if supported + * Returns -EINVAL if unsupported + */ +static int unsupported_inst(uint template, uint slot, uint major_opcode, + unsigned long kprobe_inst, struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr; + + if (bundle_encoding[template][slot] == I) { + switch (major_opcode) { + case 0x0: //I_UNIT_MISC_OPCODE: + /* + * Check for Integer speculation instruction + * - Bit 33-35 to be equal to 0x1 + */ + if (((kprobe_inst >> 33) & 0x7) == 1) { + printk(KERN_WARNING + "Kprobes on speculation inst at <0x%lx> not supported\n", + addr); + return -EINVAL; + } + + /* + * IP relative mov instruction + * - Bit 27-35 to be equal to 0x30 + */ + if (((kprobe_inst >> 27) & 0x1FF) == 0x30) { + printk(KERN_WARNING + "Kprobes on \"mov r1=ip\" at <0x%lx> not supported\n", + addr); + return -EINVAL; + + } + } + } + return 0; +} + + +/* + * In this function we check to see if the instruction + * (qp) cmpx.crel.ctype p1,p2=r2,r3 + * on which we are inserting kprobe is cmp instruction + * with ctype as unc. + */ +static uint is_cmp_ctype_unc_inst(uint template, uint slot, uint major_opcode, +unsigned long kprobe_inst) +{ + cmp_inst_t cmp_inst; + uint ctype_unc = 0; + + if (!((bundle_encoding[template][slot] == I) || + (bundle_encoding[template][slot] == M))) + goto out; + + if (!((major_opcode == 0xC) || (major_opcode == 0xD) || + (major_opcode == 0xE))) + goto out; + + cmp_inst.l = kprobe_inst; + if ((cmp_inst.f.x2 == 0) || (cmp_inst.f.x2 == 1)) { + /* Integere compare - Register Register (A6 type)*/ + if ((cmp_inst.f.tb == 0) && (cmp_inst.f.ta == 0) + &&(cmp_inst.f.c == 1)) + ctype_unc = 1; + } else if ((cmp_inst.f.x2 == 2)||(cmp_inst.f.x2 == 3)) { + /* Integere compare - Immediate Register (A8 type)*/ + if ((cmp_inst.f.ta == 0) &&(cmp_inst.f.c == 1)) + ctype_unc = 1; + } +out: + return ctype_unc; +} + +/* + * In this function we override the bundle with + * the break instruction at the given slot. + */ +static void prepare_break_inst(uint template, uint slot, uint major_opcode, + unsigned long kprobe_inst, struct kprobe *p) +{ + unsigned long break_inst = BREAK_INST; + bundle_t *bundle = &p->ainsn.insn.bundle; + + /* + * Copy the original kprobe_inst qualifying predicate(qp) + * to the break instruction iff !is_cmp_ctype_unc_inst + * because for cmp instruction with ctype equal to unc, + * which is a special instruction always needs to be + * executed regradless of qp + */ + if (!is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst)) + break_inst |= (0x3f & kprobe_inst); + + switch (slot) { + case 0: + bundle->quad0.slot0 = break_inst; + break; + case 1: + bundle->quad0.slot1_p0 = break_inst; + bundle->quad1.slot1_p1 = break_inst >> (64-46); + break; + case 2: + bundle->quad1.slot2 = break_inst; + break; + } + + /* + * Update the instruction flag, so that we can + * emulate the instruction properly after we + * single step on original instruction + */ + update_kprobe_inst_flag(template, slot, major_opcode, kprobe_inst, p); +} + +static inline void get_kprobe_inst(bundle_t *bundle, uint slot, + unsigned long *kprobe_inst, uint *major_opcode) +{ + unsigned long kprobe_inst_p0, kprobe_inst_p1; + unsigned int template; + + template = bundle->quad0.template; + + switch (slot) { + case 0: + *major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT); + *kprobe_inst = bundle->quad0.slot0; + break; + case 1: + *major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT); + kprobe_inst_p0 = bundle->quad0.slot1_p0; + kprobe_inst_p1 = bundle->quad1.slot1_p1; + *kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46)); + break; + case 2: + *major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT); + *kprobe_inst = bundle->quad1.slot2; + break; + } +} + +static int valid_kprobe_addr(int template, int slot, unsigned long addr) +{ + if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) { + printk(KERN_WARNING "Attempting to insert unaligned kprobe at 0x%lx\n", + addr); + return -EINVAL; + } + return 0; +} + +static inline void save_previous_kprobe(void) +{ + kprobe_prev = current_kprobe; + kprobe_status_prev = kprobe_status; +} + +static inline void restore_previous_kprobe(void) +{ + current_kprobe = kprobe_prev; + kprobe_status = kprobe_status_prev; +} + +static inline void set_current_kprobe(struct kprobe *p) +{ + current_kprobe = p; +} + +int arch_prepare_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long) p->addr; + unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL); + unsigned long kprobe_inst=0; + unsigned int slot = addr & 0xf, template, major_opcode = 0; + bundle_t *bundle = &p->ainsn.insn.bundle; + + memcpy(&p->opcode.bundle, kprobe_addr, sizeof(bundle_t)); + memcpy(&p->ainsn.insn.bundle, kprobe_addr, sizeof(bundle_t)); + + template = bundle->quad0.template; + + if(valid_kprobe_addr(template, slot, addr)) + return -EINVAL; + + /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ + if (slot == 1 && bundle_encoding[template][1] == L) + slot++; + + /* Get kprobe_inst and major_opcode from the bundle */ + get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); + + if (unsupported_inst(template, slot, major_opcode, kprobe_inst, p)) + return -EINVAL; + + prepare_break_inst(template, slot, major_opcode, kprobe_inst, p); + + return 0; +} + +void arch_arm_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr; + unsigned long arm_addr = addr & ~0xFULL; + + memcpy((char *)arm_addr, &p->ainsn.insn.bundle, sizeof(bundle_t)); + flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); +} + +void arch_disarm_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr; + unsigned long arm_addr = addr & ~0xFULL; + + /* p->opcode contains the original unaltered bundle */ + memcpy((char *) arm_addr, (char *) &p->opcode.bundle, sizeof(bundle_t)); + flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); +} + +void arch_remove_kprobe(struct kprobe *p) +{ +} + +/* + * We are resuming execution after a single step fault, so the pt_regs + * structure reflects the register state after we executed the instruction + * located in the kprobe (p->ainsn.insn.bundle). We still need to adjust + * the ip to point back to the original stack address. To set the IP address + * to original stack address, handle the case where we need to fixup the + * relative IP address and/or fixup branch register. + */ +static void resume_execution(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long bundle_addr = ((unsigned long) (&p->opcode.bundle)) & ~0xFULL; + unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL; + unsigned long template; + int slot = ((unsigned long)p->addr & 0xf); + + template = p->opcode.bundle.quad0.template; + + if (slot == 1 && bundle_encoding[template][1] == L) + slot = 2; + + if (p->ainsn.inst_flag) { + + if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) { + /* Fix relative IP address */ + regs->cr_iip = (regs->cr_iip - bundle_addr) + resume_addr; + } + + if (p->ainsn.inst_flag & INST_FLAG_FIX_BRANCH_REG) { + /* + * Fix target branch register, software convention is + * to use either b0 or b6 or b7, so just checking + * only those registers + */ + switch (p->ainsn.target_br_reg) { + case 0: + if ((regs->b0 == bundle_addr) || + (regs->b0 == bundle_addr + 0x10)) { + regs->b0 = (regs->b0 - bundle_addr) + + resume_addr; + } + break; + case 6: + if ((regs->b6 == bundle_addr) || + (regs->b6 == bundle_addr + 0x10)) { + regs->b6 = (regs->b6 - bundle_addr) + + resume_addr; + } + break; + case 7: + if ((regs->b7 == bundle_addr) || + (regs->b7 == bundle_addr + 0x10)) { + regs->b7 = (regs->b7 - bundle_addr) + + resume_addr; + } + break; + } /* end switch */ + } + goto turn_ss_off; + } + + if (slot == 2) { + if (regs->cr_iip == bundle_addr + 0x10) { + regs->cr_iip = resume_addr + 0x10; + } + } else { + if (regs->cr_iip == bundle_addr) { + regs->cr_iip = resume_addr; + } + } + +turn_ss_off: + /* Turn off Single Step bit */ + ia64_psr(regs)->ss = 0; +} + +static void prepare_ss(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long bundle_addr = (unsigned long) &p->opcode.bundle; + unsigned long slot = (unsigned long)p->addr & 0xf; + + /* Update instruction pointer (IIP) and slot number (IPSR.ri) */ + regs->cr_iip = bundle_addr & ~0xFULL; + + if (slot > 2) + slot = 0; + + ia64_psr(regs)->ri = slot; + + /* turn on single stepping */ + ia64_psr(regs)->ss = 1; +} + +static int pre_kprobes_handler(struct die_args *args) +{ + struct kprobe *p; + int ret = 0; + struct pt_regs *regs = args->regs; + kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs); + + preempt_disable(); + + /* Handle recursion cases */ + if (kprobe_running()) { + p = get_kprobe(addr); + if (p) { + if (kprobe_status == KPROBE_HIT_SS) { + unlock_kprobes(); + goto no_kprobe; + } + /* We have reentered the pre_kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + set_current_kprobe(p); + p->nmissed++; + prepare_ss(p, regs); + kprobe_status = KPROBE_REENTER; + return 1; + } else if (args->err == __IA64_BREAK_JPROBE) { + /* + * jprobe instrumented function just completed + */ + p = current_kprobe; + if (p->break_handler && p->break_handler(p, regs)) { + goto ss_probe; + } + } else { + /* Not our break */ + goto no_kprobe; + } + } + + lock_kprobes(); + p = get_kprobe(addr); + if (!p) { + unlock_kprobes(); + goto no_kprobe; + } + + kprobe_status = KPROBE_HIT_ACTIVE; + set_current_kprobe(p); + + if (p->pre_handler && p->pre_handler(p, regs)) + /* + * Our pre-handler is specifically requesting that we just + * do a return. This is handling the case where the + * pre-handler is really our special jprobe pre-handler. + */ + return 1; + +ss_probe: + prepare_ss(p, regs); + kprobe_status = KPROBE_HIT_SS; + return 1; + +no_kprobe: + preempt_enable_no_resched(); + return ret; +} + +static int post_kprobes_handler(struct pt_regs *regs) +{ + if (!kprobe_running()) + return 0; + + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; + current_kprobe->post_handler(current_kprobe, regs, 0); + } + + resume_execution(current_kprobe, regs); + + /*Restore back the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } + + unlock_kprobes(); + +out: + preempt_enable_no_resched(); + return 1; +} + +static int kprobes_fault_handler(struct pt_regs *regs, int trapnr) +{ + if (!kprobe_running()) + return 0; + + if (current_kprobe->fault_handler && + current_kprobe->fault_handler(current_kprobe, regs, trapnr)) + return 1; + + if (kprobe_status & KPROBE_HIT_SS) { + resume_execution(current_kprobe, regs); + unlock_kprobes(); + preempt_enable_no_resched(); + } + + return 0; +} + +int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, + void *data) +{ + struct die_args *args = (struct die_args *)data; + switch(val) { + case DIE_BREAK: + if (pre_kprobes_handler(args)) + return NOTIFY_STOP; + break; + case DIE_SS: + if (post_kprobes_handler(args->regs)) + return NOTIFY_STOP; + break; + case DIE_PAGE_FAULT: + if (kprobes_fault_handler(args->regs, args->trapnr)) + return NOTIFY_STOP; + default: + break; + } + return NOTIFY_DONE; +} + +int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + unsigned long addr = ((struct fnptr *)(jp->entry))->ip; + + /* save architectural state */ + jprobe_saved_regs = *regs; + + /* after rfi, execute the jprobe instrumented function */ + regs->cr_iip = addr & ~0xFULL; + ia64_psr(regs)->ri = addr & 0xf; + regs->r1 = ((struct fnptr *)(jp->entry))->gp; + + /* + * fix the return address to our jprobe_inst_return() function + * in the jprobes.S file + */ + regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip; + + return 1; +} + +int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + *regs = jprobe_saved_regs; + return 1; +} diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 4d6c7b8f..736e328 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1103,8 +1103,6 @@ ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs) return IRQ_HANDLED; } -#endif /* CONFIG_ACPI */ - /* * ia64_mca_cpe_poll * @@ -1122,6 +1120,8 @@ ia64_mca_cpe_poll (unsigned long dummy) platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0); } +#endif /* CONFIG_ACPI */ + /* * C portion of the OS INIT handler * @@ -1390,8 +1390,7 @@ ia64_mca_init(void) register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction); #ifdef CONFIG_ACPI - /* Setup the CPEI/P vector and handler */ - cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI); + /* Setup the CPEI/P handler */ register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction); #endif @@ -1436,6 +1435,7 @@ ia64_mca_late_init(void) #ifdef CONFIG_ACPI /* Setup the CPEI/P vector and handler */ + cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI); init_timer(&cpe_poll_timer); cpe_poll_timer.function = ia64_mca_cpe_poll; diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h index 1dbc7b2..f6d8a01 100644 --- a/arch/ia64/kernel/minstate.h +++ b/arch/ia64/kernel/minstate.h @@ -41,7 +41,7 @@ (pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;; \ (pKStk) ld8 r3 = [r3];; \ (pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;; \ -(pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \ +(pKStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \ (pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ (pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \ ;; \ @@ -50,7 +50,6 @@ (pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \ (pUStk) dep r22=-1,r22,61,3; /* compute kernel virtual addr of RBS */ \ ;; \ -(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ (pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \ ;; \ (pUStk) mov r18=ar.bsp; \ diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index febc091..f1aca7c 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -825,14 +825,16 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind * XXX Should have an arch-hook for running this after final section * addresses have been selected... */ - /* See if gp can cover the entire core module: */ - uint64_t gp = (uint64_t) mod->module_core + MAX_LTOFF / 2; - if (mod->core_size >= MAX_LTOFF) + uint64_t gp; + if (mod->core_size > MAX_LTOFF) /* * This takes advantage of fact that SHF_ARCH_SMALL gets allocated * at the end of the module. */ - gp = (uint64_t) mod->module_core + mod->core_size - MAX_LTOFF / 2; + gp = mod->core_size - MAX_LTOFF / 2; + else + gp = mod->core_size / 2; + gp = (uint64_t) mod->module_core + ((gp + 7) & -8); mod->arch.gp = gp; DEBUGP("%s: placing gp at 0x%lx\n", __FUNCTION__, gp); } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 71c1016..6407bff 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -11,7 +11,7 @@ * Version Perfmon-2.x is a rewrite of perfmon-1.x * by Stephane Eranian, Hewlett Packard Co. * - * Copyright (C) 1999-2003, 2005 Hewlett Packard Co + * Copyright (C) 1999-2005 Hewlett Packard Co * Stephane Eranian <eranian@hpl.hp.com> * David Mosberger-Tang <davidm@hpl.hp.com> * @@ -497,6 +497,9 @@ typedef struct { static pfm_stats_t pfm_stats[NR_CPUS]; static pfm_session_t pfm_sessions; /* global sessions information */ +static spinlock_t pfm_alt_install_check = SPIN_LOCK_UNLOCKED; +static pfm_intr_handler_desc_t *pfm_alt_intr_handler; + static struct proc_dir_entry *perfmon_dir; static pfm_uuid_t pfm_null_uuid = {0,}; @@ -606,6 +609,7 @@ DEFINE_PER_CPU(unsigned long, pfm_syst_info); DEFINE_PER_CPU(struct task_struct *, pmu_owner); DEFINE_PER_CPU(pfm_context_t *, pmu_ctx); DEFINE_PER_CPU(unsigned long, pmu_activation_number); +EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info); /* forward declaration */ @@ -1325,7 +1329,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) error_conflict: DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n", pfm_sessions.pfs_sys_session[cpu]->pid, - smp_processor_id())); + cpu)); abort: UNLOCK_PFS(flags); @@ -5555,26 +5559,32 @@ pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs) int ret; this_cpu = get_cpu(); - min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; - max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; + if (likely(!pfm_alt_intr_handler)) { + min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; + max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; - start_cycles = ia64_get_itc(); + start_cycles = ia64_get_itc(); - ret = pfm_do_interrupt_handler(irq, arg, regs); + ret = pfm_do_interrupt_handler(irq, arg, regs); - total_cycles = ia64_get_itc(); + total_cycles = ia64_get_itc(); - /* - * don't measure spurious interrupts - */ - if (likely(ret == 0)) { - total_cycles -= start_cycles; + /* + * don't measure spurious interrupts + */ + if (likely(ret == 0)) { + total_cycles -= start_cycles; - if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; - if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; + if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; + if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; - pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; + pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; + } + } + else { + (*pfm_alt_intr_handler->handler)(irq, arg, regs); } + put_cpu_no_resched(); return IRQ_HANDLED; } @@ -6425,6 +6435,141 @@ static struct irqaction perfmon_irqaction = { .name = "perfmon" }; +static void +pfm_alt_save_pmu_state(void *data) +{ + struct pt_regs *regs; + + regs = ia64_task_regs(current); + + DPRINT(("called\n")); + + /* + * should not be necessary but + * let's take not risk + */ + pfm_clear_psr_up(); + pfm_clear_psr_pp(); + ia64_psr(regs)->pp = 0; + + /* + * This call is required + * May cause a spurious interrupt on some processors + */ + pfm_freeze_pmu(); + + ia64_srlz_d(); +} + +void +pfm_alt_restore_pmu_state(void *data) +{ + struct pt_regs *regs; + + regs = ia64_task_regs(current); + + DPRINT(("called\n")); + + /* + * put PMU back in state expected + * by perfmon + */ + pfm_clear_psr_up(); + pfm_clear_psr_pp(); + ia64_psr(regs)->pp = 0; + + /* + * perfmon runs with PMU unfrozen at all times + */ + pfm_unfreeze_pmu(); + + ia64_srlz_d(); +} + +int +pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) +{ + int ret, i; + int reserve_cpu; + + /* some sanity checks */ + if (hdl == NULL || hdl->handler == NULL) return -EINVAL; + + /* do the easy test first */ + if (pfm_alt_intr_handler) return -EBUSY; + + /* one at a time in the install or remove, just fail the others */ + if (!spin_trylock(&pfm_alt_install_check)) { + return -EBUSY; + } + + /* reserve our session */ + for_each_online_cpu(reserve_cpu) { + ret = pfm_reserve_session(NULL, 1, reserve_cpu); + if (ret) goto cleanup_reserve; + } + + /* save the current system wide pmu states */ + ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1); + if (ret) { + DPRINT(("on_each_cpu() failed: %d\n", ret)); + goto cleanup_reserve; + } + + /* officially change to the alternate interrupt handler */ + pfm_alt_intr_handler = hdl; + + spin_unlock(&pfm_alt_install_check); + + return 0; + +cleanup_reserve: + for_each_online_cpu(i) { + /* don't unreserve more than we reserved */ + if (i >= reserve_cpu) break; + + pfm_unreserve_session(NULL, 1, i); + } + + spin_unlock(&pfm_alt_install_check); + + return ret; +} +EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt); + +int +pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) +{ + int i; + int ret; + + if (hdl == NULL) return -EINVAL; + + /* cannot remove someone else's handler! */ + if (pfm_alt_intr_handler != hdl) return -EINVAL; + + /* one at a time in the install or remove, just fail the others */ + if (!spin_trylock(&pfm_alt_install_check)) { + return -EBUSY; + } + + pfm_alt_intr_handler = NULL; + + ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1); + if (ret) { + DPRINT(("on_each_cpu() failed: %d\n", ret)); + } + + for_each_online_cpu(i) { + pfm_unreserve_session(NULL, 1, i); + } + + spin_unlock(&pfm_alt_install_check); + + return 0; +} +EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt); + /* * perfmon initialization routine, called from the initcall() table */ diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 907464e..6d57aeb 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -635,11 +635,17 @@ ia64_flush_fph (struct task_struct *task) { struct ia64_psr *psr = ia64_psr(ia64_task_regs(task)); + /* + * Prevent migrating this task while + * we're fiddling with the FPU state + */ + preempt_disable(); if (ia64_is_local_fpu_owner(task) && psr->mfh) { psr->mfh = 0; task->thread.flags |= IA64_THREAD_FPH_VALID; ia64_save_fpu(&task->thread.fph[0]); } + preempt_enable(); } /* @@ -692,16 +698,30 @@ convert_to_non_syscall (struct task_struct *child, struct pt_regs *pt, unsigned long cfm) { struct unw_frame_info info, prev_info; - unsigned long ip, pr; + unsigned long ip, sp, pr; unw_init_from_blocked_task(&info, child); while (1) { prev_info = info; if (unw_unwind(&info) < 0) return; - if (unw_get_rp(&info, &ip) < 0) + + unw_get_sp(&info, &sp); + if ((long)((unsigned long)child + IA64_STK_OFFSET - sp) + < IA64_PT_REGS_SIZE) { + dprintk("ptrace.%s: ran off the top of the kernel " + "stack\n", __FUNCTION__); return; - if (ip < FIXADDR_USER_END) + } + if (unw_get_pr (&prev_info, &pr) < 0) { + unw_get_rp(&prev_info, &ip); + dprintk("ptrace.%s: failed to read " + "predicate register (ip=0x%lx)\n", + __FUNCTION__, ip); + return; + } + if (unw_is_intr_frame(&info) + && (pr & (1UL << PRED_USER_STACK))) break; } @@ -925,6 +945,13 @@ access_uarea (struct task_struct *child, unsigned long addr, *data = (pt->cr_ipsr & IPSR_MASK); return 0; + case PT_AR_RSC: + if (write_access) + pt->ar_rsc = *data | (3 << 2); /* force PL3 */ + else + *data = pt->ar_rsc; + return 0; + case PT_AR_RNAT: urbs_end = ia64_get_user_rbs_end(child, pt, NULL); rnat_addr = (long) ia64_rse_rnat_addr((long *) @@ -976,9 +1003,6 @@ access_uarea (struct task_struct *child, unsigned long addr, case PT_AR_BSPSTORE: ptr = pt_reg_addr(pt, ar_bspstore); break; - case PT_AR_RSC: - ptr = pt_reg_addr(pt, ar_rsc); - break; case PT_AR_UNAT: ptr = pt_reg_addr(pt, ar_unat); break; @@ -1214,7 +1238,7 @@ ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) static long ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) { - unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val = 0; + unsigned long psr, rsc, ec, lc, rnat, bsp, cfm, nat_bits, val = 0; struct unw_frame_info info; struct switch_stack *sw; struct ia64_fpreg fpval; @@ -1247,7 +1271,7 @@ ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) /* app regs */ retval |= __get_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]); - retval |= __get_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]); + retval |= __get_user(rsc, &ppr->ar[PT_AUR_RSC]); retval |= __get_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]); retval |= __get_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]); retval |= __get_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]); @@ -1345,6 +1369,7 @@ ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) retval |= __get_user(nat_bits, &ppr->nat); retval |= access_uarea(child, PT_CR_IPSR, &psr, 1); + retval |= access_uarea(child, PT_AR_RSC, &rsc, 1); retval |= access_uarea(child, PT_AR_EC, &ec, 1); retval |= access_uarea(child, PT_AR_LC, &lc, 1); retval |= access_uarea(child, PT_AR_RNAT, &rnat, 1); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index b7e6b4c..d14692e 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -720,7 +720,8 @@ cpu_init (void) ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page))); /* - * Initialize default control register to defer all speculative faults. The + * Initialize default control register to defer speculative faults except + * for those arising from TLB misses, which are not deferred. The * kernel MUST NOT depend on a particular setting of these bits (in other words, * the kernel must have recovery code for all speculative accesses). Turn on * dcr.lc as per recommendation by the architecture team. Most IA-32 apps diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 499b7e5..edd9f07 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -94,7 +94,7 @@ sys_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, long arg2, static long restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) { - unsigned long ip, flags, nat, um, cfm; + unsigned long ip, flags, nat, um, cfm, rsc; long err; /* Always make any pending restarted system calls return -EINTR */ @@ -106,7 +106,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */ err |= __get_user(cfm, &sc->sc_cfm); err |= __get_user(um, &sc->sc_um); /* user mask */ - err |= __get_user(scr->pt.ar_rsc, &sc->sc_ar_rsc); + err |= __get_user(rsc, &sc->sc_ar_rsc); err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat); err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs); @@ -119,6 +119,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8); /* r15 */ scr->pt.cr_ifs = cfm | (1UL << 63); + scr->pt.ar_rsc = rsc | (3 << 2); /* force PL3 */ /* establish new instruction pointer: */ scr->pt.cr_iip = ip & ~0x3UL; diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 953095e..b49d4dd 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -269,7 +269,7 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int int me = get_cpu(); /* prevent preemption and reschedule on another processor */ if (cpuid == me) { - printk("%s: trying to call self\n", __FUNCTION__); + printk(KERN_INFO "%s: trying to call self\n", __FUNCTION__); put_cpu(); return -EBUSY; } diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 0d5ee57..623b0a5 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -346,6 +346,7 @@ smp_callin (void) lock_ipi_calllock(); cpu_set(cpuid, cpu_online_map); unlock_ipi_calllock(); + per_cpu(cpu_state, cpuid) = CPU_ONLINE; smp_setup_percpu_timer(); @@ -611,6 +612,7 @@ void __devinit smp_prepare_boot_cpu(void) { cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), cpu_callin_map); + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; } /* @@ -624,7 +626,7 @@ static struct { __u16 thread_id; __u16 proc_fixed_addr; __u8 valid; -}mt_info[NR_CPUS] __devinit; +} mt_info[NR_CPUS] __devinitdata; #ifdef CONFIG_HOTPLUG_CPU static inline void @@ -688,6 +690,7 @@ int __cpu_disable(void) return -EBUSY; remove_siblinginfo(cpu); + cpu_clear(cpu, cpu_online_map); fixup_irqs(); local_flush_tlb_all(); cpu_clear(cpu, cpu_callin_map); @@ -774,6 +777,7 @@ __cpu_up (unsigned int cpu) if (cpu_isset(cpu, cpu_callin_map)) return -EINVAL; + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; /* Processor goes to start_secondary(), sets online flag */ ret = do_boot_cpu(sapicid, cpu); if (ret < 0) diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index a8cf6d8..770fab3 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -182,13 +182,6 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un } } - /* - * A zero mmap always succeeds in Linux, independent of whether or not the - * remaining arguments are valid. - */ - if (len == 0) - goto out; - /* Careful about overflows.. */ len = PAGE_ALIGN(len); if (!len || len > TASK_SIZE) { diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index e82ad78..e7e520d 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -21,12 +21,26 @@ #include <asm/intrinsics.h> #include <asm/processor.h> #include <asm/uaccess.h> +#include <asm/kdebug.h> extern spinlock_t timerlist_lock; fpswa_interface_t *fpswa_interface; EXPORT_SYMBOL(fpswa_interface); +struct notifier_block *ia64die_chain; +static DEFINE_SPINLOCK(die_notifier_lock); + +int register_die_notifier(struct notifier_block *nb) +{ + int err = 0; + unsigned long flags; + spin_lock_irqsave(&die_notifier_lock, flags); + err = notifier_chain_register(&ia64die_chain, nb); + spin_unlock_irqrestore(&die_notifier_lock, flags); + return err; +} + void __init trap_init (void) { @@ -111,6 +125,24 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs) siginfo_t siginfo; int sig, code; + /* break.b always sets cr.iim to 0, which causes problems for + * debuggers. Get the real break number from the original instruction, + * but only for kernel code. User space break.b is left alone, to + * preserve the existing behaviour. All break codings have the same + * format, so there is no need to check the slot type. + */ + if (break_num == 0 && !user_mode(regs)) { + struct ia64_psr *ipsr = ia64_psr(regs); + unsigned long *bundle = (unsigned long *)regs->cr_iip; + unsigned long slot; + switch (ipsr->ri) { + case 0: slot = (bundle[0] >> 5); break; + case 1: slot = (bundle[0] >> 46) | (bundle[1] << 18); break; + default: slot = (bundle[1] >> 23); break; + } + break_num = ((slot >> 36 & 1) << 20) | (slot >> 6 & 0xfffff); + } + /* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */ siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); siginfo.si_imm = break_num; @@ -119,6 +151,10 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs) switch (break_num) { case 0: /* unknown error (used by GCC for __builtin_abort()) */ + if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP) + == NOTIFY_STOP) { + return; + } die_if_kernel("bugcheck!", regs, break_num); sig = SIGILL; code = ILL_ILLOPC; break; @@ -171,6 +207,15 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs) sig = SIGILL; code = __ILL_BNDMOD; break; + case 0x80200: + case 0x80300: + if (notify_die(DIE_BREAK, "kprobe", regs, break_num, TRAP_BRKPT, SIGTRAP) + == NOTIFY_STOP) { + return; + } + sig = SIGTRAP; code = TRAP_BRKPT; + break; + default: if (break_num < 0x40000 || break_num > 0x100000) die_if_kernel("Bad break", regs, break_num); @@ -202,13 +247,21 @@ disabled_fph_fault (struct pt_regs *regs) /* first, grant user-level access to fph partition: */ psr->dfh = 0; + + /* + * Make sure that no other task gets in on this processor + * while we're claiming the FPU + */ + preempt_disable(); #ifndef CONFIG_SMP { struct task_struct *fpu_owner = (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER); - if (ia64_is_local_fpu_owner(current)) + if (ia64_is_local_fpu_owner(current)) { + preempt_enable_no_resched(); return; + } if (fpu_owner) ia64_flush_fph(fpu_owner); @@ -226,6 +279,7 @@ disabled_fph_fault (struct pt_regs *regs) */ psr->mfh = 1; } + preempt_enable_no_resched(); } static inline int @@ -521,7 +575,11 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, #endif break; case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break; - case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break; + case 36: + if (notify_die(DIE_SS, "ss", ®s, vector, + vector, SIGTRAP) == NOTIFY_STOP) + return; + siginfo.si_code = TRAP_TRACE; ifa = 0; break; } siginfo.si_signo = SIGTRAP; siginfo.si_errno = 0; diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c new file mode 100644 index 0000000..490dfc9 --- /dev/null +++ b/arch/ia64/kernel/uncached.c @@ -0,0 +1,246 @@ +/* + * Copyright (C) 2001-2005 Silicon Graphics, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * A simple uncached page allocator using the generic allocator. This + * allocator first utilizes the spare (spill) pages found in the EFI + * memmap and will then start converting cached pages to uncached ones + * at a granule at a time. Node awareness is implemented by having a + * pool of pages per node. + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/efi.h> +#include <linux/genalloc.h> +#include <asm/page.h> +#include <asm/pal.h> +#include <asm/system.h> +#include <asm/pgtable.h> +#include <asm/atomic.h> +#include <asm/tlbflush.h> +#include <asm/sn/arch.h> + +#define DEBUG 0 + +#if DEBUG +#define dprintk printk +#else +#define dprintk(x...) do { } while (0) +#endif + +void __init efi_memmap_walk_uc (efi_freemem_callback_t callback); + +#define MAX_UNCACHED_GRANULES 5 +static int allocated_granules; + +struct gen_pool *uncached_pool[MAX_NUMNODES]; + + +static void uncached_ipi_visibility(void *data) +{ + int status; + + status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); + if ((status != PAL_VISIBILITY_OK) && + (status != PAL_VISIBILITY_OK_REMOTE_NEEDED)) + printk(KERN_DEBUG "pal_prefetch_visibility() returns %i on " + "CPU %i\n", status, get_cpu()); +} + + +static void uncached_ipi_mc_drain(void *data) +{ + int status; + status = ia64_pal_mc_drain(); + if (status) + printk(KERN_WARNING "ia64_pal_mc_drain() failed with %i on " + "CPU %i\n", status, get_cpu()); +} + + +static unsigned long +uncached_get_new_chunk(struct gen_pool *poolp) +{ + struct page *page; + void *tmp; + int status, i; + unsigned long addr, node; + + if (allocated_granules >= MAX_UNCACHED_GRANULES) + return 0; + + node = poolp->private; + page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, + IA64_GRANULE_SHIFT-PAGE_SHIFT); + + dprintk(KERN_INFO "get_new_chunk page %p, addr %lx\n", + page, (unsigned long)(page-vmem_map) << PAGE_SHIFT); + + /* + * Do magic if no mem on local node! XXX + */ + if (!page) + return 0; + tmp = page_address(page); + + /* + * There's a small race here where it's possible for someone to + * access the page through /dev/mem halfway through the conversion + * to uncached - not sure it's really worth bothering about + */ + for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++) + SetPageUncached(&page[i]); + + flush_tlb_kernel_range(tmp, tmp + IA64_GRANULE_SIZE); + + status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); + + dprintk(KERN_INFO "pal_prefetch_visibility() returns %i on cpu %i\n", + status, get_cpu()); + + if (!status) { + status = smp_call_function(uncached_ipi_visibility, NULL, 0, 1); + if (status) + printk(KERN_WARNING "smp_call_function failed for " + "uncached_ipi_visibility! (%i)\n", status); + } + + if (ia64_platform_is("sn2")) + sn_flush_all_caches((unsigned long)tmp, IA64_GRANULE_SIZE); + else + flush_icache_range((unsigned long)tmp, + (unsigned long)tmp+IA64_GRANULE_SIZE); + + ia64_pal_mc_drain(); + status = smp_call_function(uncached_ipi_mc_drain, NULL, 0, 1); + if (status) + printk(KERN_WARNING "smp_call_function failed for " + "uncached_ipi_mc_drain! (%i)\n", status); + + addr = (unsigned long)tmp - PAGE_OFFSET + __IA64_UNCACHED_OFFSET; + + allocated_granules++; + return addr; +} + + +/* + * uncached_alloc_page + * + * Allocate 1 uncached page. Allocates on the requested node. If no + * uncached pages are available on the requested node, roundrobin starting + * with higher nodes. + */ +unsigned long +uncached_alloc_page(int nid) +{ + unsigned long maddr; + + maddr = gen_pool_alloc(uncached_pool[nid], PAGE_SIZE); + + dprintk(KERN_DEBUG "uncached_alloc_page returns %lx on node %i\n", + maddr, nid); + + /* + * If no memory is availble on our local node, try the + * remaining nodes in the system. + */ + if (!maddr) { + int i; + + for (i = MAX_NUMNODES - 1; i >= 0; i--) { + if (i == nid || !node_online(i)) + continue; + maddr = gen_pool_alloc(uncached_pool[i], PAGE_SIZE); + dprintk(KERN_DEBUG "uncached_alloc_page alternate search " + "returns %lx on node %i\n", maddr, i); + if (maddr) { + break; + } + } + } + + return maddr; +} +EXPORT_SYMBOL(uncached_alloc_page); + + +/* + * uncached_free_page + * + * Free a single uncached page. + */ +void +uncached_free_page(unsigned long maddr) +{ + int node; + + node = nasid_to_cnodeid(NASID_GET(maddr)); + + dprintk(KERN_DEBUG "uncached_free_page(%lx) on node %i\n", maddr, node); + + if ((maddr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET) + panic("uncached_free_page invalid address %lx\n", maddr); + + gen_pool_free(uncached_pool[node], maddr, PAGE_SIZE); +} +EXPORT_SYMBOL(uncached_free_page); + + +/* + * uncached_build_memmap, + * + * Called at boot time to build a map of pages that can be used for + * memory special operations. + */ +static int __init +uncached_build_memmap(unsigned long start, unsigned long end, void *arg) +{ + long length; + unsigned long vstart, vend; + int node; + + length = end - start; + vstart = start + __IA64_UNCACHED_OFFSET; + vend = end + __IA64_UNCACHED_OFFSET; + + dprintk(KERN_ERR "uncached_build_memmap(%lx %lx)\n", start, end); + + memset((char *)vstart, 0, length); + + node = nasid_to_cnodeid(NASID_GET(start)); + + for (; vstart < vend ; vstart += PAGE_SIZE) { + dprintk(KERN_INFO "sticking %lx into the pool!\n", vstart); + gen_pool_free(uncached_pool[node], vstart, PAGE_SIZE); + } + + return 0; +} + + +static int __init uncached_init(void) { + int i; + + for (i = 0; i < MAX_NUMNODES; i++) { + if (!node_online(i)) + continue; + uncached_pool[i] = gen_pool_create(0, IA64_GRANULE_SHIFT, + &uncached_get_new_chunk, i); + } + + efi_memmap_walk_uc(uncached_build_memmap); + + return 0; +} + +__initcall(uncached_init); |