107 files changed, 2236 insertions, 796 deletions
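The changes below wire up kdump for s390 (CONFIG_CRASH_DUMP, the extended elfcorehdr= parameter, and the new arch/s390/kernel/crash_dump.c). For orientation only, not part of the patch, here is a rough sketch of the generic kdump flow these pieces plug into, per Documentation/kdump/kdump.txt; the reservation size, image paths, and append string are illustrative assumptions:

    # 1. First (production) kernel: reserve memory for the capture kernel
    #    on its command line, e.g.:
    #        crashkernel=128M
    #
    # 2. Load the capture kernel into the reserved region with kexec-tools;
    #    the loader generally passes elfcorehdr= to the capture kernel itself:
    kexec -p /boot/vmlinuz-capture \
          --initrd=/boot/initrd-capture \
          --append="root=/dev/ram0 maxcpus=1"
    #
    # 3. After a crash the capture kernel boots and exposes the old kernel's
    #    memory as /proc/vmcore, which dump tools then save.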
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 27e0488..661efd4 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -741,10 +741,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			See Documentation/block/cfq-iosched.txt and
 			Documentation/block/deadline-iosched.txt for details.
 
-	elfcorehdr=	[IA-64,PPC,SH,X86]
+	elfcorehdr=[size[KMG]@]offset[KMG] [IA64,PPC,SH,X86,S390]
 			Specifies physical address of start of kernel core
-			image elf header. Generally kexec loader will
-			pass this option to capture kernel.
+			image elf header and optionally the size. Generally
+			kexec loader will pass this option to capture kernel.
 			See Documentation/kdump/kdump.txt for details.
 
 	enable_mtrr_cleanup	[X86]
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 6b99fc3..a9fbd43 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -569,6 +569,16 @@ config KEXEC
 	  current kernel, and to start another kernel. It is like a reboot
 	  but is independent of hardware/microcode support.
 
+config CRASH_DUMP
+	bool "kernel crash dumps"
+	depends on 64BIT
+	help
+	  Generate crash dump after being started by kexec.
+	  Crash dump kernels are loaded in the main kernel with kexec-tools
+	  into a specially reserved region and then later executed after
+	  a crash by kdump/kexec.
+	  For more details see Documentation/kdump/kdump.txt
+
 config ZFCPDUMP
 	def_bool n
 	prompt "zfcpdump support"
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index 028f23e..465eca7 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -61,7 +61,7 @@ static unsigned long free_mem_end_ptr;
 
 extern _sclp_print_early(const char *);
 
-int puts(const char *s)
+static int puts(const char *s)
 {
 	_sclp_print_early(s);
 	return 0;
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 29c82c6..6cf8e26 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -68,7 +68,7 @@ CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_POLICE=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_DEVTMPFS=y
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 623f2fb..9381c92 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -11,6 +11,7 @@
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
 #include <asm/fcx.h>
+#include <asm/irq.h>
 
 /* structs from asm/cio.h */
 struct irb;
@@ -127,6 +128,7 @@ enum uc_todo {
  * @restore: callback for restoring after hibernation
  * @uc_handler: callback for unit check handler
  * @driver: embedded device driver structure
+ * @int_class: interruption class to use for accounting interrupts
  */
 struct ccw_driver {
 	struct ccw_device_id *ids;
@@ -144,6 +146,7 @@ struct ccw_driver {
 	int (*restore)(struct ccw_device *);
 	enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *);
 	struct device_driver driver;
+	enum interruption_class int_class;
 };
 
 extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv,
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index cdb9b78..2e49748b 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -12,6 +12,7 @@
 #define PSW32_MASK_IO		0x02000000UL
 #define PSW32_MASK_EXT		0x01000000UL
 #define PSW32_MASK_KEY		0x00F00000UL
+#define PSW32_MASK_BASE		0x00080000UL	/* Always one */
 #define PSW32_MASK_MCHECK
0x00040000UL #define PSW32_MASK_WAIT 0x00020000UL #define PSW32_MASK_PSTATE 0x00010000UL @@ -19,21 +20,19 @@ #define PSW32_MASK_CC 0x00003000UL #define PSW32_MASK_PM 0x00000f00UL -#define PSW32_ADDR_AMODE31 0x80000000UL +#define PSW32_MASK_USER 0x00003F00UL + +#define PSW32_ADDR_AMODE 0x80000000UL #define PSW32_ADDR_INSN 0x7FFFFFFFUL -#define PSW32_BASE_BITS 0x00080000UL +#define PSW32_DEFAULT_KEY (((u32) PAGE_DEFAULT_ACC) << 20) #define PSW32_ASC_PRIMARY 0x00000000UL #define PSW32_ASC_ACCREG 0x00004000UL #define PSW32_ASC_SECONDARY 0x00008000UL #define PSW32_ASC_HOME 0x0000C000UL -#define PSW32_MASK_MERGE(CURRENT,NEW) \ - (((CURRENT) & ~(PSW32_MASK_CC|PSW32_MASK_PM)) | \ - ((NEW) & (PSW32_MASK_CC|PSW32_MASK_PM))) - -extern long psw32_user_bits; +extern u32 psw32_user_bits; #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "s390\0\0\0\0" diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 97cc440..6940abf 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -168,5 +168,6 @@ enum diag308_rc { extern int diag308(unsigned long subcode, void *addr); extern void diag308_reset(void); +extern void store_status(void); #endif /* _ASM_S390_IPL_H */ diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index ba7b01c..ba6d85f 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -8,7 +8,8 @@ enum interruption_class { EXTERNAL_INTERRUPT, IO_INTERRUPT, EXTINT_CLK, - EXTINT_IPI, + EXTINT_EXC, + EXTINT_EMS, EXTINT_TMR, EXTINT_TLA, EXTINT_PFL, @@ -17,8 +18,8 @@ enum interruption_class { EXTINT_SCP, EXTINT_IUC, EXTINT_CPM, + IOINT_CIO, IOINT_QAI, - IOINT_QDI, IOINT_DAS, IOINT_C15, IOINT_C70, @@ -28,6 +29,7 @@ enum interruption_class { IOINT_CLW, IOINT_CTC, IOINT_APB, + IOINT_CSC, NMI_NMI, NR_IRQS, }; diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index bb729b8..cf4e47b 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -30,9 +30,15 @@ /* Not more than 2GB */ #define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31) +/* Maximum address we can use for the crash control pages */ +#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL) + /* Allocate one page for the pdp and the second for the code */ #define KEXEC_CONTROL_PAGE_SIZE 4096 +/* Alignment of crashkernel memory */ +#define KEXEC_CRASH_MEM_ALIGN HPAGE_SIZE + /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_S390 diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 1ca5de0..24e1847 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -145,6 +145,7 @@ struct kvm_vcpu_stat { u32 instruction_sigp_arch; u32 instruction_sigp_prefix; u32 instruction_sigp_restart; + u32 diagnose_10; u32 diagnose_44; }; diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index e85c911..9e13c7d 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -151,10 +151,8 @@ struct _lowcore { */ __u32 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e04 */ - - /* 64 bit save area */ - __u64 save_area_64; /* 0x0e08 */ - __u8 pad_0x0e10[0x0f00-0x0e10]; /* 0x0e10 */ + __u32 vmcore_info; /* 0x0e08 */ + __u8 pad_0x0e0c[0x0f00-0x0e0c]; /* 0x0e0c */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ @@ -290,9 +288,7 @@ struct _lowcore { */ __u64 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e08 */ - - /* 64 bit save area */ - __u64 save_area_64; /* 0x0e0c */ + __u64 vmcore_info; /* 0x0e0c */ __u8 
pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */ /* Extended facility list */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index accb372..f7ec548 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -177,6 +177,7 @@ static inline int page_test_and_clear_young(unsigned long pfn) struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); +void arch_set_page_states(int make_stable); static inline int devmem_is_allowed(unsigned long pfn) { diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index c0cb794..34ede0e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -696,7 +696,9 @@ void gmap_disable(struct gmap *gmap); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long length); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); +unsigned long __gmap_fault(unsigned long address, struct gmap *); unsigned long gmap_fault(unsigned long address, struct gmap *); +void gmap_discard(unsigned long from, unsigned long to, struct gmap *); /* * Certain architectures need to do special things when PTEs diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index a4b6229..5f33d37 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -33,6 +33,8 @@ static inline void get_cpu_id(struct cpuid *ptr) extern void s390_adjust_jiffies(void); extern int get_cpu_capability(unsigned int *); +extern const struct seq_operations cpuinfo_op; +extern int sysctl_ieee_emulation_warnings; /* * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit. @@ -118,17 +120,17 @@ struct stack_frame { /* * Do necessary setup to start up a new thread. */ -#define start_thread(regs, new_psw, new_stackp) do { \ - regs->psw.mask = psw_user_bits; \ - regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ - regs->gprs[15] = new_stackp; \ +#define start_thread(regs, new_psw, new_stackp) do { \ + regs->psw.mask = psw_user_bits | PSW_MASK_EA | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ } while (0) -#define start_thread31(regs, new_psw, new_stackp) do { \ - regs->psw.mask = psw_user32_bits; \ - regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ - regs->gprs[15] = new_stackp; \ - crst_table_downgrade(current->mm, 1UL << 31); \ +#define start_thread31(regs, new_psw, new_stackp) do { \ + regs->psw.mask = psw_user_bits | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ + crst_table_downgrade(current->mm, 1UL << 31); \ } while (0) /* Forward declaration, a strange C thing */ @@ -187,7 +189,6 @@ static inline void __load_psw(psw_t psw) * Set PSW mask to specified value, while leaving the * PSW addr pointing to the next instruction. */ - static inline void __load_psw_mask (unsigned long mask) { unsigned long addr; @@ -212,26 +213,37 @@ static inline void __load_psw_mask (unsigned long mask) : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc"); #endif /* __s390x__ */ } - + /* - * Function to stop a processor until an interruption occurred + * Rewind PSW instruction address by specified number of bytes. 
*/ -static inline void enabled_wait(void) +static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) { - __load_psw_mask(PSW_BASE_BITS | PSW_MASK_IO | PSW_MASK_EXT | - PSW_MASK_MCHECK | PSW_MASK_WAIT | PSW_DEFAULT_KEY); -} +#ifndef __s390x__ + if (psw.addr & PSW_ADDR_AMODE) + /* 31 bit mode */ + return (psw.addr - ilc) | PSW_ADDR_AMODE; + /* 24 bit mode */ + return (psw.addr - ilc) & ((1UL << 24) - 1); +#else + unsigned long mask; + mask = (psw.mask & PSW_MASK_EA) ? -1UL : + (psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 : + (1UL << 24) - 1; + return (psw.addr - ilc) & mask; +#endif +} + /* * Function to drop a processor into disabled wait state */ - static inline void ATTRIB_NORET disabled_wait(unsigned long code) { unsigned long ctl_buf; psw_t dw_psw; - dw_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT; + dw_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA; dw_psw.addr = code; /* * Store status and then load disabled wait psw, diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 62fd80c..a658463 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -230,17 +230,21 @@ typedef struct #define PSW_MASK_IO 0x02000000UL #define PSW_MASK_EXT 0x01000000UL #define PSW_MASK_KEY 0x00F00000UL +#define PSW_MASK_BASE 0x00080000UL /* always one */ #define PSW_MASK_MCHECK 0x00040000UL #define PSW_MASK_WAIT 0x00020000UL #define PSW_MASK_PSTATE 0x00010000UL #define PSW_MASK_ASC 0x0000C000UL #define PSW_MASK_CC 0x00003000UL #define PSW_MASK_PM 0x00000F00UL +#define PSW_MASK_EA 0x00000000UL +#define PSW_MASK_BA 0x00000000UL + +#define PSW_MASK_USER 0x00003F00UL #define PSW_ADDR_AMODE 0x80000000UL #define PSW_ADDR_INSN 0x7FFFFFFFUL -#define PSW_BASE_BITS 0x00080000UL #define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20) #define PSW_ASC_PRIMARY 0x00000000UL @@ -254,6 +258,7 @@ typedef struct #define PSW_MASK_DAT 0x0400000000000000UL #define PSW_MASK_IO 0x0200000000000000UL #define PSW_MASK_EXT 0x0100000000000000UL +#define PSW_MASK_BASE 0x0000000000000000UL #define PSW_MASK_KEY 0x00F0000000000000UL #define PSW_MASK_MCHECK 0x0004000000000000UL #define PSW_MASK_WAIT 0x0002000000000000UL @@ -261,12 +266,14 @@ typedef struct #define PSW_MASK_ASC 0x0000C00000000000UL #define PSW_MASK_CC 0x0000300000000000UL #define PSW_MASK_PM 0x00000F0000000000UL +#define PSW_MASK_EA 0x0000000100000000UL +#define PSW_MASK_BA 0x0000000080000000UL + +#define PSW_MASK_USER 0x00003F0180000000UL #define PSW_ADDR_AMODE 0x0000000000000000UL #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL -#define PSW_BASE_BITS 0x0000000180000000UL -#define PSW_BASE32_BITS 0x0000000080000000UL #define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52) #define PSW_ASC_PRIMARY 0x0000000000000000UL @@ -279,18 +286,7 @@ typedef struct #ifdef __KERNEL__ extern long psw_kernel_bits; extern long psw_user_bits; -#ifdef CONFIG_64BIT -extern long psw_user32_bits; #endif -#endif - -/* This macro merges a NEW PSW mask specified by the user into - the currently active PSW mask CURRENT, modifying only those - bits in CURRENT that the user may be allowed to change: this - is the condition code and the program mask bits. */ -#define PSW_MASK_MERGE(CURRENT,NEW) \ - (((CURRENT) & ~(PSW_MASK_CC|PSW_MASK_PM)) | \ - ((NEW) & (PSW_MASK_CC|PSW_MASK_PM))) /* * The s390_regs structure is used to define the elf_gregset_t. 
@@ -328,8 +324,7 @@ struct pt_regs psw_t psw; unsigned long gprs[NUM_GPRS]; unsigned long orig_gpr2; - unsigned short ilc; - unsigned short svcnr; + unsigned int svc_code; }; /* @@ -487,6 +482,8 @@ typedef struct #define PTRACE_POKETEXT_AREA 0x5004 #define PTRACE_POKEDATA_AREA 0x5005 #define PTRACE_GET_LAST_BREAK 0x5006 +#define PTRACE_PEEK_SYSTEM_CALL 0x5007 +#define PTRACE_POKE_SYSTEM_CALL 0x5008 /* * PT_PROT definition is loosely based on hppa bsd definition in diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h index f584f4a..3d6ad4a 100644 --- a/arch/s390/include/asm/reset.h +++ b/arch/s390/include/asm/reset.h @@ -17,5 +17,5 @@ struct reset_call { extern void register_reset_call(struct reset_call *reset); extern void unregister_reset_call(struct reset_call *reset); -extern void s390_reset_system(void); +extern void s390_reset_system(void (*func)(void *), void *data); #endif /* _ASM_S390_RESET_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index d5e2ef1..5a09971 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -26,15 +26,21 @@ #define IPL_DEVICE (*(unsigned long *) (0x10404)) #define INITRD_START (*(unsigned long *) (0x1040C)) #define INITRD_SIZE (*(unsigned long *) (0x10414)) +#define OLDMEM_BASE (*(unsigned long *) (0x1041C)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10424)) #else /* __s390x__ */ #define IPL_DEVICE (*(unsigned long *) (0x10400)) #define INITRD_START (*(unsigned long *) (0x10408)) #define INITRD_SIZE (*(unsigned long *) (0x10410)) +#define OLDMEM_BASE (*(unsigned long *) (0x10418)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10420)) #endif /* __s390x__ */ #define COMMAND_LINE ((char *) (0x10480)) #define CHUNK_READ_WRITE 0 #define CHUNK_READ_ONLY 1 +#define CHUNK_OLDMEM 4 +#define CHUNK_CRASHK 5 struct mem_chunk { unsigned long addr; @@ -48,6 +54,8 @@ extern int memory_end_set; extern unsigned long memory_end; void detect_memory_layout(struct mem_chunk chunk[]); +void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr, + unsigned long size, int type); #define PRIMARY_SPACE_MODE 0 #define ACCESS_REGISTER_MODE 1 @@ -106,6 +114,7 @@ extern unsigned int user_mode; #endif /* __s390x__ */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) +#define ZFCPDUMP_HSA_SIZE_MAX (64UL<<20) /* * Console mode. 
Override with conmode= @@ -134,10 +143,14 @@ extern char kernel_nss_name[]; #define IPL_DEVICE 0x10404 #define INITRD_START 0x1040C #define INITRD_SIZE 0x10414 +#define OLDMEM_BASE 0x1041C +#define OLDMEM_SIZE 0x10424 #else /* __s390x__ */ #define IPL_DEVICE 0x10400 #define INITRD_START 0x10408 #define INITRD_SIZE 0x10410 +#define OLDMEM_BASE 0x10418 +#define OLDMEM_SIZE 0x10420 #endif /* __s390x__ */ #define COMMAND_LINE 0x10480 diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h index 0addc64..ca3f881 100644 --- a/arch/s390/include/asm/sfp-util.h +++ b/arch/s390/include/asm/sfp-util.h @@ -72,6 +72,6 @@ extern unsigned long __udiv_qrnnd (unsigned int *, unsigned int, #define UDIV_NEEDS_NORMALIZATION 0 -#define abort() return 0 +#define abort() BUG() #define __BYTE_ORDER __BIG_ENDIAN diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 045e009f..ab47a69 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -33,6 +33,7 @@ extern struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; extern void smp_switch_to_ipl_cpu(void (*func)(void *), void *); extern void smp_switch_to_cpu(void (*)(void *), void *, unsigned long sp, int from, int to); +extern void smp_restart_with_online_cpu(void); extern void smp_restart_cpu(void); /* @@ -64,6 +65,10 @@ static inline void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) func(data); } +static inline void smp_restart_with_online_cpu(void) +{ +} + #define smp_vcpu_scheduled (1) #endif /* CONFIG_SMP */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 56612fc..fd94dfe 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -13,6 +13,8 @@ #include <linux/smp.h> +extern int spin_retry; + static inline int _raw_compare_and_swap(volatile unsigned int *lock, unsigned int old, unsigned int new) diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 5c0246b..b239ff5 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -13,6 +13,7 @@ #define _ASM_SYSCALL_H 1 #include <linux/sched.h> +#include <linux/err.h> #include <asm/ptrace.h> /* @@ -25,7 +26,8 @@ extern const unsigned int sys_call_table[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return regs->svcnr ? regs->svcnr : -1; + return test_tsk_thread_flag(task, TIF_SYSCALL) ? + (regs->svc_code & 0xffff) : -1; } static inline void syscall_rollback(struct task_struct *task, @@ -37,7 +39,7 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return (regs->gprs[2] >= -4096UL) ? -regs->gprs[2] : 0; + return IS_ERR_VALUE(regs->gprs[2]) ? 
regs->gprs[2] : 0; } static inline long syscall_get_return_value(struct task_struct *task, diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 6582f69..ef573c1 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -20,6 +20,8 @@ struct task_struct; +extern int sysctl_userprocess_debug; + extern struct task_struct *__switch_to(void *, void *); extern void update_per_regs(struct task_struct *task); @@ -114,6 +116,8 @@ extern void pfault_fini(void); extern void cmma_init(void); extern int memcpy_real(void *, void *, size_t); extern void copy_to_absolute_zero(void *dest, void *src, size_t count); +extern int copy_to_user_real(void __user *dest, void *src, size_t count); +extern int copy_from_user_real(void *dest, void __user *src, size_t count); #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ @@ -210,8 +214,10 @@ __set_psw_mask(unsigned long mask) __load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8))); } -#define local_mcck_enable() __set_psw_mask(psw_kernel_bits) -#define local_mcck_disable() __set_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK) +#define local_mcck_enable() \ + __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK) +#define local_mcck_disable() \ + __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT) #ifdef CONFIG_SMP diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 1a5dbb6..a231834 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -48,6 +48,7 @@ struct thread_info { unsigned int cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; + unsigned int system_call; __u64 user_timer; __u64 system_timer; unsigned long last_break; /* last breaking-event-address. 
*/ @@ -84,10 +85,10 @@ static inline struct thread_info *current_thread_info(void) /* * thread information flags bit numbers */ +#define TIF_SYSCALL 0 /* inside a system call */ #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_RESTART_SVC 4 /* restart svc with new svc number */ #define TIF_PER_TRAP 6 /* deliver sigtrap on return to user */ #define TIF_MCCK_PENDING 7 /* machine check handling is pending */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ @@ -103,11 +104,11 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SINGLE_STEP 20 /* This task is single stepped */ #define TIF_FREEZE 21 /* thread is freezing for suspend */ +#define _TIF_SYSCALL (1<<TIF_SYSCALL) #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) -#define _TIF_RESTART_SVC (1<<TIF_RESTART_SVC) #define _TIF_PER_TRAP (1<<TIF_PER_TRAP) #define _TIF_MCCK_PENDING (1<<TIF_MCCK_PENDING) #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -117,7 +118,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SIE (1<<TIF_SIE) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_31BIT (1<<TIF_31BIT) -#define _TIF_SINGLE_STEP (1<<TIF_FREEZE) +#define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP) #define _TIF_FREEZE (1<<TIF_FREEZE) #ifdef CONFIG_64BIT diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 88829a4..d610bef 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -86,6 +86,17 @@ static inline void get_clock_ext(char *clk) asm volatile("stcke %0" : "=Q" (*clk) : : "cc"); } +static inline unsigned long long get_clock_fast(void) +{ + unsigned long long clk; + + if (test_facility(25)) + asm volatile(".insn s,0xb27c0000,%0" : "=Q" (clk) : : "cc"); + else + clk = get_clock(); + return clk; +} + static inline unsigned long long get_clock_xt(void) { unsigned char clk[16]; diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 3044453..1d8648c 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -59,6 +59,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm) } #else #define __tlb_flush_full(mm) __tlb_flush_local() +#define __tlb_flush_global() __tlb_flush_local() #endif /* diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index df37322..dd4f076 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o # Kexec part S390_KEXEC_OBJS := machine_kexec.o crash.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 2b45591..7513187 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -45,8 +45,7 @@ int main(void) DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); - DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc)); - DEFINE(__PT_SVCNR, offsetof(struct pt_regs, svcnr)); + DEFINE(__PT_SVC_CODE, offsetof(struct pt_regs, 
svc_code)); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); @@ -141,7 +140,6 @@ int main(void) DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area)); DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area)); DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area)); - DEFINE(__LC_SAVE_AREA_64, offsetof(struct _lowcore, save_area_64)); #ifdef CONFIG_32BIT DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); #else /* CONFIG_32BIT */ diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index 2554356..f8828d3 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -86,6 +86,8 @@ s390_base_pgm_handler_fn: ENTRY(diag308_reset) larl %r4,.Lctlregs # Save control registers stctg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Floating point control register + stfpc 0(%r4) larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0 lghi %r3,0 lg %r4,0(%r4) # Save PSW @@ -99,6 +101,8 @@ ENTRY(diag308_reset) sam64 # Switch to 64 bit addressing mode larl %r4,.Lctlregs # Restore control registers lctlg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Restore floating point ctl register + lfpc 0(%r4) br %r14 .align 16 .Lrestart_psw: @@ -110,6 +114,8 @@ ENTRY(diag308_reset) .rept 16 .quad 0 .endr +.Lfpctl: + .long 0 .previous #else /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 53acaa8..84a9828 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -60,12 +60,9 @@ #include "compat_linux.h" -long psw_user32_bits = (PSW_BASE32_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY); -long psw32_user_bits = (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME | - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | - PSW32_MASK_PSTATE); +u32 psw32_user_bits = PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | + PSW32_DEFAULT_KEY | PSW32_MASK_BASE | PSW32_MASK_MCHECK | + PSW32_MASK_PSTATE | PSW32_ASC_HOME; /* For this source file, we want overflow handling. 
*/ @@ -365,12 +362,7 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, if (set) { if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) return -EFAULT; - switch (_NSIG_WORDS) { - case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); - case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); - case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); - case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); - } + s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); } set_fs (KERNEL_DS); ret = sys_rt_sigprocmask(how, @@ -380,12 +372,8 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, set_fs (old_fs); if (ret) return ret; if (oset) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } + s32.sig[1] = (s.sig[0] >> 32); + s32.sig[0] = s.sig[0]; if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) return -EFAULT; } @@ -404,12 +392,8 @@ asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, ret = sys_rt_sigpending((sigset_t __force __user *) &s, sigsetsize); set_fs (old_fs); if (!ret) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } + s32.sig[1] = (s.sig[0] >> 32); + s32.sig[0] = s.sig[0]; if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) return -EFAULT; } diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index a9a285b..4f68c81 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -141,7 +141,8 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) break; case __SI_FAULT >> 16: err |= __get_user(tmp, &from->si_addr); - to->si_addr = (void __user *)(u64) (tmp & PSW32_ADDR_INSN); + to->si_addr = (void __force __user *) + (u64) (tmp & PSW32_ADDR_INSN); break; case __SI_POLL >> 16: err |= __get_user(to->si_band, &from->si_band); @@ -213,16 +214,8 @@ sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, ret = get_user(sa_handler, &act->sa_handler); ret |= __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t)); - switch (_NSIG_WORDS) { - case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] - | (((long)set32.sig[7]) << 32); - case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4] - | (((long)set32.sig[5]) << 32); - case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2] - | (((long)set32.sig[3]) << 32); - case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0] - | (((long)set32.sig[1]) << 32); - } + new_ka.sa.sa_mask.sig[0] = + set32.sig[0] | (((long)set32.sig[1]) << 32); ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); if (ret) @@ -233,20 +226,8 @@ sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); if (!ret && oact) { - switch (_NSIG_WORDS) { - case 4: - set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); - set32.sig[6] = old_ka.sa.sa_mask.sig[3]; - case 3: - set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32); - set32.sig[4] = old_ka.sa.sa_mask.sig[2]; - case 2: - set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32); - set32.sig[2] = old_ka.sa.sa_mask.sig[1]; - case 1: - set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); - set32.sig[0] = old_ka.sa.sa_mask.sig[0]; - } + set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); + set32.sig[0] = old_ka.sa.sa_mask.sig[0]; ret = put_user((unsigned long)old_ka.sa.sa_handler, &oact->sa_handler); ret |= __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t)); @@ -300,9 +281,10 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) _s390_regs_common32 regs32; int err, i; - regs32.psw.mask = PSW32_MASK_MERGE(psw32_user_bits, - (__u32)(regs->psw.mask >> 32)); - regs32.psw.addr = PSW32_ADDR_AMODE31 | (__u32) regs->psw.addr; + regs32.psw.mask = psw32_user_bits | + ((__u32)(regs->psw.mask >> 32) & PSW32_MASK_USER); + regs32.psw.addr = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); for (i = 0; i < NUM_GPRS; i++) regs32.gprs[i] = (__u32) regs->gprs[i]; save_access_regs(current->thread.acrs); @@ -327,8 +309,9 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) err = __copy_from_user(®s32, &sregs->regs, sizeof(regs32)); if (err) return err; - regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask, - (__u64)regs32.psw.mask << 32); + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32 | + (__u64)(regs32.psw.addr & PSW32_ADDR_AMODE); regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN); for (i = 0; i < NUM_GPRS; i++) regs->gprs[i] = (__u64) regs32.gprs[i]; @@ -342,7 +325,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) return err; restore_fp_regs(¤t->thread.fp_regs); - regs->svcnr = 0; /* disable syscall checks */ + clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ return 0; } @@ -496,11 +479,11 @@ static int setup_frame32(int sig, struct k_sigaction *ka, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer; + regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64) frame->retcode; + regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __user *)(frame->retcode))) + (u16 __force __user *)(frame->retcode))) goto give_sigsegv; } @@ -509,11 +492,12 @@ static int setup_frame32(int sig, struct k_sigaction *ka, goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; - regs->psw.addr = (__u64) ka->sa.sa_handler; + regs->gprs[15] = (__force __u64) frame; + regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ + regs->psw.addr = (__force __u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->sc; + regs->gprs[3] = (__force __u64) &frame->sc; /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ @@ -521,7 +505,7 @@ static int setup_frame32(int sig, struct k_sigaction *ka, regs->gprs[5] = current->thread.prot_addr; /* Place signal number on stack to allow backtrace from handler. 
*/ - if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) + if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) goto give_sigsegv; return 0; @@ -564,20 +548,21 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, } else { regs->gprs[14] = (__u64) frame->retcode; err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __user *)(frame->retcode)); + (u16 __force __user *)(frame->retcode)); } /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) + if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; + regs->gprs[15] = (__force __u64) frame; + regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ regs->psw.addr = (__u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->info; - regs->gprs[4] = (__u64) &frame->uc; + regs->gprs[3] = (__force __u64) &frame->info; + regs->gprs[4] = (__force __u64) &frame->uc; return 0; give_sigsegv: diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 7526db6..5006a1d 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1623,8 +1623,7 @@ ENTRY(sys_syncfs_wrapper) lgfr %r2,%r2 # int jg sys_syncfs - .globl sys_setns_wrapper -sys_setns_wrapper: +ENTRY(sys_setns_wrapper) lgfr %r2,%r2 # int lgfr %r3,%r3 # int jg sys_setns diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c new file mode 100644 index 0000000..39f8fd4 --- /dev/null +++ b/arch/s390/kernel/crash_dump.c @@ -0,0 +1,426 @@ +/* + * S390 kdump implementation + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/crash_dump.h> +#include <asm/lowcore.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/crash_dump.h> +#include <linux/bootmem.h> +#include <linux/elf.h> +#include <asm/ipl.h> + +#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) +#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) +#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) + +/* + * Copy one page from "oldmem" + * + * For the kdump reserved memory this functions performs a swap operation: + * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. 
+ * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + unsigned long src; + + if (!csize) + return 0; + + src = (pfn << PAGE_SHIFT) + offset; + if (src < OLDMEM_SIZE) + src += OLDMEM_BASE; + else if (src > OLDMEM_BASE && + src < OLDMEM_BASE + OLDMEM_SIZE) + src -= OLDMEM_BASE; + if (userbuf) + copy_to_user_real((void __force __user *) buf, (void *) src, + csize); + else + memcpy_real(buf, (void *) src, csize); + return csize; +} + +/* + * Copy memory from old kernel + */ +static int copy_from_oldmem(void *dest, void *src, size_t count) +{ + unsigned long copied = 0; + int rc; + + if ((unsigned long) src < OLDMEM_SIZE) { + copied = min(count, OLDMEM_SIZE - (unsigned long) src); + rc = memcpy_real(dest, src + OLDMEM_BASE, copied); + if (rc) + return rc; + } + return memcpy_real(dest + copied, src + copied, count - copied); +} + +/* + * Alloc memory and panic in case of ENOMEM + */ +static void *kzalloc_panic(int len) +{ + void *rc; + + rc = kzalloc(len, GFP_KERNEL); + if (!rc) + panic("s390 kdump kzalloc (%d) failed", len); + return rc; +} + +/* + * Get memory layout and create hole for oldmem + */ +static struct mem_chunk *get_memory_layout(void) +{ + struct mem_chunk *chunk_array; + + chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk)); + detect_memory_layout(chunk_array); + create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE, CHUNK_CRASHK); + return chunk_array; +} + +/* + * Initialize ELF note + */ +static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, + const char *name) +{ + Elf64_Nhdr *note; + u64 len; + + note = (Elf64_Nhdr *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = d_len; + note->n_type = type; + len = sizeof(Elf64_Nhdr); + + memcpy(buf + len, name, note->n_namesz); + len = roundup(len + note->n_namesz, 4); + + memcpy(buf + len, desc, note->n_descsz); + len = roundup(len + note->n_descsz, 4); + + return PTR_ADD(buf, len); +} + +/* + * Initialize prstatus note + */ +static void *nt_prstatus(void *ptr, struct save_area *sa) +{ + struct elf_prstatus nt_prstatus; + static int cpu_nr = 1; + + memset(&nt_prstatus, 0, sizeof(nt_prstatus)); + memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs)); + memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); + memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs)); + nt_prstatus.pr_pid = cpu_nr; + cpu_nr++; + + return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus), + "CORE"); +} + +/* + * Initialize fpregset (floating point) note + */ +static void *nt_fpregset(void *ptr, struct save_area *sa) +{ + elf_fpregset_t nt_fpregset; + + memset(&nt_fpregset, 0, sizeof(nt_fpregset)); + memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg)); + memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs)); + + return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset), + "CORE"); +} + +/* + * Initialize timer note + */ +static void *nt_s390_timer(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD clock comparator note + */ +static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, + sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD programmable register note + */ +static void *nt_s390_tod_preg(void *ptr, struct 
save_area *sa) +{ + return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, + sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize control register note + */ +static void *nt_s390_ctrs(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs, + sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize prefix register note + */ +static void *nt_s390_prefix(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg, + sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Fill ELF notes for one CPU with save area registers + */ +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) +{ + ptr = nt_prstatus(ptr, sa); + ptr = nt_fpregset(ptr, sa); + ptr = nt_s390_timer(ptr, sa); + ptr = nt_s390_tod_cmp(ptr, sa); + ptr = nt_s390_tod_preg(ptr, sa); + ptr = nt_s390_ctrs(ptr, sa); + ptr = nt_s390_prefix(ptr, sa); + return ptr; +} + +/* + * Initialize prpsinfo note (new kernel) + */ +static void *nt_prpsinfo(void *ptr) +{ + struct elf_prpsinfo prpsinfo; + + memset(&prpsinfo, 0, sizeof(prpsinfo)); + prpsinfo.pr_sname = 'R'; + strcpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize vmcoreinfo note (new kernel) + */ +static void *nt_vmcoreinfo(void *ptr) +{ + char nt_name[11], *vmcoreinfo; + Elf64_Nhdr note; + void *addr; + + if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + return ptr; + memset(nt_name, 0, sizeof(nt_name)); + if (copy_from_oldmem(¬e, addr, sizeof(note))) + return ptr; + if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) + return ptr; + if (strcmp(nt_name, "VMCOREINFO") != 0) + return ptr; + vmcoreinfo = kzalloc_panic(note.n_descsz + 1); + if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) + return ptr; + vmcoreinfo[note.n_descsz + 1] = 0; + return nt_init(ptr, 0, vmcoreinfo, note.n_descsz, "VMCOREINFO"); +} + +/* + * Initialize ELF header (new kernel) + */ +static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) +{ + memset(ehdr, 0, sizeof(*ehdr)); + memcpy(ehdr->e_ident, ELFMAG, SELFMAG); + ehdr->e_ident[EI_CLASS] = ELFCLASS64; + ehdr->e_ident[EI_DATA] = ELFDATA2MSB; + ehdr->e_ident[EI_VERSION] = EV_CURRENT; + memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); + ehdr->e_type = ET_CORE; + ehdr->e_machine = EM_S390; + ehdr->e_version = EV_CURRENT; + ehdr->e_phoff = sizeof(Elf64_Ehdr); + ehdr->e_ehsize = sizeof(Elf64_Ehdr); + ehdr->e_phentsize = sizeof(Elf64_Phdr); + ehdr->e_phnum = mem_chunk_cnt + 1; + return ehdr + 1; +} + +/* + * Return CPU count for ELF header (new kernel) + */ +static int get_cpu_cnt(void) +{ + int i, cpus = 0; + + for (i = 0; zfcpdump_save_areas[i]; i++) { + if (zfcpdump_save_areas[i]->pref_reg == 0) + continue; + cpus++; + } + return cpus; +} + +/* + * Return memory chunk count for ELF header (new kernel) + */ +static int get_mem_chunk_cnt(void) +{ + struct mem_chunk *chunk_array, *mem_chunk; + int i, cnt = 0; + + chunk_array = get_memory_layout(); + for (i = 0; i < MEMORY_CHUNKS; i++) { + mem_chunk = &chunk_array[i]; + if (chunk_array[i].type != CHUNK_READ_WRITE && + chunk_array[i].type != CHUNK_READ_ONLY) + continue; + if (mem_chunk->size == 0) + continue; + cnt++; + } + kfree(chunk_array); + return cnt; +} + +/* + * Relocate pointer in order to allow vmcore code access the data + */ +static inline unsigned long relocate(unsigned long addr) +{ + return OLDMEM_BASE + addr; +} + +/* + * Initialize ELF loads (new kernel) + */ 
+static int loads_init(Elf64_Phdr *phdr, u64 loads_offset) +{ + struct mem_chunk *chunk_array, *mem_chunk; + int i; + + chunk_array = get_memory_layout(); + for (i = 0; i < MEMORY_CHUNKS; i++) { + mem_chunk = &chunk_array[i]; + if (mem_chunk->size == 0) + break; + if (chunk_array[i].type != CHUNK_READ_WRITE && + chunk_array[i].type != CHUNK_READ_ONLY) + continue; + else + phdr->p_filesz = mem_chunk->size; + phdr->p_type = PT_LOAD; + phdr->p_offset = mem_chunk->addr; + phdr->p_vaddr = mem_chunk->addr; + phdr->p_paddr = mem_chunk->addr; + phdr->p_memsz = mem_chunk->size; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; + phdr++; + } + kfree(chunk_array); + return i; +} + +/* + * Initialize notes (new kernel) + */ +static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) +{ + struct save_area *sa; + void *ptr_start = ptr; + int i; + + ptr = nt_prpsinfo(ptr); + + for (i = 0; zfcpdump_save_areas[i]; i++) { + sa = zfcpdump_save_areas[i]; + if (sa->pref_reg == 0) + continue; + ptr = fill_cpu_elf_notes(ptr, sa); + } + ptr = nt_vmcoreinfo(ptr); + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = PT_NOTE; + phdr->p_offset = relocate(notes_offset); + phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); + phdr->p_memsz = phdr->p_filesz; + return ptr; +} + +/* + * Create ELF core header (new kernel) + */ +static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz) +{ + Elf64_Phdr *phdr_notes, *phdr_loads; + int mem_chunk_cnt; + void *ptr, *hdr; + u32 alloc_size; + u64 hdr_off; + + mem_chunk_cnt = get_mem_chunk_cnt(); + + alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + + mem_chunk_cnt * sizeof(Elf64_Phdr); + hdr = kzalloc_panic(alloc_size); + /* Init elf header */ + ptr = ehdr_init(hdr, mem_chunk_cnt); + /* Init program headers */ + phdr_notes = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr)); + phdr_loads = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt); + /* Init notes */ + hdr_off = PTR_DIFF(ptr, hdr); + ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); + /* Init loads */ + hdr_off = PTR_DIFF(ptr, hdr); + loads_init(phdr_loads, ((unsigned long) hdr) + hdr_off); + *elfcorebuf_sz = hdr_off; + *elfcorebuf = (void *) relocate((unsigned long) hdr); + BUG_ON(*elfcorebuf_sz > alloc_size); +} + +/* + * Create kdump ELF core header in new kernel, if it has not been passed via + * the "elfcorehdr" kernel parameter + */ +static int setup_kdump_elfcorehdr(void) +{ + size_t elfcorebuf_sz; + char *elfcorebuf; + + if (!OLDMEM_BASE || is_kdump_kernel()) + return -EINVAL; + s390_elf_corehdr_create(&elfcorebuf, &elfcorebuf_sz); + elfcorehdr_addr = (unsigned long long) elfcorebuf; + elfcorehdr_size = elfcorebuf_sz; + return 0; +} + +subsys_initcall(setup_kdump_elfcorehdr); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index f297456..37394b3 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -252,7 +252,7 @@ static noinline __init void setup_lowcore_early(void) { psw_t psw; - psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler; S390_lowcore.external_new_psw = psw; psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 02ec8fe..b131570 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -43,16 +43,15 @@ SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 52 SP_R14 = 
STACK_FRAME_OVERHEAD + __PT_GPRS + 56 SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 60 SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR +SP_SVC_CODE = STACK_FRAME_OVERHEAD + __PT_SVC_CODE SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP ) + _TIF_MCCK_PENDING | _TIF_PER_TRAP ) _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) -_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT @@ -228,9 +227,10 @@ ENTRY(system_call) sysc_saveall: SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC l %r12,__LC_THREAD_INFO # load pointer to thread_info struct + mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC + oi __TI_flags+3(%r12),_TIF_SYSCALL sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER sysc_stime: @@ -239,17 +239,17 @@ sysc_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER sysc_do_svc: xr %r7,%r7 - icm %r7,3,SP_SVCNR(%r15) # load svc number and test for svc 0 + icm %r7,3,SP_SVC_CODE+2(%r15)# load svc number and test for svc 0 bnz BASED(sysc_nr_ok) # svc number > 0 # svc 0: system call number in %r1 cl %r1,BASED(.Lnr_syscalls) bnl BASED(sysc_nr_ok) - sth %r1,SP_SVCNR(%r15) + sth %r1,SP_SVC_CODE+2(%r15) lr %r7,%r1 # copy svc number to %r7 sysc_nr_ok: sll %r7,2 # svc number *4 l %r10,BASED(.Lsysc_table) - tm __TI_flags+2(%r12),_TIF_SYSCALL + tm __TI_flags+2(%r12),_TIF_TRACE >> 8 mvc SP_ARGS(4,%r15),SP_R7(%r15) l %r8,0(%r7,%r10) # get system call addr. bnz BASED(sysc_tracesys) @@ -259,23 +259,19 @@ sysc_nr_ok: sysc_return: LOCKDEP_SYS_EXIT sysc_tif: + tm SP_PSW+1(%r15),0x01 # returning to user ? + bno BASED(sysc_restore) tm __TI_flags+3(%r12),_TIF_WORK_SVC bnz BASED(sysc_work) # there is work to do (signals etc.) + ni __TI_flags+3(%r12),255-_TIF_SYSCALL sysc_restore: RESTORE_ALL __LC_RETURN_PSW,1 sysc_done: # -# There is work to do, but first we need to check if we return to userspace. -# -sysc_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(sysc_restore) - -# # One of the work bits is on. Find out which one. 
# -sysc_work_tif: +sysc_work: tm __TI_flags+3(%r12),_TIF_MCCK_PENDING bo BASED(sysc_mcck_pending) tm __TI_flags+3(%r12),_TIF_NEED_RESCHED @@ -284,8 +280,6 @@ sysc_work_tif: bo BASED(sysc_sigpending) tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME bo BASED(sysc_notify_resume) - tm __TI_flags+3(%r12),_TIF_RESTART_SVC - bo BASED(sysc_restart) tm __TI_flags+3(%r12),_TIF_PER_TRAP bo BASED(sysc_singlestep) b BASED(sysc_return) # beware of critical section cleanup @@ -314,11 +308,14 @@ sysc_sigpending: la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ldo_signal) basr %r14,%r1 # call do_signal - tm __TI_flags+3(%r12),_TIF_RESTART_SVC - bo BASED(sysc_restart) - tm __TI_flags+3(%r12),_TIF_PER_TRAP - bo BASED(sysc_singlestep) - b BASED(sysc_return) + tm __TI_flags+3(%r12),_TIF_SYSCALL + bno BASED(sysc_return) + lm %r2,%r6,SP_R2(%r15) # load svc arguments + xr %r7,%r7 # svc 0 returns -ENOSYS + clc SP_SVC_CODE+2(2,%r15),BASED(.Lnr_syscalls+2) + bnl BASED(sysc_nr_ok) # invalid svc number -> do svc 0 + icm %r7,3,SP_SVC_CODE+2(%r15)# load new svc number + b BASED(sysc_nr_ok) # restart svc # # _TIF_NOTIFY_RESUME is set, call do_notify_resume @@ -329,24 +326,11 @@ sysc_notify_resume: la %r14,BASED(sysc_return) br %r1 # call do_notify_resume - -# -# _TIF_RESTART_SVC is set, set up registers and restart svc -# -sysc_restart: - ni __TI_flags+3(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - l %r7,SP_R2(%r15) # load new svc number - mvc SP_R2(4,%r15),SP_ORIG_R2(%r15) # restore first argument - lm %r2,%r6,SP_R2(%r15) # load svc arguments - sth %r7,SP_SVCNR(%r15) - b BASED(sysc_nr_ok) # restart svc - # # _TIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number + ni __TI_flags+3(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP) la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler la %r14,BASED(sysc_return) # load adr. of system return @@ -361,7 +345,7 @@ sysc_tracesys: la %r2,SP_PTREGS(%r15) # load pt_regs la %r3,0 xr %r0,%r0 - icm %r0,3,SP_SVCNR(%r15) + icm %r0,3,SP_SVC_CODE(%r15) st %r0,SP_R2(%r15) basr %r14,%r1 cl %r2,BASED(.Lnr_syscalls) @@ -376,7 +360,7 @@ sysc_tracego: basr %r14,%r8 # call sys_xxx st %r2,SP_R2(%r15) # store return value sysc_tracenogo: - tm __TI_flags+2(%r12),_TIF_SYSCALL + tm __TI_flags+2(%r12),_TIF_TRACE >> 8 bz BASED(sysc_return) l %r1,BASED(.Ltrace_exit) la %r2,SP_PTREGS(%r15) # load pt_regs @@ -454,7 +438,6 @@ ENTRY(pgm_check_handler) bnz BASED(pgm_per) # got per exception -> special case SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_ILC(4,%r15),SP_ILC(%r15) mvc SP_PSW(8,%r15),__LC_PGM_OLD_PSW l %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # interrupting from user ? 
@@ -530,9 +513,10 @@ pgm_exit2: pgm_svcper: SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC l %r12,__LC_THREAD_INFO # load pointer to thread_info struct + mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC + oi __TI_flags+3(%r12),(_TIF_SYSCALL | _TIF_PER_TRAP) UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER @@ -540,7 +524,6 @@ pgm_svcper: mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE mvc __THREAD_per_address(4,%r8),__LC_PER_ADDRESS mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID - oi __TI_flags+3(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP stosm __SF_EMPTY(%r15),0x03 # reenable interrupts lm %r2,%r6,SP_R2(%r15) # load svc arguments b BASED(sysc_do_svc) @@ -550,7 +533,6 @@ pgm_svcper: # kernel_per: REENABLE_IRQS - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler basr %r14,%r1 # branch to do_single_step @@ -853,13 +835,13 @@ restart_go: # PSW restart interrupt handler # ENTRY(psw_restart_int_handler) - st %r15,__LC_SAVE_AREA_64(%r0) # save r15 + st %r15,__LC_SAVE_AREA+48(%r0) # save r15 basr %r15,0 0: l %r15,.Lrestart_stack-0b(%r15) # load restart stack l %r15,0(%r15) ahi %r15,-SP_SIZE # make room for pt_regs stm %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack - mvc SP_R15(4,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack + mvc SP_R15(4,%r15),__LC_SAVE_AREA+48(%r0)# store saved %r15 to stack mvc SP_PSW(8,%r15),__LC_RST_OLD_PSW(%r0) # store restart old psw xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 basr %r14,0 @@ -965,9 +947,11 @@ cleanup_system_call: s %r15,BASED(.Lc_spsize) # make room for registers & psw st %r15,12(%r12) CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC mvc 0(4,%r12),__LC_THREAD_INFO + l %r12,__LC_THREAD_INFO + mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC + oi __TI_flags+3(%r12),_TIF_SYSCALL cleanup_vtime: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12) bhe BASED(cleanup_stime) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 66729eb..ef8fb1d 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -5,24 +5,33 @@ #include <linux/signal.h> #include <asm/ptrace.h> + +extern void (*pgm_check_table[128])(struct pt_regs *, long, unsigned long); +extern void *restart_stack; + +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); +asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); + void do_protection_exception(struct pt_regs *, long, unsigned long); void do_dat_exception(struct pt_regs *, long, unsigned long); void do_asce_exception(struct pt_regs *, long, unsigned long); -extern int sysctl_userprocess_debug; - void do_per_trap(struct pt_regs *regs); void syscall_trace(struct pt_regs *regs, int entryexit); void kernel_stack_overflow(struct pt_regs * regs); void do_signal(struct pt_regs *regs); int handle_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); +void do_notify_resume(struct pt_regs *regs); void do_extint(struct pt_regs *regs, unsigned int, unsigned int, unsigned long); +void do_restart(void); int __cpuinit start_secondary(void *cpuvoid); void __init startup_init(void); void 
die(const char * str, struct pt_regs * regs, long err); +void __init time_init(void); + struct s390_mmap_arg_struct; struct fadvise64_64_args; struct old_sigaction; diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 713da07..83a9374 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -43,19 +43,18 @@ SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 104 SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 112 SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 120 SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR +SP_SVC_CODE = STACK_FRAME_OVERHEAD + __PT_SVC_CODE SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP ) + _TIF_MCCK_PENDING | _TIF_PER_TRAP ) _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) -_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #define BASED(name) name-system_call(%r13) @@ -249,9 +248,10 @@ ENTRY(system_call) sysc_saveall: SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct + mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC + oi __TI_flags+7(%r12),_TIF_SYSCALL sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER sysc_stime: @@ -260,14 +260,14 @@ sysc_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER LAST_BREAK sysc_do_svc: - llgh %r7,SP_SVCNR(%r15) + llgh %r7,SP_SVC_CODE+2(%r15) slag %r7,%r7,2 # shift and test for svc 0 jnz sysc_nr_ok # svc 0: system call number in %r1 llgfr %r1,%r1 # clear high word in r1 cghi %r1,NR_syscalls jnl sysc_nr_ok - sth %r1,SP_SVCNR(%r15) + sth %r1,SP_SVC_CODE+2(%r15) slag %r7,%r1,2 # shift and test for svc 0 sysc_nr_ok: larl %r10,sys_call_table @@ -277,7 +277,7 @@ sysc_nr_ok: larl %r10,sys_call_table_emu # use 31 bit emulation system calls sysc_noemu: #endif - tm __TI_flags+6(%r12),_TIF_SYSCALL + tm __TI_flags+6(%r12),_TIF_TRACE >> 8 mvc SP_ARGS(8,%r15),SP_R7(%r15) lgf %r8,0(%r7,%r10) # load address of system call routine jnz sysc_tracesys @@ -287,23 +287,19 @@ sysc_noemu: sysc_return: LOCKDEP_SYS_EXIT sysc_tif: + tm SP_PSW+1(%r15),0x01 # returning to user ? + jno sysc_restore tm __TI_flags+7(%r12),_TIF_WORK_SVC jnz sysc_work # there is work to do (signals etc.) + ni __TI_flags+7(%r12),255-_TIF_SYSCALL sysc_restore: RESTORE_ALL __LC_RETURN_PSW,1 sysc_done: # -# There is work to do, but first we need to check if we return to userspace. -# -sysc_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? - jno sysc_restore - -# # One of the work bits is on. Find out which one. 
# -sysc_work_tif: +sysc_work: tm __TI_flags+7(%r12),_TIF_MCCK_PENDING jo sysc_mcck_pending tm __TI_flags+7(%r12),_TIF_NEED_RESCHED @@ -312,8 +308,6 @@ sysc_work_tif: jo sysc_sigpending tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME jo sysc_notify_resume - tm __TI_flags+7(%r12),_TIF_RESTART_SVC - jo sysc_restart tm __TI_flags+7(%r12),_TIF_PER_TRAP jo sysc_singlestep j sysc_return # beware of critical section cleanup @@ -339,11 +333,15 @@ sysc_sigpending: ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP la %r2,SP_PTREGS(%r15) # load pt_regs brasl %r14,do_signal # call do_signal - tm __TI_flags+7(%r12),_TIF_RESTART_SVC - jo sysc_restart - tm __TI_flags+7(%r12),_TIF_PER_TRAP - jo sysc_singlestep - j sysc_return + tm __TI_flags+7(%r12),_TIF_SYSCALL + jno sysc_return + lmg %r2,%r6,SP_R2(%r15) # load svc arguments + lghi %r7,0 # svc 0 returns -ENOSYS + lh %r1,SP_SVC_CODE+2(%r15) # load new svc number + cghi %r1,NR_syscalls + jnl sysc_nr_ok # invalid svc number -> do svc 0 + slag %r7,%r1,2 + j sysc_nr_ok # restart svc # # _TIF_NOTIFY_RESUME is set, call do_notify_resume @@ -354,23 +352,10 @@ sysc_notify_resume: jg do_notify_resume # call do_notify_resume # -# _TIF_RESTART_SVC is set, set up registers and restart svc -# -sysc_restart: - ni __TI_flags+7(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - lg %r7,SP_R2(%r15) # load new svc number - mvc SP_R2(8,%r15),SP_ORIG_R2(%r15) # restore first argument - lmg %r2,%r6,SP_R2(%r15) # load svc arguments - sth %r7,SP_SVCNR(%r15) - slag %r7,%r7,2 - j sysc_nr_ok # restart svc - -# # _TIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number + ni __TI_flags+7(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP) la %r2,SP_PTREGS(%r15) # address of register-save area larl %r14,sysc_return # load adr. 
of system return jg do_per_trap @@ -382,7 +367,7 @@ sysc_singlestep: sysc_tracesys: la %r2,SP_PTREGS(%r15) # load pt_regs la %r3,0 - llgh %r0,SP_SVCNR(%r15) + llgh %r0,SP_SVC_CODE+2(%r15) stg %r0,SP_R2(%r15) brasl %r14,do_syscall_trace_enter lghi %r0,NR_syscalls @@ -397,7 +382,7 @@ sysc_tracego: basr %r14,%r8 # call sys_xxx stg %r2,SP_R2(%r15) # store return value sysc_tracenogo: - tm __TI_flags+6(%r12),_TIF_SYSCALL + tm __TI_flags+6(%r12),_TIF_TRACE >> 8 jz sysc_return la %r2,SP_PTREGS(%r15) # load pt_regs larl %r14,sysc_return # return point is sysc_return @@ -470,7 +455,6 @@ ENTRY(pgm_check_handler) jnz pgm_per # got per exception -> special case SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_ILC(4,%r15),SP_ILC(%r15) mvc SP_PSW(16,%r15),__LC_PGM_OLD_PSW lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct HANDLE_SIE_INTERCEPT @@ -550,9 +534,10 @@ pgm_exit2: pgm_svcper: SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct + mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC + oi __TI_flags+7(%r12),(_TIF_SYSCALL | _TIF_PER_TRAP) UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER @@ -561,7 +546,6 @@ pgm_svcper: mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE mvc __THREAD_per_address(8,%r8),__LC_PER_ADDRESS mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID - oi __TI_flags+7(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP stosm __SF_EMPTY(%r15),0x03 # reenable interrupts lmg %r2,%r6,SP_R2(%r15) # load svc arguments j sysc_do_svc @@ -571,7 +555,6 @@ pgm_svcper: # kernel_per: REENABLE_IRQS - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number la %r2,SP_PTREGS(%r15) # address of register-save area brasl %r14,do_per_trap j pgm_exit @@ -869,12 +852,12 @@ restart_go: # PSW restart interrupt handler # ENTRY(psw_restart_int_handler) - stg %r15,__LC_SAVE_AREA_64(%r0) # save r15 + stg %r15,__LC_SAVE_AREA+120(%r0) # save r15 larl %r15,restart_stack # load restart stack lg %r15,0(%r15) aghi %r15,-SP_SIZE # make room for pt_regs stmg %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack - mvc SP_R15(8,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack + mvc SP_R15(8,%r15),__LC_SAVE_AREA+120(%r0)# store saved %r15 to stack mvc SP_PSW(16,%r15),__LC_RST_OLD_PSW(%r0)# store restart old psw xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 brasl %r14,do_restart @@ -972,9 +955,11 @@ cleanup_system_call: stg %r15,32(%r12) stg %r11,0(%r12) CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC mvc 8(8,%r12),__LC_THREAD_INFO + lg %r12,__LC_THREAD_INFO + mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC + oi __TI_flags+7(%r12),_TIF_SYSCALL cleanup_vtime: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) jhe cleanup_stime @@ -1096,6 +1081,7 @@ sie_exit: lghi %r2,0 br %r14 sie_fault: + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce lg %r14,__LC_THREAD_INFO # pointer thread_info struct ni __TI_flags+6(%r14),255-(_TIF_SIE>>8) lg %r14,__SF_EMPTY+8(%r15) # load guest register save area diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 2d781ba..900068d 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -449,10 +449,28 @@ 
ENTRY(start) # .org 0x10000 ENTRY(startup) + j .Lep_startup_normal + .org 0x10008 +# +# This is a list of s390 kernel entry points. At address 0x1000f the number of +# valid entry points is stored. +# +# IMPORTANT: Do not change this table, it is s390 kernel ABI! +# + .ascii "S390EP" + .byte 0x00,0x01 +# +# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode +# + .org 0x10010 +ENTRY(startup_kdump) + j .Lep_startup_kdump +.Lep_startup_normal: basr %r13,0 # get base .LPG0: xc 0x200(256),0x200 # partially clear lowcore xc 0x300(256),0x300 + xc 0xe00(256),0xe00 stck __LC_LAST_UPDATE_CLOCK spt 5f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),5f-.LPG0(%r13) @@ -534,6 +552,8 @@ ENTRY(startup) .align 8 5: .long 0x7fffffff,0xffffffff +#include "head_kdump.S" + # # params at 10400 (setup.h) # @@ -541,6 +561,8 @@ ENTRY(startup) .long 0,0 # IPL_DEVICE .long 0,0 # INITRD_START .long 0,0 # INITRD_SIZE + .long 0,0 # OLDMEM_BASE + .long 0,0 # OLDMEM_SIZE .org COMMAND_LINE .byte "root=/dev/ram0 ro" diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index f21954b..d3f1ab7 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -92,7 +92,7 @@ ENTRY(_stext) .LPG3: # check control registers stctl %c0,%c15,0(%r15) - oi 2(%r15),0x40 # enable sigp emergency signal + oi 2(%r15),0x60 # enable sigp emergency & external call oi 0(%r15),0x10 # switch on low address protection lctl %c0,%c15,0(%r15) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index ae5d492..99348c0 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -90,7 +90,7 @@ ENTRY(_stext) .LPG3: # check control registers stctg %c0,%c15,0(%r15) - oi 6(%r15),0x40 # enable sigp emergency signal + oi 6(%r15),0x60 # enable sigp emergency & external call oi 4(%r15),0x10 # switch on low address proctection lctlg %c0,%c15,0(%r15) diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S new file mode 100644 index 0000000..e1ac389 --- /dev/null +++ b/arch/s390/kernel/head_kdump.S @@ -0,0 +1,119 @@ +/* + * S390 kdump lowlevel functions (new kernel) + * + * Copyright IBM Corp. 
2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#define DATAMOVER_ADDR 0x4000 +#define COPY_PAGE_ADDR 0x6000 + +#ifdef CONFIG_CRASH_DUMP + +# +# kdump entry (new kernel - not yet relocated) +# +# Note: This code has to be position independent +# + +.align 2 +.Lep_startup_kdump: + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # Switch to esame mode + sam64 # Switch to 64 bit addressing + basr %r13,0 +.Lbase: + larl %r2,.Lbase_addr # Check, if we have been + lg %r2,0(%r2) # already relocated: + clgr %r2,%r13 # + jne .Lrelocate # No : Start data mover + lghi %r2,0 # Yes: Start kdump kernel + brasl %r14,startup_kdump_relocated + +.Lrelocate: + larl %r4,startup + lg %r2,0x418(%r4) # Get kdump base + lg %r3,0x420(%r4) # Get kdump size + + larl %r10,.Lcopy_start # Source of data mover + lghi %r8,DATAMOVER_ADDR # Target of data mover + mvc 0(256,%r8),0(%r10) # Copy data mover code + + agr %r8,%r2 # Copy data mover to + mvc 0(256,%r8),0(%r10) # reserved mem + + lghi %r14,DATAMOVER_ADDR # Jump to copied data mover + basr %r14,%r14 +.Lbase_addr: + .quad .Lbase + +# +# kdump data mover code (runs at address DATAMOVER_ADDR) +# +# r2: kdump base address +# r3: kdump size +# +.Lcopy_start: + basr %r13,0 # Base +0: + lgr %r11,%r2 # Save kdump base address + lgr %r12,%r2 + agr %r12,%r3 # Compute kdump end address + + lghi %r5,0 + lghi %r10,COPY_PAGE_ADDR # Load copy page address +1: + mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp + mvc 0(256,%r5),0(%r11) # Copy new kernel to old + mvc 0(256,%r11),0(%r10) # Copy tmp to new + aghi %r11,256 + aghi %r5,256 + clgr %r11,%r12 + jl 1b + + lg %r14,.Lstartup_kdump-0b(%r13) + basr %r14,%r14 # Start relocated kernel +.Lstartup_kdump: + .long 0x00000000,0x00000000 + startup_kdump_relocated +.Lcopy_end: + +# +# Startup of kdump (relocated new kernel) +# +.align 2 +startup_kdump_relocated: + basr %r13,0 +0: + mvc 0(8,%r0),.Lrestart_psw-0b(%r13) # Setup restart PSW + mvc 464(16,%r0),.Lpgm_psw-0b(%r13) # Setup pgm check PSW + lhi %r1,1 # Start new kernel + diag %r1,%r1,0x308 # with diag 308 + +.Lno_diag308: # No diag 308 + sam31 # Switch to 31 bit addr mode + sr %r1,%r1 # Erase register r1 + sr %r2,%r2 # Erase register r2 + sigp %r1,%r2,0x12 # Switch to 31 bit arch mode + lpsw 0 # Start new kernel... 
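The data mover listed above runs at DATAMOVER_ADDR and exchanges the reserved crashkernel area with the lowest real memory in 256-byte blocks (via the scratch page at COPY_PAGE_ADDR) before branching into the relocated kernel; startup_kdump_relocated then installs restart and program-check PSWs and IPLs the relocated kernel with diag 0x308, dropping back to 31-bit mode and lpsw 0 if diag 308 program-checks. As a rough C sketch only (not part of the patch; the helper name and pointer arguments are invented here), the swap amounts to the following, assuming the region size is a multiple of 256 bytes as enforced by KEXEC_CRASH_MEM_ALIGN:

#include <string.h>

/* Illustrative sketch of the data mover's swap loop, not patch code. */
static void exchange_regions(unsigned char *low, unsigned char *reserved,
			     unsigned long size)
{
	unsigned char tmp[256];
	unsigned long off;

	for (off = 0; off < size; off += sizeof(tmp)) {
		memcpy(tmp, low + off, sizeof(tmp));            /* old kernel block -> scratch      */
		memcpy(low + off, reserved + off, sizeof(tmp)); /* kdump kernel block -> low memory */
		memcpy(reserved + off, tmp, sizeof(tmp));       /* old kernel block -> reserved area */
	}
}

The scratch buffer is what allows the two regions to be swapped in place, so the first pages of the crashed kernel survive in the reserved area and can later be read back through /proc/vmcore.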
+.align 8 +.Lrestart_psw: + .long 0x00080000,0x80000000 + startup +.Lpgm_psw: + .quad 0x0000000180000000,0x0000000000000000 + .Lno_diag308 +#else +.align 2 +.Lep_startup_kdump: +#ifdef CONFIG_64BIT + larl %r13,startup_kdump_crash + lpswe 0(%r13) +.align 8 +startup_kdump_crash: + .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash +#else + basr %r13,0 +0: lpsw startup_kdump_crash-0b(%r13) +.align 8 +startup_kdump_crash: + .long 0x000a0000,0x00000000 + startup_kdump_crash +#endif /* CONFIG_64BIT */ +#endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 48c7102..affa8e6 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -16,6 +16,7 @@ #include <linux/ctype.h> #include <linux/fs.h> #include <linux/gfp.h> +#include <linux/crash_dump.h> #include <asm/ipl.h> #include <asm/smp.h> #include <asm/setup.h> @@ -26,6 +27,7 @@ #include <asm/sclp.h> #include <asm/sigp.h> #include <asm/checksum.h> +#include "entry.h" #define IPL_PARM_BLOCK_VERSION 0 @@ -275,8 +277,8 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); /* VM IPL PARM routines */ -size_t reipl_get_ascii_vmparm(char *dest, size_t size, - const struct ipl_parameter_block *ipb) +static size_t reipl_get_ascii_vmparm(char *dest, size_t size, + const struct ipl_parameter_block *ipb) { int i; size_t len; @@ -338,8 +340,8 @@ static size_t scpdata_length(const char* buf, size_t count) return count; } -size_t reipl_append_ascii_scpdata(char *dest, size_t size, - const struct ipl_parameter_block *ipb) +static size_t reipl_append_ascii_scpdata(char *dest, size_t size, + const struct ipl_parameter_block *ipb) { size_t count; size_t i; @@ -1738,7 +1740,11 @@ static struct kobj_attribute on_restart_attr = void do_restart(void) { + smp_restart_with_online_cpu(); smp_send_stop(); +#ifdef CONFIG_CRASH_DUMP + crash_kexec(NULL); +#endif on_restart_trigger.action->fn(&on_restart_trigger); stop_run(&on_restart_trigger); } @@ -2009,7 +2015,7 @@ static void do_reset_calls(void) u32 dump_prefix_page; -void s390_reset_system(void) +void s390_reset_system(void (*func)(void *), void *data) { struct _lowcore *lc; @@ -2028,15 +2034,19 @@ void s390_reset_system(void) __ctl_clear_bit(0,28); /* Set new machine check handler */ - S390_lowcore.mcck_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; + S390_lowcore.mcck_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT; S390_lowcore.mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler; /* Set new program check handler */ - S390_lowcore.program_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT; S390_lowcore.program_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; + /* Store status at absolute zero */ + store_status(); + do_reset_calls(); + if (func) + func(data); } - diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 1f4050d..b9a7fdd 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -33,7 +33,8 @@ static const struct irq_class intrclass_names[] = { {.name = "EXT" }, {.name = "I/O" }, {.name = "CLK", .desc = "[EXT] Clock Comparator" }, - {.name = "IPI", .desc = "[EXT] Signal Processor" }, + {.name = "EXC", .desc = "[EXT] External Call" }, + {.name = "EMS", .desc = "[EXT] Emergency Signal" }, {.name = "TMR", .desc = "[EXT] CPU Timer" }, {.name = "TAL", .desc = "[EXT] Timing Alert" }, {.name = "PFL", .desc = "[EXT] 
Pseudo Page Fault" }, @@ -42,8 +43,8 @@ static const struct irq_class intrclass_names[] = { {.name = "SCP", .desc = "[EXT] Service Call" }, {.name = "IUC", .desc = "[EXT] IUCV" }, {.name = "CPM", .desc = "[EXT] CPU Measurement" }, + {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt" }, {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" }, - {.name = "QDI", .desc = "[I/O] QDIO Interrupt" }, {.name = "DAS", .desc = "[I/O] DASD" }, {.name = "C15", .desc = "[I/O] 3215" }, {.name = "C70", .desc = "[I/O] 3270" }, @@ -53,6 +54,7 @@ static const struct irq_class intrclass_names[] = { {.name = "CLW", .desc = "[I/O] CLAW" }, {.name = "CTC", .desc = "[I/O] CTC" }, {.name = "APB", .desc = "[I/O] AP Bus" }, + {.name = "CSC", .desc = "[I/O] CHSC Subchannel" }, {.name = "NMI", .desc = "[NMI] Machine Check" }, }; diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 1d05d66..64b761a 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -635,7 +635,7 @@ void __kprobes jprobe_return(void) asm volatile(".word 0x0002"); } -void __kprobes jprobe_return_end(void) +static void __used __kprobes jprobe_return_end(void) { asm volatile("bcr 0,0"); } diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index b09b9c6..3cd0f25 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -1,10 +1,11 @@ /* * arch/s390/kernel/machine_kexec.c * - * Copyright IBM Corp. 2005,2006 + * Copyright IBM Corp. 2005,2011 * * Author(s): Rolf Adelsberger, * Heiko Carstens <heiko.carstens@de.ibm.com> + * Michael Holzheu <holzheu@linux.vnet.ibm.com> */ #include <linux/device.h> @@ -21,12 +22,162 @@ #include <asm/smp.h> #include <asm/reset.h> #include <asm/ipl.h> +#include <asm/diag.h> +#include <asm/asm-offsets.h> typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); extern const unsigned char relocate_kernel[]; extern const unsigned long long relocate_kernel_len; +#ifdef CONFIG_CRASH_DUMP + +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa); + +/* + * Create ELF notes for one CPU + */ +static void add_elf_notes(int cpu) +{ + struct save_area *sa = (void *) 4608 + store_prefix(); + void *ptr; + + memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa)); + ptr = (u64 *) per_cpu_ptr(crash_notes, cpu); + ptr = fill_cpu_elf_notes(ptr, sa); + memset(ptr, 0, sizeof(struct elf_note)); +} + +/* + * Store status of next available physical CPU + */ +static int store_status_next(int start_cpu, int this_cpu) +{ + struct save_area *sa = (void *) 4608 + store_prefix(); + int cpu, rc; + + for (cpu = start_cpu; cpu < 65536; cpu++) { + if (cpu == this_cpu) + continue; + do { + rc = raw_sigp(cpu, sigp_stop_and_store_status); + } while (rc == sigp_busy); + if (rc != sigp_order_code_accepted) + continue; + if (sa->pref_reg) + return cpu; + } + return -1; +} + +/* + * Initialize CPU ELF notes + */ +void setup_regs(void) +{ + unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; + int cpu, this_cpu, phys_cpu = 0, first = 1; + + this_cpu = stap(); + + if (!S390_lowcore.prefixreg_save_area) + first = 0; + for_each_online_cpu(cpu) { + if (first) { + add_elf_notes(cpu); + first = 0; + continue; + } + phys_cpu = store_status_next(phys_cpu, this_cpu); + if (phys_cpu == -1) + break; + add_elf_notes(cpu); + phys_cpu++; + } + /* Copy dump CPU store status info to absolute zero */ + memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); +} + +#endif + +/* + * Start kdump: We expect here that a store 
status has been done on our CPU + */ +static void __do_machine_kdump(void *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)((struct kimage *) image)->start; + + __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA); + setup_regs(); + start_kdump(1); +#endif +} + +/* + * Check if kdump checksums are valid: We call purgatory with parameter "0" + */ +static int kdump_csum_valid(struct kimage *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)image->start; + int rc; + + __arch_local_irq_stnsm(0xfb); /* disable DAT */ + rc = start_kdump(0); + __arch_local_irq_stosm(0x04); /* enable DAT */ + return rc ? 0 : -EINVAL; +#else + return -EINVAL; +#endif +} + +/* + * Map or unmap crashkernel memory + */ +static void crash_map_pages(int enable) +{ + unsigned long size = resource_size(&crashk_res); + + BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN || + size % KEXEC_CRASH_MEM_ALIGN); + if (enable) + vmem_add_mapping(crashk_res.start, size); + else + vmem_remove_mapping(crashk_res.start, size); +} + +/* + * Map crashkernel memory + */ +void crash_map_reserved_pages(void) +{ + crash_map_pages(1); +} + +/* + * Unmap crashkernel memory + */ +void crash_unmap_reserved_pages(void) +{ + crash_map_pages(0); +} + +/* + * Give back memory to hypervisor before new kdump is loaded + */ +static int machine_kexec_prepare_kdump(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (MACHINE_IS_VM) + diag10_range(PFN_DOWN(crashk_res.start), + PFN_DOWN(crashk_res.end - crashk_res.start + 1)); + return 0; +#else + return -EINVAL; +#endif +} + int machine_kexec_prepare(struct kimage *image) { void *reboot_code_buffer; @@ -35,6 +186,9 @@ int machine_kexec_prepare(struct kimage *image) if (ipl_flags & IPL_NSS_VALID) return -ENOSYS; + if (image->type == KEXEC_TYPE_CRASH) + return machine_kexec_prepare_kdump(); + /* We don't support anything but the default image type for now. */ if (image->type != KEXEC_TYPE_DEFAULT) return -EINVAL; @@ -51,27 +205,53 @@ void machine_kexec_cleanup(struct kimage *image) { } +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_SYMBOL(lowcore_ptr); + VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); +} + void machine_shutdown(void) { } -static void __machine_kexec(void *data) +/* + * Do normal kexec + */ +static void __do_machine_kexec(void *data) { relocate_kernel_t data_mover; struct kimage *image = data; - pfault_fini(); - s390_reset_system(); - data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); /* Call the moving routine */ (*data_mover)(&image->head, image->start); - for (;;); } +/* + * Reset system and call either kdump or normal kexec + */ +static void __machine_kexec(void *data) +{ + struct kimage *image = data; + + pfault_fini(); + if (image->type == KEXEC_TYPE_CRASH) + s390_reset_system(__do_machine_kdump, data); + else + s390_reset_system(__do_machine_kexec, data); + disabled_wait((unsigned long) __builtin_return_address(0)); +} + +/* + * Do either kdump or normal kexec. In case of kdump we first ask + * purgatory, if kdump checksums are valid. 
+ */ void machine_kexec(struct kimage *image) { + if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image)) + return; tracer_disable(); smp_send_stop(); smp_switch_to_ipl_cpu(__machine_kexec, image); diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c index 0fbe4e3..19b4568 100644 --- a/arch/s390/kernel/mem_detect.c +++ b/arch/s390/kernel/mem_detect.c @@ -62,3 +62,72 @@ void detect_memory_layout(struct mem_chunk chunk[]) arch_local_irq_restore(flags); } EXPORT_SYMBOL(detect_memory_layout); + +/* + * Create memory hole with given address, size, and type + */ +void create_mem_hole(struct mem_chunk chunks[], unsigned long addr, + unsigned long size, int type) +{ + unsigned long start, end, new_size; + int i; + + for (i = 0; i < MEMORY_CHUNKS; i++) { + if (chunks[i].size == 0) + continue; + if (addr + size < chunks[i].addr) + continue; + if (addr >= chunks[i].addr + chunks[i].size) + continue; + start = max(addr, chunks[i].addr); + end = min(addr + size, chunks[i].addr + chunks[i].size); + new_size = end - start; + if (new_size == 0) + continue; + if (start == chunks[i].addr && + end == chunks[i].addr + chunks[i].size) { + /* Remove chunk */ + chunks[i].type = type; + } else if (start == chunks[i].addr) { + /* Make chunk smaller at start */ + if (i >= MEMORY_CHUNKS - 1) + panic("Unable to create memory hole"); + memmove(&chunks[i + 1], &chunks[i], + sizeof(struct mem_chunk) * + (MEMORY_CHUNKS - (i + 1))); + chunks[i + 1].addr = chunks[i].addr + new_size; + chunks[i + 1].size = chunks[i].size - new_size; + chunks[i].size = new_size; + chunks[i].type = type; + i += 1; + } else if (end == chunks[i].addr + chunks[i].size) { + /* Make chunk smaller at end */ + if (i >= MEMORY_CHUNKS - 1) + panic("Unable to create memory hole"); + memmove(&chunks[i + 1], &chunks[i], + sizeof(struct mem_chunk) * + (MEMORY_CHUNKS - (i + 1))); + chunks[i + 1].addr = start; + chunks[i + 1].size = new_size; + chunks[i + 1].type = type; + chunks[i].size -= new_size; + i += 1; + } else { + /* Create memory hole */ + if (i >= MEMORY_CHUNKS - 2) + panic("Unable to create memory hole"); + memmove(&chunks[i + 2], &chunks[i], + sizeof(struct mem_chunk) * + (MEMORY_CHUNKS - (i + 2))); + chunks[i + 1].addr = addr; + chunks[i + 1].size = size; + chunks[i + 1].type = type; + chunks[i + 2].addr = addr + size; + chunks[i + 2].size = + chunks[i].addr + chunks[i].size - (addr + size); + chunks[i + 2].type = chunks[i].type; + chunks[i].size = addr - chunks[i].addr; + i += 2; + } + } +} diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 541a750..9451b21 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/elfcore.h> #include <linux/smp.h> #include <linux/slab.h> #include <linux/interrupt.h> @@ -117,7 +118,8 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) struct pt_regs regs; memset(®s, 0, sizeof(regs)); - regs.psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; + regs.psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE; regs.gprs[9] = (unsigned long) fn; regs.gprs[10] = (unsigned long) arg; diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 311e9d7..6e0073e 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -74,7 +74,7 @@ static int show_cpuinfo(struct seq_file *m, 
void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? (void *)((unsigned long) *pos + 1) : NULL; + return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ef86ad2..450931a 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -42,34 +42,37 @@ enum s390_regset { REGSET_GENERAL, REGSET_FP, REGSET_LAST_BREAK, + REGSET_SYSTEM_CALL, REGSET_GENERAL_EXTENDED, }; void update_per_regs(struct task_struct *task) { - static const struct per_regs per_single_step = { - .control = PER_EVENT_IFETCH, - .start = 0, - .end = PSW_ADDR_INSN, - }; struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; - const struct per_regs *new; - struct per_regs old; - - /* TIF_SINGLE_STEP overrides the user specified PER registers. */ - new = test_tsk_thread_flag(task, TIF_SINGLE_STEP) ? - &per_single_step : &thread->per_user; + struct per_regs old, new; + + /* Copy user specified PER registers */ + new.control = thread->per_user.control; + new.start = thread->per_user.start; + new.end = thread->per_user.end; + + /* merge TIF_SINGLE_STEP into user specified PER registers. */ + if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { + new.control |= PER_EVENT_IFETCH; + new.start = 0; + new.end = PSW_ADDR_INSN; + } /* Take care of the PER enablement bit in the PSW. */ - if (!(new->control & PER_EVENT_MASK)) { + if (!(new.control & PER_EVENT_MASK)) { regs->psw.mask &= ~PSW_MASK_PER; return; } regs->psw.mask |= PSW_MASK_PER; __ctl_store(old, 9, 11); - if (memcmp(new, &old, sizeof(struct per_regs)) != 0) - __ctl_load(*new, 9, 11); + if (memcmp(&new, &old, sizeof(struct per_regs)) != 0) + __ctl_load(new, 9, 11); } void user_enable_single_step(struct task_struct *task) @@ -166,8 +169,8 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) */ tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); if (addr == (addr_t) &dummy->regs.psw.mask) - /* Remove per bit from user psw. */ - tmp &= ~PSW_MASK_PER; + /* Return a clean psw mask. */ + tmp = psw_user_bits | (tmp & PSW_MASK_USER); } else if (addr < (addr_t) &dummy->regs.orig_gpr2) { /* @@ -289,18 +292,17 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) * psw and gprs are stored on the stack */ if (addr == (addr_t) &dummy->regs.psw.mask && -#ifdef CONFIG_COMPAT - data != PSW_MASK_MERGE(psw_user32_bits, data) && -#endif - data != PSW_MASK_MERGE(psw_user_bits, data)) + ((data & ~PSW_MASK_USER) != psw_user_bits || + ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)))) /* Invalid psw mask. */ return -EINVAL; -#ifndef CONFIG_64BIT if (addr == (addr_t) &dummy->regs.psw.addr) - /* I'd like to reject addresses without the - high order bit but older gdb's rely on it */ - data |= PSW_ADDR_AMODE; -#endif + /* + * The debugger changed the instruction address, + * reset system call restart, see signal.c:do_signal + */ + task_thread_info(child)->system_call = 0; + *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { @@ -495,21 +497,21 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) __u32 tmp; if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Fake a 31 bit psw mask. 
*/ - tmp = (__u32)(task_pt_regs(child)->psw.mask >> 32); - tmp = PSW32_MASK_MERGE(psw32_user_bits, tmp); + tmp = (__u32)(regs->psw.mask >> 32); + tmp = psw32_user_bits | (tmp & PSW32_MASK_USER); } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Fake a 31 bit psw address. */ - tmp = (__u32) task_pt_regs(child)->psw.addr | - PSW32_ADDR_AMODE31; + tmp = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); } else { /* gpr 0-15 */ - tmp = *(__u32 *)((addr_t) &task_pt_regs(child)->psw + - addr*2 + 4); + tmp = *(__u32 *)((addr_t) ®s->psw + addr*2 + 4); } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* @@ -594,24 +596,32 @@ static int __poke_user_compat(struct task_struct *child, addr_t offset; if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw, gprs, acrs and orig_gpr2 are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Build a 64 bit psw mask from 31 bit mask. */ - if (tmp != PSW32_MASK_MERGE(psw32_user_bits, tmp)) + if ((tmp & ~PSW32_MASK_USER) != psw32_user_bits) /* Invalid psw mask. */ return -EINVAL; - task_pt_regs(child)->psw.mask = - PSW_MASK_MERGE(psw_user32_bits, (__u64) tmp << 32); + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (regs->psw.mask & PSW_MASK_BA) | + (__u64)(tmp & PSW32_MASK_USER) << 32; } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Build a 64 bit psw address from 31 bit address. */ - task_pt_regs(child)->psw.addr = - (__u64) tmp & PSW32_ADDR_INSN; + regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; + /* Transfer 31 bit amode bit to psw mask. */ + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | + (__u64)(tmp & PSW32_ADDR_AMODE); + /* + * The debugger changed the instruction address, + * reset system call restart, see signal.c:do_signal + */ + task_thread_info(child)->system_call = 0; } else { /* gpr 0-15 */ - *(__u32*)((addr_t) &task_pt_regs(child)->psw - + addr*2 + 4) = tmp; + *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* @@ -735,7 +745,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * debugger stored an invalid system call number. Skip * the system call and the system call restart handling. 
*/ - regs->svcnr = 0; + clear_thread_flag(TIF_SYSCALL); ret = -1; } @@ -897,6 +907,26 @@ static int s390_last_break_get(struct task_struct *target, #endif +static int s390_system_call_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + +static int s390_system_call_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + static const struct user_regset s390_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, @@ -923,6 +953,14 @@ static const struct user_regset s390_regsets[] = { .get = s390_last_break_get, }, #endif + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(unsigned int), + .align = sizeof(unsigned int), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, }; static const struct user_regset_view user_s390_view = { @@ -1102,6 +1140,14 @@ static const struct user_regset s390_compat_regsets[] = { .align = sizeof(long), .get = s390_compat_last_break_get, }, + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(compat_uint_t), + .align = sizeof(compat_uint_t), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, [REGSET_GENERAL_EXTENDED] = { .core_note_type = NT_S390_HIGH_GPRS, .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 303d961c..ad67c21 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -10,6 +10,12 @@ #include <asm/asm-offsets.h> # +# store_status: Empty implementation until kdump is supported on 31 bit +# +ENTRY(store_status) + br %r14 + +# # do_reipl_asm # Parameter: r2 = schid of reipl device # diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index e690975..732a793 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -17,11 +17,11 @@ # ENTRY(store_status) /* Save register one and load save area base */ - stg %r1,__LC_SAVE_AREA_64(%r0) + stg %r1,__LC_SAVE_AREA+120(%r0) lghi %r1,SAVE_AREA_BASE /* General purpose registers */ stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - lg %r2,__LC_SAVE_AREA_64(%r0) + lg %r2,__LC_SAVE_AREA+120(%r0) stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1) /* Control registers */ stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) @@ -62,8 +62,11 @@ ENTRY(store_status) larl %r2,store_status stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1) br %r14 -.align 8 + + .section .bss + .align 8 .Lclkcmp: .quad 0x0000000000000000 + .previous # # do_reipl_asm diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7b371c3..8ac6bfa 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -42,6 +42,9 @@ #include <linux/reboot.h> #include <linux/topology.h> #include <linux/ftrace.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> +#include <linux/memory.h> #include <asm/ipl.h> #include <asm/uaccess.h> @@ -57,12 +60,13 @@ #include <asm/ebcdic.h> #include <asm/compat.h> #include <asm/kvm_virtio.h> +#include <asm/diag.h> -long psw_kernel_bits = 
(PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | - PSW_MASK_MCHECK | PSW_DEFAULT_KEY); -long psw_user_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY); +long psw_kernel_bits = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY | + PSW_MASK_EA | PSW_MASK_BA; +long psw_user_bits = PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | + PSW_MASK_PSTATE | PSW_ASC_HOME; /* * User copy operations. @@ -274,22 +278,14 @@ early_param("mem", early_parse_mem); unsigned int user_mode = HOME_SPACE_MODE; EXPORT_SYMBOL_GPL(user_mode); -static int set_amode_and_uaccess(unsigned long user_amode, - unsigned long user32_amode) +static int set_amode_primary(void) { - psw_user_bits = PSW_BASE_BITS | PSW_MASK_DAT | user_amode | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY; + psw_kernel_bits = (psw_kernel_bits & ~PSW_MASK_ASC) | PSW_ASC_HOME; + psw_user_bits = (psw_user_bits & ~PSW_MASK_ASC) | PSW_ASC_PRIMARY; #ifdef CONFIG_COMPAT - psw_user32_bits = PSW_BASE32_BITS | PSW_MASK_DAT | user_amode | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY; - psw32_user_bits = PSW32_BASE_BITS | PSW32_MASK_DAT | user32_amode | - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | - PSW32_MASK_PSTATE; + psw32_user_bits = + (psw32_user_bits & ~PSW32_MASK_ASC) | PSW32_ASC_PRIMARY; #endif - psw_kernel_bits = PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_MCHECK | PSW_DEFAULT_KEY; if (MACHINE_HAS_MVCOS) { memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess)); @@ -325,7 +321,7 @@ early_param("user_mode", early_parse_user_mode); static void setup_addressing_mode(void) { if (user_mode == PRIMARY_SPACE_MODE) { - if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY)) + if (set_amode_primary()) pr_info("Address spaces switched, " "mvcos available\n"); else @@ -344,24 +340,25 @@ setup_lowcore(void) */ BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096); lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); - lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lc->restart_psw.mask = psw_kernel_bits; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; - if (user_mode != HOME_SPACE_MODE) - lc->restart_psw.mask |= PSW_ASC_HOME; - lc->external_new_psw.mask = psw_kernel_bits; + lc->external_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->external_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; + lc->svc_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call; - lc->program_new_psw.mask = psw_kernel_bits; + lc->program_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->program_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long)pgm_check_handler; - lc->mcck_new_psw.mask = - psw_kernel_bits & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT; + PSW_ADDR_AMODE | (unsigned long) pgm_check_handler; + lc->mcck_new_psw.mask = psw_kernel_bits; lc->mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = psw_kernel_bits; + lc->io_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; lc->clock_comparator = -1ULL; lc->kernel_stack = 
((unsigned long) &init_thread_union) + THREAD_SIZE; @@ -435,10 +432,14 @@ static void __init setup_resources(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; + if (memory_chunk[i].type == CHUNK_OLDMEM || + memory_chunk[i].type == CHUNK_CRASHK) + continue; res = alloc_bootmem_low(sizeof(*res)); res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; switch (memory_chunk[i].type) { case CHUNK_READ_WRITE: + case CHUNK_CRASHK: res->name = "System RAM"; break; case CHUNK_READ_ONLY: @@ -479,6 +480,7 @@ static void __init setup_memory_end(void) unsigned long max_mem; int i; + #ifdef CONFIG_ZFCPDUMP if (ipl_info.type == IPL_TYPE_FCP_DUMP) { memory_end = ZFCPDUMP_HSA_SIZE; @@ -545,11 +547,201 @@ static void __init setup_restart_psw(void) * Setup restart PSW for absolute zero lowcore. This is necesary * if PSW restart is done on an offline CPU that has lowcore zero */ - psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + psw.mask = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw)); } +static void __init setup_vmcoreinfo(void) +{ +#ifdef CONFIG_KEXEC + unsigned long ptr = paddr_vmcoreinfo_note(); + + copy_to_absolute_zero(&S390_lowcore.vmcore_info, &ptr, sizeof(ptr)); +#endif +} + +#ifdef CONFIG_CRASH_DUMP + +/* + * Find suitable location for crashkernel memory + */ +static unsigned long __init find_crash_base(unsigned long crash_size, + char **msg) +{ + unsigned long crash_base; + struct mem_chunk *chunk; + int i; + + if (memory_chunk[0].size < crash_size) { + *msg = "first memory chunk must be at least crashkernel size"; + return 0; + } + if (is_kdump_kernel() && (crash_size == OLDMEM_SIZE)) + return OLDMEM_BASE; + + for (i = MEMORY_CHUNKS - 1; i >= 0; i--) { + chunk = &memory_chunk[i]; + if (chunk->size == 0) + continue; + if (chunk->type != CHUNK_READ_WRITE) + continue; + if (chunk->size < crash_size) + continue; + crash_base = (chunk->addr + chunk->size) - crash_size; + if (crash_base < crash_size) + continue; + if (crash_base < ZFCPDUMP_HSA_SIZE_MAX) + continue; + if (crash_base < (unsigned long) INITRD_START + INITRD_SIZE) + continue; + return crash_base; + } + *msg = "no suitable area found"; + return 0; +} + +/* + * Check if crash_base and crash_size is valid + */ +static int __init verify_crash_base(unsigned long crash_base, + unsigned long crash_size, + char **msg) +{ + struct mem_chunk *chunk; + int i; + + /* + * Because we do the swap to zero, we must have at least 'crash_size' + * bytes free space before crash_base + */ + if (crash_size > crash_base) { + *msg = "crashkernel offset must be greater than size"; + return -EINVAL; + } + + /* First memory chunk must be at least crash_size */ + if (memory_chunk[0].size < crash_size) { + *msg = "first memory chunk must be at least crashkernel size"; + return -EINVAL; + } + /* Check if we fit into the respective memory chunk */ + for (i = 0; i < MEMORY_CHUNKS; i++) { + chunk = &memory_chunk[i]; + if (chunk->size == 0) + continue; + if (crash_base < chunk->addr) + continue; + if (crash_base >= chunk->addr + chunk->size) + continue; + /* we have found the memory chunk */ + if (crash_base + crash_size > chunk->addr + chunk->size) { + *msg = "selected memory chunk is too small for " + "crashkernel memory"; + return -EINVAL; + } + return 0; + } + *msg = "invalid memory range specified"; + return -EINVAL; +} + +/* + * Reserve kdump memory by creating a memory hole in the mem_chunk array + 
*/ +static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size, + int type) +{ + + create_mem_hole(memory_chunk, addr, size, type); +} + +/* + * When kdump is enabled, we have to ensure that no memory from + * the area [0 - crashkernel memory size] and + * [crashk_res.start - crashk_res.end] is set offline. + */ +static int kdump_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct memory_notify *arg = data; + + if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) + return NOTIFY_BAD; + if (arg->start_pfn > PFN_DOWN(crashk_res.end)) + return NOTIFY_OK; + if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start)) + return NOTIFY_OK; + return NOTIFY_BAD; +} + +static struct notifier_block kdump_mem_nb = { + .notifier_call = kdump_mem_notifier, +}; + +#endif + +/* + * Make sure that oldmem, where the dump is stored, is protected + */ +static void reserve_oldmem(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (!OLDMEM_BASE) + return; + + reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM); + reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE, + CHUNK_OLDMEM); + if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size) + saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; + else + saved_max_pfn = PFN_DOWN(real_memory_size) - 1; +#endif +} + +/* + * Reserve memory for kdump kernel to be loaded with kexec + */ +static void __init reserve_crashkernel(void) +{ +#ifdef CONFIG_CRASH_DUMP + unsigned long long crash_base, crash_size; + char *msg; + int rc; + + rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, + &crash_base); + if (rc || crash_size == 0) + return; + crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); + crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); + if (register_memory_notifier(&kdump_mem_nb)) + return; + if (!crash_base) + crash_base = find_crash_base(crash_size, &msg); + if (!crash_base) { + pr_info("crashkernel reservation failed: %s\n", msg); + unregister_memory_notifier(&kdump_mem_nb); + return; + } + if (verify_crash_base(crash_base, crash_size, &msg)) { + pr_info("crashkernel reservation failed: %s\n", msg); + unregister_memory_notifier(&kdump_mem_nb); + return; + } + if (!OLDMEM_BASE && MACHINE_IS_VM) + diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); + reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK); + pr_info("Reserving %lluMB of memory at %lluMB " + "for crashkernel (System RAM: %luMB)\n", + crash_size >> 20, crash_base >> 20, memory_end >> 20); +#endif +} + static void __init setup_memory(void) { @@ -580,6 +772,14 @@ setup_memory(void) if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) { start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE; +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) { + /* Move initrd behind kdump oldmem */ + if (start + INITRD_SIZE > OLDMEM_BASE && + start < OLDMEM_BASE + OLDMEM_SIZE) + start = OLDMEM_BASE + OLDMEM_SIZE; + } +#endif if (start + INITRD_SIZE > memory_end) { pr_err("initrd extends beyond end of " "memory (0x%08lx > 0x%08lx) " @@ -610,7 +810,8 @@ setup_memory(void) for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { unsigned long start_chunk, end_chunk, pfn; - if (memory_chunk[i].type != CHUNK_READ_WRITE) + if (memory_chunk[i].type != CHUNK_READ_WRITE && + memory_chunk[i].type != CHUNK_CRASHK) continue; start_chunk = PFN_DOWN(memory_chunk[i].addr); end_chunk = start_chunk + 
PFN_DOWN(memory_chunk[i].size); @@ -644,6 +845,15 @@ setup_memory(void) reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size, BOOTMEM_DEFAULT); +#ifdef CONFIG_CRASH_DUMP + if (crashk_res.start) + reserve_bootmem(crashk_res.start, + crashk_res.end - crashk_res.start + 1, + BOOTMEM_DEFAULT); + if (is_kdump_kernel()) + reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE, + PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT); +#endif #ifdef CONFIG_BLK_DEV_INITRD if (INITRD_START && INITRD_SIZE) { if (INITRD_START + INITRD_SIZE <= memory_end) { @@ -812,8 +1022,11 @@ setup_arch(char **cmdline_p) setup_ipl(); setup_memory_end(); setup_addressing_mode(); + reserve_oldmem(); + reserve_crashkernel(); setup_memory(); setup_resources(); + setup_vmcoreinfo(); setup_restart_psw(); setup_lowcore(); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 9a40e1c..05a85bc 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -30,6 +30,7 @@ #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/lowcore.h> +#include <asm/compat.h> #include "entry.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -116,7 +117,8 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ - user_sregs.regs.psw.mask = PSW_MASK_MERGE(psw_user_bits, regs->psw.mask); + user_sregs.regs.psw.mask = psw_user_bits | + (regs->psw.mask & PSW_MASK_USER); user_sregs.regs.psw.addr = regs->psw.addr; memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, @@ -143,9 +145,13 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs)); if (err) return err; - regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask, - user_sregs.regs.psw.mask); - regs->psw.addr = PSW_ADDR_AMODE | user_sregs.regs.psw.addr; + /* Use regs->psw.mask instead of psw_user_bits to preserve PER bit. 
*/ + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (user_sregs.regs.psw.mask & PSW_MASK_USER); + /* Check for invalid amode */ + if (regs->psw.mask & PSW_MASK_EA) + regs->psw.mask |= PSW_MASK_BA; + regs->psw.addr = user_sregs.regs.psw.addr; memcpy(®s->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, sizeof(sregs->regs.acrs)); @@ -156,7 +162,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) current->thread.fp_regs.fpc &= FPC_VALID_MASK; restore_fp_regs(¤t->thread.fp_regs); - regs->svcnr = 0; /* disable syscall checks */ + clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ return 0; } @@ -288,6 +294,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */ regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); @@ -356,6 +363,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */ regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); @@ -401,7 +409,6 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka, */ void do_signal(struct pt_regs *regs) { - unsigned long retval = 0, continue_addr = 0, restart_addr = 0; siginfo_t info; int signr; struct k_sigaction ka; @@ -421,54 +428,45 @@ void do_signal(struct pt_regs *regs) else oldset = ¤t->blocked; - /* Are we from a system call? */ - if (regs->svcnr) { - continue_addr = regs->psw.addr; - restart_addr = continue_addr - regs->ilc; - retval = regs->gprs[2]; - - /* Prepare for system call restart. We do this here so that a - debugger will see the already changed PSW. */ - switch (retval) { - case -ERESTARTNOHAND: - case -ERESTARTSYS: - case -ERESTARTNOINTR: - regs->gprs[2] = regs->orig_gpr2; - regs->psw.addr = restart_addr; - break; - case -ERESTART_RESTARTBLOCK: - regs->gprs[2] = -EINTR; - } - regs->svcnr = 0; /* Don't deal with this again. */ - } - - /* Get signal to deliver. When running under ptrace, at this point - the debugger may change all our registers ... */ + /* + * Get signal to deliver. When running under ptrace, at this point + * the debugger may change all our registers, including the system + * call information. + */ + current_thread_info()->system_call = + test_thread_flag(TIF_SYSCALL) ? regs->svc_code : 0; signr = get_signal_to_deliver(&info, &ka, regs, NULL); - /* Depending on the signal settings we may need to revert the - decision to restart the system call. */ - if (signr > 0 && regs->psw.addr == restart_addr) { - if (retval == -ERESTARTNOHAND - || (retval == -ERESTARTSYS - && !(current->sighand->action[signr-1].sa.sa_flags - & SA_RESTART))) { - regs->gprs[2] = -EINTR; - regs->psw.addr = continue_addr; - } - } - if (signr > 0) { /* Whee! Actually deliver the signal. */ - int ret; -#ifdef CONFIG_COMPAT - if (is_compat_task()) { - ret = handle_signal32(signr, &ka, &info, oldset, regs); - } - else -#endif - ret = handle_signal(signr, &ka, &info, oldset, regs); - if (!ret) { + if (current_thread_info()->system_call) { + regs->svc_code = current_thread_info()->system_call; + /* Check for system call restarting. 
*/ + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->gprs[2] = -EINTR; + break; + case -ERESTARTSYS: + if (!(ka.sa.sa_flags & SA_RESTART)) { + regs->gprs[2] = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->gprs[2] = regs->orig_gpr2; + regs->psw.addr = + __rewind_psw(regs->psw, + regs->svc_code >> 16); + break; + } + /* No longer in a system call */ + clear_thread_flag(TIF_SYSCALL); + } + + if ((is_compat_task() ? + handle_signal32(signr, &ka, &info, oldset, regs) : + handle_signal(signr, &ka, &info, oldset, regs)) == 0) { /* * A signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, @@ -482,11 +480,32 @@ void do_signal(struct pt_regs *regs) * Let tracing know that we've done the handler setup. */ tracehook_signal_handler(signr, &info, &ka, regs, - test_thread_flag(TIF_SINGLE_STEP)); + test_thread_flag(TIF_SINGLE_STEP)); } return; } + /* No handlers present - check for system call restart */ + if (current_thread_info()->system_call) { + regs->svc_code = current_thread_info()->system_call; + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + /* Restart with sys_restart_syscall */ + regs->svc_code = __NR_restart_syscall; + /* fallthrough */ + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + /* Restart system call with magic TIF bit. */ + regs->gprs[2] = regs->orig_gpr2; + set_thread_flag(TIF_SYSCALL); + break; + default: + clear_thread_flag(TIF_SYSCALL); + break; + } + } + /* * If there's no signal to deliver, we just put the saved sigmask back. */ @@ -494,13 +513,6 @@ void do_signal(struct pt_regs *regs) clear_thread_flag(TIF_RESTORE_SIGMASK); sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } - - /* Restart a different system call. 
*/ - if (retval == -ERESTART_RESTARTBLOCK - && regs->psw.addr == continue_addr) { - regs->gprs[2] = __NR_restart_syscall; - set_thread_flag(TIF_RESTART_SVC); - } } void do_notify_resume(struct pt_regs *regs) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 6ab16ac..3ea8728 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -38,6 +38,7 @@ #include <linux/timex.h> #include <linux/bootmem.h> #include <linux/slab.h> +#include <linux/crash_dump.h> #include <asm/asm-offsets.h> #include <asm/ipl.h> #include <asm/setup.h> @@ -97,6 +98,29 @@ static inline int cpu_stopped(int cpu) return raw_cpu_stopped(cpu_logical_map(cpu)); } +/* + * Ensure that PSW restart is done on an online CPU + */ +void smp_restart_with_online_cpu(void) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (stap() == __cpu_logical_map[cpu]) { + /* We are online: Enable DAT again and return */ + __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); + return; + } + } + /* We are not online: Do PSW restart on an online CPU */ + while (sigp(cpu, sigp_restart) == sigp_busy) + cpu_relax(); + /* And stop ourself */ + while (raw_sigp(stap(), sigp_stop) == sigp_busy) + cpu_relax(); + for (;;); +} + void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) { struct _lowcore *lc, *current_lc; @@ -106,14 +130,16 @@ void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) if (smp_processor_id() == 0) func(data); - __load_psw_mask(PSW_BASE_BITS | PSW_DEFAULT_KEY); + __load_psw_mask(PSW_DEFAULT_KEY | PSW_MASK_BASE | + PSW_MASK_EA | PSW_MASK_BA); /* Disable lowcore protection */ __ctl_clear_bit(0, 28); current_lc = lowcore_ptr[smp_processor_id()]; lc = lowcore_ptr[0]; if (!lc) lc = current_lc; - lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lc->restart_psw.mask = + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) smp_restart_cpu; if (!cpu_online(0)) smp_switch_to_cpu(func, data, 0, stap(), __cpu_logical_map[0]); @@ -135,7 +161,7 @@ void smp_send_stop(void) int cpu, rc; /* Disable all interrupts/machine checks */ - __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); + __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); trace_hardirqs_off(); /* stop all processors */ @@ -161,7 +187,10 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, { unsigned long bits; - kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++; + if (ext_int_code == 0x1202) + kstat_cpu(smp_processor_id()).irqs[EXTINT_EXC]++; + else + kstat_cpu(smp_processor_id()).irqs[EXTINT_EMS]++; /* * handle bit signal external calls */ @@ -183,12 +212,19 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, */ static void smp_ext_bitcall(int cpu, int sig) { + int order; + /* * Set signaling bit in lowcore of target cpu and kick it */ set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast); - while (sigp(cpu, sigp_emergency_signal) == sigp_busy) + while (1) { + order = smp_vcpu_scheduled(cpu) ? 
+ sigp_external_call : sigp_emergency_signal; + if (sigp(cpu, order) != sigp_busy) + break; udelay(10); + } } void arch_send_call_function_ipi_mask(const struct cpumask *mask) @@ -281,11 +317,13 @@ void smp_ctl_clear_bit(int cr, int bit) } EXPORT_SYMBOL(smp_ctl_clear_bit); -#ifdef CONFIG_ZFCPDUMP +#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP) static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { - if (ipl_info.type != IPL_TYPE_FCP_DUMP) + if (ipl_info.type != IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) + return; + if (is_kdump_kernel()) return; if (cpu >= NR_CPUS) { pr_warning("CPU %i exceeds the maximum %i and is excluded from " @@ -403,6 +441,18 @@ static void __init smp_detect_cpus(void) info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) panic("smp_detect_cpus failed to allocate memory\n"); +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE && !is_kdump_kernel()) { + struct save_area *save_area; + + save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); + if (!save_area) + panic("could not allocate memory for save area\n"); + copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), + 0x200, 0); + zfcpdump_save_areas[0] = save_area; + } +#endif /* Use sigp detection algorithm if sclp doesn't work. */ if (sclp_get_cpu_info(info)) { smp_use_sigp_detection = 1; @@ -463,7 +513,8 @@ int __cpuinit start_secondary(void *cpuvoid) set_cpu_online(smp_processor_id(), true); ipi_call_unlock(); __ctl_clear_bit(0, 28); /* Disable lowcore protection */ - S390_lowcore.restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + S390_lowcore.restart_psw.mask = + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; S390_lowcore.restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; __ctl_set_bit(0, 28); /* Enable lowcore protection */ @@ -511,7 +562,8 @@ static int __cpuinit smp_alloc_lowcore(int cpu) memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512); lowcore->async_stack = async_stack + ASYNC_SIZE; lowcore->panic_stack = panic_stack + PAGE_SIZE; - lowcore->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lowcore->restart_psw.mask = + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; lowcore->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) restart_int_handler; if (user_mode != HOME_SPACE_MODE) @@ -712,6 +764,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* request the 0x1201 emergency signal external interrupt */ if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) panic("Couldn't request external interrupt 0x1201"); + /* request the 0x1202 external call external interrupt */ + if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0) + panic("Couldn't request external interrupt 0x1202"); /* Reallocate current lowcore, but keep its contents. 
*/ lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index b6f9afe..47df775 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -7,6 +7,7 @@ */ #include <linux/pfn.h> +#include <linux/suspend.h> #include <linux/mm.h> #include <asm/system.h> diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 5c9e439..2a94b77 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -442,7 +442,7 @@ void s390_adjust_jiffies(void) */ FP_UNPACK_SP(SA, &fmil); if ((info->capability >> 23) == 0) - FP_FROM_INT_S(SB, info->capability, 32, int); + FP_FROM_INT_S(SB, (long) info->capability, 64, long); else FP_UNPACK_SP(SB, &info->capability); FP_DIV_S(SR, SA, SB); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 8d65bd0..ebbfab3 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -48,6 +48,7 @@ #include <asm/timer.h> #include <asm/etr.h> #include <asm/cio.h> +#include "entry.h" /* change this if you have some constant time drift */ #define USECS_PER_JIFFY ((unsigned long) 1000000/HZ) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 0cd340b7..77b8942 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -299,8 +299,8 @@ out: } __initcall(init_topology_update); -static void alloc_masks(struct sysinfo_15_1_x *info, struct mask_info *mask, - int offset) +static void __init alloc_masks(struct sysinfo_15_1_x *info, + struct mask_info *mask, int offset) { int i, nr_masks; diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index ffabcd9..a9807dd 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -200,7 +200,7 @@ void show_registers(struct pt_regs *regs) mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); #ifdef CONFIG_64BIT - printk(" EA:%x", mask_bits(regs, PSW_BASE_BITS)); + printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); #endif printk("\n%s GPRS: " FOURLONG, mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); @@ -334,7 +334,8 @@ void __kprobes do_per_trap(struct pt_regs *regs) info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_HWBKPT; - info.si_addr = (void *) current->thread.per_event.address; + info.si_addr = + (void __force __user *) current->thread.per_event.address; force_sig_info(SIGTRAP, &info, current); } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 2d6228f..bb48977 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -170,7 +170,8 @@ void __kprobes vtime_stop_cpu(void) psw_t psw; /* Wait for external, I/O or machine check interrupt. */ - psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT; + psw.mask = psw_kernel_bits | PSW_MASK_WAIT | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; idle->nohz_delay = 0; @@ -183,7 +184,8 @@ void __kprobes vtime_stop_cpu(void) * set_cpu_timer(VTIMER_MAX_SLICE); * idle->idle_enter = get_clock(); * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_IO | PSW_MASK_EXT); + * PSW_MASK_DAT | PSW_MASK_IO | + * PSW_MASK_EXT | PSW_MASK_MCHECK); * The difference is that the inline assembly makes sure that * the last three instruction are stpt, stck and lpsw in that * order. This is done to increase the precision. 
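Throughout this series psw_kernel_bits is reduced to the key, base, addressing-mode and ASC bits, so DAT, machine check and the wanted interrupt classes now have to be ORed in explicitly at every user, as the idle-PSW hunks above do. A small helper of this shape (purely illustrative, not part of the patch) captures the recurring pattern:

/* Illustrative only: compose the enabled-wait PSW mask built in vtime_stop_cpu(). */
static inline unsigned long idle_psw_mask(void)
{
	return psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_DAT |
	       PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
}

With such a helper the assignment in vtime_stop_cpu() would read psw.mask = idle_psw_mask(); the patch itself keeps the bits spelled out at each call site.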
@@ -216,7 +218,8 @@ void __kprobes vtime_stop_cpu(void) * vq->idle = get_cpu_timer(); * idle->idle_enter = get_clock(); * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_IO | PSW_MASK_EXT); + * PSW_MASK_DAT | PSW_MASK_IO | + * PSW_MASK_EXT | PSW_MASK_MCHECK); * The difference is that the inline assembly makes sure that * the last three instruction are stpt, stck and lpsw in that * order. This is done to increase the precision. @@ -458,7 +461,7 @@ void add_virt_timer_periodic(void *new) } EXPORT_SYMBOL(add_virt_timer_periodic); -int __mod_vtimer(struct vtimer_list *timer, __u64 expires, int periodic) +static int __mod_vtimer(struct vtimer_list *timer, __u64 expires, int periodic) { struct vtimer_queue *vq; unsigned long flags; diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 9e4c841..87cedd6 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -1,7 +1,7 @@ /* * diag.c - handling diagnose instructions * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008,2011 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -15,6 +15,34 @@ #include <linux/kvm_host.h> #include "kvm-s390.h" +static int diag_release_pages(struct kvm_vcpu *vcpu) +{ + unsigned long start, end; + unsigned long prefix = vcpu->arch.sie_block->prefix; + + start = vcpu->arch.guest_gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; + end = vcpu->arch.guest_gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; + + if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end + || start < 2 * PAGE_SIZE) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); + vcpu->stat.diagnose_10++; + + /* we checked for start > end above */ + if (end < prefix || start >= prefix + 2 * PAGE_SIZE) { + gmap_discard(start, end, vcpu->arch.gmap); + } else { + if (start < prefix) + gmap_discard(start, prefix, vcpu->arch.gmap); + if (end >= prefix) + gmap_discard(prefix + 2 * PAGE_SIZE, + end, vcpu->arch.gmap); + } + return 0; +} + static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); @@ -57,6 +85,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; switch (code) { + case 0x10: + return diag_release_pages(vcpu); case 0x44: return __diag_time_slice_end(vcpu); case 0x308: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9610ba4..0bd3bea 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -71,6 +71,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, + { "diagnose_10", VCPU_STAT(diagnose_10) }, { "diagnose_44", VCPU_STAT(diagnose_44) }, { NULL } }; diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index a65229d..db92f04 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -32,7 +32,8 @@ static void __udelay_disabled(unsigned long long usecs) u64 clock_saved; u64 end; - mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; + mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_WAIT | + PSW_MASK_EXT | PSW_MASK_MCHECK; end = get_clock() + (usecs << 12); clock_saved = local_tick_disable(); __ctl_store(cr0_saved, 0, 0); diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c 
index 7483383..342ae35 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -342,7 +342,8 @@ int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) if (segment_eq(get_fs(), KERNEL_DS)) return __futex_atomic_op_pt(op, uaddr, oparg, old); spin_lock(&current->mm->page_table_lock); - uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); + uaddr = (u32 __force __user *) + __dat_user_addr((__force unsigned long) uaddr); if (!uaddr) { spin_unlock(&current->mm->page_table_lock); return -EFAULT; @@ -378,7 +379,8 @@ int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, if (segment_eq(get_fs(), KERNEL_DS)) return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); spin_lock(&current->mm->page_table_lock); - uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); + uaddr = (u32 __force __user *) + __dat_user_addr((__force unsigned long) uaddr); if (!uaddr) { spin_unlock(&current->mm->page_table_lock); return -EFAULT; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 9564fc7..1766def 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -307,7 +307,7 @@ static inline int do_exception(struct pt_regs *regs, int access, #ifdef CONFIG_PGSTE if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { - address = gmap_fault(address, + address = __gmap_fault(address, (struct gmap *) S390_lowcore.gmap); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; @@ -393,7 +393,7 @@ void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code, int fault; /* Protection exception is suppressing, decrement psw address. */ - regs->psw.addr -= (pgm_int_code >> 16); + regs->psw.addr = __rewind_psw(regs->psw, pgm_int_code >> 16); /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code @@ -454,7 +454,7 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) struct pt_regs regs; int access, fault; - regs.psw.mask = psw_kernel_bits; + regs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK; if (!irqs_disabled()) regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT; regs.psw.addr = (unsigned long) __builtin_return_address(0); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 5dbbaa6..1cb8427b 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/errno.h> +#include <linux/gfp.h> #include <asm/system.h> /* @@ -60,6 +61,9 @@ long probe_kernel_write(void *dst, const void *src, size_t size) return copied < 0 ?
-EFAULT : 0; } +/* + * Copy memory in real mode (kernel to kernel) + */ int memcpy_real(void *dest, void *src, size_t count) { register unsigned long _dest asm("2") = (unsigned long) dest; @@ -101,3 +105,55 @@ void copy_to_absolute_zero(void *dest, void *src, size_t count) __ctl_load(cr0, 0, 0); preempt_enable(); } + +/* + * Copy memory from kernel (real) to user (virtual) + */ +int copy_to_user_real(void __user *dest, void *src, size_t count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (memcpy_real(buf, src + offs, size)) + goto out; + if (copy_to_user(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); + return rc; +} + +/* + * Copy memory from user (virtual) to kernel (real) + */ +int copy_from_user_real(void *dest, void __user *src, size_t count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (copy_from_user(buf, src + offs, size)) + goto out; + if (memcpy_real(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); + return rc; +} diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index c9a9f7f..f09c748 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -26,6 +26,7 @@ #include <linux/personality.h> #include <linux/mm.h> +#include <linux/mman.h> #include <linux/module.h> #include <linux/random.h> #include <asm/pgalloc.h> diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index d013ed3..b36537a 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -5,6 +5,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/hugetlb.h> +#include <asm/cacheflush.h> #include <asm/pgtable.h> static void change_page_attr(unsigned long addr, int numpages, diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 5d56c2b..301c84d 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 2007,2009 + * Copyright IBM Corp. 2007,2011 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ @@ -222,6 +222,7 @@ void gmap_free(struct gmap *gmap) /* Free all segment & region tables. 
*/ down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { table = (unsigned long *) page_to_phys(page); if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) @@ -230,6 +231,7 @@ void gmap_free(struct gmap *gmap) gmap_unlink_segment(gmap, table); __free_pages(page, ALLOC_ORDER); } + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); list_del(&gmap->list); kfree(gmap); @@ -256,6 +258,9 @@ void gmap_disable(struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_disable); +/* + * gmap_alloc_table is assumed to be called with mmap_sem held + */ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, unsigned long init) { @@ -267,14 +272,12 @@ static int gmap_alloc_table(struct gmap *gmap, return -ENOMEM; new = (unsigned long *) page_to_phys(page); crst_table_init(new, init); - down_read(&gmap->mm->mmap_sem); if (*table & _REGION_ENTRY_INV) { list_add(&page->lru, &gmap->crst_list); *table = (unsigned long) new | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); } else __free_pages(page, ALLOC_ORDER); - up_read(&gmap->mm->mmap_sem); return 0; } @@ -299,6 +302,7 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) flush = 0; down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); for (off = 0; off < len; off += PMD_SIZE) { /* Walk the guest addr space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); @@ -320,6 +324,7 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) *table = _SEGMENT_ENTRY_INV; } out: + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); @@ -350,6 +355,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, flush = 0; down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); for (off = 0; off < len; off += PMD_SIZE) { /* Walk the gmap address space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); @@ -373,19 +379,24 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, flush |= gmap_unlink_segment(gmap, table); *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); } + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); return 0; out_unmap: + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); gmap_unmap_segment(gmap, to, len); return -ENOMEM; } EXPORT_SYMBOL_GPL(gmap_map_segment); -unsigned long gmap_fault(unsigned long address, struct gmap *gmap) +/* + * this function is assumed to be called with mmap_sem held + */ +unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) { unsigned long *table, vmaddr, segment; struct mm_struct *mm; @@ -445,16 +456,75 @@ unsigned long gmap_fault(unsigned long address, struct gmap *gmap) page = pmd_page(*pmd); mp = (struct gmap_pgtable *) page->index; rmap->entry = table; + spin_lock(&mm->page_table_lock); list_add(&rmap->list, &mp->mapper); + spin_unlock(&mm->page_table_lock); /* Set gmap segment table entry to page table. 
*/ *table = pmd_val(*pmd) & PAGE_MASK; return vmaddr | (address & ~PMD_MASK); } return -EFAULT; +} +unsigned long gmap_fault(unsigned long address, struct gmap *gmap) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_fault(address, gmap); + up_read(&gmap->mm->mmap_sem); + + return rc; } EXPORT_SYMBOL_GPL(gmap_fault); +void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) +{ + + unsigned long *table, address, size; + struct vm_area_struct *vma; + struct gmap_pgtable *mp; + struct page *page; + + down_read(&gmap->mm->mmap_sem); + address = from; + while (address < to) { + /* Walk the gmap address space page table */ + table = gmap->table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 20) & 0x7ff); + if (unlikely(*table & _SEGMENT_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + page = pfn_to_page(*table >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + vma = find_vma(gmap->mm, mp->vmaddr); + size = min(to - address, PMD_SIZE - (address & ~PMD_MASK)); + zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK), + size, NULL); + address = (address + PMD_SIZE) & PMD_MASK; + } + up_read(&gmap->mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(gmap_discard); + void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table) { struct gmap_rmap *rmap, *next; @@ -662,8 +732,9 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) void __tlb_remove_table(void *_table) { - void *table = (void *)((unsigned long) _table & PAGE_MASK); - unsigned type = (unsigned long) _table & ~PAGE_MASK; + const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK; + void *table = (void *)((unsigned long) _table & ~mask); + unsigned type = (unsigned long) _table & mask; if (type) __page_table_free_rcu(table, type); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 781ff51..4799383 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -335,6 +335,9 @@ void __init vmem_map_init(void) ro_start = ((unsigned long)&_stext) & PAGE_MASK; ro_end = PFN_ALIGN((unsigned long)&_eshared); for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + if (memory_chunk[i].type == CHUNK_CRASHK || + memory_chunk[i].type == CHUNK_OLDMEM) + continue; start = memory_chunk[i].addr; end = memory_chunk[i].addr + memory_chunk[i].size; if (start >= ro_end || end <= ro_start) @@ -368,6 +371,9 @@ static int __init vmem_convert_memory_chunk(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; + if (memory_chunk[i].type == CHUNK_CRASHK || + memory_chunk[i].type == CHUNK_OLDMEM) + continue; seg = kzalloc(sizeof(*seg), GFP_KERNEL); if (!seg) panic("Out of memory...\n"); diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 4552ce4..f43c0e4 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -994,7 +994,7 @@ allocate_error: * * Returns 0 on success, !0 on 
failure. */ -int hwsampler_deallocate() +int hwsampler_deallocate(void) { int rc; @@ -1035,7 +1035,7 @@ unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) return cb->sample_overflow; } -int hwsampler_setup() +int hwsampler_setup(void) { int rc; int cpu; @@ -1102,7 +1102,7 @@ setup_exit: return rc; } -int hwsampler_shutdown() +int hwsampler_shutdown(void) { int rc; @@ -1203,7 +1203,7 @@ start_all_exit: * * Returns 0 on success, !0 on failure. */ -int hwsampler_stop_all() +int hwsampler_stop_all(void) { int tmp_rc, rc, cpu; struct hws_cpu_buffer *cb; diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a1d3ddb..65894f0 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -11,7 +11,6 @@ #define KMSG_COMPONENT "dasd" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include <linux/kernel_stat.h> #include <linux/kmod.h> #include <linux/init.h> #include <linux/interrupt.h> @@ -1594,7 +1593,6 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, unsigned long long now; int expires; - kstat_cpu(smp_processor_id()).irqs[IOINT_DAS]++; if (IS_ERR(irb)) { switch (PTR_ERR(irb)) { case -EIO: @@ -2061,13 +2059,14 @@ void dasd_add_request_tail(struct dasd_ccw_req *cqr) /* * Wakeup helper for the 'sleep_on' functions. */ -static void dasd_wakeup_cb(struct dasd_ccw_req *cqr, void *data) +void dasd_wakeup_cb(struct dasd_ccw_req *cqr, void *data) { spin_lock_irq(get_ccwdev_lock(cqr->startdev->cdev)); cqr->callback_data = DASD_SLEEPON_END_TAG; spin_unlock_irq(get_ccwdev_lock(cqr->startdev->cdev)); wake_up(&generic_waitq); } +EXPORT_SYMBOL_GPL(dasd_wakeup_cb); static inline int _wait_for_wakeup(struct dasd_ccw_req *cqr) { @@ -2167,7 +2166,9 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) } else wait_event(generic_waitq, !(device->stopped)); - cqr->callback = dasd_wakeup_cb; + if (!cqr->callback) + cqr->callback = dasd_wakeup_cb; + cqr->callback_data = DASD_SLEEPON_START_TAG; dasd_add_request_tail(cqr); if (interruptible) { @@ -2263,7 +2264,11 @@ int dasd_sleep_on_immediatly(struct dasd_ccw_req *cqr) cqr->callback = dasd_wakeup_cb; cqr->callback_data = DASD_SLEEPON_START_TAG; cqr->status = DASD_CQR_QUEUED; - list_add(&cqr->devlist, &device->ccw_queue); + /* + * add new request as second + * first the terminated cqr needs to be finished + */ + list_add(&cqr->devlist, device->ccw_queue.next); /* let the bh start the request to keep them in order */ dasd_schedule_device_bh(device); @@ -3284,6 +3289,9 @@ int dasd_generic_pm_freeze(struct ccw_device *cdev) if (IS_ERR(device)) return PTR_ERR(device); + /* mark device as suspended */ + set_bit(DASD_FLAG_SUSPENDED, &device->flags); + if (device->discipline->freeze) rc = device->discipline->freeze(device); @@ -3358,6 +3366,7 @@ int dasd_generic_restore_device(struct ccw_device *cdev) if (device->block) dasd_schedule_block_bh(device->block); + clear_bit(DASD_FLAG_SUSPENDED, &device->flags); dasd_put_device(device); return 0; } diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 6e835c9..6ab2968 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -844,6 +844,30 @@ static void dasd_eckd_fill_rcd_cqr(struct dasd_device *device, set_bit(DASD_CQR_VERIFY_PATH, &cqr->flags); } +/* + * Wakeup helper for read_conf + * if the cqr is not done and needs some error recovery + * the buffer has to be re-initialized with the EBCDIC "V1.0" + * to show support for virtual device SNEQ + */ +static void read_conf_cb(struct 
dasd_ccw_req *cqr, void *data) +{ + struct ccw1 *ccw; + __u8 *rcd_buffer; + + if (cqr->status != DASD_CQR_DONE) { + ccw = cqr->cpaddr; + rcd_buffer = (__u8 *)((addr_t) ccw->cda); + memset(rcd_buffer, 0, sizeof(*rcd_buffer)); + + rcd_buffer[0] = 0xE5; + rcd_buffer[1] = 0xF1; + rcd_buffer[2] = 0x4B; + rcd_buffer[3] = 0xF0; + } + dasd_wakeup_cb(cqr, data); +} + static int dasd_eckd_read_conf_immediately(struct dasd_device *device, struct dasd_ccw_req *cqr, __u8 *rcd_buffer, @@ -863,6 +887,7 @@ static int dasd_eckd_read_conf_immediately(struct dasd_device *device, clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); set_bit(DASD_CQR_ALLOW_SLOCK, &cqr->flags); cqr->retries = 5; + cqr->callback = read_conf_cb; rc = dasd_sleep_on_immediatly(cqr); return rc; } @@ -900,6 +925,7 @@ static int dasd_eckd_read_conf_lpm(struct dasd_device *device, goto out_error; } dasd_eckd_fill_rcd_cqr(device, cqr, rcd_buf, lpm); + cqr->callback = read_conf_cb; ret = dasd_sleep_on(cqr); /* * on success we update the user input parms @@ -1075,6 +1101,12 @@ static void do_path_verification_work(struct work_struct *work) data = container_of(work, struct path_verification_work_data, worker); device = data->device; + /* delay path verification until device was resumed */ + if (test_bit(DASD_FLAG_SUSPENDED, &device->flags)) { + schedule_work(work); + return; + } + opm = 0; npm = 0; ppm = 0; @@ -2021,9 +2053,13 @@ static void dasd_eckd_check_for_device_change(struct dasd_device *device, /* first of all check for state change pending interrupt */ mask = DEV_STAT_ATTENTION | DEV_STAT_DEV_END | DEV_STAT_UNIT_EXCEP; if ((scsw_dstat(&irb->scsw) & mask) == mask) { - /* for alias only and not in offline processing*/ + /* + * for alias only, not in offline processing + * and only if not suspended + */ if (!device->block && private->lcu && - !test_bit(DASD_FLAG_OFFLINE, &device->flags)) { + !test_bit(DASD_FLAG_OFFLINE, &device->flags) && + !test_bit(DASD_FLAG_SUSPENDED, &device->flags)) { /* * the state change could be caused by an alias * reassignment remove device from alias handling @@ -2350,7 +2386,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( new_track = 1; end_idaw = 0; len_to_track_end = 0; - idaw_dst = 0; + idaw_dst = NULL; idaw_len = 0; rq_for_each_segment(bv, req, iter) { dst = page_address(bv->bv_page) + bv->bv_offset; @@ -2412,7 +2448,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( if (end_idaw) { idaws = idal_create_words(idaws, idaw_dst, idaw_len); - idaw_dst = 0; + idaw_dst = NULL; idaw_len = 0; end_idaw = 0; } @@ -3998,6 +4034,7 @@ static struct ccw_driver dasd_eckd_driver = { .thaw = dasd_generic_restore_device, .restore = dasd_generic_restore_device, .uc_handler = dasd_generic_uc_handler, + .int_class = IOINT_DAS, }; /* diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 4b71b11..a62a753 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -79,6 +79,7 @@ static struct ccw_driver dasd_fba_driver = { .freeze = dasd_generic_pm_freeze, .thaw = dasd_generic_restore_device, .restore = dasd_generic_restore_device, + .int_class = IOINT_DAS, }; static void diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 1dd12bd..afe8c33 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -516,6 +516,7 @@ struct dasd_block { */ #define DASD_FLAG_IS_RESERVED 7 /* The device is reserved */ #define DASD_FLAG_LOCK_STOLEN 8 /* The device lock was stolen */ +#define DASD_FLAG_SUSPENDED 9 /* The device 
was suspended */ void dasd_put_device_wake(struct dasd_device *); @@ -643,6 +644,7 @@ struct dasd_ccw_req * dasd_smalloc_request(int , int, int, struct dasd_device *); void dasd_kfree_request(struct dasd_ccw_req *, struct dasd_device *); void dasd_sfree_request(struct dasd_ccw_req *, struct dasd_device *); +void dasd_wakeup_cb(struct dasd_ccw_req *, void *); static inline int dasd_kmalloc_set_cda(struct ccw1 *ccw, void *cda, struct dasd_device *device) diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 694464c..934458a 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -9,7 +9,6 @@ * Dan Morrison, IBM Corporation <dmorriso@cse.buffalo.edu> */ -#include <linux/kernel_stat.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kdev_t.h> @@ -362,7 +361,6 @@ static void raw3215_irq(struct ccw_device *cdev, unsigned long intparm, int cstat, dstat; int count; - kstat_cpu(smp_processor_id()).irqs[IOINT_C15]++; raw = dev_get_drvdata(&cdev->dev); req = (struct raw3215_req *) intparm; cstat = irb->scsw.cmd.cstat; @@ -776,6 +774,7 @@ static struct ccw_driver raw3215_ccw_driver = { .freeze = &raw3215_pm_stop, .thaw = &raw3215_pm_start, .restore = &raw3215_pm_start, + .int_class = IOINT_C15, }; #ifdef CONFIG_TN3215_CONSOLE diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 810ac38..e5cb924 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -7,7 +7,6 @@ * Copyright IBM Corp. 2003, 2009 */ -#include <linux/kernel_stat.h> #include <linux/module.h> #include <linux/err.h> #include <linux/init.h> @@ -330,7 +329,6 @@ raw3270_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb) struct raw3270_request *rq; int rc; - kstat_cpu(smp_processor_id()).irqs[IOINT_C70]++; rp = dev_get_drvdata(&cdev->dev); if (!rp) return; @@ -1398,6 +1396,7 @@ static struct ccw_driver raw3270_ccw_driver = { .freeze = &raw3270_pm_stop, .thaw = &raw3270_pm_start, .restore = &raw3270_pm_start, + .int_class = IOINT_C70, }; static int diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 837e010..0b54a91 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -61,8 +61,8 @@ static int __init sclp_cmd_sync_early(sclp_cmdw_t cmd, void *sccb) rc = sclp_service_call(cmd, sccb); if (rc) goto out; - __load_psw_mask(PSW_BASE_BITS | PSW_MASK_EXT | - PSW_MASK_WAIT | PSW_DEFAULT_KEY); + __load_psw_mask(PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | + PSW_MASK_BA | PSW_MASK_EXT | PSW_MASK_WAIT); local_irq_disable(); out: /* Contents of the sccb might have changed. 
*/ diff --git a/drivers/s390/char/sclp_quiesce.c b/drivers/s390/char/sclp_quiesce.c index a90a02c..87fc0ac 100644 --- a/drivers/s390/char/sclp_quiesce.c +++ b/drivers/s390/char/sclp_quiesce.c @@ -30,7 +30,8 @@ static void do_machine_quiesce(void) psw_t quiesce_psw; smp_send_stop(); - quiesce_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT; + quiesce_psw.mask = + PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA | PSW_MASK_WAIT; quiesce_psw.addr = 0xfff; __load_psw(quiesce_psw); } diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c index 9eff2df..934ef33 100644 --- a/drivers/s390/char/tape_34xx.c +++ b/drivers/s390/char/tape_34xx.c @@ -1330,6 +1330,7 @@ static struct ccw_driver tape_34xx_driver = { .set_online = tape_34xx_online, .set_offline = tape_generic_offline, .freeze = tape_generic_pm_suspend, + .int_class = IOINT_TAP, }; static int diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index a7d5707..49c6aab 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -1762,6 +1762,7 @@ static struct ccw_driver tape_3590_driver = { .set_offline = tape_generic_offline, .set_online = tape_3590_online, .freeze = tape_generic_pm_suspend, + .int_class = IOINT_TAP, }; /* diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index 7978a0a..b3a3e8e 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -14,7 +14,6 @@ #define KMSG_COMPONENT "tape" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include <linux/kernel_stat.h> #include <linux/module.h> #include <linux/init.h> // for kernel parameters #include <linux/kmod.h> // for requesting modules @@ -1115,7 +1114,6 @@ __tape_do_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb) struct tape_request *request; int rc; - kstat_cpu(smp_processor_id()).irqs[IOINT_TAP]++; device = dev_get_drvdata(&cdev->dev); if (device == NULL) { return; diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c index f6b00c3..d291a54 100644 --- a/drivers/s390/char/vmur.c +++ b/drivers/s390/char/vmur.c @@ -11,7 +11,6 @@ #define KMSG_COMPONENT "vmur" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include <linux/kernel_stat.h> #include <linux/cdev.h> #include <linux/slab.h> @@ -74,6 +73,7 @@ static struct ccw_driver ur_driver = { .set_online = ur_set_online, .set_offline = ur_set_offline, .freeze = ur_pm_suspend, + .int_class = IOINT_VMR, }; static DEFINE_MUTEX(vmur_mutex); @@ -305,7 +305,6 @@ static void ur_int_handler(struct ccw_device *cdev, unsigned long intparm, { struct urdev *urd; - kstat_cpu(smp_processor_id()).irqs[IOINT_VMR]++; TRACE("ur_int_handler: intparm=0x%lx cstat=%02x dstat=%02x res=%u\n", intparm, irb->scsw.cmd.cstat, irb->scsw.cmd.dstat, irb->scsw.cmd.count); diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 3b94044..43068fb 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -16,6 +16,7 @@ #include <linux/slab.h> #include <linux/miscdevice.h> #include <linux/debugfs.h> +#include <linux/module.h> #include <asm/asm-offsets.h> #include <asm/ipl.h> #include <asm/sclp.h> @@ -142,22 +143,6 @@ static int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count) return memcpy_hsa(dest, src, count, TO_KERNEL); } -static int memcpy_real_user(void __user *dest, unsigned long src, size_t count) -{ - static char buf[4096]; - int offs = 0, size; - - while (offs < count) { - size = min(sizeof(buf), count - offs); - if (memcpy_real(buf, (void *) src + offs, size)) - return -EFAULT; - if 
(copy_to_user(dest + offs, buf, size)) - return -EFAULT; - offs += size; - } - return 0; -} - static int __init init_cpu_info(enum arch_id arch) { struct save_area *sa; @@ -346,8 +331,8 @@ static ssize_t zcore_read(struct file *file, char __user *buf, size_t count, /* Copy from real mem */ size = count - mem_offs - hdr_count; - rc = memcpy_real_user(buf + hdr_count + mem_offs, mem_start + mem_offs, - size); + rc = copy_to_user_real(buf + hdr_count + mem_offs, + (void *) mem_start + mem_offs, size); if (rc) goto fail; diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 5c56741..4f1989d 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -29,31 +29,20 @@ /* a device matches a driver if all its slave devices match the same * entry of the driver */ -static int -ccwgroup_bus_match (struct device * dev, struct device_driver * drv) +static int ccwgroup_bus_match(struct device *dev, struct device_driver * drv) { - struct ccwgroup_device *gdev; - struct ccwgroup_driver *gdrv; - - gdev = to_ccwgroupdev(dev); - gdrv = to_ccwgroupdrv(drv); + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + struct ccwgroup_driver *gdrv = to_ccwgroupdrv(drv); if (gdev->creator_id == gdrv->driver_id) return 1; return 0; } -static int -ccwgroup_uevent (struct device *dev, struct kobj_uevent_env *env) -{ - /* TODO */ - return 0; -} static struct bus_type ccwgroup_bus_type; -static void -__ccwgroup_remove_symlinks(struct ccwgroup_device *gdev) +static void __ccwgroup_remove_symlinks(struct ccwgroup_device *gdev) { int i; char str[8]; @@ -63,7 +52,6 @@ __ccwgroup_remove_symlinks(struct ccwgroup_device *gdev) sysfs_remove_link(&gdev->dev.kobj, str); sysfs_remove_link(&gdev->cdev[i]->dev.kobj, "group_device"); } - } /* @@ -87,6 +75,87 @@ static void __ccwgroup_remove_cdev_refs(struct ccwgroup_device *gdev) } } +static int ccwgroup_set_online(struct ccwgroup_device *gdev) +{ + struct ccwgroup_driver *gdrv = to_ccwgroupdrv(gdev->dev.driver); + int ret = 0; + + if (atomic_cmpxchg(&gdev->onoff, 0, 1) != 0) + return -EAGAIN; + if (gdev->state == CCWGROUP_ONLINE) + goto out; + if (gdrv->set_online) + ret = gdrv->set_online(gdev); + if (ret) + goto out; + + gdev->state = CCWGROUP_ONLINE; +out: + atomic_set(&gdev->onoff, 0); + return ret; +} + +static int ccwgroup_set_offline(struct ccwgroup_device *gdev) +{ + struct ccwgroup_driver *gdrv = to_ccwgroupdrv(gdev->dev.driver); + int ret = 0; + + if (atomic_cmpxchg(&gdev->onoff, 0, 1) != 0) + return -EAGAIN; + if (gdev->state == CCWGROUP_OFFLINE) + goto out; + if (gdrv->set_offline) + ret = gdrv->set_offline(gdev); + if (ret) + goto out; + + gdev->state = CCWGROUP_OFFLINE; +out: + atomic_set(&gdev->onoff, 0); + return ret; +} + +static ssize_t ccwgroup_online_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + struct ccwgroup_driver *gdrv = to_ccwgroupdrv(dev->driver); + unsigned long value; + int ret; + + if (!dev->driver) + return -EINVAL; + if (!try_module_get(gdrv->driver.owner)) + return -EINVAL; + + ret = strict_strtoul(buf, 0, &value); + if (ret) + goto out; + + if (value == 1) + ret = ccwgroup_set_online(gdev); + else if (value == 0) + ret = ccwgroup_set_offline(gdev); + else + ret = -EINVAL; +out: + module_put(gdrv->driver.owner); + return (ret == 0) ? 
count : ret; +} + +static ssize_t ccwgroup_online_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + int online; + + online = (gdev->state == CCWGROUP_ONLINE) ? 1 : 0; + + return scnprintf(buf, PAGE_SIZE, "%d\n", online); +} + /* * Provide an 'ungroup' attribute so the user can remove group devices no * longer needed or accidentially created. Saves memory :) @@ -104,14 +173,13 @@ static void ccwgroup_ungroup_callback(struct device *dev) mutex_unlock(&gdev->reg_mutex); } -static ssize_t -ccwgroup_ungroup_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +static ssize_t ccwgroup_ungroup_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { - struct ccwgroup_device *gdev; + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); int rc; - gdev = to_ccwgroupdev(dev); - /* Prevent concurrent online/offline processing and ungrouping. */ if (atomic_cmpxchg(&gdev->onoff, 0, 1) != 0) return -EAGAIN; @@ -132,24 +200,35 @@ out: } return count; } - static DEVICE_ATTR(ungroup, 0200, NULL, ccwgroup_ungroup_store); +static DEVICE_ATTR(online, 0644, ccwgroup_online_show, ccwgroup_online_store); -static void -ccwgroup_release (struct device *dev) +static struct attribute *ccwgroup_attrs[] = { + &dev_attr_online.attr, + &dev_attr_ungroup.attr, + NULL, +}; +static struct attribute_group ccwgroup_attr_group = { + .attrs = ccwgroup_attrs, +}; +static const struct attribute_group *ccwgroup_attr_groups[] = { + &ccwgroup_attr_group, + NULL, +}; + +static void ccwgroup_release(struct device *dev) { kfree(to_ccwgroupdev(dev)); } -static int -__ccwgroup_create_symlinks(struct ccwgroup_device *gdev) +static int __ccwgroup_create_symlinks(struct ccwgroup_device *gdev) { char str[8]; int i, rc; for (i = 0; i < gdev->count; i++) { - rc = sysfs_create_link(&gdev->cdev[i]->dev.kobj, &gdev->dev.kobj, - "group_device"); + rc = sysfs_create_link(&gdev->cdev[i]->dev.kobj, + &gdev->dev.kobj, "group_device"); if (rc) { for (--i; i >= 0; i--) sysfs_remove_link(&gdev->cdev[i]->dev.kobj, @@ -159,8 +238,8 @@ __ccwgroup_create_symlinks(struct ccwgroup_device *gdev) } for (i = 0; i < gdev->count; i++) { sprintf(str, "cdev%d", i); - rc = sysfs_create_link(&gdev->dev.kobj, &gdev->cdev[i]->dev.kobj, - str); + rc = sysfs_create_link(&gdev->dev.kobj, + &gdev->cdev[i]->dev.kobj, str); if (rc) { for (--i; i >= 0; i--) { sprintf(str, "cdev%d", i); @@ -293,26 +372,17 @@ int ccwgroup_create_from_string(struct device *root, unsigned int creator_id, } dev_set_name(&gdev->dev, "%s", dev_name(&gdev->cdev[0]->dev)); - + gdev->dev.groups = ccwgroup_attr_groups; rc = device_add(&gdev->dev); if (rc) goto error; - get_device(&gdev->dev); - rc = device_create_file(&gdev->dev, &dev_attr_ungroup); - + rc = __ccwgroup_create_symlinks(gdev); if (rc) { - device_unregister(&gdev->dev); + device_del(&gdev->dev); goto error; } - - rc = __ccwgroup_create_symlinks(gdev); - if (!rc) { - mutex_unlock(&gdev->reg_mutex); - put_device(&gdev->dev); - return 0; - } - device_remove_file(&gdev->dev, &dev_attr_ungroup); - device_unregister(&gdev->dev); + mutex_unlock(&gdev->reg_mutex); + return 0; error: for (i = 0; i < num_devices; i++) if (gdev->cdev[i]) { @@ -330,7 +400,15 @@ error: EXPORT_SYMBOL(ccwgroup_create_from_string); static int ccwgroup_notifier(struct notifier_block *nb, unsigned long action, - void *data); + void *data) +{ + struct device *dev = data; + + if (action == BUS_NOTIFY_UNBIND_DRIVER) + 
device_schedule_callback(dev, ccwgroup_ungroup_callback); + + return NOTIFY_OK; +} static struct notifier_block ccwgroup_nb = { .notifier_call = ccwgroup_notifier @@ -362,138 +440,21 @@ module_exit(cleanup_ccwgroup); /************************** driver stuff ******************************/ -static int -ccwgroup_set_online(struct ccwgroup_device *gdev) +static int ccwgroup_probe(struct device *dev) { - struct ccwgroup_driver *gdrv; - int ret; - - if (atomic_cmpxchg(&gdev->onoff, 0, 1) != 0) - return -EAGAIN; - if (gdev->state == CCWGROUP_ONLINE) { - ret = 0; - goto out; - } - if (!gdev->dev.driver) { - ret = -EINVAL; - goto out; - } - gdrv = to_ccwgroupdrv (gdev->dev.driver); - if ((ret = gdrv->set_online ? gdrv->set_online(gdev) : 0)) - goto out; - - gdev->state = CCWGROUP_ONLINE; - out: - atomic_set(&gdev->onoff, 0); - return ret; -} - -static int -ccwgroup_set_offline(struct ccwgroup_device *gdev) -{ - struct ccwgroup_driver *gdrv; - int ret; - - if (atomic_cmpxchg(&gdev->onoff, 0, 1) != 0) - return -EAGAIN; - if (gdev->state == CCWGROUP_OFFLINE) { - ret = 0; - goto out; - } - if (!gdev->dev.driver) { - ret = -EINVAL; - goto out; - } - gdrv = to_ccwgroupdrv (gdev->dev.driver); - if ((ret = gdrv->set_offline ? gdrv->set_offline(gdev) : 0)) - goto out; - - gdev->state = CCWGROUP_OFFLINE; - out: - atomic_set(&gdev->onoff, 0); - return ret; -} - -static ssize_t -ccwgroup_online_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct ccwgroup_device *gdev; - struct ccwgroup_driver *gdrv; - unsigned long value; - int ret; - - if (!dev->driver) - return -ENODEV; - - gdev = to_ccwgroupdev(dev); - gdrv = to_ccwgroupdrv(dev->driver); - - if (!try_module_get(gdrv->driver.owner)) - return -EINVAL; - - ret = strict_strtoul(buf, 0, &value); - if (ret) - goto out; - - if (value == 1) - ret = ccwgroup_set_online(gdev); - else if (value == 0) - ret = ccwgroup_set_offline(gdev); - else - ret = -EINVAL; -out: - module_put(gdrv->driver.owner); - return (ret == 0) ? count : ret; -} - -static ssize_t -ccwgroup_online_show (struct device *dev, struct device_attribute *attr, char *buf) -{ - int online; - - online = (to_ccwgroupdev(dev)->state == CCWGROUP_ONLINE); - - return sprintf(buf, online ? "1\n" : "0\n"); -} - -static DEVICE_ATTR(online, 0644, ccwgroup_online_show, ccwgroup_online_store); - -static int -ccwgroup_probe (struct device *dev) -{ - struct ccwgroup_device *gdev; - struct ccwgroup_driver *gdrv; - - int ret; - - gdev = to_ccwgroupdev(dev); - gdrv = to_ccwgroupdrv(dev->driver); - - if ((ret = device_create_file(dev, &dev_attr_online))) - return ret; - - ret = gdrv->probe ? gdrv->probe(gdev) : -ENODEV; - if (ret) - device_remove_file(dev, &dev_attr_online); + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + struct ccwgroup_driver *gdrv = to_ccwgroupdrv(dev->driver); - return ret; + return gdrv->probe ? 
gdrv->probe(gdev) : -ENODEV; } -static int -ccwgroup_remove (struct device *dev) +static int ccwgroup_remove(struct device *dev) { - struct ccwgroup_device *gdev; - struct ccwgroup_driver *gdrv; - - device_remove_file(dev, &dev_attr_online); - device_remove_file(dev, &dev_attr_ungroup); + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + struct ccwgroup_driver *gdrv = to_ccwgroupdrv(dev->driver); if (!dev->driver) return 0; - - gdev = to_ccwgroupdev(dev); - gdrv = to_ccwgroupdrv(dev->driver); - if (gdrv->remove) gdrv->remove(gdev); @@ -502,15 +463,11 @@ ccwgroup_remove (struct device *dev) static void ccwgroup_shutdown(struct device *dev) { - struct ccwgroup_device *gdev; - struct ccwgroup_driver *gdrv; + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + struct ccwgroup_driver *gdrv = to_ccwgroupdrv(dev->driver); if (!dev->driver) return; - - gdev = to_ccwgroupdev(dev); - gdrv = to_ccwgroupdrv(dev->driver); - if (gdrv->shutdown) gdrv->shutdown(gdev); } @@ -586,26 +543,12 @@ static const struct dev_pm_ops ccwgroup_pm_ops = { static struct bus_type ccwgroup_bus_type = { .name = "ccwgroup", .match = ccwgroup_bus_match, - .uevent = ccwgroup_uevent, .probe = ccwgroup_probe, .remove = ccwgroup_remove, .shutdown = ccwgroup_shutdown, .pm = &ccwgroup_pm_ops, }; - -static int ccwgroup_notifier(struct notifier_block *nb, unsigned long action, - void *data) -{ - struct device *dev = data; - - if (action == BUS_NOTIFY_UNBIND_DRIVER) - device_schedule_callback(dev, ccwgroup_ungroup_callback); - - return NOTIFY_OK; -} - - /** * ccwgroup_driver_register() - register a ccw group driver * @cdriver: driver to be registered @@ -619,9 +562,9 @@ int ccwgroup_driver_register(struct ccwgroup_driver *cdriver) return driver_register(&cdriver->driver); } +EXPORT_SYMBOL(ccwgroup_driver_register); -static int -__ccwgroup_match_all(struct device *dev, void *data) +static int __ccwgroup_match_all(struct device *dev, void *data) { return 1; } @@ -652,6 +595,7 @@ void ccwgroup_driver_unregister(struct ccwgroup_driver *cdriver) put_driver(&cdriver->driver); driver_unregister(&cdriver->driver); } +EXPORT_SYMBOL(ccwgroup_driver_unregister); /** * ccwgroup_probe_ccwdev() - probe function for slave devices @@ -666,6 +610,7 @@ int ccwgroup_probe_ccwdev(struct ccw_device *cdev) { return 0; } +EXPORT_SYMBOL(ccwgroup_probe_ccwdev); /** * ccwgroup_remove_ccwdev() - remove function for slave devices @@ -702,9 +647,5 @@ void ccwgroup_remove_ccwdev(struct ccw_device *cdev) /* Release ccwgroup device reference for local processing. */ put_device(&gdev->dev); } - -MODULE_LICENSE("GPL"); -EXPORT_SYMBOL(ccwgroup_driver_register); -EXPORT_SYMBOL(ccwgroup_driver_unregister); -EXPORT_SYMBOL(ccwgroup_probe_ccwdev); EXPORT_SYMBOL(ccwgroup_remove_ccwdev); +MODULE_LICENSE("GPL"); diff --git a/drivers/s390/cio/ccwreq.c b/drivers/s390/cio/ccwreq.c index d15f8b4..5156264 100644 --- a/drivers/s390/cio/ccwreq.c +++ b/drivers/s390/cio/ccwreq.c @@ -1,10 +1,13 @@ /* * Handling of internal CCW device requests. * - * Copyright IBM Corp. 2009 + * Copyright IBM Corp. 
2009, 2011 * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> */ +#define KMSG_COMPONENT "cio" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/types.h> #include <linux/err.h> #include <asm/ccwdev.h> @@ -323,7 +326,21 @@ void ccw_request_timeout(struct ccw_device *cdev) { struct subchannel *sch = to_subchannel(cdev->dev.parent); struct ccw_request *req = &cdev->private->req; - int rc; + int rc = -ENODEV, chp; + + if (cio_update_schib(sch)) + goto err; + + for (chp = 0; chp < 8; chp++) { + if ((0x80 >> chp) & sch->schib.pmcw.lpum) + pr_warning("%s: No interrupt was received within %lus " + "(CS=%02x, DS=%02x, CHPID=%x.%02x)\n", + dev_name(&cdev->dev), req->timeout / HZ, + scsw_cstat(&sch->schib.scsw), + scsw_dstat(&sch->schib.scsw), + sch->schid.cssid, + sch->schib.pmcw.chpid[chp]); + } if (!ccwreq_next_path(cdev)) { /* set the final return code for this request */ @@ -342,7 +359,7 @@ err: * ccw_request_notoper - notoper handler for I/O request procedure * @cdev: ccw device * - * Handle timeout during I/O request procedure. + * Handle notoper during I/O request procedure. */ void ccw_request_notoper(struct ccw_device *cdev) { diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index e950f1a..0c87b0f 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -1,7 +1,7 @@ /* * Driver for s390 chsc subchannels * - * Copyright IBM Corp. 2008, 2009 + * Copyright IBM Corp. 2008, 2011 * * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> * @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/uaccess.h> #include <linux/miscdevice.h> +#include <linux/kernel_stat.h> #include <asm/compat.h> #include <asm/cio.h> @@ -56,6 +57,8 @@ static void chsc_subchannel_irq(struct subchannel *sch) CHSC_LOG(4, "irb"); CHSC_LOG_HEX(4, irb, sizeof(*irb)); + kstat_cpu(smp_processor_id()).irqs[IOINT_CSC]++; + /* Copy irb to provided request and set done. */ if (!request) { CHSC_MSG(0, "Interrupt on sch 0.%x.%04x with no request\n", diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index eb3140e..dc67c39 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -622,6 +622,7 @@ void __irq_entry do_IRQ(struct pt_regs *regs) sch = (struct subchannel *)(unsigned long)tpi_info->intparm; if (!sch) { /* Clear pending interrupt condition. */ + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; tsch(tpi_info->schid, irb); continue; } @@ -634,7 +635,10 @@ void __irq_entry do_IRQ(struct pt_regs *regs) /* Call interrupt handler if there is one. */ if (sch->driver && sch->driver->irq) sch->driver->irq(sch); - } + else + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; + } else + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; spin_unlock(sch->lock); /* * Are more interrupts pending? @@ -667,18 +671,23 @@ static int cio_tpi(void) tpi_info = (struct tpi_info *)&S390_lowcore.subchannel_id; if (tpi(NULL) != 1) return 0; + kstat_cpu(smp_processor_id()).irqs[IO_INTERRUPT]++; if (tpi_info->adapter_IO) { do_adapter_IO(tpi_info->isc); return 1; } irb = (struct irb *)&S390_lowcore.irb; /* Store interrupt response block to lowcore. */ - if (tsch(tpi_info->schid, irb) != 0) + if (tsch(tpi_info->schid, irb) != 0) { /* Not status pending or not operational. 
*/ + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; return 1; + } sch = (struct subchannel *)(unsigned long)tpi_info->intparm; - if (!sch) + if (!sch) { + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; return 1; + } irq_context = in_interrupt(); if (!irq_context) local_bh_disable(); @@ -687,6 +696,8 @@ static int cio_tpi(void) memcpy(&sch->schib.scsw, &irb->scsw, sizeof(union scsw)); if (sch->driver && sch->driver->irq) sch->driver->irq(sch); + else + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; spin_unlock(sch->lock); irq_exit(); if (!irq_context) @@ -1058,7 +1069,7 @@ void reipl_ccw_dev(struct ccw_dev_id *devid) { struct subchannel_id schid; - s390_reset_system(); + s390_reset_system(NULL, NULL); if (reipl_find_schid(devid, &schid) != 0) panic("IPL Device not found\n"); do_reipl_asm(*((__u32*)&schid)); diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index 80ebddd..33bb4d8 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -133,6 +133,8 @@ struct channel_subsystem { extern struct channel_subsystem *channel_subsystems[]; +void channel_subsystem_reinit(void); + /* Helper functions to build lists for the slow path. */ void css_schedule_eval(struct subchannel_id schid); void css_schedule_eval_all(void); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 8e04c00..d734f4a 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -21,6 +21,7 @@ #include <linux/device.h> #include <linux/workqueue.h> #include <linux/timer.h> +#include <linux/kernel_stat.h> #include <asm/ccwdev.h> #include <asm/cio.h> @@ -747,6 +748,7 @@ static int io_subchannel_initialize_dev(struct subchannel *sch, struct ccw_device *cdev) { cdev->private->cdev = cdev; + cdev->private->int_class = IOINT_CIO; atomic_set(&cdev->private->onoff, 0); cdev->dev.parent = &sch->dev; cdev->dev.release = ccw_device_release; @@ -1010,6 +1012,8 @@ static void io_subchannel_irq(struct subchannel *sch) CIO_TRACE_EVENT(6, dev_name(&sch->dev)); if (cdev) dev_fsm_event(cdev, DEV_EVENT_INTERRUPT); + else + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; } void io_subchannel_init_config(struct subchannel *sch) @@ -1621,6 +1625,7 @@ ccw_device_probe_console(void) memset(&console_private, 0, sizeof(struct ccw_device_private)); console_cdev.private = &console_private; console_private.cdev = &console_cdev; + console_private.int_class = IOINT_CIO; ret = ccw_device_console_enable(&console_cdev, sch); if (ret) { cio_release_console(); @@ -1702,11 +1707,18 @@ ccw_device_probe (struct device *dev) int ret; cdev->drv = cdrv; /* to let the driver call _set_online */ + /* Note: we interpret class 0 in this context as an uninitialized + * field since it translates to a non-I/O interrupt class. */ + if (cdrv->int_class != 0) + cdev->private->int_class = cdrv->int_class; + else + cdev->private->int_class = IOINT_CIO; ret = cdrv->probe ? 
cdrv->probe(cdev) : -ENODEV; if (ret) { cdev->drv = NULL; + cdev->private->int_class = IOINT_CIO; return ret; } @@ -1740,6 +1752,7 @@ ccw_device_remove (struct device *dev) } ccw_device_set_timeout(cdev, 0); cdev->drv = NULL; + cdev->private->int_class = IOINT_CIO; return 0; } diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h index 0b7245c..179824b 100644 --- a/drivers/s390/cio/device.h +++ b/drivers/s390/cio/device.h @@ -5,6 +5,7 @@ #include <linux/atomic.h> #include <linux/wait.h> #include <linux/notifier.h> +#include <linux/kernel_stat.h> #include "io_sch.h" /* @@ -56,7 +57,17 @@ extern fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS]; static inline void dev_fsm_event(struct ccw_device *cdev, enum dev_event dev_event) { - dev_jumptable[cdev->private->state][dev_event](cdev, dev_event); + int state = cdev->private->state; + + if (dev_event == DEV_EVENT_INTERRUPT) { + if (state == DEV_STATE_ONLINE) + kstat_cpu(smp_processor_id()). + irqs[cdev->private->int_class]++; + else if (state != DEV_STATE_CMFCHANGE && + state != DEV_STATE_CMFUPDATE) + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; + } + dev_jumptable[state][dev_event](cdev, dev_event); } /* diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h index ba31ad8..2ebb492 100644 --- a/drivers/s390/cio/io_sch.h +++ b/drivers/s390/cio/io_sch.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <asm/schid.h> #include <asm/ccwdev.h> +#include <asm/irq.h> #include "css.h" #include "orb.h" @@ -157,6 +158,7 @@ struct ccw_device_private { struct list_head cmb_list; /* list of measured devices */ u64 cmb_start_time; /* clock value of cmb reset */ void *cmb_wait; /* deferred cmb enable/disable */ + enum interruption_class int_class; }; static inline int rsch(struct subchannel_id schid) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 3dd8644..b962ffb 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -18,14 +18,6 @@ #define QDIO_BUSY_BIT_RETRIES 1000 /* = 10s retry time */ #define QDIO_INPUT_THRESHOLD (500 << 12) /* 500 microseconds */ -/* - * if an asynchronous HiperSockets queue runs full, the 10 seconds timer wait - * till next initiative to give transmitted skbs back to the stack is too long. - * Therefore polling is started in case of multicast queue is filled more - * than 50 percent. 
- */ -#define QDIO_IQDIO_POLL_LVL 65 /* HS multicast queue */ - enum qdio_irq_states { QDIO_IRQ_STATE_INACTIVE, QDIO_IRQ_STATE_ESTABLISHED, @@ -290,6 +282,9 @@ struct qdio_q { /* error condition during a data transfer */ unsigned int qdio_error; + /* last scan of the queue */ + u64 timestamp; + struct tasklet_struct tasklet; struct qdio_queue_perf_stat q_stats; @@ -423,31 +418,7 @@ static inline int multicast_outbound(struct qdio_q *q) #define queue_irqs_disabled(q) \ (test_bit(QDIO_QUEUE_IRQS_DISABLED, &q->u.in.queue_irq_state) != 0) -#define TIQDIO_SHARED_IND 63 - -/* device state change indicators */ -struct indicator_t { - u32 ind; /* u32 because of compare-and-swap performance */ - atomic_t count; /* use count, 0 or 1 for non-shared indicators */ -}; - -extern struct indicator_t *q_indicators; - -static inline int has_multiple_inq_on_dsci(struct qdio_irq *irq) -{ - return irq->nr_input_qs > 1; -} - -static inline int references_shared_dsci(struct qdio_irq *irq) -{ - return irq->dsci == &q_indicators[TIQDIO_SHARED_IND].ind; -} - -static inline int shared_ind(struct qdio_q *q) -{ - struct qdio_irq *i = q->irq_ptr; - return references_shared_dsci(i) || has_multiple_inq_on_dsci(i); -} +extern u64 last_ai_time; /* prototypes for thin interrupt */ void qdio_setup_thinint(struct qdio_irq *irq_ptr); @@ -460,7 +431,8 @@ int tiqdio_allocate_memory(void); void tiqdio_free_memory(void); int tiqdio_register_thinints(void); void tiqdio_unregister_thinints(void); - +void clear_nonshared_ind(struct qdio_irq *); +int test_nonshared_ind(struct qdio_irq *); /* prototypes for setup */ void qdio_inbound_processing(unsigned long data); diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c index aaf7f93..ed68245 100644 --- a/drivers/s390/cio/qdio_debug.c +++ b/drivers/s390/cio/qdio_debug.c @@ -54,15 +54,17 @@ static int qstat_show(struct seq_file *m, void *v) if (!q) return 0; - seq_printf(m, "DSCI: %d nr_used: %d\n", - *(u32 *)q->irq_ptr->dsci, atomic_read(&q->nr_buf_used)); - seq_printf(m, "ftc: %d last_move: %d\n", + seq_printf(m, "Timestamp: %Lx Last AI: %Lx\n", + q->timestamp, last_ai_time); + seq_printf(m, "nr_used: %d ftc: %d last_move: %d\n", + atomic_read(&q->nr_buf_used), q->first_to_check, q->last_move); if (q->is_input_q) { seq_printf(m, "polling: %d ack start: %d ack count: %d\n", q->u.in.polling, q->u.in.ack_start, q->u.in.ack_count); - seq_printf(m, "IRQs disabled: %u\n", + seq_printf(m, "DSCI: %d IRQs disabled: %u\n", + *(u32 *)q->irq_ptr->dsci, test_bit(QDIO_QUEUE_IRQS_DISABLED, &q->u.in.queue_irq_state)); } diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 6547ff4..3ef8d07 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -15,7 +15,6 @@ #include <linux/delay.h> #include <linux/gfp.h> #include <linux/io.h> -#include <linux/kernel_stat.h> #include <linux/atomic.h> #include <asm/debug.h> #include <asm/qdio.h> @@ -105,9 +104,12 @@ static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq) /* all done or next buffer state different */ if (ccq == 0 || ccq == 32) return 0; - /* not all buffers processed */ - if (ccq == 96 || ccq == 97) + /* no buffer processed */ + if (ccq == 97) return 1; + /* not all buffers processed */ + if (ccq == 96) + return 2; /* notify devices immediately */ DBF_ERROR("%4x ccq:%3d", SCH_NO(q), ccq); return -EIO; @@ -127,10 +129,8 @@ static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq) static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state, int start, int 
 count, int auto_ack)
 {
+	int rc, tmp_count = count, tmp_start = start, nr = q->nr, retried = 0;
 	unsigned int ccq = 0;
-	int tmp_count = count, tmp_start = start;
-	int nr = q->nr;
-	int rc;
 
 	BUG_ON(!q->irq_ptr->sch_token);
 	qperf_inc(q, eqbs);
 
@@ -141,30 +141,34 @@ again:
 	ccq = do_eqbs(q->irq_ptr->sch_token, state, nr, &tmp_start, &tmp_count,
 		      auto_ack);
 	rc = qdio_check_ccq(q, ccq);
-
-	/* At least one buffer was processed, return and extract the remaining
-	 * buffers later.
-	 */
-	if ((ccq == 96) && (count != tmp_count)) {
-		qperf_inc(q, eqbs_partial);
-		return (count - tmp_count);
-	}
+	if (!rc)
+		return count - tmp_count;
 
 	if (rc == 1) {
 		DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "EQBS again:%2d", ccq);
 		goto again;
 	}
 
-	if (rc < 0) {
-		DBF_ERROR("%4x EQBS ERROR", SCH_NO(q));
-		DBF_ERROR("%3d%3d%2d", count, tmp_count, nr);
-		q->handler(q->irq_ptr->cdev,
-			   QDIO_ERROR_ACTIVATE_CHECK_CONDITION,
-			   q->nr, q->first_to_kick, count,
-			   q->irq_ptr->int_parm);
-		return 0;
+	if (rc == 2) {
+		BUG_ON(tmp_count == count);
+		qperf_inc(q, eqbs_partial);
+		DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "EQBS part:%02x",
+			      tmp_count);
+		/*
+		 * Retry once, if that fails bail out and process the
+		 * extracted buffers before trying again.
+		 */
+		if (!retried++)
+			goto again;
+		else
+			return count - tmp_count;
 	}
-	return count - tmp_count;
+
+	DBF_ERROR("%4x EQBS ERROR", SCH_NO(q));
+	DBF_ERROR("%3d%3d%2d", count, tmp_count, nr);
+	q->handler(q->irq_ptr->cdev, QDIO_ERROR_ACTIVATE_CHECK_CONDITION,
+		   0, -1, -1, q->irq_ptr->int_parm);
+	return 0;
 }
 
 /**
@@ -197,22 +201,22 @@ static int qdio_do_sqbs(struct qdio_q *q, unsigned char state, int start,
 again:
 	ccq = do_sqbs(q->irq_ptr->sch_token, state, nr, &tmp_start, &tmp_count);
 	rc = qdio_check_ccq(q, ccq);
-	if (rc == 1) {
+	if (!rc) {
+		WARN_ON(tmp_count);
+		return count - tmp_count;
+	}
+
+	if (rc == 1 || rc == 2) {
 		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "SQBS again:%2d", ccq);
 		qperf_inc(q, sqbs_partial);
 		goto again;
 	}
-	if (rc < 0) {
-		DBF_ERROR("%4x SQBS ERROR", SCH_NO(q));
-		DBF_ERROR("%3d%3d%2d", count, tmp_count, nr);
-		q->handler(q->irq_ptr->cdev,
-			   QDIO_ERROR_ACTIVATE_CHECK_CONDITION,
-			   q->nr, q->first_to_kick, count,
-			   q->irq_ptr->int_parm);
-		return 0;
-	}
-	WARN_ON(tmp_count);
-	return count - tmp_count;
+
+	DBF_ERROR("%4x SQBS ERROR", SCH_NO(q));
+	DBF_ERROR("%3d%3d%2d", count, tmp_count, nr);
+	q->handler(q->irq_ptr->cdev, QDIO_ERROR_ACTIVATE_CHECK_CONDITION,
+		   0, -1, -1, q->irq_ptr->int_parm);
+	return 0;
 }
 
 /* returns number of examined buffers and their common state in *state */
@@ -277,7 +281,7 @@ static inline int set_buf_state(struct qdio_q *q, int bufnr,
 }
 
 /* set slsb states to initial state */
-void qdio_init_buf_states(struct qdio_irq *irq_ptr)
+static void qdio_init_buf_states(struct qdio_irq *irq_ptr)
 {
 	struct qdio_q *q;
 	int i;
@@ -446,7 +450,7 @@ static void process_buffer_error(struct qdio_q *q, int count)
 		qperf_inc(q, target_full);
 		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "OUTFULL FTC:%02x",
 			      q->first_to_check);
-		return;
+		goto set;
 	}
 
 	DBF_ERROR("%4x BUF ERROR", SCH_NO(q));
@@ -456,6 +460,7 @@ static void process_buffer_error(struct qdio_q *q, int count)
 		  q->sbal[q->first_to_check]->element[14].sflags,
 		  q->sbal[q->first_to_check]->element[15].sflags);
 
+set:
 	/*
 	 * Interrupts may be avoided as long as the error is present
 	 * so change the buffer state immediately to avoid starvation.
@@ -513,6 +518,8 @@ static int get_inbound_buffer_frontier(struct qdio_q *q)
 	int count, stop;
 	unsigned char state = 0;
 
+	q->timestamp = get_clock_fast();
+
 	/*
 	 * Don't check 128 buffers, as otherwise qdio_inbound_q_moved
 	 * would return 0.
@@ -782,6 +789,8 @@ static int get_outbound_buffer_frontier(struct qdio_q *q)
 	int count, stop;
 	unsigned char state = 0;
 
+	q->timestamp = get_clock_fast();
+
 	if (need_siga_sync(q))
 		if (((queue_type(q) != QDIO_IQDIO_QFMT) &&
 		    !pci_out_supported(q)) ||
@@ -912,21 +921,13 @@ static void __qdio_outbound_processing(struct qdio_q *q)
 	if (!pci_out_supported(q) && !qdio_outbound_q_done(q))
 		goto sched;
 
-	/* bail out for HiperSockets unicast queues */
-	if (queue_type(q) == QDIO_IQDIO_QFMT && !multicast_outbound(q))
-		return;
-
-	if ((queue_type(q) == QDIO_IQDIO_QFMT) &&
-	    (atomic_read(&q->nr_buf_used)) > QDIO_IQDIO_POLL_LVL)
-		goto sched;
-
 	if (q->u.out.pci_out_enabled)
 		return;
 
 	/*
 	 * Now we know that queue type is either qeth without pci enabled
-	 * or HiperSockets multicast. Make sure buffer switch from PRIMED to
-	 * EMPTY is noticed and outbound_handler is called after some time.
+	 * or HiperSockets. Make sure buffer switch from PRIMED to EMPTY
+	 * is noticed and outbound_handler is called after some time.
 	 */
 	if (qdio_outbound_q_done(q))
 		del_timer(&q->u.out.timer);
@@ -1128,7 +1129,6 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
 		return;
 	}
 
-	kstat_cpu(smp_processor_id()).irqs[IOINT_QDI]++;
 	if (irq_ptr->perf_stat_enabled)
 		irq_ptr->perf_stat.qdio_int++;
 
@@ -1719,9 +1719,7 @@ int qdio_start_irq(struct ccw_device *cdev, int nr)
 
 	WARN_ON(queue_irqs_enabled(q));
 
-	if (!shared_ind(q))
-		xchg(q->irq_ptr->dsci, 0);
-
+	clear_nonshared_ind(irq_ptr);
 	qdio_stop_polling(q);
 	clear_bit(QDIO_QUEUE_IRQS_DISABLED, &q->u.in.queue_irq_state);
 
@@ -1729,7 +1727,7 @@ int qdio_start_irq(struct ccw_device *cdev, int nr)
 	 * We need to check again to not lose initiative after
 	 * resetting the ACK state.
 	 */
-	if (!shared_ind(q) && *q->irq_ptr->dsci)
+	if (test_nonshared_ind(irq_ptr))
 		goto rescan;
 	if (!qdio_inbound_q_done(q))
 		goto rescan;
diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c
index a3e3949..011eade 100644
--- a/drivers/s390/cio/qdio_thinint.c
+++ b/drivers/s390/cio/qdio_thinint.c
@@ -26,17 +26,24 @@
  */
 #define TIQDIO_NR_NONSHARED_IND		63
 #define TIQDIO_NR_INDICATORS		(TIQDIO_NR_NONSHARED_IND + 1)
+#define TIQDIO_SHARED_IND		63
+
+/* device state change indicators */
+struct indicator_t {
+	u32 ind;	/* u32 because of compare-and-swap performance */
+	atomic_t count; /* use count, 0 or 1 for non-shared indicators */
+};
 
 /* list of thin interrupt input queues */
 static LIST_HEAD(tiq_list);
-DEFINE_MUTEX(tiq_list_lock);
+static DEFINE_MUTEX(tiq_list_lock);
 
 /* adapter local summary indicator */
 static u8 *tiqdio_alsi;
 
-struct indicator_t *q_indicators;
+static struct indicator_t *q_indicators;
 
-static u64 last_ai_time;
+u64 last_ai_time;
 
 /* returns addr for the device state change indicator */
 static u32 *get_indicator(void)
@@ -90,6 +97,43 @@ void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr)
 	synchronize_rcu();
 }
 
+static inline int has_multiple_inq_on_dsci(struct qdio_irq *irq_ptr)
+{
+	return irq_ptr->nr_input_qs > 1;
+}
+
+static inline int references_shared_dsci(struct qdio_irq *irq_ptr)
+{
+	return irq_ptr->dsci == &q_indicators[TIQDIO_SHARED_IND].ind;
+}
+
+static inline int shared_ind(struct qdio_irq *irq_ptr)
+{
+	return references_shared_dsci(irq_ptr) ||
+		has_multiple_inq_on_dsci(irq_ptr);
+}
+
+void clear_nonshared_ind(struct qdio_irq *irq_ptr)
+{
+	if (!is_thinint_irq(irq_ptr))
+		return;
+	if (shared_ind(irq_ptr))
+		return;
+	xchg(irq_ptr->dsci, 0);
+}
+
+int test_nonshared_ind(struct qdio_irq *irq_ptr)
+{
+	if (!is_thinint_irq(irq_ptr))
+		return 0;
+	if (shared_ind(irq_ptr))
+		return 0;
+	if (*irq_ptr->dsci)
+		return 1;
+	else
+		return 0;
+}
+
 static inline u32 clear_shared_ind(void)
 {
 	if (!atomic_read(&q_indicators[TIQDIO_SHARED_IND].count))
@@ -119,7 +163,7 @@ static inline void tiqdio_call_inq_handlers(struct qdio_irq *irq)
 			q->u.in.queue_start_poll(q->irq_ptr->cdev, q->nr,
 						 q->irq_ptr->int_parm);
 		} else {
-			if (!shared_ind(q))
+			if (!shared_ind(q->irq_ptr))
 				xchg(q->irq_ptr->dsci, 0);
 
 			/*
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index aec60d5..3c2c923 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -33,7 +33,7 @@
  * The pointer to our (page) of device descriptions.
  */
 static void *kvm_devices;
-struct work_struct hotplug_work;
+static struct work_struct hotplug_work;
 
 struct kvm_device {
 	struct virtio_device vdev;
@@ -334,10 +334,10 @@ static void scan_devices(void)
  */
 static int match_desc(struct device *dev, void *data)
 {
-	if ((ulong)to_kvmdev(dev_to_virtio(dev))->desc == (ulong)data)
-		return 1;
+	struct virtio_device *vdev = dev_to_virtio(dev);
+	struct kvm_device *kdev = to_kvmdev(vdev);
 
-	return 0;
+	return kdev->desc == data;
 }
 
 /*
diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c
index f1fa248..b41fae3 100644
--- a/drivers/s390/net/claw.c
+++ b/drivers/s390/net/claw.c
@@ -63,7 +63,6 @@
 
 #define KMSG_COMPONENT "claw"
 
-#include <linux/kernel_stat.h>
 #include <asm/ccwdev.h>
 #include <asm/ccwgroup.h>
 #include <asm/debug.h>
@@ -291,6 +290,7 @@ static struct ccw_driver claw_ccw_driver = {
 	.ids	= claw_ids,
 	.probe	= ccwgroup_probe_ccwdev,
 	.remove	= ccwgroup_remove_ccwdev,
+	.int_class = IOINT_CLW,
 };
 
 static ssize_t
@@ -645,7 +645,6 @@ claw_irq_handler(struct ccw_device *cdev,
 	struct claw_env  *p_env;
 	struct chbk *p_ch_r=NULL;
 
-	kstat_cpu(smp_processor_id()).irqs[IOINT_CLW]++;
 	CLAW_DBF_TEXT(4, trace, "clawirq");
 	/* Bypass all 'unsolicited interrupts' */
 	privptr = dev_get_drvdata(&cdev->dev);
diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c
index 426787e..5cb93a8 100644
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c
@@ -24,7 +24,6 @@
 #define KMSG_COMPONENT "ctcm"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
-#include <linux/kernel_stat.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -1203,7 +1202,6 @@ static void ctcm_irq_handler(struct ccw_device *cdev,
 	int cstat;
 	int dstat;
 
-	kstat_cpu(smp_processor_id()).irqs[IOINT_CTC]++;
 	CTCM_DBF_TEXT_(TRACE, CTC_DBF_DEBUG,
 		"Enter %s(%s)", CTCM_FUNTAIL, dev_name(&cdev->dev));
 
@@ -1769,6 +1767,7 @@ static struct ccw_driver ctcm_ccw_driver = {
 	.ids	= ctcm_ids,
 	.probe	= ccwgroup_probe_ccwdev,
 	.remove	= ccwgroup_remove_ccwdev,
+	.int_class = IOINT_CTC,
 };
 
 static struct ccwgroup_driver ctcm_group_driver = {
diff --git a/drivers/s390/net/ctcm_sysfs.c b/drivers/s390/net/ctcm_sysfs.c
index 8305319..650aec1 100644
--- a/drivers/s390/net/ctcm_sysfs.c
+++ b/drivers/s390/net/ctcm_sysfs.c
@@ -159,7 +159,7 @@ static ssize_t ctcm_proto_store(struct device *dev,
 	return count;
 }
 
-const char *ctcm_type[] = {
+static const char *ctcm_type[] = {
 	"not a channel",
 	"CTC/A",
 	"FICON channel",
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index fb246b94..c28713d 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c
@@ -26,7 +26,6 @@
 #define KMSG_COMPONENT		"lcs"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
-#include <linux/kernel_stat.h>
 #include <linux/module.h>
 #include <linux/if.h>
 #include <linux/netdevice.h>
@@ -1399,7 +1398,6 @@ lcs_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb)
 	int rc, index;
 	int cstat, dstat;
 
-	kstat_cpu(smp_processor_id()).irqs[IOINT_LCS]++;
 	if (lcs_check_irb_error(cdev, irb))
 		return;
 
@@ -1972,7 +1970,7 @@ lcs_portno_store (struct device *dev, struct device_attribute *attr, const char
 
 static DEVICE_ATTR(portno, 0644, lcs_portno_show, lcs_portno_store);
 
-const char *lcs_type[] = {
+static const char *lcs_type[] = {
 	"not a channel",
 	"2216 parallel",
 	"2216 channel",
@@ -2399,6 +2397,7 @@ static struct ccw_driver lcs_ccw_driver = {
 	.ids	= lcs_ids,
 	.probe	= ccwgroup_probe_ccwdev,
 	.remove	= ccwgroup_remove_ccwdev,
+	.int_class = IOINT_LCS,
 };
 
 /**
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index ce73520..e4c1176 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1415,7 +1415,7 @@ static int qeth_l3_send_checksum_command(struct qeth_card *card)
 	return 0;
 }
 
-int qeth_l3_set_rx_csum(struct qeth_card *card, int on)
+static int qeth_l3_set_rx_csum(struct qeth_card *card, int on)
 {
 	int rc = 0;
 
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 7405407..5c4abce 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -10,6 +10,7 @@
 #define ELFCORE_ADDR_ERR	(-2ULL)
 
 extern unsigned long long elfcorehdr_addr;
+extern unsigned long long elfcorehdr_size;
 
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
 						unsigned long, int);
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 110821c..31f0508 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -395,6 +395,7 @@ typedef struct elf64_shdr {
 #define NT_S390_CTRS	0x304		/* s390 control registers */
 #define NT_S390_PREFIX	0x305		/* s390 prefix register */
 #define NT_S390_LAST_BREAK	0x306	/* s390 breaking event address */
+#define NT_S390_SYSTEM_CALL	0x307	/* s390 system call restart data */
 #define NT_ARM_VFP	0x400		/* ARM VFP/NEON registers */
 
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index c2478a3..fe45136 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -33,6 +33,14 @@
 #error KEXEC_ARCH not defined
 #endif
 
+#ifndef KEXEC_CRASH_CONTROL_MEMORY_LIMIT
+#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT KEXEC_CONTROL_MEMORY_LIMIT
+#endif
+
+#ifndef KEXEC_CRASH_MEM_ALIGN
+#define KEXEC_CRASH_MEM_ALIGN PAGE_SIZE
+#endif
+
 #define KEXEC_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4)
 #define KEXEC_CORE_NOTE_NAME "CORE"
 #define KEXEC_CORE_NOTE_NAME_BYTES ALIGN(sizeof(KEXEC_CORE_NOTE_NAME), 4)
@@ -129,6 +137,8 @@ extern void crash_kexec(struct pt_regs *);
 int kexec_should_crash(struct task_struct *);
 void crash_save_cpu(struct pt_regs *regs, int cpu);
 void crash_save_vmcoreinfo(void);
+void crash_map_reserved_pages(void);
+void crash_unmap_reserved_pages(void);
 void arch_crash_save_vmcoreinfo(void);
 void vmcoreinfo_append_str(const char *fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
index 5f85690..69ebf33 100644
--- a/kernel/crash_dump.c
+++ b/kernel/crash_dump.c
@@ -20,8 +20,15 @@ unsigned long saved_max_pfn;
 unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
 
 /*
+ * stores the size of elf header of crash image
+ */
+unsigned long long elfcorehdr_size;
+
+/*
  * elfcorehdr= specifies the location of elf core header stored by the crashed
  * kernel. This option will be passed by kexec loader to the capture kernel.
+ *
+ * Syntax: elfcorehdr=[size[KMG]@]offset[KMG]
  */
 static int __init setup_elfcorehdr(char *arg)
 {
@@ -29,6 +36,10 @@ static int __init setup_elfcorehdr(char *arg)
 	if (!arg)
 		return -EINVAL;
 	elfcorehdr_addr = memparse(arg, &end);
+	if (*end == '@') {
+		elfcorehdr_size = elfcorehdr_addr;
+		elfcorehdr_addr = memparse(end + 1, &end);
+	}
 	return end > arg ? 0 : -EINVAL;
 }
 early_param("elfcorehdr", setup_elfcorehdr);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 296fbc8..dc7bc08 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -498,7 +498,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
 	while (hole_end <= crashk_res.end) {
 		unsigned long i;
 
-		if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT)
+		if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
 			break;
 		if (hole_end > crashk_res.end)
 			break;
@@ -999,6 +999,7 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
 		kimage_free(xchg(&kexec_crash_image, NULL));
 		result = kimage_crash_alloc(&image, entry,
 					    nr_segments, segments);
+		crash_map_reserved_pages();
 	}
 	if (result)
 		goto out;
@@ -1015,6 +1016,8 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
 				goto out;
 		}
 		kimage_terminate(image);
+		if (flags & KEXEC_ON_CRASH)
+			crash_unmap_reserved_pages();
 	}
 	/* Install the new kernel, and Uninstall the old */
 	image = xchg(dest_image, image);
@@ -1026,6 +1029,18 @@ out:
 	return result;
 }
 
+/*
+ * Add and remove page tables for crashkernel memory
+ *
+ * Provide an empty default implementation here -- architecture
+ * code may override this
+ */
+void __weak crash_map_reserved_pages(void)
+{}
+
+void __weak crash_unmap_reserved_pages(void)
+{}
+
 #ifdef CONFIG_COMPAT
 asmlinkage long compat_sys_kexec_load(unsigned long entry,
 				unsigned long nr_segments,
@@ -1134,14 +1149,16 @@ int crash_shrink_memory(unsigned long new_size)
 		goto unlock;
 	}
 
-	start = roundup(start, PAGE_SIZE);
-	end = roundup(start + new_size, PAGE_SIZE);
+	start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
+	end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
 
+	crash_map_reserved_pages();
 	crash_free_reserved_phys_range(end, crashk_res.end);
 
 	if ((start == end) && (crashk_res.parent != NULL))
 		release_resource(&crashk_res);
 	crashk_res.end = end - 1;
+	crash_unmap_reserved_pages();
 
 unlock:
 	mutex_unlock(&kexec_mutex);
@@ -1380,24 +1397,23 @@ int __init parse_crashkernel(char *cmdline,
 	}
 
-
-void crash_save_vmcoreinfo(void)
+static void update_vmcoreinfo_note(void)
 {
-	u32 *buf;
+	u32 *buf = vmcoreinfo_note;
 
 	if (!vmcoreinfo_size)
 		return;
-
-	vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());
-
-	buf = (u32 *)vmcoreinfo_note;
-
 	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
 			      vmcoreinfo_size);
-
 	final_note(buf);
 }
 
+void crash_save_vmcoreinfo(void)
+{
+	vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());
+	update_vmcoreinfo_note();
+}
+
 void vmcoreinfo_append_str(const char *fmt, ...)
 {
 	va_list args;
@@ -1483,6 +1499,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_NUMBER(PG_swapcache);
 
 	arch_crash_save_vmcoreinfo();
+	update_vmcoreinfo_note();
 
 	return 0;
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2d2ecdc..2fe2bc2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -151,14 +151,6 @@ extern int pwrsw_enabled;
 extern int unaligned_enabled;
 #endif
 
-#ifdef CONFIG_S390
-#ifdef CONFIG_MATHEMU
-extern int sysctl_ieee_emulation_warnings;
-#endif
-extern int sysctl_userprocess_debug;
-extern int spin_retry;
-#endif
-
 #ifdef CONFIG_IA64
 extern int no_unaligned_warning;
 extern int unaligned_dump_stack;
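
Two of the generic changes above are easiest to follow with a concrete case in mind. With the new parser in kernel/crash_dump.c, a capture kernel booted with, say, elfcorehdr=64K@768M (values chosen purely for illustration) first parses 64K, sees the '@', stores that value in elfcorehdr_size and then parses 768M into elfcorehdr_addr; the old single-value form elfcorehdr=768M still works and leaves elfcorehdr_size at zero.

The __weak crash_map_reserved_pages()/crash_unmap_reserved_pages() pair in kernel/kexec.c exists so that an architecture whose crashkernel region is not permanently mapped can hook in simply by providing non-weak definitions of the two functions. A minimal sketch of such an override follows; the file path and the comment bodies are illustrative assumptions, not the actual architecture implementation from this series:

	/* arch/<arch>/kernel/machine_kexec.c - illustrative override of the weak hooks */
	#include <linux/kexec.h>

	/* called by the kexec_load() path and crash_shrink_memory() before they touch crashk_res */
	void crash_map_reserved_pages(void)
	{
		/* create page-table entries covering crashk_res.start .. crashk_res.end */
	}

	/* called again once the crash kernel segments are loaded or the region is resized */
	void crash_unmap_reserved_pages(void)
	{
		/* remove the temporary mappings */
	}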