Diffstat (limited to 'arch/s390')
36 files changed, 465 insertions, 433 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index bb63499..8ca60f8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -92,6 +92,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT @@ -116,7 +117,6 @@ config S390 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP @@ -137,7 +137,6 @@ config S390 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 if 32BIT select HAVE_VIRT_CPU_ACCOUNTING - select KTIME_SCALAR if 32BIT select MODULES_USE_ELF_RELA select NO_BOOTMEM select OLD_SIGACTION diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index fd09a10..3ca1894 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -366,7 +366,6 @@ CONFIG_VIRTIO_BLK=y CONFIG_ENCLOSURE_SERVICES=m CONFIG_RAID_ATTRS=m CONFIG_SCSI=y -CONFIG_SCSI_TGT=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_CHR_DEV_OSST=m @@ -380,7 +379,6 @@ CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SPI_ATTRS=m CONFIG_SCSI_SAS_LIBSAS=m CONFIG_SCSI_SRP_ATTRS=m -CONFIG_SCSI_SRP_TGT_ATTRS=y CONFIG_ISCSI_TCP=m CONFIG_LIBFCOE=m CONFIG_SCSI_DEBUG=m diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index b061180..4830aa6 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -363,7 +363,6 @@ CONFIG_VIRTIO_BLK=y CONFIG_ENCLOSURE_SERVICES=m CONFIG_RAID_ATTRS=m CONFIG_SCSI=y -CONFIG_SCSI_TGT=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_CHR_DEV_OSST=m @@ -377,7 +376,6 @@ CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SPI_ATTRS=m CONFIG_SCSI_SAS_LIBSAS=m CONFIG_SCSI_SRP_ATTRS=m -CONFIG_SCSI_SRP_TGT_ATTRS=y CONFIG_ISCSI_TCP=m CONFIG_LIBFCOE=m CONFIG_SCSI_DEBUG=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index d279baa..61db449 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -361,7 +361,6 @@ CONFIG_VIRTIO_BLK=y CONFIG_ENCLOSURE_SERVICES=m CONFIG_RAID_ATTRS=m CONFIG_SCSI=y -CONFIG_SCSI_TGT=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_CHR_DEV_OSST=m @@ -375,7 +374,6 @@ CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SPI_ATTRS=m CONFIG_SCSI_SAS_LIBSAS=m CONFIG_SCSI_SRP_ATTRS=m -CONFIG_SCSI_SRP_TGT_ATTRS=y CONFIG_ISCSI_TCP=m CONFIG_LIBFCOE=m CONFIG_SCSI_DEBUG=m diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 4181d7b..773bef7 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -305,7 +305,6 @@ struct kvm_s390_local_interrupt { struct list_head list; atomic_t active; struct kvm_s390_float_interrupt *float_int; - int timer_due; /* event indicator for waitqueue below */ wait_queue_head_t *wq; atomic_t *cpuflags; unsigned int action_bits; @@ -367,7 +366,6 @@ struct kvm_vcpu_arch { s390_fp_regs guest_fpregs; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; - struct tasklet_struct tasklet; struct kvm_s390_pgm_info pgm; union { struct cpuid cpu_id; @@ -418,6 +416,7 @@ struct kvm_arch{ int css_support; int use_irqchip; int use_cmma; + int user_cpu_state_ctrl; struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; wait_queue_head_t ipte_wq; spinlock_t start_stop_lock; diff 
--git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index fcba5e0..b76317c 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -287,7 +287,14 @@ extern unsigned long MODULES_END; #define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ #define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ -#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT + +#define _SEGMENT_ENTRY_DIRTY 0 /* No sw dirty bit for 31-bit */ +#define _SEGMENT_ENTRY_YOUNG 0 /* No sw young bit for 31-bit */ +#define _SEGMENT_ENTRY_READ 0 /* No sw read bit for 31-bit */ +#define _SEGMENT_ENTRY_WRITE 0 /* No sw write bit for 31-bit */ +#define _SEGMENT_ENTRY_LARGE 0 /* No large pages for 31-bit */ +#define _SEGMENT_ENTRY_BITS_LARGE 0 +#define _SEGMENT_ENTRY_ORIGIN_LARGE 0 #define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) @@ -350,7 +357,7 @@ extern unsigned long MODULES_END; /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL -#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL +#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ #define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ @@ -359,30 +366,34 @@ extern unsigned long MODULES_END; #define _SEGMENT_ENTRY (0) #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) -#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ -#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ -#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */ -#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */ -#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG +#define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */ +#define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */ +#define _SEGMENT_ENTRY_SPLIT 0x0800 /* THP splitting bit */ +#define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */ +#define _SEGMENT_ENTRY_CO 0x0100 /* change-recording override */ +#define _SEGMENT_ENTRY_READ 0x0002 /* SW segment read bit */ +#define _SEGMENT_ENTRY_WRITE 0x0001 /* SW segment write bit */ /* * Segment table entry encoding (R = read-only, I = invalid, y = young bit): - * ..R...I...y. - * prot-none, old ..0...1...1. - * prot-none, young ..1...1...1. - * read-only, old ..1...1...0. - * read-only, young ..1...0...1. - * read-write, old ..0...1...0. - * read-write, young ..0...0...1. 
+ * dy..R...I...wr + * prot-none, clean, old 00..1...1...00 + * prot-none, clean, young 01..1...1...00 + * prot-none, dirty, old 10..1...1...00 + * prot-none, dirty, young 11..1...1...00 + * read-only, clean, old 00..1...1...01 + * read-only, clean, young 01..1...0...01 + * read-only, dirty, old 10..1...1...01 + * read-only, dirty, young 11..1...0...01 + * read-write, clean, old 00..1...1...11 + * read-write, clean, young 01..1...0...11 + * read-write, dirty, old 10..0...1...11 + * read-write, dirty, young 11..0...0...11 * The segment table origin is used to distinguish empty (origin==0) from * read-write, old segment table entries (origin!=0) */ -#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ - -/* Set of bits not changed in pmd_modify */ -#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \ - | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO) +#define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */ /* Page status table bits for virtualization */ #define PGSTE_ACC_BITS 0xf000000000000000UL @@ -455,10 +466,11 @@ extern unsigned long MODULES_END; * Segment entry (large page) protection definitions. */ #define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ - _SEGMENT_ENTRY_NONE) -#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \ _SEGMENT_ENTRY_PROTECT) -#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID) +#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_PROTECT | \ + _SEGMENT_ENTRY_READ) +#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_READ | \ + _SEGMENT_ENTRY_WRITE) static inline int mm_has_pgste(struct mm_struct *mm) { @@ -569,25 +581,23 @@ static inline int pmd_none(pmd_t pmd) static inline int pmd_large(pmd_t pmd) { -#ifdef CONFIG_64BIT return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; -#else - return 0; -#endif } -static inline int pmd_prot_none(pmd_t pmd) +static inline int pmd_pfn(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) && - (pmd_val(pmd) & _SEGMENT_ENTRY_NONE); + unsigned long origin_mask; + + origin_mask = _SEGMENT_ENTRY_ORIGIN; + if (pmd_large(pmd)) + origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE; + return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; } static inline int pmd_bad(pmd_t pmd) { -#ifdef CONFIG_64BIT if (pmd_large(pmd)) return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; -#endif return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } @@ -607,20 +617,22 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { - if (pmd_prot_none(pmd)) - return 0; - return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0; + return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0; +} + +static inline int pmd_dirty(pmd_t pmd) +{ + int dirty = 1; + if (pmd_large(pmd)) + dirty = (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; + return dirty; } static inline int pmd_young(pmd_t pmd) { - int young = 0; -#ifdef CONFIG_64BIT - if (pmd_prot_none(pmd)) - young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0; - else + int young = 1; + if (pmd_large(pmd)) young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; -#endif return young; } @@ -1391,7 +1403,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) +#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) /* Find an entry in the lowest level page table.. 
*/ #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) @@ -1413,41 +1425,75 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) return pgprot_val(SEGMENT_WRITE); } -static inline pmd_t pmd_mkyoung(pmd_t pmd) +static inline pmd_t pmd_wrprotect(pmd_t pmd) { -#ifdef CONFIG_64BIT - if (pmd_prot_none(pmd)) { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE; + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + return pmd; +} + +static inline pmd_t pmd_mkwrite(pmd_t pmd) +{ + pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE; + if (pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) + return pmd; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + return pmd; +} + +static inline pmd_t pmd_mkclean(pmd_t pmd) +{ + if (pmd_large(pmd)) { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY; pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - } else { + } + return pmd; +} + +static inline pmd_t pmd_mkdirty(pmd_t pmd) +{ + if (pmd_large(pmd)) { + pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY; + if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + } + return pmd; +} + +static inline pmd_t pmd_mkyoung(pmd_t pmd) +{ + if (pmd_large(pmd)) { pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; - pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; + if (pmd_val(pmd) & _SEGMENT_ENTRY_READ) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; } -#endif return pmd; } static inline pmd_t pmd_mkold(pmd_t pmd) { -#ifdef CONFIG_64BIT - if (pmd_prot_none(pmd)) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; - } else { + if (pmd_large(pmd)) { pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; } -#endif return pmd; } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - int young; - - young = pmd_young(pmd); - pmd_val(pmd) &= _SEGMENT_CHG_MASK; + if (pmd_large(pmd)) { + pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE | + _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG | + _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT; + pmd_val(pmd) |= massage_pgprot_pmd(newprot); + if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)) + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + return pmd; + } + pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN; pmd_val(pmd) |= massage_pgprot_pmd(newprot); - if (young) - pmd = pmd_mkyoung(pmd); return pmd; } @@ -1455,16 +1501,9 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) { pmd_t __pmd; pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); - return pmd_mkyoung(__pmd); + return __pmd; } -static inline pmd_t pmd_mkwrite(pmd_t pmd) -{ - /* Do not clobber PROT_NONE segments! */ - if (!pmd_prot_none(pmd)) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; - return pmd; -} #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ static inline void __pmdp_csp(pmd_t *pmdp) @@ -1555,34 +1594,21 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); static inline int pmd_trans_splitting(pmd_t pmd) { - return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; + return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) && + (pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT); } static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { - if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1) - pmd_val(entry) |= _SEGMENT_ENTRY_CO; *pmdp = entry; } static inline pmd_t pmd_mkhuge(pmd_t pmd) { pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; - return pmd; -} - -static inline pmd_t pmd_wrprotect(pmd_t pmd) -{ - /* Do not clobber PROT_NONE segments! 
*/ - if (!pmd_prot_none(pmd)) - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - return pmd; -} - -static inline pmd_t pmd_mkdirty(pmd_t pmd) -{ - /* No dirty bit in the segment table entry. */ + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; return pmd; } @@ -1647,11 +1673,6 @@ static inline int has_transparent_hugepage(void) { return MACHINE_HAS_HPAGE ? 1 : 0; } - -static inline unsigned long pmd_pfn(pmd_t pmd) -{ - return pmd_val(pmd) >> PAGE_SHIFT; -} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 6f02d45..e568fc8 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -217,7 +217,7 @@ static inline void cpu_relax(void) barrier(); } -#define arch_mutex_cpu_relax() barrier() +#define cpu_relax_lowlatency() barrier() static inline void psw_set_key(unsigned int key) { diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index d786c63..06f3034 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -415,6 +415,10 @@ struct qdio_brinfo_entry_l2 { #define QDIO_FLAG_SYNC_OUTPUT 0x02 #define QDIO_FLAG_PCI_OUT 0x10 +int qdio_alloc_buffers(struct qdio_buffer **buf, unsigned int count); +void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count); +void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count); + extern int qdio_allocate(struct qdio_initialize *); extern int qdio_establish(struct qdio_initialize *); extern int qdio_activate(struct ccw_device *); diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index df38c70..18ea9e3 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -51,8 +51,8 @@ static inline int restore_fp_ctl(u32 *fpc) return 0; asm volatile( - "0: lfpc %1\n" - " la %0,0\n" + " lfpc %1\n" + "0: la %0,0\n" "1:\n" EX_TABLE(0b,1b) : "=d" (rc) : "Q" (*fpc), "0" (-EINVAL)); diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index abad78d..5bc1259 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -54,7 +54,7 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - regs->gprs[2] = error ? -error : val; + regs->gprs[2] = error ? error : val; } static inline void syscall_get_arguments(struct task_struct *task, diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 7366373..08fe6da 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -16,6 +16,7 @@ header-y += ioctls.h header-y += ipcbuf.h header-y += kvm.h header-y += kvm_para.h +header-y += kvm_perf.h header-y += kvm_virtio.h header-y += mman.h header-y += monwriter.h diff --git a/arch/s390/include/uapi/asm/kvm_perf.h b/arch/s390/include/uapi/asm/kvm_perf.h new file mode 100644 index 0000000..3972827 --- /dev/null +++ b/arch/s390/include/uapi/asm/kvm_perf.h @@ -0,0 +1,25 @@ +/* + * Definitions for perf-kvm on s390 + * + * Copyright 2014 IBM Corp. + * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ */ + +#ifndef __LINUX_KVM_PERF_S390_H +#define __LINUX_KVM_PERF_S390_H + +#include <asm/sie.h> + +#define DECODE_STR_LEN 40 + +#define VCPU_ID "id" + +#define KVM_ENTRY_TRACE "kvm:kvm_s390_sie_enter" +#define KVM_EXIT_TRACE "kvm:kvm_s390_sie_exit" +#define KVM_EXIT_REASON "icptcode" + +#endif diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h index 5d9cc19..d4096fd 100644 --- a/arch/s390/include/uapi/asm/sie.h +++ b/arch/s390/include/uapi/asm/sie.h @@ -108,6 +108,7 @@ exit_code_ipa0(0xB2, 0x17, "STETR"), \ exit_code_ipa0(0xB2, 0x18, "PC"), \ exit_code_ipa0(0xB2, 0x20, "SERVC"), \ + exit_code_ipa0(0xB2, 0x21, "IPTE"), \ exit_code_ipa0(0xB2, 0x28, "PT"), \ exit_code_ipa0(0xB2, 0x29, "ISKE"), \ exit_code_ipa0(0xB2, 0x2a, "RRBE"), \ diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 7ba7d67..e88d35d 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -437,11 +437,11 @@ ENTRY(startup_kdump) #if defined(CONFIG_64BIT) #if defined(CONFIG_MARCH_ZEC12) - .long 3, 0xc100efea, 0xf46ce800, 0x00400000 + .long 3, 0xc100eff2, 0xf46ce800, 0x00400000 #elif defined(CONFIG_MARCH_Z196) - .long 2, 0xc100efea, 0xf46c0000 + .long 2, 0xc100eff2, 0xf46c0000 #elif defined(CONFIG_MARCH_Z10) - .long 2, 0xc100efea, 0xf0680000 + .long 2, 0xc100eff2, 0xf0680000 #elif defined(CONFIG_MARCH_Z9_109) .long 1, 0xc100efc2 #elif defined(CONFIG_MARCH_Z990) diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 99b0b09..8eb8244 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -30,6 +30,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); EXPORT_PER_CPU_SYMBOL_GPL(irq_stat); struct irq_class { + int irq; char *name; char *desc; }; @@ -45,9 +46,9 @@ struct irq_class { * up with having a sum which accounts each interrupt twice. */ static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = { - [EXT_INTERRUPT] = {.name = "EXT"}, - [IO_INTERRUPT] = {.name = "I/O"}, - [THIN_INTERRUPT] = {.name = "AIO"}, + {.irq = EXT_INTERRUPT, .name = "EXT"}, + {.irq = IO_INTERRUPT, .name = "I/O"}, + {.irq = THIN_INTERRUPT, .name = "AIO"}, }; /* @@ -56,38 +57,38 @@ static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = { * In addition this list contains non external / I/O events like NMIs. 
*/ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { - [IRQEXT_CLK] = {.name = "CLK", .desc = "[EXT] Clock Comparator"}, - [IRQEXT_EXC] = {.name = "EXC", .desc = "[EXT] External Call"}, - [IRQEXT_EMS] = {.name = "EMS", .desc = "[EXT] Emergency Signal"}, - [IRQEXT_TMR] = {.name = "TMR", .desc = "[EXT] CPU Timer"}, - [IRQEXT_TLA] = {.name = "TAL", .desc = "[EXT] Timing Alert"}, - [IRQEXT_PFL] = {.name = "PFL", .desc = "[EXT] Pseudo Page Fault"}, - [IRQEXT_DSD] = {.name = "DSD", .desc = "[EXT] DASD Diag"}, - [IRQEXT_VRT] = {.name = "VRT", .desc = "[EXT] Virtio"}, - [IRQEXT_SCP] = {.name = "SCP", .desc = "[EXT] Service Call"}, - [IRQEXT_IUC] = {.name = "IUC", .desc = "[EXT] IUCV"}, - [IRQEXT_CMS] = {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, - [IRQEXT_CMC] = {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, - [IRQEXT_CMR] = {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI"}, - [IRQIO_CIO] = {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, - [IRQIO_QAI] = {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, - [IRQIO_DAS] = {.name = "DAS", .desc = "[I/O] DASD"}, - [IRQIO_C15] = {.name = "C15", .desc = "[I/O] 3215"}, - [IRQIO_C70] = {.name = "C70", .desc = "[I/O] 3270"}, - [IRQIO_TAP] = {.name = "TAP", .desc = "[I/O] Tape"}, - [IRQIO_VMR] = {.name = "VMR", .desc = "[I/O] Unit Record Devices"}, - [IRQIO_LCS] = {.name = "LCS", .desc = "[I/O] LCS"}, - [IRQIO_CLW] = {.name = "CLW", .desc = "[I/O] CLAW"}, - [IRQIO_CTC] = {.name = "CTC", .desc = "[I/O] CTC"}, - [IRQIO_APB] = {.name = "APB", .desc = "[I/O] AP Bus"}, - [IRQIO_ADM] = {.name = "ADM", .desc = "[I/O] EADM Subchannel"}, - [IRQIO_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"}, - [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" }, - [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" }, - [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, - [IRQIO_VAI] = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, - [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"}, - [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, + {.irq = IRQEXT_CLK, .name = "CLK", .desc = "[EXT] Clock Comparator"}, + {.irq = IRQEXT_EXC, .name = "EXC", .desc = "[EXT] External Call"}, + {.irq = IRQEXT_EMS, .name = "EMS", .desc = "[EXT] Emergency Signal"}, + {.irq = IRQEXT_TMR, .name = "TMR", .desc = "[EXT] CPU Timer"}, + {.irq = IRQEXT_TLA, .name = "TAL", .desc = "[EXT] Timing Alert"}, + {.irq = IRQEXT_PFL, .name = "PFL", .desc = "[EXT] Pseudo Page Fault"}, + {.irq = IRQEXT_DSD, .name = "DSD", .desc = "[EXT] DASD Diag"}, + {.irq = IRQEXT_VRT, .name = "VRT", .desc = "[EXT] Virtio"}, + {.irq = IRQEXT_SCP, .name = "SCP", .desc = "[EXT] Service Call"}, + {.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"}, + {.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, + {.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, + {.irq = IRQEXT_CMR, .name = "CMR", .desc = "[EXT] CPU-Measurement: RI"}, + {.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, + {.irq = IRQIO_QAI, .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, + {.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"}, + {.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"}, + {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, + {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"}, + {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, + {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"}, + 
{.irq = IRQIO_CLW, .name = "CLW", .desc = "[I/O] CLAW"}, + {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, + {.irq = IRQIO_APB, .name = "APB", .desc = "[I/O] AP Bus"}, + {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, + {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, + {.irq = IRQIO_PCI, .name = "PCI", .desc = "[I/O] PCI Interrupt" }, + {.irq = IRQIO_MSI, .name = "MSI", .desc = "[I/O] MSI Interrupt" }, + {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, + {.irq = IRQIO_VAI, .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, + {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"}, + {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"}, }; void __init init_IRQ(void) @@ -116,33 +117,37 @@ void do_IRQ(struct pt_regs *regs, int irq) */ int show_interrupts(struct seq_file *p, void *v) { - int irq = *(loff_t *) v; - int cpu; + int index = *(loff_t *) v; + int cpu, irq; get_online_cpus(); - if (irq == 0) { + if (index == 0) { seq_puts(p, " "); for_each_online_cpu(cpu) seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); goto out; } - if (irq < NR_IRQS) { - if (irq >= NR_IRQS_BASE) + if (index < NR_IRQS) { + if (index >= NR_IRQS_BASE) goto out; - seq_printf(p, "%s: ", irqclass_main_desc[irq].name); + /* Adjust index to process irqclass_main_desc array entries */ + index--; + seq_printf(p, "%s: ", irqclass_main_desc[index].name); + irq = irqclass_main_desc[index].irq; for_each_online_cpu(cpu) seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); seq_putc(p, '\n'); goto out; } - for (irq = 0; irq < NR_ARCH_IRQS; irq++) { - seq_printf(p, "%s: ", irqclass_sub_desc[irq].name); + for (index = 0; index < NR_ARCH_IRQS; index++) { + seq_printf(p, "%s: ", irqclass_sub_desc[index].name); + irq = irqclass_sub_desc[index].irq; for_each_online_cpu(cpu) seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).irqs[irq]); - if (irqclass_sub_desc[irq].desc) - seq_printf(p, " %s", irqclass_sub_desc[irq].desc); + if (irqclass_sub_desc[index].desc) + seq_printf(p, " %s", irqclass_sub_desc[index].desc); seq_putc(p, '\n'); } out: diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 08dcf21..433c6db 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -21,13 +21,9 @@ ENTRY(_mcount) ENTRY(ftrace_caller) #endif stm %r2,%r5,16(%r15) - bras %r1,2f + bras %r1,1f 0: .long ftrace_trace_function -1: .long function_trace_stop -2: l %r2,1b-0b(%r1) - icm %r2,0xf,0(%r2) - jnz 3f - st %r14,56(%r15) +1: st %r14,56(%r15) lr %r0,%r15 ahi %r15,-96 l %r3,100(%r15) @@ -50,7 +46,7 @@ ENTRY(ftrace_graph_caller) #endif ahi %r15,96 l %r14,56(%r15) -3: lm %r2,%r5,16(%r15) + lm %r2,%r5,16(%r15) br %r14 #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S index 1c52eae..c67a8bf 100644 --- a/arch/s390/kernel/mcount64.S +++ b/arch/s390/kernel/mcount64.S @@ -20,9 +20,6 @@ ENTRY(_mcount) ENTRY(ftrace_caller) #endif - larl %r1,function_trace_stop - icm %r1,0xf,0(%r1) - bnzr %r14 stmg %r2,%r5,32(%r15) stg %r14,112(%r15) lgr %r1,%r15 diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index ea75d01..d3194de 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -411,12 +411,6 @@ static int cpumf_pmu_event_init(struct perf_event *event) case PERF_TYPE_HARDWARE: case PERF_TYPE_HW_CACHE: case PERF_TYPE_RAW: - /* The CPU measurement counter facility does not have overflow - * interrupts to do sampling. 
Sampling must be provided by - * external means, for example, by timers. - */ - if (is_sampling_event(event)) - return -ENOENT; err = __hw_perf_event_init(event); break; default: @@ -681,6 +675,12 @@ static int __init cpumf_pmu_init(void) goto out; } + /* The CPU measurement counter facility does not have overflow + * interrupts to do sampling. Sampling must be provided by + * external means, for example, by timers. + */ + cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + cpumf_pmu.attr_groups = cpumf_cf_event_group(); rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); if (rc) { diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 2d716734..5dc7ad9 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -334,9 +334,14 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) unsigned long mask = PSW_MASK_USER; mask |= is_ri_task(child) ? PSW_MASK_RI : 0; - if ((data & ~mask) != PSW_USER_BITS) + if ((data ^ PSW_USER_BITS) & ~mask) + /* Invalid psw mask. */ + return -EINVAL; + if ((data & PSW_MASK_ASC) == PSW_ASC_HOME) + /* Invalid address-space-control bits */ return -EINVAL; if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)) + /* Invalid addressing mode bits */ return -EINVAL; } *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; @@ -672,9 +677,12 @@ static int __poke_user_compat(struct task_struct *child, mask |= is_ri_task(child) ? PSW32_MASK_RI : 0; /* Build a 64 bit psw mask from 31 bit mask. */ - if ((tmp & ~mask) != PSW32_USER_BITS) + if ((tmp ^ PSW32_USER_BITS) & ~mask) /* Invalid psw mask. */ return -EINVAL; + if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME) + /* Invalid address-space-control bits */ + return -EINVAL; regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | (regs->psw.mask & PSW_MASK_BA) | (__u64)(tmp & mask) << 32; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 1e2264b..ae1d5be 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -501,6 +501,8 @@ static int kdump_mem_notifier(struct notifier_block *nb, { struct memory_notify *arg = data; + if (action != MEM_GOING_OFFLINE) + return NOTIFY_OK; if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) return NOTIFY_BAD; if (arg->start_pfn > PFN_DOWN(crashk_res.end)) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 0931b11..4cef607 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -214,26 +214,26 @@ void update_vsyscall(struct timekeeper *tk) { u64 nsecps; - if (tk->clock != &clocksource_tod) + if (tk->tkr.clock != &clocksource_tod) return; /* Make userspace gettimeofday spin until we're done. 
*/ ++vdso_data->tb_update_count; smp_wmb(); - vdso_data->xtime_tod_stamp = tk->clock->cycle_last; + vdso_data->xtime_tod_stamp = tk->tkr.cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->xtime_nsec; + vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; vdso_data->wtom_clock_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdso_data->wtom_clock_nsec = tk->xtime_nsec + - + ((u64) tk->wall_to_monotonic.tv_nsec << tk->shift); - nsecps = (u64) NSEC_PER_SEC << tk->shift; + vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec + + + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift); + nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift; while (vdso_data->wtom_clock_nsec >= nsecps) { vdso_data->wtom_clock_nsec -= nsecps; vdso_data->wtom_clock_sec++; } - vdso_data->tk_mult = tk->mult; - vdso_data->tk_shift = tk->shift; + vdso_data->tk_mult = tk->tkr.mult; + vdso_data->tk_shift = tk->tkr.shift; smp_wmb(); ++vdso_data->tb_update_count; } diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 0161675..59bd8f9 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -176,7 +176,8 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } - kvm_s390_vcpu_stop(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) + kvm_s390_vcpu_stop(vcpu); vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index a0b586c..eaf4629 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -56,32 +56,26 @@ static int handle_noop(struct kvm_vcpu *vcpu) static int handle_stop(struct kvm_vcpu *vcpu) { int rc = 0; + unsigned int action_bits; vcpu->stat.exit_stop_request++; - spin_lock_bh(&vcpu->arch.local_int.lock); - trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits); - if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { - kvm_s390_vcpu_stop(vcpu); - vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP; - VCPU_EVENT(vcpu, 3, "%s", "cpu stopped"); - rc = -EOPNOTSUPP; - } + action_bits = vcpu->arch.local_int.action_bits; - if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { - vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; - /* store status must be called unlocked. 
Since local_int.lock - * only protects local_int.* and not guest memory we can give - * up the lock here */ - spin_unlock_bh(&vcpu->arch.local_int.lock); + if (!(action_bits & ACTION_STOP_ON_STOP)) + return 0; + + if (action_bits & ACTION_STORE_ON_STOP) { rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_NOADDR); - if (rc >= 0) - rc = -EOPNOTSUPP; - } else - spin_unlock_bh(&vcpu->arch.local_int.lock); - return rc; + if (rc) + return rc; + } + + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) + kvm_s390_vcpu_stop(vcpu); + return -EOPNOTSUPP; } static int handle_validity(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 90c8de2..92528a0 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -158,6 +158,9 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) LCTL_CR10 | LCTL_CR11); vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT); } + + if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) + atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); } static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) @@ -544,13 +547,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) int rc = 0; if (atomic_read(&li->active)) { - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_for_each_entry(inti, &li->list, list) if (__interrupt_is_deliverable(vcpu, inti)) { rc = 1; break; } - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); } if ((!rc) && atomic_read(&fi->active)) { @@ -585,88 +588,56 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) { u64 now, sltime; - DECLARE_WAITQUEUE(wait, current); vcpu->stat.exit_wait_state++; - if (kvm_cpu_has_interrupt(vcpu)) - return 0; - __set_cpu_idle(vcpu); - spin_lock_bh(&vcpu->arch.local_int.lock); - vcpu->arch.local_int.timer_due = 0; - spin_unlock_bh(&vcpu->arch.local_int.lock); + /* fast path */ + if (kvm_cpu_has_pending_timer(vcpu) || kvm_arch_vcpu_runnable(vcpu)) + return 0; if (psw_interrupts_disabled(vcpu)) { VCPU_EVENT(vcpu, 3, "%s", "disabled wait"); - __unset_cpu_idle(vcpu); return -EOPNOTSUPP; /* disabled wait */ } + __set_cpu_idle(vcpu); if (!ckc_interrupts_enabled(vcpu)) { VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); goto no_timer; } now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; - if (vcpu->arch.sie_block->ckc < now) { - __unset_cpu_idle(vcpu); - return 0; - } - sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); - hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); no_timer: srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - spin_lock(&vcpu->arch.local_int.float_int->lock); - spin_lock_bh(&vcpu->arch.local_int.lock); - add_wait_queue(&vcpu->wq, &wait); - while (list_empty(&vcpu->arch.local_int.list) && - list_empty(&vcpu->arch.local_int.float_int->list) && - (!vcpu->arch.local_int.timer_due) && - !signal_pending(current) && - !kvm_s390_si_ext_call_pending(vcpu)) { - set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_bh(&vcpu->arch.local_int.lock); - spin_unlock(&vcpu->arch.local_int.float_int->lock); - schedule(); - spin_lock(&vcpu->arch.local_int.float_int->lock); - spin_lock_bh(&vcpu->arch.local_int.lock); - } + kvm_vcpu_block(vcpu); __unset_cpu_idle(vcpu); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&vcpu->wq, &wait); - spin_unlock_bh(&vcpu->arch.local_int.lock); - spin_unlock(&vcpu->arch.local_int.float_int->lock); vcpu->srcu_idx = 
srcu_read_lock(&vcpu->kvm->srcu); hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); return 0; } -void kvm_s390_tasklet(unsigned long parm) +void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm; - - spin_lock(&vcpu->arch.local_int.lock); - vcpu->arch.local_int.timer_due = 1; - if (waitqueue_active(&vcpu->wq)) + if (waitqueue_active(&vcpu->wq)) { + /* + * The vcpu gave up the cpu voluntarily, mark it as a good + * yield-candidate. + */ + vcpu->preempted = true; wake_up_interruptible(&vcpu->wq); - spin_unlock(&vcpu->arch.local_int.lock); + } } -/* - * low level hrtimer wake routine. Because this runs in hardirq context - * we schedule a tasklet to do the real work. - */ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); - vcpu->preempted = true; - tasklet_schedule(&vcpu->arch.tasklet); + kvm_s390_vcpu_wakeup(vcpu); return HRTIMER_NORESTART; } @@ -676,13 +647,13 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_interrupt_info *n, *inti = NULL; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_for_each_entry_safe(inti, n, &li->list, list) { list_del(&inti->list); kfree(inti); } atomic_set(&li->active, 0); - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); /* clear pending external calls set by sigp interpretation facility */ atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); @@ -701,7 +672,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) if (atomic_read(&li->active)) { do { deliver = 0; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_for_each_entry_safe(inti, n, &li->list, list) { if (__interrupt_is_deliverable(vcpu, inti)) { list_del(&inti->list); @@ -712,7 +683,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) } if (list_empty(&li->list)) atomic_set(&li->active, 0); - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); if (deliver) { __do_deliver_interrupt(vcpu, inti); kfree(inti); @@ -758,7 +729,7 @@ void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) if (atomic_read(&li->active)) { do { deliver = 0; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_for_each_entry_safe(inti, n, &li->list, list) { if ((inti->type == KVM_S390_MCHK) && __interrupt_is_deliverable(vcpu, inti)) { @@ -770,7 +741,7 @@ void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) } if (list_empty(&li->list)) atomic_set(&li->active, 0); - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); if (deliver) { __do_deliver_interrupt(vcpu, inti); kfree(inti); @@ -817,11 +788,11 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1); - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_add(&inti->list, &li->list); atomic_set(&li->active, 1); BUG_ON(waitqueue_active(li->wq)); - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); return 0; } @@ -842,11 +813,11 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, inti->type = KVM_S390_PROGRAM_INT; memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm)); - spin_lock_bh(&li->lock); + spin_lock(&li->lock); list_add(&inti->list, &li->list); atomic_set(&li->active, 1); BUG_ON(waitqueue_active(li->wq)); - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); return 0; } @@ -934,12 +905,10 @@ static int 
__inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) } dst_vcpu = kvm_get_vcpu(kvm, sigcpu); li = &dst_vcpu->arch.local_int; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(li->wq)) - wake_up_interruptible(li->wq); - kvm_get_vcpu(kvm, sigcpu)->preempted = true; - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); + kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); unlock_fi: spin_unlock(&fi->lock); mutex_unlock(&kvm->lock); @@ -1081,7 +1050,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, mutex_lock(&vcpu->kvm->lock); li = &vcpu->arch.local_int; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); if (inti->type == KVM_S390_PROGRAM_INT) list_add(&inti->list, &li->list); else @@ -1090,11 +1059,9 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, if (inti->type == KVM_S390_SIGP_STOP) li->action_bits |= ACTION_STOP_ON_STOP; atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&vcpu->wq)) - wake_up_interruptible(&vcpu->wq); - vcpu->preempted = true; - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); mutex_unlock(&vcpu->kvm->lock); + kvm_s390_vcpu_wakeup(vcpu); return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2f3e14f..339b34a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -166,7 +166,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_ENABLE_CAP_VM: + case KVM_CAP_S390_IRQCHIP: case KVM_CAP_VM_ATTRIBUTES: + case KVM_CAP_MP_STATE: r = 1; break; case KVM_CAP_NR_VCPUS: @@ -595,7 +597,8 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->pp = 0; vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; kvm_clear_async_pf_completion_queue(vcpu); - kvm_s390_vcpu_stop(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) + kvm_s390_vcpu_stop(vcpu); kvm_s390_clear_local_irqs(vcpu); } @@ -647,8 +650,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return rc; } hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, - (unsigned long) vcpu); vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; get_cpu_id(&vcpu->arch.cpu_id); vcpu->arch.cpu_id.version = 0xff; @@ -926,7 +927,7 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) { int rc = 0; - if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED)) + if (!is_vcpu_stopped(vcpu)) rc = -EBUSY; else { vcpu->run->psw_mask = psw.mask; @@ -980,13 +981,34 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; /* not implemented yet */ + /* CHECK_STOP and LOAD are not supported yet */ + return is_vcpu_stopped(vcpu) ? 
KVM_MP_STATE_STOPPED : + KVM_MP_STATE_OPERATING; } int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; /* not implemented yet */ + int rc = 0; + + /* user space knows about this interface - let it control the state */ + vcpu->kvm->arch.user_cpu_state_ctrl = 1; + + switch (mp_state->mp_state) { + case KVM_MP_STATE_STOPPED: + kvm_s390_vcpu_stop(vcpu); + break; + case KVM_MP_STATE_OPERATING: + kvm_s390_vcpu_start(vcpu); + break; + case KVM_MP_STATE_LOAD: + case KVM_MP_STATE_CHECK_STOP: + /* fall through - CHECK_STOP and LOAD are not supported yet */ + default: + rc = -ENXIO; + } + + return rc; } bool kvm_s390_cmma_enabled(struct kvm *kvm) @@ -1045,6 +1067,9 @@ retry: goto retry; } + /* nothing to do, just clear the request */ + clear_bit(KVM_REQ_UNHALT, &vcpu->requests); + return 0; } @@ -1284,7 +1309,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - kvm_s390_vcpu_start(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { + kvm_s390_vcpu_start(vcpu); + } else if (is_vcpu_stopped(vcpu)) { + pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n", + vcpu->vcpu_id); + return -EINVAL; + } switch (kvm_run->exit_reason) { case KVM_EXIT_S390_SIEIC: @@ -1413,11 +1444,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) return kvm_s390_store_status_unloaded(vcpu, addr); } -static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) -{ - return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; -} - static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) { kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); @@ -1451,7 +1477,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); /* Only one cpu at a time may enter/leave the STOPPED state. */ - spin_lock_bh(&vcpu->kvm->arch.start_stop_lock); + spin_lock(&vcpu->kvm->arch.start_stop_lock); online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); for (i = 0; i < online_vcpus; i++) { @@ -1477,7 +1503,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) * Let's play safe and flush the VCPU at startup. */ vcpu->arch.sie_block->ihcpu = 0xffff; - spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock); + spin_unlock(&vcpu->kvm->arch.start_stop_lock); return; } @@ -1491,10 +1517,18 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); /* Only one cpu at a time may enter/leave the STOPPED state. 
*/ - spin_lock_bh(&vcpu->kvm->arch.start_stop_lock); + spin_lock(&vcpu->kvm->arch.start_stop_lock); online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); + /* Need to lock access to action_bits to avoid a SIGP race condition */ + spin_lock(&vcpu->arch.local_int.lock); atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); + + /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ + vcpu->arch.local_int.action_bits &= + ~(ACTION_STOP_ON_STOP | ACTION_STORE_ON_STOP); + spin_unlock(&vcpu->arch.local_int.lock); + __disable_ibs_on_vcpu(vcpu); for (i = 0; i < online_vcpus; i++) { @@ -1512,7 +1546,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) __enable_ibs_on_vcpu(started_vcpu); } - spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock); + spin_unlock(&vcpu->kvm->arch.start_stop_lock); return; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index a8655ed..3862fa2 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -45,9 +45,9 @@ do { \ d_args); \ } while (0) -static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu) +static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) { - return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT; + return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; } static inline int kvm_is_ucontrol(struct kvm *kvm) @@ -129,9 +129,15 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) vcpu->arch.sie_block->gpsw.mask |= cc << 44; } +/* are cpu states controlled by user space */ +static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) +{ + return kvm->arch.user_cpu_state_ctrl != 0; +} + int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); +void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); -void kvm_s390_tasklet(unsigned long parm); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 43079a4..cf243ba 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -125,8 +125,9 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) return rc ? 
rc : SIGP_CC_ORDER_CODE_ACCEPTED; } -static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) +static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action) { + struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int; struct kvm_s390_interrupt_info *inti; int rc = SIGP_CC_ORDER_CODE_ACCEPTED; @@ -135,7 +136,13 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) return -ENOMEM; inti->type = KVM_S390_SIGP_STOP; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); + if (li->action_bits & ACTION_STOP_ON_STOP) { + /* another SIGP STOP is pending */ + kfree(inti); + rc = SIGP_CC_BUSY; + goto out; + } if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { kfree(inti); if ((action & ACTION_STORE_ON_STOP) != 0) @@ -144,19 +151,17 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) } list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); - atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); li->action_bits |= action; - if (waitqueue_active(li->wq)) - wake_up_interruptible(li->wq); + atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); + kvm_s390_vcpu_wakeup(dst_vcpu); out: - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); return rc; } static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) { - struct kvm_s390_local_interrupt *li; struct kvm_vcpu *dst_vcpu = NULL; int rc; @@ -166,9 +171,8 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; - li = &dst_vcpu->arch.local_int; - rc = __inject_sigp_stop(li, action); + rc = __inject_sigp_stop(dst_vcpu, action); VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); @@ -238,7 +242,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, if (!inti) return SIGP_CC_BUSY; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); /* cpu must be in stopped state */ if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { *reg &= 0xffffffff00000000UL; @@ -253,13 +257,12 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); - if (waitqueue_active(li->wq)) - wake_up_interruptible(li->wq); + kvm_s390_vcpu_wakeup(dst_vcpu); rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); out_li: - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); return rc; } @@ -275,9 +278,9 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; - spin_lock_bh(&dst_vcpu->arch.local_int.lock); + spin_lock(&dst_vcpu->arch.local_int.lock); flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); - spin_unlock_bh(&dst_vcpu->arch.local_int.lock); + spin_unlock(&dst_vcpu->arch.local_int.lock); if (!(flags & CPUSTAT_STOPPED)) { *reg &= 0xffffffff00000000UL; *reg |= SIGP_STATUS_INCORRECT_STATE; @@ -338,10 +341,10 @@ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; li = &dst_vcpu->arch.local_int; - spin_lock_bh(&li->lock); + spin_lock(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) rc = SIGP_CC_BUSY; - spin_unlock_bh(&li->lock); + spin_unlock(&li->lock); return rc; } @@ -461,12 +464,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu) dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); BUG_ON(dest_vcpu == NULL); - spin_lock_bh(&dest_vcpu->arch.local_int.lock); - if 
(waitqueue_active(&dest_vcpu->wq)) - wake_up_interruptible(&dest_vcpu->wq); - dest_vcpu->preempted = true; - spin_unlock_bh(&dest_vcpu->arch.local_int.lock); - + kvm_s390_vcpu_wakeup(dest_vcpu); kvm_s390_set_psw_cc(vcpu, SIGP_CC_ORDER_CODE_ACCEPTED); return 0; } diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 0ff66a7..389bc17 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -10,42 +10,33 @@ static inline pmd_t __pte_to_pmd(pte_t pte) { - int none, young, prot; pmd_t pmd; /* - * Convert encoding pte bits pmd bits - * .IR...wrdytp ..R...I...y. - * empty .10...000000 -> ..0...1...0. - * prot-none, clean, old .11...000001 -> ..0...1...1. - * prot-none, clean, young .11...000101 -> ..1...1...1. - * prot-none, dirty, old .10...001001 -> ..0...1...1. - * prot-none, dirty, young .10...001101 -> ..1...1...1. - * read-only, clean, old .11...010001 -> ..1...1...0. - * read-only, clean, young .01...010101 -> ..1...0...1. - * read-only, dirty, old .11...011001 -> ..1...1...0. - * read-only, dirty, young .01...011101 -> ..1...0...1. - * read-write, clean, old .11...110001 -> ..0...1...0. - * read-write, clean, young .01...110101 -> ..0...0...1. - * read-write, dirty, old .10...111001 -> ..0...1...0. - * read-write, dirty, young .00...111101 -> ..0...0...1. - * Huge ptes are dirty by definition, a clean pte is made dirty - * by the conversion. + * Convert encoding pte bits pmd bits + * .IR...wrdytp dy..R...I...wr + * empty .10...000000 -> 00..0...1...00 + * prot-none, clean, old .11...000001 -> 00..1...1...00 + * prot-none, clean, young .11...000101 -> 01..1...1...00 + * prot-none, dirty, old .10...001001 -> 10..1...1...00 + * prot-none, dirty, young .10...001101 -> 11..1...1...00 + * read-only, clean, old .11...010001 -> 00..1...1...01 + * read-only, clean, young .01...010101 -> 01..1...0...01 + * read-only, dirty, old .11...011001 -> 10..1...1...01 + * read-only, dirty, young .01...011101 -> 11..1...0...01 + * read-write, clean, old .11...110001 -> 00..0...1...11 + * read-write, clean, young .01...110101 -> 01..0...0...11 + * read-write, dirty, old .10...111001 -> 10..0...1...11 + * read-write, dirty, young .00...111101 -> 11..0...0...11 */ if (pte_present(pte)) { pmd_val(pmd) = pte_val(pte) & PAGE_MASK; - if (pte_val(pte) & _PAGE_INVALID) - pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; - none = (pte_val(pte) & _PAGE_PRESENT) && - !(pte_val(pte) & _PAGE_READ) && - !(pte_val(pte) & _PAGE_WRITE); - prot = (pte_val(pte) & _PAGE_PROTECT) && - !(pte_val(pte) & _PAGE_WRITE); - young = pte_val(pte) & _PAGE_YOUNG; - if (none || young) - pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; - if (prot || (none && young)) - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_READ) >> 4; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_WRITE) >> 4; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_INVALID) >> 5; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT); + pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10; } else pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; return pmd; @@ -56,34 +47,31 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) pte_t pte; /* - * Convert encoding pmd bits pte bits - * ..R...I...y. .IR...wrdytp - * empty ..0...1...0. -> .10...000000 - * prot-none, old ..0...1...1. -> .10...001001 - * prot-none, young ..1...1...1. -> .10...001101 - * read-only, old ..1...1...0. -> .11...011001 - * read-only, young ..1...0...1. -> .01...011101 - * read-write, old ..0...1...0. 
-> .10...111001 - * read-write, young ..0...0...1. -> .00...111101 - * Huge ptes are dirty by definition + * Convert encoding pmd bits pte bits + * dy..R...I...wr .IR...wrdytp + * empty 00..0...1...00 -> .10...001100 + * prot-none, clean, old 00..0...1...00 -> .10...000001 + * prot-none, clean, young 01..0...1...00 -> .10...000101 + * prot-none, dirty, old 10..0...1...00 -> .10...001001 + * prot-none, dirty, young 11..0...1...00 -> .10...001101 + * read-only, clean, old 00..1...1...01 -> .11...010001 + * read-only, clean, young 01..1...1...01 -> .11...010101 + * read-only, dirty, old 10..1...1...01 -> .11...011001 + * read-only, dirty, young 11..1...1...01 -> .11...011101 + * read-write, clean, old 00..0...1...11 -> .10...110001 + * read-write, clean, young 01..0...1...11 -> .10...110101 + * read-write, dirty, old 10..0...1...11 -> .10...111001 + * read-write, dirty, young 11..0...1...11 -> .10...111101 */ if (pmd_present(pmd)) { - pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY | - (pmd_val(pmd) & PAGE_MASK); - if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) - pte_val(pte) |= _PAGE_INVALID; - if (pmd_prot_none(pmd)) { - if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) - pte_val(pte) |= _PAGE_YOUNG; - } else { - pte_val(pte) |= _PAGE_READ; - if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) - pte_val(pte) |= _PAGE_PROTECT; - else - pte_val(pte) |= _PAGE_WRITE; - if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) - pte_val(pte) |= _PAGE_YOUNG; - } + pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE; + pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_READ) << 4; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT); + pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10; } else pte_val(pte) = _PAGE_INVALID; return pte; @@ -96,6 +84,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pmd = __pte_to_pmd(pte); if (!MACHINE_HAS_HPAGE) { + /* Emulated huge ptes loose the dirty and young bit */ pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; pmd_val(pmd) |= pte_page(pte)[1].index; } else @@ -113,6 +102,8 @@ pte_t huge_ptep_get(pte_t *ptep) origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN; pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; pmd_val(pmd) |= *(unsigned long *) origin; + /* Emulated huge ptes are young and dirty by definition */ + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG | _SEGMENT_ENTRY_DIRTY; } return __pmd_to_pte(pmd); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 37b8241..19daa53 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1279,6 +1279,7 @@ static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb, { unsigned long next, *table, *new; struct page *page; + spinlock_t *ptl; pmd_t *pmd; pmd = pmd_offset(pud, addr); @@ -1296,7 +1297,7 @@ again: if (!new) return -ENOMEM; - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (likely((unsigned long *) pmd_deref(*pmd) == table)) { /* Nuke pmd entry pointing to the "short" page table */ pmdp_flush_lazy(mm, addr, pmd); @@ -1310,7 +1311,7 @@ again: page_table_free_rcu(tlb, table); new = NULL; } - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); if (new) { page_table_free_pgste(new); goto again; @@ -1432,6 +1433,9 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, { VM_BUG_ON(address & ~HPAGE_PMD_MASK); + entry = pmd_mkyoung(entry); + if (dirty) + entry = 
pmd_mkdirty(entry); if (pmd_same(*pmdp, entry)) return 0; pmdp_invalidate(vma, address, pmdp); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index a2cbd87..61e45b7 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -812,7 +812,7 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize, return header; } -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { struct bpf_binary_header *header = NULL; unsigned long size, prg_len, lit_len; @@ -875,7 +875,7 @@ out: kfree(addrs); } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 9ddc51e..2fa7b14 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -15,8 +15,8 @@ * Thomas Klein */ -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/kernel.h> #include <linux/slab.h> @@ -48,13 +48,10 @@ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); -static void zpci_enable_irq(struct irq_data *data); -static void zpci_disable_irq(struct irq_data *data); - static struct irq_chip zpci_irq_chip = { .name = "zPCI", - .irq_unmask = zpci_enable_irq, - .irq_mask = zpci_disable_irq, + .irq_unmask = unmask_msi_irq, + .irq_mask = mask_msi_irq, }; static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); @@ -244,43 +241,6 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) return rc; } -static int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag) -{ - int offset, pos; - u32 mask_bits; - - if (msi->msi_attrib.is_msix) { - offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL; - msi->masked = readl(msi->mask_base + offset); - writel(flag, msi->mask_base + offset); - } else if (msi->msi_attrib.maskbit) { - pos = (long) msi->mask_base; - pci_read_config_dword(msi->dev, pos, &mask_bits); - mask_bits &= ~(mask); - mask_bits |= flag & mask; - pci_write_config_dword(msi->dev, pos, mask_bits); - } else - return 0; - - msi->msi_attrib.maskbit = !!flag; - return 1; -} - -static void zpci_enable_irq(struct irq_data *data) -{ - struct msi_desc *msi = irq_get_msi_desc(data->irq); - - zpci_msi_set_mask_bits(msi, 1, 0); -} - -static void zpci_disable_irq(struct irq_data *data) -{ - struct msi_desc *msi = irq_get_msi_desc(data->irq); - - zpci_msi_set_mask_bits(msi, 1, 1); -} - void pcibios_fixup_bus(struct pci_bus *bus) { } @@ -487,7 +447,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) /* Release MSI interrupts */ list_for_each_entry(msi, &pdev->msi_list, list) { - zpci_msi_set_mask_bits(msi, 1, 1); + if (msi->msi_attrib.is_msix) + default_msix_mask_irq(msi, 1); + else + default_msi_mask_irq(msi, 1, 1); irq_set_msi_desc(msi->irq, NULL); irq_free_desc(msi->irq); msi->msg.address_lo = 0; diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 96545d7..6e22a24 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -5,8 +5,8 @@ * Jan Glauber <jang@linux.vnet.ibm.com> */ -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/kernel.h> #include <linux/slab.h> diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index c5c6684..eec598c 
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 96545d7..6e22a24 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -5,8 +5,8 @@
  * Jan Glauber <jang@linux.vnet.ibm.com>
  */

-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

 #include <linux/kernel.h>
 #include <linux/slab.h>
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index c5c6684..eec598c 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -5,8 +5,8 @@
  * Jan Glauber <jang@linux.vnet.ibm.com>
  */

-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

 #include <linux/kernel.h>
 #include <linux/seq_file.h>
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index f91c031..4cbb29a 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -16,6 +16,13 @@
 static struct kmem_cache *dma_region_table_cache;
 static struct kmem_cache *dma_page_table_cache;

+static int s390_iommu_strict;
+
+static int zpci_refresh_global(struct zpci_dev *zdev)
+{
+	return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
+				  zdev->iommu_pages * PAGE_SIZE);
+}

 static unsigned long *dma_alloc_cpu_table(void)
 {
@@ -155,18 +162,15 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
 	}

 	/*
-	 * rpcit is not required to establish new translations when previously
-	 * invalid translation-table entries are validated, however it is
-	 * required when altering previously valid entries.
+	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
+	 * translations when previously invalid translation-table entries are
+	 * validated. With lazy unmap, it also is skipped for previously valid
+	 * entries, but a global rpcit is then required before any address can
+	 * be re-used, i.e. after each iommu bitmap wrap-around.
 	 */
 	if (!zdev->tlb_refresh &&
-	    ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
-		/*
-		 * TODO: also need to check that the old entry is indeed INVALID
-		 * and not only for one page but for the whole range...
-		 * -> now we WARN_ON in that case but with lazy unmap that
-		 * needs to be redone!
-		 */
+	    (!s390_iommu_strict ||
+	     ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
 		goto no_refresh;

 	rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
@@ -220,16 +224,21 @@ static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
 static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
 {
 	unsigned long offset, flags;
+	int wrap = 0;

 	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
 	offset = __dma_alloc_iommu(zdev, zdev->next_bit, size);
-	if (offset == -1)
+	if (offset == -1) {
+		/* wrap-around */
 		offset = __dma_alloc_iommu(zdev, 0, size);
+		wrap = 1;
+	}

 	if (offset != -1) {
 		zdev->next_bit = offset + size;
-		if (zdev->next_bit >= zdev->iommu_pages)
-			zdev->next_bit = 0;
+		if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
+			/* global flush after wrap-around with lazy unmap */
+			zpci_refresh_global(zdev);
 	}
 	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
 	return offset;
@@ -243,7 +252,11 @@ static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size
 	if (!zdev->iommu_bitmap)
 		goto out;
 	bitmap_clear(zdev->iommu_bitmap, offset, size);
-	if (offset >= zdev->next_bit)
+	/*
+	 * Lazy flush for unmap: need to move next_bit to avoid address re-use
+	 * until wrap-around.
+	 */
+	if (!s390_iommu_strict && offset >= zdev->next_bit)
 		zdev->next_bit = offset + size;
 out:
 	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
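Together, the two hunks above implement the lazy policy: dma_free_iommu() only clears the bitmap and pushes next_bit past the freed range, and the stale IOTLB entries are flushed in one global operation only when the next-fit search in dma_alloc_iommu() wraps back to offset 0. A toy model of that policy, using a byte array instead of a bitmap and a counter instead of zpci_refresh_global(); sizes and names are illustrative:

#include <assert.h>

#define IOMMU_PAGES 8

static unsigned char bitmap[IOMMU_PAGES];  /* 1 = page in use */
static int next_bit;                       /* next-fit search start */
static int global_flushes;                 /* stands in for zpci_refresh_global() */

/* First fit of 'size' free pages at or after 'start', -1 if none. */
static int find_range(int start, int size)
{
	for (int i = start; i + size <= IOMMU_PAGES; i++) {
		int j;

		for (j = 0; j < size; j++)
			if (bitmap[i + j])
				break;
		if (j == size)
			return i;
	}
	return -1;
}

static int alloc_iommu(int size)
{
	int offset = find_range(next_bit, size);

	if (offset == -1) {
		offset = find_range(0, size); /* wrap-around */
		if (offset != -1)
			global_flushes++; /* one flush covers all lazily unmapped entries */
	}
	if (offset != -1) {
		for (int j = 0; j < size; j++)
			bitmap[offset + j] = 1;
		next_bit = offset + size;
	}
	return offset;
}

static void free_iommu(int offset, int size)
{
	for (int j = 0; j < size; j++)
		bitmap[offset + j] = 0;
	/* lazy unmap: skip the freed range so it is not handed out again
	 * before the next wrap-around, and thus the next global flush */
	if (offset >= next_bit)
		next_bit = offset + size;
}

int main(void)
{
	int a = alloc_iommu(4), b = alloc_iommu(4);

	free_iommu(a, 4);            /* no flush, and no re-use yet */
	assert(global_flushes == 0);
	assert(alloc_iommu(2) == a); /* search wraps to 0 ... */
	assert(global_flushes == 1); /* ... after one global flush */
	(void)b;
	return 0;
}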
@@ -504,3 +517,12 @@ struct dma_map_ops s390_dma_ops = {
 	/* dma_supported is unconditionally true without a callback */
 };
 EXPORT_SYMBOL_GPL(s390_dma_ops);
+
+static int __init s390_iommu_setup(char *str)
+{
+	if (!strncmp(str, "strict", 6))
+		s390_iommu_strict = 1;
+	return 0;
+}
+
+__setup("s390_iommu=", s390_iommu_setup);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 6d7f5a3..460fdb2 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -5,8 +5,8 @@
  * Jan Glauber <jang@linux.vnet.ibm.com>
  */

-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

 #include <linux/kernel.h>
 #include <linux/pci.h>
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 9190214..fa3ce89 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -5,8 +5,8 @@
  * Jan Glauber <jang@linux.vnet.ibm.com>
  */

-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

 #include <linux/kernel.h>
 #include <linux/stat.h>
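Finally, the s390_iommu= handler added to pci_dma.c above gives administrators a way back to the old behaviour: per the dma_update_trans() comment, booting with s390_iommu=strict makes every unmap flush the translation immediately instead of deferring to the wrap-around flush. A sketch of the option parsing as a standalone function; the kernel wires it up via __setup(), and this harness is illustrative only:

#include <assert.h>
#include <string.h>

static int s390_iommu_strict;

/* Same test the __setup() handler above applies to the text following
 * "s390_iommu=" on the kernel command line. */
static int s390_iommu_parse(const char *str)
{
	if (!strncmp(str, "strict", 6))
		s390_iommu_strict = 1;
	return 0;
}

int main(void)
{
	s390_iommu_parse("lazy");    /* unrecognized: default lazy mode kept */
	assert(s390_iommu_strict == 0);
	s390_iommu_parse("strict");  /* as if booted with s390_iommu=strict */
	assert(s390_iommu_strict == 1);
	return 0;
}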