summaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig3
-rw-r--r--arch/s390/configs/default_defconfig2
-rw-r--r--arch/s390/configs/gcov_defconfig2
-rw-r--r--arch/s390/configs/performance_defconfig2
-rw-r--r--arch/s390/include/asm/kvm_host.h3
-rw-r--r--arch/s390/include/asm/pgtable.h197
-rw-r--r--arch/s390/include/asm/processor.h2
-rw-r--r--arch/s390/include/asm/qdio.h4
-rw-r--r--arch/s390/include/asm/switch_to.h4
-rw-r--r--arch/s390/include/asm/syscall.h2
-rw-r--r--arch/s390/include/uapi/asm/Kbuild1
-rw-r--r--arch/s390/include/uapi/asm/kvm_perf.h25
-rw-r--r--arch/s390/include/uapi/asm/sie.h1
-rw-r--r--arch/s390/kernel/head.S6
-rw-r--r--arch/s390/kernel/irq.c95
-rw-r--r--arch/s390/kernel/mcount.S10
-rw-r--r--arch/s390/kernel/mcount64.S3
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c12
-rw-r--r--arch/s390/kernel/ptrace.c12
-rw-r--r--arch/s390/kernel/setup.c2
-rw-r--r--arch/s390/kernel/time.c16
-rw-r--r--arch/s390/kvm/diag.c3
-rw-r--r--arch/s390/kvm/intercept.c32
-rw-r--r--arch/s390/kvm/interrupt.c103
-rw-r--r--arch/s390/kvm/kvm-s390.c66
-rw-r--r--arch/s390/kvm/kvm-s390.h12
-rw-r--r--arch/s390/kvm/sigp.c44
-rw-r--r--arch/s390/mm/hugetlbpage.c103
-rw-r--r--arch/s390/mm/pgtable.c8
-rw-r--r--arch/s390/net/bpf_jit_comp.c4
-rw-r--r--arch/s390/pci/pci.c53
-rw-r--r--arch/s390/pci/pci_clp.c4
-rw-r--r--arch/s390/pci/pci_debug.c4
-rw-r--r--arch/s390/pci/pci_dma.c50
-rw-r--r--arch/s390/pci/pci_event.c4
-rw-r--r--arch/s390/pci/pci_sysfs.c4
36 files changed, 465 insertions, 433 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index bb63499..8ca60f8 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -92,6 +92,7 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
select ARCH_SAVE_PAGE_KEYS if HIBERNATION
+ select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
@@ -116,7 +117,6 @@ config S390
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
- select HAVE_FUNCTION_TRACE_MCOUNT_TEST
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP
@@ -137,7 +137,6 @@ config S390
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UID16 if 32BIT
select HAVE_VIRT_CPU_ACCOUNTING
- select KTIME_SCALAR if 32BIT
select MODULES_USE_ELF_RELA
select NO_BOOTMEM
select OLD_SIGACTION
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index fd09a10..3ca1894 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -366,7 +366,6 @@ CONFIG_VIRTIO_BLK=y
CONFIG_ENCLOSURE_SERVICES=m
CONFIG_RAID_ATTRS=m
CONFIG_SCSI=y
-CONFIG_SCSI_TGT=m
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
CONFIG_CHR_DEV_OSST=m
@@ -380,7 +379,6 @@ CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SPI_ATTRS=m
CONFIG_SCSI_SAS_LIBSAS=m
CONFIG_SCSI_SRP_ATTRS=m
-CONFIG_SCSI_SRP_TGT_ATTRS=y
CONFIG_ISCSI_TCP=m
CONFIG_LIBFCOE=m
CONFIG_SCSI_DEBUG=m
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index b061180..4830aa6 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -363,7 +363,6 @@ CONFIG_VIRTIO_BLK=y
CONFIG_ENCLOSURE_SERVICES=m
CONFIG_RAID_ATTRS=m
CONFIG_SCSI=y
-CONFIG_SCSI_TGT=m
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
CONFIG_CHR_DEV_OSST=m
@@ -377,7 +376,6 @@ CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SPI_ATTRS=m
CONFIG_SCSI_SAS_LIBSAS=m
CONFIG_SCSI_SRP_ATTRS=m
-CONFIG_SCSI_SRP_TGT_ATTRS=y
CONFIG_ISCSI_TCP=m
CONFIG_LIBFCOE=m
CONFIG_SCSI_DEBUG=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index d279baa..61db449 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -361,7 +361,6 @@ CONFIG_VIRTIO_BLK=y
CONFIG_ENCLOSURE_SERVICES=m
CONFIG_RAID_ATTRS=m
CONFIG_SCSI=y
-CONFIG_SCSI_TGT=m
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
CONFIG_CHR_DEV_OSST=m
@@ -375,7 +374,6 @@ CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SPI_ATTRS=m
CONFIG_SCSI_SAS_LIBSAS=m
CONFIG_SCSI_SRP_ATTRS=m
-CONFIG_SCSI_SRP_TGT_ATTRS=y
CONFIG_ISCSI_TCP=m
CONFIG_LIBFCOE=m
CONFIG_SCSI_DEBUG=m
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 4181d7b..773bef7 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -305,7 +305,6 @@ struct kvm_s390_local_interrupt {
struct list_head list;
atomic_t active;
struct kvm_s390_float_interrupt *float_int;
- int timer_due; /* event indicator for waitqueue below */
wait_queue_head_t *wq;
atomic_t *cpuflags;
unsigned int action_bits;
@@ -367,7 +366,6 @@ struct kvm_vcpu_arch {
s390_fp_regs guest_fpregs;
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
- struct tasklet_struct tasklet;
struct kvm_s390_pgm_info pgm;
union {
struct cpuid cpu_id;
@@ -418,6 +416,7 @@ struct kvm_arch{
int css_support;
int use_irqchip;
int use_cmma;
+ int user_cpu_state_ctrl;
struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
wait_queue_head_t ipte_wq;
spinlock_t start_stop_lock;
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index fcba5e0..b76317c 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -287,7 +287,14 @@ extern unsigned long MODULES_END;
#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
#define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */
#define _SEGMENT_ENTRY_PTL 0x0f /* page table length */
-#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT
+
+#define _SEGMENT_ENTRY_DIRTY 0 /* No sw dirty bit for 31-bit */
+#define _SEGMENT_ENTRY_YOUNG 0 /* No sw young bit for 31-bit */
+#define _SEGMENT_ENTRY_READ 0 /* No sw read bit for 31-bit */
+#define _SEGMENT_ENTRY_WRITE 0 /* No sw write bit for 31-bit */
+#define _SEGMENT_ENTRY_LARGE 0 /* No large pages for 31-bit */
+#define _SEGMENT_ENTRY_BITS_LARGE 0
+#define _SEGMENT_ENTRY_ORIGIN_LARGE 0
#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL)
#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
@@ -350,7 +357,7 @@ extern unsigned long MODULES_END;
/* Bits in the segment table entry */
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
-#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL
+#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */
#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
@@ -359,30 +366,34 @@ extern unsigned long MODULES_END;
#define _SEGMENT_ENTRY (0)
#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
-#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */
-#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */
-#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */
-#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */
-#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG
+#define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */
+#define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */
+#define _SEGMENT_ENTRY_SPLIT 0x0800 /* THP splitting bit */
+#define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */
+#define _SEGMENT_ENTRY_CO 0x0100 /* change-recording override */
+#define _SEGMENT_ENTRY_READ 0x0002 /* SW segment read bit */
+#define _SEGMENT_ENTRY_WRITE 0x0001 /* SW segment write bit */
/*
* Segment table entry encoding (R = read-only, I = invalid, y = young bit):
- * ..R...I...y.
- * prot-none, old ..0...1...1.
- * prot-none, young ..1...1...1.
- * read-only, old ..1...1...0.
- * read-only, young ..1...0...1.
- * read-write, old ..0...1...0.
- * read-write, young ..0...0...1.
+ * dy..R...I...wr
+ * prot-none, clean, old 00..1...1...00
+ * prot-none, clean, young 01..1...1...00
+ * prot-none, dirty, old 10..1...1...00
+ * prot-none, dirty, young 11..1...1...00
+ * read-only, clean, old 00..1...1...01
+ * read-only, clean, young 01..1...0...01
+ * read-only, dirty, old 10..1...1...01
+ * read-only, dirty, young 11..1...0...01
+ * read-write, clean, old 00..1...1...11
+ * read-write, clean, young 01..1...0...11
+ * read-write, dirty, old 10..0...1...11
+ * read-write, dirty, young 11..0...0...11
* The segment table origin is used to distinguish empty (origin==0) from
* read-write, old segment table entries (origin!=0)
*/
-#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */
-
-/* Set of bits not changed in pmd_modify */
-#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
- | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO)
+#define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */
/* Page status table bits for virtualization */
#define PGSTE_ACC_BITS 0xf000000000000000UL
@@ -455,10 +466,11 @@ extern unsigned long MODULES_END;
* Segment entry (large page) protection definitions.
*/
#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \
- _SEGMENT_ENTRY_NONE)
-#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \
_SEGMENT_ENTRY_PROTECT)
-#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID)
+#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_PROTECT | \
+ _SEGMENT_ENTRY_READ)
+#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_READ | \
+ _SEGMENT_ENTRY_WRITE)
static inline int mm_has_pgste(struct mm_struct *mm)
{
@@ -569,25 +581,23 @@ static inline int pmd_none(pmd_t pmd)
static inline int pmd_large(pmd_t pmd)
{
-#ifdef CONFIG_64BIT
return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
-#else
- return 0;
-#endif
}
-static inline int pmd_prot_none(pmd_t pmd)
+static inline int pmd_pfn(pmd_t pmd)
{
- return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) &&
- (pmd_val(pmd) & _SEGMENT_ENTRY_NONE);
+ unsigned long origin_mask;
+
+ origin_mask = _SEGMENT_ENTRY_ORIGIN;
+ if (pmd_large(pmd))
+ origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
+ return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT;
}
static inline int pmd_bad(pmd_t pmd)
{
-#ifdef CONFIG_64BIT
if (pmd_large(pmd))
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
-#endif
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
}
@@ -607,20 +617,22 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
#define __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
- if (pmd_prot_none(pmd))
- return 0;
- return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0;
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0;
+}
+
+static inline int pmd_dirty(pmd_t pmd)
+{
+ int dirty = 1;
+ if (pmd_large(pmd))
+ dirty = (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
+ return dirty;
}
static inline int pmd_young(pmd_t pmd)
{
- int young = 0;
-#ifdef CONFIG_64BIT
- if (pmd_prot_none(pmd))
- young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0;
- else
+ int young = 1;
+ if (pmd_large(pmd))
young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
-#endif
return young;
}
@@ -1391,7 +1403,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
#define pte_page(x) pfn_to_page(pte_pfn(x))
-#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
+#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
/* Find an entry in the lowest level page table.. */
#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
@@ -1413,41 +1425,75 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
return pgprot_val(SEGMENT_WRITE);
}
-static inline pmd_t pmd_mkyoung(pmd_t pmd)
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
-#ifdef CONFIG_64BIT
- if (pmd_prot_none(pmd)) {
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+ pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE;
+ if (pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
+ return pmd;
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkclean(pmd_t pmd)
+{
+ if (pmd_large(pmd)) {
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
- } else {
+ }
+ return pmd;
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+ if (pmd_large(pmd)) {
+ pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ }
+ return pmd;
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+ if (pmd_large(pmd)) {
pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
}
-#endif
return pmd;
}
static inline pmd_t pmd_mkold(pmd_t pmd)
{
-#ifdef CONFIG_64BIT
- if (pmd_prot_none(pmd)) {
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
- } else {
+ if (pmd_large(pmd)) {
pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
}
-#endif
return pmd;
}
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
- int young;
-
- young = pmd_young(pmd);
- pmd_val(pmd) &= _SEGMENT_CHG_MASK;
+ if (pmd_large(pmd)) {
+ pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
+ _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
+ _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT;
+ pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+ if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+ return pmd;
+ }
+ pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN;
pmd_val(pmd) |= massage_pgprot_pmd(newprot);
- if (young)
- pmd = pmd_mkyoung(pmd);
return pmd;
}
@@ -1455,16 +1501,9 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
{
pmd_t __pmd;
pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
- return pmd_mkyoung(__pmd);
+ return __pmd;
}
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
-{
- /* Do not clobber PROT_NONE segments! */
- if (!pmd_prot_none(pmd))
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
- return pmd;
-}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
static inline void __pmdp_csp(pmd_t *pmdp)
@@ -1555,34 +1594,21 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
static inline int pmd_trans_splitting(pmd_t pmd)
{
- return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT;
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) &&
+ (pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT);
}
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t entry)
{
- if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1)
- pmd_val(entry) |= _SEGMENT_ENTRY_CO;
*pmdp = entry;
}
static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
- return pmd;
-}
-
-static inline pmd_t pmd_wrprotect(pmd_t pmd)
-{
- /* Do not clobber PROT_NONE segments! */
- if (!pmd_prot_none(pmd))
- pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
- return pmd;
-}
-
-static inline pmd_t pmd_mkdirty(pmd_t pmd)
-{
- /* No dirty bit in the segment table entry. */
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
return pmd;
}
@@ -1647,11 +1673,6 @@ static inline int has_transparent_hugepage(void)
{
return MACHINE_HAS_HPAGE ? 1 : 0;
}
-
-static inline unsigned long pmd_pfn(pmd_t pmd)
-{
- return pmd_val(pmd) >> PAGE_SHIFT;
-}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 6f02d45..e568fc8 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -217,7 +217,7 @@ static inline void cpu_relax(void)
barrier();
}
-#define arch_mutex_cpu_relax() barrier()
+#define cpu_relax_lowlatency() barrier()
static inline void psw_set_key(unsigned int key)
{
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index d786c63..06f3034 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -415,6 +415,10 @@ struct qdio_brinfo_entry_l2 {
#define QDIO_FLAG_SYNC_OUTPUT 0x02
#define QDIO_FLAG_PCI_OUT 0x10
+int qdio_alloc_buffers(struct qdio_buffer **buf, unsigned int count);
+void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count);
+void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count);
+
extern int qdio_allocate(struct qdio_initialize *);
extern int qdio_establish(struct qdio_initialize *);
extern int qdio_activate(struct ccw_device *);
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index df38c70..18ea9e3 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -51,8 +51,8 @@ static inline int restore_fp_ctl(u32 *fpc)
return 0;
asm volatile(
- "0: lfpc %1\n"
- " la %0,0\n"
+ " lfpc %1\n"
+ "0: la %0,0\n"
"1:\n"
EX_TABLE(0b,1b)
: "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index abad78d..5bc1259 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -54,7 +54,7 @@ static inline void syscall_set_return_value(struct task_struct *task,
struct pt_regs *regs,
int error, long val)
{
- regs->gprs[2] = error ? -error : val;
+ regs->gprs[2] = error ? error : val;
}
static inline void syscall_get_arguments(struct task_struct *task,
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 7366373..08fe6da 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -16,6 +16,7 @@ header-y += ioctls.h
header-y += ipcbuf.h
header-y += kvm.h
header-y += kvm_para.h
+header-y += kvm_perf.h
header-y += kvm_virtio.h
header-y += mman.h
header-y += monwriter.h
diff --git a/arch/s390/include/uapi/asm/kvm_perf.h b/arch/s390/include/uapi/asm/kvm_perf.h
new file mode 100644
index 0000000..3972827
--- /dev/null
+++ b/arch/s390/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,25 @@
+/*
+ * Definitions for perf-kvm on s390
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_KVM_PERF_S390_H
+#define __LINUX_KVM_PERF_S390_H
+
+#include <asm/sie.h>
+
+#define DECODE_STR_LEN 40
+
+#define VCPU_ID "id"
+
+#define KVM_ENTRY_TRACE "kvm:kvm_s390_sie_enter"
+#define KVM_EXIT_TRACE "kvm:kvm_s390_sie_exit"
+#define KVM_EXIT_REASON "icptcode"
+
+#endif
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index 5d9cc19..d4096fd 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -108,6 +108,7 @@
exit_code_ipa0(0xB2, 0x17, "STETR"), \
exit_code_ipa0(0xB2, 0x18, "PC"), \
exit_code_ipa0(0xB2, 0x20, "SERVC"), \
+ exit_code_ipa0(0xB2, 0x21, "IPTE"), \
exit_code_ipa0(0xB2, 0x28, "PT"), \
exit_code_ipa0(0xB2, 0x29, "ISKE"), \
exit_code_ipa0(0xB2, 0x2a, "RRBE"), \
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 7ba7d67..e88d35d 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -437,11 +437,11 @@ ENTRY(startup_kdump)
#if defined(CONFIG_64BIT)
#if defined(CONFIG_MARCH_ZEC12)
- .long 3, 0xc100efea, 0xf46ce800, 0x00400000
+ .long 3, 0xc100eff2, 0xf46ce800, 0x00400000
#elif defined(CONFIG_MARCH_Z196)
- .long 2, 0xc100efea, 0xf46c0000
+ .long 2, 0xc100eff2, 0xf46c0000
#elif defined(CONFIG_MARCH_Z10)
- .long 2, 0xc100efea, 0xf0680000
+ .long 2, 0xc100eff2, 0xf0680000
#elif defined(CONFIG_MARCH_Z9_109)
.long 1, 0xc100efc2
#elif defined(CONFIG_MARCH_Z990)
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 99b0b09..8eb8244 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -30,6 +30,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
EXPORT_PER_CPU_SYMBOL_GPL(irq_stat);
struct irq_class {
+ int irq;
char *name;
char *desc;
};
@@ -45,9 +46,9 @@ struct irq_class {
* up with having a sum which accounts each interrupt twice.
*/
static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
- [EXT_INTERRUPT] = {.name = "EXT"},
- [IO_INTERRUPT] = {.name = "I/O"},
- [THIN_INTERRUPT] = {.name = "AIO"},
+ {.irq = EXT_INTERRUPT, .name = "EXT"},
+ {.irq = IO_INTERRUPT, .name = "I/O"},
+ {.irq = THIN_INTERRUPT, .name = "AIO"},
};
/*
@@ -56,38 +57,38 @@ static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
* In addition this list contains non external / I/O events like NMIs.
*/
static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = {
- [IRQEXT_CLK] = {.name = "CLK", .desc = "[EXT] Clock Comparator"},
- [IRQEXT_EXC] = {.name = "EXC", .desc = "[EXT] External Call"},
- [IRQEXT_EMS] = {.name = "EMS", .desc = "[EXT] Emergency Signal"},
- [IRQEXT_TMR] = {.name = "TMR", .desc = "[EXT] CPU Timer"},
- [IRQEXT_TLA] = {.name = "TAL", .desc = "[EXT] Timing Alert"},
- [IRQEXT_PFL] = {.name = "PFL", .desc = "[EXT] Pseudo Page Fault"},
- [IRQEXT_DSD] = {.name = "DSD", .desc = "[EXT] DASD Diag"},
- [IRQEXT_VRT] = {.name = "VRT", .desc = "[EXT] Virtio"},
- [IRQEXT_SCP] = {.name = "SCP", .desc = "[EXT] Service Call"},
- [IRQEXT_IUC] = {.name = "IUC", .desc = "[EXT] IUCV"},
- [IRQEXT_CMS] = {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
- [IRQEXT_CMC] = {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
- [IRQEXT_CMR] = {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI"},
- [IRQIO_CIO] = {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
- [IRQIO_QAI] = {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"},
- [IRQIO_DAS] = {.name = "DAS", .desc = "[I/O] DASD"},
- [IRQIO_C15] = {.name = "C15", .desc = "[I/O] 3215"},
- [IRQIO_C70] = {.name = "C70", .desc = "[I/O] 3270"},
- [IRQIO_TAP] = {.name = "TAP", .desc = "[I/O] Tape"},
- [IRQIO_VMR] = {.name = "VMR", .desc = "[I/O] Unit Record Devices"},
- [IRQIO_LCS] = {.name = "LCS", .desc = "[I/O] LCS"},
- [IRQIO_CLW] = {.name = "CLW", .desc = "[I/O] CLAW"},
- [IRQIO_CTC] = {.name = "CTC", .desc = "[I/O] CTC"},
- [IRQIO_APB] = {.name = "APB", .desc = "[I/O] AP Bus"},
- [IRQIO_ADM] = {.name = "ADM", .desc = "[I/O] EADM Subchannel"},
- [IRQIO_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"},
- [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" },
- [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" },
- [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
- [IRQIO_VAI] = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
- [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"},
- [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"},
+ {.irq = IRQEXT_CLK, .name = "CLK", .desc = "[EXT] Clock Comparator"},
+ {.irq = IRQEXT_EXC, .name = "EXC", .desc = "[EXT] External Call"},
+ {.irq = IRQEXT_EMS, .name = "EMS", .desc = "[EXT] Emergency Signal"},
+ {.irq = IRQEXT_TMR, .name = "TMR", .desc = "[EXT] CPU Timer"},
+ {.irq = IRQEXT_TLA, .name = "TAL", .desc = "[EXT] Timing Alert"},
+ {.irq = IRQEXT_PFL, .name = "PFL", .desc = "[EXT] Pseudo Page Fault"},
+ {.irq = IRQEXT_DSD, .name = "DSD", .desc = "[EXT] DASD Diag"},
+ {.irq = IRQEXT_VRT, .name = "VRT", .desc = "[EXT] Virtio"},
+ {.irq = IRQEXT_SCP, .name = "SCP", .desc = "[EXT] Service Call"},
+ {.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"},
+ {.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
+ {.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
+ {.irq = IRQEXT_CMR, .name = "CMR", .desc = "[EXT] CPU-Measurement: RI"},
+ {.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
+ {.irq = IRQIO_QAI, .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"},
+ {.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"},
+ {.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"},
+ {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"},
+ {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"},
+ {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"},
+ {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"},
+ {.irq = IRQIO_CLW, .name = "CLW", .desc = "[I/O] CLAW"},
+ {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"},
+ {.irq = IRQIO_APB, .name = "APB", .desc = "[I/O] AP Bus"},
+ {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"},
+ {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"},
+ {.irq = IRQIO_PCI, .name = "PCI", .desc = "[I/O] PCI Interrupt" },
+ {.irq = IRQIO_MSI, .name = "MSI", .desc = "[I/O] MSI Interrupt" },
+ {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
+ {.irq = IRQIO_VAI, .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
+ {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"},
+ {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"},
};
void __init init_IRQ(void)
@@ -116,33 +117,37 @@ void do_IRQ(struct pt_regs *regs, int irq)
*/
int show_interrupts(struct seq_file *p, void *v)
{
- int irq = *(loff_t *) v;
- int cpu;
+ int index = *(loff_t *) v;
+ int cpu, irq;
get_online_cpus();
- if (irq == 0) {
+ if (index == 0) {
seq_puts(p, " ");
for_each_online_cpu(cpu)
seq_printf(p, "CPU%d ", cpu);
seq_putc(p, '\n');
goto out;
}
- if (irq < NR_IRQS) {
- if (irq >= NR_IRQS_BASE)
+ if (index < NR_IRQS) {
+ if (index >= NR_IRQS_BASE)
goto out;
- seq_printf(p, "%s: ", irqclass_main_desc[irq].name);
+ /* Adjust index to process irqclass_main_desc array entries */
+ index--;
+ seq_printf(p, "%s: ", irqclass_main_desc[index].name);
+ irq = irqclass_main_desc[index].irq;
for_each_online_cpu(cpu)
seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
seq_putc(p, '\n');
goto out;
}
- for (irq = 0; irq < NR_ARCH_IRQS; irq++) {
- seq_printf(p, "%s: ", irqclass_sub_desc[irq].name);
+ for (index = 0; index < NR_ARCH_IRQS; index++) {
+ seq_printf(p, "%s: ", irqclass_sub_desc[index].name);
+ irq = irqclass_sub_desc[index].irq;
for_each_online_cpu(cpu)
seq_printf(p, "%10u ",
per_cpu(irq_stat, cpu).irqs[irq]);
- if (irqclass_sub_desc[irq].desc)
- seq_printf(p, " %s", irqclass_sub_desc[irq].desc);
+ if (irqclass_sub_desc[index].desc)
+ seq_printf(p, " %s", irqclass_sub_desc[index].desc);
seq_putc(p, '\n');
}
out:
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 08dcf21..433c6db 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -21,13 +21,9 @@ ENTRY(_mcount)
ENTRY(ftrace_caller)
#endif
stm %r2,%r5,16(%r15)
- bras %r1,2f
+ bras %r1,1f
0: .long ftrace_trace_function
-1: .long function_trace_stop
-2: l %r2,1b-0b(%r1)
- icm %r2,0xf,0(%r2)
- jnz 3f
- st %r14,56(%r15)
+1: st %r14,56(%r15)
lr %r0,%r15
ahi %r15,-96
l %r3,100(%r15)
@@ -50,7 +46,7 @@ ENTRY(ftrace_graph_caller)
#endif
ahi %r15,96
l %r14,56(%r15)
-3: lm %r2,%r5,16(%r15)
+ lm %r2,%r5,16(%r15)
br %r14
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S
index 1c52eae..c67a8bf 100644
--- a/arch/s390/kernel/mcount64.S
+++ b/arch/s390/kernel/mcount64.S
@@ -20,9 +20,6 @@ ENTRY(_mcount)
ENTRY(ftrace_caller)
#endif
- larl %r1,function_trace_stop
- icm %r1,0xf,0(%r1)
- bnzr %r14
stmg %r2,%r5,32(%r15)
stg %r14,112(%r15)
lgr %r1,%r15
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index ea75d01..d3194de 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -411,12 +411,6 @@ static int cpumf_pmu_event_init(struct perf_event *event)
case PERF_TYPE_HARDWARE:
case PERF_TYPE_HW_CACHE:
case PERF_TYPE_RAW:
- /* The CPU measurement counter facility does not have overflow
- * interrupts to do sampling. Sampling must be provided by
- * external means, for example, by timers.
- */
- if (is_sampling_event(event))
- return -ENOENT;
err = __hw_perf_event_init(event);
break;
default:
@@ -681,6 +675,12 @@ static int __init cpumf_pmu_init(void)
goto out;
}
+ /* The CPU measurement counter facility does not have overflow
+ * interrupts to do sampling. Sampling must be provided by
+ * external means, for example, by timers.
+ */
+ cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
cpumf_pmu.attr_groups = cpumf_cf_event_group();
rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
if (rc) {
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 2d716734..5dc7ad9 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -334,9 +334,14 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
unsigned long mask = PSW_MASK_USER;
mask |= is_ri_task(child) ? PSW_MASK_RI : 0;
- if ((data & ~mask) != PSW_USER_BITS)
+ if ((data ^ PSW_USER_BITS) & ~mask)
+ /* Invalid psw mask. */
+ return -EINVAL;
+ if ((data & PSW_MASK_ASC) == PSW_ASC_HOME)
+ /* Invalid address-space-control bits */
return -EINVAL;
if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA))
+ /* Invalid addressing mode bits */
return -EINVAL;
}
*(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data;
@@ -672,9 +677,12 @@ static int __poke_user_compat(struct task_struct *child,
mask |= is_ri_task(child) ? PSW32_MASK_RI : 0;
/* Build a 64 bit psw mask from 31 bit mask. */
- if ((tmp & ~mask) != PSW32_USER_BITS)
+ if ((tmp ^ PSW32_USER_BITS) & ~mask)
/* Invalid psw mask. */
return -EINVAL;
+ if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME)
+ /* Invalid address-space-control bits */
+ return -EINVAL;
regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
(regs->psw.mask & PSW_MASK_BA) |
(__u64)(tmp & mask) << 32;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 1e2264b..ae1d5be 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -501,6 +501,8 @@ static int kdump_mem_notifier(struct notifier_block *nb,
{
struct memory_notify *arg = data;
+ if (action != MEM_GOING_OFFLINE)
+ return NOTIFY_OK;
if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
return NOTIFY_BAD;
if (arg->start_pfn > PFN_DOWN(crashk_res.end))
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 0931b11..4cef607 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -214,26 +214,26 @@ void update_vsyscall(struct timekeeper *tk)
{
u64 nsecps;
- if (tk->clock != &clocksource_tod)
+ if (tk->tkr.clock != &clocksource_tod)
return;
/* Make userspace gettimeofday spin until we're done. */
++vdso_data->tb_update_count;
smp_wmb();
- vdso_data->xtime_tod_stamp = tk->clock->cycle_last;
+ vdso_data->xtime_tod_stamp = tk->tkr.cycle_last;
vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->xtime_nsec;
+ vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
vdso_data->wtom_clock_sec =
tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- vdso_data->wtom_clock_nsec = tk->xtime_nsec +
- + ((u64) tk->wall_to_monotonic.tv_nsec << tk->shift);
- nsecps = (u64) NSEC_PER_SEC << tk->shift;
+ vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec +
+ + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift);
+ nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift;
while (vdso_data->wtom_clock_nsec >= nsecps) {
vdso_data->wtom_clock_nsec -= nsecps;
vdso_data->wtom_clock_sec++;
}
- vdso_data->tk_mult = tk->mult;
- vdso_data->tk_shift = tk->shift;
+ vdso_data->tk_mult = tk->tkr.mult;
+ vdso_data->tk_shift = tk->tkr.shift;
smp_wmb();
++vdso_data->tb_update_count;
}
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 0161675..59bd8f9 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -176,7 +176,8 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
- kvm_s390_vcpu_stop(vcpu);
+ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+ kvm_s390_vcpu_stop(vcpu);
vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index a0b586c..eaf4629 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -56,32 +56,26 @@ static int handle_noop(struct kvm_vcpu *vcpu)
static int handle_stop(struct kvm_vcpu *vcpu)
{
int rc = 0;
+ unsigned int action_bits;
vcpu->stat.exit_stop_request++;
- spin_lock_bh(&vcpu->arch.local_int.lock);
-
trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
- if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
- kvm_s390_vcpu_stop(vcpu);
- vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
- VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
- rc = -EOPNOTSUPP;
- }
+ action_bits = vcpu->arch.local_int.action_bits;
- if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
- vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
- /* store status must be called unlocked. Since local_int.lock
- * only protects local_int.* and not guest memory we can give
- * up the lock here */
- spin_unlock_bh(&vcpu->arch.local_int.lock);
+ if (!(action_bits & ACTION_STOP_ON_STOP))
+ return 0;
+
+ if (action_bits & ACTION_STORE_ON_STOP) {
rc = kvm_s390_vcpu_store_status(vcpu,
KVM_S390_STORE_STATUS_NOADDR);
- if (rc >= 0)
- rc = -EOPNOTSUPP;
- } else
- spin_unlock_bh(&vcpu->arch.local_int.lock);
- return rc;
+ if (rc)
+ return rc;
+ }
+
+ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+ kvm_s390_vcpu_stop(vcpu);
+ return -EOPNOTSUPP;
}
static int handle_validity(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 90c8de2..92528a0 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -158,6 +158,9 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
LCTL_CR10 | LCTL_CR11);
vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT);
}
+
+ if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP)
+ atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
}
static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
@@ -544,13 +547,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
int rc = 0;
if (atomic_read(&li->active)) {
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
list_for_each_entry(inti, &li->list, list)
if (__interrupt_is_deliverable(vcpu, inti)) {
rc = 1;
break;
}
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
}
if ((!rc) && atomic_read(&fi->active)) {
@@ -585,88 +588,56 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
{
u64 now, sltime;
- DECLARE_WAITQUEUE(wait, current);
vcpu->stat.exit_wait_state++;
- if (kvm_cpu_has_interrupt(vcpu))
- return 0;
- __set_cpu_idle(vcpu);
- spin_lock_bh(&vcpu->arch.local_int.lock);
- vcpu->arch.local_int.timer_due = 0;
- spin_unlock_bh(&vcpu->arch.local_int.lock);
+ /* fast path */
+ if (kvm_cpu_has_pending_timer(vcpu) || kvm_arch_vcpu_runnable(vcpu))
+ return 0;
if (psw_interrupts_disabled(vcpu)) {
VCPU_EVENT(vcpu, 3, "%s", "disabled wait");
- __unset_cpu_idle(vcpu);
return -EOPNOTSUPP; /* disabled wait */
}
+ __set_cpu_idle(vcpu);
if (!ckc_interrupts_enabled(vcpu)) {
VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
goto no_timer;
}
now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
- if (vcpu->arch.sie_block->ckc < now) {
- __unset_cpu_idle(vcpu);
- return 0;
- }
-
sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
-
hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
no_timer:
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
- spin_lock(&vcpu->arch.local_int.float_int->lock);
- spin_lock_bh(&vcpu->arch.local_int.lock);
- add_wait_queue(&vcpu->wq, &wait);
- while (list_empty(&vcpu->arch.local_int.list) &&
- list_empty(&vcpu->arch.local_int.float_int->list) &&
- (!vcpu->arch.local_int.timer_due) &&
- !signal_pending(current) &&
- !kvm_s390_si_ext_call_pending(vcpu)) {
- set_current_state(TASK_INTERRUPTIBLE);
- spin_unlock_bh(&vcpu->arch.local_int.lock);
- spin_unlock(&vcpu->arch.local_int.float_int->lock);
- schedule();
- spin_lock(&vcpu->arch.local_int.float_int->lock);
- spin_lock_bh(&vcpu->arch.local_int.lock);
- }
+ kvm_vcpu_block(vcpu);
__unset_cpu_idle(vcpu);
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&vcpu->wq, &wait);
- spin_unlock_bh(&vcpu->arch.local_int.lock);
- spin_unlock(&vcpu->arch.local_int.float_int->lock);
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
return 0;
}
-void kvm_s390_tasklet(unsigned long parm)
+void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm;
-
- spin_lock(&vcpu->arch.local_int.lock);
- vcpu->arch.local_int.timer_due = 1;
- if (waitqueue_active(&vcpu->wq))
+ if (waitqueue_active(&vcpu->wq)) {
+ /*
+ * The vcpu gave up the cpu voluntarily, mark it as a good
+ * yield-candidate.
+ */
+ vcpu->preempted = true;
wake_up_interruptible(&vcpu->wq);
- spin_unlock(&vcpu->arch.local_int.lock);
+ }
}
-/*
- * low level hrtimer wake routine. Because this runs in hardirq context
- * we schedule a tasklet to do the real work.
- */
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
- vcpu->preempted = true;
- tasklet_schedule(&vcpu->arch.tasklet);
+ kvm_s390_vcpu_wakeup(vcpu);
return HRTIMER_NORESTART;
}
@@ -676,13 +647,13 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_interrupt_info *n, *inti = NULL;
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
list_for_each_entry_safe(inti, n, &li->list, list) {
list_del(&inti->list);
kfree(inti);
}
atomic_set(&li->active, 0);
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
/* clear pending external calls set by sigp interpretation facility */
atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
@@ -701,7 +672,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
if (atomic_read(&li->active)) {
do {
deliver = 0;
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
list_for_each_entry_safe(inti, n, &li->list, list) {
if (__interrupt_is_deliverable(vcpu, inti)) {
list_del(&inti->list);
@@ -712,7 +683,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
}
if (list_empty(&li->list))
atomic_set(&li->active, 0);
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
if (deliver) {
__do_deliver_interrupt(vcpu, inti);
kfree(inti);
@@ -758,7 +729,7 @@ void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu)
if (atomic_read(&li->active)) {
do {
deliver = 0;
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
list_for_each_entry_safe(inti, n, &li->list, list) {
if ((inti->type == KVM_S390_MCHK) &&
__interrupt_is_deliverable(vcpu, inti)) {
@@ -770,7 +741,7 @@ void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu)
}
if (list_empty(&li->list))
atomic_set(&li->active, 0);
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
if (deliver) {
__do_deliver_interrupt(vcpu, inti);
kfree(inti);
@@ -817,11 +788,11 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1);
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
list_add(&inti->list, &li->list);
atomic_set(&li->active, 1);
BUG_ON(waitqueue_active(li->wq));
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
return 0;
}
@@ -842,11 +813,11 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
inti->type = KVM_S390_PROGRAM_INT;
memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm));
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
list_add(&inti->list, &li->list);
atomic_set(&li->active, 1);
BUG_ON(waitqueue_active(li->wq));
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
return 0;
}
@@ -934,12 +905,10 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
}
dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
li = &dst_vcpu->arch.local_int;
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(li->wq))
- wake_up_interruptible(li->wq);
- kvm_get_vcpu(kvm, sigcpu)->preempted = true;
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
+ kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
unlock_fi:
spin_unlock(&fi->lock);
mutex_unlock(&kvm->lock);
@@ -1081,7 +1050,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
mutex_lock(&vcpu->kvm->lock);
li = &vcpu->arch.local_int;
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
if (inti->type == KVM_S390_PROGRAM_INT)
list_add(&inti->list, &li->list);
else
@@ -1090,11 +1059,9 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
if (inti->type == KVM_S390_SIGP_STOP)
li->action_bits |= ACTION_STOP_ON_STOP;
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&vcpu->wq))
- wake_up_interruptible(&vcpu->wq);
- vcpu->preempted = true;
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
mutex_unlock(&vcpu->kvm->lock);
+ kvm_s390_vcpu_wakeup(vcpu);
return 0;
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2f3e14f..339b34a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -166,7 +166,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_ENABLE_CAP_VM:
+ case KVM_CAP_S390_IRQCHIP:
case KVM_CAP_VM_ATTRIBUTES:
+ case KVM_CAP_MP_STATE:
r = 1;
break;
case KVM_CAP_NR_VCPUS:
@@ -595,7 +597,8 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->pp = 0;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
- kvm_s390_vcpu_stop(vcpu);
+ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+ kvm_s390_vcpu_stop(vcpu);
kvm_s390_clear_local_irqs(vcpu);
}
@@ -647,8 +650,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
return rc;
}
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
- tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
- (unsigned long) vcpu);
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
get_cpu_id(&vcpu->arch.cpu_id);
vcpu->arch.cpu_id.version = 0xff;
@@ -926,7 +927,7 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
int rc = 0;
- if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
+ if (!is_vcpu_stopped(vcpu))
rc = -EBUSY;
else {
vcpu->run->psw_mask = psw.mask;
@@ -980,13 +981,34 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- return -EINVAL; /* not implemented yet */
+ /* CHECK_STOP and LOAD are not supported yet */
+ return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
+ KVM_MP_STATE_OPERATING;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- return -EINVAL; /* not implemented yet */
+ int rc = 0;
+
+ /* user space knows about this interface - let it control the state */
+ vcpu->kvm->arch.user_cpu_state_ctrl = 1;
+
+ switch (mp_state->mp_state) {
+ case KVM_MP_STATE_STOPPED:
+ kvm_s390_vcpu_stop(vcpu);
+ break;
+ case KVM_MP_STATE_OPERATING:
+ kvm_s390_vcpu_start(vcpu);
+ break;
+ case KVM_MP_STATE_LOAD:
+ case KVM_MP_STATE_CHECK_STOP:
+ /* fall through - CHECK_STOP and LOAD are not supported yet */
+ default:
+ rc = -ENXIO;
+ }
+
+ return rc;
}
bool kvm_s390_cmma_enabled(struct kvm *kvm)
@@ -1045,6 +1067,9 @@ retry:
goto retry;
}
+ /* nothing to do, just clear the request */
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+
return 0;
}
@@ -1284,7 +1309,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
- kvm_s390_vcpu_start(vcpu);
+ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
+ kvm_s390_vcpu_start(vcpu);
+ } else if (is_vcpu_stopped(vcpu)) {
+ pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
+ vcpu->vcpu_id);
+ return -EINVAL;
+ }
switch (kvm_run->exit_reason) {
case KVM_EXIT_S390_SIEIC:
@@ -1413,11 +1444,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
return kvm_s390_store_status_unloaded(vcpu, addr);
}
-static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
-{
- return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
-}
-
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
@@ -1451,7 +1477,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
/* Only one cpu at a time may enter/leave the STOPPED state. */
- spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
+ spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
for (i = 0; i < online_vcpus; i++) {
@@ -1477,7 +1503,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
* Let's play safe and flush the VCPU at startup.
*/
vcpu->arch.sie_block->ihcpu = 0xffff;
- spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
return;
}
@@ -1491,10 +1517,18 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
/* Only one cpu at a time may enter/leave the STOPPED state. */
- spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
+ spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+ /* Need to lock access to action_bits to avoid a SIGP race condition */
+ spin_lock(&vcpu->arch.local_int.lock);
atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+
+ /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
+ vcpu->arch.local_int.action_bits &=
+ ~(ACTION_STOP_ON_STOP | ACTION_STORE_ON_STOP);
+ spin_unlock(&vcpu->arch.local_int.lock);
+
__disable_ibs_on_vcpu(vcpu);
for (i = 0; i < online_vcpus; i++) {
@@ -1512,7 +1546,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
__enable_ibs_on_vcpu(started_vcpu);
}
- spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
return;
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index a8655ed..3862fa2 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -45,9 +45,9 @@ do { \
d_args); \
} while (0)
-static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
+static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
{
- return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT;
+ return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
}
static inline int kvm_is_ucontrol(struct kvm *kvm)
@@ -129,9 +129,15 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
vcpu->arch.sie_block->gpsw.mask |= cc << 44;
}
+/* are cpu states controlled by user space */
+static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
+{
+ return kvm->arch.user_cpu_state_ctrl != 0;
+}
+
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
-void kvm_s390_tasklet(unsigned long parm);
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 43079a4..cf243ba 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -125,8 +125,9 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
}
-static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
+static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action)
{
+ struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
struct kvm_s390_interrupt_info *inti;
int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
@@ -135,7 +136,13 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
return -ENOMEM;
inti->type = KVM_S390_SIGP_STOP;
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
+ if (li->action_bits & ACTION_STOP_ON_STOP) {
+ /* another SIGP STOP is pending */
+ kfree(inti);
+ rc = SIGP_CC_BUSY;
+ goto out;
+ }
if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
kfree(inti);
if ((action & ACTION_STORE_ON_STOP) != 0)
@@ -144,19 +151,17 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
}
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
- atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
li->action_bits |= action;
- if (waitqueue_active(li->wq))
- wake_up_interruptible(li->wq);
+ atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+ kvm_s390_vcpu_wakeup(dst_vcpu);
out:
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
return rc;
}
static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
{
- struct kvm_s390_local_interrupt *li;
struct kvm_vcpu *dst_vcpu = NULL;
int rc;
@@ -166,9 +171,8 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
- li = &dst_vcpu->arch.local_int;
- rc = __inject_sigp_stop(li, action);
+ rc = __inject_sigp_stop(dst_vcpu, action);
VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
@@ -238,7 +242,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
if (!inti)
return SIGP_CC_BUSY;
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
/* cpu must be in stopped state */
if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
*reg &= 0xffffffff00000000UL;
@@ -253,13 +257,12 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
- if (waitqueue_active(li->wq))
- wake_up_interruptible(li->wq);
+ kvm_s390_vcpu_wakeup(dst_vcpu);
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
out_li:
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
return rc;
}
@@ -275,9 +278,9 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
- spin_lock_bh(&dst_vcpu->arch.local_int.lock);
+ spin_lock(&dst_vcpu->arch.local_int.lock);
flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
- spin_unlock_bh(&dst_vcpu->arch.local_int.lock);
+ spin_unlock(&dst_vcpu->arch.local_int.lock);
if (!(flags & CPUSTAT_STOPPED)) {
*reg &= 0xffffffff00000000UL;
*reg |= SIGP_STATUS_INCORRECT_STATE;
@@ -338,10 +341,10 @@ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
li = &dst_vcpu->arch.local_int;
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
if (li->action_bits & ACTION_STOP_ON_STOP)
rc = SIGP_CC_BUSY;
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
return rc;
}
@@ -461,12 +464,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
BUG_ON(dest_vcpu == NULL);
- spin_lock_bh(&dest_vcpu->arch.local_int.lock);
- if (waitqueue_active(&dest_vcpu->wq))
- wake_up_interruptible(&dest_vcpu->wq);
- dest_vcpu->preempted = true;
- spin_unlock_bh(&dest_vcpu->arch.local_int.lock);
-
+ kvm_s390_vcpu_wakeup(dest_vcpu);
kvm_s390_set_psw_cc(vcpu, SIGP_CC_ORDER_CODE_ACCEPTED);
return 0;
}
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 0ff66a7..389bc17 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -10,42 +10,33 @@
static inline pmd_t __pte_to_pmd(pte_t pte)
{
- int none, young, prot;
pmd_t pmd;
/*
- * Convert encoding pte bits pmd bits
- * .IR...wrdytp ..R...I...y.
- * empty .10...000000 -> ..0...1...0.
- * prot-none, clean, old .11...000001 -> ..0...1...1.
- * prot-none, clean, young .11...000101 -> ..1...1...1.
- * prot-none, dirty, old .10...001001 -> ..0...1...1.
- * prot-none, dirty, young .10...001101 -> ..1...1...1.
- * read-only, clean, old .11...010001 -> ..1...1...0.
- * read-only, clean, young .01...010101 -> ..1...0...1.
- * read-only, dirty, old .11...011001 -> ..1...1...0.
- * read-only, dirty, young .01...011101 -> ..1...0...1.
- * read-write, clean, old .11...110001 -> ..0...1...0.
- * read-write, clean, young .01...110101 -> ..0...0...1.
- * read-write, dirty, old .10...111001 -> ..0...1...0.
- * read-write, dirty, young .00...111101 -> ..0...0...1.
- * Huge ptes are dirty by definition, a clean pte is made dirty
- * by the conversion.
+ * Convert encoding pte bits pmd bits
+ * .IR...wrdytp dy..R...I...wr
+ * empty .10...000000 -> 00..0...1...00
+ * prot-none, clean, old .11...000001 -> 00..1...1...00
+ * prot-none, clean, young .11...000101 -> 01..1...1...00
+ * prot-none, dirty, old .10...001001 -> 10..1...1...00
+ * prot-none, dirty, young .10...001101 -> 11..1...1...00
+ * read-only, clean, old .11...010001 -> 00..1...1...01
+ * read-only, clean, young .01...010101 -> 01..1...0...01
+ * read-only, dirty, old .11...011001 -> 10..1...1...01
+ * read-only, dirty, young .01...011101 -> 11..1...0...01
+ * read-write, clean, old .11...110001 -> 00..0...1...11
+ * read-write, clean, young .01...110101 -> 01..0...0...11
+ * read-write, dirty, old .10...111001 -> 10..0...1...11
+ * read-write, dirty, young .00...111101 -> 11..0...0...11
*/
if (pte_present(pte)) {
pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
- if (pte_val(pte) & _PAGE_INVALID)
- pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
- none = (pte_val(pte) & _PAGE_PRESENT) &&
- !(pte_val(pte) & _PAGE_READ) &&
- !(pte_val(pte) & _PAGE_WRITE);
- prot = (pte_val(pte) & _PAGE_PROTECT) &&
- !(pte_val(pte) & _PAGE_WRITE);
- young = pte_val(pte) & _PAGE_YOUNG;
- if (none || young)
- pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
- if (prot || (none && young))
- pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_READ) >> 4;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_WRITE) >> 4;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_INVALID) >> 5;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT);
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
} else
pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
return pmd;
@@ -56,34 +47,31 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
pte_t pte;
/*
- * Convert encoding pmd bits pte bits
- * ..R...I...y. .IR...wrdytp
- * empty ..0...1...0. -> .10...000000
- * prot-none, old ..0...1...1. -> .10...001001
- * prot-none, young ..1...1...1. -> .10...001101
- * read-only, old ..1...1...0. -> .11...011001
- * read-only, young ..1...0...1. -> .01...011101
- * read-write, old ..0...1...0. -> .10...111001
- * read-write, young ..0...0...1. -> .00...111101
- * Huge ptes are dirty by definition
+ * Convert encoding pmd bits pte bits
+ * dy..R...I...wr .IR...wrdytp
+ * empty 00..0...1...00 -> .10...001100
+ * prot-none, clean, old 00..0...1...00 -> .10...000001
+ * prot-none, clean, young 01..0...1...00 -> .10...000101
+ * prot-none, dirty, old 10..0...1...00 -> .10...001001
+ * prot-none, dirty, young 11..0...1...00 -> .10...001101
+ * read-only, clean, old 00..1...1...01 -> .11...010001
+ * read-only, clean, young 01..1...1...01 -> .11...010101
+ * read-only, dirty, old 10..1...1...01 -> .11...011001
+ * read-only, dirty, young 11..1...1...01 -> .11...011101
+ * read-write, clean, old 00..0...1...11 -> .10...110001
+ * read-write, clean, young 01..0...1...11 -> .10...110101
+ * read-write, dirty, old 10..0...1...11 -> .10...111001
+ * read-write, dirty, young 11..0...1...11 -> .10...111101
*/
if (pmd_present(pmd)) {
- pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY |
- (pmd_val(pmd) & PAGE_MASK);
- if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID)
- pte_val(pte) |= _PAGE_INVALID;
- if (pmd_prot_none(pmd)) {
- if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
- pte_val(pte) |= _PAGE_YOUNG;
- } else {
- pte_val(pte) |= _PAGE_READ;
- if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
- pte_val(pte) |= _PAGE_PROTECT;
- else
- pte_val(pte) |= _PAGE_WRITE;
- if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)
- pte_val(pte) |= _PAGE_YOUNG;
- }
+ pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE;
+ pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_READ) << 4;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT);
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
} else
pte_val(pte) = _PAGE_INVALID;
return pte;
@@ -96,6 +84,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pmd = __pte_to_pmd(pte);
if (!MACHINE_HAS_HPAGE) {
+ /* Emulated huge ptes loose the dirty and young bit */
pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
pmd_val(pmd) |= pte_page(pte)[1].index;
} else
@@ -113,6 +102,8 @@ pte_t huge_ptep_get(pte_t *ptep)
origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
pmd_val(pmd) |= *(unsigned long *) origin;
+ /* Emulated huge ptes are young and dirty by definition */
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG | _SEGMENT_ENTRY_DIRTY;
}
return __pmd_to_pte(pmd);
}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 37b8241..19daa53 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -1279,6 +1279,7 @@ static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
{
unsigned long next, *table, *new;
struct page *page;
+ spinlock_t *ptl;
pmd_t *pmd;
pmd = pmd_offset(pud, addr);
@@ -1296,7 +1297,7 @@ again:
if (!new)
return -ENOMEM;
- spin_lock(&mm->page_table_lock);
+ ptl = pmd_lock(mm, pmd);
if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
/* Nuke pmd entry pointing to the "short" page table */
pmdp_flush_lazy(mm, addr, pmd);
@@ -1310,7 +1311,7 @@ again:
page_table_free_rcu(tlb, table);
new = NULL;
}
- spin_unlock(&mm->page_table_lock);
+ spin_unlock(ptl);
if (new) {
page_table_free_pgste(new);
goto again;
@@ -1432,6 +1433,9 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
{
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ entry = pmd_mkyoung(entry);
+ if (dirty)
+ entry = pmd_mkdirty(entry);
if (pmd_same(*pmdp, entry))
return 0;
pmdp_invalidate(vma, address, pmdp);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index a2cbd87..61e45b7 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -812,7 +812,7 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize,
return header;
}
-void bpf_jit_compile(struct sk_filter *fp)
+void bpf_jit_compile(struct bpf_prog *fp)
{
struct bpf_binary_header *header = NULL;
unsigned long size, prg_len, lit_len;
@@ -875,7 +875,7 @@ out:
kfree(addrs);
}
-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(struct bpf_prog *fp)
{
unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
struct bpf_binary_header *header = (void *)addr;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 9ddc51e..2fa7b14 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -15,8 +15,8 @@
* Thomas Klein
*/
-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -48,13 +48,10 @@
static LIST_HEAD(zpci_list);
static DEFINE_SPINLOCK(zpci_list_lock);
-static void zpci_enable_irq(struct irq_data *data);
-static void zpci_disable_irq(struct irq_data *data);
-
static struct irq_chip zpci_irq_chip = {
.name = "zPCI",
- .irq_unmask = zpci_enable_irq,
- .irq_mask = zpci_disable_irq,
+ .irq_unmask = unmask_msi_irq,
+ .irq_mask = mask_msi_irq,
};
static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
@@ -244,43 +241,6 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
return rc;
}
-static int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag)
-{
- int offset, pos;
- u32 mask_bits;
-
- if (msi->msi_attrib.is_msix) {
- offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_VECTOR_CTRL;
- msi->masked = readl(msi->mask_base + offset);
- writel(flag, msi->mask_base + offset);
- } else if (msi->msi_attrib.maskbit) {
- pos = (long) msi->mask_base;
- pci_read_config_dword(msi->dev, pos, &mask_bits);
- mask_bits &= ~(mask);
- mask_bits |= flag & mask;
- pci_write_config_dword(msi->dev, pos, mask_bits);
- } else
- return 0;
-
- msi->msi_attrib.maskbit = !!flag;
- return 1;
-}
-
-static void zpci_enable_irq(struct irq_data *data)
-{
- struct msi_desc *msi = irq_get_msi_desc(data->irq);
-
- zpci_msi_set_mask_bits(msi, 1, 0);
-}
-
-static void zpci_disable_irq(struct irq_data *data)
-{
- struct msi_desc *msi = irq_get_msi_desc(data->irq);
-
- zpci_msi_set_mask_bits(msi, 1, 1);
-}
-
void pcibios_fixup_bus(struct pci_bus *bus)
{
}
@@ -487,7 +447,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
/* Release MSI interrupts */
list_for_each_entry(msi, &pdev->msi_list, list) {
- zpci_msi_set_mask_bits(msi, 1, 1);
+ if (msi->msi_attrib.is_msix)
+ default_msix_mask_irq(msi, 1);
+ else
+ default_msi_mask_irq(msi, 1, 1);
irq_set_msi_desc(msi->irq, NULL);
irq_free_desc(msi->irq);
msi->msg.address_lo = 0;
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 96545d7..6e22a24 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -5,8 +5,8 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kernel.h>
#include <linux/slab.h>
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index c5c6684..eec598c 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -5,8 +5,8 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kernel.h>
#include <linux/seq_file.h>
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index f91c031..4cbb29a 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -16,6 +16,13 @@
static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
+static int s390_iommu_strict;
+
+static int zpci_refresh_global(struct zpci_dev *zdev)
+{
+ return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
+ zdev->iommu_pages * PAGE_SIZE);
+}
static unsigned long *dma_alloc_cpu_table(void)
{
@@ -155,18 +162,15 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
}
/*
- * rpcit is not required to establish new translations when previously
- * invalid translation-table entries are validated, however it is
- * required when altering previously valid entries.
+ * With zdev->tlb_refresh == 0, rpcit is not required to establish new
+ * translations when previously invalid translation-table entries are
+ * validated. With lazy unmap, it also is skipped for previously valid
+ * entries, but a global rpcit is then required before any address can
+ * be re-used, i.e. after each iommu bitmap wrap-around.
*/
if (!zdev->tlb_refresh &&
- ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
- /*
- * TODO: also need to check that the old entry is indeed INVALID
- * and not only for one page but for the whole range...
- * -> now we WARN_ON in that case but with lazy unmap that
- * needs to be redone!
- */
+ (!s390_iommu_strict ||
+ ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
goto no_refresh;
rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
@@ -220,16 +224,21 @@ static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
{
unsigned long offset, flags;
+ int wrap = 0;
spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
offset = __dma_alloc_iommu(zdev, zdev->next_bit, size);
- if (offset == -1)
+ if (offset == -1) {
+ /* wrap-around */
offset = __dma_alloc_iommu(zdev, 0, size);
+ wrap = 1;
+ }
if (offset != -1) {
zdev->next_bit = offset + size;
- if (zdev->next_bit >= zdev->iommu_pages)
- zdev->next_bit = 0;
+ if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
+ /* global flush after wrap-around with lazy unmap */
+ zpci_refresh_global(zdev);
}
spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
return offset;
@@ -243,7 +252,11 @@ static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size
if (!zdev->iommu_bitmap)
goto out;
bitmap_clear(zdev->iommu_bitmap, offset, size);
- if (offset >= zdev->next_bit)
+ /*
+ * Lazy flush for unmap: need to move next_bit to avoid address re-use
+ * until wrap-around.
+ */
+ if (!s390_iommu_strict && offset >= zdev->next_bit)
zdev->next_bit = offset + size;
out:
spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
@@ -504,3 +517,12 @@ struct dma_map_ops s390_dma_ops = {
/* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_dma_ops);
+
+static int __init s390_iommu_setup(char *str)
+{
+ if (!strncmp(str, "strict", 6))
+ s390_iommu_strict = 1;
+ return 0;
+}
+
+__setup("s390_iommu=", s390_iommu_setup);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 6d7f5a3..460fdb2 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -5,8 +5,8 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kernel.h>
#include <linux/pci.h>
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 9190214..fa3ce89 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -5,8 +5,8 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kernel.h>
#include <linux/stat.h>
OpenPOWER on IntegriCloud