summaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig9
-rw-r--r--arch/powerpc/configs/c2k_defconfig1
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig1
-rw-r--r--arch/powerpc/crypto/aes-spe-glue.c6
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h4
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h5
-rw-r--r--arch/powerpc/include/asm/eeh.h1
-rw-r--r--arch/powerpc/include/asm/gpio.h4
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h2
-rw-r--r--arch/powerpc/include/asm/kvm_host.h12
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h51
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h1
-rw-r--r--arch/powerpc/include/asm/pci.h2
-rw-r--r--arch/powerpc/include/asm/pgtable.h3
-rw-r--r--arch/powerpc/include/asm/smp.h4
-rw-r--r--arch/powerpc/include/asm/systbl.h1
-rw-r--r--arch/powerpc/include/asm/trace.h8
-rw-r--r--arch/powerpc/include/asm/unistd.h2
-rw-r--r--arch/powerpc/include/asm/xics.h1
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h9
-rw-r--r--arch/powerpc/include/uapi/asm/socket.h2
-rw-r--r--arch/powerpc/include/uapi/asm/unistd.h1
-rw-r--r--arch/powerpc/kernel/eeh_driver.c6
-rw-r--r--arch/powerpc/kernel/eeh_pe.c35
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c3
-rw-r--r--arch/powerpc/kernel/misc_64.S28
-rw-r--r--arch/powerpc/kernel/module_64.c14
-rw-r--r--arch/powerpc/kernel/process.c4
-rw-r--r--arch/powerpc/kernel/rtasd.c9
-rw-r--r--arch/powerpc/kernel/setup_32.c2
-rw-r--r--arch/powerpc/kernel/setup_64.c3
-rw-r--r--arch/powerpc/kernel/smp.c30
-rw-r--r--arch/powerpc/kernel/traps.c5
-rw-r--r--arch/powerpc/kvm/Makefile2
-rw-r--r--arch/powerpc/kvm/book3s.c2
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c3
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c156
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c330
-rw-r--r--arch/powerpc/kvm/book3s_hv.c233
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c3
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c131
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S22
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c35
-rw-r--r--arch/powerpc/kvm/powerpc.c58
-rw-r--r--arch/powerpc/mm/hash64_64k.c8
-rw-r--r--arch/powerpc/mm/hash_utils_64.c36
-rw-r--r--arch/powerpc/mm/hugepage-hash64.c12
-rw-r--r--arch/powerpc/mm/hugetlbpage-book3e.c13
-rw-r--r--arch/powerpc/mm/init_32.c8
-rw-r--r--arch/powerpc/mm/mem.c6
-rw-r--r--arch/powerpc/mm/mmap.c4
-rw-r--r--arch/powerpc/mm/mmu_context_hash64.c3
-rw-r--r--arch/powerpc/mm/pgtable.c8
-rw-r--r--arch/powerpc/mm/pgtable_64.c34
-rw-r--r--arch/powerpc/perf/hv-24x7.c8
-rw-r--r--arch/powerpc/perf/power8-pmu.c2
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_shared.c2
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c5
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c1
-rw-r--r--arch/powerpc/platforms/powernv/pci.c26
-rw-r--r--arch/powerpc/platforms/powernv/pci.h1
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c10
-rw-r--r--arch/powerpc/sysdev/ppc4xx_gpio.c2
-rw-r--r--arch/powerpc/sysdev/simple_gpio.c2
-rw-r--r--arch/powerpc/sysdev/xics/icp-native.c21
65 files changed, 1203 insertions, 253 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e4824fd..a030e5e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -158,6 +158,7 @@ config PPC
select ARCH_HAS_DEVMEM_IS_ALLOWED
select HAVE_ARCH_SECCOMP_FILTER
select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN
@@ -557,7 +558,7 @@ choice
config PPC_4K_PAGES
bool "4k page size"
- select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S
+ select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
config PPC_16K_PAGES
bool "16k page size"
@@ -566,7 +567,7 @@ config PPC_16K_PAGES
config PPC_64K_PAGES
bool "64k page size"
depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64)
- select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S
+ select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
config PPC_256K_PAGES
bool "256k page size"
@@ -828,14 +829,10 @@ config PCI_8260
select PPC_INDIRECT_PCI
default y
-source "drivers/pci/pcie/Kconfig"
-
source "drivers/pci/Kconfig"
source "drivers/pcmcia/Kconfig"
-source "drivers/pci/hotplug/Kconfig"
-
config HAS_RAPIDIO
bool
default n
diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig
index 9186229..340685c 100644
--- a/arch/powerpc/configs/c2k_defconfig
+++ b/arch/powerpc/configs/c2k_defconfig
@@ -387,7 +387,6 @@ CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_SPINLOCK=y
CONFIG_BOOTX_TEXT=y
CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index e5d2c3d..99ccbeba 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -1175,7 +1175,6 @@ CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_XMON=y
CONFIG_BOOTX_TEXT=y
CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_NETWORK_XFRM=y
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
index 93ee046..ab11319 100644
--- a/arch/powerpc/crypto/aes-spe-glue.c
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -22,6 +22,7 @@
#include <asm/byteorder.h>
#include <asm/switch_to.h>
#include <crypto/algapi.h>
+#include <crypto/xts.h>
/*
* MAX_BYTES defines the number of bytes that are allowed to be processed
@@ -126,6 +127,11 @@ static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ int err;
+
+ err = xts_check_key(tfm, in_key, key_len);
+ if (err)
+ return err;
key_len >>= 1;
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 06f17e7..8d1c816 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -50,7 +50,9 @@
* set of bits not changed in pmd_modify.
*/
#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
- _PAGE_ACCESSED | _PAGE_THP_HUGE)
+ _PAGE_ACCESSED | _PAGE_THP_HUGE | _PAGE_PTE | \
+ _PAGE_SOFT_DIRTY)
+
#ifdef CONFIG_PPC_64K_PAGES
#include <asm/book3s/64/hash-64k.h>
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 8204b0c..ac07a30 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -223,7 +223,6 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
-#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
@@ -282,6 +281,10 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp);
+#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
+extern void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+
#define pmd_move_must_withdraw pmd_move_must_withdraw
struct spinlock;
static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index c5eb86f..867c39b 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -81,6 +81,7 @@ struct pci_dn;
#define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */
#define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */
#define EEH_PE_REMOVED (1 << 10) /* Removed permanently */
+#define EEH_PE_PRI_BUS (1 << 11) /* Cached primary bus */
struct eeh_pe {
int type; /* PE type: PHB/Bus/Device */
diff --git a/arch/powerpc/include/asm/gpio.h b/arch/powerpc/include/asm/gpio.h
deleted file mode 100644
index b3799d8..0000000
--- a/arch/powerpc/include/asm/gpio.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __LINUX_GPIO_H
-#warning Include linux/gpio.h instead of asm/gpio.h
-#include <linux/gpio.h>
-#endif
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 2aa79c8..7529aab 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -33,8 +33,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
}
#endif
-#define SPAPR_TCE_SHIFT 12
-
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 271fefb..d7b3431 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -38,8 +38,7 @@
#define KVM_MAX_VCPUS NR_CPUS
#define KVM_MAX_VCORES NR_CPUS
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS
+#define KVM_USER_MEM_SLOTS 512
#ifdef CONFIG_KVM_MMIO
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -183,7 +182,10 @@ struct kvmppc_spapr_tce_table {
struct list_head list;
struct kvm *kvm;
u64 liobn;
- u32 window_size;
+ struct rcu_head rcu;
+ u32 page_shift;
+ u64 offset; /* in pages */
+ u64 size; /* window size in pages */
struct page *pages[0];
};
@@ -290,7 +292,7 @@ struct kvmppc_vcore {
struct list_head runnable_threads;
struct list_head preempt_list;
spinlock_t lock;
- wait_queue_head_t wq;
+ struct swait_queue_head wq;
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
u64 stolen_tb;
u64 preempt_tb;
@@ -630,7 +632,7 @@ struct kvm_vcpu_arch {
u8 prodded;
u32 last_inst;
- wait_queue_head_t *wqp;
+ struct swait_queue_head *wqp;
struct kvmppc_vcore *vcore;
int ret;
int trap;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2241d53..2544eda 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -165,9 +165,25 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
- struct kvm_create_spapr_tce *args);
+ struct kvm_create_spapr_tce_64 *args);
+extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
+ struct kvm_vcpu *vcpu, unsigned long liobn);
+extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
+ unsigned long ioba, unsigned long npages);
+extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt,
+ unsigned long tce);
+extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
+ unsigned long *ua, unsigned long **prmap);
+extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
+ unsigned long idx, unsigned long tce);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce);
+extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_list, unsigned long npages);
+extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_value, unsigned long npages);
extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba);
extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
@@ -437,6 +453,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{
return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
}
+extern void kvmppc_alloc_host_rm_ops(void);
+extern void kvmppc_free_host_rm_ops(void);
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
@@ -445,7 +463,11 @@ extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xics_ipi_action(void);
+extern int h_ipi_redirect;
#else
+static inline void kvmppc_alloc_host_rm_ops(void) {};
+static inline void kvmppc_free_host_rm_ops(void) {};
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
@@ -459,6 +481,33 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
#endif
+/*
+ * Host-side operations we want to set up while running in real
+ * mode in the guest operating on the xics.
+ * Currently only VCPU wakeup is supported.
+ */
+
+union kvmppc_rm_state {
+ unsigned long raw;
+ struct {
+ u32 in_host;
+ u32 rm_action;
+ };
+};
+
+struct kvmppc_host_rm_core {
+ union kvmppc_rm_state rm_state;
+ void *rm_data;
+ char pad[112];
+};
+
+struct kvmppc_host_rm_ops {
+ struct kvmppc_host_rm_core *rm_core;
+ void (*vcpu_kick)(struct kvm_vcpu *vcpu);
+};
+
+extern struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
+
static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_KVM_BOOKE_HV
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 54843ca..78968c1 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -10,7 +10,6 @@
#include <linux/pci.h>
#include <linux/list.h>
#include <linux/ioport.h>
-#include <asm-generic/pci-bridge.h>
struct device_node;
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 6f8065a..a6f3ac0 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -20,8 +20,6 @@
#include <asm/prom.h>
#include <asm/pci-bridge.h>
-#include <asm-generic/pci-dma-compat.h>
-
/* Return values for pci_controller_ops.probe_mode function */
#define PCI_PROBE_NONE -1 /* Don't look at this bus at all */
#define PCI_PROBE_NORMAL 0 /* Do normal PCI probing */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index ac9fb11..47897a3 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -78,6 +78,9 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
}
return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift);
}
+
+unsigned long vmalloc_to_phys(void *vmalloc_addr);
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 825663c..78083ed 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -114,6 +114,9 @@ extern int cpu_to_core_id(int cpu);
#define PPC_MSG_TICK_BROADCAST 2
#define PPC_MSG_DEBUGGER_BREAK 3
+/* This is only used by the powernv kernel */
+#define PPC_MSG_RM_HOST_ACTION 4
+
/* for irq controllers that have dedicated ipis per message (4) */
extern int smp_request_message_ipi(int virq, int message);
extern const char *smp_ipi_name[];
@@ -121,6 +124,7 @@ extern const char *smp_ipi_name[];
/* for irq controllers with only a single ipi */
extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
extern void smp_muxed_ipi_message_pass(int cpu, int msg);
+extern void smp_muxed_ipi_set_message(int cpu, int msg);
extern irqreturn_t smp_ipi_demux(void);
void smp_init_pSeries(void);
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 5654ece..3fa9df7 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -383,3 +383,4 @@ SYSCALL(ni_syscall)
SYSCALL(ni_syscall)
SYSCALL(ni_syscall)
SYSCALL(mlock2)
+SYSCALL(copy_file_range)
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index 8e86b48..32e36b1 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -57,12 +57,14 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit,
extern void hcall_tracepoint_regfunc(void);
extern void hcall_tracepoint_unregfunc(void);
-TRACE_EVENT_FN(hcall_entry,
+TRACE_EVENT_FN_COND(hcall_entry,
TP_PROTO(unsigned long opcode, unsigned long *args),
TP_ARGS(opcode, args),
+ TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
TP_STRUCT__entry(
__field(unsigned long, opcode)
),
@@ -76,13 +78,15 @@ TRACE_EVENT_FN(hcall_entry,
hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
);
-TRACE_EVENT_FN(hcall_exit,
+TRACE_EVENT_FN_COND(hcall_exit,
TP_PROTO(unsigned long opcode, unsigned long retval,
unsigned long *retbuf),
TP_ARGS(opcode, retval, retbuf),
+ TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
TP_STRUCT__entry(
__field(unsigned long, opcode)
__field(unsigned long, retval)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 6a5ace5..1f2594d 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
#include <uapi/asm/unistd.h>
-#define NR_syscalls 379
+#define NR_syscalls 380
#define __NR__exit __NR_exit
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 0e25bdb..2546048 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -30,6 +30,7 @@
#ifdef CONFIG_PPC_ICP_NATIVE
extern int icp_native_init(void);
extern void icp_native_flush_interrupt(void);
+extern void icp_native_cause_ipi_rm(int cpu);
#else
static inline int icp_native_init(void) { return -ENODEV; }
#endif
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index ab4d473..c93cf35 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -333,6 +333,15 @@ struct kvm_create_spapr_tce {
__u32 window_size;
};
+/* for KVM_CAP_SPAPR_TCE_64 */
+struct kvm_create_spapr_tce_64 {
+ __u64 liobn;
+ __u32 page_shift;
+ __u32 flags;
+ __u64 offset; /* in pages */
+ __u64 size; /* in pages */
+};
+
/* for KVM_ALLOCATE_RMA */
struct kvm_allocate_rma {
__u64 rma_size;
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index dd54f28..1672e33 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -95,4 +95,6 @@
#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52
+#define SO_CNX_ADVICE 53
+
#endif /* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index 12a0565..940290d 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -389,5 +389,6 @@
#define __NR_userfaultfd 364
#define __NR_membarrier 365
#define __NR_mlock2 378
+#define __NR_copy_file_range 379
#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 9387421..650cfb3 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -418,8 +418,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
eeh_pcid_put(dev);
if (driver->err_handler &&
driver->err_handler->error_detected &&
- driver->err_handler->slot_reset &&
- driver->err_handler->resume)
+ driver->err_handler->slot_reset)
return NULL;
}
@@ -564,6 +563,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
*/
eeh_pe_state_mark(pe, EEH_PE_KEEP);
if (bus) {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
pci_lock_rescan_remove();
pcibios_remove_pci_devices(bus);
pci_unlock_rescan_remove();
@@ -803,6 +803,7 @@ perm_error:
* the their PCI config any more.
*/
if (frozen_bus) {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
pci_lock_rescan_remove();
@@ -886,6 +887,7 @@ static void eeh_handle_special_event(void)
continue;
/* Notify all devices to be down */
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
bus = eeh_pe_bus_get(phb_pe);
eeh_pe_dev_traverse(pe,
eeh_report_failure, NULL);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 8654cb1..98f8180 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -883,32 +883,29 @@ void eeh_pe_restore_bars(struct eeh_pe *pe)
const char *eeh_pe_loc_get(struct eeh_pe *pe)
{
struct pci_bus *bus = eeh_pe_bus_get(pe);
- struct device_node *dn = pci_bus_to_OF_node(bus);
+ struct device_node *dn;
const char *loc = NULL;
- if (!dn)
- goto out;
+ while (bus) {
+ dn = pci_bus_to_OF_node(bus);
+ if (!dn) {
+ bus = bus->parent;
+ continue;
+ }
- /* PHB PE or root PE ? */
- if (pci_is_root_bus(bus)) {
- loc = of_get_property(dn, "ibm,loc-code", NULL);
- if (!loc)
+ if (pci_is_root_bus(bus))
loc = of_get_property(dn, "ibm,io-base-loc-code", NULL);
+ else
+ loc = of_get_property(dn, "ibm,slot-location-code",
+ NULL);
+
if (loc)
- goto out;
+ return loc;
- /* Check the root port */
- dn = dn->child;
- if (!dn)
- goto out;
+ bus = bus->parent;
}
- loc = of_get_property(dn, "ibm,loc-code", NULL);
- if (!loc)
- loc = of_get_property(dn, "ibm,slot-location-code", NULL);
-
-out:
- return loc ? loc : "N/A";
+ return "N/A";
}
/**
@@ -931,7 +928,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
bus = pe->phb->bus;
} else if (pe->type & EEH_PE_BUS ||
pe->type & EEH_PE_DEVICE) {
- if (pe->bus) {
+ if (pe->state & EEH_PE_PRI_BUS) {
bus = pe->bus;
goto out;
}
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 05e804c..aec9a1b 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -109,8 +109,9 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp)
* If the breakpoint is unregistered between a hw_breakpoint_handler()
* and the single_step_dabr_instruction(), then cleanup the breakpoint
* restoration variables to prevent dangling pointers.
+ * FIXME, this should not be using bp->ctx at all! Sayeth peterz.
*/
- if (bp->ctx && bp->ctx->task)
+ if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L))
bp->ctx->task->thread.last_hit_ubp = NULL;
}
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index db475d4..f28754c 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -701,31 +701,3 @@ _GLOBAL(kexec_sequence)
li r5,0
blr /* image->start(physid, image->start, 0); */
#endif /* CONFIG_KEXEC */
-
-#ifdef CONFIG_MODULES
-#if defined(_CALL_ELF) && _CALL_ELF == 2
-
-#ifdef CONFIG_MODVERSIONS
-.weak __crc_TOC.
-.section "___kcrctab+TOC.","a"
-.globl __kcrctab_TOC.
-__kcrctab_TOC.:
- .llong __crc_TOC.
-#endif
-
-/*
- * Export a fake .TOC. since both modpost and depmod will complain otherwise.
- * Both modpost and depmod strip the leading . so we do the same here.
- */
-.section "__ksymtab_strings","a"
-__kstrtab_TOC.:
- .asciz "TOC."
-
-.section "___ksymtab+TOC.","a"
-/* This symbol name is important: it's used by modpost to find exported syms */
-.globl __ksymtab_TOC.
-__ksymtab_TOC.:
- .llong 0 /* .value */
- .llong __kstrtab_TOC.
-#endif /* ELFv2 */
-#endif /* MODULES */
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 59663af..08b7a40 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -326,7 +326,10 @@ static void dedotify_versions(struct modversion_info *vers,
}
}
-/* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */
+/*
+ * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC.
+ * seem to be defined (value set later).
+ */
static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
{
unsigned int i;
@@ -334,8 +337,11 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
for (i = 1; i < numsyms; i++) {
if (syms[i].st_shndx == SHN_UNDEF) {
char *name = strtab + syms[i].st_name;
- if (name[0] == '.')
- memmove(name, name+1, strlen(name));
+ if (name[0] == '.') {
+ if (strcmp(name+1, "TOC.") == 0)
+ syms[i].st_shndx = SHN_ABS;
+ syms[i].st_name++;
+ }
}
}
}
@@ -351,7 +357,7 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs,
numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym);
for (i = 1; i < numsyms; i++) {
- if (syms[i].st_shndx == SHN_UNDEF
+ if (syms[i].st_shndx == SHN_ABS
&& strcmp(strtab + syms[i].st_name, "TOC.") == 0)
return &syms[i];
}
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dccc87e..3c5736e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1768,9 +1768,9 @@ static inline unsigned long brk_rnd(void)
/* 8MB for 32bit, 1GB for 64bit */
if (is_32bit_task())
- rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
+ rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT)));
else
- rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
+ rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT)));
return rnd << PAGE_SHIFT;
}
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 5a2c049..aa610ce 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -49,7 +49,7 @@ static unsigned int rtas_error_log_buffer_max;
static unsigned int event_scan;
static unsigned int rtas_event_scan_rate;
-static int full_rtas_msgs = 0;
+static bool full_rtas_msgs;
/* Stop logging to nvram after first fatal error */
static int logging_enabled; /* Until we initialize everything,
@@ -592,11 +592,6 @@ __setup("surveillance=", surveillance_setup);
static int __init rtasmsgs_setup(char *str)
{
- if (strcmp(str, "on") == 0)
- full_rtas_msgs = 1;
- else if (strcmp(str, "off") == 0)
- full_rtas_msgs = 0;
-
- return 1;
+ return (kstrtobool(str, &full_rtas_msgs) == 0);
}
__setup("rtasmsgs=", rtasmsgs_setup);
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index ad8c9db..d544fa3 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -114,8 +114,6 @@ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
notrace void __init machine_init(u64 dt_ptr)
{
- lockdep_init();
-
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 5c03a6a..f98be83 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -255,9 +255,6 @@ void __init early_setup(unsigned long dt_ptr)
setup_paca(&boot_paca);
fixup_boot_paca();
- /* Initialize lockdep early or else spinlocks will blow */
- lockdep_init();
-
/* -------- printk is now safe to use ------- */
/* Enable early debugging if any specified (see udbg.h) */
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ec9ec20..b7dea05f 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -206,7 +206,7 @@ int smp_request_message_ipi(int virq, int msg)
#ifdef CONFIG_PPC_SMP_MUXED_IPI
struct cpu_messages {
- int messages; /* current messages */
+ long messages; /* current messages */
unsigned long data; /* data for cause ipi */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
@@ -218,7 +218,7 @@ void smp_muxed_ipi_set_data(int cpu, unsigned long data)
info->data = data;
}
-void smp_muxed_ipi_message_pass(int cpu, int msg)
+void smp_muxed_ipi_set_message(int cpu, int msg)
{
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
char *message = (char *)&info->messages;
@@ -228,6 +228,13 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
*/
smp_mb();
message[msg] = 1;
+}
+
+void smp_muxed_ipi_message_pass(int cpu, int msg)
+{
+ struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+
+ smp_muxed_ipi_set_message(cpu, msg);
/*
* cause_ipi functions are required to include a full barrier
* before doing whatever causes the IPI.
@@ -236,20 +243,31 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
}
#ifdef __BIG_ENDIAN__
-#define IPI_MESSAGE(A) (1 << (24 - 8 * (A)))
+#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
#else
-#define IPI_MESSAGE(A) (1 << (8 * (A)))
+#define IPI_MESSAGE(A) (1uL << (8 * (A)))
#endif
irqreturn_t smp_ipi_demux(void)
{
struct cpu_messages *info = this_cpu_ptr(&ipi_message);
- unsigned int all;
+ unsigned long all;
mb(); /* order any irq clear */
do {
all = xchg(&info->messages, 0);
+#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+ /*
+ * Must check for PPC_MSG_RM_HOST_ACTION messages
+ * before PPC_MSG_CALL_FUNCTION messages because when
+ * a VM is destroyed, we call kick_all_cpus_sync()
+ * to ensure that any pending PPC_MSG_RM_HOST_ACTION
+ * messages have completed before we free any VCPUs.
+ */
+ if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
+ kvmppc_xics_ipi_action();
+#endif
if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
generic_smp_call_function_interrupt();
if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
@@ -727,7 +745,7 @@ void start_secondary(void *unused)
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
BUG();
}
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index b6becc7..33c47fc 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -203,9 +203,8 @@ static int __kprobes __die(const char *str, struct pt_regs *regs, long err)
#ifdef CONFIG_SMP
printk("SMP NR_CPUS=%d ", NR_CPUS);
#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC ");
-#endif
+ if (debug_pagealloc_enabled())
+ printk("DEBUG_PAGEALLOC ");
#ifdef CONFIG_NUMA
printk("NUMA ");
#endif
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 0570eef..7f7b6d8 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -8,7 +8,7 @@ ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
KVM := ../../../virt/kvm
common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
- $(KVM)/eventfd.o
+ $(KVM)/eventfd.o $(KVM)/vfio.o
CFLAGS_e500_mmu.o := -I.
CFLAGS_e500_mmu_host.o := -I.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 638c6d9..b34220d 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -807,7 +807,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
{
#ifdef CONFIG_PPC64
- INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+ INIT_LIST_HEAD_RCU(&kvm->arch.spapr_tce_tables);
INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
#endif
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 774a253..9bf7031 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -377,15 +377,12 @@ no_seg_found:
static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s;
u64 esid, esid_1t;
int slb_nr;
struct kvmppc_slb *slbe;
dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb);
- vcpu_book3s = to_book3s(vcpu);
-
esid = GET_ESID(rb);
esid_1t = GET_ESID_1T(rb);
slb_nr = rb & 0xfff;
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 54cf9bc..2c2d103 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -14,6 +14,7 @@
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
* Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
*/
#include <linux/types.h>
@@ -36,28 +37,69 @@
#include <asm/ppc-opcode.h>
#include <asm/kvm_host.h>
#include <asm/udbg.h>
+#include <asm/iommu.h>
+#include <asm/tce.h>
-#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
+{
+ return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
+}
-static long kvmppc_stt_npages(unsigned long window_size)
+static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
{
- return ALIGN((window_size >> SPAPR_TCE_SHIFT)
- * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
+ unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) +
+ (tce_pages * sizeof(struct page *));
+
+ return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
}
-static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
+static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
{
- struct kvm *kvm = stt->kvm;
- int i;
+ long ret = 0;
- mutex_lock(&kvm->lock);
- list_del(&stt->list);
- for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
+ if (!current || !current->mm)
+ return ret; /* process exited */
+
+ down_write(&current->mm->mmap_sem);
+
+ if (inc) {
+ unsigned long locked, lock_limit;
+
+ locked = current->mm->locked_vm + stt_pages;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+ ret = -ENOMEM;
+ else
+ current->mm->locked_vm += stt_pages;
+ } else {
+ if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
+ stt_pages = current->mm->locked_vm;
+
+ current->mm->locked_vm -= stt_pages;
+ }
+
+ pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
+ inc ? '+' : '-',
+ stt_pages << PAGE_SHIFT,
+ current->mm->locked_vm << PAGE_SHIFT,
+ rlimit(RLIMIT_MEMLOCK),
+ ret ? " - exceeded" : "");
+
+ up_write(&current->mm->mmap_sem);
+
+ return ret;
+}
+
+static void release_spapr_tce_table(struct rcu_head *head)
+{
+ struct kvmppc_spapr_tce_table *stt = container_of(head,
+ struct kvmppc_spapr_tce_table, rcu);
+ unsigned long i, npages = kvmppc_tce_pages(stt->size);
+
+ for (i = 0; i < npages; i++)
__free_page(stt->pages[i]);
- kfree(stt);
- mutex_unlock(&kvm->lock);
- kvm_put_kvm(kvm);
+ kfree(stt);
}
static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -65,7 +107,7 @@ static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
struct page *page;
- if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
+ if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
return VM_FAULT_SIGBUS;
page = stt->pages[vmf->pgoff];
@@ -88,7 +130,14 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
struct kvmppc_spapr_tce_table *stt = filp->private_data;
- release_spapr_tce_table(stt);
+ list_del_rcu(&stt->list);
+
+ kvm_put_kvm(stt->kvm);
+
+ kvmppc_account_memlimit(
+ kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
+ call_rcu(&stt->rcu, release_spapr_tce_table);
+
return 0;
}
@@ -98,20 +147,29 @@ static const struct file_operations kvm_spapr_tce_fops = {
};
long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
- struct kvm_create_spapr_tce *args)
+ struct kvm_create_spapr_tce_64 *args)
{
struct kvmppc_spapr_tce_table *stt = NULL;
- long npages;
+ unsigned long npages, size;
int ret = -ENOMEM;
int i;
+ if (!args->size)
+ return -EINVAL;
+
/* Check this LIOBN hasn't been previously allocated */
list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
if (stt->liobn == args->liobn)
return -EBUSY;
}
- npages = kvmppc_stt_npages(args->window_size);
+ size = args->size;
+ npages = kvmppc_tce_pages(size);
+ ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
+ if (ret) {
+ stt = NULL;
+ goto fail;
+ }
stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
GFP_KERNEL);
@@ -119,7 +177,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
goto fail;
stt->liobn = args->liobn;
- stt->window_size = args->window_size;
+ stt->page_shift = args->page_shift;
+ stt->offset = args->offset;
+ stt->size = size;
stt->kvm = kvm;
for (i = 0; i < npages; i++) {
@@ -131,7 +191,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
kvm_get_kvm(kvm);
mutex_lock(&kvm->lock);
- list_add(&stt->list, &kvm->arch.spapr_tce_tables);
+ list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
mutex_unlock(&kvm->lock);
@@ -148,3 +208,59 @@ fail:
}
return ret;
}
+
+long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_list, unsigned long npages)
+{
+ struct kvmppc_spapr_tce_table *stt;
+ long i, ret = H_SUCCESS, idx;
+ unsigned long entry, ua = 0;
+ u64 __user *tces, tce;
+
+ stt = kvmppc_find_table(vcpu, liobn);
+ if (!stt)
+ return H_TOO_HARD;
+
+ entry = ioba >> stt->page_shift;
+ /*
+ * SPAPR spec says that the maximum size of the list is 512 TCEs
+ * so the whole table fits in 4K page
+ */
+ if (npages > 512)
+ return H_PARAMETER;
+
+ if (tce_list & (SZ_4K - 1))
+ return H_PARAMETER;
+
+ ret = kvmppc_ioba_validate(stt, ioba, npages);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
+ ret = H_TOO_HARD;
+ goto unlock_exit;
+ }
+ tces = (u64 __user *) ua;
+
+ for (i = 0; i < npages; ++i) {
+ if (get_user(tce, tces + i)) {
+ ret = H_TOO_HARD;
+ goto unlock_exit;
+ }
+ tce = be64_to_cpu(tce);
+
+ ret = kvmppc_tce_validate(stt, tce);
+ if (ret != H_SUCCESS)
+ goto unlock_exit;
+
+ kvmppc_tce_put(stt, entry + i, tce);
+ }
+
+unlock_exit:
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 89e96b3..44be73e 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -14,6 +14,7 @@
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
* Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
*/
#include <linux/types.h>
@@ -30,76 +31,321 @@
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu-hash64.h>
+#include <asm/mmu_context.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/kvm_host.h>
#include <asm/udbg.h>
+#include <asm/iommu.h>
+#include <asm/tce.h>
+#include <asm/iommu.h>
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
-/* WARNING: This will be called in real-mode on HV KVM and virtual
+/*
+ * Finds a TCE table descriptor by LIOBN.
+ *
+ * WARNING: This will be called in real or virtual mode on HV KVM and virtual
* mode on PR KVM
*/
-long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
- unsigned long ioba, unsigned long tce)
+struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
+ unsigned long liobn)
{
struct kvm *kvm = vcpu->kvm;
struct kvmppc_spapr_tce_table *stt;
+ list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list)
+ if (stt->liobn == liobn)
+ return stt;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(kvmppc_find_table);
+
+/*
+ * Validates IO address.
+ *
+ * WARNING: This will be called in real-mode on HV KVM and virtual
+ * mode on PR KVM
+ */
+long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
+ unsigned long ioba, unsigned long npages)
+{
+ unsigned long mask = (1ULL << stt->page_shift) - 1;
+ unsigned long idx = ioba >> stt->page_shift;
+
+ if ((ioba & mask) || (idx < stt->offset) ||
+ (idx - stt->offset + npages > stt->size) ||
+ (idx + npages < idx))
+ return H_PARAMETER;
+
+ return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_ioba_validate);
+
+/*
+ * Validates TCE address.
+ * At the moment flags and page mask are validated.
+ * As the host kernel does not access those addresses (just puts them
+ * to the table and user space is supposed to process them), we can skip
+ * checking other things (such as TCE is a guest RAM address or the page
+ * was actually allocated).
+ *
+ * WARNING: This will be called in real-mode on HV KVM and virtual
+ * mode on PR KVM
+ */
+long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
+{
+ unsigned long page_mask = ~((1ULL << stt->page_shift) - 1);
+ unsigned long mask = ~(page_mask | TCE_PCI_WRITE | TCE_PCI_READ);
+
+ if (tce & mask)
+ return H_PARAMETER;
+
+ return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_tce_validate);
+
+/* Note on the use of page_address() in real mode,
+ *
+ * It is safe to use page_address() in real mode on ppc64 because
+ * page_address() is always defined as lowmem_page_address()
+ * which returns __va(PFN_PHYS(page_to_pfn(page))) which is arithmetic
+ * operation and does not access page struct.
+ *
+ * Theoretically page_address() could be defined different
+ * but either WANT_PAGE_VIRTUAL or HASHED_PAGE_VIRTUAL
+ * would have to be enabled.
+ * WANT_PAGE_VIRTUAL is never enabled on ppc32/ppc64,
+ * HASHED_PAGE_VIRTUAL could be enabled for ppc32 only and only
+ * if CONFIG_HIGHMEM is defined. As CONFIG_SPARSEMEM_VMEMMAP
+ * is not expected to be enabled on ppc32, page_address()
+ * is safe for ppc32 as well.
+ *
+ * WARNING: This will be called in real-mode on HV KVM and virtual
+ * mode on PR KVM
+ */
+static u64 *kvmppc_page_address(struct page *page)
+{
+#if defined(HASHED_PAGE_VIRTUAL) || defined(WANT_PAGE_VIRTUAL)
+#error TODO: fix to avoid page_address() here
+#endif
+ return (u64 *) page_address(page);
+}
+
+/*
+ * Handles TCE requests for emulated devices.
+ * Puts guest TCE values to the table and expects user space to convert them.
+ * Called in both real and virtual modes.
+ * Cannot fail so kvmppc_tce_validate must be called before it.
+ *
+ * WARNING: This will be called in real-mode on HV KVM and virtual
+ * mode on PR KVM
+ */
+void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+ unsigned long idx, unsigned long tce)
+{
+ struct page *page;
+ u64 *tbl;
+
+ idx -= stt->offset;
+ page = stt->pages[idx / TCES_PER_PAGE];
+ tbl = kvmppc_page_address(page);
+
+ tbl[idx % TCES_PER_PAGE] = tce;
+}
+EXPORT_SYMBOL_GPL(kvmppc_tce_put);
+
+long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
+ unsigned long *ua, unsigned long **prmap)
+{
+ unsigned long gfn = gpa >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
+
+ memslot = search_memslots(kvm_memslots(kvm), gfn);
+ if (!memslot)
+ return -EINVAL;
+
+ *ua = __gfn_to_hva_memslot(memslot, gfn) |
+ (gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ if (prmap)
+ *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
+#endif
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba, unsigned long tce)
+{
+ struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
+ long ret;
+
/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
/* liobn, ioba, tce); */
- list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
- if (stt->liobn == liobn) {
- unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
- struct page *page;
- u64 *tbl;
-
- /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */
- /* liobn, stt, stt->window_size); */
- if (ioba >= stt->window_size)
- return H_PARAMETER;
-
- page = stt->pages[idx / TCES_PER_PAGE];
- tbl = (u64 *)page_address(page);
-
- /* FIXME: Need to validate the TCE itself */
- /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */
- tbl[idx % TCES_PER_PAGE] = tce;
- return H_SUCCESS;
- }
- }
+ if (!stt)
+ return H_TOO_HARD;
+
+ ret = kvmppc_ioba_validate(stt, ioba, 1);
+ if (ret != H_SUCCESS)
+ return ret;
- /* Didn't find the liobn, punt it to userspace */
- return H_TOO_HARD;
+ ret = kvmppc_tce_validate(stt, tce);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ kvmppc_tce_put(stt, ioba >> stt->page_shift, tce);
+
+ return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
-long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
- unsigned long ioba)
+static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
+ unsigned long ua, unsigned long *phpa)
+{
+ pte_t *ptep, pte;
+ unsigned shift = 0;
+
+ ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift);
+ if (!ptep || !pte_present(*ptep))
+ return -ENXIO;
+ pte = *ptep;
+
+ if (!shift)
+ shift = PAGE_SHIFT;
+
+ /* Avoid handling anything potentially complicated in realmode */
+ if (shift > PAGE_SHIFT)
+ return -EAGAIN;
+
+ if (!pte_young(pte))
+ return -EAGAIN;
+
+ *phpa = (pte_pfn(pte) << PAGE_SHIFT) | (ua & ((1ULL << shift) - 1)) |
+ (ua & ~PAGE_MASK);
+
+ return 0;
+}
+
+long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_list, unsigned long npages)
{
- struct kvm *kvm = vcpu->kvm;
struct kvmppc_spapr_tce_table *stt;
+ long i, ret = H_SUCCESS;
+ unsigned long tces, entry, ua = 0;
+ unsigned long *rmap = NULL;
+
+ stt = kvmppc_find_table(vcpu, liobn);
+ if (!stt)
+ return H_TOO_HARD;
+
+ entry = ioba >> stt->page_shift;
+ /*
+ * The spec says that the maximum size of the list is 512 TCEs
+ * so the whole table addressed resides in 4K page
+ */
+ if (npages > 512)
+ return H_PARAMETER;
+
+ if (tce_list & (SZ_4K - 1))
+ return H_PARAMETER;
+
+ ret = kvmppc_ioba_validate(stt, ioba, npages);
+ if (ret != H_SUCCESS)
+ return ret;
- list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
- if (stt->liobn == liobn) {
- unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
- struct page *page;
- u64 *tbl;
+ if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
+ return H_TOO_HARD;
- if (ioba >= stt->window_size)
- return H_PARAMETER;
+ rmap = (void *) vmalloc_to_phys(rmap);
- page = stt->pages[idx / TCES_PER_PAGE];
- tbl = (u64 *)page_address(page);
+ /*
+ * Synchronize with the MMU notifier callbacks in
+ * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
+ * While we have the rmap lock, code running on other CPUs
+ * cannot finish unmapping the host real page that backs
+ * this guest real page, so we are OK to access the host
+ * real page.
+ */
+ lock_rmap(rmap);
+ if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
+ ret = H_TOO_HARD;
+ goto unlock_exit;
+ }
+
+ for (i = 0; i < npages; ++i) {
+ unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
+
+ ret = kvmppc_tce_validate(stt, tce);
+ if (ret != H_SUCCESS)
+ goto unlock_exit;
- vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
- return H_SUCCESS;
- }
+ kvmppc_tce_put(stt, entry + i, tce);
}
- /* Didn't find the liobn, punt it to userspace */
- return H_TOO_HARD;
+unlock_exit:
+ unlock_rmap(rmap);
+
+ return ret;
+}
+
+long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_value, unsigned long npages)
+{
+ struct kvmppc_spapr_tce_table *stt;
+ long i, ret;
+
+ stt = kvmppc_find_table(vcpu, liobn);
+ if (!stt)
+ return H_TOO_HARD;
+
+ ret = kvmppc_ioba_validate(stt, ioba, npages);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ /* Check permission bits only to allow userspace poison TCE for debug */
+ if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
+ return H_PARAMETER;
+
+ for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
+ kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
+
+ return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
+
+long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba)
+{
+ struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
+ long ret;
+ unsigned long idx;
+ struct page *page;
+ u64 *tbl;
+
+ if (!stt)
+ return H_TOO_HARD;
+
+ ret = kvmppc_ioba_validate(stt, ioba, 1);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ idx = (ioba >> stt->page_shift) - stt->offset;
+ page = stt->pages[idx / TCES_PER_PAGE];
+ tbl = (u64 *)page_address(page);
+
+ vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
+
+ return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);
+
+#endif /* KVM_BOOK3S_HV_POSSIBLE */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cff207b..84fb4fc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -81,6 +81,17 @@ static int target_smt_mode;
module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
+#ifdef CONFIG_KVM_XICS
+static struct kernel_param_ops module_param_ops = {
+ .set = param_set_int,
+ .get = param_get_int,
+};
+
+module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
+ S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
+#endif
+
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@@ -114,11 +125,11 @@ static bool kvmppc_ipi_thread(int cpu)
static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
int cpu;
- wait_queue_head_t *wqp;
+ struct swait_queue_head *wqp;
wqp = kvm_arch_vcpu_wq(vcpu);
- if (waitqueue_active(wqp)) {
- wake_up_interruptible(wqp);
+ if (swait_active(wqp)) {
+ swake_up(wqp);
++vcpu->stat.halt_wakeup;
}
@@ -701,8 +712,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
tvcpu->arch.prodded = 1;
smp_mb();
if (vcpu->arch.ceded) {
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
+ if (swait_active(&vcpu->wq)) {
+ swake_up(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
}
@@ -768,7 +779,31 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (kvmppc_xics_enabled(vcpu)) {
ret = kvmppc_xics_hcall(vcpu, req);
break;
- } /* fallthrough */
+ }
+ return RESUME_HOST;
+ case H_PUT_TCE:
+ ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_PUT_TCE_INDIRECT:
+ ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_STUFF_TCE:
+ ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
default:
return RESUME_HOST;
}
@@ -833,6 +868,24 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->stat.sum_exits++;
+ /*
+ * This can happen if an interrupt occurs in the last stages
+ * of guest entry or the first stages of guest exit (i.e. after
+ * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+ * and before setting it to KVM_GUEST_MODE_HOST_HV).
+ * That can happen due to a bug, or due to a machine check
+ * occurring at just the wrong time.
+ */
+ if (vcpu->arch.shregs.msr & MSR_HV) {
+ printk(KERN_EMERG "KVM trap in HV mode!\n");
+ printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+ vcpu->arch.trap, kvmppc_get_pc(vcpu),
+ vcpu->arch.shregs.msr);
+ kvmppc_dump_regs(vcpu);
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ run->hw.hardware_exit_reason = vcpu->arch.trap;
+ return RESUME_HOST;
+ }
run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;
switch (vcpu->arch.trap) {
@@ -1441,7 +1494,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
spin_lock_init(&vcore->stoltb_lock);
- init_waitqueue_head(&vcore->wq);
+ init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = core * threads_per_subcore;
@@ -2261,6 +2314,46 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
}
/*
+ * Clear core from the list of active host cores as we are about to
+ * enter the guest. Only do this if it is the primary thread of the
+ * core (not if a subcore) that is entering the guest.
+ */
+static inline void kvmppc_clear_host_core(int cpu)
+{
+ int core;
+
+ if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
+ return;
+ /*
+ * Memory barrier can be omitted here as we will do a smp_wmb()
+ * later in kvmppc_start_thread and we need ensure that state is
+ * visible to other CPUs only after we enter guest.
+ */
+ core = cpu >> threads_shift;
+ kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
+}
+
+/*
+ * Advertise this core as an active host core since we exited the guest
+ * Only need to do this if it is the primary thread of the core that is
+ * exiting.
+ */
+static inline void kvmppc_set_host_core(int cpu)
+{
+ int core;
+
+ if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
+ return;
+
+ /*
+ * Memory barrier can be omitted here because we do a spin_unlock
+ * immediately after this which provides the memory barrier.
+ */
+ core = cpu >> threads_shift;
+ kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
+}
+
+/*
* Run a set of guest threads on a physical core.
* Called with vc->lock held.
*/
@@ -2372,6 +2465,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
}
+ kvmppc_clear_host_core(pcpu);
+
/* Start all the threads */
active = 0;
for (sub = 0; sub < core_info.n_subcores; ++sub) {
@@ -2468,6 +2563,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_ipi_thread(pcpu + i);
}
+ kvmppc_set_host_core(pcpu);
+
spin_unlock(&vc->lock);
/* make sure updates to secondary vcpu structs are visible now */
@@ -2513,10 +2610,9 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
struct kvm_vcpu *vcpu;
int do_sleep = 1;
+ DECLARE_SWAITQUEUE(wait);
- DEFINE_WAIT(wait);
-
- prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
/*
* Check one last time for pending exceptions and ceded state after
@@ -2530,7 +2626,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
}
if (!do_sleep) {
- finish_wait(&vc->wq, &wait);
+ finish_swait(&vc->wq, &wait);
return;
}
@@ -2538,7 +2634,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
schedule();
- finish_wait(&vc->wq, &wait);
+ finish_swait(&vc->wq, &wait);
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_vcore_blocked(vc, 1);
@@ -2594,7 +2690,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
- wake_up(&vc->wq);
+ swake_up(&vc->wq);
}
}
@@ -2966,6 +3062,114 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
goto out_srcu;
}
+#ifdef CONFIG_KVM_XICS
+static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
+{
+ unsigned long cpu = (long)hcpu;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ kvmppc_set_host_core(cpu);
+ break;
+
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
+ kvmppc_clear_host_core(cpu);
+ break;
+#endif
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block kvmppc_cpu_notifier = {
+ .notifier_call = kvmppc_cpu_notify,
+};
+
+/*
+ * Allocate a per-core structure for managing state about which cores are
+ * running in the host versus the guest and for exchanging data between
+ * real mode KVM and CPU running in the host.
+ * This is only done for the first VM.
+ * The allocated structure stays even if all VMs have stopped.
+ * It is only freed when the kvm-hv module is unloaded.
+ * It's OK for this routine to fail, we just don't support host
+ * core operations like redirecting H_IPI wakeups.
+ */
+void kvmppc_alloc_host_rm_ops(void)
+{
+ struct kvmppc_host_rm_ops *ops;
+ unsigned long l_ops;
+ int cpu, core;
+ int size;
+
+ /* Not the first time here ? */
+ if (kvmppc_host_rm_ops_hv != NULL)
+ return;
+
+ ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL);
+ if (!ops)
+ return;
+
+ size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core);
+ ops->rm_core = kzalloc(size, GFP_KERNEL);
+
+ if (!ops->rm_core) {
+ kfree(ops);
+ return;
+ }
+
+ get_online_cpus();
+
+ for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
+ if (!cpu_online(cpu))
+ continue;
+
+ core = cpu >> threads_shift;
+ ops->rm_core[core].rm_state.in_host = 1;
+ }
+
+ ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;
+
+ /*
+ * Make the contents of the kvmppc_host_rm_ops structure visible
+ * to other CPUs before we assign it to the global variable.
+ * Do an atomic assignment (no locks used here), but if someone
+ * beats us to it, just free our copy and return.
+ */
+ smp_wmb();
+ l_ops = (unsigned long) ops;
+
+ if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
+ put_online_cpus();
+ kfree(ops->rm_core);
+ kfree(ops);
+ return;
+ }
+
+ register_cpu_notifier(&kvmppc_cpu_notifier);
+
+ put_online_cpus();
+}
+
+void kvmppc_free_host_rm_ops(void)
+{
+ if (kvmppc_host_rm_ops_hv) {
+ unregister_cpu_notifier(&kvmppc_cpu_notifier);
+ kfree(kvmppc_host_rm_ops_hv->rm_core);
+ kfree(kvmppc_host_rm_ops_hv);
+ kvmppc_host_rm_ops_hv = NULL;
+ }
+}
+#endif
+
static int kvmppc_core_init_vm_hv(struct kvm *kvm)
{
unsigned long lpcr, lpid;
@@ -2978,6 +3182,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
return -ENOMEM;
kvm->arch.lpid = lpid;
+ kvmppc_alloc_host_rm_ops();
+
/*
* Since we don't flush the TLB when tearing down a VM,
* and this lpid might have previously been used,
@@ -3211,6 +3417,7 @@ static int kvmppc_book3s_init_hv(void)
static void kvmppc_book3s_exit_hv(void)
{
+ kvmppc_free_host_rm_ops();
kvmppc_hv_ops = NULL;
}
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index fd7006b..5f0380d 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -283,3 +283,6 @@ void kvmhv_commence_exit(int trap)
kvmhv_interrupt_vcore(vc, ee);
}
}
+
+struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
+EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 24f5807..980d8a6 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -17,12 +17,16 @@
#include <asm/xics.h>
#include <asm/debug.h>
#include <asm/synch.h>
+#include <asm/cputhreads.h>
#include <asm/ppc-opcode.h>
#include "book3s_xics.h"
#define DEBUG_PASSUP
+int h_ipi_redirect = 1;
+EXPORT_SYMBOL(h_ipi_redirect);
+
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u32 new_irq);
@@ -50,11 +54,84 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics,
/* -- ICP routines -- */
+#ifdef CONFIG_SMP
+static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
+{
+ int hcpu;
+
+ hcpu = hcore << threads_shift;
+ kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
+ smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
+ icp_native_cause_ipi_rm(hcpu);
+}
+#else
+static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
+#endif
+
+/*
+ * We start the search from our current CPU Id in the core map
+ * and go in a circle until we get back to our ID looking for a
+ * core that is running in host context and that hasn't already
+ * been targeted for another rm_host_ops.
+ *
+ * In the future, could consider using a fairer algorithm (one
+ * that distributes the IPIs better)
+ *
+ * Returns -1, if no CPU could be found in the host
+ * Else, returns a CPU Id which has been reserved for use
+ */
+static inline int grab_next_hostcore(int start,
+ struct kvmppc_host_rm_core *rm_core, int max, int action)
+{
+ bool success;
+ int core;
+ union kvmppc_rm_state old, new;
+
+ for (core = start + 1; core < max; core++) {
+ old = new = READ_ONCE(rm_core[core].rm_state);
+
+ if (!old.in_host || old.rm_action)
+ continue;
+
+ /* Try to grab this host core if not taken already. */
+ new.rm_action = action;
+
+ success = cmpxchg64(&rm_core[core].rm_state.raw,
+ old.raw, new.raw) == old.raw;
+ if (success) {
+ /*
+ * Make sure that the store to the rm_action is made
+ * visible before we return to caller (and the
+ * subsequent store to rm_data) to synchronize with
+ * the IPI handler.
+ */
+ smp_wmb();
+ return core;
+ }
+ }
+
+ return -1;
+}
+
+static inline int find_available_hostcore(int action)
+{
+ int core;
+ int my_core = smp_processor_id() >> threads_shift;
+ struct kvmppc_host_rm_core *rm_core = kvmppc_host_rm_ops_hv->rm_core;
+
+ core = grab_next_hostcore(my_core, rm_core, cpu_nr_cores(), action);
+ if (core == -1)
+ core = grab_next_hostcore(core, rm_core, my_core, action);
+
+ return core;
+}
+
static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
struct kvm_vcpu *this_vcpu)
{
struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
int cpu;
+ int hcore;
/* Mark the target VCPU as having an interrupt pending */
vcpu->stat.queue_intr++;
@@ -66,11 +143,22 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
return;
}
- /* Check if the core is loaded, if not, too hard */
+ /*
+ * Check if the core is loaded,
+ * if not, find an available host core to post to wake the VCPU,
+ * if we can't find one, set up state to eventually return too hard.
+ */
cpu = vcpu->arch.thread_cpu;
if (cpu < 0 || cpu >= nr_cpu_ids) {
- this_icp->rm_action |= XICS_RM_KICK_VCPU;
- this_icp->rm_kick_target = vcpu;
+ hcore = -1;
+ if (kvmppc_host_rm_ops_hv && h_ipi_redirect)
+ hcore = find_available_hostcore(XICS_RM_KICK_VCPU);
+ if (hcore != -1) {
+ icp_send_hcore_msg(hcore, vcpu);
+ } else {
+ this_icp->rm_action |= XICS_RM_KICK_VCPU;
+ this_icp->rm_kick_target = vcpu;
+ }
return;
}
@@ -623,3 +711,40 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
bail:
return check_too_hard(xics, icp);
}
+
+/* --- Non-real mode XICS-related built-in routines --- */
+
+/**
+ * Host Operations poked by RM KVM
+ */
+static void rm_host_ipi_action(int action, void *data)
+{
+ switch (action) {
+ case XICS_RM_KICK_VCPU:
+ kvmppc_host_rm_ops_hv->vcpu_kick(data);
+ break;
+ default:
+ WARN(1, "Unexpected rm_action=%d data=%p\n", action, data);
+ break;
+ }
+
+}
+
+void kvmppc_xics_ipi_action(void)
+{
+ int core;
+ unsigned int cpu = smp_processor_id();
+ struct kvmppc_host_rm_core *rm_corep;
+
+ core = cpu >> threads_shift;
+ rm_corep = &kvmppc_host_rm_ops_hv->rm_core[core];
+
+ if (rm_corep->rm_data) {
+ rm_host_ipi_action(rm_corep->rm_state.rm_action,
+ rm_corep->rm_data);
+ /* Order these stores against the real mode KVM */
+ rm_corep->rm_data = NULL;
+ smp_wmb();
+ rm_corep->rm_state.rm_action = 0;
+ }
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 3c6badc..85b32f1 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1370,6 +1370,20 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
std r6, VCPU_ACOP(r9)
stw r7, VCPU_GUEST_PID(r9)
std r8, VCPU_WORT(r9)
+ /*
+ * Restore various registers to 0, where non-zero values
+ * set by the guest could disrupt the host.
+ */
+ li r0, 0
+ mtspr SPRN_IAMR, r0
+ mtspr SPRN_CIABR, r0
+ mtspr SPRN_DAWRX, r0
+ mtspr SPRN_TCSCR, r0
+ mtspr SPRN_WORT, r0
+ /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
+ li r0, 1
+ sldi r0, r0, 31
+ mtspr SPRN_MMCRS, r0
8:
/* Save and reset AMR and UAMOR before turning on the MMU */
@@ -2006,8 +2020,8 @@ hcall_real_table:
.long 0 /* 0x12c */
.long 0 /* 0x130 */
.long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
- .long 0 /* 0x138 */
- .long 0 /* 0x13c */
+ .long DOTSYM(kvmppc_h_stuff_tce) - hcall_real_table
+ .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table
.long 0 /* 0x140 */
.long 0 /* 0x144 */
.long 0 /* 0x148 */
@@ -2153,7 +2167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
- rlwimi r5, r4, 1, DAWRX_WT
+ rlwimi r5, r4, 2, DAWRX_WT
clrrdi r4, r4, 3
std r4, VCPU_DAWR(r3)
std r5, VCPU_DAWRX(r3)
@@ -2404,6 +2418,8 @@ machine_check_realmode:
* guest as machine check causing guest to crash.
*/
ld r11, VCPU_MSR(r9)
+ rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
+ bne mc_cont /* if so, exit to host */
andi. r10, r11, MSR_RI /* check for unrecoverable exception */
beq 1f /* Deliver a machine check to guest */
ld r10, VCPU_PC(r9)
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index f2c75a1..02176fd 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -280,6 +280,37 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
+static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu)
+{
+ unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+ unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+ unsigned long tce = kvmppc_get_gpr(vcpu, 6);
+ unsigned long npages = kvmppc_get_gpr(vcpu, 7);
+ long rc;
+
+ rc = kvmppc_h_put_tce_indirect(vcpu, liobn, ioba,
+ tce, npages);
+ if (rc == H_TOO_HARD)
+ return EMULATE_FAIL;
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
+{
+ unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+ unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+ unsigned long tce_value = kvmppc_get_gpr(vcpu, 6);
+ unsigned long npages = kvmppc_get_gpr(vcpu, 7);
+ long rc;
+
+ rc = kvmppc_h_stuff_tce(vcpu, liobn, ioba, tce_value, npages);
+ if (rc == H_TOO_HARD)
+ return EMULATE_FAIL;
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{
long rc = kvmppc_xics_hcall(vcpu, cmd);
@@ -306,6 +337,10 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
return kvmppc_h_pr_bulk_remove(vcpu);
case H_PUT_TCE:
return kvmppc_h_pr_put_tce(vcpu);
+ case H_PUT_TCE_INDIRECT:
+ return kvmppc_h_pr_put_tce_indirect(vcpu);
+ case H_STUFF_TCE:
+ return kvmppc_h_pr_stuff_tce(vcpu);
case H_CEDE:
kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
kvm_vcpu_block(vcpu);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6fd2405..19aa59b 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -33,6 +33,7 @@
#include <asm/tlbflush.h>
#include <asm/cputhreads.h>
#include <asm/irqflags.h>
+#include <asm/iommu.h>
#include "timing.h"
#include "irq.h"
#include "../mm/mmu_decl.h"
@@ -437,6 +438,16 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
unsigned int i;
struct kvm_vcpu *vcpu;
+#ifdef CONFIG_KVM_XICS
+ /*
+ * We call kick_all_cpus_sync() to ensure that all
+ * CPUs have executed any pending IPIs before we
+ * continue and free VCPUs structures below.
+ */
+ if (is_kvmppc_hv_enabled(kvm))
+ kick_all_cpus_sync();
+#endif
+
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arch_vcpu_free(vcpu);
@@ -509,6 +520,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
+ case KVM_CAP_SPAPR_TCE_64:
case KVM_CAP_PPC_ALLOC_HTAB:
case KVM_CAP_PPC_RTAS:
case KVM_CAP_PPC_FIXUP_HCALL:
@@ -569,6 +581,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PPC_GET_SMMU_INFO:
r = 1;
break;
+ case KVM_CAP_SPAPR_MULTITCE:
+ r = 1;
+ break;
#endif
default:
r = 0;
@@ -919,21 +934,17 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
r = -ENXIO;
break;
}
- vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+ val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
break;
case KVM_REG_PPC_VSCR:
if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
r = -ENXIO;
break;
}
- vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+ val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
break;
case KVM_REG_PPC_VRSAVE:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- vcpu->arch.vrsave = set_reg_val(reg->id, val);
+ val = get_reg_val(reg->id, vcpu->arch.vrsave);
break;
#endif /* CONFIG_ALTIVEC */
default:
@@ -974,17 +985,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
r = -ENXIO;
break;
}
- val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+ vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
break;
case KVM_REG_PPC_VSCR:
if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
r = -ENXIO;
break;
}
- val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+ vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
break;
case KVM_REG_PPC_VRSAVE:
- val = get_reg_val(reg->id, vcpu->arch.vrsave);
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vrsave = set_reg_val(reg->id, val);
break;
#endif /* CONFIG_ALTIVEC */
default:
@@ -1331,13 +1346,34 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
#ifdef CONFIG_PPC_BOOK3S_64
+ case KVM_CREATE_SPAPR_TCE_64: {
+ struct kvm_create_spapr_tce_64 create_tce_64;
+
+ r = -EFAULT;
+ if (copy_from_user(&create_tce_64, argp, sizeof(create_tce_64)))
+ goto out;
+ if (create_tce_64.flags) {
+ r = -EINVAL;
+ goto out;
+ }
+ r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
+ goto out;
+ }
case KVM_CREATE_SPAPR_TCE: {
struct kvm_create_spapr_tce create_tce;
+ struct kvm_create_spapr_tce_64 create_tce_64;
r = -EFAULT;
if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
goto out;
- r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
+
+ create_tce_64.liobn = create_tce.liobn;
+ create_tce_64.page_shift = IOMMU_PAGE_SHIFT_4K;
+ create_tce_64.offset = 0;
+ create_tce_64.size = create_tce.window_size >>
+ IOMMU_PAGE_SHIFT_4K;
+ create_tce_64.flags = 0;
+ r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
goto out;
}
case KVM_PPC_GET_SMMU_INFO: {
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 0762c1e..edb0991 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -111,7 +111,13 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
*/
if (!(old_pte & _PAGE_COMBO)) {
flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
- old_pte &= ~_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND;
+ /*
+ * clear the old slot details from the old and new pte.
+ * On hash insert failure we use old pte value and we don't
+ * want slot information there if we have a insert failure.
+ */
+ old_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
+ new_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
goto htab_insert_hpte;
}
/*
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index ba59d59..1005281 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -255,8 +255,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
if (ret < 0)
break;
+
#ifdef CONFIG_DEBUG_PAGEALLOC
- if ((paddr >> PAGE_SHIFT) < linear_map_hash_count)
+ if (debug_pagealloc_enabled() &&
+ (paddr >> PAGE_SHIFT) < linear_map_hash_count)
linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80;
#endif /* CONFIG_DEBUG_PAGEALLOC */
}
@@ -512,17 +514,17 @@ static void __init htab_init_page_sizes(void)
if (mmu_has_feature(MMU_FTR_16M_PAGE))
memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
sizeof(mmu_psize_defaults_gp));
- found:
-#ifndef CONFIG_DEBUG_PAGEALLOC
- /*
- * Pick a size for the linear mapping. Currently, we only support
- * 16M, 1M and 4K which is the default
- */
- if (mmu_psize_defs[MMU_PAGE_16M].shift)
- mmu_linear_psize = MMU_PAGE_16M;
- else if (mmu_psize_defs[MMU_PAGE_1M].shift)
- mmu_linear_psize = MMU_PAGE_1M;
-#endif /* CONFIG_DEBUG_PAGEALLOC */
+found:
+ if (!debug_pagealloc_enabled()) {
+ /*
+ * Pick a size for the linear mapping. Currently, we only
+ * support 16M, 1M and 4K which is the default
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ mmu_linear_psize = MMU_PAGE_16M;
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ mmu_linear_psize = MMU_PAGE_1M;
+ }
#ifdef CONFIG_PPC_64K_PAGES
/*
@@ -721,10 +723,12 @@ static void __init htab_initialize(void)
prot = pgprot_val(PAGE_KERNEL);
#ifdef CONFIG_DEBUG_PAGEALLOC
- linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
- linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count,
- 1, ppc64_rma_size));
- memset(linear_map_hash_slots, 0, linear_map_hash_count);
+ if (debug_pagealloc_enabled()) {
+ linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
+ linear_map_hash_slots = __va(memblock_alloc_base(
+ linear_map_hash_count, 1, ppc64_rma_size));
+ memset(linear_map_hash_slots, 0, linear_map_hash_count);
+ }
#endif /* CONFIG_DEBUG_PAGEALLOC */
/* On U3 based machines, we need to reserve the DART area and
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 49b152b..eb2accd 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -78,9 +78,19 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
* base page size. This is because demote_segment won't flush
* hash page table entries.
*/
- if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
+ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) {
flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K,
ssize, flags);
+ /*
+ * With THP, we also clear the slot information with
+ * respect to all the 64K hash pte mapping the 16MB
+ * page. They are all invalid now. This make sure we
+ * don't find the slot valid when we fault with 4k
+ * base page size.
+ *
+ */
+ memset(hpte_slot_array, 0, PTE_FRAG_SIZE);
+ }
}
valid = hpte_valid(hpte_slot_array, index);
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index 7e6d088..83a8be7 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -8,6 +8,8 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
+#include <asm/mmu.h>
+
#ifdef CONFIG_PPC_FSL_BOOK3E
#ifdef CONFIG_PPC64
static inline int tlb1_next(void)
@@ -60,6 +62,14 @@ static inline void book3e_tlb_lock(void)
unsigned long tmp;
int token = smp_processor_id() + 1;
+ /*
+ * Besides being unnecessary in the absence of SMT, this
+ * check prevents trying to do lbarx/stbcx. on e5500 which
+ * doesn't implement either feature.
+ */
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return;
+
asm volatile("1: lbarx %0, 0, %1;"
"cmpwi %0, 0;"
"bne 2f;"
@@ -80,6 +90,9 @@ static inline void book3e_tlb_unlock(void)
{
struct paca_struct *paca = get_paca();
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return;
+
isync();
paca->tcd_ptr->lock = 0;
}
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index a10be66..c2b7716 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -112,10 +112,10 @@ void __init MMU_setup(void)
if (strstr(boot_command_line, "noltlbs")) {
__map_without_ltlbs = 1;
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
- __map_without_bats = 1;
- __map_without_ltlbs = 1;
-#endif
+ if (debug_pagealloc_enabled()) {
+ __map_without_bats = 1;
+ __map_without_ltlbs = 1;
+ }
}
/*
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 22d94c3..f078a1f 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -541,7 +541,7 @@ static int __init add_system_ram_resources(void)
res->name = "System RAM";
res->start = base;
res->end = base + size - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
WARN_ON(request_resource(&iomem_resource, res) < 0);
}
}
@@ -560,12 +560,12 @@ subsys_initcall(add_system_ram_resources);
*/
int devmem_is_allowed(unsigned long pfn)
{
+ if (page_is_rtas_user_buf(pfn))
+ return 1;
if (iomem_is_exclusive(PFN_PHYS(pfn)))
return 0;
if (!page_is_ram(pfn))
return 1;
- if (page_is_rtas_user_buf(pfn))
- return 1;
return 0;
}
#endif /* CONFIG_STRICT_DEVMEM */
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 0f0502e..4087705 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -59,9 +59,9 @@ unsigned long arch_mmap_rnd(void)
/* 8MB for 32bit, 1GB for 64bit */
if (is_32bit_task())
- rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT));
+ rnd = get_random_long() % (1<<(23-PAGE_SHIFT));
else
- rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT));
+ rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT));
return rnd << PAGE_SHIFT;
}
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 4e4efbc..9ca6fe1 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -118,8 +118,7 @@ static void destroy_pagetable_page(struct mm_struct *mm)
/* drop all the pending references */
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
- count = atomic_sub_return(PTE_FRAG_NR - count, &page->_count);
- if (!count) {
+ if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) {
pgtable_page_dtor(page);
free_hot_cold_page(page, 0);
}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 83dfd79..de37ff4 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -243,3 +243,11 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
}
#endif /* CONFIG_DEBUG_VM */
+unsigned long vmalloc_to_phys(void *va)
+{
+ unsigned long pfn = vmalloc_to_pfn(va);
+
+ BUG_ON(!pfn);
+ return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va);
+}
+EXPORT_SYMBOL_GPL(vmalloc_to_phys);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 3124a20..d9cc66c 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -403,7 +403,7 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
* count.
*/
if (likely(!mm->context.pte_frag)) {
- atomic_set(&page->_count, PTE_FRAG_NR);
+ set_page_count(page, PTE_FRAG_NR);
mm->context.pte_frag = ret + PTE_FRAG_SIZE;
}
spin_unlock(&mm->page_table_lock);
@@ -646,6 +646,28 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
return pgtable;
}
+void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
+
+ /*
+ * We can't mark the pmd none here, because that will cause a race
+ * against exit_mmap. We need to continue mark pmd TRANS HUGE, while
+ * we spilt, but at the same time we wan't rest of the ppc64 code
+ * not to insert hash pte on this, because we will be modifying
+ * the deposited pgtable in the caller of this function. Hence
+ * clear the _PAGE_USER so that we move the fault handling to
+ * higher level function and that will serialize against ptl.
+ * We need to flush existing hash pte entries here even though,
+ * the translation is still valid, because we will withdraw
+ * pgtable_t after this.
+ */
+ pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0);
+}
+
+
/*
* set a new huge pmd. We should not be called for updating
* an existing pmd entry. That should go via pmd_hugepage_update.
@@ -663,10 +685,20 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}
+/*
+ * We use this to invalidate a pmdp entry before switching from a
+ * hugepte to regular pmd entry.
+ */
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
+
+ /*
+ * This ensures that generic code that rely on IRQ disabling
+ * to prevent a parallel THP split work as expected.
+ */
+ kick_all_cpus_sync();
}
/*
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 9f9dfda..3b09ecf 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -493,14 +493,6 @@ static size_t event_to_attr_ct(struct hv_24x7_event_data *event)
}
}
-static unsigned long vmalloc_to_phys(void *v)
-{
- struct page *p = vmalloc_to_page(v);
-
- BUG_ON(!p);
- return page_to_phys(p) + offset_in_page(v);
-}
-
/* */
struct event_uniq {
struct rb_node node;
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 7d5e295..9958ba8 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -816,7 +816,7 @@ static struct power_pmu power8_pmu = {
.get_constraint = power8_get_constraint,
.get_alternatives = power8_get_alternatives,
.disable_pmc = power8_disable_pmc,
- .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_ARCH_207S,
+ .flags = PPMU_HAS_SIER | PPMU_ARCH_207S,
.n_generic = ARRAY_SIZE(power8_generic_events),
.generic_events = power8_generic_events,
.cache_events = &power8_cache_events,
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 711f3d3..452da23 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -188,7 +188,7 @@ static struct fsl_diu_shared_fb __attribute__ ((__aligned__(8))) diu_shared_fb;
static inline void mpc512x_free_bootmem(struct page *page)
{
BUG_ON(PageTail(page));
- BUG_ON(atomic_read(&page->_count) > 1);
+ BUG_ON(page_ref_count(page) > 1);
free_reserved_page(page);
}
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 5f152b9..87f47e5 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -444,9 +444,12 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
* PCI devices of the PE are expected to be removed prior
* to PE reset.
*/
- if (!edev->pe->bus)
+ if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
edev->pe->bus = pci_find_bus(hose->global_number,
pdn->busno);
+ if (edev->pe->bus)
+ edev->pe->state |= EEH_PE_PRI_BUS;
+ }
/*
* Enable EEH explicitly so that we will do EEH check
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 573ae19..f90dc04 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3180,6 +3180,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
+ .dma_bus_setup = pnv_pci_dma_bus_setup,
#ifdef CONFIG_PCI_MSI
.setup_msi_irqs = pnv_setup_msi_irqs,
.teardown_msi_irqs = pnv_teardown_msi_irqs,
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 2f55c86..b1ef84a 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -599,6 +599,9 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
long i;
+ if (proto_tce & TCE_PCI_WRITE)
+ proto_tce |= TCE_PCI_READ;
+
for (i = 0; i < npages; i++) {
unsigned long newtce = proto_tce |
((rpn + i) << tbl->it_page_shift);
@@ -620,6 +623,9 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index,
BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
+ if (newtce & TCE_PCI_WRITE)
+ newtce |= TCE_PCI_READ;
+
oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce));
*hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE);
*direction = iommu_tce_direction(oldtce);
@@ -760,6 +766,26 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
phb->dma_dev_setup(phb, pdev);
}
+void pnv_pci_dma_bus_setup(struct pci_bus *bus)
+{
+ struct pci_controller *hose = bus->sysdata;
+ struct pnv_phb *phb = hose->private_data;
+ struct pnv_ioda_pe *pe;
+
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)))
+ continue;
+
+ if (!pe->pbus)
+ continue;
+
+ if (bus->number == ((pe->rid >> 8) & 0xFF)) {
+ pe->pbus = bus;
+ break;
+ }
+ }
+}
+
void pnv_pci_shutdown(void)
{
struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 7f56313..00691a9 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -242,6 +242,7 @@ extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev);
+extern void pnv_pci_dma_bus_setup(struct pci_bus *bus);
extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 32274f7..282837a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -47,20 +47,14 @@ static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;
static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;
-static int cede_offline_enabled __read_mostly = 1;
+static bool cede_offline_enabled __read_mostly = true;
/*
* Enable/disable cede_offline when available.
*/
static int __init setup_cede_offline(char *str)
{
- if (!strcmp(str, "off"))
- cede_offline_enabled = 0;
- else if (!strcmp(str, "on"))
- cede_offline_enabled = 1;
- else
- return 0;
- return 1;
+ return (kstrtobool(str, &cede_offline_enabled) == 0);
}
__setup("cede_offline=", setup_cede_offline);
diff --git a/arch/powerpc/sysdev/ppc4xx_gpio.c b/arch/powerpc/sysdev/ppc4xx_gpio.c
index fc65ad1..d7a7ef13 100644
--- a/arch/powerpc/sysdev/ppc4xx_gpio.c
+++ b/arch/powerpc/sysdev/ppc4xx_gpio.c
@@ -78,7 +78,7 @@ static int ppc4xx_gpio_get(struct gpio_chip *gc, unsigned int gpio)
struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
- return in_be32(&regs->ir) & GPIO_MASK(gpio);
+ return !!(in_be32(&regs->ir) & GPIO_MASK(gpio));
}
static inline void
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
index ff5e732..56ce8ca 100644
--- a/arch/powerpc/sysdev/simple_gpio.c
+++ b/arch/powerpc/sysdev/simple_gpio.c
@@ -46,7 +46,7 @@ static int u8_gpio_get(struct gpio_chip *gc, unsigned int gpio)
{
struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- return in_8(mm_gc->regs) & u8_pin2mask(gpio);
+ return !!(in_8(mm_gc->regs) & u8_pin2mask(gpio));
}
static void u8_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index eae3265..afdf62f 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -159,6 +159,27 @@ static void icp_native_cause_ipi(int cpu, unsigned long data)
icp_native_set_qirr(cpu, IPI_PRIORITY);
}
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void icp_native_cause_ipi_rm(int cpu)
+{
+ /*
+ * Currently not used to send IPIs to another CPU
+ * on the same core. Only caller is KVM real mode.
+ * Need the physical address of the XICS to be
+ * previously saved in kvm_hstate in the paca.
+ */
+ unsigned long xics_phys;
+
+ /*
+ * Just like the cause_ipi functions, it is required to
+ * include a full barrier (out8 includes a sync) before
+ * causing the IPI.
+ */
+ xics_phys = paca[cpu].kvm_hstate.xics_phys;
+ out_rm8((u8 *)(xics_phys + XICS_MFRR), IPI_PRIORITY);
+}
+#endif
+
/*
* Called when an interrupt is received on an off-line CPU to
* clear the interrupt, so that the CPU can go back to nap mode.
OpenPOWER on IntegriCloud