diff options
Diffstat (limited to 'arch/s390')
43 files changed, 767 insertions, 468 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index da183c5..22f75b5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -301,7 +301,6 @@ config HOTPLUG_CPU def_bool y prompt "Support for hot-pluggable CPUs" depends on SMP - select HOTPLUG help Say Y here to be able to turn CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu/cpu#. diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index 7ef60b5..42be537 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -32,7 +32,7 @@ * book: * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ -static struct appldata_mem_data { +struct appldata_mem_data { u64 timestamp; u32 sync_count_1; /* after VM collected the record data, */ u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the @@ -63,7 +63,7 @@ static struct appldata_mem_data { u64 pgmajfault; /* page faults (major only) */ // <-- New in 2.6 -} __attribute__((packed)) appldata_mem_data; +} __packed; /* @@ -118,7 +118,6 @@ static struct appldata_ops ops = { .record_nr = APPLDATA_RECORD_MEM_ID, .size = sizeof(struct appldata_mem_data), .callback = &appldata_get_mem_data, - .data = &appldata_mem_data, .owner = THIS_MODULE, .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */ }; @@ -131,7 +130,17 @@ static struct appldata_ops ops = { */ static int __init appldata_mem_init(void) { - return appldata_register_ops(&ops); + int ret; + + ops.data = kzalloc(sizeof(struct appldata_mem_data), GFP_KERNEL); + if (!ops.data) + return -ENOMEM; + + ret = appldata_register_ops(&ops); + if (ret) + kfree(ops.data); + + return ret; } /* @@ -142,6 +151,7 @@ static int __init appldata_mem_init(void) static void __exit appldata_mem_exit(void) { appldata_unregister_ops(&ops); + kfree(ops.data); } diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 2d224b9..66037d2 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -29,7 +29,7 @@ * book: * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ -static struct appldata_net_sum_data { +struct appldata_net_sum_data { u64 timestamp; u32 sync_count_1; /* after VM collected the record data, */ u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the @@ -51,7 +51,7 @@ static struct appldata_net_sum_data { u64 rx_dropped; /* no space in linux buffers */ u64 tx_dropped; /* no space available in linux */ u64 collisions; /* collisions while transmitting */ -} __attribute__((packed)) appldata_net_sum_data; +} __packed; /* @@ -121,7 +121,6 @@ static struct appldata_ops ops = { .record_nr = APPLDATA_RECORD_NET_SUM_ID, .size = sizeof(struct appldata_net_sum_data), .callback = &appldata_get_net_sum_data, - .data = &appldata_net_sum_data, .owner = THIS_MODULE, .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */ }; @@ -134,7 +133,17 @@ static struct appldata_ops ops = { */ static int __init appldata_net_init(void) { - return appldata_register_ops(&ops); + int ret; + + ops.data = kzalloc(sizeof(struct appldata_net_sum_data), GFP_KERNEL); + if (!ops.data) + return -ENOMEM; + + ret = appldata_register_ops(&ops); + if (ret) + kfree(ops.data); + + return ret; } /* @@ -145,6 +154,7 @@ static int __init appldata_net_init(void) static void __exit appldata_net_exit(void) { appldata_unregister_ops(&ops); + kfree(ops.data); } diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 7fd3690..138893e 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -651,9 +651,7 @@ static int hypfs_create_cpu_files(struct super_block *sb, } diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer); rc = hypfs_create_str(sb, cpu_dir, "type", buffer); - if (IS_ERR(rc)) - return PTR_ERR(rc); - return 0; + return PTR_RET(rc); } static void *hypfs_create_lpar_files(struct super_block *sb, @@ -702,9 +700,7 @@ static int hypfs_create_phys_cpu_files(struct super_block *sb, return PTR_ERR(rc); diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer); rc = hypfs_create_str(sb, cpu_dir, "type", buffer); - if (IS_ERR(rc)) - return PTR_ERR(rc); - return 0; + return PTR_RET(rc); } static void *hypfs_create_phys_files(struct super_block *sb, diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index 9819891..4066cee 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -9,9 +9,18 @@ #ifndef _ASM_S390_AIRQ_H #define _ASM_S390_AIRQ_H -typedef void (*adapter_int_handler_t)(void *, void *); +struct airq_struct { + struct hlist_node list; /* Handler queueing. */ + void (*handler)(struct airq_struct *); /* Thin-interrupt handler */ + u8 *lsi_ptr; /* Local-Summary-Indicator pointer */ + u8 lsi_mask; /* Local-Summary-Indicator mask */ + u8 isc; /* Interrupt-subclass */ + u8 flags; +}; -void *s390_register_adapter_interrupt(adapter_int_handler_t, void *, u8); -void s390_unregister_adapter_interrupt(void *, u8); +#define AIRQ_PTR_ALLOCATED 0x01 + +int register_adapter_interrupt(struct airq_struct *airq); +void unregister_adapter_interrupt(struct airq_struct *airq); #endif /* _ASM_S390_AIRQ_H */ diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h index 2f8c1ab..3fbc67d 100644 --- a/arch/s390/include/asm/dma-mapping.h +++ b/arch/s390/include/asm/dma-mapping.h @@ -53,7 +53,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) debug_dma_mapping_error(dev, dma_addr); if (dma_ops->mapping_error) return dma_ops->mapping_error(dev, dma_addr); - return (dma_addr == DMA_ERROR_CODE); + return dma_addr == DMA_ERROR_CODE; } static inline void *dma_alloc_coherent(struct device *dev, size_t size, diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index 2ee66a6..0aa6a7e 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -13,6 +13,16 @@ #define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */ +static inline int __test_facility(unsigned long nr, void *facilities) +{ + unsigned char *ptr; + + if (nr >= MAX_FACILITY_BIT) + return 0; + ptr = (unsigned char *) facilities + (nr >> 3); + return (*ptr & (0x80 >> (nr & 7))) != 0; +} + /* * The test_facility function uses the bit odering where the MSB is bit 0. * That makes it easier to query facility bits with the bit number as @@ -20,12 +30,7 @@ */ static inline int test_facility(unsigned long nr) { - unsigned char *ptr; - - if (nr >= MAX_FACILITY_BIT) - return 0; - ptr = (unsigned char *) &S390_lowcore.stfle_fac_list + (nr >> 3); - return (*ptr & (0x80 >> (nr & 7))) != 0; + return __test_facility(nr, &S390_lowcore.stfle_fac_list); } /** diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index fd9be01..cd6b9ee 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -13,28 +13,6 @@ #include <asm/page.h> #include <asm/pci_io.h> -/* - * Change virtual addresses to physical addresses and vv. - * These are pretty trivial - */ -static inline unsigned long virt_to_phys(volatile void * address) -{ - unsigned long real_address; - asm volatile( - " lra %0,0(%1)\n" - " jz 0f\n" - " la %0,0\n" - "0:" - : "=a" (real_address) : "a" (address) : "cc"); - return real_address; -} -#define virt_to_phys virt_to_phys - -static inline void * phys_to_virt(unsigned long address) -{ - return (void *) address; -} - void *xlate_dev_mem_ptr(unsigned long phys); #define xlate_dev_mem_ptr xlate_dev_mem_ptr void unxlate_dev_mem_ptr(unsigned long phys, void *addr); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 16bd5d1..3238d40 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -62,13 +62,20 @@ struct sca_block { #define CPUSTAT_MCDS 0x00000100 #define CPUSTAT_SM 0x00000080 #define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 #define CPUSTAT_J 0x00000002 #define CPUSTAT_P 0x00000001 struct kvm_s390_sie_block { atomic_t cpuflags; /* 0x0000 */ __u32 prefix; /* 0x0004 */ - __u8 reserved8[32]; /* 0x0008 */ + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE (1<<0) + __u32 prog0c; /* 0x000c */ + __u8 reserved10[16]; /* 0x0010 */ +#define PROG_BLOCK_SIE 0x00000001 + atomic_t prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ __u64 cputm; /* 0x0028 */ __u64 ckc; /* 0x0030 */ __u64 epoch; /* 0x0038 */ @@ -90,7 +97,8 @@ struct kvm_s390_sie_block { __u32 scaoh; /* 0x005c */ __u8 reserved60; /* 0x0060 */ __u8 ecb; /* 0x0061 */ - __u8 reserved62[2]; /* 0x0062 */ + __u8 ecb2; /* 0x0062 */ + __u8 reserved63[1]; /* 0x0063 */ __u32 scaol; /* 0x0064 */ __u8 reserved68[4]; /* 0x0068 */ __u32 todpr; /* 0x006c */ @@ -130,6 +138,7 @@ struct kvm_vcpu_stat { u32 deliver_program_int; u32 deliver_io_int; u32 exit_wait_state; + u32 instruction_pfmf; u32 instruction_stidp; u32 instruction_spx; u32 instruction_stpx; @@ -166,7 +175,7 @@ struct kvm_s390_ext_info { }; #define PGM_OPERATION 0x01 -#define PGM_PRIVILEGED_OPERATION 0x02 +#define PGM_PRIVILEGED_OP 0x02 #define PGM_EXECUTE 0x03 #define PGM_PROTECTION 0x04 #define PGM_ADDRESSING 0x05 @@ -219,7 +228,7 @@ struct kvm_s390_local_interrupt { atomic_t active; struct kvm_s390_float_interrupt *float_int; int timer_due; /* event indicator for waitqueue below */ - wait_queue_head_t wq; + wait_queue_head_t *wq; atomic_t *cpuflags; unsigned int action_bits; }; @@ -266,4 +275,5 @@ struct kvm_arch{ }; extern int sie64a(struct kvm_s390_sie_block *, u64 *); +extern char sie_exit; #endif diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 6c18012..6e577ba 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -120,7 +120,6 @@ struct zpci_dev { struct dentry *debugfs_dev; struct dentry *debugfs_perf; - struct dentry *debugfs_debug; }; struct pci_hp_callback_ops { @@ -143,7 +142,6 @@ int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); void zpci_stop_device(struct zpci_dev *); void zpci_free_device(struct zpci_dev *); -int zpci_scan_device(struct zpci_dev *); int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); int zpci_unregister_ioat(struct zpci_dev *, u8); diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 5f0173a..1141fb3 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -14,3 +14,13 @@ /* Per-CPU flags for PMU states */ #define PMU_F_RESERVED 0x1000 #define PMU_F_ENABLED 0x2000 + +#ifdef CONFIG_64BIT + +/* Perf callbacks */ +struct pt_regs; +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) + +#endif /* CONFIG_64BIT */ diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 590c321..e1408dd 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -22,6 +22,9 @@ unsigned long *page_table_alloc(struct mm_struct *, unsigned long); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mmu_gather *, unsigned long *); +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned long key, bool nq); + static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { typedef struct { char _[n]; } addrtype; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index e8b6e5b..75fb726 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -58,9 +58,6 @@ extern unsigned long zero_page_mask; #define __HAVE_COLOR_ZERO_PAGE /* TODO: s390 cannot support io_remap_pfn_range... */ -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - #endif /* !__ASSEMBLY__ */ /* @@ -299,18 +296,16 @@ extern unsigned long MODULES_END; #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) /* Page status table bits for virtualization */ -#define RCP_ACC_BITS 0xf0000000UL -#define RCP_FP_BIT 0x08000000UL -#define RCP_PCL_BIT 0x00800000UL -#define RCP_HR_BIT 0x00400000UL -#define RCP_HC_BIT 0x00200000UL -#define RCP_GR_BIT 0x00040000UL -#define RCP_GC_BIT 0x00020000UL -#define RCP_IN_BIT 0x00002000UL /* IPTE notify bit */ - -/* User dirty / referenced bit for KVM's migration feature */ -#define KVM_UR_BIT 0x00008000UL -#define KVM_UC_BIT 0x00004000UL +#define PGSTE_ACC_BITS 0xf0000000UL +#define PGSTE_FP_BIT 0x08000000UL +#define PGSTE_PCL_BIT 0x00800000UL +#define PGSTE_HR_BIT 0x00400000UL +#define PGSTE_HC_BIT 0x00200000UL +#define PGSTE_GR_BIT 0x00040000UL +#define PGSTE_GC_BIT 0x00020000UL +#define PGSTE_UR_BIT 0x00008000UL +#define PGSTE_UC_BIT 0x00004000UL /* user dirty (migration) */ +#define PGSTE_IN_BIT 0x00002000UL /* IPTE notify bit */ #else /* CONFIG_64BIT */ @@ -367,18 +362,16 @@ extern unsigned long MODULES_END; | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO) /* Page status table bits for virtualization */ -#define RCP_ACC_BITS 0xf000000000000000UL -#define RCP_FP_BIT 0x0800000000000000UL -#define RCP_PCL_BIT 0x0080000000000000UL -#define RCP_HR_BIT 0x0040000000000000UL -#define RCP_HC_BIT 0x0020000000000000UL -#define RCP_GR_BIT 0x0004000000000000UL -#define RCP_GC_BIT 0x0002000000000000UL -#define RCP_IN_BIT 0x0000200000000000UL /* IPTE notify bit */ - -/* User dirty / referenced bit for KVM's migration feature */ -#define KVM_UR_BIT 0x0000800000000000UL -#define KVM_UC_BIT 0x0000400000000000UL +#define PGSTE_ACC_BITS 0xf000000000000000UL +#define PGSTE_FP_BIT 0x0800000000000000UL +#define PGSTE_PCL_BIT 0x0080000000000000UL +#define PGSTE_HR_BIT 0x0040000000000000UL +#define PGSTE_HC_BIT 0x0020000000000000UL +#define PGSTE_GR_BIT 0x0004000000000000UL +#define PGSTE_GC_BIT 0x0002000000000000UL +#define PGSTE_UR_BIT 0x0000800000000000UL +#define PGSTE_UC_BIT 0x0000400000000000UL /* user dirty (migration) */ +#define PGSTE_IN_BIT 0x0000200000000000UL /* IPTE notify bit */ #endif /* CONFIG_64BIT */ @@ -618,8 +611,8 @@ static inline pgste_t pgste_get_lock(pte_t *ptep) asm( " lg %0,%2\n" "0: lgr %1,%0\n" - " nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */ - " oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */ + " nihh %0,0xff7f\n" /* clear PCL bit in old */ + " oihh %1,0x0080\n" /* set PCL bit in new */ " csg %0,%1,%2\n" " jl 0b\n" : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) @@ -632,7 +625,7 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE asm( - " nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */ + " nihh %1,0xff7f\n" /* clear PCL bit */ " stg %1,%0\n" : "=Q" (ptep[PTRS_PER_PTE]) : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) @@ -665,14 +658,14 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) else if (bits) page_reset_referenced(address); /* Transfer page changed & referenced bit to guest bits in pgste */ - pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ + pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ /* Get host changed & referenced bits from pgste */ - bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52; + bits |= (pgste_val(pgste) & (PGSTE_HR_BIT | PGSTE_HC_BIT)) >> 52; /* Transfer page changed & referenced bit to kvm user bits */ - pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */ + pgste_val(pgste) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */ /* Clear relevant host bits in pgste. */ - pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT); - pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT); + pgste_val(pgste) &= ~(PGSTE_HR_BIT | PGSTE_HC_BIT); + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); /* Copy page access key and fetch protection bit to pgste */ pgste_val(pgste) |= (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; @@ -693,15 +686,15 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) /* Get referenced bit from storage key */ young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); if (young) - pgste_val(pgste) |= RCP_GR_BIT; + pgste_val(pgste) |= PGSTE_GR_BIT; /* Get host referenced bit from pgste */ - if (pgste_val(pgste) & RCP_HR_BIT) { - pgste_val(pgste) &= ~RCP_HR_BIT; + if (pgste_val(pgste) & PGSTE_HR_BIT) { + pgste_val(pgste) &= ~PGSTE_HR_BIT; young = 1; } /* Transfer referenced bit to kvm user bits and pte */ if (young) { - pgste_val(pgste) |= KVM_UR_BIT; + pgste_val(pgste) |= PGSTE_UR_BIT; pte_val(*ptep) |= _PAGE_SWR; } #endif @@ -723,7 +716,7 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry) * The guest C/R information is still in the PGSTE, set real * key C/R to 0. */ - nkey = (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; + nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; page_set_storage_key(address, nkey, 0); #endif } @@ -753,6 +746,7 @@ struct gmap { struct mm_struct *mm; unsigned long *table; unsigned long asce; + void *private; struct list_head crst_list; }; @@ -811,8 +805,8 @@ static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - if (pgste_val(pgste) & RCP_IN_BIT) { - pgste_val(pgste) &= ~RCP_IN_BIT; + if (pgste_val(pgste) & PGSTE_IN_BIT) { + pgste_val(pgste) &= ~PGSTE_IN_BIT; gmap_do_ipte_notify(mm, addr, ptep); } #endif @@ -980,8 +974,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_update_all(ptep, pgste); - dirty = !!(pgste_val(pgste) & KVM_UC_BIT); - pgste_val(pgste) &= ~KVM_UC_BIT; + dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); + pgste_val(pgste) &= ~PGSTE_UC_BIT; pgste_set_unlock(ptep, pgste); return dirty; } @@ -1000,8 +994,8 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_update_young(ptep, pgste); - young = !!(pgste_val(pgste) & KVM_UR_BIT); - pgste_val(pgste) &= ~KVM_UR_BIT; + young = !!(pgste_val(pgste) & PGSTE_UR_BIT); + pgste_val(pgste) &= ~PGSTE_UR_BIT; pgste_set_unlock(ptep, pgste); } return young; @@ -1370,10 +1364,11 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); #define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); static inline int pmd_trans_splitting(pmd_t pmd) { diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 559512a..52b5653 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -24,6 +24,7 @@ struct pt_regs unsigned long gprs[NUM_GPRS]; unsigned long orig_gpr2; unsigned int int_code; + unsigned int int_parm; unsigned long int_parm_long; }; diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 9ccd190..6a9a9eb 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -35,6 +35,7 @@ header-y += siginfo.h header-y += signal.h header-y += socket.h header-y += sockios.h +header-y += sclp_ctl.h header-y += stat.h header-y += statfs.h header-y += swab.h diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h index 1c6a7f8..65dc694 100644 --- a/arch/s390/include/uapi/asm/chsc.h +++ b/arch/s390/include/uapi/asm/chsc.h @@ -29,6 +29,16 @@ struct chsc_async_area { __u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)]; } __attribute__ ((packed)); +struct chsc_header { + __u16 length; + __u16 code; +} __attribute__ ((packed)); + +struct chsc_sync_area { + struct chsc_header header; + __u8 data[CHSC_SIZE - sizeof(struct chsc_header)]; +} __attribute__ ((packed)); + struct chsc_response_struct { __u16 length; __u16 code; @@ -126,5 +136,8 @@ struct chsc_cpd_info { #define CHSC_INFO_CCL _IOWR(CHSC_IOCTL_MAGIC, 0x86, struct chsc_comp_list) #define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info) #define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal) +#define CHSC_START_SYNC _IOWR(CHSC_IOCTL_MAGIC, 0x89, struct chsc_sync_area) +#define CHSC_ON_CLOSE_SET _IOWR(CHSC_IOCTL_MAGIC, 0x8a, struct chsc_async_area) +#define CHSC_ON_CLOSE_REMOVE _IO(CHSC_IOCTL_MAGIC, 0x8b) #endif diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h index 38eca3b..5812a3b 100644 --- a/arch/s390/include/uapi/asm/dasd.h +++ b/arch/s390/include/uapi/asm/dasd.h @@ -261,6 +261,10 @@ struct dasd_snid_ioctl_data { #define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6) /* Resume IO on device */ #define BIODASDRESUME _IO(DASD_IOCTL_LETTER,7) +/* Abort all I/O on a device */ +#define BIODASDABORTIO _IO(DASD_IOCTL_LETTER, 240) +/* Allow I/O on a device */ +#define BIODASDALLOWIO _IO(DASD_IOCTL_LETTER, 241) /* retrieve API version number */ diff --git a/arch/s390/include/uapi/asm/sclp_ctl.h b/arch/s390/include/uapi/asm/sclp_ctl.h new file mode 100644 index 0000000..f281861 --- /dev/null +++ b/arch/s390/include/uapi/asm/sclp_ctl.h @@ -0,0 +1,24 @@ +/* + * IOCTL interface for SCLP + * + * Copyright IBM Corp. 2012 + * + * Author: Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#ifndef _ASM_SCLP_CTL_H +#define _ASM_SCLP_CTL_H + +#include <linux/types.h> + +struct sclp_ctl_sccb { + __u32 cmdw; + __u64 sccb; +} __attribute__((packed)); + +#define SCLP_CTL_IOCTL_MAGIC 0x10 + +#define SCLP_CTL_SCCB \ + _IOWR(SCLP_CTL_IOCTL_MAGIC, 0x10, struct sclp_ctl_sccb) + +#endif diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 7a82f9f..2416138 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -7,6 +7,7 @@ #define ASM_OFFSETS_C #include <linux/kbuild.h> +#include <linux/kvm_host.h> #include <linux/sched.h> #include <asm/cputime.h> #include <asm/vdso.h> @@ -47,6 +48,7 @@ int main(void) DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code)); + DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm)); DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long)); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); @@ -161,6 +163,8 @@ int main(void) DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb)); DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb)); DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); + DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c)); + DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20)); #endif /* CONFIG_32BIT */ return 0; } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 4d5e6f8..be7a408 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -429,11 +429,19 @@ io_skip: stm %r0,%r7,__PT_R0(%r11) mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID TRACE_IRQS_OFF xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) +io_loop: l %r1,BASED(.Ldo_IRQ) lr %r2,%r11 # pass pointer to pt_regs basr %r14,%r1 # call do_IRQ + tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR + jz io_return + tpi 0 + jz io_return + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + j io_loop io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON @@ -573,10 +581,10 @@ ext_skip: stm %r0,%r7,__PT_R0(%r11) mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR + mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS TRACE_IRQS_OFF lr %r2,%r11 # pass pointer to pt_regs - l %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code - l %r4,__LC_EXT_PARAMS # get external parameters l %r1,BASED(.Ldo_extint) basr %r14,%r1 # call do_extint j io_return diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index aa0ab02e..3ddbc26 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -54,7 +54,7 @@ void handle_signal32(unsigned long sig, struct k_sigaction *ka, void do_notify_resume(struct pt_regs *regs); struct ext_code; -void do_extint(struct pt_regs *regs, struct ext_code, unsigned int, unsigned long); +void do_extint(struct pt_regs *regs); void do_restart(void); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 4c17eec..1c039d0 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -47,7 +47,6 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) -_TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #define BASED(name) name-system_call(%r13) @@ -81,23 +80,27 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #endif .endm - .macro HANDLE_SIE_INTERCEPT scratch,pgmcheck + .macro HANDLE_SIE_INTERCEPT scratch,reason #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) tmhh %r8,0x0001 # interrupting from user ? - jnz .+42 + jnz .+62 lgr \scratch,%r9 - slg \scratch,BASED(.Lsie_loop) - clg \scratch,BASED(.Lsie_length) - .if \pgmcheck + slg \scratch,BASED(.Lsie_critical) + clg \scratch,BASED(.Lsie_critical_length) + .if \reason==1 # Some program interrupts are suppressing (e.g. protection). # We must also check the instruction after SIE in that case. # do_protection_exception will rewind to rewind_pad - jh .+22 + jh .+42 .else - jhe .+22 + jhe .+42 .endif - lg %r9,BASED(.Lsie_loop) - LPP BASED(.Lhost_id) # set host id + lg %r14,__SF_EMPTY(%r15) # get control block pointer + LPP __SF_EMPTY+16(%r15) # set host id + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + larl %r9,sie_exit # skip forward to sie_exit + mvi __SF_EMPTY+31(%r15),\reason # set exit reason #endif .endm @@ -450,7 +453,7 @@ ENTRY(io_int_handler) lg %r12,__LC_THREAD_INFO larl %r13,system_call lmg %r8,%r9,__LC_IO_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,0 + HANDLE_SIE_INTERCEPT %r14,2 SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT tmhh %r8,0x0001 # interrupting from user? jz io_skip @@ -460,10 +463,18 @@ io_skip: stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) +io_loop: lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,do_IRQ + tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR + jz io_return + tpi 0 + jz io_return + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + j io_loop io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON @@ -595,7 +606,7 @@ ENTRY(ext_int_handler) lg %r12,__LC_THREAD_INFO larl %r13,system_call lmg %r8,%r9,__LC_EXT_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,0 + HANDLE_SIE_INTERCEPT %r14,3 SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT tmhh %r8,0x0001 # interrupting from user ? jz ext_skip @@ -605,13 +616,13 @@ ext_skip: stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) + lghi %r1,__LC_EXT_PARAMS2 + mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR + mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS + mvc __PT_INT_PARM_LONG(8,%r11),0(%r1) TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - lghi %r1,4096 lgr %r2,%r11 # pass pointer to pt_regs - llgf %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code - llgf %r4,__LC_EXT_PARAMS # get external parameter - lg %r5,__LC_EXT_PARAMS2-4096(%r1) # get 64 bit external parameter brasl %r14,do_extint j io_return @@ -643,7 +654,7 @@ ENTRY(mcck_int_handler) lg %r12,__LC_THREAD_INFO larl %r13,system_call lmg %r8,%r9,__LC_MCK_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,0 + HANDLE_SIE_INTERCEPT %r14,4 tm __LC_MCCK_CODE,0x80 # system damage? jo mcck_panic # yes -> rest of mcck code invalid lghi %r14,__LC_CPU_TIMER_SAVE_AREA @@ -937,56 +948,50 @@ ENTRY(sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers stg %r2,__SF_EMPTY(%r15) # save control block pointer stg %r3,__SF_EMPTY+8(%r15) # save guest register save area - xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # host id == 0 + xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason lmg %r0,%r13,0(%r3) # load guest gprs 0-13 -# some program checks are suppressing. C code (e.g. do_protection_exception) -# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other -# instructions in the sie_loop should not cause program interrupts. So -# lets use a nop (47 00 00 00) as a landing pad. -# See also HANDLE_SIE_INTERCEPT -rewind_pad: - nop 0 -sie_loop: - lg %r14,__LC_THREAD_INFO # pointer thread_info struct - tm __TI_flags+7(%r14),_TIF_EXIT_SIE - jnz sie_exit lg %r14,__LC_GMAP # get gmap pointer ltgr %r14,%r14 jz sie_gmap lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce sie_gmap: lg %r14,__SF_EMPTY(%r15) # get control block pointer + oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now + tm __SIE_PROG20+3(%r14),1 # last exit... + jnz sie_done LPP __SF_EMPTY(%r15) # set guest id sie 0(%r14) sie_done: LPP __SF_EMPTY+16(%r15) # set host id - lg %r14,__LC_THREAD_INFO # pointer thread_info struct -sie_exit: + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce +# some program checks are suppressing. C code (e.g. do_protection_exception) +# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other +# instructions beween sie64a and sie_done should not cause program +# interrupts. So lets use a nop (47 00 00 00) as a landing pad. +# See also HANDLE_SIE_INTERCEPT +rewind_pad: + nop 0 + .globl sie_exit +sie_exit: lg %r14,__SF_EMPTY+8(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers - lghi %r2,0 + lg %r2,__SF_EMPTY+24(%r15) # return exit reason code br %r14 sie_fault: - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - lg %r14,__LC_THREAD_INFO # pointer thread_info struct - lg %r14,__SF_EMPTY+8(%r15) # load guest register save area - stmg %r0,%r13,0(%r14) # save guest gprs 0-13 - lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers - lghi %r2,-EFAULT - br %r14 + lghi %r14,-EFAULT + stg %r14,__SF_EMPTY+24(%r15) # set exit reason code + j sie_exit .align 8 -.Lsie_loop: - .quad sie_loop -.Lsie_length: - .quad sie_done - sie_loop -.Lhost_id: - .quad 0 +.Lsie_critical: + .quad sie_gmap +.Lsie_critical_length: + .quad sie_done - sie_gmap EX_TABLE(rewind_pad,sie_fault) - EX_TABLE(sie_loop,sie_fault) + EX_TABLE(sie_exit,sie_fault) #endif .section .rodata, "a" diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index dd3c199..54b0995 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -234,9 +234,9 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler) } EXPORT_SYMBOL(unregister_external_interrupt); -void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code, - unsigned int param32, unsigned long param64) +void __irq_entry do_extint(struct pt_regs *regs) { + struct ext_code ext_code; struct pt_regs *old_regs; struct ext_int_info *p; int index; @@ -248,6 +248,7 @@ void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code, clock_comparator_work(); } kstat_incr_irqs_this_cpu(EXTERNAL_INTERRUPT, NULL); + ext_code = *(struct ext_code *) ®s->int_code; if (ext_code.code != 0x1004) __get_cpu_var(s390_idle).nohz_delay = 1; @@ -255,7 +256,8 @@ void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code, rcu_read_lock(); list_for_each_entry_rcu(p, &ext_int_hash[index], entry) if (likely(p->code == ext_code.code)) - p->handler(ext_code, param32, param64); + p->handler(ext_code, regs->int_parm, + regs->int_parm_long); rcu_read_unlock(); irq_exit(); set_irq_regs(old_regs); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index f58f37f..a6fc037 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/perf_event.h> +#include <linux/kvm_host.h> #include <linux/percpu.h> #include <linux/export.h> #include <asm/irq.h> @@ -39,6 +40,57 @@ int perf_num_counters(void) } EXPORT_SYMBOL(perf_num_counters); +static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs) +{ + struct stack_frame *stack = (struct stack_frame *) regs->gprs[15]; + + if (!stack) + return NULL; + + return (struct kvm_s390_sie_block *) stack->empty1[0]; +} + +static bool is_in_guest(struct pt_regs *regs) +{ + unsigned long ip = instruction_pointer(regs); + + if (user_mode(regs)) + return false; + + return ip == (unsigned long) &sie_exit; +} + +static unsigned long guest_is_user_mode(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE; +} + +static unsigned long instruction_pointer_guest(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN; +} + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + return is_in_guest(regs) ? instruction_pointer_guest(regs) + : instruction_pointer(regs); +} + +static unsigned long perf_misc_guest_flags(struct pt_regs *regs) +{ + return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + if (is_in_guest(regs)) + return perf_misc_guest_flags(regs); + + return user_mode(regs) ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; +} + void perf_event_print_debug(void) { struct cpumf_ctr_info cf_info; diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 9bdbcef..3bac589 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -7,6 +7,7 @@ EXPORT_SYMBOL(_mcount); #endif #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) EXPORT_SYMBOL(sie64a); +EXPORT_SYMBOL(sie_exit); #endif EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 0a49095..497451e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -719,10 +719,6 @@ static void reserve_oldmem(void) } create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE); create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE); - if (OLDMEM_BASE + OLDMEM_SIZE == real_size) - saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; - else - saved_max_pfn = PFN_DOWN(real_size) - 1; #endif } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 4f977d0..15a016c 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -49,7 +49,6 @@ enum { ec_schedule = 0, - ec_call_function, ec_call_function_single, ec_stop_cpu, }; @@ -438,8 +437,6 @@ static void smp_handle_ext_call(void) smp_stop_cpu(); if (test_bit(ec_schedule, &bits)) scheduler_ipi(); - if (test_bit(ec_call_function, &bits)) - generic_smp_call_function_interrupt(); if (test_bit(ec_call_function_single, &bits)) generic_smp_call_function_single_interrupt(); } @@ -456,7 +453,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) int cpu; for_each_cpu(cpu, mask) - pcpu_ec_call(pcpu_devices + cpu, ec_call_function); + pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); } void arch_send_call_function_single_ipi(int cpu) diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 8fe9d65..40b4c64 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -6,7 +6,8 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o) +KVM := ../../../virt/kvm +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 1c01a99..3074475 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -132,6 +132,9 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) { int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + trace_kvm_s390_handle_diag(vcpu, code); switch (code) { case 0x10: diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index b7d1b2e..5ee56e5 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -22,87 +22,6 @@ #include "trace.h" #include "trace-s390.h" -static int handle_lctlg(struct kvm_vcpu *vcpu) -{ - int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; - int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u64 useraddr; - int reg, rc; - - vcpu->stat.instruction_lctlg++; - - useraddr = kvm_s390_get_base_disp_rsy(vcpu); - - if (useraddr & 7) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - - reg = reg1; - - VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, - useraddr); - trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); - - do { - rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], - (u64 __user *) useraddr); - if (rc) - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - useraddr += 8; - if (reg == reg3) - break; - reg = (reg + 1) % 16; - } while (1); - return 0; -} - -static int handle_lctl(struct kvm_vcpu *vcpu) -{ - int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; - int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u64 useraddr; - u32 val = 0; - int reg, rc; - - vcpu->stat.instruction_lctl++; - - useraddr = kvm_s390_get_base_disp_rs(vcpu); - - if (useraddr & 3) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - - VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, - useraddr); - trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr); - - reg = reg1; - do { - rc = get_guest(vcpu, val, (u32 __user *) useraddr); - if (rc) - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; - vcpu->arch.sie_block->gcr[reg] |= val; - useraddr += 4; - if (reg == reg3) - break; - reg = (reg + 1) % 16; - } while (1); - return 0; -} - -static const intercept_handler_t eb_handlers[256] = { - [0x2f] = handle_lctlg, - [0x8a] = kvm_s390_handle_priv_eb, -}; - -static int handle_eb(struct kvm_vcpu *vcpu) -{ - intercept_handler_t handler; - - handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; - if (handler) - return handler(vcpu); - return -EOPNOTSUPP; -} static const intercept_handler_t instruction_handlers[256] = { [0x01] = kvm_s390_handle_01, @@ -110,10 +29,10 @@ static const intercept_handler_t instruction_handlers[256] = { [0x83] = kvm_s390_handle_diag, [0xae] = kvm_s390_handle_sigp, [0xb2] = kvm_s390_handle_b2, - [0xb7] = handle_lctl, + [0xb7] = kvm_s390_handle_lctl, [0xb9] = kvm_s390_handle_b9, [0xe5] = kvm_s390_handle_e5, - [0xeb] = handle_eb, + [0xeb] = kvm_s390_handle_eb, }; static int handle_noop(struct kvm_vcpu *vcpu) @@ -174,47 +93,12 @@ static int handle_stop(struct kvm_vcpu *vcpu) static int handle_validity(struct kvm_vcpu *vcpu) { - unsigned long vmaddr; int viwhy = vcpu->arch.sie_block->ipb >> 16; - int rc; vcpu->stat.exit_validity++; trace_kvm_s390_intercept_validity(vcpu, viwhy); - if (viwhy == 0x37) { - vmaddr = gmap_fault(vcpu->arch.sie_block->prefix, - vcpu->arch.gmap); - if (IS_ERR_VALUE(vmaddr)) { - rc = -EOPNOTSUPP; - goto out; - } - rc = fault_in_pages_writeable((char __user *) vmaddr, - PAGE_SIZE); - if (rc) { - /* user will receive sigsegv, exit to user */ - rc = -EOPNOTSUPP; - goto out; - } - vmaddr = gmap_fault(vcpu->arch.sie_block->prefix + PAGE_SIZE, - vcpu->arch.gmap); - if (IS_ERR_VALUE(vmaddr)) { - rc = -EOPNOTSUPP; - goto out; - } - rc = fault_in_pages_writeable((char __user *) vmaddr, - PAGE_SIZE); - if (rc) { - /* user will receive sigsegv, exit to user */ - rc = -EOPNOTSUPP; - goto out; - } - } else - rc = -EOPNOTSUPP; - -out: - if (rc) - VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", - viwhy); - return rc; + WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy); + return -EOPNOTSUPP; } static int handle_instruction(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5c94817..7f35cb3 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -438,7 +438,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) no_timer: spin_lock(&vcpu->arch.local_int.float_int->lock); spin_lock_bh(&vcpu->arch.local_int.lock); - add_wait_queue(&vcpu->arch.local_int.wq, &wait); + add_wait_queue(&vcpu->wq, &wait); while (list_empty(&vcpu->arch.local_int.list) && list_empty(&vcpu->arch.local_int.float_int->list) && (!vcpu->arch.local_int.timer_due) && @@ -452,7 +452,7 @@ no_timer: } __unset_cpu_idle(vcpu); __set_current_state(TASK_RUNNING); - remove_wait_queue(&vcpu->arch.local_int.wq, &wait); + remove_wait_queue(&vcpu->wq, &wait); spin_unlock_bh(&vcpu->arch.local_int.lock); spin_unlock(&vcpu->arch.local_int.float_int->lock); hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); @@ -465,8 +465,8 @@ void kvm_s390_tasklet(unsigned long parm) spin_lock(&vcpu->arch.local_int.lock); vcpu->arch.local_int.timer_due = 1; - if (waitqueue_active(&vcpu->arch.local_int.wq)) - wake_up_interruptible(&vcpu->arch.local_int.wq); + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); spin_unlock(&vcpu->arch.local_int.lock); } @@ -613,7 +613,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) spin_lock_bh(&li->lock); list_add(&inti->list, &li->list); atomic_set(&li->active, 1); - BUG_ON(waitqueue_active(&li->wq)); + BUG_ON(waitqueue_active(li->wq)); spin_unlock_bh(&li->lock); return 0; } @@ -746,8 +746,8 @@ int kvm_s390_inject_vm(struct kvm *kvm, li = fi->local_int[sigcpu]; spin_lock_bh(&li->lock); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); spin_unlock(&fi->lock); mutex_unlock(&kvm->lock); @@ -832,8 +832,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, if (inti->type == KVM_S390_SIGP_STOP) li->action_bits |= ACTION_STOP_ON_STOP; atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&vcpu->arch.local_int.wq); + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); spin_unlock_bh(&li->lock); mutex_unlock(&vcpu->kvm->lock); return 0; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c1c7c68..ba694d2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -59,6 +59,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, { "exit_wait_state", VCPU_STAT(exit_wait_state) }, + { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, { "instruction_stidp", VCPU_STAT(instruction_stidp) }, { "instruction_spx", VCPU_STAT(instruction_spx) }, { "instruction_stpx", VCPU_STAT(instruction_stpx) }, @@ -84,6 +85,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { }; static unsigned long long *facilities; +static struct gmap_notifier gmap_notifier; /* Section: not file related */ int kvm_arch_hardware_enable(void *garbage) @@ -96,13 +98,18 @@ void kvm_arch_hardware_disable(void *garbage) { } +static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); + int kvm_arch_hardware_setup(void) { + gmap_notifier.notifier_call = kvm_gmap_notifier; + gmap_register_ipte_notifier(&gmap_notifier); return 0; } void kvm_arch_hardware_unsetup(void) { + gmap_unregister_ipte_notifier(&gmap_notifier); } void kvm_arch_check_processor_compat(void *rtn) @@ -239,6 +246,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.gmap = gmap_alloc(current->mm); if (!kvm->arch.gmap) goto out_nogmap; + kvm->arch.gmap->private = kvm; } kvm->arch.css_support = 0; @@ -270,7 +278,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) free_page((unsigned long)(vcpu->arch.sie_block)); kvm_vcpu_uninit(vcpu); - kfree(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu); } static void kvm_free_vcpus(struct kvm *kvm) @@ -309,6 +317,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.gmap = gmap_alloc(current->mm); if (!vcpu->arch.gmap) return -ENOMEM; + vcpu->arch.gmap->private = vcpu->kvm; return 0; } @@ -373,8 +382,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | CPUSTAT_SM | - CPUSTAT_STOPPED); + CPUSTAT_STOPPED | + CPUSTAT_GED); vcpu->arch.sie_block->ecb = 6; + vcpu->arch.sie_block->ecb2 = 8; vcpu->arch.sie_block->eca = 0xC1002001U; vcpu->arch.sie_block->fac = (int) (long) facilities; hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); @@ -397,7 +408,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, rc = -ENOMEM; - vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); if (!vcpu) goto out; @@ -427,7 +438,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.local_int.float_int = &kvm->arch.float_int; spin_lock(&kvm->arch.float_int.lock); kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; - init_waitqueue_head(&vcpu->arch.local_int.wq); + vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; spin_unlock(&kvm->arch.float_int.lock); @@ -442,7 +453,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, out_free_sie_block: free_page((unsigned long)(vcpu->arch.sie_block)); out_free_cpu: - kfree(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu); out: return ERR_PTR(rc); } @@ -454,6 +465,50 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return 0; } +void s390_vcpu_block(struct kvm_vcpu *vcpu) +{ + atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); +} + +void s390_vcpu_unblock(struct kvm_vcpu *vcpu) +{ + atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); +} + +/* + * Kick a guest cpu out of SIE and wait until SIE is not running. + * If the CPU is not running (e.g. waiting as idle) the function will + * return immediately. */ +void exit_sie(struct kvm_vcpu *vcpu) +{ + atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); + while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) + cpu_relax(); +} + +/* Kick a guest cpu out of SIE and prevent SIE-reentry */ +void exit_sie_sync(struct kvm_vcpu *vcpu) +{ + s390_vcpu_block(vcpu); + exit_sie(vcpu); +} + +static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address) +{ + int i; + struct kvm *kvm = gmap->private; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + /* match against both prefix pages */ + if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) { + VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address); + kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); + exit_sie_sync(vcpu); + } + } +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { /* kvm common code refers to this, but never calls it */ @@ -606,6 +661,27 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return -EINVAL; /* not implemented yet */ } +static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) +{ + /* + * We use MMU_RELOAD just to re-arm the ipte notifier for the + * guest prefix page. gmap_ipte_notify will wait on the ptl lock. + * This ensures that the ipte instruction for this request has + * already finished. We might race against a second unmapper that + * wants to set the blocking bit. Lets just retry the request loop. + */ + while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { + int rc; + rc = gmap_ipte_notify(vcpu->arch.gmap, + vcpu->arch.sie_block->prefix, + PAGE_SIZE * 2); + if (rc) + return rc; + s390_vcpu_unblock(vcpu); + } + return 0; +} + static int __vcpu_run(struct kvm_vcpu *vcpu) { int rc; @@ -621,6 +697,10 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) if (!kvm_is_ucontrol(vcpu->kvm)) kvm_s390_deliver_pending_interrupts(vcpu); + rc = kvm_s390_handle_requests(vcpu); + if (rc) + return rc; + vcpu->arch.sie_block->icptcode = 0; preempt_disable(); kvm_guest_enter(); @@ -630,7 +710,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) trace_kvm_s390_sie_enter(vcpu, atomic_read(&vcpu->arch.sie_block->cpuflags)); rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); - if (rc) { + if (rc > 0) + rc = 0; + if (rc < 0) { if (kvm_is_ucontrol(vcpu->kvm)) { rc = SIE_INTERCEPT_UCONTROL; } else { @@ -1046,7 +1128,7 @@ static int __init kvm_s390_init(void) return -ENOMEM; } memcpy(facilities, S390_lowcore.stfle_fac_list, 16); - facilities[0] &= 0xff00fff3f47c0000ULL; + facilities[0] &= 0xff82fff3f47c0000ULL; facilities[1] &= 0x001c000000000000ULL; return 0; } @@ -1059,3 +1141,12 @@ static void __exit kvm_s390_exit(void) module_init(kvm_s390_init); module_exit(kvm_s390_exit); + +/* + * Enable autoloading of the kvm module. + * Note that we add the module alias here instead of virt/kvm/kvm_main.c + * since x86 takes a different approach. + */ +#include <linux/miscdevice.h> +MODULE_ALIAS_MISCDEV(KVM_MINOR); +MODULE_ALIAS("devname:kvm"); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index efc14f6..028ca9f 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -63,6 +63,7 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) { vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u; vcpu->arch.sie_block->ihcpu = 0xffff; + kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); } static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu) @@ -85,6 +86,12 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } +static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2) +{ + *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; + *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; +} + static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) { u32 base2 = vcpu->arch.sie_block->ipb >> 28; @@ -125,7 +132,8 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); int kvm_s390_handle_01(struct kvm_vcpu *vcpu); int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu); -int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu); +int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu); +int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); /* implemented in sigp.c */ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); @@ -133,6 +141,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); +void s390_vcpu_block(struct kvm_vcpu *vcpu); +void s390_vcpu_unblock(struct kvm_vcpu *vcpu); +void exit_sie(struct kvm_vcpu *vcpu); +void exit_sie_sync(struct kvm_vcpu *vcpu); /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 6bbd7b5..0da3e6e 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -1,7 +1,7 @@ /* * handling privileged instructions * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008, 2013 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -20,6 +20,9 @@ #include <asm/debug.h> #include <asm/ebcdic.h> #include <asm/sysinfo.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/io.h> #include <asm/ptrace.h> #include <asm/compat.h> #include "gaccess.h" @@ -34,6 +37,9 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) vcpu->stat.instruction_spx++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ @@ -65,6 +71,9 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stpx++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ @@ -89,6 +98,9 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stap++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + useraddr = kvm_s390_get_base_disp_s(vcpu); if (useraddr & 1) @@ -105,7 +117,12 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) static int handle_skey(struct kvm_vcpu *vcpu) { vcpu->stat.instruction_storage_key++; - vcpu->arch.sie_block->gpsw.addr -= 4; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + vcpu->arch.sie_block->gpsw.addr = + __rewind_psw(vcpu->arch.sie_block->gpsw, 4); VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return 0; } @@ -129,9 +146,10 @@ static int handle_tpi(struct kvm_vcpu *vcpu) * Store the two-word I/O interruption code into the * provided area. */ - put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr); - put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2)); - put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4)); + if (put_guest(vcpu, inti->io.subchannel_id, (u16 __user *)addr) + || put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *)(addr + 2)) + || put_guest(vcpu, inti->io.io_int_parm, (u32 __user *)(addr + 4))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } else { /* * Store the three-word I/O interruption code into @@ -182,6 +200,9 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 4, "%s", "I/O instruction"); + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + if (vcpu->kvm->arch.css_support) { /* * Most I/O instructions will be handled by userspace. @@ -210,8 +231,12 @@ static int handle_stfl(struct kvm_vcpu *vcpu) int rc; vcpu->stat.instruction_stfl++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + /* only pass the facility bits, which we can handle */ - facility_list = S390_lowcore.stfl_fac_list & 0xff00fff3; + facility_list = S390_lowcore.stfl_fac_list & 0xff82fff3; rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), &facility_list, sizeof(facility_list)); @@ -255,8 +280,8 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) u64 addr; if (gpsw->mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + addr = kvm_s390_get_base_disp_s(vcpu); if (addr & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -278,6 +303,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) psw_t new_psw; u64 addr; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + addr = kvm_s390_get_base_disp_s(vcpu); if (addr & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -296,6 +324,9 @@ static int handle_stidp(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stidp++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + operand2 = kvm_s390_get_base_disp_s(vcpu); if (operand2 & 7) @@ -351,16 +382,30 @@ static int handle_stsi(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stsi++; VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); - operand2 = kvm_s390_get_base_disp_s(vcpu); + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (fc > 3) { + vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; /* cc 3 */ + return 0; + } - if (operand2 & 0xfff && fc > 0) + if (vcpu->run->s.regs.gprs[0] & 0x0fffff00 + || vcpu->run->s.regs.gprs[1] & 0xffff0000) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - switch (fc) { - case 0: + if (fc == 0) { vcpu->run->s.regs.gprs[0] = 3 << 28; - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* cc 0 */ return 0; + } + + operand2 = kvm_s390_get_base_disp_s(vcpu); + + if (operand2 & 0xfff) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + switch (fc) { case 1: /* same handling for 1 and 2 */ case 2: mem = get_zeroed_page(GFP_KERNEL); @@ -377,8 +422,6 @@ static int handle_stsi(struct kvm_vcpu *vcpu) goto out_no_data; handle_stsi_3_2_2(vcpu, (void *) mem); break; - default: - goto out_no_data; } if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { @@ -432,20 +475,14 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) intercept_handler_t handler; /* - * a lot of B2 instructions are priviledged. We first check for - * the privileged ones, that we can handle in the kernel. If the - * kernel can handle this instruction, we check for the problem - * state bit and (a) handle the instruction or (b) send a code 2 - * program check. - * Anything else goes to userspace.*/ + * A lot of B2 instructions are priviledged. Here we check for + * the privileged ones, that we can handle in the kernel. + * Anything else goes to userspace. + */ handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) { - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); - else - return handler(vcpu); - } + if (handler) + return handler(vcpu); + return -EOPNOTSUPP; } @@ -453,8 +490,7 @@ static int handle_epsw(struct kvm_vcpu *vcpu) { int reg1, reg2; - reg1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 24; - reg2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; + kvm_s390_get_regs_rre(vcpu, ®1, ®2); /* This basically extracts the mask half of the psw. */ vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000; @@ -467,9 +503,88 @@ static int handle_epsw(struct kvm_vcpu *vcpu) return 0; } +#define PFMF_RESERVED 0xfffc0101UL +#define PFMF_SK 0x00020000UL +#define PFMF_CF 0x00010000UL +#define PFMF_UI 0x00008000UL +#define PFMF_FSC 0x00007000UL +#define PFMF_NQ 0x00000800UL +#define PFMF_MR 0x00000400UL +#define PFMF_MC 0x00000200UL +#define PFMF_KEY 0x000000feUL + +static int handle_pfmf(struct kvm_vcpu *vcpu) +{ + int reg1, reg2; + unsigned long start, end; + + vcpu->stat.instruction_pfmf++; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + if (!MACHINE_HAS_PFMF) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* Only provide non-quiescing support if the host supports it */ + if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && + S390_lowcore.stfl_fac_list & 0x00020000) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* No support for conditional-SSKE */ + if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { + case 0x00000000: + end = (start + (1UL << 12)) & ~((1UL << 12) - 1); + break; + case 0x00001000: + end = (start + (1UL << 20)) & ~((1UL << 20) - 1); + break; + /* We dont support EDAT2 + case 0x00002000: + end = (start + (1UL << 31)) & ~((1UL << 31) - 1); + break;*/ + default: + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + } + while (start < end) { + unsigned long useraddr; + + useraddr = gmap_translate(start, vcpu->arch.gmap); + if (IS_ERR((void *)useraddr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { + if (clear_user((void __user *)useraddr, PAGE_SIZE)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) { + if (set_guest_storage_key(current->mm, useraddr, + vcpu->run->s.regs.gprs[reg1] & PFMF_KEY, + vcpu->run->s.regs.gprs[reg1] & PFMF_NQ)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } + + start += PAGE_SIZE; + } + if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) + vcpu->run->s.regs.gprs[reg2] = end; + return 0; +} + static const intercept_handler_t b9_handlers[256] = { [0x8d] = handle_epsw, [0x9c] = handle_io_inst, + [0xaf] = handle_pfmf, }; int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) @@ -478,29 +593,96 @@ int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) /* This is handled just as for the B2 instructions. */ handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) { - if ((handler != handle_epsw) && - (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); - else - return handler(vcpu); - } + if (handler) + return handler(vcpu); + return -EOPNOTSUPP; } +int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u64 useraddr; + u32 val = 0; + int reg, rc; + + vcpu->stat.instruction_lctl++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + useraddr = kvm_s390_get_base_disp_rs(vcpu); + + if (useraddr & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, + useraddr); + trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr); + + reg = reg1; + do { + rc = get_guest(vcpu, val, (u32 __user *) useraddr); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; + vcpu->arch.sie_block->gcr[reg] |= val; + useraddr += 4; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + +static int handle_lctlg(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u64 useraddr; + int reg, rc; + + vcpu->stat.instruction_lctlg++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + useraddr = kvm_s390_get_base_disp_rsy(vcpu); + + if (useraddr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + reg = reg1; + + VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, + useraddr); + trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); + + do { + rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], + (u64 __user *) useraddr); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + useraddr += 8; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + static const intercept_handler_t eb_handlers[256] = { + [0x2f] = handle_lctlg, [0x8a] = handle_io_inst, }; -int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu) +int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) { intercept_handler_t handler; - /* All eb instructions that end up here are privileged. */ - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; if (handler) return handler(vcpu); @@ -515,6 +697,9 @@ static int handle_tprot(struct kvm_vcpu *vcpu) vcpu->stat.instruction_tprot++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + kvm_s390_get_base_disp_sse(vcpu, &address1, &address2); /* we only handle the Linux memory detection case: @@ -560,8 +745,7 @@ static int handle_sckpf(struct kvm_vcpu *vcpu) u32 value; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000) return kvm_s390_inject_program_int(vcpu, diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 1c48ab2..bec398c 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -79,8 +79,8 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); @@ -117,8 +117,8 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); @@ -145,8 +145,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) atomic_set(&li->active, 1); atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); li->action_bits |= action; - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); out: spin_unlock_bh(&li->lock); @@ -250,8 +250,8 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); @@ -333,8 +333,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) /* sigp in userspace can exit */ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); order_code = kvm_s390_get_base_disp_rs(vcpu); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 89ebae4..ce36ea8 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -135,30 +135,17 @@ void __init paging_init(void) void __init mem_init(void) { - unsigned long codesize, reservedpages, datasize, initsize; - - max_mapnr = num_physpages = max_low_pfn; + max_mapnr = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); /* Setup guest page hinting */ cmma_init(); /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); + free_all_bootmem(); setup_zero_pages(); /* Setup zeroed pages. */ - reservedpages = 0; - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - max_mapnr << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >>10, - initsize >> 10); + mem_init_print_info(NULL); printk("Write protected kernel read-only data: %#lx - %#lx\n", (unsigned long)&_stext, PFN_ALIGN((unsigned long)&_eshared) - 1); @@ -166,13 +153,14 @@ void __init mem_init(void) void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index a938b54..a8154a1 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -689,7 +689,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) entry = *ptep; if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) { pgste = pgste_get_lock(ptep); - pgste_val(pgste) |= RCP_IN_BIT; + pgste_val(pgste) |= PGSTE_IN_BIT; pgste_set_unlock(ptep, pgste); start += PAGE_SIZE; len -= PAGE_SIZE; @@ -771,6 +771,54 @@ static inline void page_table_free_pgste(unsigned long *table) __free_page(page); } +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned long key, bool nq) +{ + spinlock_t *ptl; + pgste_t old, new; + pte_t *ptep; + + down_read(&mm->mmap_sem); + ptep = get_locked_pte(current->mm, addr, &ptl); + if (unlikely(!ptep)) { + up_read(&mm->mmap_sem); + return -EFAULT; + } + + new = old = pgste_get_lock(ptep); + pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | + PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; + pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + if (!(pte_val(*ptep) & _PAGE_INVALID)) { + unsigned long address, bits; + unsigned char skey; + + address = pte_val(*ptep) & PAGE_MASK; + skey = page_get_storage_key(address); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + /* Set storage key ACC and FP */ + page_set_storage_key(address, + (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)), + !nq); + + /* Merge host changed & referenced into pgste */ + pgste_val(new) |= bits << 52; + /* Transfer skey changed & referenced bit to kvm user bits */ + pgste_val(new) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */ + } + /* changing the guest storage key is considered a change of the page */ + if ((pgste_val(new) ^ pgste_val(old)) & + (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) + pgste_val(new) |= PGSTE_UC_BIT; + + pgste_set_unlock(ptep, new); + pte_unmap_unlock(*ptep, ptl); + up_read(&mm->mmap_sem); + return 0; +} +EXPORT_SYMBOL(set_guest_storage_key); + #else /* CONFIG_PGSTE */ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, @@ -1117,7 +1165,8 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, } } -void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) { struct list_head *lh = (struct list_head *) pgtable; @@ -1131,7 +1180,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) mm->pmd_huge_pte = pgtable; } -pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { struct list_head *lh; pgtable_t pgtable; diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h index 1912f3b..0022e1e 100644 --- a/arch/s390/oprofile/hwsampler.h +++ b/arch/s390/oprofile/hwsampler.h @@ -81,8 +81,8 @@ struct hws_data_entry { unsigned int:16; unsigned int prim_asn:16; /* primary ASN */ unsigned long long ia; /* Instruction Address */ - unsigned long long lpp; /* Logical-Partition Program Param. */ - unsigned long long vpp; /* Virtual-Machine Program Param. */ + unsigned long long gpp; /* Guest Program Parameter */ + unsigned long long hpp; /* Host Program Parameter */ }; struct hws_trailer_entry { diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index f1e5be8..e2956ad 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -82,10 +82,13 @@ struct intr_bucket { static struct intr_bucket *bucket; -/* Adapter local summary indicator */ -static u8 *zpci_irq_si; +/* Adapter interrupt definitions */ +static void zpci_irq_handler(struct airq_struct *airq); -static atomic_t irq_retries = ATOMIC_INIT(0); +static struct airq_struct zpci_airq = { + .handler = zpci_irq_handler, + .isc = PCI_ISC, +}; /* I/O Map */ static DEFINE_SPINLOCK(zpci_iomap_lock); @@ -404,7 +407,7 @@ static struct pci_ops pci_root_ops = { /* store the last handled bit to implement fair scheduling of devices */ static DEFINE_PER_CPU(unsigned long, next_sbit); -static void zpci_irq_handler(void *dont, void *need) +static void zpci_irq_handler(struct airq_struct *airq) { unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit); int rescan = 0, max = aisb_max; @@ -452,7 +455,6 @@ scan: max = aisb_max; sbit = find_first_bit_left(bucket->aisb, max); if (sbit != max) { - atomic_inc(&irq_retries); rescan++; goto scan; } @@ -565,7 +567,21 @@ static void zpci_map_resources(struct zpci_dev *zdev) pr_debug("BAR%i: -> start: %Lx end: %Lx\n", i, pdev->resource[i].start, pdev->resource[i].end); } -}; +} + +static void zpci_unmap_resources(struct zpci_dev *zdev) +{ + struct pci_dev *pdev = zdev->pdev; + resource_size_t len; + int i; + + for (i = 0; i < PCI_BAR_COUNT; i++) { + len = pci_resource_len(pdev, i); + if (!len) + continue; + pci_iounmap(pdev, (void *) pdev->resource[i].start); + } +} struct zpci_dev *zpci_alloc_device(void) { @@ -701,25 +717,20 @@ static int __init zpci_irq_init(void) goto out_alloc; } - isc_register(PCI_ISC); - zpci_irq_si = s390_register_adapter_interrupt(&zpci_irq_handler, NULL, PCI_ISC); - if (IS_ERR(zpci_irq_si)) { - rc = PTR_ERR(zpci_irq_si); - zpci_irq_si = NULL; + rc = register_adapter_interrupt(&zpci_airq); + if (rc) goto out_ai; - } + /* Set summary to 1 to be called every time for the ISC. */ + *zpci_airq.lsi_ptr = 1; for_each_online_cpu(cpu) per_cpu(next_sbit, cpu) = 0; spin_lock_init(&bucket->lock); - /* set summary to 1 to be called every time for the ISC */ - *zpci_irq_si = 1; set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); return 0; out_ai: - isc_unregister(PCI_ISC); free_page((unsigned long) bucket->alloc); out_alloc: free_page((unsigned long) bucket->aisb); @@ -732,21 +743,10 @@ static void zpci_irq_exit(void) { free_page((unsigned long) bucket->alloc); free_page((unsigned long) bucket->aisb); - s390_unregister_adapter_interrupt(zpci_irq_si, PCI_ISC); - isc_unregister(PCI_ISC); + unregister_adapter_interrupt(&zpci_airq); kfree(bucket); } -void zpci_debug_info(struct zpci_dev *zdev, struct seq_file *m) -{ - if (!zdev) - return; - - seq_printf(m, "global irq retries: %u\n", atomic_read(&irq_retries)); - seq_printf(m, "aibv[0]:%016lx aibv[1]:%016lx aisb:%016lx\n", - get_imap(0)->aibv, get_imap(1)->aibv, *bucket->aisb); -} - static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size, unsigned long flags, int domain) { @@ -810,6 +810,16 @@ int pcibios_add_device(struct pci_dev *pdev) return 0; } +void pcibios_release_device(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = get_zdev(pdev); + + zpci_unmap_resources(zdev); + zpci_fmb_disable_device(zdev); + zpci_debug_exit_device(zdev); + zdev->pdev = NULL; +} + static int zpci_scan_bus(struct zpci_dev *zdev) { struct resource *res; @@ -950,25 +960,6 @@ void zpci_stop_device(struct zpci_dev *zdev) } EXPORT_SYMBOL_GPL(zpci_stop_device); -int zpci_scan_device(struct zpci_dev *zdev) -{ - zdev->pdev = pci_scan_single_device(zdev->bus, ZPCI_DEVFN); - if (!zdev->pdev) { - pr_err("pci_scan_single_device failed for fid: 0x%x\n", - zdev->fid); - goto out; - } - - pci_bus_add_devices(zdev->bus); - - return 0; -out: - zpci_dma_exit_device(zdev); - clp_disable_fh(zdev); - return -EIO; -} -EXPORT_SYMBOL_GPL(zpci_scan_device); - static inline int barsize(u8 size) { return (size) ? (1 << size) >> 10 : 0; diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index bd34359..2e95396 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -236,7 +236,6 @@ int clp_disable_fh(struct zpci_dev *zdev) if (!zdev_enabled(zdev)) return 0; - dev_info(&zdev->pdev->dev, "disabling fn handle: 0x%x\n", fh); rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN); if (!rc) /* Success -> store disabled handle in zdev */ diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 771b823..75c69b4 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -115,27 +115,6 @@ static const struct file_operations debugfs_pci_perf_fops = { .release = single_release, }; -static int pci_debug_show(struct seq_file *m, void *v) -{ - struct zpci_dev *zdev = m->private; - - zpci_debug_info(zdev, m); - return 0; -} - -static int pci_debug_seq_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, pci_debug_show, - file_inode(filp)->i_private); -} - -static const struct file_operations debugfs_pci_debug_fops = { - .open = pci_debug_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - void zpci_debug_init_device(struct zpci_dev *zdev) { zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev), @@ -149,19 +128,11 @@ void zpci_debug_init_device(struct zpci_dev *zdev) &debugfs_pci_perf_fops); if (IS_ERR(zdev->debugfs_perf)) zdev->debugfs_perf = NULL; - - zdev->debugfs_debug = debugfs_create_file("debug", - S_IFREG | S_IRUGO | S_IWUSR, - zdev->debugfs_dev, zdev, - &debugfs_pci_debug_fops); - if (IS_ERR(zdev->debugfs_debug)) - zdev->debugfs_debug = NULL; } void zpci_debug_exit_device(struct zpci_dev *zdev) { debugfs_remove(zdev->debugfs_perf); - debugfs_remove(zdev->debugfs_debug); debugfs_remove(zdev->debugfs_dev); } diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index f8e69d5..a2343c1 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -263,7 +263,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, enum dma_data_direction direction, struct dma_attrs *attrs) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); unsigned long nr_pages, iommu_page_index; unsigned long pa = page_to_phys(page) + offset; int flags = ZPCI_PTE_VALID; @@ -304,7 +304,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); unsigned long iommu_page_index; int npages; @@ -323,7 +323,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); struct page *page; unsigned long pa; dma_addr_t map; diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index a42cce6..e99a255 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -15,40 +15,36 @@ static ssize_t show_fid(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%08x\n", zdev->fid); - return strlen(buf); + return sprintf(buf, "0x%08x\n", zdev->fid); } static DEVICE_ATTR(function_id, S_IRUGO, show_fid, NULL); static ssize_t show_fh(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%08x\n", zdev->fh); - return strlen(buf); + return sprintf(buf, "0x%08x\n", zdev->fh); } static DEVICE_ATTR(function_handle, S_IRUGO, show_fh, NULL); static ssize_t show_pchid(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%04x\n", zdev->pchid); - return strlen(buf); + return sprintf(buf, "0x%04x\n", zdev->pchid); } static DEVICE_ATTR(pchid, S_IRUGO, show_pchid, NULL); static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%02x\n", zdev->pfgid); - return strlen(buf); + return sprintf(buf, "0x%02x\n", zdev->pfgid); } static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL); |