diff options
72 files changed, 1135 insertions, 1116 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 6967add..37abe86 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -64,6 +64,7 @@ config ARCH_SUPPORTS_UPROBES config S390 def_bool y + select ARCH_BINFMT_ELF_STATE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL @@ -184,7 +185,7 @@ config SCHED_OMIT_FRAME_POINTER config PGTABLE_LEVELS int - default 4 + default 5 source "init/Kconfig" diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index 678d986..ad4bd77 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -6,7 +6,8 @@ obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o -obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o paes_s390.o +obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o +obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o obj-$(CONFIG_S390_PRNG) += prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 9317b3e..36aefc0 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/atomic.h> +#include <linux/random.h> #include <linux/static_key.h> #include <asm/cpacf.h> diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 45092b1..b3c8847 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -1,10 +1,12 @@ generic-y += asm-offsets.h generic-y += cacheflush.h generic-y += clkdev.h +generic-y += device.h generic-y += dma-contiguous.h generic-y += div64.h generic-y += emergency-restart.h generic-y += export.h +generic-y += fb.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kmap_types.h diff --git a/arch/s390/include/asm/device.h b/arch/s390/include/asm/device.h deleted file mode 100644 index 5203fc8..0000000 --- a/arch/s390/include/asm/device.h +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Arch specific extensions to struct device - * - * This file is released under the GPLv2 - */ -struct dev_archdata { -}; - -struct pdev_archdata { -}; diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index e8f6230..ec024c0 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -117,6 +117,9 @@ #define ELF_DATA ELFDATA2MSB #define ELF_ARCH EM_S390 +/* s390 specific phdr types */ +#define PT_S390_PGSTE 0x70000000 + /* * ELF register definitions.. */ @@ -151,6 +154,35 @@ extern unsigned int vdso_enabled; && (x)->e_ident[EI_CLASS] == ELF_CLASS) #define compat_start_thread start_thread31 +struct arch_elf_state { + int rc; +}; + +#define INIT_ARCH_ELF_STATE { .rc = 0 } + +#define arch_check_elf(ehdr, interp, interp_ehdr, state) (0) +#ifdef CONFIG_PGSTE +#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \ +({ \ + struct arch_elf_state *_state = state; \ + if ((phdr)->p_type == PT_S390_PGSTE && \ + !page_table_allocate_pgste && \ + !test_thread_flag(TIF_PGSTE) && \ + !current->mm->context.alloc_pgste) { \ + set_thread_flag(TIF_PGSTE); \ + set_pt_regs_flag(task_pt_regs(current), \ + PIF_SYSCALL_RESTART); \ + _state->rc = -EAGAIN; \ + } \ + _state->rc; \ +}) +#else +#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \ +({ \ + (state)->rc; \ +}) +#endif + /* For SVR4/S390 the function pointer to be registered with `atexit` is passed in R14. */ #define ELF_PLAT_INIT(_r, load_addr) \ diff --git a/arch/s390/include/asm/fb.h b/arch/s390/include/asm/fb.h deleted file mode 100644 index c7df380..0000000 --- a/arch/s390/include/asm/fb.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_FB_H_ -#define _ASM_FB_H_ -#include <linux/fb.h> - -#define fb_pgprotect(...) do {} while (0) - -static inline int fb_is_primary_device(struct fb_info *info) -{ - return 0; -} - -#endif /* _ASM_FB_H_ */ diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index 437e9af..904e4b3 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -25,8 +25,6 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define IO_SPACE_LIMIT 0 -#ifdef CONFIG_PCI - #define ioremap_nocache(addr, size) ioremap(addr, size) #define ioremap_wc ioremap_nocache #define ioremap_wt ioremap_nocache @@ -49,6 +47,8 @@ static inline void ioport_unmap(void __iomem *p) { } +#ifdef CONFIG_PCI + /* * s390 needs a private implementation of pci_iomap since ioremap with its * offset parameter isn't sufficient. That's because BAR spaces are not diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 65d07ac..6baae23 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -107,6 +107,20 @@ struct esca_block { struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; } __packed; +/* + * This struct is used to store some machine check info from lowcore + * for machine checks that happen while the guest is running. + * This info in host's lowcore might be overwritten by a second machine + * check from host when host is in the machine check's high-level handling. + * The size is 24 bytes. + */ +struct mcck_volatile_info { + __u64 mcic; + __u64 failing_storage_address; + __u32 ext_damage_code; + __u32 reserved; +}; + #define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 @@ -264,7 +278,8 @@ struct kvm_s390_itdb { struct sie_page { struct kvm_s390_sie_block sie_block; - __u8 reserved200[1024]; /* 0x0200 */ + struct mcck_volatile_info mcck_info; /* 0x0200 */ + __u8 reserved218[1000]; /* 0x0218 */ struct kvm_s390_itdb itdb; /* 0x0600 */ __u8 reserved700[2304]; /* 0x0700 */ } __packed; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 8712e11..4541ac4 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -25,7 +25,9 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.gmap_asce = 0; mm->context.flush_mm = 0; #ifdef CONFIG_PGSTE - mm->context.alloc_pgste = page_table_allocate_pgste; + mm->context.alloc_pgste = page_table_allocate_pgste || + test_thread_flag(TIF_PGSTE) || + current->mm->context.alloc_pgste; mm->context.has_pgste = 0; mm->context.use_skey = 0; mm->context.use_cmma = 0; diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index e3e8895..13623b9 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -14,7 +14,14 @@ #include <linux/const.h> #include <linux/types.h> +#define MCIC_SUBCLASS_MASK (1ULL<<63 | 1ULL<<62 | 1ULL<<61 | \ + 1ULL<<59 | 1ULL<<58 | 1ULL<<56 | \ + 1ULL<<55 | 1ULL<<54 | 1ULL<<53 | \ + 1ULL<<52 | 1ULL<<47 | 1ULL<<46 | \ + 1ULL<<45 | 1ULL<<44) #define MCCK_CODE_SYSTEM_DAMAGE _BITUL(63) +#define MCCK_CODE_EXT_DAMAGE _BITUL(63 - 5) +#define MCCK_CODE_CP _BITUL(63 - 9) #define MCCK_CODE_CPU_TIMER_VALID _BITUL(63 - 46) #define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20) #define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 69b8a41..624deaa 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -74,6 +74,7 @@ typedef struct { unsigned long pgste; } pgste_t; typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pud; } pud_t; +typedef struct { unsigned long p4d; } p4d_t; typedef struct { unsigned long pgd; } pgd_t; typedef pte_t *pgtable_t; @@ -82,12 +83,14 @@ typedef pte_t *pgtable_t; #define pte_val(x) ((x).pte) #define pmd_val(x) ((x).pmd) #define pud_val(x) ((x).pud) +#define p4d_val(x) ((x).p4d) #define pgd_val(x) ((x).pgd) #define __pgste(x) ((pgste_t) { (x) } ) #define __pte(x) ((pte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) #define __pud(x) ((pud_t) { (x) } ) +#define __p4d(x) ((p4d_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 4e31866..f36b4b7 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -70,11 +70,10 @@ struct zpci_fmb { } __packed __aligned(128); enum zpci_state { - ZPCI_FN_STATE_RESERVED, - ZPCI_FN_STATE_STANDBY, - ZPCI_FN_STATE_CONFIGURED, - ZPCI_FN_STATE_ONLINE, - NR_ZPCI_FN_STATES, + ZPCI_FN_STATE_STANDBY = 0, + ZPCI_FN_STATE_CONFIGURED = 1, + ZPCI_FN_STATE_RESERVED = 2, + ZPCI_FN_STATE_ONLINE = 3, }; struct zpci_bar_struct { @@ -109,7 +108,7 @@ struct zpci_dev { u64 msi_addr; /* MSI address */ unsigned int max_msi; /* maximum number of MSI's */ struct airq_iv *aibv; /* adapter interrupt bit vector */ - unsigned int aisb; /* number of the summary bit */ + unsigned long aisb; /* number of the summary bit */ /* DMA stuff */ unsigned long *dma_table; @@ -159,11 +158,12 @@ extern const struct attribute_group *zpci_attr_groups[]; ----------------------------------------------------------------------------- */ /* Base stuff */ int zpci_create_device(struct zpci_dev *); +void zpci_remove_device(struct zpci_dev *zdev); int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); -void zpci_stop_device(struct zpci_dev *); int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); int zpci_unregister_ioat(struct zpci_dev *, u8); +void zpci_remove_reserved_devices(void); /* CLP */ int clp_scan_pci_devices(void); @@ -172,6 +172,7 @@ int clp_rescan_pci_devices_simple(void); int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); +int clp_get_state(u32 fid, enum zpci_state *state); #ifdef CONFIG_PCI /* Error handling and recovery */ diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index 649eb62..34abcf2 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -76,7 +76,7 @@ struct zpci_fib { u32 gd; } __packed __aligned(8); -int zpci_mod_fc(u64 req, struct zpci_fib *fib); +u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status); int zpci_refresh_trans(u64 fn, u64 addr, u64 range); int zpci_load(u64 *data, u64 req, u64 offset); int zpci_store(u64 data, u64 req, u64 offset); diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 166f703..bb0ff1b 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -51,12 +51,24 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) return _SEGMENT_ENTRY_EMPTY; if (mm->context.asce_limit <= (1UL << 42)) return _REGION3_ENTRY_EMPTY; - return _REGION2_ENTRY_EMPTY; + if (mm->context.asce_limit <= (1UL << 53)) + return _REGION2_ENTRY_EMPTY; + return _REGION1_ENTRY_EMPTY; } -int crst_table_upgrade(struct mm_struct *); +int crst_table_upgrade(struct mm_struct *mm, unsigned long limit); void crst_table_downgrade(struct mm_struct *); +static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address) +{ + unsigned long *table = crst_table_alloc(mm); + + if (table) + crst_table_init(table, _REGION2_ENTRY_EMPTY); + return (p4d_t *) table; +} +#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d) + static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { unsigned long *table = crst_table_alloc(mm); @@ -86,9 +98,14 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) crst_table_free(mm, (unsigned long *) pmd); } -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) +{ + pgd_val(*pgd) = _REGION1_ENTRY | __pa(p4d); +} + +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) { - pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud); + p4d_val(*p4d) = _REGION2_ENTRY | __pa(pud); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index e6e3b88..57057fb 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -24,7 +24,6 @@ * the S390 page table tree. */ #ifndef __ASSEMBLY__ -#include <asm-generic/5level-fixup.h> #include <linux/sched.h> #include <linux/mm_types.h> #include <linux/page-flags.h> @@ -87,12 +86,15 @@ extern unsigned long zero_page_mask; */ #define PMD_SHIFT 20 #define PUD_SHIFT 31 -#define PGDIR_SHIFT 42 +#define P4D_SHIFT 42 +#define PGDIR_SHIFT 53 #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) #define PUD_SIZE (1UL << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) +#define P4D_SIZE (1UL << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE-1)) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) @@ -105,6 +107,7 @@ extern unsigned long zero_page_mask; #define PTRS_PER_PTE 256 #define PTRS_PER_PMD 2048 #define PTRS_PER_PUD 2048 +#define PTRS_PER_P4D 2048 #define PTRS_PER_PGD 2048 #define FIRST_USER_ADDRESS 0UL @@ -115,6 +118,8 @@ extern unsigned long zero_page_mask; printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e)) #define pud_ERROR(e) \ printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e)) +#define p4d_ERROR(e) \ + printk("%s:%d: bad p4d %p.\n", __FILE__, __LINE__, (void *) p4d_val(e)) #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e)) @@ -296,8 +301,6 @@ static inline int is_module_addr(void *addr) #define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) #define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */ -#define _REGION3_ENTRY_ORIGIN ~0x7ffUL/* region third table origin */ - #define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */ #define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */ #define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */ @@ -310,8 +313,8 @@ static inline int is_module_addr(void *addr) #define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */ #endif -#define _REGION_ENTRY_BITS 0xfffffffffffff227UL -#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe27UL +#define _REGION_ENTRY_BITS 0xfffffffffffff22fUL +#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL @@ -560,18 +563,23 @@ static inline void crdte(unsigned long old, unsigned long new, } /* - * pgd/pmd/pte query functions + * pgd/p4d/pud/pmd/pte query functions */ +static inline int pgd_folded(pgd_t pgd) +{ + return (pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1; +} + static inline int pgd_present(pgd_t pgd) { - if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) + if (pgd_folded(pgd)) return 1; return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL; } static inline int pgd_none(pgd_t pgd) { - if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) + if (pgd_folded(pgd)) return 0; return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL; } @@ -589,16 +597,48 @@ static inline int pgd_bad(pgd_t pgd) return (pgd_val(pgd) & mask) != 0; } +static inline int p4d_folded(p4d_t p4d) +{ + return (p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2; +} + +static inline int p4d_present(p4d_t p4d) +{ + if (p4d_folded(p4d)) + return 1; + return (p4d_val(p4d) & _REGION_ENTRY_ORIGIN) != 0UL; +} + +static inline int p4d_none(p4d_t p4d) +{ + if (p4d_folded(p4d)) + return 0; + return p4d_val(p4d) == _REGION2_ENTRY_EMPTY; +} + +static inline unsigned long p4d_pfn(p4d_t p4d) +{ + unsigned long origin_mask; + + origin_mask = _REGION_ENTRY_ORIGIN; + return (p4d_val(p4d) & origin_mask) >> PAGE_SHIFT; +} + +static inline int pud_folded(pud_t pud) +{ + return (pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3; +} + static inline int pud_present(pud_t pud) { - if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) + if (pud_folded(pud)) return 1; return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL; } static inline int pud_none(pud_t pud) { - if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) + if (pud_folded(pud)) return 0; return pud_val(pud) == _REGION3_ENTRY_EMPTY; } @@ -614,7 +654,7 @@ static inline unsigned long pud_pfn(pud_t pud) { unsigned long origin_mask; - origin_mask = _REGION3_ENTRY_ORIGIN; + origin_mask = _REGION_ENTRY_ORIGIN; if (pud_large(pud)) origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; return (pud_val(pud) & origin_mask) >> PAGE_SHIFT; @@ -641,6 +681,13 @@ static inline int pud_bad(pud_t pud) return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0; } +static inline int p4d_bad(p4d_t p4d) +{ + if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) + return pud_bad(__pud(p4d_val(p4d))); + return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0; +} + static inline int pmd_present(pmd_t pmd) { return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY; @@ -794,8 +841,14 @@ static inline int pte_unused(pte_t pte) static inline void pgd_clear(pgd_t *pgd) { - if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) - pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; + if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) + pgd_val(*pgd) = _REGION1_ENTRY_EMPTY; +} + +static inline void p4d_clear(p4d_t *p4d) +{ + if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + p4d_val(*p4d) = _REGION2_ENTRY_EMPTY; } static inline void pud_clear(pud_t *pud) @@ -1089,6 +1142,7 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) } #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) +#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1)) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) @@ -1098,19 +1152,31 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) #define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) +#define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN) #define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) -static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { - pud_t *pud = (pud_t *) pgd; - if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) - pud = (pud_t *) pgd_deref(*pgd); - return pud + pud_index(address); + p4d_t *p4d = (p4d_t *) pgd; + + if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) + p4d = (p4d_t *) pgd_deref(*pgd); + return p4d + p4d_index(address); +} + +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) +{ + pud_t *pud = (pud_t *) p4d; + + if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + pud = (pud_t *) p4d_deref(*p4d); + return pud + pud_index(address); } static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { pmd_t *pmd = (pmd_t *) pud; + if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) pmd = (pmd_t *) pud_deref(*pud); return pmd + pmd_index(address); @@ -1122,6 +1188,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) #define pud_page(pud) pfn_to_page(pud_pfn(pud)) +#define p4d_page(pud) pfn_to_page(p4d_pfn(p4d)) /* Find an entry in the lowest level page table.. */ #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index aeac013..c25d57e 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -20,6 +20,7 @@ #define CIF_FPU 4 /* restore FPU registers */ #define CIF_IGNORE_IRQ 5 /* ignore interrupt (for udelay) */ #define CIF_ENABLED_WAIT 6 /* in enabled wait state */ +#define CIF_MCCK_GUEST 7 /* machine check happening in guest */ #define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING) #define _CIF_ASCE_PRIMARY _BITUL(CIF_ASCE_PRIMARY) @@ -28,6 +29,7 @@ #define _CIF_FPU _BITUL(CIF_FPU) #define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ) #define _CIF_ENABLED_WAIT _BITUL(CIF_ENABLED_WAIT) +#define _CIF_MCCK_GUEST _BITUL(CIF_MCCK_GUEST) #ifndef __ASSEMBLY__ @@ -92,11 +94,11 @@ extern void execve_tail(void); */ #define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? \ - (1UL << 31) : (1UL << 53)) + (1UL << 31) : -PAGE_SIZE) #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ (1UL << 30) : (1UL << 41)) #define TASK_SIZE TASK_SIZE_OF(current) -#define TASK_SIZE_MAX (1UL << 53) +#define TASK_SIZE_MAX (-PAGE_SIZE) #define STACK_TOP (test_thread_flag(TIF_31BIT) ? \ (1UL << 31) : (1UL << 42)) diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 99bc456..853b012 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -11,9 +11,11 @@ #define PIF_SYSCALL 0 /* inside a system call */ #define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */ +#define PIF_SYSCALL_RESTART 2 /* restart the current system call */ #define _PIF_SYSCALL _BITUL(PIF_SYSCALL) #define _PIF_PER_TRAP _BITUL(PIF_PER_TRAP) +#define _PIF_SYSCALL_RESTART _BITUL(PIF_SYSCALL_RESTART) #ifndef __ASSEMBLY__ @@ -24,38 +26,38 @@ PSW_MASK_PSTATE | PSW_ASC_PRIMARY) struct psw_bits { - unsigned long : 1; - unsigned long r : 1; /* PER-Mask */ - unsigned long : 3; - unsigned long t : 1; /* DAT Mode */ - unsigned long i : 1; /* Input/Output Mask */ - unsigned long e : 1; /* External Mask */ - unsigned long key : 4; /* PSW Key */ - unsigned long : 1; - unsigned long m : 1; /* Machine-Check Mask */ - unsigned long w : 1; /* Wait State */ - unsigned long p : 1; /* Problem State */ - unsigned long as : 2; /* Address Space Control */ - unsigned long cc : 2; /* Condition Code */ - unsigned long pm : 4; /* Program Mask */ - unsigned long ri : 1; /* Runtime Instrumentation */ - unsigned long : 6; - unsigned long eaba : 2; /* Addressing Mode */ - unsigned long : 31; - unsigned long ia : 64; /* Instruction Address */ + unsigned long : 1; + unsigned long per : 1; /* PER-Mask */ + unsigned long : 3; + unsigned long dat : 1; /* DAT Mode */ + unsigned long io : 1; /* Input/Output Mask */ + unsigned long ext : 1; /* External Mask */ + unsigned long key : 4; /* PSW Key */ + unsigned long : 1; + unsigned long mcheck : 1; /* Machine-Check Mask */ + unsigned long wait : 1; /* Wait State */ + unsigned long pstate : 1; /* Problem State */ + unsigned long as : 2; /* Address Space Control */ + unsigned long cc : 2; /* Condition Code */ + unsigned long pm : 4; /* Program Mask */ + unsigned long ri : 1; /* Runtime Instrumentation */ + unsigned long : 6; + unsigned long eaba : 2; /* Addressing Mode */ + unsigned long : 31; + unsigned long ia : 64; /* Instruction Address */ }; enum { - PSW_AMODE_24BIT = 0, - PSW_AMODE_31BIT = 1, - PSW_AMODE_64BIT = 3 + PSW_BITS_AMODE_24BIT = 0, + PSW_BITS_AMODE_31BIT = 1, + PSW_BITS_AMODE_64BIT = 3 }; enum { - PSW_AS_PRIMARY = 0, - PSW_AS_ACCREG = 1, - PSW_AS_SECONDARY = 2, - PSW_AS_HOME = 3 + PSW_BITS_AS_PRIMARY = 0, + PSW_BITS_AS_ACCREG = 1, + PSW_BITS_AS_SECONDARY = 2, + PSW_BITS_AS_HOME = 3 }; #define psw_bits(__psw) (*({ \ diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index 72df5f2..020a881 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -59,7 +59,7 @@ static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, int cc; cc = ____pcpu_sigp(addr, order, parm, &_status); - if (status && cc == 1) + if (status && cc == SIGP_CC_STATUS_STORED) *status = _status; return cc; } diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 0b3ee08..1aecf43 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -58,6 +58,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define TIF_UPROBE 3 /* breakpointed or single-stepping */ #define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */ #define TIF_PATCH_PENDING 5 /* pending live patching update */ +#define TIF_PGSTE 6 /* New mm's will use 4K page tables */ #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 853b2a3..7317b31 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -137,6 +137,21 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, } /* + * p4d_free_tlb frees a pud table and clears the CRSTE for the + * region second table entry from the tlb. + * If the mm uses a four level page table the single p4d is freed + * as the pgd. p4d_free_tlb checks the asce_limit against 8PB + * to avoid the double free of the p4d in this case. + */ +static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, + unsigned long address) +{ + if (tlb->mm->context.asce_limit <= (1UL << 53)) + return; + tlb_remove_table(tlb, p4d); +} + +/* * pud_free_tlb frees a pud table and clears the CRSTE for the * region third table entry from the tlb. * If the mm uses a three level page table the single pud is freed diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 6bb2963..b65c414 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -58,6 +58,9 @@ int main(void) OFFSET(__SF_BACKCHAIN, stack_frame, back_chain); OFFSET(__SF_GPRS, stack_frame, gprs); OFFSET(__SF_EMPTY, stack_frame, empty1); + OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]); + OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]); + OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]); BLANK(); /* timeval/timezone offsets for use by vdso */ OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 829e1c5..dab78ba 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -98,8 +98,10 @@ static int show_address(void *data, unsigned long address, int reliable) return 0; } -static void show_trace(struct task_struct *task, unsigned long sp) +void show_stack(struct task_struct *task, unsigned long *stack) { + unsigned long sp = (unsigned long) stack; + if (!sp) sp = task ? task->thread.ksp : current_stack_pointer(); printk("Call Trace:\n"); @@ -109,29 +111,6 @@ static void show_trace(struct task_struct *task, unsigned long sp) debug_show_held_locks(task); } -void show_stack(struct task_struct *task, unsigned long *sp) -{ - unsigned long *stack; - int i; - - stack = sp; - if (!stack) { - if (!task) - stack = (unsigned long *)current_stack_pointer(); - else - stack = (unsigned long *)task->thread.ksp; - } - printk(KERN_DEFAULT "Stack:\n"); - for (i = 0; i < 20; i++) { - if (((addr_t) stack & (THREAD_SIZE-1)) == 0) - break; - if (i % 4 == 0) - printk(KERN_DEFAULT " "); - pr_cont("%016lx%c", *stack++, i % 4 == 3 ? '\n' : ' '); - } - show_trace(task, (unsigned long)sp); -} - static void show_last_breaking_event(struct pt_regs *regs) { printk("Last Breaking-Event-Address:\n"); @@ -149,8 +128,8 @@ void show_registers(struct pt_regs *regs) pr_cont(" (%pSR)", (void *)regs->psw.addr); pr_cont("\n"); printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " - "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e, - psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm); + "P:%x AS:%x CC:%x PM:%x", psw->per, psw->dat, psw->io, psw->ext, + psw->key, psw->mcheck, psw->wait, psw->pstate, psw->as, psw->cc, psw->pm); pr_cont(" RI:%x EA:%x\n", psw->ri, psw->eaba); printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); @@ -169,7 +148,7 @@ void show_regs(struct pt_regs *regs) show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ if (!user_mode(regs)) - show_trace(NULL, regs->gprs[15]); + show_stack(NULL, (unsigned long *) regs->gprs[15]); show_last_breaking_event(regs); } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 6315037..21900e1 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -52,7 +52,7 @@ _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \ _CIF_ASCE_SECONDARY | _CIF_FPU) -_PIF_WORK = (_PIF_PER_TRAP) +_PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) #define BASED(name) name-cleanup_critical(%r13) @@ -225,6 +225,7 @@ ENTRY(sie64a) jnz .Lsie_skip TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lsie_skip # exit if fp/vx regs changed +.Lsie_entry: sie 0(%r14) .Lsie_skip: ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE @@ -334,6 +335,8 @@ ENTRY(system_call) jo .Lsysc_mcck_pending TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED jo .Lsysc_reschedule + TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART + jo .Lsysc_syscall_restart #ifdef CONFIG_UPROBES TSTMSK __TI_flags(%r12),_TIF_UPROBE jo .Lsysc_uprobe_notify @@ -347,6 +350,8 @@ ENTRY(system_call) jo .Lsysc_patch_pending # handle live patching just before # signals and possible syscall restart #endif + TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART + jo .Lsysc_syscall_restart TSTMSK __TI_flags(%r12),_TIF_SIGPENDING jo .Lsysc_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME @@ -448,6 +453,15 @@ ENTRY(system_call) jg do_per_trap # +# _PIF_SYSCALL_RESTART is set, repeat the current system call +# +.Lsysc_syscall_restart: + ni __PT_FLAGS+7(%r11),255-_PIF_SYSCALL_RESTART + lmg %r1,%r7,__PT_R1(%r11) # load svc arguments + lg %r2,__PT_ORIG_GPR2(%r11) + j .Lsysc_do_svc + +# # call tracehook_report_syscall_entry/tracehook_report_syscall_exit before # and after the system call # @@ -881,9 +895,7 @@ ENTRY(save_fpu_regs) oi __LC_CPU_FLAGS+7,_CIF_FPU br %r14 .Lsave_fpu_regs_end: -#if IS_ENABLED(CONFIG_KVM) EXPORT_SYMBOL(save_fpu_regs) -#endif /* * Load floating-point controls and floating-point or vector registers. @@ -1111,7 +1123,13 @@ cleanup_critical: .quad .Lsie_done .Lcleanup_sie: - lg %r9,__SF_EMPTY(%r15) # get control block pointer + cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt? + je 1f + slg %r9,BASED(.Lsie_crit_mcck_start) + clg %r9,BASED(.Lsie_crit_mcck_length) + jh 1f + oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST +1: lg %r9,__SF_EMPTY(%r15) # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit @@ -1296,6 +1314,10 @@ cleanup_critical: .quad .Lsie_gmap .Lsie_critical_length: .quad .Lsie_done - .Lsie_gmap +.Lsie_crit_mcck_start: + .quad .Lsie_entry +.Lsie_crit_mcck_length: + .quad .Lsie_skip - .Lsie_entry #endif .section .rodata, "a" diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 9855895..31d03a8 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -25,6 +25,8 @@ #include <asm/crw.h> #include <asm/switch_to.h> #include <asm/ctl_reg.h> +#include <asm/asm-offsets.h> +#include <linux/kvm_host.h> struct mcck_struct { unsigned int kill_task : 1; @@ -274,12 +276,39 @@ static int notrace s390_validate_registers(union mci mci, int umode) return kill_task; } +/* + * Backup the guest's machine check info to its description block + */ +static void notrace s390_backup_mcck_info(struct pt_regs *regs) +{ + struct mcck_volatile_info *mcck_backup; + struct sie_page *sie_page; + + /* r14 contains the sie block, which was set in sie64a */ + struct kvm_s390_sie_block *sie_block = + (struct kvm_s390_sie_block *) regs->gprs[14]; + + if (sie_block == NULL) + /* Something's seriously wrong, stop system. */ + s390_handle_damage(); + + sie_page = container_of(sie_block, struct sie_page, sie_block); + mcck_backup = &sie_page->mcck_info; + mcck_backup->mcic = S390_lowcore.mcck_interruption_code & + ~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE); + mcck_backup->ext_damage_code = S390_lowcore.external_damage_code; + mcck_backup->failing_storage_address + = S390_lowcore.failing_storage_address; +} + #define MAX_IPD_COUNT 29 #define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */ #define ED_STP_ISLAND 6 /* External damage STP island check */ #define ED_STP_SYNC 7 /* External damage STP sync check */ +#define MCCK_CODE_NO_GUEST (MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE) + /* * machine check handler. */ @@ -291,6 +320,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) struct mcck_struct *mcck; unsigned long long tmp; union mci mci; + unsigned long mcck_dam_code; nmi_enter(); inc_irq_stat(NMI_NMI); @@ -301,7 +331,13 @@ void notrace s390_do_machine_check(struct pt_regs *regs) /* System damage -> stopping machine */ s390_handle_damage(); } - if (mci.pd) { + + /* + * Reinject the instruction processing damages' machine checks + * including Delayed Access Exception into the guest + * instead of damaging the host if they happen in the guest. + */ + if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) { if (mci.b) { /* Processing backup -> verify if we can survive this */ u64 z_mcic, o_mcic, t_mcic; @@ -345,6 +381,14 @@ void notrace s390_do_machine_check(struct pt_regs *regs) mcck->mcck_code = mci.val; set_cpu_flag(CIF_MCCK_PENDING); } + + /* + * Backup the machine check's info if it happens when the guest + * is running. + */ + if (test_cpu_flag(CIF_MCCK_GUEST)) + s390_backup_mcck_info(regs); + if (mci.cd) { /* Timing facility damage */ s390_handle_damage(); @@ -358,15 +402,22 @@ void notrace s390_do_machine_check(struct pt_regs *regs) if (mcck->stp_queue) set_cpu_flag(CIF_MCCK_PENDING); } - if (mci.se) - /* Storage error uncorrected */ - s390_handle_damage(); - if (mci.ke) - /* Storage key-error uncorrected */ - s390_handle_damage(); - if (mci.ds && mci.fa) - /* Storage degradation */ - s390_handle_damage(); + + /* + * Reinject storage related machine checks into the guest if they + * happen when the guest is running. + */ + if (!test_cpu_flag(CIF_MCCK_GUEST)) { + if (mci.se) + /* Storage error uncorrected */ + s390_handle_damage(); + if (mci.ke) + /* Storage key-error uncorrected */ + s390_handle_damage(); + if (mci.ds && mci.fa) + /* Storage degradation */ + s390_handle_damage(); + } if (mci.cp) { /* Channel report word pending */ mcck->channel_report = 1; @@ -377,6 +428,19 @@ void notrace s390_do_machine_check(struct pt_regs *regs) mcck->warning = 1; set_cpu_flag(CIF_MCCK_PENDING); } + + /* + * If there are only Channel Report Pending and External Damage + * machine checks, they will not be reinjected into the guest + * because they refer to host conditions only. + */ + mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK); + if (test_cpu_flag(CIF_MCCK_GUEST) && + (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) { + /* Set exit reason code for host's later handling */ + *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR; + } + clear_cpu_flag(CIF_MCCK_GUEST); nmi_exit(); } diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index ca960d0..0c82f79 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -995,11 +995,11 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) regs.int_parm = CPU_MF_INT_SF_PRA; sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long; - psw_bits(regs.psw).ia = sfr->basic.ia; - psw_bits(regs.psw).t = sfr->basic.T; - psw_bits(regs.psw).w = sfr->basic.W; - psw_bits(regs.psw).p = sfr->basic.P; - psw_bits(regs.psw).as = sfr->basic.AS; + psw_bits(regs.psw).ia = sfr->basic.ia; + psw_bits(regs.psw).dat = sfr->basic.T; + psw_bits(regs.psw).wait = sfr->basic.W; + psw_bits(regs.psw).per = sfr->basic.P; + psw_bits(regs.psw).as = sfr->basic.AS; /* * Use the hardware provided configuration level to decide if the diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 955a7b6..93a386f 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -245,6 +245,5 @@ ssize_t cpumf_events_sysfs_show(struct device *dev, struct perf_pmu_events_attr *pmu_attr; pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); - return sprintf(page, "event=0x%04llx,name=%s\n", - pmu_attr->id, attr->attr.name); + return sprintf(page, "event=0x%04llx\n", pmu_attr->id); } diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 488c5bb..252ed61 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1160,6 +1160,8 @@ static int s390_gs_cb_get(struct task_struct *target, return -ENODEV; if (!data) return -ENODATA; + if (target == current) + save_gs_cb(data); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, sizeof(struct gs_cb)); } @@ -1170,6 +1172,7 @@ static int s390_gs_cb_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { struct gs_cb *data = target->thread.gs_cb; + int rc; if (!MACHINE_HAS_GS) return -ENODEV; @@ -1177,10 +1180,18 @@ static int s390_gs_cb_set(struct task_struct *target, data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; + data->gsd = 25; target->thread.gs_cb = data; + if (target == current) + __ctl_set_bit(2, 4); + } else if (target == current) { + save_gs_cb(data); } - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(struct gs_cb)); + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); + if (target == current) + restore_gs_cb(data); + return rc; } static int s390_gs_bc_get(struct task_struct *target, diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 363000a..1020a11 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -26,6 +26,7 @@ #include <linux/err.h> #include <linux/spinlock.h> #include <linux/kernel_stat.h> +#include <linux/kmemleak.h> #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/irqflags.h> @@ -207,6 +208,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL); if (!mcesa_origin) goto out; + /* The pointer is stored with mcesa_bits ORed in */ + kmemleak_not_leak((void *) mcesa_origin); mcesa_bits = MACHINE_HAS_GS ? 11 : 0; } } else { diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index f787b9d..442e542 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -21,6 +21,7 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/uaccess.h> +#include <linux/cpu.h> #include <asm/fpu/api.h> #include "entry.h" diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index 314e0ee..d94baa8 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -27,12 +27,12 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - if (psw_bits(regs->psw).eaba == PSW_AMODE_24BIT) + if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) return -EINVAL; - if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_AMODE_31BIT) + if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) return -EINVAL; clear_pt_regs_flag(regs, PIF_PER_TRAP); - auprobe->saved_per = psw_bits(regs->psw).r; + auprobe->saved_per = psw_bits(regs->psw).per; auprobe->saved_int_code = regs->int_code; regs->int_code = UPROBE_TRAP_NR; regs->psw.addr = current->utask->xol_vaddr; @@ -81,7 +81,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) clear_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP); update_cr_regs(current); - psw_bits(regs->psw).r = auprobe->saved_per; + psw_bits(regs->psw).per = auprobe->saved_per; regs->int_code = auprobe->saved_int_code; if (fixup & FIXUP_PSW_NORMAL) @@ -372,8 +372,8 @@ static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs) bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { - if ((psw_bits(regs->psw).eaba == PSW_AMODE_24BIT) || - ((psw_bits(regs->psw).eaba == PSW_AMODE_31BIT) && + if ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) || + ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) && !is_compat_task())) { regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE); do_report_trap(regs, SIGILL, ILL_ILLADR, NULL); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 10516ae..b89d19f6 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -50,6 +50,56 @@ static struct page **vdso64_pagelist; */ unsigned int __read_mostly vdso_enabled = 1; +static int vdso_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page **vdso_pagelist; + unsigned long vdso_pages; + + vdso_pagelist = vdso64_pagelist; + vdso_pages = vdso64_pages; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + vdso_pagelist = vdso32_pagelist; + vdso_pages = vdso32_pages; + } +#endif + + if (vmf->pgoff >= vdso_pages) + return VM_FAULT_SIGBUS; + + vmf->page = vdso_pagelist[vmf->pgoff]; + get_page(vmf->page); + return 0; +} + +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *vma) +{ + unsigned long vdso_pages; + + vdso_pages = vdso64_pages; +#ifdef CONFIG_COMPAT + if (is_compat_task()) + vdso_pages = vdso32_pages; +#endif + + if ((vdso_pages << PAGE_SHIFT) != vma->vm_end - vma->vm_start) + return -EINVAL; + + if (WARN_ON_ONCE(current->mm != vma->vm_mm)) + return -EFAULT; + + current->mm->context.vdso_base = vma->vm_start; + return 0; +} + +static const struct vm_special_mapping vdso_mapping = { + .name = "[vdso]", + .fault = vdso_fault, + .mremap = vdso_mremap, +}; + static int __init vdso_setup(char *s) { unsigned long val; @@ -181,7 +231,7 @@ static void vdso_init_cr5(void) int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - struct page **vdso_pagelist; + struct vm_area_struct *vma; unsigned long vdso_pages; unsigned long vdso_base; int rc; @@ -194,13 +244,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!uses_interp) return 0; - vdso_pagelist = vdso64_pagelist; vdso_pages = vdso64_pages; #ifdef CONFIG_COMPAT - if (is_compat_task()) { - vdso_pagelist = vdso32_pagelist; + if (is_compat_task()) vdso_pages = vdso32_pages; - } #endif /* * vDSO has a problem and was disabled, just don't "enable" it for @@ -209,8 +256,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (vdso_pages == 0) return 0; - current->mm->context.vdso_base = 0; - /* * pick a base address for the vDSO in process space. We try to put * it at vdso_base which is the "natural" base for it, but we might @@ -225,13 +270,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) } /* - * Put vDSO base into mm struct. We need to do this before calling - * install_special_mapping or the perf counter mmap tracking code - * will fail to recognise it as a vDSO (since arch_vma_name fails). - */ - current->mm->context.vdso_base = vdso_base; - - /* * our vma flags don't have VM_WRITE so by default, the process * isn't allowed to write those pages. * gdb can break that with ptrace interface, and thus trigger COW @@ -241,24 +279,23 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * It's fine to use that for setting breakpoints in the vDSO code * pages though. */ - rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_pagelist); - if (rc) - current->mm->context.vdso_base = 0; + vma = _install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + &vdso_mapping); + if (IS_ERR(vma)) { + rc = PTR_ERR(vma); + goto out_up; + } + + current->mm->context.vdso_base = vdso_base; + rc = 0; + out_up: up_write(&mm->mmap_sem); return rc; } -const char *arch_vma_name(struct vm_area_struct *vma) -{ - if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base) - return "[vdso]"; - return NULL; -} - static int __init vdso_init(void) { int i; diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 072d84b..dd7178f 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -110,11 +110,10 @@ static inline u64 scale_vtime(u64 vtime) return vtime; } -static void account_system_index_scaled(struct task_struct *p, - u64 cputime, u64 scaled, +static void account_system_index_scaled(struct task_struct *p, u64 cputime, enum cpu_usage_stat index) { - p->stimescaled += cputime_to_nsecs(scaled); + p->stimescaled += cputime_to_nsecs(scale_vtime(cputime)); account_system_index_time(p, cputime_to_nsecs(cputime), index); } @@ -176,14 +175,11 @@ static int do_account_vtime(struct task_struct *tsk) } if (system) - account_system_index_scaled(tsk, system, scale_vtime(system), - CPUTIME_SYSTEM); + account_system_index_scaled(tsk, system, CPUTIME_SYSTEM); if (hardirq) - account_system_index_scaled(tsk, hardirq, scale_vtime(hardirq), - CPUTIME_IRQ); + account_system_index_scaled(tsk, hardirq, CPUTIME_IRQ); if (softirq) - account_system_index_scaled(tsk, softirq, scale_vtime(softirq), - CPUTIME_SOFTIRQ); + account_system_index_scaled(tsk, softirq, CPUTIME_SOFTIRQ); steal = S390_lowcore.steal_timer; if ((s64) steal > 0) { diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 3b297fa..875f8be 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -551,26 +551,26 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, int rc; struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); - if (!psw.t) { + if (!psw.dat) { asce->val = 0; asce->r = 1; return 0; } - if (mode == GACC_IFETCH) - psw.as = psw.as == PSW_AS_HOME ? PSW_AS_HOME : PSW_AS_PRIMARY; + if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME)) + psw.as = PSW_BITS_AS_PRIMARY; switch (psw.as) { - case PSW_AS_PRIMARY: + case PSW_BITS_AS_PRIMARY: asce->val = vcpu->arch.sie_block->gcr[1]; return 0; - case PSW_AS_SECONDARY: + case PSW_BITS_AS_SECONDARY: asce->val = vcpu->arch.sie_block->gcr[7]; return 0; - case PSW_AS_HOME: + case PSW_BITS_AS_HOME: asce->val = vcpu->arch.sie_block->gcr[13]; return 0; - case PSW_AS_ACCREG: + case PSW_BITS_AS_ACCREG: rc = ar_translation(vcpu, asce, ar, mode); if (rc > 0) return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC); @@ -771,7 +771,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, if (!ctlreg0.lap) return 0; - if (psw_bits(*psw).t && asce.p) + if (psw_bits(*psw).dat && asce.p) return 0; return 1; } @@ -790,7 +790,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode, PROT_TYPE_LA); ga &= PAGE_MASK; - if (psw_bits(*psw).t) { + if (psw_bits(*psw).dat) { rc = guest_translate(vcpu, ga, pages, asce, mode); if (rc < 0) return rc; @@ -831,7 +831,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, pages = vmalloc(nr_pages * sizeof(unsigned long)); if (!pages) return -ENOMEM; - need_ipte_lock = psw_bits(*psw).t && !asce.r; + need_ipte_lock = psw_bits(*psw).dat && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode); @@ -899,7 +899,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, mode, PROT_TYPE_LA); } - if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ + if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */ rc = guest_translate(vcpu, gva, gpa, asce, mode); if (rc > 0) return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT); diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 7ce47fd..bec42b8 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -57,9 +57,9 @@ static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu, { psw_t *psw = &vcpu->arch.sie_block->gpsw; - if (psw_bits(*psw).eaba == PSW_AMODE_64BIT) + if (psw_bits(*psw).eaba == PSW_BITS_AMODE_64BIT) return ga; - if (psw_bits(*psw).eaba == PSW_AMODE_31BIT) + if (psw_bits(*psw).eaba == PSW_BITS_AMODE_31BIT) return ga & ((1UL << 31) - 1); return ga & ((1UL << 24) - 1); } diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index 23d9a4e..c2e0ddc 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -613,15 +613,15 @@ int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu) * instruction. Check primary and home space-switch-event * controls. (theoretically home -> home produced no event) */ - if (((new_as == PSW_AS_HOME) ^ old_as_is_home(vcpu)) && - (pssec(vcpu) || hssec(vcpu))) + if (((new_as == PSW_BITS_AS_HOME) ^ old_as_is_home(vcpu)) && + (pssec(vcpu) || hssec(vcpu))) vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH; /* * PT, PTI, PR, PC instruction operate on primary AS only. Check * if the primary-space-switch-event control was or got set. */ - if (new_as == PSW_AS_PRIMARY && !old_as_is_home(vcpu) && + if (new_as == PSW_BITS_AS_PRIMARY && !old_as_is_home(vcpu) && (pssec(vcpu) || old_ssec(vcpu))) vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f28e2e7..b0d7de5 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2067,6 +2067,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, if (!vcpu) goto out; + BUILD_BUG_ON(sizeof(struct sie_page) != 4096); sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); if (!sie_page) goto out_free_cpu; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index c03106c..e53292a 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -361,7 +361,7 @@ static int handle_sske(struct kvm_vcpu *vcpu) } } if (m3 & SSKE_MB) { - if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) + if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK; else vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL; @@ -374,7 +374,7 @@ static int handle_sske(struct kvm_vcpu *vcpu) static int handle_ipte_interlock(struct kvm_vcpu *vcpu) { vcpu->stat.instruction_ipte_interlock++; - if (psw_bits(vcpu->arch.sie_block->gpsw).p) + if (psw_bits(vcpu->arch.sie_block->gpsw).pstate) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu)); kvm_s390_retry_instr(vcpu); @@ -901,7 +901,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) /* only support 2G frame size if EDAT2 is available and we are not in 24-bit addressing mode */ if (!test_kvm_facility(vcpu->kvm, 78) || - psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_24BIT) + psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); end = (start + (1UL << 31)) & ~((1UL << 31) - 1); break; @@ -938,7 +938,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) start += PAGE_SIZE; } if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { - if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) { + if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) { vcpu->run->s.regs.gprs[reg2] = end; } else { vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL; diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 1b553d8..049c3c4 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -149,7 +149,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, } static void walk_pud_level(struct seq_file *m, struct pg_state *st, - pgd_t *pgd, unsigned long addr) + p4d_t *p4d, unsigned long addr) { unsigned int prot; pud_t *pud; @@ -157,7 +157,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) { st->current_address = addr; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); if (!pud_none(*pud)) if (pud_large(*pud)) { prot = pud_val(*pud) & @@ -172,6 +172,23 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, } } +static void walk_p4d_level(struct seq_file *m, struct pg_state *st, + pgd_t *pgd, unsigned long addr) +{ + p4d_t *p4d; + int i; + + for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++) { + st->current_address = addr; + p4d = p4d_offset(pgd, addr); + if (!p4d_none(*p4d)) + walk_pud_level(m, st, p4d, addr); + else + note_page(m, st, _PAGE_INVALID, 2); + addr += P4D_SIZE; + } +} + static void walk_pgd_level(struct seq_file *m) { unsigned long addr = 0; @@ -184,7 +201,7 @@ static void walk_pgd_level(struct seq_file *m) st.current_address = addr; pgd = pgd_offset_k(addr); if (!pgd_none(*pgd)) - walk_pud_level(m, &st, pgd, addr); + walk_p4d_level(m, &st, pgd, addr); else note_page(m, &st, _PAGE_INVALID, 1); addr += PGDIR_SIZE; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 5845d30..14f2579 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -130,7 +130,7 @@ static int bad_address(void *p) static void dump_pagetable(unsigned long asce, unsigned long address) { - unsigned long *table = __va(asce & PAGE_MASK); + unsigned long *table = __va(asce & _ASCE_ORIGIN); pr_alert("AS:%016lx ", asce); switch (asce & _ASCE_TYPE_MASK) { diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 7f6db1e..4fb3d3c 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -125,7 +125,7 @@ static void gmap_radix_tree_free(struct radix_tree_root *root) struct radix_tree_iter iter; unsigned long indices[16]; unsigned long index; - void **slot; + void __rcu **slot; int i, nr; /* A radix tree is freed by deleting all of its entries */ @@ -150,7 +150,7 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root) struct radix_tree_iter iter; unsigned long indices[16]; unsigned long index; - void **slot; + void __rcu **slot; int i, nr; /* A radix tree is freed by deleting all of its entries */ @@ -537,6 +537,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) unsigned long *table; spinlock_t *ptl; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; int rc; @@ -573,7 +574,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) mm = gmap->mm; pgd = pgd_offset(mm, vmaddr); VM_BUG_ON(pgd_none(*pgd)); - pud = pud_offset(pgd, vmaddr); + p4d = p4d_offset(pgd, vmaddr); + VM_BUG_ON(p4d_none(*p4d)); + pud = pud_offset(p4d, vmaddr); VM_BUG_ON(pud_none(*pud)); /* large puds cannot yet be handled */ if (pud_large(*pud)) @@ -1008,7 +1011,7 @@ EXPORT_SYMBOL_GPL(gmap_read_table); static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr, struct gmap_rmap *rmap) { - void **slot; + void __rcu **slot; BUG_ON(!gmap_is_shadow(sg)); slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT); diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index b7b779c..8ecc25e 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -166,15 +166,15 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, return 1; } -static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, +static inline int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long next; pud_t *pudp, pud; - pudp = (pud_t *) pgdp; - if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) - pudp = (pud_t *) pgd_deref(pgd); + pudp = (pud_t *) p4dp; + if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + pudp = (pud_t *) p4d_deref(p4d); pudp += pud_index(addr); do { pud = *pudp; @@ -194,6 +194,29 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, return 1; } +static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long next; + p4d_t *p4dp, p4d; + + p4dp = (p4d_t *) pgdp; + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) + p4dp = (p4d_t *) pgd_deref(pgd); + p4dp += p4d_index(addr); + do { + p4d = *p4dp; + barrier(); + next = p4d_addr_end(addr, end); + if (p4d_none(p4d)) + return 0; + if (!gup_pud_range(p4dp, p4d, addr, next, write, pages, nr)) + return 0; + } while (p4dp++, addr = next, addr != end); + + return 1; +} + /* * Like get_user_pages_fast() except its IRQ-safe in that it won't fall * back to the regular GUP. @@ -228,7 +251,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, next = pgd_addr_end(addr, end); if (pgd_none(pgd)) break; - if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr)) + if (!gup_p4d_range(pgdp, pgd, addr, next, write, pages, &nr)) break; } while (pgdp++, addr = next, addr != end); local_irq_restore(flags); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 9b4050c..d3a5e39 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -162,16 +162,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp = NULL; pgdp = pgd_offset(mm, addr); - pudp = pud_alloc(mm, pgdp, addr); - if (pudp) { - if (sz == PUD_SIZE) - return (pte_t *) pudp; - else if (sz == PMD_SIZE) - pmdp = pmd_alloc(mm, pudp, addr); + p4dp = p4d_alloc(mm, pgdp, addr); + if (p4dp) { + pudp = pud_alloc(mm, p4dp, addr); + if (pudp) { + if (sz == PUD_SIZE) + return (pte_t *) pudp; + else if (sz == PMD_SIZE) + pmdp = pmd_alloc(mm, pudp, addr); + } } return (pte_t *) pmdp; } @@ -179,16 +183,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp = NULL; pgdp = pgd_offset(mm, addr); if (pgd_present(*pgdp)) { - pudp = pud_offset(pgdp, addr); - if (pud_present(*pudp)) { - if (pud_large(*pudp)) - return (pte_t *) pudp; - pmdp = pmd_offset(pudp, addr); + p4dp = p4d_offset(pgdp, addr); + if (p4d_present(*p4dp)) { + pudp = pud_offset(p4dp, addr); + if (pud_present(*pudp)) { + if (pud_large(*pudp)) + return (pte_t *) pudp; + pmdp = pmd_offset(pudp, addr); + } } } return (pte_t *) pmdp; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ee6a1d3..3348e60 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -81,6 +81,7 @@ void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; unsigned long pgd_type, asce_bits; + psw_t psw; init_mm.pgd = swapper_pg_dir; if (VMALLOC_END > (1UL << 42)) { @@ -100,7 +101,10 @@ void __init paging_init(void) __ctl_load(S390_lowcore.kernel_asce, 1, 1); __ctl_load(S390_lowcore.kernel_asce, 7, 7); __ctl_load(S390_lowcore.kernel_asce, 13, 13); - __arch_local_irq_stosm(0x04); + psw.mask = __extract_psw(); + psw_bits(psw).dat = 1; + psw_bits(psw).as = PSW_BITS_AS_HOME; + __load_psw_mask(psw.mask); sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index b854b1d..2e10d2b 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -120,7 +120,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, check_asce_limit: if (addr + len > current->mm->context.asce_limit) { - rc = crst_table_upgrade(mm); + rc = crst_table_upgrade(mm, addr + len); if (rc) return (unsigned long) rc; } @@ -184,7 +184,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, check_asce_limit: if (addr + len > current->mm->context.asce_limit) { - rc = crst_table_upgrade(mm); + rc = crst_table_upgrade(mm, addr + len); if (rc) return (unsigned long) rc; } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 49e721f..1804815 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -229,14 +229,14 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr, pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); } -static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end, +static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long flags) { unsigned long next; pud_t *pudp; int rc = 0; - pudp = pud_offset(pgd, addr); + pudp = pud_offset(p4d, addr); do { if (pud_none(*pudp)) return -EINVAL; @@ -259,6 +259,26 @@ static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end, return rc; } +static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end, + unsigned long flags) +{ + unsigned long next; + p4d_t *p4dp; + int rc = 0; + + p4dp = p4d_offset(pgd, addr); + do { + if (p4d_none(*p4dp)) + return -EINVAL; + next = p4d_addr_end(addr, end); + rc = walk_pud_level(p4dp, addr, next, flags); + p4dp++; + addr = next; + cond_resched(); + } while (addr < end && !rc); + return rc; +} + static DEFINE_MUTEX(cpa_mutex); static int change_page_attr(unsigned long addr, unsigned long end, @@ -278,7 +298,7 @@ static int change_page_attr(unsigned long addr, unsigned long end, if (pgd_none(*pgdp)) break; next = pgd_addr_end(addr, end); - rc = walk_pud_level(pgdp, addr, next, flags); + rc = walk_p4d_level(pgdp, addr, next, flags); if (rc) break; cond_resched(); @@ -319,6 +339,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) unsigned long address; int nr, i, j; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -326,7 +347,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) for (i = 0; i < numpages;) { address = page_to_phys(page + i); pgd = pgd_offset_k(address); - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + pud = pud_offset(p4d, address); pmd = pmd_offset(pud, address); pte = pte_offset_kernel(pmd, address); nr = (unsigned long)pte >> ilog2(sizeof(long)); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index f502cbe..18918e3 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -76,29 +76,46 @@ static void __crst_table_upgrade(void *arg) __tlb_flush_local(); } -int crst_table_upgrade(struct mm_struct *mm) +int crst_table_upgrade(struct mm_struct *mm, unsigned long end) { unsigned long *table, *pgd; + int rc, notify; - /* upgrade should only happen from 3 to 4 levels */ - BUG_ON(mm->context.asce_limit != (1UL << 42)); - - table = crst_table_alloc(mm); - if (!table) + /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ + BUG_ON(mm->context.asce_limit < (1UL << 42)); + if (end >= TASK_SIZE_MAX) return -ENOMEM; - - spin_lock_bh(&mm->page_table_lock); - pgd = (unsigned long *) mm->pgd; - crst_table_init(table, _REGION2_ENTRY_EMPTY); - pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); - mm->pgd = (pgd_t *) table; - mm->context.asce_limit = 1UL << 53; - mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | _ASCE_TYPE_REGION2; - spin_unlock_bh(&mm->page_table_lock); - - on_each_cpu(__crst_table_upgrade, mm, 0); - return 0; + rc = 0; + notify = 0; + while (mm->context.asce_limit < end) { + table = crst_table_alloc(mm); + if (!table) { + rc = -ENOMEM; + break; + } + spin_lock_bh(&mm->page_table_lock); + pgd = (unsigned long *) mm->pgd; + if (mm->context.asce_limit == (1UL << 42)) { + crst_table_init(table, _REGION2_ENTRY_EMPTY); + p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd); + mm->pgd = (pgd_t *) table; + mm->context.asce_limit = 1UL << 53; + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION2; + } else { + crst_table_init(table, _REGION1_ENTRY_EMPTY); + pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd); + mm->pgd = (pgd_t *) table; + mm->context.asce_limit = -PAGE_SIZE; + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION1; + } + notify = 1; + spin_unlock_bh(&mm->page_table_lock); + } + if (notify) + on_each_cpu(__crst_table_upgrade, mm, 0); + return rc; } void crst_table_downgrade(struct mm_struct *mm) @@ -274,7 +291,7 @@ static void __tlb_remove_table(void *_table) struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT); switch (mask) { - case 0: /* pmd or pud */ + case 0: /* pmd, pud, or p4d */ free_pages((unsigned long) table, 2); break; case 1: /* lower 2K of a 4K page table */ diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 947b66a..d4d409b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -610,6 +610,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) { spinlock_t *ptl; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pgste_t pgste; @@ -618,7 +619,10 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) bool dirty; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return false; + pud = pud_alloc(mm, p4d, addr); if (!pud) return false; pmd = pmd_alloc(mm, pud, addr); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index c33c94b..d839896 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -38,6 +38,17 @@ static void __ref *vmem_alloc_pages(unsigned int order) return (void *) memblock_alloc(size, size); } +static inline p4d_t *vmem_p4d_alloc(void) +{ + p4d_t *p4d = NULL; + + p4d = vmem_alloc_pages(2); + if (!p4d) + return NULL; + clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4); + return p4d; +} + static inline pud_t *vmem_pud_alloc(void) { pud_t *pud = NULL; @@ -85,6 +96,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size) unsigned long end = start + size; unsigned long address = start; pgd_t *pg_dir; + p4d_t *p4_dir; pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; @@ -102,12 +114,19 @@ static int vmem_add_mem(unsigned long start, unsigned long size) while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { + p4_dir = vmem_p4d_alloc(); + if (!p4_dir) + goto out; + pgd_populate(&init_mm, pg_dir, p4_dir); + } + p4_dir = p4d_offset(pg_dir, address); + if (p4d_none(*p4_dir)) { pu_dir = vmem_pud_alloc(); if (!pu_dir) goto out; - pgd_populate(&init_mm, pg_dir, pu_dir); + p4d_populate(&init_mm, p4_dir, pu_dir); } - pu_dir = pud_offset(pg_dir, address); + pu_dir = pud_offset(p4_dir, address); if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) && !debug_pagealloc_enabled()) { @@ -161,6 +180,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) unsigned long end = start + size; unsigned long address = start; pgd_t *pg_dir; + p4d_t *p4_dir; pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; @@ -172,7 +192,12 @@ static void vmem_remove_range(unsigned long start, unsigned long size) address += PGDIR_SIZE; continue; } - pu_dir = pud_offset(pg_dir, address); + p4_dir = p4d_offset(pg_dir, address); + if (p4d_none(*p4_dir)) { + address += P4D_SIZE; + continue; + } + pu_dir = pud_offset(p4_dir, address); if (pud_none(*pu_dir)) { address += PUD_SIZE; continue; @@ -213,6 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) unsigned long pgt_prot, sgt_prot; unsigned long address = start; pgd_t *pg_dir; + p4d_t *p4_dir; pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; @@ -227,13 +253,21 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) for (address = start; address < end;) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { + p4_dir = vmem_p4d_alloc(); + if (!p4_dir) + goto out; + pgd_populate(&init_mm, pg_dir, p4_dir); + } + + p4_dir = p4d_offset(pg_dir, address); + if (p4d_none(*p4_dir)) { pu_dir = vmem_pud_alloc(); if (!pu_dir) goto out; - pgd_populate(&init_mm, pg_dir, pu_dir); + p4d_populate(&init_mm, p4_dir, pu_dir); } - pu_dir = pud_offset(pg_dir, address); + pu_dir = pud_offset(p4_dir, address); if (pud_none(*pu_dir)) { pm_dir = vmem_pmd_alloc(); if (!pm_dir) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 8051df1..7b30af5 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -86,6 +86,25 @@ struct zpci_dev *get_zdev_by_fid(u32 fid) return zdev; } +void zpci_remove_reserved_devices(void) +{ + struct zpci_dev *tmp, *zdev; + enum zpci_state state; + LIST_HEAD(remove); + + spin_lock(&zpci_list_lock); + list_for_each_entry_safe(zdev, tmp, &zpci_list, entry) { + if (zdev->state == ZPCI_FN_STATE_STANDBY && + !clp_get_state(zdev->fid, &state) && + state == ZPCI_FN_STATE_RESERVED) + list_move_tail(&zdev->entry, &remove); + } + spin_unlock(&zpci_list_lock); + + list_for_each_entry_safe(zdev, tmp, &remove, entry) + zpci_remove_device(zdev); +} + static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus) { return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL; @@ -108,6 +127,7 @@ static int zpci_set_airq(struct zpci_dev *zdev) { u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); struct zpci_fib fib = {0}; + u8 status; fib.isc = PCI_ISC; fib.sum = 1; /* enable summary notifications */ @@ -117,60 +137,58 @@ static int zpci_set_airq(struct zpci_dev *zdev) fib.aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8; fib.aisbo = zdev->aisb & 63; - return zpci_mod_fc(req, &fib); + return zpci_mod_fc(req, &fib, &status) ? -EIO : 0; } -struct mod_pci_args { - u64 base; - u64 limit; - u64 iota; - u64 fmb_addr; -}; - -static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args *args) +/* Modify PCI: Unregister adapter interruptions */ +static int zpci_clear_airq(struct zpci_dev *zdev) { - u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, fn); + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT); struct zpci_fib fib = {0}; + u8 cc, status; - fib.pba = args->base; - fib.pal = args->limit; - fib.iota = args->iota; - fib.fmb_addr = args->fmb_addr; + cc = zpci_mod_fc(req, &fib, &status); + if (cc == 3 || (cc == 1 && status == 24)) + /* Function already gone or IRQs already deregistered. */ + cc = 0; - return zpci_mod_fc(req, &fib); + return cc ? -EIO : 0; } /* Modify PCI: Register I/O address translation parameters */ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, u64 base, u64 limit, u64 iota) { - struct mod_pci_args args = { base, limit, iota, 0 }; + u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT); + struct zpci_fib fib = {0}; + u8 status; WARN_ON_ONCE(iota & 0x3fff); - args.iota |= ZPCI_IOTA_RTTO_FLAG; - return mod_pci(zdev, ZPCI_MOD_FC_REG_IOAT, dmaas, &args); + fib.pba = base; + fib.pal = limit; + fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; + return zpci_mod_fc(req, &fib, &status) ? -EIO : 0; } /* Modify PCI: Unregister I/O address translation parameters */ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas) { - struct mod_pci_args args = { 0, 0, 0, 0 }; - - return mod_pci(zdev, ZPCI_MOD_FC_DEREG_IOAT, dmaas, &args); -} - -/* Modify PCI: Unregister adapter interruptions */ -static int zpci_clear_airq(struct zpci_dev *zdev) -{ - struct mod_pci_args args = { 0, 0, 0, 0 }; + u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_DEREG_IOAT); + struct zpci_fib fib = {0}; + u8 cc, status; - return mod_pci(zdev, ZPCI_MOD_FC_DEREG_INT, 0, &args); + cc = zpci_mod_fc(req, &fib, &status); + if (cc == 3) /* Function already gone. */ + cc = 0; + return cc ? -EIO : 0; } /* Modify PCI: Set PCI function measurement parameters */ int zpci_fmb_enable_device(struct zpci_dev *zdev) { - struct mod_pci_args args = { 0, 0, 0, 0 }; + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE); + struct zpci_fib fib = {0}; + u8 cc, status; if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length) return -EINVAL; @@ -185,25 +203,35 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev) atomic64_set(&zdev->mapped_pages, 0); atomic64_set(&zdev->unmapped_pages, 0); - args.fmb_addr = virt_to_phys(zdev->fmb); - return mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args); + fib.fmb_addr = virt_to_phys(zdev->fmb); + cc = zpci_mod_fc(req, &fib, &status); + if (cc) { + kmem_cache_free(zdev_fmb_cache, zdev->fmb); + zdev->fmb = NULL; + } + return cc ? -EIO : 0; } /* Modify PCI: Disable PCI function measurement */ int zpci_fmb_disable_device(struct zpci_dev *zdev) { - struct mod_pci_args args = { 0, 0, 0, 0 }; - int rc; + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE); + struct zpci_fib fib = {0}; + u8 cc, status; if (!zdev->fmb) return -EINVAL; /* Function measurement is disabled if fmb address is zero */ - rc = mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args); + cc = zpci_mod_fc(req, &fib, &status); + if (cc == 3) /* Function already gone. */ + cc = 0; - kmem_cache_free(zdev_fmb_cache, zdev->fmb); - zdev->fmb = NULL; - return rc; + if (!cc) { + kmem_cache_free(zdev_fmb_cache, zdev->fmb); + zdev->fmb = NULL; + } + return cc ? -EIO : 0; } static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) @@ -372,22 +400,21 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) struct msi_msg msg; int rc, irq; + zdev->aisb = -1UL; if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); /* Allocate adapter summary indicator bit */ - rc = -EIO; aisb = airq_iv_alloc_bit(zpci_aisb_iv); if (aisb == -1UL) - goto out; + return -EIO; zdev->aisb = aisb; /* Create adapter interrupt vector */ - rc = -ENOMEM; zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK); if (!zdev->aibv) - goto out_si; + return -ENOMEM; /* Wire up shortcut pointer */ zpci_aibv[aisb] = zdev->aibv; @@ -398,10 +425,10 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) rc = -EIO; irq = irq_alloc_desc(0); /* Alloc irq on node 0 */ if (irq < 0) - goto out_msi; + return -ENOMEM; rc = irq_set_msi_desc(irq, msi); if (rc) - goto out_msi; + return rc; irq_set_chip_and_handler(irq, &zpci_irq_chip, handle_simple_irq); msg.data = hwirq; @@ -415,27 +442,9 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) /* Enable adapter interrupts */ rc = zpci_set_airq(zdev); if (rc) - goto out_msi; + return rc; return (msi_vecs == nvec) ? 0 : msi_vecs; - -out_msi: - for_each_pci_msi_entry(msi, pdev) { - if (hwirq-- == 0) - break; - irq_set_msi_desc(msi->irq, NULL); - irq_free_desc(msi->irq); - msi->msg.address_lo = 0; - msi->msg.address_hi = 0; - msi->msg.data = 0; - msi->irq = 0; - } - zpci_aibv[aisb] = NULL; - airq_iv_release(zdev->aibv); -out_si: - airq_iv_free_bit(zpci_aisb_iv, aisb); -out: - return rc; } void arch_teardown_msi_irqs(struct pci_dev *pdev) @@ -451,6 +460,8 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) /* Release MSI interrupts */ for_each_pci_msi_entry(msi, pdev) { + if (!msi->irq) + continue; if (msi->msi_attrib.is_msix) __pci_msix_desc_mask_irq(msi, 1); else @@ -463,9 +474,15 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) msi->irq = 0; } - zpci_aibv[zdev->aisb] = NULL; - airq_iv_release(zdev->aibv); - airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); + if (zdev->aisb != -1UL) { + zpci_aibv[zdev->aisb] = NULL; + airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); + zdev->aisb = -1UL; + } + if (zdev->aibv) { + airq_iv_release(zdev->aibv); + zdev->aibv = NULL; + } } static void zpci_map_resources(struct pci_dev *pdev) @@ -719,6 +736,16 @@ static int zpci_alloc_domain(struct zpci_dev *zdev) { if (zpci_unique_uid) { zdev->domain = (u16) zdev->uid; + if (zdev->domain >= ZPCI_NR_DEVICES) + return 0; + + spin_lock(&zpci_domain_lock); + if (test_bit(zdev->domain, zpci_domain)) { + spin_unlock(&zpci_domain_lock); + return -EEXIST; + } + set_bit(zdev->domain, zpci_domain); + spin_unlock(&zpci_domain_lock); return 0; } @@ -735,7 +762,7 @@ static int zpci_alloc_domain(struct zpci_dev *zdev) static void zpci_free_domain(struct zpci_dev *zdev) { - if (zpci_unique_uid) + if (zdev->domain >= ZPCI_NR_DEVICES) return; spin_lock(&zpci_domain_lock); @@ -755,6 +782,7 @@ void pcibios_remove_bus(struct pci_bus *bus) list_del(&zdev->entry); spin_unlock(&zpci_list_lock); + zpci_dbg(3, "rem fid:%x\n", zdev->fid); kfree(zdev); } @@ -847,15 +875,14 @@ out: return rc; } -void zpci_stop_device(struct zpci_dev *zdev) +void zpci_remove_device(struct zpci_dev *zdev) { - zpci_dma_exit_device(zdev); - /* - * Note: SCLP disables fh via set-pci-fn so don't - * do that here. - */ + if (!zdev->bus) + return; + + pci_stop_root_bus(zdev->bus); + pci_remove_root_bus(zdev->bus); } -EXPORT_SYMBOL_GPL(zpci_stop_device); int zpci_report_error(struct pci_dev *pdev, struct zpci_report_error_header *report) diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 1c3332a..bd534b4 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -193,12 +193,12 @@ out: int clp_add_pci_device(u32 fid, u32 fh, int configured) { struct zpci_dev *zdev; - int rc; + int rc = -ENOMEM; zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured); zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); if (!zdev) - return -ENOMEM; + goto error; zdev->fh = fh; zdev->fid = fid; @@ -219,6 +219,7 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured) return 0; error: + zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc); kfree(zdev); return rc; } @@ -295,8 +296,8 @@ int clp_disable_fh(struct zpci_dev *zdev) return rc; } -static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, - void (*cb)(struct clp_fh_list_entry *entry)) +static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, void *data, + void (*cb)(struct clp_fh_list_entry *, void *)) { u64 resume_token = 0; int entries, i, rc; @@ -327,21 +328,13 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, resume_token = rrb->response.resume_token; for (i = 0; i < entries; i++) - cb(&rrb->response.fh_list[i]); + cb(&rrb->response.fh_list[i], data); } while (resume_token); out: return rc; } -static void __clp_add(struct clp_fh_list_entry *entry) -{ - if (!entry->vendor_id) - return; - - clp_add_pci_device(entry->fid, entry->fh, entry->config_state); -} - -static void __clp_rescan(struct clp_fh_list_entry *entry) +static void __clp_add(struct clp_fh_list_entry *entry, void *data) { struct zpci_dev *zdev; @@ -349,22 +342,11 @@ static void __clp_rescan(struct clp_fh_list_entry *entry) return; zdev = get_zdev_by_fid(entry->fid); - if (!zdev) { + if (!zdev) clp_add_pci_device(entry->fid, entry->fh, entry->config_state); - return; - } - - if (!entry->config_state) { - /* - * The handle is already disabled, that means no iota/irq freeing via - * the firmware interfaces anymore. Need to free resources manually - * (DMA memory, debug, sysfs)... - */ - zpci_stop_device(zdev); - } } -static void __clp_update(struct clp_fh_list_entry *entry) +static void __clp_update(struct clp_fh_list_entry *entry, void *data) { struct zpci_dev *zdev; @@ -387,7 +369,7 @@ int clp_scan_pci_devices(void) if (!rrb) return -ENOMEM; - rc = clp_list_pci(rrb, __clp_add); + rc = clp_list_pci(rrb, NULL, __clp_add); clp_free_block(rrb); return rc; @@ -398,11 +380,13 @@ int clp_rescan_pci_devices(void) struct clp_req_rsp_list_pci *rrb; int rc; + zpci_remove_reserved_devices(); + rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; - rc = clp_list_pci(rrb, __clp_rescan); + rc = clp_list_pci(rrb, NULL, __clp_add); clp_free_block(rrb); return rc; @@ -417,7 +401,40 @@ int clp_rescan_pci_devices_simple(void) if (!rrb) return -ENOMEM; - rc = clp_list_pci(rrb, __clp_update); + rc = clp_list_pci(rrb, NULL, __clp_update); + + clp_free_block(rrb); + return rc; +} + +struct clp_state_data { + u32 fid; + enum zpci_state state; +}; + +static void __clp_get_state(struct clp_fh_list_entry *entry, void *data) +{ + struct clp_state_data *sd = data; + + if (entry->fid != sd->fid) + return; + + sd->state = entry->config_state; +} + +int clp_get_state(u32 fid, enum zpci_state *state) +{ + struct clp_req_rsp_list_pci *rrb; + struct clp_state_data sd = {fid, ZPCI_FN_STATE_RESERVED}; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, &sd, __clp_get_state); + if (!rc) + *state = sd.state; clp_free_block(rrb); return rc; diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 9081a57..8eb1cc3 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -601,7 +601,9 @@ void zpci_dma_exit_device(struct zpci_dev *zdev) */ WARN_ON(zdev->s390_domain); - zpci_unregister_ioat(zdev, 0); + if (zpci_unregister_ioat(zdev, 0)) + return; + dma_cleanup_tables(zdev->dma_table); zdev->dma_table = NULL; vfree(zdev->iommu_bitmap); diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index c2b27ad..0bbc04a 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -74,6 +74,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); struct pci_dev *pdev = NULL; + enum zpci_state state; int ret; if (zdev) @@ -108,6 +109,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) clp_add_pci_device(ccdf->fid, ccdf->fh, 0); break; case 0x0303: /* Deconfiguration requested */ + if (!zdev) + break; if (pdev) pci_stop_and_remove_bus_device_locked(pdev); @@ -121,7 +124,9 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zdev->state = ZPCI_FN_STATE_STANDBY; break; - case 0x0304: /* Configured -> Standby */ + case 0x0304: /* Configured -> Standby|Reserved */ + if (!zdev) + break; if (pdev) { /* Give the driver a hint that the function is * already unusable. */ @@ -132,6 +137,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zdev->fh = ccdf->fh; zpci_disable_device(zdev); zdev->state = ZPCI_FN_STATE_STANDBY; + if (!clp_get_state(ccdf->fid, &state) && + state == ZPCI_FN_STATE_RESERVED) { + zpci_remove_device(zdev); + } break; case 0x0306: /* 0x308 or 0x302 for multiple devices */ clp_rescan_pci_devices(); @@ -139,8 +148,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0308: /* Standby -> Reserved */ if (!zdev) break; - pci_stop_root_bus(zdev->bus); - pci_remove_root_bus(zdev->bus); + zpci_remove_device(zdev); break; default: break; diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index fa8d7d4..ea34086 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -40,20 +40,20 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status) return cc; } -int zpci_mod_fc(u64 req, struct zpci_fib *fib) +u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status) { - u8 cc, status; + u8 cc; do { - cc = __mpcifc(req, fib, &status); + cc = __mpcifc(req, fib, status); if (cc == 2) msleep(ZPCI_INSN_BUSY_DELAY); } while (cc == 2); if (cc) - zpci_err_insn(cc, status, req, 0); + zpci_err_insn(cc, *status, req, 0); - return (cc) ? -EIO : 0; + return cc; } /* Refresh PCI Translations */ diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index be63fbd..025ea20 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -34,8 +34,6 @@ static struct facility_def facility_defs[] = { 18, /* long displacement facility */ #endif #ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES - 7, /* stfle */ - 17, /* message security assist */ 21, /* extended-immediate facility */ 25, /* store clock fast */ #endif diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index fb1e60f..9c7951b 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -89,6 +89,20 @@ config PKEY requires to have at least one CEX card in coprocessor mode available at runtime. +config CRYPTO_PAES_S390 + tristate "PAES cipher algorithms" + depends on S390 + depends on ZCRYPT + depends on PKEY + select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER + help + This is the s390 hardware accelerated implementation of the + AES cipher algorithms for use with protected key. + + Select this option if you want to use the paes cipher + for example to use protected key encrypted devices. + config CRYPTO_SHA1_S390 tristate "SHA1 digest algorithm" depends on S390 @@ -137,7 +151,6 @@ config CRYPTO_AES_S390 depends on S390 select CRYPTO_ALGAPI select CRYPTO_BLKCIPHER - select PKEY help This is the s390 hardware accelerated implementation of the AES cipher algorithms (FIPS-197). diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index 0acb8c2..31f014b 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -82,10 +82,3 @@ config SCM_BLOCK To compile this driver as a module, choose M here: the module will be called scm_block. - -config SCM_BLOCK_CLUSTER_WRITE - def_bool y - prompt "SCM force cluster writes" - depends on SCM_BLOCK - help - Force writes to Storage Class Memory (SCM) to be in done in clusters. diff --git a/drivers/s390/block/Makefile b/drivers/s390/block/Makefile index c2f4e67..b64e2b3 100644 --- a/drivers/s390/block/Makefile +++ b/drivers/s390/block/Makefile @@ -19,7 +19,4 @@ obj-$(CONFIG_BLK_DEV_XPRAM) += xpram.o obj-$(CONFIG_DCSSBLK) += dcssblk.o scm_block-objs := scm_drv.o scm_blk.o -ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE -scm_block-objs += scm_blk_cluster.o -endif obj-$(CONFIG_SCM_BLOCK) += scm_block.o diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index b7cbd5d..0f1fe4f 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1965,8 +1965,12 @@ static int __dasd_device_is_unusable(struct dasd_device *device, { int mask = ~(DASD_STOPPED_DC_WAIT | DASD_UNRESUMED_PM); - if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) { - /* dasd is being set offline. */ + if (test_bit(DASD_FLAG_OFFLINE, &device->flags) && + !test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { + /* + * dasd is being set offline + * but it is no safe offline where we have to allow I/O + */ return 1; } if (device->stopped) { @@ -3570,57 +3574,69 @@ int dasd_generic_set_offline(struct ccw_device *cdev) else pr_warn("%s: The DASD cannot be set offline while it is in use\n", dev_name(&cdev->dev)); - clear_bit(DASD_FLAG_OFFLINE, &device->flags); - goto out_busy; + rc = -EBUSY; + goto out_err; } } - if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { - /* - * safe offline already running - * could only be called by normal offline so safe_offline flag - * needs to be removed to run normal offline and kill all I/O - */ - if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags)) - /* Already doing normal offline processing */ - goto out_busy; - else - clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags); - } else { - if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) - /* Already doing offline processing */ - goto out_busy; + /* + * Test if the offline processing is already running and exit if so. + * If a safe offline is being processed this could only be a normal + * offline that should be able to overtake the safe offline and + * cancel any I/O we do not want to wait for any longer + */ + if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) { + if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { + clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, + &device->flags); + } else { + rc = -EBUSY; + goto out_err; + } } - set_bit(DASD_FLAG_OFFLINE, &device->flags); - spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); /* - * if safe_offline called set safe_offline_running flag and + * if safe_offline is called set safe_offline_running flag and * clear safe_offline so that a call to normal offline * can overrun safe_offline processing */ if (test_and_clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags) && !test_and_set_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { + /* need to unlock here to wait for outstanding I/O */ + spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); /* * If we want to set the device safe offline all IO operations * should be finished before continuing the offline process * so sync bdev first and then wait for our queues to become * empty */ - /* sync blockdev and partitions */ if (device->block) { rc = fsync_bdev(device->block->bdev); if (rc != 0) goto interrupted; } - /* schedule device tasklet and wait for completion */ dasd_schedule_device_bh(device); rc = wait_event_interruptible(shutdown_waitq, _wait_for_empty_queues(device)); if (rc != 0) goto interrupted; + + /* + * check if a normal offline process overtook the offline + * processing in this case simply do nothing beside returning + * that we got interrupted + * otherwise mark safe offline as not running any longer and + * continue with normal offline + */ + spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + if (!test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { + rc = -ERESTARTSYS; + goto out_err; + } + clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags); } + spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); dasd_set_target_state(device, DASD_STATE_NEW); /* dasd_delete_device destroys the device reference. */ @@ -3632,22 +3648,18 @@ int dasd_generic_set_offline(struct ccw_device *cdev) */ if (block) dasd_free_block(block); + return 0; interrupted: /* interrupted by signal */ - clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags); + spin_lock_irqsave(get_ccwdev_lock(cdev), flags); clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags); clear_bit(DASD_FLAG_OFFLINE, &device->flags); - dasd_put_device(device); - - return rc; - -out_busy: +out_err: dasd_put_device(device); spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); - - return -EBUSY; + return rc; } EXPORT_SYMBOL_GPL(dasd_generic_set_offline); diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 1164b51..7c73512 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -315,45 +315,58 @@ static int __init dasd_parse_range(const char *range) char *features_str = NULL; char *from_str = NULL; char *to_str = NULL; - size_t len = strlen(range) + 1; - char tmp[len]; + int rc = 0; + char *tmp; - strlcpy(tmp, range, len); + tmp = kstrdup(range, GFP_KERNEL); + if (!tmp) + return -ENOMEM; - if (dasd_evaluate_range_param(tmp, &from_str, &to_str, &features_str)) - goto out_err; + if (dasd_evaluate_range_param(tmp, &from_str, &to_str, &features_str)) { + rc = -EINVAL; + goto out; + } - if (dasd_busid(from_str, &from_id0, &from_id1, &from)) - goto out_err; + if (dasd_busid(from_str, &from_id0, &from_id1, &from)) { + rc = -EINVAL; + goto out; + } to = from; to_id0 = from_id0; to_id1 = from_id1; if (to_str) { - if (dasd_busid(to_str, &to_id0, &to_id1, &to)) - goto out_err; + if (dasd_busid(to_str, &to_id0, &to_id1, &to)) { + rc = -EINVAL; + goto out; + } if (from_id0 != to_id0 || from_id1 != to_id1 || from > to) { pr_err("%s is not a valid device range\n", range); - goto out_err; + rc = -EINVAL; + goto out; } } features = dasd_feature_list(features_str); - if (features < 0) - goto out_err; + if (features < 0) { + rc = -EINVAL; + goto out; + } /* each device in dasd= parameter should be set initially online */ features |= DASD_FEATURE_INITIAL_ONLINE; while (from <= to) { sprintf(bus_id, "%01x.%01x.%04x", from_id0, from_id1, from++); devmap = dasd_add_busid(bus_id, features); - if (IS_ERR(devmap)) - return PTR_ERR(devmap); + if (IS_ERR(devmap)) { + rc = PTR_ERR(devmap); + goto out; + } } - return 0; +out: + kfree(tmp); -out_err: - return -EINVAL; + return rc; } /* @@ -735,13 +748,22 @@ static ssize_t dasd_ro_show(struct device *dev, struct device_attribute *attr, char *buf) { struct dasd_devmap *devmap; - int ro_flag; + struct dasd_device *device; + int ro_flag = 0; devmap = dasd_find_busid(dev_name(dev)); - if (!IS_ERR(devmap)) - ro_flag = (devmap->features & DASD_FEATURE_READONLY) != 0; - else - ro_flag = (DASD_FEATURE_DEFAULT & DASD_FEATURE_READONLY) != 0; + if (IS_ERR(devmap)) + goto out; + + ro_flag = !!(devmap->features & DASD_FEATURE_READONLY); + + spin_lock(&dasd_devmap_lock); + device = devmap->device; + if (device) + ro_flag |= test_bit(DASD_FLAG_DEVICE_RO, &device->flags); + spin_unlock(&dasd_devmap_lock); + +out: return snprintf(buf, PAGE_SIZE, ro_flag ? "1\n" : "0\n"); } @@ -764,7 +786,7 @@ dasd_ro_store(struct device *dev, struct device_attribute *attr, device = dasd_device_from_cdev(cdev); if (IS_ERR(device)) - return PTR_ERR(device); + return count; spin_lock_irqsave(get_ccwdev_lock(cdev), flags); val = val || test_bit(DASD_FLAG_DEVICE_RO, &device->flags); @@ -928,11 +950,14 @@ dasd_safe_offline_store(struct device *dev, struct device_attribute *attr, { struct ccw_device *cdev = to_ccwdev(dev); struct dasd_device *device; + unsigned long flags; int rc; - device = dasd_device_from_cdev(cdev); + spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + device = dasd_device_from_cdev_locked(cdev); if (IS_ERR(device)) { rc = PTR_ERR(device); + spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); goto out; } @@ -940,12 +965,14 @@ dasd_safe_offline_store(struct device *dev, struct device_attribute *attr, test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { /* Already doing offline processing */ dasd_put_device(device); + spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); rc = -EBUSY; goto out; } set_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags); dasd_put_device(device); + spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); rc = ccw_device_set_offline(cdev); diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 3c2c84b..42018a2 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -13,6 +13,7 @@ #include <linux/mempool.h> #include <linux/module.h> #include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/genhd.h> #include <linux/slab.h> #include <linux/list.h> @@ -42,7 +43,6 @@ static void __scm_free_rq(struct scm_request *scmrq) struct aob_rq_header *aobrq = to_aobrq(scmrq); free_page((unsigned long) scmrq->aob); - __scm_free_rq_cluster(scmrq); kfree(scmrq->request); kfree(aobrq); } @@ -82,9 +82,6 @@ static int __scm_alloc_rq(void) if (!scmrq->request) goto free; - if (__scm_alloc_rq_cluster(scmrq)) - goto free; - INIT_LIST_HEAD(&scmrq->list); spin_lock_irq(&list_lock); list_add(&scmrq->list, &inactive_requests); @@ -114,13 +111,13 @@ static struct scm_request *scm_request_fetch(void) { struct scm_request *scmrq = NULL; - spin_lock(&list_lock); + spin_lock_irq(&list_lock); if (list_empty(&inactive_requests)) goto out; scmrq = list_first_entry(&inactive_requests, struct scm_request, list); list_del(&scmrq->list); out: - spin_unlock(&list_lock); + spin_unlock_irq(&list_lock); return scmrq; } @@ -234,130 +231,123 @@ static inline void scm_request_init(struct scm_blk_dev *bdev, scmrq->error = BLK_STS_OK; /* We don't use all msbs - place aidaws at the end of the aob page. */ scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io]; - scm_request_cluster_init(scmrq); } -static void scm_ensure_queue_restart(struct scm_blk_dev *bdev) -{ - if (atomic_read(&bdev->queued_reqs)) { - /* Queue restart is triggered by the next interrupt. */ - return; - } - blk_delay_queue(bdev->rq, SCM_QUEUE_DELAY); -} - -void scm_request_requeue(struct scm_request *scmrq) +static void scm_request_requeue(struct scm_request *scmrq) { struct scm_blk_dev *bdev = scmrq->bdev; int i; - scm_release_cluster(scmrq); for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) - blk_requeue_request(bdev->rq, scmrq->request[i]); + blk_mq_requeue_request(scmrq->request[i], false); atomic_dec(&bdev->queued_reqs); scm_request_done(scmrq); - scm_ensure_queue_restart(bdev); + blk_mq_kick_requeue_list(bdev->rq); } -void scm_request_finish(struct scm_request *scmrq) +static void scm_request_finish(struct scm_request *scmrq) { struct scm_blk_dev *bdev = scmrq->bdev; int i; - scm_release_cluster(scmrq); - for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) - blk_end_request_all(scmrq->request[i], scmrq->error); + for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) { + if (scmrq->error) + blk_mq_end_request(scmrq->request[i], scmrq->error); + else + blk_mq_complete_request(scmrq->request[i]); + } atomic_dec(&bdev->queued_reqs); scm_request_done(scmrq); } -static int scm_request_start(struct scm_request *scmrq) +static void scm_request_start(struct scm_request *scmrq) { struct scm_blk_dev *bdev = scmrq->bdev; - int ret; atomic_inc(&bdev->queued_reqs); - if (!scmrq->aob->request.msb_count) { - scm_request_requeue(scmrq); - return -EINVAL; - } - - ret = eadm_start_aob(scmrq->aob); - if (ret) { + if (eadm_start_aob(scmrq->aob)) { SCM_LOG(5, "no subchannel"); scm_request_requeue(scmrq); } - return ret; } -static void scm_blk_request(struct request_queue *rq) +struct scm_queue { + struct scm_request *scmrq; + spinlock_t lock; +}; + +static int scm_blk_request(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *qd) { - struct scm_device *scmdev = rq->queuedata; + struct scm_device *scmdev = hctx->queue->queuedata; struct scm_blk_dev *bdev = dev_get_drvdata(&scmdev->dev); - struct scm_request *scmrq = NULL; - struct request *req; + struct scm_queue *sq = hctx->driver_data; + struct request *req = qd->rq; + struct scm_request *scmrq; - while ((req = blk_peek_request(rq))) { - if (!scm_permit_request(bdev, req)) - goto out; + spin_lock(&sq->lock); + if (!scm_permit_request(bdev, req)) { + spin_unlock(&sq->lock); + return BLK_MQ_RQ_QUEUE_BUSY; + } + scmrq = sq->scmrq; + if (!scmrq) { + scmrq = scm_request_fetch(); if (!scmrq) { - scmrq = scm_request_fetch(); - if (!scmrq) { - SCM_LOG(5, "no request"); - goto out; - } - scm_request_init(bdev, scmrq); + SCM_LOG(5, "no request"); + spin_unlock(&sq->lock); + return BLK_MQ_RQ_QUEUE_BUSY; } - scm_request_set(scmrq, req); + scm_request_init(bdev, scmrq); + sq->scmrq = scmrq; + } + scm_request_set(scmrq, req); - if (!scm_reserve_cluster(scmrq)) { - SCM_LOG(5, "cluster busy"); - scm_request_set(scmrq, NULL); - if (scmrq->aob->request.msb_count) - goto out; + if (scm_request_prepare(scmrq)) { + SCM_LOG(5, "aidaw alloc failed"); + scm_request_set(scmrq, NULL); - scm_request_done(scmrq); - return; - } + if (scmrq->aob->request.msb_count) + scm_request_start(scmrq); - if (scm_need_cluster_request(scmrq)) { - if (scmrq->aob->request.msb_count) { - /* Start cluster requests separately. */ - scm_request_set(scmrq, NULL); - if (scm_request_start(scmrq)) - return; - } else { - atomic_inc(&bdev->queued_reqs); - blk_start_request(req); - scm_initiate_cluster_request(scmrq); - } - scmrq = NULL; - continue; - } + sq->scmrq = NULL; + spin_unlock(&sq->lock); + return BLK_MQ_RQ_QUEUE_BUSY; + } + blk_mq_start_request(req); - if (scm_request_prepare(scmrq)) { - SCM_LOG(5, "aidaw alloc failed"); - scm_request_set(scmrq, NULL); - goto out; - } - blk_start_request(req); + if (qd->last || scmrq->aob->request.msb_count == nr_requests_per_io) { + scm_request_start(scmrq); + sq->scmrq = NULL; + } + spin_unlock(&sq->lock); + return BLK_MQ_RQ_QUEUE_OK; +} - if (scmrq->aob->request.msb_count < nr_requests_per_io) - continue; +static int scm_blk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, + unsigned int idx) +{ + struct scm_queue *qd = kzalloc(sizeof(*qd), GFP_KERNEL); - if (scm_request_start(scmrq)) - return; + if (!qd) + return -ENOMEM; - scmrq = NULL; - } -out: - if (scmrq) - scm_request_start(scmrq); - else - scm_ensure_queue_restart(bdev); + spin_lock_init(&qd->lock); + hctx->driver_data = qd; + + return 0; +} + +static void scm_blk_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int idx) +{ + struct scm_queue *qd = hctx->driver_data; + + WARN_ON(qd->scmrq); + kfree(hctx->driver_data); + hctx->driver_data = NULL; } static void __scmrq_log_error(struct scm_request *scmrq) @@ -377,21 +367,6 @@ static void __scmrq_log_error(struct scm_request *scmrq) scmrq->error); } -void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error) -{ - struct scm_request *scmrq = data; - struct scm_blk_dev *bdev = scmrq->bdev; - - scmrq->error = error; - if (error) - __scmrq_log_error(scmrq); - - spin_lock(&bdev->lock); - list_add_tail(&scmrq->list, &bdev->finished_requests); - spin_unlock(&bdev->lock); - tasklet_hi_schedule(&bdev->tasklet); -} - static void scm_blk_handle_error(struct scm_request *scmrq) { struct scm_blk_dev *bdev = scmrq->bdev; @@ -419,54 +394,46 @@ restart: return; requeue: - spin_lock_irqsave(&bdev->rq_lock, flags); scm_request_requeue(scmrq); - spin_unlock_irqrestore(&bdev->rq_lock, flags); } -static void scm_blk_tasklet(struct scm_blk_dev *bdev) +void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error) { - struct scm_request *scmrq; - unsigned long flags; - - spin_lock_irqsave(&bdev->lock, flags); - while (!list_empty(&bdev->finished_requests)) { - scmrq = list_first_entry(&bdev->finished_requests, - struct scm_request, list); - list_del(&scmrq->list); - spin_unlock_irqrestore(&bdev->lock, flags); + struct scm_request *scmrq = data; - if (scmrq->error && scmrq->retries-- > 0) { + scmrq->error = error; + if (error) { + __scmrq_log_error(scmrq); + if (scmrq->retries-- > 0) { scm_blk_handle_error(scmrq); - - /* Request restarted or requeued, handle next. */ - spin_lock_irqsave(&bdev->lock, flags); - continue; + return; } + } - if (scm_test_cluster_request(scmrq)) { - scm_cluster_request_irq(scmrq); - spin_lock_irqsave(&bdev->lock, flags); - continue; - } + scm_request_finish(scmrq); +} - scm_request_finish(scmrq); - spin_lock_irqsave(&bdev->lock, flags); - } - spin_unlock_irqrestore(&bdev->lock, flags); - /* Look out for more requests. */ - blk_run_queue(bdev->rq); +static void scm_blk_request_done(struct request *req) +{ + blk_mq_end_request(req, 0); } static const struct block_device_operations scm_blk_devops = { .owner = THIS_MODULE, }; +static const struct blk_mq_ops scm_mq_ops = { + .queue_rq = scm_blk_request, + .complete = scm_blk_request_done, + .init_hctx = scm_blk_init_hctx, + .exit_hctx = scm_blk_exit_hctx, +}; + int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) { - struct request_queue *rq; - int len, ret = -ENOMEM; unsigned int devindex, nr_max_blk; + struct request_queue *rq; + int len, ret; devindex = atomic_inc_return(&nr_devices) - 1; /* scma..scmz + scmaa..scmzz */ @@ -477,18 +444,23 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) bdev->scmdev = scmdev; bdev->state = SCM_OPER; - spin_lock_init(&bdev->rq_lock); spin_lock_init(&bdev->lock); - INIT_LIST_HEAD(&bdev->finished_requests); atomic_set(&bdev->queued_reqs, 0); - tasklet_init(&bdev->tasklet, - (void (*)(unsigned long)) scm_blk_tasklet, - (unsigned long) bdev); - rq = blk_init_queue(scm_blk_request, &bdev->rq_lock); - if (!rq) + bdev->tag_set.ops = &scm_mq_ops; + bdev->tag_set.nr_hw_queues = nr_requests; + bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests; + bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + + ret = blk_mq_alloc_tag_set(&bdev->tag_set); + if (ret) goto out; + rq = blk_mq_init_queue(&bdev->tag_set); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + goto out_tag; + } bdev->rq = rq; nr_max_blk = min(scmdev->nr_max_block, (unsigned int) (PAGE_SIZE / sizeof(struct aidaw))); @@ -498,12 +470,12 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) blk_queue_max_segments(rq, nr_max_blk); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rq); queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, rq); - scm_blk_dev_cluster_setup(bdev); bdev->gendisk = alloc_disk(SCM_NR_PARTS); - if (!bdev->gendisk) + if (!bdev->gendisk) { + ret = -ENOMEM; goto out_queue; - + } rq->queuedata = scmdev; bdev->gendisk->private_data = scmdev; bdev->gendisk->fops = &scm_blk_devops; @@ -528,6 +500,8 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) out_queue: blk_cleanup_queue(rq); +out_tag: + blk_mq_free_tag_set(&bdev->tag_set); out: atomic_dec(&nr_devices); return ret; @@ -535,9 +509,9 @@ out: void scm_blk_dev_cleanup(struct scm_blk_dev *bdev) { - tasklet_kill(&bdev->tasklet); del_gendisk(bdev->gendisk); blk_cleanup_queue(bdev->gendisk->queue); + blk_mq_free_tag_set(&bdev->tag_set); put_disk(bdev->gendisk); } @@ -558,7 +532,7 @@ static bool __init scm_blk_params_valid(void) if (!nr_requests_per_io || nr_requests_per_io > 64) return false; - return scm_cluster_size_valid(); + return true; } static int __init scm_blk_init(void) diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h index cd598d1..71288dd 100644 --- a/drivers/s390/block/scm_blk.h +++ b/drivers/s390/block/scm_blk.h @@ -4,6 +4,7 @@ #include <linux/interrupt.h> #include <linux/spinlock.h> #include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/genhd.h> #include <linux/list.h> @@ -14,18 +15,14 @@ #define SCM_QUEUE_DELAY 5 struct scm_blk_dev { - struct tasklet_struct tasklet; struct request_queue *rq; struct gendisk *gendisk; + struct blk_mq_tag_set tag_set; struct scm_device *scmdev; - spinlock_t rq_lock; /* guard the request queue */ - spinlock_t lock; /* guard the rest of the blockdev */ + spinlock_t lock; atomic_t queued_reqs; enum {SCM_OPER, SCM_WR_PROHIBIT} state; struct list_head finished_requests; -#ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE - struct list_head cluster_list; -#endif }; struct scm_request { @@ -36,13 +33,6 @@ struct scm_request { struct list_head list; u8 retries; blk_status_t error; -#ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE - struct { - enum {CLUSTER_NONE, CLUSTER_READ, CLUSTER_WRITE} state; - struct list_head list; - void **buf; - } cluster; -#endif }; #define to_aobrq(rq) container_of((void *) rq, struct aob_rq_header, data) @@ -52,55 +42,11 @@ void scm_blk_dev_cleanup(struct scm_blk_dev *); void scm_blk_set_available(struct scm_blk_dev *); void scm_blk_irq(struct scm_device *, void *, blk_status_t); -void scm_request_finish(struct scm_request *); -void scm_request_requeue(struct scm_request *); - struct aidaw *scm_aidaw_fetch(struct scm_request *scmrq, unsigned int bytes); int scm_drv_init(void); void scm_drv_cleanup(void); -#ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE -void __scm_free_rq_cluster(struct scm_request *); -int __scm_alloc_rq_cluster(struct scm_request *); -void scm_request_cluster_init(struct scm_request *); -bool scm_reserve_cluster(struct scm_request *); -void scm_release_cluster(struct scm_request *); -void scm_blk_dev_cluster_setup(struct scm_blk_dev *); -bool scm_need_cluster_request(struct scm_request *); -void scm_initiate_cluster_request(struct scm_request *); -void scm_cluster_request_irq(struct scm_request *); -bool scm_test_cluster_request(struct scm_request *); -bool scm_cluster_size_valid(void); -#else /* CONFIG_SCM_BLOCK_CLUSTER_WRITE */ -static inline void __scm_free_rq_cluster(struct scm_request *scmrq) {} -static inline int __scm_alloc_rq_cluster(struct scm_request *scmrq) -{ - return 0; -} -static inline void scm_request_cluster_init(struct scm_request *scmrq) {} -static inline bool scm_reserve_cluster(struct scm_request *scmrq) -{ - return true; -} -static inline void scm_release_cluster(struct scm_request *scmrq) {} -static inline void scm_blk_dev_cluster_setup(struct scm_blk_dev *bdev) {} -static inline bool scm_need_cluster_request(struct scm_request *scmrq) -{ - return false; -} -static inline void scm_initiate_cluster_request(struct scm_request *scmrq) {} -static inline void scm_cluster_request_irq(struct scm_request *scmrq) {} -static inline bool scm_test_cluster_request(struct scm_request *scmrq) -{ - return false; -} -static inline bool scm_cluster_size_valid(void) -{ - return true; -} -#endif /* CONFIG_SCM_BLOCK_CLUSTER_WRITE */ - extern debug_info_t *scm_debug; #define SCM_LOG(imp, txt) do { \ diff --git a/drivers/s390/block/scm_blk_cluster.c b/drivers/s390/block/scm_blk_cluster.c deleted file mode 100644 index 7497ddde..0000000 --- a/drivers/s390/block/scm_blk_cluster.c +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Block driver for s390 storage class memory. - * - * Copyright IBM Corp. 2012 - * Author(s): Sebastian Ott <sebott@linux.vnet.ibm.com> - */ - -#include <linux/spinlock.h> -#include <linux/module.h> -#include <linux/blkdev.h> -#include <linux/genhd.h> -#include <linux/slab.h> -#include <linux/list.h> -#include <asm/eadm.h> -#include "scm_blk.h" - -static unsigned int write_cluster_size = 64; -module_param(write_cluster_size, uint, S_IRUGO); -MODULE_PARM_DESC(write_cluster_size, - "Number of pages used for contiguous writes."); - -#define CLUSTER_SIZE (write_cluster_size * PAGE_SIZE) - -void __scm_free_rq_cluster(struct scm_request *scmrq) -{ - int i; - - if (!scmrq->cluster.buf) - return; - - for (i = 0; i < 2 * write_cluster_size; i++) - free_page((unsigned long) scmrq->cluster.buf[i]); - - kfree(scmrq->cluster.buf); -} - -int __scm_alloc_rq_cluster(struct scm_request *scmrq) -{ - int i; - - scmrq->cluster.buf = kzalloc(sizeof(void *) * 2 * write_cluster_size, - GFP_KERNEL); - if (!scmrq->cluster.buf) - return -ENOMEM; - - for (i = 0; i < 2 * write_cluster_size; i++) { - scmrq->cluster.buf[i] = (void *) get_zeroed_page(GFP_DMA); - if (!scmrq->cluster.buf[i]) - return -ENOMEM; - } - INIT_LIST_HEAD(&scmrq->cluster.list); - return 0; -} - -void scm_request_cluster_init(struct scm_request *scmrq) -{ - scmrq->cluster.state = CLUSTER_NONE; -} - -static bool clusters_intersect(struct request *A, struct request *B) -{ - unsigned long firstA, lastA, firstB, lastB; - - firstA = ((u64) blk_rq_pos(A) << 9) / CLUSTER_SIZE; - lastA = (((u64) blk_rq_pos(A) << 9) + - blk_rq_bytes(A) - 1) / CLUSTER_SIZE; - - firstB = ((u64) blk_rq_pos(B) << 9) / CLUSTER_SIZE; - lastB = (((u64) blk_rq_pos(B) << 9) + - blk_rq_bytes(B) - 1) / CLUSTER_SIZE; - - return (firstB <= lastA && firstA <= lastB); -} - -bool scm_reserve_cluster(struct scm_request *scmrq) -{ - struct request *req = scmrq->request[scmrq->aob->request.msb_count]; - struct scm_blk_dev *bdev = scmrq->bdev; - struct scm_request *iter; - int pos, add = 1; - - if (write_cluster_size == 0) - return true; - - spin_lock(&bdev->lock); - list_for_each_entry(iter, &bdev->cluster_list, cluster.list) { - if (iter == scmrq) { - /* - * We don't have to use clusters_intersect here, since - * cluster requests are always started separately. - */ - add = 0; - continue; - } - for (pos = 0; pos < iter->aob->request.msb_count; pos++) { - if (clusters_intersect(req, iter->request[pos]) && - (rq_data_dir(req) == WRITE || - rq_data_dir(iter->request[pos]) == WRITE)) { - spin_unlock(&bdev->lock); - return false; - } - } - } - if (add) - list_add(&scmrq->cluster.list, &bdev->cluster_list); - spin_unlock(&bdev->lock); - - return true; -} - -void scm_release_cluster(struct scm_request *scmrq) -{ - struct scm_blk_dev *bdev = scmrq->bdev; - unsigned long flags; - - if (write_cluster_size == 0) - return; - - spin_lock_irqsave(&bdev->lock, flags); - list_del(&scmrq->cluster.list); - spin_unlock_irqrestore(&bdev->lock, flags); -} - -void scm_blk_dev_cluster_setup(struct scm_blk_dev *bdev) -{ - INIT_LIST_HEAD(&bdev->cluster_list); - blk_queue_io_opt(bdev->rq, CLUSTER_SIZE); -} - -static int scm_prepare_cluster_request(struct scm_request *scmrq) -{ - struct scm_blk_dev *bdev = scmrq->bdev; - struct scm_device *scmdev = bdev->gendisk->private_data; - struct request *req = scmrq->request[0]; - struct msb *msb = &scmrq->aob->msb[0]; - struct req_iterator iter; - struct aidaw *aidaw; - struct bio_vec bv; - int i = 0; - u64 addr; - - switch (scmrq->cluster.state) { - case CLUSTER_NONE: - scmrq->cluster.state = CLUSTER_READ; - /* fall through */ - case CLUSTER_READ: - msb->bs = MSB_BS_4K; - msb->oc = MSB_OC_READ; - msb->flags = MSB_FLAG_IDA; - msb->blk_count = write_cluster_size; - - addr = scmdev->address + ((u64) blk_rq_pos(req) << 9); - msb->scm_addr = round_down(addr, CLUSTER_SIZE); - - if (msb->scm_addr != - round_down(addr + (u64) blk_rq_bytes(req) - 1, - CLUSTER_SIZE)) - msb->blk_count = 2 * write_cluster_size; - - aidaw = scm_aidaw_fetch(scmrq, msb->blk_count * PAGE_SIZE); - if (!aidaw) - return -ENOMEM; - - scmrq->aob->request.msb_count = 1; - msb->data_addr = (u64) aidaw; - for (i = 0; i < msb->blk_count; i++) { - aidaw->data_addr = (u64) scmrq->cluster.buf[i]; - aidaw++; - } - - break; - case CLUSTER_WRITE: - aidaw = (void *) msb->data_addr; - msb->oc = MSB_OC_WRITE; - - for (addr = msb->scm_addr; - addr < scmdev->address + ((u64) blk_rq_pos(req) << 9); - addr += PAGE_SIZE) { - aidaw->data_addr = (u64) scmrq->cluster.buf[i]; - aidaw++; - i++; - } - rq_for_each_segment(bv, req, iter) { - aidaw->data_addr = (u64) page_address(bv.bv_page); - aidaw++; - i++; - } - for (; i < msb->blk_count; i++) { - aidaw->data_addr = (u64) scmrq->cluster.buf[i]; - aidaw++; - } - break; - } - return 0; -} - -bool scm_need_cluster_request(struct scm_request *scmrq) -{ - int pos = scmrq->aob->request.msb_count; - - if (rq_data_dir(scmrq->request[pos]) == READ) - return false; - - return blk_rq_bytes(scmrq->request[pos]) < CLUSTER_SIZE; -} - -/* Called with queue lock held. */ -void scm_initiate_cluster_request(struct scm_request *scmrq) -{ - if (scm_prepare_cluster_request(scmrq)) - goto requeue; - if (eadm_start_aob(scmrq->aob)) - goto requeue; - return; -requeue: - scm_request_requeue(scmrq); -} - -bool scm_test_cluster_request(struct scm_request *scmrq) -{ - return scmrq->cluster.state != CLUSTER_NONE; -} - -void scm_cluster_request_irq(struct scm_request *scmrq) -{ - struct scm_blk_dev *bdev = scmrq->bdev; - unsigned long flags; - - switch (scmrq->cluster.state) { - case CLUSTER_NONE: - BUG(); - break; - case CLUSTER_READ: - if (scmrq->error) { - scm_request_finish(scmrq); - break; - } - scmrq->cluster.state = CLUSTER_WRITE; - spin_lock_irqsave(&bdev->rq_lock, flags); - scm_initiate_cluster_request(scmrq); - spin_unlock_irqrestore(&bdev->rq_lock, flags); - break; - case CLUSTER_WRITE: - scm_request_finish(scmrq); - break; - } -} - -bool scm_cluster_size_valid(void) -{ - if (write_cluster_size == 1 || write_cluster_size > 128) - return false; - - return !(write_cluster_size & (write_cluster_size - 1)); -} diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index e2aa944..d3e504c 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -296,6 +296,51 @@ static const struct attribute_group *default_subch_attr_groups[] = { NULL, }; +static ssize_t chpids_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct subchannel *sch = to_subchannel(dev); + struct chsc_ssd_info *ssd = &sch->ssd_info; + ssize_t ret = 0; + int mask; + int chp; + + for (chp = 0; chp < 8; chp++) { + mask = 0x80 >> chp; + if (ssd->path_mask & mask) + ret += sprintf(buf + ret, "%02x ", ssd->chpid[chp].id); + else + ret += sprintf(buf + ret, "00 "); + } + ret += sprintf(buf + ret, "\n"); + return ret; +} +static DEVICE_ATTR(chpids, 0444, chpids_show, NULL); + +static ssize_t pimpampom_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct subchannel *sch = to_subchannel(dev); + struct pmcw *pmcw = &sch->schib.pmcw; + + return sprintf(buf, "%02x %02x %02x\n", + pmcw->pim, pmcw->pam, pmcw->pom); +} +static DEVICE_ATTR(pimpampom, 0444, pimpampom_show, NULL); + +static struct attribute *io_subchannel_type_attrs[] = { + &dev_attr_chpids.attr, + &dev_attr_pimpampom.attr, + NULL, +}; +ATTRIBUTE_GROUPS(io_subchannel_type); + +static const struct device_type io_subchannel_type = { + .groups = io_subchannel_type_groups, +}; + int css_register_subchannel(struct subchannel *sch) { int ret; @@ -304,6 +349,10 @@ int css_register_subchannel(struct subchannel *sch) sch->dev.parent = &channel_subsystems[0]->device; sch->dev.bus = &css_bus_type; sch->dev.groups = default_subch_attr_groups; + + if (sch->st == SUBCHANNEL_TYPE_IO) + sch->dev.type = &io_subchannel_type; + /* * We don't want to generate uevents for I/O subchannels that don't * have a working ccw device behind them since they will be diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index b8006ea..7be01a5 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -208,44 +208,6 @@ int __init io_subchannel_init(void) /************************ device handling **************************/ -/* - * A ccw_device has some interfaces in sysfs in addition to the - * standard ones. - * The following entries are designed to export the information which - * resided in 2.4 in /proc/subchannels. Subchannel and device number - * are obvious, so they don't have an entry :) - * TODO: Split chpids and pimpampom up? Where is "in use" in the tree? - */ -static ssize_t -chpids_show (struct device * dev, struct device_attribute *attr, char * buf) -{ - struct subchannel *sch = to_subchannel(dev); - struct chsc_ssd_info *ssd = &sch->ssd_info; - ssize_t ret = 0; - int chp; - int mask; - - for (chp = 0; chp < 8; chp++) { - mask = 0x80 >> chp; - if (ssd->path_mask & mask) - ret += sprintf(buf + ret, "%02x ", ssd->chpid[chp].id); - else - ret += sprintf(buf + ret, "00 "); - } - ret += sprintf (buf+ret, "\n"); - return min((ssize_t)PAGE_SIZE, ret); -} - -static ssize_t -pimpampom_show (struct device * dev, struct device_attribute *attr, char * buf) -{ - struct subchannel *sch = to_subchannel(dev); - struct pmcw *pmcw = &sch->schib.pmcw; - - return sprintf (buf, "%02x %02x %02x\n", - pmcw->pim, pmcw->pam, pmcw->pom); -} - static ssize_t devtype_show (struct device *dev, struct device_attribute *attr, char *buf) { @@ -636,8 +598,6 @@ static ssize_t vpm_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%02x\n", sch->vpm); } -static DEVICE_ATTR(chpids, 0444, chpids_show, NULL); -static DEVICE_ATTR(pimpampom, 0444, pimpampom_show, NULL); static DEVICE_ATTR(devtype, 0444, devtype_show, NULL); static DEVICE_ATTR(cutype, 0444, cutype_show, NULL); static DEVICE_ATTR(modalias, 0444, modalias_show, NULL); @@ -647,8 +607,6 @@ static DEVICE_ATTR(logging, 0200, NULL, initiate_logging); static DEVICE_ATTR(vpm, 0444, vpm_show, NULL); static struct attribute *io_subchannel_attrs[] = { - &dev_attr_chpids.attr, - &dev_attr_pimpampom.attr, &dev_attr_logging.attr, &dev_attr_vpm.attr, NULL, diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index e90dd43..a25367e 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -90,54 +90,6 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work) } /* - * Sysfs interfaces - */ -static ssize_t chpids_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct subchannel *sch = to_subchannel(dev); - struct chsc_ssd_info *ssd = &sch->ssd_info; - ssize_t ret = 0; - int chp; - int mask; - - for (chp = 0; chp < 8; chp++) { - mask = 0x80 >> chp; - if (ssd->path_mask & mask) - ret += sprintf(buf + ret, "%02x ", ssd->chpid[chp].id); - else - ret += sprintf(buf + ret, "00 "); - } - ret += sprintf(buf+ret, "\n"); - return ret; -} - -static ssize_t pimpampom_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct subchannel *sch = to_subchannel(dev); - struct pmcw *pmcw = &sch->schib.pmcw; - - return sprintf(buf, "%02x %02x %02x\n", - pmcw->pim, pmcw->pam, pmcw->pom); -} - -static DEVICE_ATTR(chpids, 0444, chpids_show, NULL); -static DEVICE_ATTR(pimpampom, 0444, pimpampom_show, NULL); - -static struct attribute *vfio_subchannel_attrs[] = { - &dev_attr_chpids.attr, - &dev_attr_pimpampom.attr, - NULL, -}; - -static struct attribute_group vfio_subchannel_attr_group = { - .attrs = vfio_subchannel_attrs, -}; - -/* * Css driver callbacks */ static void vfio_ccw_sch_irq(struct subchannel *sch) @@ -174,13 +126,9 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) if (ret) goto out_free; - ret = sysfs_create_group(&sch->dev.kobj, &vfio_subchannel_attr_group); - if (ret) - goto out_disable; - ret = vfio_ccw_mdev_reg(sch); if (ret) - goto out_rm_group; + goto out_disable; INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo); atomic_set(&private->avail, 1); @@ -188,8 +136,6 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) return 0; -out_rm_group: - sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group); out_disable: cio_disable_subchannel(sch); out_free: @@ -206,8 +152,6 @@ static int vfio_ccw_sch_remove(struct subchannel *sch) vfio_ccw_mdev_unreg(sch); - sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group); - dev_set_drvdata(&sch->dev, NULL); kfree(private); diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index ea09991..6dee598 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -766,7 +766,7 @@ static ssize_t ap_domain_store(struct bus_type *bus, ap_domain_index = domain; spin_unlock_bh(&ap_domain_lock); - AP_DBF(DBF_DEBUG, "store new default domain=%d\n", domain); + AP_DBF(DBF_DEBUG, "stored new default domain=%d\n", domain); return count; } @@ -952,6 +952,7 @@ static int ap_select_domain(void) } if (best_domain >= 0){ ap_domain_index = best_domain; + AP_DBF(DBF_DEBUG, "new ap_domain_index=%d\n", ap_domain_index); spin_unlock_bh(&ap_domain_lock); return 0; } @@ -988,7 +989,7 @@ static void ap_scan_bus(struct work_struct *unused) ap_qid_t qid; int depth = 0, type = 0; unsigned int functions = 0; - int rc, id, dom, borked, domains; + int rc, id, dom, borked, domains, defdomdevs = 0; AP_DBF(DBF_DEBUG, "ap_scan_bus running\n"); @@ -1052,6 +1053,8 @@ static void ap_scan_bus(struct work_struct *unused) put_device(dev); if (!borked) { domains++; + if (dom == ap_domain_index) + defdomdevs++; continue; } } @@ -1098,6 +1101,8 @@ static void ap_scan_bus(struct work_struct *unused) continue; } domains++; + if (dom == ap_domain_index) + defdomdevs++; } /* end domain loop */ if (ac) { /* remove card dev if there are no queue devices */ @@ -1106,6 +1111,11 @@ static void ap_scan_bus(struct work_struct *unused) put_device(&ac->ap_dev.device); } } /* end device loop */ + + if (defdomdevs < 1) + AP_DBF(DBF_INFO, "no queue device with default domain %d available\n", + ap_domain_index); + out: mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ); } @@ -1174,14 +1184,14 @@ int __init ap_module_init(void) ap_init_configuration(); if (ap_configuration) - max_domain_id = ap_max_domain_id ? : (AP_DOMAINS - 1); + max_domain_id = + ap_max_domain_id ? ap_max_domain_id : AP_DOMAINS - 1; else max_domain_id = 15; if (ap_domain_index < -1 || ap_domain_index > max_domain_id) { pr_warn("%d is not a valid cryptographic domain\n", ap_domain_index); - rc = -EINVAL; - goto out_free; + ap_domain_index = -1; } /* In resume callback we need to know if the user had set the domain. * If so, we can not just reset it. @@ -1254,7 +1264,6 @@ out: unregister_reset_call(&ap_reset_call); if (ap_using_interrupts()) unregister_adapter_interrupt(&ap_airq); -out_free: kfree(ap_configuration); return rc; } diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index ea86da8..f61fa47 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -178,9 +178,9 @@ static inline void prep_xcrb(struct ica_xcRB *pxcrb, pxcrb->user_defined = (cardnr == 0xFFFF ? AUTOSELECT : cardnr); pxcrb->request_control_blk_length = preqcblk->cprb_len + preqcblk->req_parml; - pxcrb->request_control_blk_addr = (void *) preqcblk; + pxcrb->request_control_blk_addr = (void __user *) preqcblk; pxcrb->reply_control_blk_length = preqcblk->rpl_msgbl; - pxcrb->reply_control_blk_addr = (void *) prepcblk; + pxcrb->reply_control_blk_addr = (void __user *) prepcblk; } /* @@ -1194,7 +1194,7 @@ static struct miscdevice pkey_dev = { /* * Module init */ -int __init pkey_init(void) +static int __init pkey_init(void) { cpacf_mask_t pckmo_functions; diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 93015f8..b1c27e2 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -821,8 +821,10 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, do { rc = zcrypt_rsa_modexpo(&mex); } while (rc == -EAGAIN); - if (rc) + if (rc) { + ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSAMODEXPO rc=%d", rc); return rc; + } return put_user(mex.outputdatalength, &umex->outputdatalength); } case ICARSACRT: { @@ -838,8 +840,10 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, do { rc = zcrypt_rsa_crt(&crt); } while (rc == -EAGAIN); - if (rc) + if (rc) { + ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSACRT rc=%d", rc); return rc; + } return put_user(crt.outputdatalength, &ucrt->outputdatalength); } case ZSECSENDCPRB: { @@ -855,6 +859,8 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, do { rc = zcrypt_send_cprb(&xcRB); } while (rc == -EAGAIN); + if (rc) + ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDCPRB rc=%d", rc); if (copy_to_user(uxcRB, &xcRB, sizeof(xcRB))) return -EFAULT; return rc; @@ -872,6 +878,8 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, do { rc = zcrypt_send_ep11_cprb(&xcrb); } while (rc == -EAGAIN); + if (rc) + ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDEP11CPRB rc=%d", rc); if (copy_to_user(uxcrb, &xcrb, sizeof(xcrb))) return -EFAULT; return rc; diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h index ca0cdbe..12cff62 100644 --- a/drivers/s390/crypto/zcrypt_cca_key.h +++ b/drivers/s390/crypto/zcrypt_cca_key.h @@ -48,26 +48,6 @@ struct cca_token_hdr { #define CCA_TKN_HDR_ID_EXT 0x1E -/** - * mapping for the cca private ME section - */ -struct cca_private_ext_ME_sec { - unsigned char section_identifier; - unsigned char version; - unsigned short section_length; - unsigned char private_key_hash[20]; - unsigned char reserved1[4]; - unsigned char key_format; - unsigned char reserved2; - unsigned char key_name_hash[20]; - unsigned char key_use_flags[4]; - unsigned char reserved3[6]; - unsigned char reserved4[24]; - unsigned char confounder[24]; - unsigned char exponent[128]; - unsigned char modulus[128]; -} __attribute__((packed)); - #define CCA_PVT_USAGE_ALL 0x80 /** @@ -124,77 +104,6 @@ struct cca_pvt_ext_CRT_sec { #define CCA_PVT_EXT_CRT_SEC_FMT_CL 0x40 /** - * Set up private key fields of a type6 MEX message. - * Note that all numerics in the key token are big-endian, - * while the entries in the key block header are little-endian. - * - * @mex: pointer to user input data - * @p: pointer to memory area for the key - * - * Returns the size of the key area or -EFAULT - */ -static inline int zcrypt_type6_mex_key_de(struct ica_rsa_modexpo *mex, - void *p, int big_endian) -{ - static struct cca_token_hdr static_pvt_me_hdr = { - .token_identifier = 0x1E, - .token_length = 0x0183, - }; - static struct cca_private_ext_ME_sec static_pvt_me_sec = { - .section_identifier = 0x02, - .section_length = 0x016C, - .key_use_flags = {0x80,0x00,0x00,0x00}, - }; - static struct cca_public_sec static_pub_me_sec = { - .section_identifier = 0x04, - .section_length = 0x000F, - .exponent_len = 0x0003, - }; - static char pk_exponent[3] = { 0x01, 0x00, 0x01 }; - struct { - struct T6_keyBlock_hdr t6_hdr; - struct cca_token_hdr pvtMeHdr; - struct cca_private_ext_ME_sec pvtMeSec; - struct cca_public_sec pubMeSec; - char exponent[3]; - } __attribute__((packed)) *key = p; - unsigned char *temp; - - memset(key, 0, sizeof(*key)); - - if (big_endian) { - key->t6_hdr.blen = cpu_to_be16(0x189); - key->t6_hdr.ulen = cpu_to_be16(0x189 - 2); - } else { - key->t6_hdr.blen = cpu_to_le16(0x189); - key->t6_hdr.ulen = cpu_to_le16(0x189 - 2); - } - key->pvtMeHdr = static_pvt_me_hdr; - key->pvtMeSec = static_pvt_me_sec; - key->pubMeSec = static_pub_me_sec; - /* - * In a private key, the modulus doesn't appear in the public - * section. So, an arbitrary public exponent of 0x010001 will be - * used. - */ - memcpy(key->exponent, pk_exponent, 3); - - /* key parameter block */ - temp = key->pvtMeSec.exponent + - sizeof(key->pvtMeSec.exponent) - mex->inputdatalength; - if (copy_from_user(temp, mex->b_key, mex->inputdatalength)) - return -EFAULT; - - /* modulus */ - temp = key->pvtMeSec.modulus + - sizeof(key->pvtMeSec.modulus) - mex->inputdatalength; - if (copy_from_user(temp, mex->n_modulus, mex->inputdatalength)) - return -EFAULT; - key->pubMeSec.modulus_bit_len = 8 * mex->inputdatalength; - return sizeof(*key); -} - -/** * Set up private key fields of a type6 MEX message. The _pad variant * strips leading zeroes from the b_key. * Note that all numerics in the key token are big-endian, @@ -205,8 +114,7 @@ static inline int zcrypt_type6_mex_key_de(struct ica_rsa_modexpo *mex, * * Returns the size of the key area or -EFAULT */ -static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex, - void *p, int big_endian) +static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex, void *p) { static struct cca_token_hdr static_pub_hdr = { .token_identifier = 0x1E, @@ -251,13 +159,8 @@ static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex, 2*mex->inputdatalength - i; key->pubHdr.token_length = key->pubSec.section_length + sizeof(key->pubHdr); - if (big_endian) { - key->t6_hdr.ulen = cpu_to_be16(key->pubHdr.token_length + 4); - key->t6_hdr.blen = cpu_to_be16(key->pubHdr.token_length + 6); - } else { - key->t6_hdr.ulen = cpu_to_le16(key->pubHdr.token_length + 4); - key->t6_hdr.blen = cpu_to_le16(key->pubHdr.token_length + 6); - } + key->t6_hdr.ulen = key->pubHdr.token_length + 4; + key->t6_hdr.blen = key->pubHdr.token_length + 6; return sizeof(*key) + 2*mex->inputdatalength - i; } @@ -271,8 +174,7 @@ static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex, * * Returns the size of the key area or -EFAULT */ -static inline int zcrypt_type6_crt_key(struct ica_rsa_modexpo_crt *crt, - void *p, int big_endian) +static inline int zcrypt_type6_crt_key(struct ica_rsa_modexpo_crt *crt, void *p) { static struct cca_public_sec static_cca_pub_sec = { .section_identifier = 4, @@ -298,13 +200,8 @@ static inline int zcrypt_type6_crt_key(struct ica_rsa_modexpo_crt *crt, size = sizeof(*key) + key_len + sizeof(*pub) + 3; /* parameter block.key block */ - if (big_endian) { - key->t6_hdr.blen = cpu_to_be16(size); - key->t6_hdr.ulen = cpu_to_be16(size - 2); - } else { - key->t6_hdr.blen = cpu_to_le16(size); - key->t6_hdr.ulen = cpu_to_le16(size - 2); - } + key->t6_hdr.blen = size; + key->t6_hdr.ulen = size - 2; /* key token header */ key->token.token_identifier = CCA_TKN_HDR_ID_EXT; diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index e5563ff..4fddb43 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -291,7 +291,7 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq, return -EFAULT; /* Set up key which is located after the variable length text. */ - size = zcrypt_type6_mex_key_en(mex, msg->text+mex->inputdatalength, 1); + size = zcrypt_type6_mex_key_en(mex, msg->text+mex->inputdatalength); if (size < 0) return size; size += sizeof(*msg) + mex->inputdatalength; @@ -353,7 +353,7 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq, return -EFAULT; /* Set up key which is located after the variable length text. */ - size = zcrypt_type6_crt_key(crt, msg->text + crt->inputdatalength, 1); + size = zcrypt_type6_crt_key(crt, msg->text + crt->inputdatalength); if (size < 0) return size; size += sizeof(*msg) + crt->inputdatalength; /* total size of msg */ diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig index 574da15..b8d5ea0 100644 --- a/drivers/tty/hvc/Kconfig +++ b/drivers/tty/hvc/Kconfig @@ -44,7 +44,7 @@ config HVC_RTAS config HVC_IUCV bool "z/VM IUCV Hypervisor console support (VM only)" - depends on S390 + depends on S390 && NET select HVC_DRIVER select IUCV default y |