diff options
73 files changed, 2800 insertions, 2632 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3be9c83..683af90 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -254,12 +254,12 @@ config MARCH_ZEC12 older machines. config MARCH_Z13 - bool "IBM z13" + bool "IBM z13s and z13" select HAVE_MARCH_Z13_FEATURES help - Select this to enable optimizations for IBM z13 (2964 series). - The kernel will be slightly faster but will not work on older - machines. + Select this to enable optimizations for IBM z13s and z13 (2965 and + 2964 series). The kernel will be slightly faster but will not work on + older machines. endchoice diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h index a0e71a5..5687d62 100644 --- a/arch/s390/include/asm/clp.h +++ b/arch/s390/include/asm/clp.h @@ -4,14 +4,23 @@ /* CLP common request & response block size */ #define CLP_BLK_SIZE PAGE_SIZE +#define CLP_LPS_BASE 0 +#define CLP_LPS_PCI 2 + struct clp_req_hdr { u16 len; u16 cmd; + u32 fmt : 4; + u32 reserved1 : 28; + u64 reserved2; } __packed; struct clp_rsp_hdr { u16 len; u16 rsp; + u32 fmt : 4; + u32 reserved1 : 28; + u64 reserved2; } __packed; /* CLP Response Codes */ @@ -25,4 +34,22 @@ struct clp_rsp_hdr { #define CLP_RC_NODATA 0x0080 /* No data available */ #define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */ +/* Store logical-processor characteristics request */ +struct clp_req_slpc { + struct clp_req_hdr hdr; +} __packed; + +struct clp_rsp_slpc { + struct clp_rsp_hdr hdr; + u32 reserved2[4]; + u32 lpif[8]; + u32 reserved3[8]; + u32 lpic[8]; +} __packed; + +struct clp_req_rsp_slpc { + struct clp_req_slpc request; + struct clp_rsp_slpc response; +} __packed; + #endif diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h new file mode 100644 index 0000000..d054c1b --- /dev/null +++ b/arch/s390/include/asm/gmap.h @@ -0,0 +1,64 @@ +/* + * KVM guest address space mapping code + * + * Copyright IBM Corp. 2007, 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef _ASM_S390_GMAP_H +#define _ASM_S390_GMAP_H + +/** + * struct gmap_struct - guest address space + * @crst_list: list of all crst tables used in the guest address space + * @mm: pointer to the parent mm_struct + * @guest_to_host: radix tree with guest to host address translation + * @host_to_guest: radix tree with pointer to segment table entries + * @guest_table_lock: spinlock to protect all entries in the guest page table + * @table: pointer to the page directory + * @asce: address space control element for gmap page table + * @pfault_enabled: defines if pfaults are applicable for the guest + */ +struct gmap { + struct list_head list; + struct list_head crst_list; + struct mm_struct *mm; + struct radix_tree_root guest_to_host; + struct radix_tree_root host_to_guest; + spinlock_t guest_table_lock; + unsigned long *table; + unsigned long asce; + unsigned long asce_end; + void *private; + bool pfault_enabled; +}; + +/** + * struct gmap_notifier - notify function block for page invalidation + * @notifier_call: address of callback function + */ +struct gmap_notifier { + struct list_head list; + void (*notifier_call)(struct gmap *gmap, unsigned long gaddr); +}; + +struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit); +void gmap_free(struct gmap *gmap); +void gmap_enable(struct gmap *gmap); +void gmap_disable(struct gmap *gmap); +int gmap_map_segment(struct gmap *gmap, unsigned long from, + unsigned long to, unsigned long len); +int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); +unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); +unsigned long gmap_translate(struct gmap *, unsigned long gaddr); +int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); +int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); +void gmap_discard(struct gmap *, unsigned long from, unsigned long to); +void __gmap_zap(struct gmap *, unsigned long gaddr); +void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr); + +void gmap_register_ipte_notifier(struct gmap_notifier *); +void gmap_unregister_ipte_notifier(struct gmap_notifier *); +int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); + +#endif /* _ASM_S390_GMAP_H */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index c873e68..f833082d 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -45,7 +45,7 @@ struct zpci_fmb { u64 rpcit_ops; u64 dma_rbytes; u64 dma_wbytes; -} __packed __aligned(16); +} __packed __aligned(64); enum zpci_state { ZPCI_FN_STATE_RESERVED, @@ -66,7 +66,6 @@ struct s390_domain; /* Private data per function */ struct zpci_dev { - struct pci_dev *pdev; struct pci_bus *bus; struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ @@ -192,7 +191,7 @@ int zpci_fmb_disable_device(struct zpci_dev *); /* Debug */ int zpci_debug_init(void); void zpci_debug_exit(void); -void zpci_debug_init_device(struct zpci_dev *); +void zpci_debug_init_device(struct zpci_dev *, const char *); void zpci_debug_exit_device(struct zpci_dev *); void zpci_debug_info(struct zpci_dev *, struct seq_file *); diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index dd78f92..e75c64cb 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -49,9 +49,6 @@ struct clp_fh_list_entry { /* List PCI functions request */ struct clp_req_list_pci { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u64 resume_token; u64 reserved2; } __packed; @@ -59,9 +56,6 @@ struct clp_req_list_pci { /* List PCI functions response */ struct clp_rsp_list_pci { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u64 resume_token; u32 reserved2; u16 max_fn; @@ -73,9 +67,6 @@ struct clp_rsp_list_pci { /* Query PCI function request */ struct clp_req_query_pci { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u32 fh; /* function handle */ u32 reserved2; u64 reserved3; @@ -84,9 +75,6 @@ struct clp_req_query_pci { /* Query PCI function response */ struct clp_rsp_query_pci { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 : 64; u16 vfn; /* virtual fn number */ u16 : 7; u16 util_str_avail : 1; /* utility string available? */ @@ -108,21 +96,15 @@ struct clp_rsp_query_pci { /* Query PCI function group request */ struct clp_req_query_pci_grp { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; - u32 : 24; + u32 reserved2 : 24; u32 pfgid : 8; /* function group id */ - u32 reserved2; - u64 reserved3; + u32 reserved3; + u64 reserved4; } __packed; /* Query PCI function group response */ struct clp_rsp_query_pci_grp { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u16 : 4; u16 noi : 12; /* number of interrupts */ u8 version; @@ -141,9 +123,6 @@ struct clp_rsp_query_pci_grp { /* Set PCI function request */ struct clp_req_set_pci { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u32 fh; /* function handle */ u16 reserved2; u8 oc; /* operation controls */ @@ -154,9 +133,6 @@ struct clp_req_set_pci { /* Set PCI function response */ struct clp_rsp_set_pci { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u32 fh; /* function handle */ u32 reserved3; u64 reserved4; diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 6d6556c..90240df 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -178,7 +178,6 @@ ret__; \ }) -#define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double #define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double #include <asm-generic/percpu.h> diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index f897ec7..1f7ff85 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -21,7 +21,7 @@ #define PMU_F_ERR_LSDA 0x0200 #define PMU_F_ERR_MASK (PMU_F_ERR_IBE|PMU_F_ERR_LSDA) -/* Perf defintions for PMU event attributes in sysfs */ +/* Perf definitions for PMU event attributes in sysfs */ extern __init const struct attribute_group **cpumf_cf_event_group(void); extern ssize_t cpumf_events_sysfs_show(struct device *dev, struct device_attribute *attr, diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index d7cc79f..9b3d9b6 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -23,10 +23,6 @@ void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); extern int page_table_allocate_pgste; -int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, - unsigned long key, bool nq); -unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr); - static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { typedef struct { char _[n]; } addrtype; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 64ead80..2f66645 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -298,15 +298,15 @@ static inline int is_module_addr(void *addr) /* * Segment table entry encoding (R = read-only, I = invalid, y = young bit): - * dy..R...I...wr + * dy..R...I...rw * prot-none, clean, old 00..1...1...00 * prot-none, clean, young 01..1...1...00 * prot-none, dirty, old 10..1...1...00 * prot-none, dirty, young 11..1...1...00 - * read-only, clean, old 00..1...1...01 - * read-only, clean, young 01..1...0...01 - * read-only, dirty, old 10..1...1...01 - * read-only, dirty, young 11..1...0...01 + * read-only, clean, old 00..1...1...10 + * read-only, clean, young 01..1...0...10 + * read-only, dirty, old 10..1...1...10 + * read-only, dirty, young 11..1...0...10 * read-write, clean, old 00..1...1...11 * read-write, clean, young 01..1...0...11 * read-write, dirty, old 10..0...1...11 @@ -520,15 +520,6 @@ static inline int pmd_bad(pmd_t pmd) return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } -#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS -extern int pmdp_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp, - pmd_t entry, int dirty); - -#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH -extern int pmdp_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); - #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { @@ -631,208 +622,6 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) return pmd; } -static inline pgste_t pgste_get_lock(pte_t *ptep) -{ - unsigned long new = 0; -#ifdef CONFIG_PGSTE - unsigned long old; - - preempt_disable(); - asm( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " nihh %0,0xff7f\n" /* clear PCL bit in old */ - " oihh %1,0x0080\n" /* set PCL bit in new */ - " csg %0,%1,%2\n" - " jl 0b\n" - : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) - : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); -#endif - return __pgste(new); -} - -static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) -{ -#ifdef CONFIG_PGSTE - asm( - " nihh %1,0xff7f\n" /* clear PCL bit */ - " stg %1,%0\n" - : "=Q" (ptep[PTRS_PER_PTE]) - : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) - : "cc", "memory"); - preempt_enable(); -#endif -} - -static inline pgste_t pgste_get(pte_t *ptep) -{ - unsigned long pgste = 0; -#ifdef CONFIG_PGSTE - pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); -#endif - return __pgste(pgste); -} - -static inline void pgste_set(pte_t *ptep, pgste_t pgste) -{ -#ifdef CONFIG_PGSTE - *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; -#endif -} - -static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste, - struct mm_struct *mm) -{ -#ifdef CONFIG_PGSTE - unsigned long address, bits, skey; - - if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID) - return pgste; - address = pte_val(*ptep) & PAGE_MASK; - skey = (unsigned long) page_get_storage_key(address); - bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); - /* Transfer page changed & referenced bit to guest bits in pgste */ - pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ - /* Copy page access key and fetch protection bit to pgste */ - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; -#endif - return pgste; - -} - -static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry, - struct mm_struct *mm) -{ -#ifdef CONFIG_PGSTE - unsigned long address; - unsigned long nkey; - - if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID) - return; - VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); - address = pte_val(entry) & PAGE_MASK; - /* - * Set page access key and fetch protection bit from pgste. - * The guest C/R information is still in the PGSTE, set real - * key C/R to 0. - */ - nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; - nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; - page_set_storage_key(address, nkey, 0); -#endif -} - -static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) -{ - if ((pte_val(entry) & _PAGE_PRESENT) && - (pte_val(entry) & _PAGE_WRITE) && - !(pte_val(entry) & _PAGE_INVALID)) { - if (!MACHINE_HAS_ESOP) { - /* - * Without enhanced suppression-on-protection force - * the dirty bit on for all writable ptes. - */ - pte_val(entry) |= _PAGE_DIRTY; - pte_val(entry) &= ~_PAGE_PROTECT; - } - if (!(pte_val(entry) & _PAGE_PROTECT)) - /* This pte allows write access, set user-dirty */ - pgste_val(pgste) |= PGSTE_UC_BIT; - } - *ptep = entry; - return pgste; -} - -/** - * struct gmap_struct - guest address space - * @crst_list: list of all crst tables used in the guest address space - * @mm: pointer to the parent mm_struct - * @guest_to_host: radix tree with guest to host address translation - * @host_to_guest: radix tree with pointer to segment table entries - * @guest_table_lock: spinlock to protect all entries in the guest page table - * @table: pointer to the page directory - * @asce: address space control element for gmap page table - * @pfault_enabled: defines if pfaults are applicable for the guest - */ -struct gmap { - struct list_head list; - struct list_head crst_list; - struct mm_struct *mm; - struct radix_tree_root guest_to_host; - struct radix_tree_root host_to_guest; - spinlock_t guest_table_lock; - unsigned long *table; - unsigned long asce; - unsigned long asce_end; - void *private; - bool pfault_enabled; -}; - -/** - * struct gmap_notifier - notify function block for page invalidation - * @notifier_call: address of callback function - */ -struct gmap_notifier { - struct list_head list; - void (*notifier_call)(struct gmap *gmap, unsigned long gaddr); -}; - -struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit); -void gmap_free(struct gmap *gmap); -void gmap_enable(struct gmap *gmap); -void gmap_disable(struct gmap *gmap); -int gmap_map_segment(struct gmap *gmap, unsigned long from, - unsigned long to, unsigned long len); -int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); -unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); -unsigned long gmap_translate(struct gmap *, unsigned long gaddr); -int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); -int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); -void gmap_discard(struct gmap *, unsigned long from, unsigned long to); -void __gmap_zap(struct gmap *, unsigned long gaddr); -bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *); - - -void gmap_register_ipte_notifier(struct gmap_notifier *); -void gmap_unregister_ipte_notifier(struct gmap_notifier *); -int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); -void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *); - -static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep, pgste_t pgste) -{ -#ifdef CONFIG_PGSTE - if (pgste_val(pgste) & PGSTE_IN_BIT) { - pgste_val(pgste) &= ~PGSTE_IN_BIT; - gmap_do_ipte_notify(mm, addr, ptep); - } -#endif - return pgste; -} - -/* - * Certain architectures need to do special things when PTEs - * within a page table are directly modified. Thus, the following - * hook is made available. - */ -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t entry) -{ - pgste_t pgste; - - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; - pgste_set_key(ptep, pgste, entry, mm); - pgste = pgste_set_pte(ptep, pgste, entry); - pgste_set_unlock(ptep, pgste); - } else { - *ptep = entry; - } -} - /* * query functions pte_write/pte_dirty/pte_young only work if * pte_present() is true. Undefined behaviour if not.. @@ -998,96 +787,30 @@ static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep) } while (nr != 255); } -static inline void ptep_flush_direct(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - int active, count; - - if (pte_val(*ptep) & _PAGE_INVALID) - return; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __ptep_ipte_local(address, ptep); - else - __ptep_ipte(address, ptep); - atomic_sub(0x10000, &mm->context.attach_count); -} - -static inline void ptep_flush_lazy(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - int active, count; - - if (pte_val(*ptep) & _PAGE_INVALID) - return; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { - pte_val(*ptep) |= _PAGE_INVALID; - mm->context.flush_mm = 1; - } else - __ptep_ipte(address, ptep); - atomic_sub(0x10000, &mm->context.attach_count); -} - /* - * Get (and clear) the user dirty bit for a pte. + * This is hard to understand. ptep_get_and_clear and ptep_clear_flush + * both clear the TLB for the unmapped pte. The reason is that + * ptep_get_and_clear is used in common code (e.g. change_pte_range) + * to modify an active pte. The sequence is + * 1) ptep_get_and_clear + * 2) set_pte_at + * 3) flush_tlb_range + * On s390 the tlb needs to get flushed with the modification of the pte + * if the pte is active. The only way how this can be implemented is to + * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range + * is a nop. */ -static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep) -{ - pgste_t pgste; - pte_t pte; - int dirty; - - if (!mm_has_pgste(mm)) - return 0; - pgste = pgste_get_lock(ptep); - dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); - pgste_val(pgste) &= ~PGSTE_UC_BIT; - pte = *ptep; - if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { - pgste = pgste_ipte_notify(mm, addr, ptep, pgste); - __ptep_ipte(addr, ptep); - if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) - pte_val(pte) |= _PAGE_PROTECT; - else - pte_val(pte) |= _PAGE_INVALID; - *ptep = pte; - } - pgste_set_unlock(ptep, pgste); - return dirty; -} +pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t); +pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t); #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - pgste_t pgste; - pte_t pte, oldpte; - int young; - - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste); - } - - oldpte = pte = *ptep; - ptep_flush_direct(vma->vm_mm, addr, ptep); - young = pte_young(pte); - pte = pte_mkold(pte); - - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_update_all(&oldpte, pgste, vma->vm_mm); - pgste = pgste_set_pte(ptep, pgste, pte); - pgste_set_unlock(ptep, pgste); - } else - *ptep = pte; + pte_t pte = *ptep; - return young; + pte = ptep_xchg_direct(vma->vm_mm, addr, ptep, pte_mkold(pte)); + return pte_young(pte); } #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH @@ -1097,104 +820,22 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, return ptep_test_and_clear_young(vma, address, ptep); } -/* - * This is hard to understand. ptep_get_and_clear and ptep_clear_flush - * both clear the TLB for the unmapped pte. The reason is that - * ptep_get_and_clear is used in common code (e.g. change_pte_range) - * to modify an active pte. The sequence is - * 1) ptep_get_and_clear - * 2) set_pte_at - * 3) flush_tlb_range - * On s390 the tlb needs to get flushed with the modification of the pte - * if the pte is active. The only way how this can be implemented is to - * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range - * is a nop. - */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, - unsigned long address, pte_t *ptep) + unsigned long addr, pte_t *ptep) { - pgste_t pgste; - pte_t pte; - - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, address, ptep, pgste); - } - - pte = *ptep; - ptep_flush_lazy(mm, address, ptep); - pte_val(*ptep) = _PAGE_INVALID; - - if (mm_has_pgste(mm)) { - pgste = pgste_update_all(&pte, pgste, mm); - pgste_set_unlock(ptep, pgste); - } - return pte; + return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); } #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION -static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, - unsigned long address, - pte_t *ptep) -{ - pgste_t pgste; - pte_t pte; - - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste_ipte_notify(mm, address, ptep, pgste); - } - - pte = *ptep; - ptep_flush_lazy(mm, address, ptep); - - if (mm_has_pgste(mm)) { - pgste = pgste_update_all(&pte, pgste, mm); - pgste_set(ptep, pgste); - } - return pte; -} - -static inline void ptep_modify_prot_commit(struct mm_struct *mm, - unsigned long address, - pte_t *ptep, pte_t pte) -{ - pgste_t pgste; - - if (mm_has_pgste(mm)) { - pgste = pgste_get(ptep); - pgste_set_key(ptep, pgste, pte, mm); - pgste = pgste_set_pte(ptep, pgste, pte); - pgste_set_unlock(ptep, pgste); - } else - *ptep = pte; -} +pte_t ptep_modify_prot_start(struct mm_struct *, unsigned long, pte_t *); +void ptep_modify_prot_commit(struct mm_struct *, unsigned long, pte_t *, pte_t); #define __HAVE_ARCH_PTEP_CLEAR_FLUSH static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) + unsigned long addr, pte_t *ptep) { - pgste_t pgste; - pte_t pte; - - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); - } - - pte = *ptep; - ptep_flush_direct(vma->vm_mm, address, ptep); - pte_val(*ptep) = _PAGE_INVALID; - - if (mm_has_pgste(vma->vm_mm)) { - if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == - _PGSTE_GPS_USAGE_UNUSED) - pte_val(pte) |= _PAGE_UNUSED; - pgste = pgste_update_all(&pte, pgste, vma->vm_mm); - pgste_set_unlock(ptep, pgste); - } - return pte; + return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID)); } /* @@ -1206,80 +847,66 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, - unsigned long address, + unsigned long addr, pte_t *ptep, int full) { - pgste_t pgste; - pte_t pte; - - if (!full && mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, address, ptep, pgste); - } - - pte = *ptep; - if (!full) - ptep_flush_lazy(mm, address, ptep); - pte_val(*ptep) = _PAGE_INVALID; - - if (!full && mm_has_pgste(mm)) { - pgste = pgste_update_all(&pte, pgste, mm); - pgste_set_unlock(ptep, pgste); + if (full) { + pte_t pte = *ptep; + *ptep = __pte(_PAGE_INVALID); + return pte; } - return pte; + return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); } #define __HAVE_ARCH_PTEP_SET_WRPROTECT -static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, - unsigned long address, pte_t *ptep) +static inline void ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { - pgste_t pgste; pte_t pte = *ptep; - if (pte_write(pte)) { - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, address, ptep, pgste); - } - - ptep_flush_lazy(mm, address, ptep); - pte = pte_wrprotect(pte); - - if (mm_has_pgste(mm)) { - pgste = pgste_set_pte(ptep, pgste, pte); - pgste_set_unlock(ptep, pgste); - } else - *ptep = pte; - } - return pte; + if (pte_write(pte)) + ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte)); } #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS static inline int ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, + unsigned long addr, pte_t *ptep, pte_t entry, int dirty) { - pgste_t pgste; - pte_t oldpte; - - oldpte = *ptep; - if (pte_same(oldpte, entry)) + if (pte_same(*ptep, entry)) return 0; - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); - } + ptep_xchg_direct(vma->vm_mm, addr, ptep, entry); + return 1; +} - ptep_flush_direct(vma->vm_mm, address, ptep); +/* + * Additional functions to handle KVM guest page tables + */ +void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry); +void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void ptep_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, + pte_t *ptep , int reset); +void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + +bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char key, bool nq); +unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr); - if (mm_has_pgste(vma->vm_mm)) { - if (pte_val(oldpte) & _PAGE_INVALID) - pgste_set_key(ptep, pgste, entry, vma->vm_mm); - pgste = pgste_set_pte(ptep, pgste, entry); - pgste_set_unlock(ptep, pgste); - } else +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + if (mm_has_pgste(mm)) + ptep_set_pte_at(mm, addr, ptep, entry); + else *ptep = entry; - return 1; } /* @@ -1476,54 +1103,51 @@ static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) : "cc" ); } -static inline void pmdp_flush_direct(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) -{ - int active, count; +pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t); +pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t); - if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) - return; - if (!MACHINE_HAS_IDTE) { - __pmdp_csp(pmdp); - return; - } - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __pmdp_idte_local(address, pmdp); - else - __pmdp_idte(address, pmdp); - atomic_sub(0x10000, &mm->context.attach_count); -} +#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline void pmdp_flush_lazy(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) +#define __HAVE_ARCH_PGTABLE_DEPOSIT +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); + +#define __HAVE_ARCH_PGTABLE_WITHDRAW +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); + +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +static inline int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp, + pmd_t entry, int dirty) { - int active, count; + VM_BUG_ON(addr & ~HPAGE_MASK); - if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) - return; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { - pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; - mm->context.flush_mm = 1; - } else if (MACHINE_HAS_IDTE) - __pmdp_idte(address, pmdp); - else - __pmdp_csp(pmdp); - atomic_sub(0x10000, &mm->context.attach_count); + entry = pmd_mkyoung(entry); + if (dirty) + entry = pmd_mkdirty(entry); + if (pmd_val(*pmdp) == pmd_val(entry)) + return 0; + pmdp_xchg_direct(vma->vm_mm, addr, pmdp, entry); + return 1; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + pmd_t pmd = *pmdp; -#define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pgtable); + pmd = pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd_mkold(pmd)); + return pmd_young(pmd); +} -#define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + VM_BUG_ON(addr & ~HPAGE_MASK); + return pmdp_test_and_clear_young(vma, addr, pmdp); +} static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) @@ -1539,66 +1163,48 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) return pmd; } -#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG -static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - pmd_t pmd; - - pmd = *pmdp; - pmdp_flush_direct(vma->vm_mm, address, pmdp); - *pmdp = pmd_mkold(pmd); - return pmd_young(pmd); -} - #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { - pmd_t pmd = *pmdp; - - pmdp_flush_direct(mm, address, pmdp); - pmd_clear(pmdp); - return pmd; + return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID)); } #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm, - unsigned long address, + unsigned long addr, pmd_t *pmdp, int full) { - pmd_t pmd = *pmdp; - - if (!full) - pmdp_flush_lazy(mm, address, pmdp); - pmd_clear(pmdp); - return pmd; + if (full) { + pmd_t pmd = *pmdp; + *pmdp = __pmd(_SEGMENT_ENTRY_INVALID); + return pmd; + } + return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID)); } #define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { - return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); + return pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp); } #define __HAVE_ARCH_PMDP_INVALIDATE static inline void pmdp_invalidate(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { - pmdp_flush_direct(vma->vm_mm, address, pmdp); + pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID)); } #define __HAVE_ARCH_PMDP_SET_WRPROTECT static inline void pmdp_set_wrprotect(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { pmd_t pmd = *pmdp; - if (pmd_write(pmd)) { - pmdp_flush_direct(mm, address, pmdp); - set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd)); - } + if (pmd_write(pmd)) + pmd = pmdp_xchg_lazy(mm, addr, pmdp, pmd_wrprotect(pmd)); } static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 1c4fe12..d6fd22e 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -184,6 +184,10 @@ struct task_struct; struct mm_struct; struct seq_file; +typedef int (*dump_trace_func_t)(void *data, unsigned long address); +void dump_trace(dump_trace_func_t func, void *data, + struct task_struct *task, unsigned long sp); + void show_cacheinfo(struct seq_file *m); /* Free all resources held by a thread. */ @@ -203,6 +207,14 @@ unsigned long get_wchan(struct task_struct *p); /* Has task runtime instrumentation enabled ? */ #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb) +static inline unsigned long current_stack_pointer(void) +{ + unsigned long sp; + + asm volatile("la %0,0(15)" : "=a" (sp)); + return sp; +} + static inline unsigned short stap(void) { unsigned short cpu_address; diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index 4b43ee7..fead491 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -31,7 +31,7 @@ * This should be totally fair - if anything is waiting, a process that wants a * lock will go to the back of the queue. When the currently active lock is * released, if there's a writer at the front of the queue, then that and only - * that will be woken up; if there's a bunch of consequtive readers at the + * that will be woken up; if there's a bunch of consecutive readers at the * front, then they'll all be woken up, but no other readers will be. */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 6983722..c0f0efb 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -101,6 +101,8 @@ extern void pfault_fini(void); #define pfault_fini() do { } while (0) #endif /* CONFIG_PFAULT */ +void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault); + extern void cmma_init(void); extern void (*_machine_restart)(char *command); diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h index c82eb12..c988df7 100644 --- a/arch/s390/include/asm/xor.h +++ b/arch/s390/include/asm/xor.h @@ -1 +1,20 @@ -#include <asm-generic/xor.h> +/* + * Optimited xor routines + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#ifndef _ASM_S390_XOR_H +#define _ASM_S390_XOR_H + +extern struct xor_block_template xor_block_xc; + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ +do { \ + xor_speed(&xor_block_xc); \ +} while (0) + +#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_xc) + +#endif /* _ASM_S390_XOR_H */ diff --git a/arch/s390/include/uapi/asm/clp.h b/arch/s390/include/uapi/asm/clp.h new file mode 100644 index 0000000..ab72d9d --- /dev/null +++ b/arch/s390/include/uapi/asm/clp.h @@ -0,0 +1,28 @@ +/* + * ioctl interface for /dev/clp + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef _ASM_CLP_H +#define _ASM_CLP_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +struct clp_req { + unsigned int c : 1; + unsigned int r : 1; + unsigned int lps : 6; + unsigned int cmd : 8; + unsigned int : 16; + unsigned int reserved; + __u64 data_p; +}; + +#define CLP_IOCTL_MAGIC 'c' + +#define CLP_SYNC _IOWR(CLP_IOCTL_MAGIC, 0xC1, struct clp_req) + +#endif diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 53bbc9e..1f95cc1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -12,6 +12,7 @@ #include <asm/idle.h> #include <asm/vdso.h> #include <asm/pgtable.h> +#include <asm/gmap.h> /* * Make sure that the compiler is new enough. We want a compiler that diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 7f76891..7f48e56 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -96,8 +96,7 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code) (((unsigned long)response + rlen) >> 31)) { lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA); if (!lowbuf) { - pr_warning("The cpcmd kernel function failed to " - "allocate a response buffer\n"); + pr_warn("The cpcmd kernel function failed to allocate a response buffer\n"); return -ENOMEM; } spin_lock_irqsave(&cpcmd_lock, flags); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index c890a55..aa12de7 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -699,8 +699,7 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area, /* Since debugfs currently does not support uid/gid other than root, */ /* we do not allow gid/uid != 0 until we get support for that. */ if ((uid != 0) || (gid != 0)) - pr_warning("Root becomes the owner of all s390dbf files " - "in sysfs\n"); + pr_warn("Root becomes the owner of all s390dbf files in sysfs\n"); BUG_ON(!initialized); mutex_lock(&debug_mutex); @@ -1307,8 +1306,7 @@ debug_input_level_fn(debug_info_t * id, struct debug_view *view, new_level = debug_get_uint(str); } if(new_level < 0) { - pr_warning("%s is not a valid level for a debug " - "feature\n", str); + pr_warn("%s is not a valid level for a debug feature\n", str); rc = -EINVAL; } else { debug_set_level(id, new_level); diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 62973ef..8cb9bfd 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1920,23 +1920,16 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) } if (separator) ptr += sprintf(ptr, "%c", separator); - /* - * Use four '%' characters below because of the - * following two conversions: - * - * 1) sprintf: %%%%r -> %%r - * 2) printk : %%r -> %r - */ if (operand->flags & OPERAND_GPR) - ptr += sprintf(ptr, "%%%%r%i", value); + ptr += sprintf(ptr, "%%r%i", value); else if (operand->flags & OPERAND_FPR) - ptr += sprintf(ptr, "%%%%f%i", value); + ptr += sprintf(ptr, "%%f%i", value); else if (operand->flags & OPERAND_AR) - ptr += sprintf(ptr, "%%%%a%i", value); + ptr += sprintf(ptr, "%%a%i", value); else if (operand->flags & OPERAND_CR) - ptr += sprintf(ptr, "%%%%c%i", value); + ptr += sprintf(ptr, "%%c%i", value); else if (operand->flags & OPERAND_VR) - ptr += sprintf(ptr, "%%%%v%i", value); + ptr += sprintf(ptr, "%%v%i", value); else if (operand->flags & OPERAND_PCREL) ptr += sprintf(ptr, "%lx", (signed int) value + addr); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 02bd02f..2150b01 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -19,28 +19,28 @@ #include <asm/ipl.h> /* - * For show_trace we have tree different stack to consider: + * For dump_trace we have tree different stack to consider: * - the panic stack which is used if the kernel stack has overflown * - the asynchronous interrupt stack (cpu related) * - the synchronous kernel stack (process related) - * The stack trace can start at any of the three stack and can potentially + * The stack trace can start at any of the three stacks and can potentially * touch all of them. The order is: panic stack, async stack, sync stack. */ static unsigned long -__show_trace(unsigned long sp, unsigned long low, unsigned long high) +__dump_trace(dump_trace_func_t func, void *data, unsigned long sp, + unsigned long low, unsigned long high) { struct stack_frame *sf; struct pt_regs *regs; - unsigned long addr; while (1) { if (sp < low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; - addr = sf->gprs[8]; - printk("([<%016lx>] %pSR)\n", addr, (void *)addr); /* Follow the backchain. */ while (1) { + if (func(data, sf->gprs[8])) + return sp; low = sp; sp = sf->back_chain; if (!sp) @@ -48,46 +48,58 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high) if (sp <= low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; - addr = sf->gprs[8]; - printk(" [<%016lx>] %pSR\n", addr, (void *)addr); } /* Zero backchain detected, check for interrupt frame. */ sp = (unsigned long) (sf + 1); if (sp <= low || sp > high - sizeof(*regs)) return sp; regs = (struct pt_regs *) sp; - addr = regs->psw.addr; - printk(" [<%016lx>] %pSR\n", addr, (void *)addr); + if (!user_mode(regs)) { + if (func(data, regs->psw.addr)) + return sp; + } low = sp; sp = regs->gprs[15]; } } -static void show_trace(struct task_struct *task, unsigned long *stack) +void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, + unsigned long sp) { - const unsigned long frame_size = - STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - register unsigned long __r15 asm ("15"); - unsigned long sp; + unsigned long frame_size; - sp = (unsigned long) stack; - if (!sp) - sp = task ? task->thread.ksp : __r15; - printk("Call Trace:\n"); + frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); #ifdef CONFIG_CHECK_STACK - sp = __show_trace(sp, + sp = __dump_trace(func, data, sp, S390_lowcore.panic_stack + frame_size - 4096, S390_lowcore.panic_stack + frame_size); #endif - sp = __show_trace(sp, + sp = __dump_trace(func, data, sp, S390_lowcore.async_stack + frame_size - ASYNC_SIZE, S390_lowcore.async_stack + frame_size); if (task) - __show_trace(sp, (unsigned long) task_stack_page(task), - (unsigned long) task_stack_page(task) + THREAD_SIZE); + __dump_trace(func, data, sp, + (unsigned long)task_stack_page(task), + (unsigned long)task_stack_page(task) + THREAD_SIZE); else - __show_trace(sp, S390_lowcore.thread_info, + __dump_trace(func, data, sp, + S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); +} +EXPORT_SYMBOL_GPL(dump_trace); + +static int show_address(void *data, unsigned long address) +{ + printk("([<%016lx>] %pSR)\n", address, (void *)address); + return 0; +} + +static void show_trace(struct task_struct *task, unsigned long sp) +{ + if (!sp) + sp = task ? task->thread.ksp : current_stack_pointer(); + printk("Call Trace:\n"); + dump_trace(show_address, NULL, task, sp); if (!task) task = current; debug_show_held_locks(task); @@ -95,15 +107,16 @@ static void show_trace(struct task_struct *task, unsigned long *stack) void show_stack(struct task_struct *task, unsigned long *sp) { - register unsigned long *__r15 asm ("15"); unsigned long *stack; int i; - if (!sp) - stack = task ? (unsigned long *) task->thread.ksp : __r15; - else - stack = sp; - + stack = sp; + if (!stack) { + if (!task) + stack = (unsigned long *)current_stack_pointer(); + else + stack = (unsigned long *)task->thread.ksp; + } for (i = 0; i < 20; i++) { if (((addr_t) stack & (THREAD_SIZE-1)) == 0) break; @@ -112,7 +125,7 @@ void show_stack(struct task_struct *task, unsigned long *sp) printk("%016lx ", *stack++); } printk("\n"); - show_trace(task, sp); + show_trace(task, (unsigned long)sp); } static void show_last_breaking_event(struct pt_regs *regs) @@ -121,13 +134,9 @@ static void show_last_breaking_event(struct pt_regs *regs) printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]); } -static inline int mask_bits(struct pt_regs *regs, unsigned long bits) -{ - return (regs->psw.mask & bits) / ((~bits + 1) & bits); -} - void show_registers(struct pt_regs *regs) { + struct psw_bits *psw = &psw_bits(regs->psw); char *mode; mode = user_mode(regs) ? "User" : "Krnl"; @@ -136,13 +145,9 @@ void show_registers(struct pt_regs *regs) printk(" (%pSR)", (void *)regs->psw.addr); printk("\n"); printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " - "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), - mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), - mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY), - mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), - mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), - mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); - printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); + "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e, + psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm); + printk(" RI:%x EA:%x", psw->ri, psw->eaba); printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); printk(" %016lx %016lx %016lx %016lx\n", @@ -160,7 +165,7 @@ void show_regs(struct pt_regs *regs) show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ if (!user_mode(regs)) - show_trace(NULL, (unsigned long *) regs->gprs[15]); + show_trace(NULL, regs->gprs[15]); show_last_breaking_event(regs); } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index cd5a191..2d47f9c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -186,6 +186,7 @@ ENTRY(__switch_to) stg %r5,__LC_THREAD_INFO # store thread info of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack lg %r15,__THREAD_ksp(%r1) # load kernel stack of next + /* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */ lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task @@ -1199,114 +1200,12 @@ cleanup_critical: .quad .Lpsw_idle_lpsw .Lcleanup_save_fpu_regs: - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - bor %r14 - clg %r9,BASED(.Lcleanup_save_fpu_regs_done) - jhe 5f - clg %r9,BASED(.Lcleanup_save_fpu_regs_fp) - jhe 4f - clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_high) - jhe 3f - clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_low) - jhe 2f - clg %r9,BASED(.Lcleanup_save_fpu_fpc_end) - jhe 1f - lg %r2,__LC_CURRENT - aghi %r2,__TASK_thread -0: # Store floating-point controls - stfpc __THREAD_FPU_fpc(%r2) -1: # Load register save area and check if VX is active - lg %r3,__THREAD_FPU_regs(%r2) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - jz 4f # no VX -> store FP regs -2: # Store vector registers (V0-V15) - VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3) -3: # Store vector registers (V16-V31) - VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3) - j 5f # -> done, set CIF_FPU flag -4: # Store floating-point registers - std 0,0(%r3) - std 1,8(%r3) - std 2,16(%r3) - std 3,24(%r3) - std 4,32(%r3) - std 5,40(%r3) - std 6,48(%r3) - std 7,56(%r3) - std 8,64(%r3) - std 9,72(%r3) - std 10,80(%r3) - std 11,88(%r3) - std 12,96(%r3) - std 13,104(%r3) - std 14,112(%r3) - std 15,120(%r3) -5: # Set CIF_FPU flag - oi __LC_CPU_FLAGS+7,_CIF_FPU - lg %r9,48(%r11) # return from save_fpu_regs + larl %r9,save_fpu_regs br %r14 -.Lcleanup_save_fpu_fpc_end: - .quad .Lsave_fpu_regs_fpc_end -.Lcleanup_save_fpu_regs_vx_low: - .quad .Lsave_fpu_regs_vx_low -.Lcleanup_save_fpu_regs_vx_high: - .quad .Lsave_fpu_regs_vx_high -.Lcleanup_save_fpu_regs_fp: - .quad .Lsave_fpu_regs_fp -.Lcleanup_save_fpu_regs_done: - .quad .Lsave_fpu_regs_done .Lcleanup_load_fpu_regs: - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - bnor %r14 - clg %r9,BASED(.Lcleanup_load_fpu_regs_done) - jhe 1f - clg %r9,BASED(.Lcleanup_load_fpu_regs_fp) - jhe 2f - clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high) - jhe 3f - clg %r9,BASED(.Lcleanup_load_fpu_regs_vx) - jhe 4f - lg %r4,__LC_CURRENT - aghi %r4,__TASK_thread - lfpc __THREAD_FPU_fpc(%r4) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area - jz 2f # -> no VX, load FP regs -4: # Load V0 ..V15 registers - VLM %v0,%v15,0,%r4 -3: # Load V16..V31 registers - VLM %v16,%v31,256,%r4 - j 1f -2: # Load floating-point registers - ld 0,0(%r4) - ld 1,8(%r4) - ld 2,16(%r4) - ld 3,24(%r4) - ld 4,32(%r4) - ld 5,40(%r4) - ld 6,48(%r4) - ld 7,56(%r4) - ld 8,64(%r4) - ld 9,72(%r4) - ld 10,80(%r4) - ld 11,88(%r4) - ld 12,96(%r4) - ld 13,104(%r4) - ld 14,112(%r4) - ld 15,120(%r4) -1: # Clear CIF_FPU bit - ni __LC_CPU_FLAGS+7,255-_CIF_FPU - lg %r9,48(%r11) # return from load_fpu_regs + larl %r9,load_fpu_regs br %r14 -.Lcleanup_load_fpu_regs_vx: - .quad .Lload_fpu_regs_vx -.Lcleanup_load_fpu_regs_vx_high: - .quad .Lload_fpu_regs_vx_high -.Lcleanup_load_fpu_regs_fp: - .quad .Lload_fpu_regs_fp -.Lcleanup_load_fpu_regs_done: - .quad .Lload_fpu_regs_done /* * Integer constants diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index f41d520..c373a1d 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -164,8 +164,7 @@ void do_softirq_own_stack(void) { unsigned long old, new; - /* Get current stack pointer. */ - asm volatile("la %0,0(15)" : "=a" (old)); + old = current_stack_pointer(); /* Check against async. stack address range. */ new = S390_lowcore.async_stack; if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) { diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 929c147..58bf457 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -383,7 +383,7 @@ static int __hw_perf_event_init(struct perf_event *event) /* Validate the counter that is assigned to this event. * Because the counter facility can use numerous counters at the - * same time without constraints, it is not necessary to explicity + * same time without constraints, it is not necessary to explicitly * validate event groups (event->group_leader != event). */ err = validate_event(hwc); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 3d8da1e..1a43474 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1022,10 +1022,13 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) /* * A non-zero guest program parameter indicates a guest * sample. - * Note that some early samples might be misaccounted to - * the host. + * Note that some early samples or samples from guests without + * lpp usage would be misaccounted to the host. We use the asn + * value as a heuristic to detect most of these guest samples. + * If the value differs from the host hpp value, we assume + * it to be a KVM guest. */ - if (sfr->basic.gpp) + if (sfr->basic.gpp || sfr->basic.prim_asn != (u16) sfr->basic.hpp) sde_regs->in_guest = 1; overflow = 0; diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 0943b11..c3e4099 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -222,67 +222,23 @@ static int __init service_level_perf_register(void) } arch_initcall(service_level_perf_register); -/* See also arch/s390/kernel/traps.c */ -static unsigned long __store_trace(struct perf_callchain_entry *entry, - unsigned long sp, - unsigned long low, unsigned long high) +static int __perf_callchain_kernel(void *data, unsigned long address) { - struct stack_frame *sf; - struct pt_regs *regs; - - while (1) { - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - perf_callchain_store(entry, sf->gprs[8]); - /* Follow the backchain. */ - while (1) { - low = sp; - sp = sf->back_chain; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - perf_callchain_store(entry, sf->gprs[8]); - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - perf_callchain_store(entry, sf->gprs[8]); - low = sp; - sp = regs->gprs[15]; - } + struct perf_callchain_entry *entry = data; + + perf_callchain_store(entry, address); + return 0; } void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - unsigned long head, frame_size; - struct stack_frame *head_sf; - if (user_mode(regs)) return; - - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - head = regs->gprs[15]; - head_sf = (struct stack_frame *) head; - - if (!head_sf || !head_sf->back_chain) - return; - - head = head_sf->back_chain; - head = __store_trace(entry, head, - S390_lowcore.async_stack + frame_size - ASYNC_SIZE, - S390_lowcore.async_stack + frame_size); - - __store_trace(entry, head, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); + dump_trace(__perf_callchain_kernel, entry, NULL, regs->gprs[15]); } -/* Perf defintions for PMU event attributes in sysfs */ +/* Perf definitions for PMU event attributes in sysfs */ ssize_t cpumf_events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index cedb019..d3f9688 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -327,6 +327,7 @@ static void __init setup_lowcore(void) + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; + lc->lpp = LPP_MAGIC; lc->machine_flags = S390_lowcore.machine_flags; lc->stfl_fac_list = S390_lowcore.stfl_fac_list; memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, @@ -779,6 +780,7 @@ static int __init setup_hwcaps(void) strcpy(elf_platform, "zEC12"); break; case 0x2964: + case 0x2965: strcpy(elf_platform, "z13"); break; } diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 8f64ebd..44f84b2 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -10,78 +10,39 @@ #include <linux/kallsyms.h> #include <linux/module.h> -static unsigned long save_context_stack(struct stack_trace *trace, - unsigned long sp, - unsigned long low, - unsigned long high, - int savesched) +static int __save_address(void *data, unsigned long address, int nosched) { - struct stack_frame *sf; - struct pt_regs *regs; - unsigned long addr; + struct stack_trace *trace = data; - while(1) { - if (sp < low || sp > high) - return sp; - sf = (struct stack_frame *)sp; - while(1) { - addr = sf->gprs[8]; - if (!trace->skip) - trace->entries[trace->nr_entries++] = addr; - else - trace->skip--; - if (trace->nr_entries >= trace->max_entries) - return sp; - low = sp; - sp = sf->back_chain; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *)sp; - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long)(sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *)sp; - addr = regs->psw.addr; - if (savesched || !in_sched_functions(addr)) { - if (!trace->skip) - trace->entries[trace->nr_entries++] = addr; - else - trace->skip--; - } - if (trace->nr_entries >= trace->max_entries) - return sp; - low = sp; - sp = regs->gprs[15]; + if (nosched && in_sched_functions(address)) + return 0; + if (trace->skip > 0) { + trace->skip--; + return 0; } + if (trace->nr_entries < trace->max_entries) { + trace->entries[trace->nr_entries++] = address; + return 0; + } + return 1; } -static void __save_stack_trace(struct stack_trace *trace, unsigned long sp) +static int save_address(void *data, unsigned long address) { - unsigned long new_sp, frame_size; + return __save_address(data, address, 0); +} - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - new_sp = save_context_stack(trace, sp, - S390_lowcore.panic_stack + frame_size - PAGE_SIZE, - S390_lowcore.panic_stack + frame_size, 1); - new_sp = save_context_stack(trace, new_sp, - S390_lowcore.async_stack + frame_size - ASYNC_SIZE, - S390_lowcore.async_stack + frame_size, 1); - save_context_stack(trace, new_sp, - S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE, 1); +static int save_address_nosched(void *data, unsigned long address) +{ + return __save_address(data, address, 1); } void save_stack_trace(struct stack_trace *trace) { - register unsigned long r15 asm ("15"); unsigned long sp; - sp = r15; - __save_stack_trace(trace, sp); + sp = current_stack_pointer(); + dump_trace(save_address, trace, NULL, sp); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } @@ -89,16 +50,12 @@ EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { - unsigned long sp, low, high; + unsigned long sp; sp = tsk->thread.ksp; - if (tsk == current) { - /* Get current stack pointer. */ - asm volatile("la %0,0(15)" : "=a" (sp)); - } - low = (unsigned long) task_stack_page(tsk); - high = (unsigned long) task_pt_regs(tsk); - save_context_stack(trace, sp, low, high, 0); + if (tsk == current) + sp = current_stack_pointer(); + dump_trace(save_address_nosched, trace, tsk, sp); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } @@ -109,7 +66,7 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) unsigned long sp; sp = kernel_stack_pointer(regs); - __save_stack_trace(trace, sp); + dump_trace(save_address, trace, NULL, sp); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 99f84ac31..c4e5f18 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -499,8 +499,7 @@ static void etr_reset(void) if (etr_port0_online && etr_port1_online) set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); } else if (etr_port0_online || etr_port1_online) { - pr_warning("The real or virtual hardware system does " - "not provide an ETR interface\n"); + pr_warn("The real or virtual hardware system does not provide an ETR interface\n"); etr_port0_online = etr_port1_online = 0; } } @@ -1464,8 +1463,7 @@ static void __init stp_reset(void) if (rc == 0) set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); else if (stp_online) { - pr_warning("The real or virtual hardware system does " - "not provide an STP interface\n"); + pr_warn("The real or virtual hardware system does not provide an STP interface\n"); free_page((unsigned long) stp_page); stp_page = NULL; stp_online = 0; diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 017eb03d..dd97a3e 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -22,8 +22,6 @@ #include <asm/fpu/api.h> #include "entry.h" -int show_unhandled_signals = 1; - static inline void __user *get_trap_ip(struct pt_regs *regs) { unsigned long address; @@ -35,21 +33,6 @@ static inline void __user *get_trap_ip(struct pt_regs *regs) return (void __user *) (address - (regs->int_code >> 16)); } -static inline void report_user_fault(struct pt_regs *regs, int signr) -{ - if ((task_pid_nr(current) > 1) && !show_unhandled_signals) - return; - if (!unhandled_signal(current, signr)) - return; - if (!printk_ratelimit()) - return; - printk("User process fault: interruption code %04x ilc:%d ", - regs->int_code & 0xffff, regs->int_code >> 17); - print_vma_addr("in ", regs->psw.addr); - printk("\n"); - show_regs(regs); -} - int is_valid_bugaddr(unsigned long addr) { return 1; @@ -65,7 +48,7 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) info.si_code = si_code; info.si_addr = get_trap_ip(regs); force_sig_info(si_signo, &info, current); - report_user_fault(regs, si_signo); + report_user_fault(regs, si_signo, 0); } else { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->psw.addr); @@ -111,7 +94,7 @@ NOKPROBE_SYMBOL(do_per_trap); void default_trap_handler(struct pt_regs *regs) { if (user_mode(regs)) { - report_user_fault(regs, SIGSEGV); + report_user_fault(regs, SIGSEGV, 0); do_exit(SIGSEGV); } else die(regs, "Unknown program exception"); diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 05f7de9..1ea4095 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -14,6 +14,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <asm/pgalloc.h> +#include <asm/gmap.h> #include <asm/virtio-ccw.h> #include "kvm-s390.h" #include "trace.h" diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index d697312..e8c6843 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -17,7 +17,7 @@ /* * Extends the address range given by *start and *stop to include the address * range starting with estart and the length len. Takes care of overflowing - * intervals and tries to minimize the overall intervall size. + * intervals and tries to minimize the overall interval size. */ static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len) { @@ -72,7 +72,7 @@ static void enable_all_hw_bp(struct kvm_vcpu *vcpu) return; /* - * If the guest is not interrested in branching events, we can savely + * If the guest is not interested in branching events, we can safely * limit them to the PER address range. */ if (!(*cr9 & PER_EVENT_BRANCH)) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 704809d..84efc2b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -23,6 +23,7 @@ #include <asm/uaccess.h> #include <asm/sclp.h> #include <asm/isc.h> +#include <asm/gmap.h> #include "kvm-s390.h" #include "gaccess.h" #include "trace-s390.h" diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e196582..668c087 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -30,6 +30,7 @@ #include <asm/lowcore.h> #include <asm/etr.h> #include <asm/pgtable.h> +#include <asm/gmap.h> #include <asm/nmi.h> #include <asm/switch_to.h> #include <asm/isc.h> @@ -281,7 +282,7 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { address = gfn_to_hva_memslot(memslot, cur_gfn); - if (gmap_test_and_clear_dirty(address, gmap)) + if (test_and_clear_guest_dirty(gmap->mm, address)) mark_page_dirty(kvm, cur_gfn); if (fatal_signal_pending(current)) return; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index f218ccf0..0a1591d 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -23,6 +23,7 @@ #include <asm/sysinfo.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> +#include <asm/gmap.h> #include <asm/io.h> #include <asm/ptrace.h> #include <asm/compat.h> diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 0e8fefe..1d1af31 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -3,7 +3,7 @@ # lib-y += delay.o string.o uaccess.o find.o -obj-y += mem.o +obj-y += mem.o xor.o lib-$(CONFIG_SMP) += spinlock.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c new file mode 100644 index 0000000..7d94e3e --- /dev/null +++ b/arch/s390/lib/xor.c @@ -0,0 +1,134 @@ +/* + * Optimized xor_block operation for RAID4/5 + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/raid/xor.h> + +static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + "3:\n" + : : "d" (bytes), "a" (p1), "a" (p2) + : "0", "1", "cc", "memory"); +} + +static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " xc 0(256,%1),0(%3)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " la %3,256(%3)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " ex %0,6(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + " xc 0(1,%1),0(%3)\n" + "3:\n" + : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) + : : "0", "1", "cc", "memory"); +} + +static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " xc 0(256,%1),0(%3)\n" + " xc 0(256,%1),0(%4)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " la %3,256(%3)\n" + " la %4,256(%4)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " ex %0,6(1)\n" + " ex %0,12(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + " xc 0(1,%1),0(%3)\n" + " xc 0(1,%1),0(%4)\n" + "3:\n" + : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) + : : "0", "1", "cc", "memory"); +} + +static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + /* Get around a gcc oddity */ + register unsigned long *reg7 asm ("7") = p5; + + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " xc 0(256,%1),0(%3)\n" + " xc 0(256,%1),0(%4)\n" + " xc 0(256,%1),0(%5)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " la %3,256(%3)\n" + " la %4,256(%4)\n" + " la %5,256(%5)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " ex %0,6(1)\n" + " ex %0,12(1)\n" + " ex %0,18(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + " xc 0(1,%1),0(%3)\n" + " xc 0(1,%1),0(%4)\n" + " xc 0(1,%1),0(%5)\n" + "3:\n" + : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), + "+a" (reg7) + : : "0", "1", "cc", "memory"); +} + +struct xor_block_template xor_block_xc = { + .name = "xc", + .do_2 = xor_xc_2, + .do_3 = xor_xc_3, + .do_4 = xor_xc_4, + .do_5 = xor_xc_5, +}; +EXPORT_SYMBOL(xor_block_xc); diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 839592c..2ae54ca 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -2,9 +2,11 @@ # Makefile for the linux s390-specific parts of the memory manager. # -obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o +obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o obj-y += page-states.o gup.o extable.o pageattr.o mem_detect.o +obj-y += pgtable.o pgalloc.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o +obj-$(CONFIG_PGSTE) += gmap.o diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index a1bf4ad..02042b6 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -265,7 +265,7 @@ query_segment_type (struct dcss_segment *seg) goto out_free; } if (diag_cc > 1) { - pr_warning("Querying a DCSS type failed with rc=%ld\n", vmrc); + pr_warn("Querying a DCSS type failed with rc=%ld\n", vmrc); rc = dcss_diag_translate_rc (vmrc); goto out_free; } @@ -457,8 +457,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long goto out_resource; } if (diag_cc > 1) { - pr_warning("Loading DCSS %s failed with rc=%ld\n", name, - end_addr); + pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr); rc = dcss_diag_translate_rc(end_addr); dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); @@ -574,8 +573,7 @@ segment_modify_shared (char *name, int do_nonshared) goto out_unlock; } if (atomic_read (&seg->ref_count) != 1) { - pr_warning("DCSS %s is in use and cannot be reloaded\n", - name); + pr_warn("DCSS %s is in use and cannot be reloaded\n", name); rc = -EAGAIN; goto out_unlock; } @@ -588,8 +586,8 @@ segment_modify_shared (char *name, int do_nonshared) seg->res->flags |= IORESOURCE_READONLY; if (request_resource(&iomem_resource, seg->res)) { - pr_warning("DCSS %s overlaps with used memory resources " - "and cannot be reloaded\n", name); + pr_warn("DCSS %s overlaps with used memory resources and cannot be reloaded\n", + name); rc = -EBUSY; kfree(seg->res); goto out_del_mem; @@ -607,8 +605,8 @@ segment_modify_shared (char *name, int do_nonshared) goto out_del_res; } if (diag_cc > 1) { - pr_warning("Reloading DCSS %s failed with rc=%ld\n", name, - end_addr); + pr_warn("Reloading DCSS %s failed with rc=%ld\n", + name, end_addr); rc = dcss_diag_translate_rc(end_addr); goto out_del_res; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 791a414..cce577f 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -32,6 +32,7 @@ #include <asm/asm-offsets.h> #include <asm/diag.h> #include <asm/pgtable.h> +#include <asm/gmap.h> #include <asm/irq.h> #include <asm/mmu_context.h> #include <asm/facility.h> @@ -183,6 +184,8 @@ static void dump_fault_info(struct pt_regs *regs) { unsigned long asce; + pr_alert("Failing address: %016lx TEID: %016lx\n", + regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); pr_alert("Fault in "); switch (regs->int_parm_long & 3) { case 3: @@ -218,7 +221,9 @@ static void dump_fault_info(struct pt_regs *regs) dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK); } -static inline void report_user_fault(struct pt_regs *regs, long signr) +int show_unhandled_signals = 1; + +void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault) { if ((task_pid_nr(current) > 1) && !show_unhandled_signals) return; @@ -230,9 +235,8 @@ static inline void report_user_fault(struct pt_regs *regs, long signr) regs->int_code & 0xffff, regs->int_code >> 17); print_vma_addr(KERN_CONT "in ", regs->psw.addr); printk(KERN_CONT "\n"); - printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", - regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); - dump_fault_info(regs); + if (is_mm_fault) + dump_fault_info(regs); show_regs(regs); } @@ -244,7 +248,7 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) { struct siginfo si; - report_user_fault(regs, SIGSEGV); + report_user_fault(regs, SIGSEGV, 1); si.si_signo = SIGSEGV; si.si_code = si_code; si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); @@ -272,8 +276,6 @@ static noinline void do_no_context(struct pt_regs *regs) else printk(KERN_ALERT "Unable to handle kernel paging request" " in virtual user address space\n"); - printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", - regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); dump_fault_info(regs); die(regs, "Oops"); do_exit(SIGKILL); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c new file mode 100644 index 0000000..69247b4 --- /dev/null +++ b/arch/s390/mm/gmap.c @@ -0,0 +1,774 @@ +/* + * KVM guest address space mapping code + * + * Copyright IBM Corp. 2007, 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/swapops.h> +#include <linux/ksm.h> +#include <linux/mman.h> + +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/gmap.h> +#include <asm/tlb.h> + +/** + * gmap_alloc - allocate a guest address space + * @mm: pointer to the parent mm_struct + * @limit: maximum size of the gmap address space + * + * Returns a guest address space structure. + */ +struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) +{ + struct gmap *gmap; + struct page *page; + unsigned long *table; + unsigned long etype, atype; + + if (limit < (1UL << 31)) { + limit = (1UL << 31) - 1; + atype = _ASCE_TYPE_SEGMENT; + etype = _SEGMENT_ENTRY_EMPTY; + } else if (limit < (1UL << 42)) { + limit = (1UL << 42) - 1; + atype = _ASCE_TYPE_REGION3; + etype = _REGION3_ENTRY_EMPTY; + } else if (limit < (1UL << 53)) { + limit = (1UL << 53) - 1; + atype = _ASCE_TYPE_REGION2; + etype = _REGION2_ENTRY_EMPTY; + } else { + limit = -1UL; + atype = _ASCE_TYPE_REGION1; + etype = _REGION1_ENTRY_EMPTY; + } + gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); + if (!gmap) + goto out; + INIT_LIST_HEAD(&gmap->crst_list); + INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); + INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); + spin_lock_init(&gmap->guest_table_lock); + gmap->mm = mm; + page = alloc_pages(GFP_KERNEL, 2); + if (!page) + goto out_free; + page->index = 0; + list_add(&page->lru, &gmap->crst_list); + table = (unsigned long *) page_to_phys(page); + crst_table_init(table, etype); + gmap->table = table; + gmap->asce = atype | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | __pa(table); + gmap->asce_end = limit; + down_write(&mm->mmap_sem); + list_add(&gmap->list, &mm->context.gmap_list); + up_write(&mm->mmap_sem); + return gmap; + +out_free: + kfree(gmap); +out: + return NULL; +} +EXPORT_SYMBOL_GPL(gmap_alloc); + +static void gmap_flush_tlb(struct gmap *gmap) +{ + if (MACHINE_HAS_IDTE) + __tlb_flush_asce(gmap->mm, gmap->asce); + else + __tlb_flush_global(); +} + +static void gmap_radix_tree_free(struct radix_tree_root *root) +{ + struct radix_tree_iter iter; + unsigned long indices[16]; + unsigned long index; + void **slot; + int i, nr; + + /* A radix tree is freed by deleting all of its entries */ + index = 0; + do { + nr = 0; + radix_tree_for_each_slot(slot, root, &iter, index) { + indices[nr] = iter.index; + if (++nr == 16) + break; + } + for (i = 0; i < nr; i++) { + index = indices[i]; + radix_tree_delete(root, index); + } + } while (nr > 0); +} + +/** + * gmap_free - free a guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_free(struct gmap *gmap) +{ + struct page *page, *next; + + /* Flush tlb. */ + if (MACHINE_HAS_IDTE) + __tlb_flush_asce(gmap->mm, gmap->asce); + else + __tlb_flush_global(); + + /* Free all segment & region tables. */ + list_for_each_entry_safe(page, next, &gmap->crst_list, lru) + __free_pages(page, 2); + gmap_radix_tree_free(&gmap->guest_to_host); + gmap_radix_tree_free(&gmap->host_to_guest); + down_write(&gmap->mm->mmap_sem); + list_del(&gmap->list); + up_write(&gmap->mm->mmap_sem); + kfree(gmap); +} +EXPORT_SYMBOL_GPL(gmap_free); + +/** + * gmap_enable - switch primary space to the guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_enable(struct gmap *gmap) +{ + S390_lowcore.gmap = (unsigned long) gmap; +} +EXPORT_SYMBOL_GPL(gmap_enable); + +/** + * gmap_disable - switch back to the standard primary address space + * @gmap: pointer to the guest address space structure + */ +void gmap_disable(struct gmap *gmap) +{ + S390_lowcore.gmap = 0UL; +} +EXPORT_SYMBOL_GPL(gmap_disable); + +/* + * gmap_alloc_table is assumed to be called with mmap_sem held + */ +static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, + unsigned long init, unsigned long gaddr) +{ + struct page *page; + unsigned long *new; + + /* since we dont free the gmap table until gmap_free we can unlock */ + page = alloc_pages(GFP_KERNEL, 2); + if (!page) + return -ENOMEM; + new = (unsigned long *) page_to_phys(page); + crst_table_init(new, init); + spin_lock(&gmap->mm->page_table_lock); + if (*table & _REGION_ENTRY_INVALID) { + list_add(&page->lru, &gmap->crst_list); + *table = (unsigned long) new | _REGION_ENTRY_LENGTH | + (*table & _REGION_ENTRY_TYPE_MASK); + page->index = gaddr; + page = NULL; + } + spin_unlock(&gmap->mm->page_table_lock); + if (page) + __free_pages(page, 2); + return 0; +} + +/** + * __gmap_segment_gaddr - find virtual address from segment pointer + * @entry: pointer to a segment table entry in the guest address space + * + * Returns the virtual address in the guest address space for the segment + */ +static unsigned long __gmap_segment_gaddr(unsigned long *entry) +{ + struct page *page; + unsigned long offset, mask; + + offset = (unsigned long) entry / sizeof(unsigned long); + offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; + mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); + page = virt_to_page((void *)((unsigned long) entry & mask)); + return page->index + offset; +} + +/** + * __gmap_unlink_by_vmaddr - unlink a single segment via a host address + * @gmap: pointer to the guest address space structure + * @vmaddr: address in the host process address space + * + * Returns 1 if a TLB flush is required + */ +static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) +{ + unsigned long *entry; + int flush = 0; + + spin_lock(&gmap->guest_table_lock); + entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); + if (entry) { + flush = (*entry != _SEGMENT_ENTRY_INVALID); + *entry = _SEGMENT_ENTRY_INVALID; + } + spin_unlock(&gmap->guest_table_lock); + return flush; +} + +/** + * __gmap_unmap_by_gaddr - unmap a single segment via a guest address + * @gmap: pointer to the guest address space structure + * @gaddr: address in the guest address space + * + * Returns 1 if a TLB flush is required + */ +static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long vmaddr; + + vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; +} + +/** + * gmap_unmap_segment - unmap segment from the guest address space + * @gmap: pointer to the guest address space structure + * @to: address in the guest address space + * @len: length of the memory area to unmap + * + * Returns 0 if the unmap succeeded, -EINVAL if not. + */ +int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) +{ + unsigned long off; + int flush; + + if ((to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || to + len < to) + return -EINVAL; + + flush = 0; + down_write(&gmap->mm->mmap_sem); + for (off = 0; off < len; off += PMD_SIZE) + flush |= __gmap_unmap_by_gaddr(gmap, to + off); + up_write(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + return 0; +} +EXPORT_SYMBOL_GPL(gmap_unmap_segment); + +/** + * gmap_map_segment - map a segment to the guest address space + * @gmap: pointer to the guest address space structure + * @from: source address in the parent address space + * @to: target address in the guest address space + * @len: length of the memory area to map + * + * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. + */ +int gmap_map_segment(struct gmap *gmap, unsigned long from, + unsigned long to, unsigned long len) +{ + unsigned long off; + int flush; + + if ((from | to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || from + len < from || to + len < to || + from + len > TASK_MAX_SIZE || to + len > gmap->asce_end) + return -EINVAL; + + flush = 0; + down_write(&gmap->mm->mmap_sem); + for (off = 0; off < len; off += PMD_SIZE) { + /* Remove old translation */ + flush |= __gmap_unmap_by_gaddr(gmap, to + off); + /* Store new translation */ + if (radix_tree_insert(&gmap->guest_to_host, + (to + off) >> PMD_SHIFT, + (void *) from + off)) + break; + } + up_write(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + if (off >= len) + return 0; + gmap_unmap_segment(gmap, to, len); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(gmap_map_segment); + +/** + * __gmap_translate - translate a guest address to a user space address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long vmaddr; + + vmaddr = (unsigned long) + radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); + return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; +} +EXPORT_SYMBOL_GPL(__gmap_translate); + +/** + * gmap_translate - translate a guest address to a user space address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + */ +unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_translate(gmap, gaddr); + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_translate); + +/** + * gmap_unlink - disconnect a page table from the gmap shadow tables + * @gmap: pointer to guest mapping meta data structure + * @table: pointer to the host page table + * @vmaddr: vm address associated with the host page table + */ +void gmap_unlink(struct mm_struct *mm, unsigned long *table, + unsigned long vmaddr) +{ + struct gmap *gmap; + int flush; + + list_for_each_entry(gmap, &mm->context.gmap_list, list) { + flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); + if (flush) + gmap_flush_tlb(gmap); + } +} + +/** + * gmap_link - set up shadow page tables to connect a host to a guest address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * @vmaddr: vm address + * + * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT + * if the vm address is already mapped to a different guest segment. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) +{ + struct mm_struct *mm; + unsigned long *table; + spinlock_t *ptl; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + int rc; + + /* Create higher level tables in the gmap page table */ + table = gmap->table; + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { + table += (gaddr >> 53) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, + gaddr & 0xffe0000000000000UL)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { + table += (gaddr >> 42) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, + gaddr & 0xfffffc0000000000UL)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { + table += (gaddr >> 31) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, + gaddr & 0xffffffff80000000UL)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + table += (gaddr >> 20) & 0x7ff; + /* Walk the parent mm page table */ + mm = gmap->mm; + pgd = pgd_offset(mm, vmaddr); + VM_BUG_ON(pgd_none(*pgd)); + pud = pud_offset(pgd, vmaddr); + VM_BUG_ON(pud_none(*pud)); + pmd = pmd_offset(pud, vmaddr); + VM_BUG_ON(pmd_none(*pmd)); + /* large pmds cannot yet be handled */ + if (pmd_large(*pmd)) + return -EFAULT; + /* Link gmap segment table entry location to page table. */ + rc = radix_tree_preload(GFP_KERNEL); + if (rc) + return rc; + ptl = pmd_lock(mm, pmd); + spin_lock(&gmap->guest_table_lock); + if (*table == _SEGMENT_ENTRY_INVALID) { + rc = radix_tree_insert(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT, table); + if (!rc) + *table = pmd_val(*pmd); + } else + rc = 0; + spin_unlock(&gmap->guest_table_lock); + spin_unlock(ptl); + radix_tree_preload_end(); + return rc; +} + +/** + * gmap_fault - resolve a fault on a guest address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * @fault_flags: flags to pass down to handle_mm_fault() + * + * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT + * if the vm address is already mapped to a different guest segment. + */ +int gmap_fault(struct gmap *gmap, unsigned long gaddr, + unsigned int fault_flags) +{ + unsigned long vmaddr; + int rc; + bool unlocked; + + down_read(&gmap->mm->mmap_sem); + +retry: + unlocked = false; + vmaddr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(vmaddr)) { + rc = vmaddr; + goto out_up; + } + if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, + &unlocked)) { + rc = -EFAULT; + goto out_up; + } + /* + * In the case that fixup_user_fault unlocked the mmap_sem during + * faultin redo __gmap_translate to not race with a map/unmap_segment. + */ + if (unlocked) + goto retry; + + rc = __gmap_link(gmap, gaddr, vmaddr); +out_up: + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_fault); + +/* + * this function is assumed to be called with mmap_sem held + */ +void __gmap_zap(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long vmaddr; + spinlock_t *ptl; + pte_t *ptep; + + /* Find the vm address for the guest address */ + vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + if (vmaddr) { + vmaddr |= gaddr & ~PMD_MASK; + /* Get pointer to the page table entry */ + ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); + if (likely(ptep)) + ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); + pte_unmap_unlock(ptep, ptl); + } +} +EXPORT_SYMBOL_GPL(__gmap_zap); + +void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) +{ + unsigned long gaddr, vmaddr, size; + struct vm_area_struct *vma; + + down_read(&gmap->mm->mmap_sem); + for (gaddr = from; gaddr < to; + gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { + /* Find the vm address for the guest address */ + vmaddr = (unsigned long) + radix_tree_lookup(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + if (!vmaddr) + continue; + vmaddr |= gaddr & ~PMD_MASK; + /* Find vma in the parent mm */ + vma = find_vma(gmap->mm, vmaddr); + size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); + zap_page_range(vma, vmaddr, size, NULL); + } + up_read(&gmap->mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(gmap_discard); + +static LIST_HEAD(gmap_notifier_list); +static DEFINE_SPINLOCK(gmap_notifier_lock); + +/** + * gmap_register_ipte_notifier - register a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_register_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_add(&nb->list, &gmap_notifier_list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); + +/** + * gmap_unregister_ipte_notifier - remove a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_del_init(&nb->list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); + +/** + * gmap_ipte_notify - mark a range of ptes for invalidation notification + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * @len: size of area + * + * Returns 0 if for each page in the given range a gmap mapping exists and + * the invalidation notification could be set. If the gmap mapping is missing + * for one or more pages -EFAULT is returned. If no memory could be allocated + * -ENOMEM is returned. This function establishes missing page table entries. + */ +int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) +{ + unsigned long addr; + spinlock_t *ptl; + pte_t *ptep; + bool unlocked; + int rc = 0; + + if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) + return -EINVAL; + down_read(&gmap->mm->mmap_sem); + while (len) { + unlocked = false; + /* Convert gmap address and connect the page tables */ + addr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(addr)) { + rc = addr; + break; + } + /* Get the page mapped */ + if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE, + &unlocked)) { + rc = -EFAULT; + break; + } + /* While trying to map mmap_sem got unlocked. Let us retry */ + if (unlocked) + continue; + rc = __gmap_link(gmap, gaddr, addr); + if (rc) + break; + /* Walk the process page table, lock and get pte pointer */ + ptep = get_locked_pte(gmap->mm, addr, &ptl); + VM_BUG_ON(!ptep); + /* Set notification bit in the pgste of the pte */ + if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { + ptep_set_notify(gmap->mm, addr, ptep); + gaddr += PAGE_SIZE; + len -= PAGE_SIZE; + } + pte_unmap_unlock(ptep, ptl); + } + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_ipte_notify); + +/** + * ptep_notify - call all invalidation callbacks for a specific pte. + * @mm: pointer to the process mm_struct + * @addr: virtual address in the process address space + * @pte: pointer to the page table entry + * + * This function is assumed to be called with the page table lock held + * for the pte to notify. + */ +void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) +{ + unsigned long offset, gaddr; + unsigned long *table; + struct gmap_notifier *nb; + struct gmap *gmap; + + offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); + offset = offset * (4096 / sizeof(pte_t)); + spin_lock(&gmap_notifier_lock); + list_for_each_entry(gmap, &mm->context.gmap_list, list) { + table = radix_tree_lookup(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (!table) + continue; + gaddr = __gmap_segment_gaddr(table) + offset; + list_for_each_entry(nb, &gmap_notifier_list, list) + nb->notifier_call(gmap, gaddr); + } + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(ptep_notify); + +static inline void thp_split_mm(struct mm_struct *mm) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + struct vm_area_struct *vma; + unsigned long addr; + + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { + for (addr = vma->vm_start; + addr < vma->vm_end; + addr += PAGE_SIZE) + follow_page(vma, addr, FOLL_SPLIT); + vma->vm_flags &= ~VM_HUGEPAGE; + vma->vm_flags |= VM_NOHUGEPAGE; + } + mm->def_flags |= VM_NOHUGEPAGE; +#endif +} + +/* + * switch on pgstes for its userspace process (for kvm) + */ +int s390_enable_sie(void) +{ + struct mm_struct *mm = current->mm; + + /* Do we have pgstes? if yes, we are done */ + if (mm_has_pgste(mm)) + return 0; + /* Fail if the page tables are 2K */ + if (!mm_alloc_pgste(mm)) + return -EINVAL; + down_write(&mm->mmap_sem); + mm->context.has_pgste = 1; + /* split thp mappings and disable thp for future mappings */ + thp_split_mm(mm); + up_write(&mm->mmap_sem); + return 0; +} +EXPORT_SYMBOL_GPL(s390_enable_sie); + +/* + * Enable storage key handling from now on and initialize the storage + * keys with the default key. + */ +static int __s390_enable_skey(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + /* + * Remove all zero page mappings, + * after establishing a policy to forbid zero page mappings + * following faults for that page will get fresh anonymous pages + */ + if (is_zero_pfn(pte_pfn(*pte))) + ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID)); + /* Clear storage key */ + ptep_zap_key(walk->mm, addr, pte); + return 0; +} + +int s390_enable_skey(void) +{ + struct mm_walk walk = { .pte_entry = __s390_enable_skey }; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc = 0; + + down_write(&mm->mmap_sem); + if (mm_use_skey(mm)) + goto out_up; + + mm->context.use_skey = 1; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (ksm_madvise(vma, vma->vm_start, vma->vm_end, + MADV_UNMERGEABLE, &vma->vm_flags)) { + mm->context.use_skey = 0; + rc = -ENOMEM; + goto out_up; + } + } + mm->def_flags &= ~VM_MERGEABLE; + + walk.mm = mm; + walk_page_range(0, TASK_SIZE, &walk); + +out_up: + up_write(&mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(s390_enable_skey); + +/* + * Reset CMMA state, make all pages stable again. + */ +static int __s390_reset_cmma(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + ptep_zap_unused(walk->mm, addr, pte, 1); + return 0; +} + +void s390_reset_cmma(struct mm_struct *mm) +{ + struct mm_walk walk = { .pte_entry = __s390_reset_cmma }; + + down_write(&mm->mmap_sem); + walk.mm = mm; + walk_page_range(0, TASK_SIZE, &walk); + up_write(&mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(s390_reset_cmma); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index f81096b..1b5e898 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -105,11 +105,10 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pmd_t *pmdp = (pmd_t *) ptep; - pte_t pte = huge_ptep_get(ptep); + pmd_t old; - pmdp_flush_direct(mm, addr, pmdp); - pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; - return pte; + old = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); + return __pmd_to_pte(old); } pte_t *huge_pte_alloc(struct mm_struct *mm, diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 749c984..f2a5c29 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -65,19 +65,17 @@ static pte_t *walk_page_table(unsigned long addr) static void change_page_attr(unsigned long addr, int numpages, pte_t (*set) (pte_t)) { - pte_t *ptep, pte; + pte_t *ptep; int i; for (i = 0; i < numpages; i++) { ptep = walk_page_table(addr); if (WARN_ON_ONCE(!ptep)) break; - pte = *ptep; - pte = set(pte); - __ptep_ipte(addr, ptep); - *ptep = pte; + *ptep = set(*ptep); addr += PAGE_SIZE; } + __tlb_flush_kernel(); } int set_memory_ro(unsigned long addr, int numpages) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c new file mode 100644 index 0000000..f6c3de2 --- /dev/null +++ b/arch/s390/mm/pgalloc.c @@ -0,0 +1,360 @@ +/* + * Page table allocation functions + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/mm.h> +#include <linux/sysctl.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> +#include <asm/gmap.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> + +#ifdef CONFIG_PGSTE + +static int page_table_allocate_pgste_min = 0; +static int page_table_allocate_pgste_max = 1; +int page_table_allocate_pgste = 0; +EXPORT_SYMBOL(page_table_allocate_pgste); + +static struct ctl_table page_table_sysctl[] = { + { + .procname = "allocate_pgste", + .data = &page_table_allocate_pgste, + .maxlen = sizeof(int), + .mode = S_IRUGO | S_IWUSR, + .proc_handler = proc_dointvec, + .extra1 = &page_table_allocate_pgste_min, + .extra2 = &page_table_allocate_pgste_max, + }, + { } +}; + +static struct ctl_table page_table_sysctl_dir[] = { + { + .procname = "vm", + .maxlen = 0, + .mode = 0555, + .child = page_table_sysctl, + }, + { } +}; + +static int __init page_table_register_sysctl(void) +{ + return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM; +} +__initcall(page_table_register_sysctl); + +#endif /* CONFIG_PGSTE */ + +unsigned long *crst_table_alloc(struct mm_struct *mm) +{ + struct page *page = alloc_pages(GFP_KERNEL, 2); + + if (!page) + return NULL; + return (unsigned long *) page_to_phys(page); +} + +void crst_table_free(struct mm_struct *mm, unsigned long *table) +{ + free_pages((unsigned long) table, 2); +} + +static void __crst_table_upgrade(void *arg) +{ + struct mm_struct *mm = arg; + + if (current->active_mm == mm) { + clear_user_asce(); + set_user_asce(mm); + } + __tlb_flush_local(); +} + +int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) +{ + unsigned long *table, *pgd; + unsigned long entry; + int flush; + + BUG_ON(limit > TASK_MAX_SIZE); + flush = 0; +repeat: + table = crst_table_alloc(mm); + if (!table) + return -ENOMEM; + spin_lock_bh(&mm->page_table_lock); + if (mm->context.asce_limit < limit) { + pgd = (unsigned long *) mm->pgd; + if (mm->context.asce_limit <= (1UL << 31)) { + entry = _REGION3_ENTRY_EMPTY; + mm->context.asce_limit = 1UL << 42; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_REGION3; + } else { + entry = _REGION2_ENTRY_EMPTY; + mm->context.asce_limit = 1UL << 53; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_REGION2; + } + crst_table_init(table, entry); + pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); + mm->pgd = (pgd_t *) table; + mm->task_size = mm->context.asce_limit; + table = NULL; + flush = 1; + } + spin_unlock_bh(&mm->page_table_lock); + if (table) + crst_table_free(mm, table); + if (mm->context.asce_limit < limit) + goto repeat; + if (flush) + on_each_cpu(__crst_table_upgrade, mm, 0); + return 0; +} + +void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) +{ + pgd_t *pgd; + + if (current->active_mm == mm) { + clear_user_asce(); + __tlb_flush_mm(mm); + } + while (mm->context.asce_limit > limit) { + pgd = mm->pgd; + switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { + case _REGION_ENTRY_TYPE_R2: + mm->context.asce_limit = 1UL << 42; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_REGION3; + break; + case _REGION_ENTRY_TYPE_R3: + mm->context.asce_limit = 1UL << 31; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_SEGMENT; + break; + default: + BUG(); + } + mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); + mm->task_size = mm->context.asce_limit; + crst_table_free(mm, (unsigned long *) pgd); + } + if (current->active_mm == mm) + set_user_asce(mm); +} + +static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) +{ + unsigned int old, new; + + do { + old = atomic_read(v); + new = old ^ bits; + } while (atomic_cmpxchg(v, old, new) != old); + return new; +} + +/* + * page table entry allocation/free routines. + */ +unsigned long *page_table_alloc(struct mm_struct *mm) +{ + unsigned long *table; + struct page *page; + unsigned int mask, bit; + + /* Try to get a fragment of a 4K page as a 2K page table */ + if (!mm_alloc_pgste(mm)) { + table = NULL; + spin_lock_bh(&mm->context.list_lock); + if (!list_empty(&mm->context.pgtable_list)) { + page = list_first_entry(&mm->context.pgtable_list, + struct page, lru); + mask = atomic_read(&page->_mapcount); + mask = (mask | (mask >> 4)) & 3; + if (mask != 3) { + table = (unsigned long *) page_to_phys(page); + bit = mask & 1; /* =1 -> second 2K */ + if (bit) + table += PTRS_PER_PTE; + atomic_xor_bits(&page->_mapcount, 1U << bit); + list_del(&page->lru); + } + } + spin_unlock_bh(&mm->context.list_lock); + if (table) + return table; + } + /* Allocate a fresh page */ + page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + if (!page) + return NULL; + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } + /* Initialize page table */ + table = (unsigned long *) page_to_phys(page); + if (mm_alloc_pgste(mm)) { + /* Return 4K page table with PGSTEs */ + atomic_set(&page->_mapcount, 3); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + } else { + /* Return the first 2K fragment of the page */ + atomic_set(&page->_mapcount, 1); + clear_table(table, _PAGE_INVALID, PAGE_SIZE); + spin_lock_bh(&mm->context.list_lock); + list_add(&page->lru, &mm->context.pgtable_list); + spin_unlock_bh(&mm->context.list_lock); + } + return table; +} + +void page_table_free(struct mm_struct *mm, unsigned long *table) +{ + struct page *page; + unsigned int bit, mask; + + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (!mm_alloc_pgste(mm)) { + /* Free 2K page table fragment of a 4K page */ + bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); + spin_lock_bh(&mm->context.list_lock); + mask = atomic_xor_bits(&page->_mapcount, 1U << bit); + if (mask & 3) + list_add(&page->lru, &mm->context.pgtable_list); + else + list_del(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + if (mask != 0) + return; + } + + pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); + __free_page(page); +} + +void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, + unsigned long vmaddr) +{ + struct mm_struct *mm; + struct page *page; + unsigned int bit, mask; + + mm = tlb->mm; + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (mm_alloc_pgste(mm)) { + gmap_unlink(mm, table, vmaddr); + table = (unsigned long *) (__pa(table) | 3); + tlb_remove_table(tlb, table); + return; + } + bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); + spin_lock_bh(&mm->context.list_lock); + mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); + if (mask & 3) + list_add_tail(&page->lru, &mm->context.pgtable_list); + else + list_del(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + table = (unsigned long *) (__pa(table) | (1U << bit)); + tlb_remove_table(tlb, table); +} + +static void __tlb_remove_table(void *_table) +{ + unsigned int mask = (unsigned long) _table & 3; + void *table = (void *)((unsigned long) _table ^ mask); + struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + + switch (mask) { + case 0: /* pmd or pud */ + free_pages((unsigned long) table, 2); + break; + case 1: /* lower 2K of a 4K page table */ + case 2: /* higher 2K of a 4K page table */ + if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0) + break; + /* fallthrough */ + case 3: /* 4K page table with pgstes */ + pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); + __free_page(page); + break; + } +} + +static void tlb_remove_table_smp_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +static void tlb_remove_table_one(void *table) +{ + /* + * This isn't an RCU grace period and hence the page-tables cannot be + * assumed to be actually RCU-freed. + * + * It is however sufficient for software page-table walkers that rely + * on IRQ disabling. See the comment near struct mmu_table_batch. + */ + smp_call_function(tlb_remove_table_smp_sync, NULL, 1); + __tlb_remove_table(table); +} + +static void tlb_remove_table_rcu(struct rcu_head *head) +{ + struct mmu_table_batch *batch; + int i; + + batch = container_of(head, struct mmu_table_batch, rcu); + + for (i = 0; i < batch->nr; i++) + __tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + +void tlb_table_flush(struct mmu_gather *tlb) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch) { + call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + *batch = NULL; + } +} + +void tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct mmu_table_batch **batch = &tlb->batch; + + tlb->mm->context.flush_mm = 1; + if (*batch == NULL) { + *batch = (struct mmu_table_batch *) + __get_free_page(GFP_NOWAIT | __GFP_NOWARN); + if (*batch == NULL) { + __tlb_flush_mm_lazy(tlb->mm); + tlb_remove_table_one(table); + return; + } + (*batch)->nr = 0; + } + (*batch)->tables[(*batch)->nr++] = table; + if ((*batch)->nr == MAX_TABLE_BATCH) + tlb_flush_mmu(tlb); +} diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 5109827..4324b87 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -24,591 +24,397 @@ #include <asm/tlbflush.h> #include <asm/mmu_context.h> -unsigned long *crst_table_alloc(struct mm_struct *mm) -{ - struct page *page = alloc_pages(GFP_KERNEL, 2); - - if (!page) - return NULL; - return (unsigned long *) page_to_phys(page); +static inline pte_t ptep_flush_direct(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + int active, count; + pte_t old; + + old = *ptep; + if (unlikely(pte_val(old) & _PAGE_INVALID)) + return old; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __ptep_ipte_local(addr, ptep); + else + __ptep_ipte(addr, ptep); + atomic_sub(0x10000, &mm->context.attach_count); + return old; } -void crst_table_free(struct mm_struct *mm, unsigned long *table) +static inline pte_t ptep_flush_lazy(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { - free_pages((unsigned long) table, 2); + int active, count; + pte_t old; + + old = *ptep; + if (unlikely(pte_val(old) & _PAGE_INVALID)) + return old; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if ((count & 0xffff) <= active) { + pte_val(*ptep) |= _PAGE_INVALID; + mm->context.flush_mm = 1; + } else + __ptep_ipte(addr, ptep); + atomic_sub(0x10000, &mm->context.attach_count); + return old; } -static void __crst_table_upgrade(void *arg) +static inline pgste_t pgste_get_lock(pte_t *ptep) { - struct mm_struct *mm = arg; + unsigned long new = 0; +#ifdef CONFIG_PGSTE + unsigned long old; - if (current->active_mm == mm) { - clear_user_asce(); - set_user_asce(mm); - } - __tlb_flush_local(); + preempt_disable(); + asm( + " lg %0,%2\n" + "0: lgr %1,%0\n" + " nihh %0,0xff7f\n" /* clear PCL bit in old */ + " oihh %1,0x0080\n" /* set PCL bit in new */ + " csg %0,%1,%2\n" + " jl 0b\n" + : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) + : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); +#endif + return __pgste(new); } -int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) +static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) { - unsigned long *table, *pgd; - unsigned long entry; - int flush; - - BUG_ON(limit > TASK_MAX_SIZE); - flush = 0; -repeat: - table = crst_table_alloc(mm); - if (!table) - return -ENOMEM; - spin_lock_bh(&mm->page_table_lock); - if (mm->context.asce_limit < limit) { - pgd = (unsigned long *) mm->pgd; - if (mm->context.asce_limit <= (1UL << 31)) { - entry = _REGION3_ENTRY_EMPTY; - mm->context.asce_limit = 1UL << 42; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION3; - } else { - entry = _REGION2_ENTRY_EMPTY; - mm->context.asce_limit = 1UL << 53; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION2; - } - crst_table_init(table, entry); - pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); - mm->pgd = (pgd_t *) table; - mm->task_size = mm->context.asce_limit; - table = NULL; - flush = 1; - } - spin_unlock_bh(&mm->page_table_lock); - if (table) - crst_table_free(mm, table); - if (mm->context.asce_limit < limit) - goto repeat; - if (flush) - on_each_cpu(__crst_table_upgrade, mm, 0); - return 0; +#ifdef CONFIG_PGSTE + asm( + " nihh %1,0xff7f\n" /* clear PCL bit */ + " stg %1,%0\n" + : "=Q" (ptep[PTRS_PER_PTE]) + : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) + : "cc", "memory"); + preempt_enable(); +#endif } -void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) +static inline pgste_t pgste_get(pte_t *ptep) { - pgd_t *pgd; - - if (current->active_mm == mm) { - clear_user_asce(); - __tlb_flush_mm(mm); - } - while (mm->context.asce_limit > limit) { - pgd = mm->pgd; - switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { - case _REGION_ENTRY_TYPE_R2: - mm->context.asce_limit = 1UL << 42; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION3; - break; - case _REGION_ENTRY_TYPE_R3: - mm->context.asce_limit = 1UL << 31; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_SEGMENT; - break; - default: - BUG(); - } - mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); - mm->task_size = mm->context.asce_limit; - crst_table_free(mm, (unsigned long *) pgd); - } - if (current->active_mm == mm) - set_user_asce(mm); + unsigned long pgste = 0; +#ifdef CONFIG_PGSTE + pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); +#endif + return __pgste(pgste); } +static inline void pgste_set(pte_t *ptep, pgste_t pgste) +{ #ifdef CONFIG_PGSTE + *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; +#endif +} -/** - * gmap_alloc - allocate a guest address space - * @mm: pointer to the parent mm_struct - * @limit: maximum address of the gmap address space - * - * Returns a guest address space structure. - */ -struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) +static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, + struct mm_struct *mm) { - struct gmap *gmap; - struct page *page; - unsigned long *table; - unsigned long etype, atype; - - if (limit < (1UL << 31)) { - limit = (1UL << 31) - 1; - atype = _ASCE_TYPE_SEGMENT; - etype = _SEGMENT_ENTRY_EMPTY; - } else if (limit < (1UL << 42)) { - limit = (1UL << 42) - 1; - atype = _ASCE_TYPE_REGION3; - etype = _REGION3_ENTRY_EMPTY; - } else if (limit < (1UL << 53)) { - limit = (1UL << 53) - 1; - atype = _ASCE_TYPE_REGION2; - etype = _REGION2_ENTRY_EMPTY; - } else { - limit = -1UL; - atype = _ASCE_TYPE_REGION1; - etype = _REGION1_ENTRY_EMPTY; - } - gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); - if (!gmap) - goto out; - INIT_LIST_HEAD(&gmap->crst_list); - INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); - INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); - spin_lock_init(&gmap->guest_table_lock); - gmap->mm = mm; - page = alloc_pages(GFP_KERNEL, 2); - if (!page) - goto out_free; - page->index = 0; - list_add(&page->lru, &gmap->crst_list); - table = (unsigned long *) page_to_phys(page); - crst_table_init(table, etype); - gmap->table = table; - gmap->asce = atype | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | __pa(table); - gmap->asce_end = limit; - down_write(&mm->mmap_sem); - list_add(&gmap->list, &mm->context.gmap_list); - up_write(&mm->mmap_sem); - return gmap; - -out_free: - kfree(gmap); -out: - return NULL; +#ifdef CONFIG_PGSTE + unsigned long address, bits, skey; + + if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID) + return pgste; + address = pte_val(pte) & PAGE_MASK; + skey = (unsigned long) page_get_storage_key(address); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + /* Transfer page changed & referenced bit to guest bits in pgste */ + pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ + /* Copy page access key and fetch protection bit to pgste */ + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; +#endif + return pgste; + } -EXPORT_SYMBOL_GPL(gmap_alloc); -static void gmap_flush_tlb(struct gmap *gmap) +static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry, + struct mm_struct *mm) { - if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); - else - __tlb_flush_global(); +#ifdef CONFIG_PGSTE + unsigned long address; + unsigned long nkey; + + if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID) + return; + VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); + address = pte_val(entry) & PAGE_MASK; + /* + * Set page access key and fetch protection bit from pgste. + * The guest C/R information is still in the PGSTE, set real + * key C/R to 0. + */ + nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; + page_set_storage_key(address, nkey, 0); +#endif } -static void gmap_radix_tree_free(struct radix_tree_root *root) +static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) { - struct radix_tree_iter iter; - unsigned long indices[16]; - unsigned long index; - void **slot; - int i, nr; - - /* A radix tree is freed by deleting all of its entries */ - index = 0; - do { - nr = 0; - radix_tree_for_each_slot(slot, root, &iter, index) { - indices[nr] = iter.index; - if (++nr == 16) - break; - } - for (i = 0; i < nr; i++) { - index = indices[i]; - radix_tree_delete(root, index); +#ifdef CONFIG_PGSTE + if ((pte_val(entry) & _PAGE_PRESENT) && + (pte_val(entry) & _PAGE_WRITE) && + !(pte_val(entry) & _PAGE_INVALID)) { + if (!MACHINE_HAS_ESOP) { + /* + * Without enhanced suppression-on-protection force + * the dirty bit on for all writable ptes. + */ + pte_val(entry) |= _PAGE_DIRTY; + pte_val(entry) &= ~_PAGE_PROTECT; } - } while (nr > 0); + if (!(pte_val(entry) & _PAGE_PROTECT)) + /* This pte allows write access, set user-dirty */ + pgste_val(pgste) |= PGSTE_UC_BIT; + } +#endif + *ptep = entry; + return pgste; } -/** - * gmap_free - free a guest address space - * @gmap: pointer to the guest address space structure - */ -void gmap_free(struct gmap *gmap) +static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pgste_t pgste) { - struct page *page, *next; - - /* Flush tlb. */ - if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); - else - __tlb_flush_global(); - - /* Free all segment & region tables. */ - list_for_each_entry_safe(page, next, &gmap->crst_list, lru) - __free_pages(page, 2); - gmap_radix_tree_free(&gmap->guest_to_host); - gmap_radix_tree_free(&gmap->host_to_guest); - down_write(&gmap->mm->mmap_sem); - list_del(&gmap->list); - up_write(&gmap->mm->mmap_sem); - kfree(gmap); +#ifdef CONFIG_PGSTE + if (pgste_val(pgste) & PGSTE_IN_BIT) { + pgste_val(pgste) &= ~PGSTE_IN_BIT; + ptep_notify(mm, addr, ptep); + } +#endif + return pgste; } -EXPORT_SYMBOL_GPL(gmap_free); -/** - * gmap_enable - switch primary space to the guest address space - * @gmap: pointer to the guest address space structure - */ -void gmap_enable(struct gmap *gmap) +static inline pgste_t ptep_xchg_start(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { - S390_lowcore.gmap = (unsigned long) gmap; + pgste_t pgste = __pgste(0); + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, addr, ptep, pgste); + } + return pgste; } -EXPORT_SYMBOL_GPL(gmap_enable); -/** - * gmap_disable - switch back to the standard primary address space - * @gmap: pointer to the guest address space structure - */ -void gmap_disable(struct gmap *gmap) +static inline void ptep_xchg_commit(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + pgste_t pgste, pte_t old, pte_t new) { - S390_lowcore.gmap = 0UL; + if (mm_has_pgste(mm)) { + if (pte_val(old) & _PAGE_INVALID) + pgste_set_key(ptep, pgste, new, mm); + if (pte_val(new) & _PAGE_INVALID) { + pgste = pgste_update_all(old, pgste, mm); + if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == + _PGSTE_GPS_USAGE_UNUSED) + pte_val(old) |= _PAGE_UNUSED; + } + pgste = pgste_set_pte(ptep, pgste, new); + pgste_set_unlock(ptep, pgste); + } else { + *ptep = new; + } } -EXPORT_SYMBOL_GPL(gmap_disable); -/* - * gmap_alloc_table is assumed to be called with mmap_sem held - */ -static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, - unsigned long init, unsigned long gaddr) +pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t new) { - struct page *page; - unsigned long *new; - - /* since we dont free the gmap table until gmap_free we can unlock */ - page = alloc_pages(GFP_KERNEL, 2); - if (!page) - return -ENOMEM; - new = (unsigned long *) page_to_phys(page); - crst_table_init(new, init); - spin_lock(&gmap->mm->page_table_lock); - if (*table & _REGION_ENTRY_INVALID) { - list_add(&page->lru, &gmap->crst_list); - *table = (unsigned long) new | _REGION_ENTRY_LENGTH | - (*table & _REGION_ENTRY_TYPE_MASK); - page->index = gaddr; - page = NULL; - } - spin_unlock(&gmap->mm->page_table_lock); - if (page) - __free_pages(page, 2); - return 0; + pgste_t pgste; + pte_t old; + + pgste = ptep_xchg_start(mm, addr, ptep); + old = ptep_flush_direct(mm, addr, ptep); + ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + return old; } +EXPORT_SYMBOL(ptep_xchg_direct); -/** - * __gmap_segment_gaddr - find virtual address from segment pointer - * @entry: pointer to a segment table entry in the guest address space - * - * Returns the virtual address in the guest address space for the segment - */ -static unsigned long __gmap_segment_gaddr(unsigned long *entry) +pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t new) { - struct page *page; - unsigned long offset, mask; - - offset = (unsigned long) entry / sizeof(unsigned long); - offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; - mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); - page = virt_to_page((void *)((unsigned long) entry & mask)); - return page->index + offset; + pgste_t pgste; + pte_t old; + + pgste = ptep_xchg_start(mm, addr, ptep); + old = ptep_flush_lazy(mm, addr, ptep); + ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + return old; } +EXPORT_SYMBOL(ptep_xchg_lazy); -/** - * __gmap_unlink_by_vmaddr - unlink a single segment via a host address - * @gmap: pointer to the guest address space structure - * @vmaddr: address in the host process address space - * - * Returns 1 if a TLB flush is required - */ -static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) +pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - unsigned long *entry; - int flush = 0; - - spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); - if (entry) { - flush = (*entry != _SEGMENT_ENTRY_INVALID); - *entry = _SEGMENT_ENTRY_INVALID; + pgste_t pgste; + pte_t old; + + pgste = ptep_xchg_start(mm, addr, ptep); + old = ptep_flush_lazy(mm, addr, ptep); + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(old, pgste, mm); + pgste_set(ptep, pgste); } - spin_unlock(&gmap->guest_table_lock); - return flush; + return old; } +EXPORT_SYMBOL(ptep_modify_prot_start); -/** - * __gmap_unmap_by_gaddr - unmap a single segment via a guest address - * @gmap: pointer to the guest address space structure - * @gaddr: address in the guest address space - * - * Returns 1 if a TLB flush is required - */ -static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) +void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) { - unsigned long vmaddr; + pgste_t pgste; - vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; + if (mm_has_pgste(mm)) { + pgste = pgste_get(ptep); + pgste_set_key(ptep, pgste, pte, mm); + pgste = pgste_set_pte(ptep, pgste, pte); + pgste_set_unlock(ptep, pgste); + } else { + *ptep = pte; + } } +EXPORT_SYMBOL(ptep_modify_prot_commit); -/** - * gmap_unmap_segment - unmap segment from the guest address space - * @gmap: pointer to the guest address space structure - * @to: address in the guest address space - * @len: length of the memory area to unmap - * - * Returns 0 if the unmap succeeded, -EINVAL if not. - */ -int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) +static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) { - unsigned long off; - int flush; - - if ((to | len) & (PMD_SIZE - 1)) - return -EINVAL; - if (len == 0 || to + len < to) - return -EINVAL; - - flush = 0; - down_write(&gmap->mm->mmap_sem); - for (off = 0; off < len; off += PMD_SIZE) - flush |= __gmap_unmap_by_gaddr(gmap, to + off); - up_write(&gmap->mm->mmap_sem); - if (flush) - gmap_flush_tlb(gmap); - return 0; + int active, count; + pmd_t old; + + old = *pmdp; + if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) + return old; + if (!MACHINE_HAS_IDTE) { + __pmdp_csp(pmdp); + return old; + } + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __pmdp_idte_local(addr, pmdp); + else + __pmdp_idte(addr, pmdp); + atomic_sub(0x10000, &mm->context.attach_count); + return old; +} + +static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + int active, count; + pmd_t old; + + old = *pmdp; + if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) + return old; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if ((count & 0xffff) <= active) { + pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; + mm->context.flush_mm = 1; + } else if (MACHINE_HAS_IDTE) + __pmdp_idte(addr, pmdp); + else + __pmdp_csp(pmdp); + atomic_sub(0x10000, &mm->context.attach_count); + return old; } -EXPORT_SYMBOL_GPL(gmap_unmap_segment); - -/** - * gmap_mmap_segment - map a segment to the guest address space - * @gmap: pointer to the guest address space structure - * @from: source address in the parent address space - * @to: target address in the guest address space - * @len: length of the memory area to map - * - * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. - */ -int gmap_map_segment(struct gmap *gmap, unsigned long from, - unsigned long to, unsigned long len) + +pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t new) { - unsigned long off; - int flush; - - if ((from | to | len) & (PMD_SIZE - 1)) - return -EINVAL; - if (len == 0 || from + len < from || to + len < to || - from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) - return -EINVAL; - - flush = 0; - down_write(&gmap->mm->mmap_sem); - for (off = 0; off < len; off += PMD_SIZE) { - /* Remove old translation */ - flush |= __gmap_unmap_by_gaddr(gmap, to + off); - /* Store new translation */ - if (radix_tree_insert(&gmap->guest_to_host, - (to + off) >> PMD_SHIFT, - (void *) from + off)) - break; - } - up_write(&gmap->mm->mmap_sem); - if (flush) - gmap_flush_tlb(gmap); - if (off >= len) - return 0; - gmap_unmap_segment(gmap, to, len); - return -ENOMEM; + pmd_t old; + + old = pmdp_flush_direct(mm, addr, pmdp); + *pmdp = new; + return old; } -EXPORT_SYMBOL_GPL(gmap_map_segment); - -/** - * __gmap_translate - translate a guest address to a user space address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * - * Returns user space address which corresponds to the guest address or - * -EFAULT if no such mapping exists. - * This function does not establish potentially missing page table entries. - * The mmap_sem of the mm that belongs to the address space must be held - * when this function gets called. - */ -unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) +EXPORT_SYMBOL(pmdp_xchg_direct); + +pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t new) { - unsigned long vmaddr; + pmd_t old; - vmaddr = (unsigned long) - radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); - return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; + old = pmdp_flush_lazy(mm, addr, pmdp); + *pmdp = new; + return old; } -EXPORT_SYMBOL_GPL(__gmap_translate); - -/** - * gmap_translate - translate a guest address to a user space address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * - * Returns user space address which corresponds to the guest address or - * -EFAULT if no such mapping exists. - * This function does not establish potentially missing page table entries. - */ -unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) +EXPORT_SYMBOL(pmdp_xchg_lazy); + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) { - unsigned long rc; + struct list_head *lh = (struct list_head *) pgtable; - down_read(&gmap->mm->mmap_sem); - rc = __gmap_translate(gmap, gaddr); - up_read(&gmap->mm->mmap_sem); - return rc; + assert_spin_locked(pmd_lockptr(mm, pmdp)); + + /* FIFO */ + if (!pmd_huge_pte(mm, pmdp)) + INIT_LIST_HEAD(lh); + else + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; } -EXPORT_SYMBOL_GPL(gmap_translate); -/** - * gmap_unlink - disconnect a page table from the gmap shadow tables - * @gmap: pointer to guest mapping meta data structure - * @table: pointer to the host page table - * @vmaddr: vm address associated with the host page table - */ -static void gmap_unlink(struct mm_struct *mm, unsigned long *table, - unsigned long vmaddr) +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { - struct gmap *gmap; - int flush; + struct list_head *lh; + pgtable_t pgtable; + pte_t *ptep; + + assert_spin_locked(pmd_lockptr(mm, pmdp)); - list_for_each_entry(gmap, &mm->context.gmap_list, list) { - flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); - if (flush) - gmap_flush_tlb(gmap); + /* FIFO */ + pgtable = pmd_huge_pte(mm, pmdp); + lh = (struct list_head *) pgtable; + if (list_empty(lh)) + pmd_huge_pte(mm, pmdp) = NULL; + else { + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; + list_del(lh); } + ptep = (pte_t *) pgtable; + pte_val(*ptep) = _PAGE_INVALID; + ptep++; + pte_val(*ptep) = _PAGE_INVALID; + return pgtable; } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -/** - * gmap_link - set up shadow page tables to connect a host to a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @vmaddr: vm address - * - * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT - * if the vm address is already mapped to a different guest segment. - * The mmap_sem of the mm that belongs to the address space must be held - * when this function gets called. - */ -int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) +#ifdef CONFIG_PGSTE +void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) { - struct mm_struct *mm; - unsigned long *table; - spinlock_t *ptl; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - int rc; - - /* Create higher level tables in the gmap page table */ - table = gmap->table; - if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { - table += (gaddr >> 53) & 0x7ff; - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, - gaddr & 0xffe0000000000000UL)) - return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - } - if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { - table += (gaddr >> 42) & 0x7ff; - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, - gaddr & 0xfffffc0000000000UL)) - return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - } - if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { - table += (gaddr >> 31) & 0x7ff; - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, - gaddr & 0xffffffff80000000UL)) - return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - } - table += (gaddr >> 20) & 0x7ff; - /* Walk the parent mm page table */ - mm = gmap->mm; - pgd = pgd_offset(mm, vmaddr); - VM_BUG_ON(pgd_none(*pgd)); - pud = pud_offset(pgd, vmaddr); - VM_BUG_ON(pud_none(*pud)); - pmd = pmd_offset(pud, vmaddr); - VM_BUG_ON(pmd_none(*pmd)); - /* large pmds cannot yet be handled */ - if (pmd_large(*pmd)) - return -EFAULT; - /* Link gmap segment table entry location to page table. */ - rc = radix_tree_preload(GFP_KERNEL); - if (rc) - return rc; - ptl = pmd_lock(mm, pmd); - spin_lock(&gmap->guest_table_lock); - if (*table == _SEGMENT_ENTRY_INVALID) { - rc = radix_tree_insert(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT, table); - if (!rc) - *table = pmd_val(*pmd); - } else - rc = 0; - spin_unlock(&gmap->guest_table_lock); - spin_unlock(ptl); - radix_tree_preload_end(); - return rc; + pgste_t pgste; + + /* the mm_has_pgste() check is done in set_pte_at() */ + pgste = pgste_get_lock(ptep); + pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; + pgste_set_key(ptep, pgste, entry, mm); + pgste = pgste_set_pte(ptep, pgste, entry); + pgste_set_unlock(ptep, pgste); } -/** - * gmap_fault - resolve a fault on a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @fault_flags: flags to pass down to handle_mm_fault() - * - * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT - * if the vm address is already mapped to a different guest segment. - */ -int gmap_fault(struct gmap *gmap, unsigned long gaddr, - unsigned int fault_flags) +void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned long vmaddr; - int rc; - bool unlocked; - - down_read(&gmap->mm->mmap_sem); - -retry: - unlocked = false; - vmaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(vmaddr)) { - rc = vmaddr; - goto out_up; - } - if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, - &unlocked)) { - rc = -EFAULT; - goto out_up; - } - /* - * In the case that fixup_user_fault unlocked the mmap_sem during - * faultin redo __gmap_translate to not race with a map/unmap_segment. - */ - if (unlocked) - goto retry; + pgste_t pgste; - rc = __gmap_link(gmap, gaddr, vmaddr); -out_up: - up_read(&gmap->mm->mmap_sem); - return rc; + pgste = pgste_get_lock(ptep); + pgste_val(pgste) |= PGSTE_IN_BIT; + pgste_set_unlock(ptep, pgste); } -EXPORT_SYMBOL_GPL(gmap_fault); -static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) +static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) { if (!non_swap_entry(entry)) dec_mm_counter(mm, MM_SWAPENTS); @@ -620,225 +426,99 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) free_swap_and_cache(entry); } -/* - * this function is assumed to be called with mmap_sem held - */ -void __gmap_zap(struct gmap *gmap, unsigned long gaddr) +void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, int reset) { - unsigned long vmaddr, ptev, pgstev; - pte_t *ptep, pte; - spinlock_t *ptl; + unsigned long pgstev; pgste_t pgste; + pte_t pte; - /* Find the vm address for the guest address */ - vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - if (!vmaddr) - return; - vmaddr |= gaddr & ~PMD_MASK; - /* Get pointer to the page table entry */ - ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); - if (unlikely(!ptep)) - return; - pte = *ptep; - if (!pte_swap(pte)) - goto out_pte; /* Zap unused and logically-zero pages */ pgste = pgste_get_lock(ptep); pgstev = pgste_val(pgste); - ptev = pte_val(pte); - if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || - ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { - gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm); - pte_clear(gmap->mm, vmaddr, ptep); - } + pte = *ptep; + if (pte_swap(pte) && + ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || + (pgstev & _PGSTE_GPS_ZERO))) { + ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); + pte_clear(mm, addr, ptep); + } + if (reset) + pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; pgste_set_unlock(ptep, pgste); -out_pte: - pte_unmap_unlock(ptep, ptl); } -EXPORT_SYMBOL_GPL(__gmap_zap); -void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) +void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned long gaddr, vmaddr, size; - struct vm_area_struct *vma; - - down_read(&gmap->mm->mmap_sem); - for (gaddr = from; gaddr < to; - gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { - /* Find the vm address for the guest address */ - vmaddr = (unsigned long) - radix_tree_lookup(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - if (!vmaddr) - continue; - vmaddr |= gaddr & ~PMD_MASK; - /* Find vma in the parent mm */ - vma = find_vma(gmap->mm, vmaddr); - size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); - zap_page_range(vma, vmaddr, size, NULL); - } - up_read(&gmap->mm->mmap_sem); -} -EXPORT_SYMBOL_GPL(gmap_discard); - -static LIST_HEAD(gmap_notifier_list); -static DEFINE_SPINLOCK(gmap_notifier_lock); + unsigned long ptev; + pgste_t pgste; -/** - * gmap_register_ipte_notifier - register a pte invalidation callback - * @nb: pointer to the gmap notifier block - */ -void gmap_register_ipte_notifier(struct gmap_notifier *nb) -{ - spin_lock(&gmap_notifier_lock); - list_add(&nb->list, &gmap_notifier_list); - spin_unlock(&gmap_notifier_lock); + /* Clear storage key */ + pgste = pgste_get_lock(ptep); + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | + PGSTE_GR_BIT | PGSTE_GC_BIT); + ptev = pte_val(*ptep); + if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) + page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); + pgste_set_unlock(ptep, pgste); } -EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); -/** - * gmap_unregister_ipte_notifier - remove a pte invalidation callback - * @nb: pointer to the gmap notifier block - */ -void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) -{ - spin_lock(&gmap_notifier_lock); - list_del_init(&nb->list); - spin_unlock(&gmap_notifier_lock); -} -EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); - -/** - * gmap_ipte_notify - mark a range of ptes for invalidation notification - * @gmap: pointer to guest mapping meta data structure - * @gaddr: virtual address in the guest address space - * @len: size of area - * - * Returns 0 if for each page in the given range a gmap mapping exists and - * the invalidation notification could be set. If the gmap mapping is missing - * for one or more pages -EFAULT is returned. If no memory could be allocated - * -ENOMEM is returned. This function establishes missing page table entries. +/* + * Test and reset if a guest page is dirty */ -int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) +bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) { - unsigned long addr; spinlock_t *ptl; - pte_t *ptep, entry; pgste_t pgste; - bool unlocked; - int rc = 0; - - if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) - return -EINVAL; - down_read(&gmap->mm->mmap_sem); - while (len) { - unlocked = false; - /* Convert gmap address and connect the page tables */ - addr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(addr)) { - rc = addr; - break; - } - /* Get the page mapped */ - if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE, - &unlocked)) { - rc = -EFAULT; - break; - } - /* While trying to map mmap_sem got unlocked. Let us retry */ - if (unlocked) - continue; - rc = __gmap_link(gmap, gaddr, addr); - if (rc) - break; - /* Walk the process page table, lock and get pte pointer */ - ptep = get_locked_pte(gmap->mm, addr, &ptl); - VM_BUG_ON(!ptep); - /* Set notification bit in the pgste of the pte */ - entry = *ptep; - if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { - pgste = pgste_get_lock(ptep); - pgste_val(pgste) |= PGSTE_IN_BIT; - pgste_set_unlock(ptep, pgste); - gaddr += PAGE_SIZE; - len -= PAGE_SIZE; - } - pte_unmap_unlock(ptep, ptl); - } - up_read(&gmap->mm->mmap_sem); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_ipte_notify); - -/** - * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. - * @mm: pointer to the process mm_struct - * @addr: virtual address in the process address space - * @pte: pointer to the page table entry - * - * This function is assumed to be called with the page table lock held - * for the pte to notify. - */ -void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) -{ - unsigned long offset, gaddr; - unsigned long *table; - struct gmap_notifier *nb; - struct gmap *gmap; - - offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); - offset = offset * (4096 / sizeof(pte_t)); - spin_lock(&gmap_notifier_lock); - list_for_each_entry(gmap, &mm->context.gmap_list, list) { - table = radix_tree_lookup(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (!table) - continue; - gaddr = __gmap_segment_gaddr(table) + offset; - list_for_each_entry(nb, &gmap_notifier_list, list) - nb->notifier_call(gmap, gaddr); + pte_t *ptep; + pte_t pte; + bool dirty; + + ptep = get_locked_pte(mm, addr, &ptl); + if (unlikely(!ptep)) + return false; + + pgste = pgste_get_lock(ptep); + dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); + pgste_val(pgste) &= ~PGSTE_UC_BIT; + pte = *ptep; + if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { + pgste = pgste_ipte_notify(mm, addr, ptep, pgste); + __ptep_ipte(addr, ptep); + if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) + pte_val(pte) |= _PAGE_PROTECT; + else + pte_val(pte) |= _PAGE_INVALID; + *ptep = pte; } - spin_unlock(&gmap_notifier_lock); + pgste_set_unlock(ptep, pgste); + + spin_unlock(ptl); + return dirty; } -EXPORT_SYMBOL_GPL(gmap_do_ipte_notify); +EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, - unsigned long key, bool nq) + unsigned char key, bool nq) { + unsigned long keyul; spinlock_t *ptl; pgste_t old, new; pte_t *ptep; - bool unlocked; down_read(&mm->mmap_sem); -retry: - unlocked = false; ptep = get_locked_pte(mm, addr, &ptl); if (unlikely(!ptep)) { up_read(&mm->mmap_sem); return -EFAULT; } - if (!(pte_val(*ptep) & _PAGE_INVALID) && - (pte_val(*ptep) & _PAGE_PROTECT)) { - pte_unmap_unlock(ptep, ptl); - /* - * We do not really care about unlocked. We will retry either - * way. But this allows fixup_user_fault to enable userfaultfd. - */ - if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE, - &unlocked)) { - up_read(&mm->mmap_sem); - return -EFAULT; - } - goto retry; - } new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; - pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + keyul = (unsigned long) key; + pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; + pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; if (!(pte_val(*ptep) & _PAGE_INVALID)) { unsigned long address, bits, skey; @@ -863,13 +543,12 @@ retry: } EXPORT_SYMBOL(set_guest_storage_key); -unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) +unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr) { + unsigned char key; spinlock_t *ptl; pgste_t pgste; pte_t *ptep; - uint64_t physaddr; - unsigned long key = 0; down_read(&mm->mmap_sem); ptep = get_locked_pte(mm, addr, &ptl); @@ -880,13 +559,12 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) pgste = pgste_get_lock(ptep); if (pte_val(*ptep) & _PAGE_INVALID) { - key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; + key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56; key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48; key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48; } else { - physaddr = pte_val(*ptep) & PAGE_MASK; - key = page_get_storage_key(physaddr); + key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); /* Reflect guest's logical view, not physical */ if (pgste_val(pgste) & PGSTE_GR_BIT) @@ -901,471 +579,4 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) return key; } EXPORT_SYMBOL(get_guest_storage_key); - -static int page_table_allocate_pgste_min = 0; -static int page_table_allocate_pgste_max = 1; -int page_table_allocate_pgste = 0; -EXPORT_SYMBOL(page_table_allocate_pgste); - -static struct ctl_table page_table_sysctl[] = { - { - .procname = "allocate_pgste", - .data = &page_table_allocate_pgste, - .maxlen = sizeof(int), - .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec, - .extra1 = &page_table_allocate_pgste_min, - .extra2 = &page_table_allocate_pgste_max, - }, - { } -}; - -static struct ctl_table page_table_sysctl_dir[] = { - { - .procname = "vm", - .maxlen = 0, - .mode = 0555, - .child = page_table_sysctl, - }, - { } -}; - -static int __init page_table_register_sysctl(void) -{ - return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM; -} -__initcall(page_table_register_sysctl); - -#else /* CONFIG_PGSTE */ - -static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table, - unsigned long vmaddr) -{ -} - -#endif /* CONFIG_PGSTE */ - -static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) -{ - unsigned int old, new; - - do { - old = atomic_read(v); - new = old ^ bits; - } while (atomic_cmpxchg(v, old, new) != old); - return new; -} - -/* - * page table entry allocation/free routines. - */ -unsigned long *page_table_alloc(struct mm_struct *mm) -{ - unsigned long *table; - struct page *page; - unsigned int mask, bit; - - /* Try to get a fragment of a 4K page as a 2K page table */ - if (!mm_alloc_pgste(mm)) { - table = NULL; - spin_lock_bh(&mm->context.list_lock); - if (!list_empty(&mm->context.pgtable_list)) { - page = list_first_entry(&mm->context.pgtable_list, - struct page, lru); - mask = atomic_read(&page->_mapcount); - mask = (mask | (mask >> 4)) & 3; - if (mask != 3) { - table = (unsigned long *) page_to_phys(page); - bit = mask & 1; /* =1 -> second 2K */ - if (bit) - table += PTRS_PER_PTE; - atomic_xor_bits(&page->_mapcount, 1U << bit); - list_del(&page->lru); - } - } - spin_unlock_bh(&mm->context.list_lock); - if (table) - return table; - } - /* Allocate a fresh page */ - page = alloc_page(GFP_KERNEL|__GFP_REPEAT); - if (!page) - return NULL; - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - /* Initialize page table */ - table = (unsigned long *) page_to_phys(page); - if (mm_alloc_pgste(mm)) { - /* Return 4K page table with PGSTEs */ - atomic_set(&page->_mapcount, 3); - clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); - clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); - } else { - /* Return the first 2K fragment of the page */ - atomic_set(&page->_mapcount, 1); - clear_table(table, _PAGE_INVALID, PAGE_SIZE); - spin_lock_bh(&mm->context.list_lock); - list_add(&page->lru, &mm->context.pgtable_list); - spin_unlock_bh(&mm->context.list_lock); - } - return table; -} - -void page_table_free(struct mm_struct *mm, unsigned long *table) -{ - struct page *page; - unsigned int bit, mask; - - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - if (!mm_alloc_pgste(mm)) { - /* Free 2K page table fragment of a 4K page */ - bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.list_lock); - mask = atomic_xor_bits(&page->_mapcount, 1U << bit); - if (mask & 3) - list_add(&page->lru, &mm->context.pgtable_list); - else - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - if (mask != 0) - return; - } - - pgtable_page_dtor(page); - atomic_set(&page->_mapcount, -1); - __free_page(page); -} - -void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, - unsigned long vmaddr) -{ - struct mm_struct *mm; - struct page *page; - unsigned int bit, mask; - - mm = tlb->mm; - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - if (mm_alloc_pgste(mm)) { - gmap_unlink(mm, table, vmaddr); - table = (unsigned long *) (__pa(table) | 3); - tlb_remove_table(tlb, table); - return; - } - bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.list_lock); - mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); - if (mask & 3) - list_add_tail(&page->lru, &mm->context.pgtable_list); - else - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - table = (unsigned long *) (__pa(table) | (1U << bit)); - tlb_remove_table(tlb, table); -} - -static void __tlb_remove_table(void *_table) -{ - unsigned int mask = (unsigned long) _table & 3; - void *table = (void *)((unsigned long) _table ^ mask); - struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - - switch (mask) { - case 0: /* pmd or pud */ - free_pages((unsigned long) table, 2); - break; - case 1: /* lower 2K of a 4K page table */ - case 2: /* higher 2K of a 4K page table */ - if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0) - break; - /* fallthrough */ - case 3: /* 4K page table with pgstes */ - pgtable_page_dtor(page); - atomic_set(&page->_mapcount, -1); - __free_page(page); - break; - } -} - -static void tlb_remove_table_smp_sync(void *arg) -{ - /* Simply deliver the interrupt */ -} - -static void tlb_remove_table_one(void *table) -{ - /* - * This isn't an RCU grace period and hence the page-tables cannot be - * assumed to be actually RCU-freed. - * - * It is however sufficient for software page-table walkers that rely - * on IRQ disabling. See the comment near struct mmu_table_batch. - */ - smp_call_function(tlb_remove_table_smp_sync, NULL, 1); - __tlb_remove_table(table); -} - -static void tlb_remove_table_rcu(struct rcu_head *head) -{ - struct mmu_table_batch *batch; - int i; - - batch = container_of(head, struct mmu_table_batch, rcu); - - for (i = 0; i < batch->nr; i++) - __tlb_remove_table(batch->tables[i]); - - free_page((unsigned long)batch); -} - -void tlb_table_flush(struct mmu_gather *tlb) -{ - struct mmu_table_batch **batch = &tlb->batch; - - if (*batch) { - call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); - *batch = NULL; - } -} - -void tlb_remove_table(struct mmu_gather *tlb, void *table) -{ - struct mmu_table_batch **batch = &tlb->batch; - - tlb->mm->context.flush_mm = 1; - if (*batch == NULL) { - *batch = (struct mmu_table_batch *) - __get_free_page(GFP_NOWAIT | __GFP_NOWARN); - if (*batch == NULL) { - __tlb_flush_mm_lazy(tlb->mm); - tlb_remove_table_one(table); - return; - } - (*batch)->nr = 0; - } - (*batch)->tables[(*batch)->nr++] = table; - if ((*batch)->nr == MAX_TABLE_BATCH) - tlb_flush_mmu(tlb); -} - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline void thp_split_vma(struct vm_area_struct *vma) -{ - unsigned long addr; - - for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) - follow_page(vma, addr, FOLL_SPLIT); -} - -static inline void thp_split_mm(struct mm_struct *mm) -{ - struct vm_area_struct *vma; - - for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { - thp_split_vma(vma); - vma->vm_flags &= ~VM_HUGEPAGE; - vma->vm_flags |= VM_NOHUGEPAGE; - } - mm->def_flags |= VM_NOHUGEPAGE; -} -#else -static inline void thp_split_mm(struct mm_struct *mm) -{ -} -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - -/* - * switch on pgstes for its userspace process (for kvm) - */ -int s390_enable_sie(void) -{ - struct mm_struct *mm = current->mm; - - /* Do we have pgstes? if yes, we are done */ - if (mm_has_pgste(mm)) - return 0; - /* Fail if the page tables are 2K */ - if (!mm_alloc_pgste(mm)) - return -EINVAL; - down_write(&mm->mmap_sem); - mm->context.has_pgste = 1; - /* split thp mappings and disable thp for future mappings */ - thp_split_mm(mm); - up_write(&mm->mmap_sem); - return 0; -} -EXPORT_SYMBOL_GPL(s390_enable_sie); - -/* - * Enable storage key handling from now on and initialize the storage - * keys with the default key. - */ -static int __s390_enable_skey(pte_t *pte, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - unsigned long ptev; - pgste_t pgste; - - pgste = pgste_get_lock(pte); - /* - * Remove all zero page mappings, - * after establishing a policy to forbid zero page mappings - * following faults for that page will get fresh anonymous pages - */ - if (is_zero_pfn(pte_pfn(*pte))) { - ptep_flush_direct(walk->mm, addr, pte); - pte_val(*pte) = _PAGE_INVALID; - } - /* Clear storage key */ - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | - PGSTE_GR_BIT | PGSTE_GC_BIT); - ptev = pte_val(*pte); - if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) - page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); - pgste_set_unlock(pte, pgste); - return 0; -} - -int s390_enable_skey(void) -{ - struct mm_walk walk = { .pte_entry = __s390_enable_skey }; - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - int rc = 0; - - down_write(&mm->mmap_sem); - if (mm_use_skey(mm)) - goto out_up; - - mm->context.use_skey = 1; - for (vma = mm->mmap; vma; vma = vma->vm_next) { - if (ksm_madvise(vma, vma->vm_start, vma->vm_end, - MADV_UNMERGEABLE, &vma->vm_flags)) { - mm->context.use_skey = 0; - rc = -ENOMEM; - goto out_up; - } - } - mm->def_flags &= ~VM_MERGEABLE; - - walk.mm = mm; - walk_page_range(0, TASK_SIZE, &walk); - -out_up: - up_write(&mm->mmap_sem); - return rc; -} -EXPORT_SYMBOL_GPL(s390_enable_skey); - -/* - * Reset CMMA state, make all pages stable again. - */ -static int __s390_reset_cmma(pte_t *pte, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - pgste_t pgste; - - pgste = pgste_get_lock(pte); - pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; - pgste_set_unlock(pte, pgste); - return 0; -} - -void s390_reset_cmma(struct mm_struct *mm) -{ - struct mm_walk walk = { .pte_entry = __s390_reset_cmma }; - - down_write(&mm->mmap_sem); - walk.mm = mm; - walk_page_range(0, TASK_SIZE, &walk); - up_write(&mm->mmap_sem); -} -EXPORT_SYMBOL_GPL(s390_reset_cmma); - -/* - * Test and reset if a guest page is dirty - */ -bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap) -{ - pte_t *pte; - spinlock_t *ptl; - bool dirty = false; - - pte = get_locked_pte(gmap->mm, address, &ptl); - if (unlikely(!pte)) - return false; - - if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte)) - dirty = true; - - spin_unlock(ptl); - return dirty; -} -EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty); - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp) -{ - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - /* No need to flush TLB - * On s390 reference bits are in storage key and never in TLB */ - return pmdp_test_and_clear_young(vma, address, pmdp); -} - -int pmdp_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp, - pmd_t entry, int dirty) -{ - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - - entry = pmd_mkyoung(entry); - if (dirty) - entry = pmd_mkdirty(entry); - if (pmd_same(*pmdp, entry)) - return 0; - pmdp_invalidate(vma, address, pmdp); - set_pmd_at(vma->vm_mm, address, pmdp, entry); - return 1; -} - -void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pgtable) -{ - struct list_head *lh = (struct list_head *) pgtable; - - assert_spin_locked(pmd_lockptr(mm, pmdp)); - - /* FIFO */ - if (!pmd_huge_pte(mm, pmdp)) - INIT_LIST_HEAD(lh); - else - list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); - pmd_huge_pte(mm, pmdp) = pgtable; -} - -pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) -{ - struct list_head *lh; - pgtable_t pgtable; - pte_t *ptep; - - assert_spin_locked(pmd_lockptr(mm, pmdp)); - - /* FIFO */ - pgtable = pmd_huge_pte(mm, pmdp); - lh = (struct list_head *) pgtable; - if (list_empty(lh)) - pmd_huge_pte(mm, pmdp) = NULL; - else { - pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; - list_del(lh); - } - ptep = (pte_t *) pgtable; - pte_val(*ptep) = _PAGE_INVALID; - ptep++; - pte_val(*ptep) = _PAGE_INVALID; - return pgtable; -} -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile index 1bd2301..496e4a7 100644 --- a/arch/s390/oprofile/Makefile +++ b/arch/s390/oprofile/Makefile @@ -6,5 +6,5 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) -oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-y := $(DRIVER_OBJS) init.o oprofile-y += hwsampler.o diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c deleted file mode 100644 index 1884e17..0000000 --- a/arch/s390/oprofile/backtrace.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * S390 Version - * Copyright IBM Corp. 2005 - * Author(s): Andreas Krebbel <Andreas.Krebbel@de.ibm.com> - */ - -#include <linux/oprofile.h> - -#include <asm/processor.h> /* for struct stack_frame */ - -static unsigned long -__show_trace(unsigned int *depth, unsigned long sp, - unsigned long low, unsigned long high) -{ - struct stack_frame *sf; - struct pt_regs *regs; - - while (*depth) { - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - (*depth)--; - oprofile_add_trace(sf->gprs[8]); - - /* Follow the backchain. */ - while (*depth) { - low = sp; - sp = sf->back_chain; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - (*depth)--; - oprofile_add_trace(sf->gprs[8]); - - } - - if (*depth == 0) - break; - - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - (*depth)--; - oprofile_add_trace(sf->gprs[8]); - low = sp; - sp = regs->gprs[15]; - } - return sp; -} - -void s390_backtrace(struct pt_regs * const regs, unsigned int depth) -{ - unsigned long head, frame_size; - struct stack_frame* head_sf; - - if (user_mode(regs)) - return; - - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - head = regs->gprs[15]; - head_sf = (struct stack_frame*)head; - - if (!head_sf->back_chain) - return; - - head = head_sf->back_chain; - - head = __show_trace(&depth, head, - S390_lowcore.async_stack + frame_size - ASYNC_SIZE, - S390_lowcore.async_stack + frame_size); - - __show_trace(&depth, head, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); -} diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 9cfa2ff..791935a 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -20,8 +20,6 @@ #include "../../../drivers/oprofile/oprof.h" -extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); - #include "hwsampler.h" #include "op_counter.h" @@ -456,6 +454,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break; + case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break; default: return -ENODEV; } } @@ -494,6 +493,24 @@ static void oprofile_hwsampler_exit(void) hwsampler_shutdown(); } +static int __s390_backtrace(void *data, unsigned long address) +{ + unsigned int *depth = data; + + if (*depth == 0) + return 1; + (*depth)--; + oprofile_add_trace(address); + return 0; +} + +static void s390_backtrace(struct pt_regs *regs, unsigned int depth) +{ + if (user_mode(regs)) + return; + dump_trace(__s390_backtrace, &depth, NULL, regs->gprs[15]); +} + int __init oprofile_arch_init(struct oprofile_operations *ops) { ops->backtrace = s390_backtrace; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 8f19c8f..9fd59a7 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -637,11 +637,9 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev) int pcibios_add_device(struct pci_dev *pdev) { - struct zpci_dev *zdev = to_zpci(pdev); struct resource *res; int i; - zdev->pdev = pdev; pdev->dev.groups = zpci_attr_groups; zpci_map_resources(pdev); @@ -664,8 +662,7 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask) { struct zpci_dev *zdev = to_zpci(pdev); - zdev->pdev = pdev; - zpci_debug_init_device(zdev); + zpci_debug_init_device(zdev, dev_name(&pdev->dev)); zpci_fmb_enable_device(zdev); return pci_enable_resources(pdev, mask); @@ -677,7 +674,6 @@ void pcibios_disable_device(struct pci_dev *pdev) zpci_fmb_disable_device(zdev); zpci_debug_exit_device(zdev); - zdev->pdev = NULL; } #ifdef CONFIG_HIBERNATE_CALLBACKS @@ -864,8 +860,11 @@ static inline int barsize(u8 size) static int zpci_mem_init(void) { + BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) || + __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb)); + zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb), - 16, 0, NULL); + __alignof__(struct zpci_fmb), 0, NULL); if (!zdev_fmb_cache) goto error_fmb; diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index d6e411e..21591dd 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -8,13 +8,19 @@ #define KMSG_COMPONENT "zpci" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/compat.h> #include <linux/kernel.h> +#include <linux/miscdevice.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/delay.h> #include <linux/pci.h> +#include <linux/uaccess.h> #include <asm/pci_debug.h> #include <asm/pci_clp.h> +#include <asm/compat.h> +#include <asm/clp.h> +#include <uapi/asm/clp.h> static inline void zpci_err_clp(unsigned int rsp, int rc) { @@ -27,21 +33,43 @@ static inline void zpci_err_clp(unsigned int rsp, int rc) } /* - * Call Logical Processor - * Retry logic is handled by the caller. + * Call Logical Processor with c=1, lps=0 and command 1 + * to get the bit mask of installed logical processors */ -static inline u8 clp_instr(void *data) +static inline int clp_get_ilp(unsigned long *ilp) +{ + unsigned long mask; + int cc = 3; + + asm volatile ( + " .insn rrf,0xb9a00000,%[mask],%[cmd],8,0\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [mask] "=d" (mask) : [cmd] "a" (1) + : "cc"); + *ilp = mask; + return cc; +} + +/* + * Call Logical Processor with c=0, the give constant lps and an lpcb request. + */ +static inline int clp_req(void *data, unsigned int lps) { struct { u8 _[CLP_BLK_SIZE]; } *req = data; u64 ignored; - u8 cc; + int cc = 3; asm volatile ( - " .insn rrf,0xb9a00000,%[ign],%[req],0x0,0x2\n" - " ipm %[cc]\n" + " .insn rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n" + "0: ipm %[cc]\n" " srl %[cc],28\n" - : [cc] "=d" (cc), [ign] "=d" (ignored), "+m" (*req) - : [req] "a" (req) + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [ign] "=d" (ignored), "+m" (*req) + : [req] "a" (req), [lps] "i" (lps) : "cc"); return cc; } @@ -90,7 +118,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid) rrb->response.hdr.len = sizeof(rrb->response); rrb->request.pfgid = pfgid; - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) clp_store_query_pci_fngrp(zdev, &rrb->response); else { @@ -143,7 +171,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) rrb->response.hdr.len = sizeof(rrb->response); rrb->request.fh = fh; - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { rc = clp_store_query_pci_fn(zdev, &rrb->response); if (rc) @@ -214,7 +242,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) rrb->request.oc = command; rrb->request.ndas = nr_dma_as; - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) { retries--; if (retries < 0) @@ -280,7 +308,7 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, rrb->request.resume_token = resume_token; /* Get PCI function handle list */ - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { zpci_err("List PCI FN:\n"); zpci_err_clp(rrb->response.hdr.rsp, rc); @@ -391,3 +419,198 @@ int clp_rescan_pci_devices_simple(void) clp_free_block(rrb); return rc; } + +static int clp_base_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_BASE) ? -EOPNOTSUPP : 0; +} + +static int clp_base_command(struct clp_req *req, struct clp_req_hdr *lpcb) +{ + switch (lpcb->cmd) { + case 0x0001: /* store logical-processor characteristics */ + return clp_base_slpc(req, (void *) lpcb); + default: + return -EINVAL; + } +} + +static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_list(struct clp_req *req, struct clp_req_rsp_list_pci *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + if (lpcb->request.reserved2 != 0) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_query(struct clp_req *req, + struct clp_req_rsp_query_pci *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_query_grp(struct clp_req *req, + struct clp_req_rsp_query_pci_grp *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0 || + lpcb->request.reserved4 != 0) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_command(struct clp_req *req, struct clp_req_hdr *lpcb) +{ + switch (lpcb->cmd) { + case 0x0001: /* store logical-processor characteristics */ + return clp_pci_slpc(req, (void *) lpcb); + case 0x0002: /* list PCI functions */ + return clp_pci_list(req, (void *) lpcb); + case 0x0003: /* query PCI function */ + return clp_pci_query(req, (void *) lpcb); + case 0x0004: /* query PCI function group */ + return clp_pci_query_grp(req, (void *) lpcb); + default: + return -EINVAL; + } +} + +static int clp_normal_command(struct clp_req *req) +{ + struct clp_req_hdr *lpcb; + void __user *uptr; + int rc; + + rc = -EINVAL; + if (req->lps != 0 && req->lps != 2) + goto out; + + rc = -ENOMEM; + lpcb = clp_alloc_block(GFP_KERNEL); + if (!lpcb) + goto out; + + rc = -EFAULT; + uptr = (void __force __user *)(unsigned long) req->data_p; + if (copy_from_user(lpcb, uptr, PAGE_SIZE) != 0) + goto out_free; + + rc = -EINVAL; + if (lpcb->fmt != 0 || lpcb->reserved1 != 0 || lpcb->reserved2 != 0) + goto out_free; + + switch (req->lps) { + case 0: + rc = clp_base_command(req, lpcb); + break; + case 2: + rc = clp_pci_command(req, lpcb); + break; + } + if (rc) + goto out_free; + + rc = -EFAULT; + if (copy_to_user(uptr, lpcb, PAGE_SIZE) != 0) + goto out_free; + + rc = 0; + +out_free: + clp_free_block(lpcb); +out: + return rc; +} + +static int clp_immediate_command(struct clp_req *req) +{ + void __user *uptr; + unsigned long ilp; + int exists; + + if (req->cmd > 1 || clp_get_ilp(&ilp) != 0) + return -EINVAL; + + uptr = (void __force __user *)(unsigned long) req->data_p; + if (req->cmd == 0) { + /* Command code 0: test for a specific processor */ + exists = test_bit_inv(req->lps, &ilp); + return put_user(exists, (int __user *) uptr); + } + /* Command code 1: return bit mask of installed processors */ + return put_user(ilp, (unsigned long __user *) uptr); +} + +static long clp_misc_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct clp_req req; + void __user *argp; + + if (cmd != CLP_SYNC) + return -EINVAL; + + argp = is_compat_task() ? compat_ptr(arg) : (void __user *) arg; + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + if (req.r != 0) + return -EINVAL; + return req.c ? clp_immediate_command(&req) : clp_normal_command(&req); +} + +static int clp_misc_release(struct inode *inode, struct file *filp) +{ + return 0; +} + +static const struct file_operations clp_misc_fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .release = clp_misc_release, + .unlocked_ioctl = clp_misc_ioctl, + .compat_ioctl = clp_misc_ioctl, + .llseek = no_llseek, +}; + +static struct miscdevice clp_misc_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = "clp", + .fops = &clp_misc_fops, +}; + +static int __init clp_misc_init(void) +{ + return misc_register(&clp_misc_device); +} + +device_initcall(clp_misc_init); diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 4129b0a..c555de3 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -128,10 +128,9 @@ static const struct file_operations debugfs_pci_perf_fops = { .release = single_release, }; -void zpci_debug_init_device(struct zpci_dev *zdev) +void zpci_debug_init_device(struct zpci_dev *zdev, const char *name) { - zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev), - debugfs_root); + zdev->debugfs_dev = debugfs_create_dir(name, debugfs_root); if (IS_ERR(zdev->debugfs_dev)) zdev->debugfs_dev = NULL; diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 4638b93..a06ce80 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -217,27 +217,29 @@ void dma_cleanup_tables(unsigned long *table) dma_free_cpu_table(table); } -static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, +static unsigned long __dma_alloc_iommu(struct device *dev, unsigned long start, int size) { + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long boundary_size; - boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1, + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, start, size, 0, boundary_size, 0); } -static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size) +static unsigned long dma_alloc_iommu(struct device *dev, int size) { + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long offset, flags; int wrap = 0; spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); - offset = __dma_alloc_iommu(zdev, zdev->next_bit, size); + offset = __dma_alloc_iommu(dev, zdev->next_bit, size); if (offset == -1) { /* wrap-around */ - offset = __dma_alloc_iommu(zdev, 0, size); + offset = __dma_alloc_iommu(dev, 0, size); wrap = 1; } @@ -251,8 +253,9 @@ static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size) return offset; } -static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size) +static void dma_free_iommu(struct device *dev, unsigned long offset, int size) { + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long flags; spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); @@ -293,7 +296,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, /* This rounds up number of pages based on size and offset */ nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); - iommu_page_index = dma_alloc_iommu(zdev, nr_pages); + iommu_page_index = dma_alloc_iommu(dev, nr_pages); if (iommu_page_index == -1) { ret = -ENOSPC; goto out_err; @@ -319,7 +322,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, return dma_addr + (offset & ~PAGE_MASK); out_free: - dma_free_iommu(zdev, iommu_page_index, nr_pages); + dma_free_iommu(dev, iommu_page_index, nr_pages); out_err: zpci_err("map error:\n"); zpci_err_dma(ret, pa); @@ -346,7 +349,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, atomic64_add(npages, &zdev->unmapped_pages); iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; - dma_free_iommu(zdev, iommu_page_index, npages); + dma_free_iommu(dev, iommu_page_index, npages); } static void *s390_dma_alloc(struct device *dev, size_t size, diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b0e0475..fb2a9a5 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -46,11 +46,14 @@ struct zpci_ccdf_avail { static void __zpci_event_error(struct zpci_ccdf_err *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); - struct pci_dev *pdev = zdev ? zdev->pdev : NULL; + struct pci_dev *pdev = NULL; zpci_err("error CCDF:\n"); zpci_err_hex(ccdf, sizeof(*ccdf)); + if (zdev) + pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN); + pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n", pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); @@ -58,6 +61,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) return; pdev->error_state = pci_channel_io_perm_failure; + pci_dev_put(pdev); } void zpci_event_error(void *data) @@ -69,9 +73,12 @@ void zpci_event_error(void *data) static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); - struct pci_dev *pdev = zdev ? zdev->pdev : NULL; + struct pci_dev *pdev = NULL; int ret; + if (zdev) + pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN); + pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n", pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); zpci_err("avail CCDF:\n"); @@ -138,6 +145,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) default: break; } + if (pdev) + pci_dev_put(pdev); } void zpci_event_availability(void *data) diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index eb5efae..50b8b7d 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -93,13 +93,17 @@ out_deconfigure: static int disable_slot(struct hotplug_slot *hotplug_slot) { struct slot *slot = hotplug_slot->private; + struct pci_dev *pdev; int rc; if (!zpci_fn_configured(slot->zdev->state)) return -EIO; - if (slot->zdev->pdev) - pci_stop_and_remove_bus_device_locked(slot->zdev->pdev); + pdev = pci_get_slot(slot->zdev->bus, ZPCI_DEVFN); + if (pdev) { + pci_stop_and_remove_bus_device_locked(pdev); + pci_dev_put(pdev); + } rc = zpci_disable_device(slot->zdev); if (rc) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 286782c..17ad574 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -185,14 +185,12 @@ static void _free_lcu(struct alias_lcu *lcu) */ int dasd_alias_make_device_known_to_lcu(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; unsigned long flags; struct alias_server *server, *newserver; struct alias_lcu *lcu, *newlcu; struct dasd_uid uid; - private = (struct dasd_eckd_private *) device->private; - device->discipline->get_uid(device, &uid); spin_lock_irqsave(&aliastree.lock, flags); server = _find_server(&uid); @@ -244,14 +242,13 @@ int dasd_alias_make_device_known_to_lcu(struct dasd_device *device) */ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; unsigned long flags; struct alias_lcu *lcu; struct alias_server *server; int was_pending; struct dasd_uid uid; - private = (struct dasd_eckd_private *) device->private; lcu = private->lcu; /* nothing to do if already disconnected */ if (!lcu) @@ -316,25 +313,15 @@ static int _add_device_to_lcu(struct alias_lcu *lcu, struct dasd_device *pos) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; struct alias_pav_group *group; struct dasd_uid uid; - unsigned long flags; - - private = (struct dasd_eckd_private *) device->private; - /* only lock if not already locked */ - if (device != pos) - spin_lock_irqsave_nested(get_ccwdev_lock(device->cdev), flags, - CDEV_NESTED_SECOND); private->uid.type = lcu->uac->unit[private->uid.real_unit_addr].ua_type; private->uid.base_unit_addr = lcu->uac->unit[private->uid.real_unit_addr].base_ua; uid = private->uid; - if (device != pos) - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); - /* if we have no PAV anyway, we don't need to bother with PAV groups */ if (lcu->pav == NO_PAV) { list_move(&device->alias_list, &lcu->active_devices); @@ -370,10 +357,9 @@ static int _add_device_to_lcu(struct alias_lcu *lcu, static void _remove_device_from_lcu(struct alias_lcu *lcu, struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; struct alias_pav_group *group; - private = (struct dasd_eckd_private *) device->private; list_move(&device->alias_list, &lcu->inactive_devices); group = private->pavgroup; if (!group) @@ -411,6 +397,130 @@ suborder_not_supported(struct dasd_ccw_req *cqr) return 0; } +/* + * This function tries to lock all devices on an lcu via trylock + * return NULL on success otherwise return first failed device + */ +static struct dasd_device *_trylock_all_devices_on_lcu(struct alias_lcu *lcu, + struct dasd_device *pos) + +{ + struct alias_pav_group *pavgroup; + struct dasd_device *device; + + list_for_each_entry(device, &lcu->active_devices, alias_list) { + if (device == pos) + continue; + if (!spin_trylock(get_ccwdev_lock(device->cdev))) + return device; + } + list_for_each_entry(device, &lcu->inactive_devices, alias_list) { + if (device == pos) + continue; + if (!spin_trylock(get_ccwdev_lock(device->cdev))) + return device; + } + list_for_each_entry(pavgroup, &lcu->grouplist, group) { + list_for_each_entry(device, &pavgroup->baselist, alias_list) { + if (device == pos) + continue; + if (!spin_trylock(get_ccwdev_lock(device->cdev))) + return device; + } + list_for_each_entry(device, &pavgroup->aliaslist, alias_list) { + if (device == pos) + continue; + if (!spin_trylock(get_ccwdev_lock(device->cdev))) + return device; + } + } + return NULL; +} + +/* + * unlock all devices except the one that is specified as pos + * stop if enddev is specified and reached + */ +static void _unlock_all_devices_on_lcu(struct alias_lcu *lcu, + struct dasd_device *pos, + struct dasd_device *enddev) + +{ + struct alias_pav_group *pavgroup; + struct dasd_device *device; + + list_for_each_entry(device, &lcu->active_devices, alias_list) { + if (device == pos) + continue; + if (device == enddev) + return; + spin_unlock(get_ccwdev_lock(device->cdev)); + } + list_for_each_entry(device, &lcu->inactive_devices, alias_list) { + if (device == pos) + continue; + if (device == enddev) + return; + spin_unlock(get_ccwdev_lock(device->cdev)); + } + list_for_each_entry(pavgroup, &lcu->grouplist, group) { + list_for_each_entry(device, &pavgroup->baselist, alias_list) { + if (device == pos) + continue; + if (device == enddev) + return; + spin_unlock(get_ccwdev_lock(device->cdev)); + } + list_for_each_entry(device, &pavgroup->aliaslist, alias_list) { + if (device == pos) + continue; + if (device == enddev) + return; + spin_unlock(get_ccwdev_lock(device->cdev)); + } + } +} + +/* + * this function is needed because the locking order + * device lock -> lcu lock + * needs to be assured when iterating over devices in an LCU + * + * if a device is specified in pos then the device lock is already hold + */ +static void _trylock_and_lock_lcu_irqsave(struct alias_lcu *lcu, + struct dasd_device *pos, + unsigned long *flags) +{ + struct dasd_device *failed; + + do { + spin_lock_irqsave(&lcu->lock, *flags); + failed = _trylock_all_devices_on_lcu(lcu, pos); + if (failed) { + _unlock_all_devices_on_lcu(lcu, pos, failed); + spin_unlock_irqrestore(&lcu->lock, *flags); + cpu_relax(); + } + } while (failed); +} + +static void _trylock_and_lock_lcu(struct alias_lcu *lcu, + struct dasd_device *pos) +{ + struct dasd_device *failed; + + do { + spin_lock(&lcu->lock); + failed = _trylock_all_devices_on_lcu(lcu, pos); + if (failed) { + _unlock_all_devices_on_lcu(lcu, pos, failed); + spin_unlock(&lcu->lock); + cpu_relax(); + } + } while (failed); +} + static int read_unit_address_configuration(struct dasd_device *device, struct alias_lcu *lcu) { @@ -487,13 +597,13 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) list_for_each_entry_safe(device, tempdev, &pavgroup->baselist, alias_list) { list_move(&device->alias_list, &lcu->active_devices); - private = (struct dasd_eckd_private *) device->private; + private = device->private; private->pavgroup = NULL; } list_for_each_entry_safe(device, tempdev, &pavgroup->aliaslist, alias_list) { list_move(&device->alias_list, &lcu->active_devices); - private = (struct dasd_eckd_private *) device->private; + private = device->private; private->pavgroup = NULL; } list_del(&pavgroup->group); @@ -505,10 +615,7 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) if (rc) return rc; - /* need to take cdev lock before lcu lock */ - spin_lock_irqsave_nested(get_ccwdev_lock(refdev->cdev), flags, - CDEV_NESTED_FIRST); - spin_lock(&lcu->lock); + _trylock_and_lock_lcu_irqsave(lcu, NULL, &flags); lcu->pav = NO_PAV; for (i = 0; i < MAX_DEVICES_PER_LCU; ++i) { switch (lcu->uac->unit[i].ua_type) { @@ -527,8 +634,8 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) alias_list) { _add_device_to_lcu(lcu, device, refdev); } - spin_unlock(&lcu->lock); - spin_unlock_irqrestore(get_ccwdev_lock(refdev->cdev), flags); + _unlock_all_devices_on_lcu(lcu, NULL, NULL); + spin_unlock_irqrestore(&lcu->lock, flags); return 0; } @@ -608,16 +715,13 @@ static int _schedule_lcu_update(struct alias_lcu *lcu, int dasd_alias_add_device(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; struct alias_lcu *lcu; unsigned long flags; int rc; - private = (struct dasd_eckd_private *) device->private; lcu = private->lcu; rc = 0; - - /* need to take cdev lock before lcu lock */ spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); spin_lock(&lcu->lock); if (!(lcu->flags & UPDATE_PENDING)) { @@ -636,20 +740,18 @@ int dasd_alias_add_device(struct dasd_device *device) int dasd_alias_update_add_device(struct dasd_device *device) { - struct dasd_eckd_private *private; - private = (struct dasd_eckd_private *) device->private; + struct dasd_eckd_private *private = device->private; + private->lcu->flags |= UPDATE_PENDING; return dasd_alias_add_device(device); } int dasd_alias_remove_device(struct dasd_device *device) { - struct dasd_eckd_private *private; - struct alias_lcu *lcu; + struct dasd_eckd_private *private = device->private; + struct alias_lcu *lcu = private->lcu; unsigned long flags; - private = (struct dasd_eckd_private *) device->private; - lcu = private->lcu; /* nothing to do if already removed */ if (!lcu) return 0; @@ -661,16 +763,12 @@ int dasd_alias_remove_device(struct dasd_device *device) struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *base_device) { - + struct dasd_eckd_private *alias_priv, *private = base_device->private; + struct alias_pav_group *group = private->pavgroup; + struct alias_lcu *lcu = private->lcu; struct dasd_device *alias_device; - struct alias_pav_group *group; - struct alias_lcu *lcu; - struct dasd_eckd_private *private, *alias_priv; unsigned long flags; - private = (struct dasd_eckd_private *) base_device->private; - group = private->pavgroup; - lcu = private->lcu; if (!group || !lcu) return NULL; if (lcu->pav == NO_PAV || @@ -706,7 +804,7 @@ struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *base_device) group->next = list_first_entry(&alias_device->alias_list, struct dasd_device, alias_list); spin_unlock_irqrestore(&lcu->lock, flags); - alias_priv = (struct dasd_eckd_private *) alias_device->private; + alias_priv = alias_device->private; if ((alias_priv->count < private->count) && !alias_device->stopped && !test_bit(DASD_FLAG_OFFLINE, &alias_device->flags)) return alias_device; @@ -754,30 +852,19 @@ static void _restart_all_base_devices_on_lcu(struct alias_lcu *lcu) struct alias_pav_group *pavgroup; struct dasd_device *device; struct dasd_eckd_private *private; - unsigned long flags; /* active and inactive list can contain alias as well as base devices */ list_for_each_entry(device, &lcu->active_devices, alias_list) { - private = (struct dasd_eckd_private *) device->private; - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - if (private->uid.type != UA_BASE_DEVICE) { - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), - flags); + private = device->private; + if (private->uid.type != UA_BASE_DEVICE) continue; - } - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); dasd_schedule_block_bh(device->block); dasd_schedule_device_bh(device); } list_for_each_entry(device, &lcu->inactive_devices, alias_list) { - private = (struct dasd_eckd_private *) device->private; - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - if (private->uid.type != UA_BASE_DEVICE) { - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), - flags); + private = device->private; + if (private->uid.type != UA_BASE_DEVICE) continue; - } - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); dasd_schedule_block_bh(device->block); dasd_schedule_device_bh(device); } @@ -812,7 +899,7 @@ static void flush_all_alias_devices_on_lcu(struct alias_lcu *lcu) spin_lock_irqsave(&lcu->lock, flags); list_for_each_entry_safe(device, temp, &lcu->active_devices, alias_list) { - private = (struct dasd_eckd_private *) device->private; + private = device->private; if (private->uid.type == UA_BASE_DEVICE) continue; list_move(&device->alias_list, &active); @@ -834,45 +921,27 @@ static void flush_all_alias_devices_on_lcu(struct alias_lcu *lcu) if (device == list_first_entry(&active, struct dasd_device, alias_list)) { list_move(&device->alias_list, &lcu->active_devices); - private = (struct dasd_eckd_private *) device->private; + private = device->private; private->pavgroup = NULL; } } spin_unlock_irqrestore(&lcu->lock, flags); } -static void __stop_device_on_lcu(struct dasd_device *device, - struct dasd_device *pos) -{ - /* If pos == device then device is already locked! */ - if (pos == device) { - dasd_device_set_stop_bits(pos, DASD_STOPPED_SU); - return; - } - spin_lock(get_ccwdev_lock(pos->cdev)); - dasd_device_set_stop_bits(pos, DASD_STOPPED_SU); - spin_unlock(get_ccwdev_lock(pos->cdev)); -} - -/* - * This function is called in interrupt context, so the - * cdev lock for device is already locked! - */ -static void _stop_all_devices_on_lcu(struct alias_lcu *lcu, - struct dasd_device *device) +static void _stop_all_devices_on_lcu(struct alias_lcu *lcu) { struct alias_pav_group *pavgroup; - struct dasd_device *pos; + struct dasd_device *device; - list_for_each_entry(pos, &lcu->active_devices, alias_list) - __stop_device_on_lcu(device, pos); - list_for_each_entry(pos, &lcu->inactive_devices, alias_list) - __stop_device_on_lcu(device, pos); + list_for_each_entry(device, &lcu->active_devices, alias_list) + dasd_device_set_stop_bits(device, DASD_STOPPED_SU); + list_for_each_entry(device, &lcu->inactive_devices, alias_list) + dasd_device_set_stop_bits(device, DASD_STOPPED_SU); list_for_each_entry(pavgroup, &lcu->grouplist, group) { - list_for_each_entry(pos, &pavgroup->baselist, alias_list) - __stop_device_on_lcu(device, pos); - list_for_each_entry(pos, &pavgroup->aliaslist, alias_list) - __stop_device_on_lcu(device, pos); + list_for_each_entry(device, &pavgroup->baselist, alias_list) + dasd_device_set_stop_bits(device, DASD_STOPPED_SU); + list_for_each_entry(device, &pavgroup->aliaslist, alias_list) + dasd_device_set_stop_bits(device, DASD_STOPPED_SU); } } @@ -880,33 +949,16 @@ static void _unstop_all_devices_on_lcu(struct alias_lcu *lcu) { struct alias_pav_group *pavgroup; struct dasd_device *device; - unsigned long flags; - list_for_each_entry(device, &lcu->active_devices, alias_list) { - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + list_for_each_entry(device, &lcu->active_devices, alias_list) dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); - } - - list_for_each_entry(device, &lcu->inactive_devices, alias_list) { - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + list_for_each_entry(device, &lcu->inactive_devices, alias_list) dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); - } - list_for_each_entry(pavgroup, &lcu->grouplist, group) { - list_for_each_entry(device, &pavgroup->baselist, alias_list) { - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + list_for_each_entry(device, &pavgroup->baselist, alias_list) dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), - flags); - } - list_for_each_entry(device, &pavgroup->aliaslist, alias_list) { - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + list_for_each_entry(device, &pavgroup->aliaslist, alias_list) dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), - flags); - } } } @@ -932,13 +984,14 @@ static void summary_unit_check_handling_work(struct work_struct *work) spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); reset_summary_unit_check(lcu, device, suc_data->reason); - spin_lock_irqsave(&lcu->lock, flags); + _trylock_and_lock_lcu_irqsave(lcu, NULL, &flags); _unstop_all_devices_on_lcu(lcu); _restart_all_base_devices_on_lcu(lcu); /* 3. read new alias configuration */ _schedule_lcu_update(lcu, device); lcu->suc_data.device = NULL; dasd_put_device(device); + _unlock_all_devices_on_lcu(lcu, NULL, NULL); spin_unlock_irqrestore(&lcu->lock, flags); } @@ -948,13 +1001,11 @@ static void summary_unit_check_handling_work(struct work_struct *work) void dasd_alias_handle_summary_unit_check(struct dasd_device *device, struct irb *irb) { + struct dasd_eckd_private *private = device->private; struct alias_lcu *lcu; char reason; - struct dasd_eckd_private *private; char *sense; - private = (struct dasd_eckd_private *) device->private; - sense = dasd_get_sense(irb); if (sense) { reason = sense[8]; @@ -974,10 +1025,7 @@ void dasd_alias_handle_summary_unit_check(struct dasd_device *device, " unit check (no lcu structure)"); return; } - spin_lock(&lcu->lock); - _stop_all_devices_on_lcu(lcu, device); - /* prepare for lcu_update */ - private->lcu->flags |= NEED_UAC_UPDATE | UPDATE_PENDING; + _trylock_and_lock_lcu(lcu, device); /* If this device is about to be removed just return and wait for * the next interrupt on a different device */ @@ -985,6 +1033,7 @@ void dasd_alias_handle_summary_unit_check(struct dasd_device *device, DBF_DEV_EVENT(DBF_WARNING, device, "%s", "device is in offline processing," " don't do summary unit check handling"); + _unlock_all_devices_on_lcu(lcu, device, NULL); spin_unlock(&lcu->lock); return; } @@ -993,12 +1042,17 @@ void dasd_alias_handle_summary_unit_check(struct dasd_device *device, DBF_DEV_EVENT(DBF_WARNING, device, "%s", "previous instance of summary unit check worker" " still pending"); + _unlock_all_devices_on_lcu(lcu, device, NULL); spin_unlock(&lcu->lock); return ; } + _stop_all_devices_on_lcu(lcu); + /* prepare for lcu_update */ + private->lcu->flags |= NEED_UAC_UPDATE | UPDATE_PENDING; lcu->suc_data.reason = reason; lcu->suc_data.device = device; dasd_get_device(device); + _unlock_all_devices_on_lcu(lcu, device, NULL); spin_unlock(&lcu->lock); if (!schedule_work(&lcu->suc_data.worker)) dasd_put_device(device); diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 8286f74..2f18f61 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -214,8 +214,8 @@ dasd_feature_list(char *str, char **endp) else if (len == 8 && !strncmp(str, "failfast", 8)) features |= DASD_FEATURE_FAILFAST; else { - pr_warning("%*s is not a supported device option\n", - len, str); + pr_warn("%*s is not a supported device option\n", + len, str); rc = -EINVAL; } str += len; @@ -224,8 +224,7 @@ dasd_feature_list(char *str, char **endp) str++; } if (*str != ')') { - pr_warning("A closing parenthesis ')' is missing in the " - "dasd= parameter\n"); + pr_warn("A closing parenthesis ')' is missing in the dasd= parameter\n"); rc = -EINVAL; } else str++; @@ -348,8 +347,7 @@ dasd_parse_range( char *parsestring ) { return str + 1; if (*str == '\0') return str; - pr_warning("The dasd= parameter value %s has an invalid ending\n", - str); + pr_warn("The dasd= parameter value %s has an invalid ending\n", str); return ERR_PTR(-EINVAL); } diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 277b5c8..5667146 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -104,12 +104,10 @@ static inline int mdsk_init_io(struct dasd_device *device, unsigned int blocksize, blocknum_t offset, blocknum_t *end_block) { - struct dasd_diag_private *private; - struct dasd_diag_init_io *iib; + struct dasd_diag_private *private = device->private; + struct dasd_diag_init_io *iib = &private->iib; int rc; - private = (struct dasd_diag_private *) device->private; - iib = &private->iib; memset(iib, 0, sizeof (struct dasd_diag_init_io)); iib->dev_nr = private->dev_id.devno; @@ -130,12 +128,10 @@ mdsk_init_io(struct dasd_device *device, unsigned int blocksize, static inline int mdsk_term_io(struct dasd_device * device) { - struct dasd_diag_private *private; - struct dasd_diag_init_io *iib; + struct dasd_diag_private *private = device->private; + struct dasd_diag_init_io *iib = &private->iib; int rc; - private = (struct dasd_diag_private *) device->private; - iib = &private->iib; memset(iib, 0, sizeof (struct dasd_diag_init_io)); iib->dev_nr = private->dev_id.devno; rc = dia250(iib, TERM_BIO); @@ -153,14 +149,13 @@ dasd_diag_erp(struct dasd_device *device) rc = mdsk_init_io(device, device->block->bp_block, 0, NULL); if (rc == 4) { if (!(test_and_set_bit(DASD_FLAG_DEVICE_RO, &device->flags))) - pr_warning("%s: The access mode of a DIAG device " - "changed to read-only\n", - dev_name(&device->cdev->dev)); + pr_warn("%s: The access mode of a DIAG device changed to read-only\n", + dev_name(&device->cdev->dev)); rc = 0; } if (rc) - pr_warning("%s: DIAG ERP failed with " - "rc=%d\n", dev_name(&device->cdev->dev), rc); + pr_warn("%s: DIAG ERP failed with rc=%d\n", + dev_name(&device->cdev->dev), rc); } /* Start a given request at the device. Return zero on success, non-zero @@ -180,8 +175,8 @@ dasd_start_diag(struct dasd_ccw_req * cqr) cqr->status = DASD_CQR_ERROR; return -EIO; } - private = (struct dasd_diag_private *) device->private; - dreq = (struct dasd_diag_req *) cqr->data; + private = device->private; + dreq = cqr->data; private->iob.dev_nr = private->dev_id.devno; private->iob.key = 0; @@ -320,18 +315,17 @@ static void dasd_ext_handler(struct ext_code ext_code, static int dasd_diag_check_device(struct dasd_device *device) { - struct dasd_block *block; - struct dasd_diag_private *private; + struct dasd_diag_private *private = device->private; struct dasd_diag_characteristics *rdc_data; - struct dasd_diag_bio bio; struct vtoc_cms_label *label; - blocknum_t end_block; + struct dasd_block *block; + struct dasd_diag_bio bio; unsigned int sb, bsize; + blocknum_t end_block; int rc; - private = (struct dasd_diag_private *) device->private; if (private == NULL) { - private = kzalloc(sizeof(struct dasd_diag_private),GFP_KERNEL); + private = kzalloc(sizeof(*private), GFP_KERNEL); if (private == NULL) { DBF_DEV_EVENT(DBF_WARNING, device, "%s", "Allocating memory for private DASD data " @@ -339,7 +333,7 @@ dasd_diag_check_device(struct dasd_device *device) return -ENOMEM; } ccw_device_get_id(device->cdev, &private->dev_id); - device->private = (void *) private; + device->private = private; } block = dasd_alloc_block(); if (IS_ERR(block)) { @@ -353,7 +347,7 @@ dasd_diag_check_device(struct dasd_device *device) block->base = device; /* Read Device Characteristics */ - rdc_data = (void *) &(private->rdc_data); + rdc_data = &private->rdc_data; rdc_data->dev_nr = private->dev_id.devno; rdc_data->rdc_len = sizeof (struct dasd_diag_characteristics); @@ -377,9 +371,9 @@ dasd_diag_check_device(struct dasd_device *device) private->pt_block = 2; break; default: - pr_warning("%s: Device type %d is not supported " - "in DIAG mode\n", dev_name(&device->cdev->dev), - private->rdc_data.vdev_class); + pr_warn("%s: Device type %d is not supported in DIAG mode\n", + dev_name(&device->cdev->dev), + private->rdc_data.vdev_class); rc = -EOPNOTSUPP; goto out; } @@ -420,8 +414,8 @@ dasd_diag_check_device(struct dasd_device *device) private->iob.flaga = DASD_DIAG_FLAGA_DEFAULT; rc = dia250(&private->iob, RW_BIO); if (rc == 3) { - pr_warning("%s: A 64-bit DIAG call failed\n", - dev_name(&device->cdev->dev)); + pr_warn("%s: A 64-bit DIAG call failed\n", + dev_name(&device->cdev->dev)); rc = -EOPNOTSUPP; goto out_label; } @@ -430,9 +424,8 @@ dasd_diag_check_device(struct dasd_device *device) break; } if (bsize > PAGE_SIZE) { - pr_warning("%s: Accessing the DASD failed because of an " - "incorrect format (rc=%d)\n", - dev_name(&device->cdev->dev), rc); + pr_warn("%s: Accessing the DASD failed because of an incorrect format (rc=%d)\n", + dev_name(&device->cdev->dev), rc); rc = -EIO; goto out_label; } @@ -450,8 +443,8 @@ dasd_diag_check_device(struct dasd_device *device) block->s2b_shift++; rc = mdsk_init_io(device, block->bp_block, 0, NULL); if (rc && (rc != 4)) { - pr_warning("%s: DIAG initialization failed with rc=%d\n", - dev_name(&device->cdev->dev), rc); + pr_warn("%s: DIAG initialization failed with rc=%d\n", + dev_name(&device->cdev->dev), rc); rc = -EIO; } else { if (rc == 4) @@ -601,16 +594,14 @@ static int dasd_diag_fill_info(struct dasd_device * device, struct dasd_information2_t * info) { - struct dasd_diag_private *private; + struct dasd_diag_private *private = device->private; - private = (struct dasd_diag_private *) device->private; info->label_block = (unsigned int) private->pt_block; info->FBA_layout = 1; info->format = DASD_FORMAT_LDL; - info->characteristics_size = sizeof (struct dasd_diag_characteristics); - memcpy(info->characteristics, - &((struct dasd_diag_private *) device->private)->rdc_data, - sizeof (struct dasd_diag_characteristics)); + info->characteristics_size = sizeof(private->rdc_data); + memcpy(info->characteristics, &private->rdc_data, + sizeof(private->rdc_data)); info->confdata_size = 0; return 0; } diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 9083247..75c032d 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -212,10 +212,9 @@ check_XRC (struct ccw1 *de_ccw, struct DE_eckd_data *data, struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int rc; - private = (struct dasd_eckd_private *) device->private; if (!private->rdc_data.facilities.XRC_supported) return 0; @@ -237,13 +236,11 @@ static int define_extent(struct ccw1 *ccw, struct DE_eckd_data *data, unsigned int trk, unsigned int totrk, int cmd, struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; u32 begcyl, endcyl; u16 heads, beghead, endhead; int rc = 0; - private = (struct dasd_eckd_private *) device->private; - ccw->cmd_code = DASD_ECKD_CCW_DEFINE_EXTENT; ccw->flags = 0; ccw->count = 16; @@ -322,10 +319,9 @@ define_extent(struct ccw1 *ccw, struct DE_eckd_data *data, unsigned int trk, static int check_XRC_on_prefix(struct PFX_eckd_data *pfxdata, struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int rc; - private = (struct dasd_eckd_private *) device->private; if (!private->rdc_data.facilities.XRC_supported) return 0; @@ -346,12 +342,10 @@ static void fill_LRE_data(struct LRE_eckd_data *data, unsigned int trk, struct dasd_device *device, unsigned int reclen, unsigned int tlf) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int sector; int dn, d; - private = (struct dasd_eckd_private *) device->private; - memset(data, 0, sizeof(*data)); sector = 0; if (rec_on_trk) { @@ -488,8 +482,8 @@ static int prefix_LRE(struct ccw1 *ccw, struct PFX_eckd_data *pfxdata, u16 heads, beghead, endhead; int rc = 0; - basepriv = (struct dasd_eckd_private *) basedev->private; - startpriv = (struct dasd_eckd_private *) startdev->private; + basepriv = basedev->private; + startpriv = startdev->private; dedata = &pfxdata->define_extent; lredata = &pfxdata->locate_record; @@ -631,12 +625,10 @@ locate_record(struct ccw1 *ccw, struct LO_eckd_data *data, unsigned int trk, unsigned int rec_on_trk, int no_rec, int cmd, struct dasd_device * device, int reclen) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int sector; int dn, d; - private = (struct dasd_eckd_private *) device->private; - DBF_DEV_EVENT(DBF_INFO, device, "Locate: trk %d, rec %d, no_rec %d, cmd %d, reclen %d", trk, rec_on_trk, no_rec, cmd, reclen); @@ -800,10 +792,9 @@ static void create_uid(struct dasd_eckd_private *private) */ static int dasd_eckd_generate_uid(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; unsigned long flags; - private = (struct dasd_eckd_private *) device->private; if (!private) return -ENODEV; if (!private->ned || !private->gneq) @@ -816,11 +807,10 @@ static int dasd_eckd_generate_uid(struct dasd_device *device) static int dasd_eckd_get_uid(struct dasd_device *device, struct dasd_uid *uid) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; unsigned long flags; - if (device->private) { - private = (struct dasd_eckd_private *)device->private; + if (private) { spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); *uid = private->uid; spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); @@ -1034,10 +1024,9 @@ static unsigned char dasd_eckd_path_access(void *conf_data, int conf_len) static void dasd_eckd_clear_conf_data(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int i; - private = (struct dasd_eckd_private *) device->private; private->conf_data = NULL; private->conf_len = 0; for (i = 0; i < 8; i++) { @@ -1058,7 +1047,7 @@ static int dasd_eckd_read_conf(struct dasd_device *device) struct dasd_uid *uid; char print_path_uid[60], print_device_uid[60]; - private = (struct dasd_eckd_private *) device->private; + private = device->private; path_data = &device->path_data; opm = ccw_device_get_path_mask(device->cdev); conf_data_saved = 0; @@ -1191,11 +1180,10 @@ static int dasd_eckd_read_conf(struct dasd_device *device) static int verify_fcx_max_data(struct dasd_device *device, __u8 lpm) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int mdc; u32 fcx_max_data; - private = (struct dasd_eckd_private *) device->private; if (private->fcx_max_data) { mdc = ccw_device_get_mdc(device->cdev, lpm); if ((mdc < 0)) { @@ -1221,15 +1209,10 @@ static int verify_fcx_max_data(struct dasd_device *device, __u8 lpm) static int rebuild_device_uid(struct dasd_device *device, struct path_verification_work_data *data) { - struct dasd_eckd_private *private; - struct dasd_path *path_data; - __u8 lpm, opm; - int rc; - - rc = -ENODEV; - private = (struct dasd_eckd_private *) device->private; - path_data = &device->path_data; - opm = device->path_data.opm; + struct dasd_eckd_private *private = device->private; + struct dasd_path *path_data = &device->path_data; + __u8 lpm, opm = path_data->opm; + int rc = -ENODEV; for (lpm = 0x80; lpm; lpm >>= 1) { if (!(lpm & opm)) @@ -1463,14 +1446,13 @@ static int dasd_eckd_verify_path(struct dasd_device *device, __u8 lpm) static int dasd_eckd_read_features(struct dasd_device *device) { + struct dasd_eckd_private *private = device->private; struct dasd_psf_prssd_data *prssdp; struct dasd_rssd_features *features; struct dasd_ccw_req *cqr; struct ccw1 *ccw; int rc; - struct dasd_eckd_private *private; - private = (struct dasd_eckd_private *) device->private; memset(&private->features, 0, sizeof(struct dasd_rssd_features)); cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */, (sizeof(struct dasd_psf_prssd_data) + @@ -1605,11 +1587,9 @@ dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav, static int dasd_eckd_validate_server(struct dasd_device *device, unsigned long flags) { - int rc; - struct dasd_eckd_private *private; - int enable_pav; + struct dasd_eckd_private *private = device->private; + int enable_pav, rc; - private = (struct dasd_eckd_private *) device->private; if (private->uid.type == UA_BASE_PAV_ALIAS || private->uid.type == UA_HYPER_PAV_ALIAS) return 0; @@ -1662,14 +1642,13 @@ static void dasd_eckd_kick_validate_server(struct dasd_device *device) static u32 get_fcx_max_data(struct dasd_device *device) { - int tpm, mdc; + struct dasd_eckd_private *private = device->private; int fcx_in_css, fcx_in_gneq, fcx_in_features; - struct dasd_eckd_private *private; + int tpm, mdc; if (dasd_nofcx) return 0; /* is transport mode supported? */ - private = (struct dasd_eckd_private *) device->private; fcx_in_css = css_general_characteristics.fcx; fcx_in_gneq = private->gneq->reserved2[7] & 0x04; fcx_in_features = private->features.feature[40] & 0x80; @@ -1694,7 +1673,7 @@ static u32 get_fcx_max_data(struct dasd_device *device) static int dasd_eckd_check_characteristics(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; struct dasd_block *block; struct dasd_uid temp_uid; int rc, i; @@ -1713,7 +1692,6 @@ dasd_eckd_check_characteristics(struct dasd_device *device) dev_info(&device->cdev->dev, "The DASD is not operating in multipath mode\n"); } - private = (struct dasd_eckd_private *) device->private; if (!private) { private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA); if (!private) { @@ -1722,7 +1700,7 @@ dasd_eckd_check_characteristics(struct dasd_device *device) "failed\n"); return -ENOMEM; } - device->private = (void *) private; + device->private = private; } else { memset(private, 0, sizeof(*private)); } @@ -1837,10 +1815,9 @@ out_err1: static void dasd_eckd_uncheck_device(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int i; - private = (struct dasd_eckd_private *) device->private; dasd_alias_disconnect_device_from_lcu(device); private->ned = NULL; private->sneq = NULL; @@ -1863,7 +1840,7 @@ static void dasd_eckd_uncheck_device(struct dasd_device *device) static struct dasd_ccw_req * dasd_eckd_analysis_ccw(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; struct eckd_count *count_data; struct LO_eckd_data *LO_data; struct dasd_ccw_req *cqr; @@ -1871,8 +1848,6 @@ dasd_eckd_analysis_ccw(struct dasd_device *device) int cplength, datasize; int i; - private = (struct dasd_eckd_private *) device->private; - cplength = 8; datasize = sizeof(struct DE_eckd_data) + 2*sizeof(struct LO_eckd_data); cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, device); @@ -1946,11 +1921,9 @@ static int dasd_eckd_analysis_evaluation(struct dasd_ccw_req *init_cqr) static void dasd_eckd_analysis_callback(struct dasd_ccw_req *init_cqr, void *data) { - struct dasd_eckd_private *private; - struct dasd_device *device; + struct dasd_device *device = init_cqr->startdev; + struct dasd_eckd_private *private = device->private; - device = init_cqr->startdev; - private = (struct dasd_eckd_private *) device->private; private->init_cqr_status = dasd_eckd_analysis_evaluation(init_cqr); dasd_sfree_request(init_cqr, device); dasd_kick_device(device); @@ -1977,15 +1950,13 @@ static int dasd_eckd_start_analysis(struct dasd_block *block) static int dasd_eckd_end_analysis(struct dasd_block *block) { - struct dasd_device *device; - struct dasd_eckd_private *private; + struct dasd_device *device = block->base; + struct dasd_eckd_private *private = device->private; struct eckd_count *count_area; unsigned int sb, blk_per_trk; int status, i; struct dasd_ccw_req *init_cqr; - device = block->base; - private = (struct dasd_eckd_private *) device->private; status = private->init_cqr_status; private->init_cqr_status = -1; if (status == INIT_CQR_ERROR) { @@ -2083,9 +2054,8 @@ raw: static int dasd_eckd_do_analysis(struct dasd_block *block) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = block->base->private; - private = (struct dasd_eckd_private *) block->base->private; if (private->init_cqr_status < 0) return dasd_eckd_start_analysis(block); else @@ -2112,9 +2082,8 @@ static int dasd_eckd_basic_to_known(struct dasd_device *device) static int dasd_eckd_fill_geometry(struct dasd_block *block, struct hd_geometry *geo) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = block->base->private; - private = (struct dasd_eckd_private *) block->base->private; if (dasd_check_blocksize(block->bp_block) == 0) { geo->sectors = recs_per_track(&private->rdc_data, 0, block->bp_block); @@ -2151,8 +2120,8 @@ dasd_eckd_build_format(struct dasd_device *base, if (!startdev) startdev = base; - start_priv = (struct dasd_eckd_private *) startdev->private; - base_priv = (struct dasd_eckd_private *) base->private; + start_priv = startdev->private; + base_priv = base->private; rpt = recs_per_track(&base_priv->rdc_data, 0, fdata->blksize); @@ -2349,14 +2318,14 @@ dasd_eckd_build_format(struct dasd_device *base, * when formatting CDL */ if ((intensity & 0x08) && - fdata->start_unit == 0) { + address.cyl == 0 && address.head == 0) { if (i < 3) { ect->kl = 4; ect->dl = sizes_trk0[i] - 4; } } if ((intensity & 0x08) && - fdata->start_unit == 1) { + address.cyl == 0 && address.head == 1) { ect->kl = 44; ect->dl = LABEL_SIZE - 44; } @@ -2386,23 +2355,24 @@ dasd_eckd_build_format(struct dasd_device *base, return fcp; } -static int -dasd_eckd_format_device(struct dasd_device *base, - struct format_data_t *fdata, - int enable_pav) +/* + * Wrapper function to build a CCW request depending on input data + */ +static struct dasd_ccw_req * +dasd_eckd_format_build_ccw_req(struct dasd_device *base, + struct format_data_t *fdata, int enable_pav) { - struct dasd_ccw_req *cqr, *n; - struct dasd_block *block; - struct dasd_eckd_private *private; - struct list_head format_queue; - struct dasd_device *device; - int old_stop, format_step; - int step, rc = 0, sleep_rc; + return dasd_eckd_build_format(base, fdata, enable_pav); +} - block = base->block; - private = (struct dasd_eckd_private *) base->private; +/* + * Sanity checks on format_data + */ +static int dasd_eckd_format_sanity_checks(struct dasd_device *base, + struct format_data_t *fdata) +{ + struct dasd_eckd_private *private = base->private; - /* Sanity checks. */ if (fdata->start_unit >= (private->real_cyl * private->rdc_data.trk_per_cyl)) { dev_warn(&base->cdev->dev, @@ -2429,75 +2399,98 @@ dasd_eckd_format_device(struct dasd_device *base, fdata->blksize); return -EINVAL; } + return 0; +} + +/* + * This function will process format_data originally coming from an IOCTL + */ +static int dasd_eckd_format_process_data(struct dasd_device *base, + struct format_data_t *fdata, + int enable_pav) +{ + struct dasd_eckd_private *private = base->private; + struct dasd_ccw_req *cqr, *n; + struct list_head format_queue; + struct dasd_device *device; + int old_start, old_stop, format_step; + int step, retry; + int rc; + + rc = dasd_eckd_format_sanity_checks(base, fdata); + if (rc) + return rc; INIT_LIST_HEAD(&format_queue); + old_start = fdata->start_unit; old_stop = fdata->stop_unit; - while (fdata->start_unit <= 1) { - fdata->stop_unit = fdata->start_unit; - cqr = dasd_eckd_build_format(base, fdata, enable_pav); - list_add(&cqr->blocklist, &format_queue); - - fdata->stop_unit = old_stop; - fdata->start_unit++; - if (fdata->start_unit > fdata->stop_unit) - goto sleep; - } + format_step = DASD_CQR_MAX_CCW / recs_per_track(&private->rdc_data, 0, + fdata->blksize); + do { + retry = 0; + while (fdata->start_unit <= old_stop) { + step = fdata->stop_unit - fdata->start_unit + 1; + if (step > format_step) { + fdata->stop_unit = + fdata->start_unit + format_step - 1; + } -retry: - format_step = 255 / recs_per_track(&private->rdc_data, 0, - fdata->blksize); - while (fdata->start_unit <= old_stop) { - step = fdata->stop_unit - fdata->start_unit + 1; - if (step > format_step) - fdata->stop_unit = fdata->start_unit + format_step - 1; + cqr = dasd_eckd_format_build_ccw_req(base, fdata, + enable_pav); + if (IS_ERR(cqr)) { + rc = PTR_ERR(cqr); + if (rc == -ENOMEM) { + if (list_empty(&format_queue)) + goto out; + /* + * not enough memory available, start + * requests retry after first requests + * were finished + */ + retry = 1; + break; + } + goto out_err; + } + list_add_tail(&cqr->blocklist, &format_queue); - cqr = dasd_eckd_build_format(base, fdata, enable_pav); - if (IS_ERR(cqr)) { - if (PTR_ERR(cqr) == -ENOMEM) { - /* - * not enough memory available - * go to out and start requests - * retry after first requests were finished - */ - fdata->stop_unit = old_stop; - goto sleep; - } else - return PTR_ERR(cqr); + fdata->start_unit = fdata->stop_unit + 1; + fdata->stop_unit = old_stop; } - list_add(&cqr->blocklist, &format_queue); - fdata->start_unit = fdata->stop_unit + 1; - fdata->stop_unit = old_stop; - } + rc = dasd_sleep_on_queue(&format_queue); -sleep: - sleep_rc = dasd_sleep_on_queue(&format_queue); +out_err: + list_for_each_entry_safe(cqr, n, &format_queue, blocklist) { + device = cqr->startdev; + private = device->private; + if (cqr->status == DASD_CQR_FAILED) + rc = -EIO; + list_del_init(&cqr->blocklist); + dasd_sfree_request(cqr, device); + private->count--; + } - list_for_each_entry_safe(cqr, n, &format_queue, blocklist) { - device = cqr->startdev; - private = (struct dasd_eckd_private *) device->private; - if (cqr->status == DASD_CQR_FAILED) - rc = -EIO; - list_del_init(&cqr->blocklist); - dasd_sfree_request(cqr, device); - private->count--; - } + if (rc) + goto out; - if (sleep_rc) - return sleep_rc; + } while (retry); - /* - * in case of ENOMEM we need to retry after - * first requests are finished - */ - if (fdata->start_unit <= fdata->stop_unit) - goto retry; +out: + fdata->start_unit = old_start; + fdata->stop_unit = old_stop; return rc; } +static int dasd_eckd_format_device(struct dasd_device *base, + struct format_data_t *fdata, int enable_pav) +{ + return dasd_eckd_format_process_data(base, fdata, enable_pav); +} + static void dasd_eckd_handle_terminated_request(struct dasd_ccw_req *cqr) { if (cqr->retries < 0) { @@ -2543,9 +2536,8 @@ static void dasd_eckd_check_for_device_change(struct dasd_device *device, { char mask; char *sense = NULL; - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; - private = (struct dasd_eckd_private *) device->private; /* first of all check for state change pending interrupt */ mask = DEV_STAT_ATTENTION | DEV_STAT_DEV_END | DEV_STAT_UNIT_EXCEP; if ((scsw_dstat(&irb->scsw) & mask) == mask) { @@ -2634,7 +2626,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( struct dasd_device *basedev; basedev = block->base; - private = (struct dasd_eckd_private *) basedev->private; + private = basedev->private; if (rq_data_dir(req) == READ) cmd = DASD_ECKD_CCW_READ_MT; else if (rq_data_dir(req) == WRITE) @@ -2990,8 +2982,8 @@ static int prepare_itcw(struct itcw *itcw, /* setup prefix data */ - basepriv = (struct dasd_eckd_private *) basedev->private; - startpriv = (struct dasd_eckd_private *) startdev->private; + basepriv = basedev->private; + startpriv = startdev->private; dedata = &pfxdata.define_extent; lredata = &pfxdata.locate_record; @@ -3278,7 +3270,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp(struct dasd_device *startdev, struct dasd_ccw_req *cqr; basedev = block->base; - private = (struct dasd_eckd_private *) basedev->private; + private = basedev->private; /* Calculate number of blocks/records per track. */ blksize = block->bp_block; @@ -3503,7 +3495,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req) if (!dasd_page_cache) goto out; - private = (struct dasd_eckd_private *) cqr->block->base->private; + private = cqr->block->base->private; blksize = cqr->block->bp_block; blk_per_trk = recs_per_track(&private->rdc_data, 0, blksize); recid = blk_rq_pos(req) >> cqr->block->s2b_shift; @@ -3587,7 +3579,7 @@ static struct dasd_ccw_req *dasd_eckd_build_alias_cp(struct dasd_device *base, startdev = dasd_alias_get_start_dev(base); if (!startdev) startdev = base; - private = (struct dasd_eckd_private *) startdev->private; + private = startdev->private; if (private->count >= DASD_ECKD_CHANQ_MAX_SIZE) return ERR_PTR(-EBUSY); @@ -3610,7 +3602,7 @@ static int dasd_eckd_free_alias_cp(struct dasd_ccw_req *cqr, unsigned long flags; spin_lock_irqsave(get_ccwdev_lock(cqr->memdev->cdev), flags); - private = (struct dasd_eckd_private *) cqr->memdev->private; + private = cqr->memdev->private; private->count--; spin_unlock_irqrestore(get_ccwdev_lock(cqr->memdev->cdev), flags); return dasd_eckd_free_cp(cqr, req); @@ -3620,15 +3612,14 @@ static int dasd_eckd_fill_info(struct dasd_device * device, struct dasd_information2_t * info) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; - private = (struct dasd_eckd_private *) device->private; info->label_block = 2; info->FBA_layout = private->uses_cdl ? 0 : 1; info->format = private->uses_cdl ? DASD_FORMAT_CDL : DASD_FORMAT_LDL; - info->characteristics_size = sizeof(struct dasd_eckd_characteristics); + info->characteristics_size = sizeof(private->rdc_data); memcpy(info->characteristics, &private->rdc_data, - sizeof(struct dasd_eckd_characteristics)); + sizeof(private->rdc_data)); info->confdata_size = min((unsigned long)private->conf_len, sizeof(info->configuration_data)); memcpy(info->configuration_data, private->conf_data, @@ -3941,8 +3932,7 @@ dasd_eckd_performance(struct dasd_device *device, void __user *argp) static int dasd_eckd_get_attrib(struct dasd_device *device, void __user *argp) { - struct dasd_eckd_private *private = - (struct dasd_eckd_private *)device->private; + struct dasd_eckd_private *private = device->private; struct attrib_data_t attrib = private->attrib; int rc; @@ -3966,8 +3956,7 @@ dasd_eckd_get_attrib(struct dasd_device *device, void __user *argp) static int dasd_eckd_set_attrib(struct dasd_device *device, void __user *argp) { - struct dasd_eckd_private *private = - (struct dasd_eckd_private *)device->private; + struct dasd_eckd_private *private = device->private; struct attrib_data_t attrib; if (!capable(CAP_SYS_ADMIN)) @@ -4430,15 +4419,13 @@ static int dasd_eckd_pm_freeze(struct dasd_device *device) static int dasd_eckd_restore_device(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; struct dasd_eckd_characteristics temp_rdc_data; int rc; struct dasd_uid temp_uid; unsigned long flags; unsigned long cqr_flags = 0; - private = (struct dasd_eckd_private *) device->private; - /* Read Configuration Data */ rc = dasd_eckd_read_conf(device); if (rc) { @@ -4502,14 +4489,12 @@ out_err: static int dasd_eckd_reload_device(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int rc, old_base; char print_uid[60]; struct dasd_uid uid; unsigned long flags; - private = (struct dasd_eckd_private *) device->private; - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); old_base = private->uid.base_unit_addr; spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); @@ -4556,12 +4541,10 @@ static int dasd_eckd_read_message_buffer(struct dasd_device *device, { struct dasd_rssd_messages *message_buf; struct dasd_psf_prssd_data *prssdp; - struct dasd_eckd_private *private; struct dasd_ccw_req *cqr; struct ccw1 *ccw; int rc; - private = (struct dasd_eckd_private *) device->private; cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */, (sizeof(struct dasd_psf_prssd_data) + sizeof(struct dasd_rssd_messages)), @@ -4686,11 +4669,10 @@ static struct dasd_conf_data *dasd_eckd_get_ref_conf(struct dasd_device *device, __u8 lpum, struct dasd_cuir_message *cuir) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; struct dasd_conf_data *conf_data; int path, pos; - private = (struct dasd_eckd_private *) device->private; if (cuir->record_selector == 0) goto out; for (path = 0x80, pos = 0; path; path >>= 1, pos++) { @@ -4715,9 +4697,9 @@ out: static int dasd_eckd_cuir_scope(struct dasd_device *device, __u8 lpum, struct dasd_cuir_message *cuir) { + struct dasd_eckd_private *private = device->private; struct dasd_conf_data *ref_conf_data; unsigned long bitmask = 0, mask = 0; - struct dasd_eckd_private *private; struct dasd_conf_data *conf_data; unsigned int pos, path; char *ref_gneq, *gneq; @@ -4730,7 +4712,6 @@ static int dasd_eckd_cuir_scope(struct dasd_device *device, __u8 lpum, !(cuir->neq_map[0] | cuir->neq_map[1] | cuir->neq_map[2])) return lpum; - private = (struct dasd_eckd_private *) device->private; /* get reference conf data */ ref_conf_data = dasd_eckd_get_ref_conf(device, lpum, cuir); /* reference ned is determined by ned_map field */ @@ -4829,14 +4810,13 @@ static int dasd_eckd_cuir_quiesce(struct dasd_device *device, __u8 lpum, struct subchannel_id sch_id, struct dasd_cuir_message *cuir) { + struct dasd_eckd_private *private = device->private; struct alias_pav_group *pavgroup, *tempgroup; - struct dasd_eckd_private *private; struct dasd_device *dev, *n; unsigned long paths = 0; unsigned long flags; int tbcpm; - private = (struct dasd_eckd_private *) device->private; /* active devices */ list_for_each_entry_safe(dev, n, &private->lcu->active_devices, alias_list) { @@ -4892,13 +4872,12 @@ static int dasd_eckd_cuir_resume(struct dasd_device *device, __u8 lpum, struct subchannel_id sch_id, struct dasd_cuir_message *cuir) { + struct dasd_eckd_private *private = device->private; struct alias_pav_group *pavgroup, *tempgroup; - struct dasd_eckd_private *private; struct dasd_device *dev, *n; unsigned long paths = 0; int tbcpm; - private = (struct dasd_eckd_private *) device->private; /* * the path may have been added through a generic path event before * only trigger path verification if the path is not already in use diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index c9262e7..d7b5b55 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -125,13 +125,11 @@ locate_record(struct ccw1 * ccw, struct LO_fba_data *data, int rw, static int dasd_fba_check_characteristics(struct dasd_device *device) { - struct dasd_block *block; - struct dasd_fba_private *private; + struct dasd_fba_private *private = device->private; struct ccw_device *cdev = device->cdev; - int rc; - int readonly; + struct dasd_block *block; + int readonly, rc; - private = (struct dasd_fba_private *) device->private; if (!private) { private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA); if (!private) { @@ -140,7 +138,7 @@ dasd_fba_check_characteristics(struct dasd_device *device) "data failed\n"); return -ENOMEM; } - device->private = (void *) private; + device->private = private; } else { memset(private, 0, sizeof(*private)); } @@ -192,10 +190,9 @@ dasd_fba_check_characteristics(struct dasd_device *device) static int dasd_fba_do_analysis(struct dasd_block *block) { - struct dasd_fba_private *private; + struct dasd_fba_private *private = block->base->private; int sb, rc; - private = (struct dasd_fba_private *) block->base->private; rc = dasd_check_blocksize(private->rdc_data.blk_size); if (rc) { DBF_DEV_EVENT(DBF_WARNING, block->base, "unknown blocksize %d", @@ -254,7 +251,7 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev, struct dasd_block *block, struct request *req) { - struct dasd_fba_private *private; + struct dasd_fba_private *private = block->base->private; unsigned long *idaws; struct LO_fba_data *LO_data; struct dasd_ccw_req *cqr; @@ -267,7 +264,6 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev, unsigned int blksize, off; unsigned char cmd; - private = (struct dasd_fba_private *) block->base->private; if (rq_data_dir(req) == READ) { cmd = DASD_FBA_CCW_READ; } else if (rq_data_dir(req) == WRITE) { @@ -379,7 +375,7 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev, static int dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req) { - struct dasd_fba_private *private; + struct dasd_fba_private *private = cqr->block->base->private; struct ccw1 *ccw; struct req_iterator iter; struct bio_vec bv; @@ -389,7 +385,6 @@ dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req) if (!dasd_page_cache) goto out; - private = (struct dasd_fba_private *) cqr->block->base->private; blksize = cqr->block->bp_block; ccw = cqr->cpaddr; /* Skip over define extent & locate record. */ @@ -436,13 +431,14 @@ static int dasd_fba_fill_info(struct dasd_device * device, struct dasd_information2_t * info) { + struct dasd_fba_private *private = device->private; + info->label_block = 1; info->FBA_layout = 1; info->format = DASD_FORMAT_LDL; - info->characteristics_size = sizeof(struct dasd_fba_characteristics); - memcpy(info->characteristics, - &((struct dasd_fba_private *) device->private)->rdc_data, - sizeof (struct dasd_fba_characteristics)); + info->characteristics_size = sizeof(private->rdc_data); + memcpy(info->characteristics, &private->rdc_data, + sizeof(private->rdc_data)); info->confdata_size = 0; return 0; } diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index ef1d9fb..31d544a 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -178,8 +178,8 @@ int dasd_gendisk_init(void) /* Register to static dasd major 94 */ rc = register_blkdev(DASD_MAJOR, "dasd"); if (rc != 0) { - pr_warning("Registering the device driver with major number " - "%d failed\n", DASD_MAJOR); + pr_warn("Registering the device driver with major number %d failed\n", + DASD_MAJOR); return rc; } return 0; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 4aed5ed..8de29be 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -241,6 +241,13 @@ struct dasd_ccw_req { typedef struct dasd_ccw_req *(*dasd_erp_fn_t) (struct dasd_ccw_req *); /* + * A single CQR can only contain a maximum of 255 CCWs. It is limited by + * the locate record and locate record extended count value which can only hold + * 1 Byte max. + */ +#define DASD_CQR_MAX_CCW 255 + +/* * Unique identifier for dasd device. */ #define UA_NOT_CONFIGURED 0x00 @@ -438,7 +445,7 @@ struct dasd_device { /* Device discipline stuff. */ struct dasd_discipline *discipline; struct dasd_discipline *base_discipline; - char *private; + void *private; struct dasd_path path_data; /* Device state and target state. */ diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 02837d0..90f30cc 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -203,9 +203,7 @@ static int dasd_format(struct dasd_block *block, struct format_data_t *fdata) { struct dasd_device *base; - int enable_pav = 1; - int rc, retries; - int start, stop; + int rc; base = block->base; if (base->discipline->format_device == NULL) @@ -233,30 +231,11 @@ dasd_format(struct dasd_block *block, struct format_data_t *fdata) bdput(bdev); } - retries = 255; - /* backup start- and endtrack for retries */ - start = fdata->start_unit; - stop = fdata->stop_unit; - do { - rc = base->discipline->format_device(base, fdata, enable_pav); - if (rc) { - if (rc == -EAGAIN) { - retries--; - /* disable PAV in case of errors */ - enable_pav = 0; - fdata->start_unit = start; - fdata->stop_unit = stop; - } else - return rc; - } else - /* success */ - break; - } while (retries); - - if (!retries) - return -EIO; - else - return 0; + rc = base->discipline->format_device(base, fdata, 1); + if (rc == -EAGAIN) + rc = base->discipline->format_device(base, fdata, 0); + + return rc; } /* @@ -286,9 +265,8 @@ dasd_ioctl_format(struct block_device *bdev, void __user *argp) return -EFAULT; } if (bdev != bdev->bd_contains) { - pr_warning("%s: The specified DASD is a partition and cannot " - "be formatted\n", - dev_name(&base->cdev->dev)); + pr_warn("%s: The specified DASD is a partition and cannot be formatted\n", + dev_name(&base->cdev->dev)); dasd_put_device(base); return -EINVAL; } diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index aa7bb2d..bad7a19 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -322,13 +322,12 @@ static ssize_t dasd_stats_proc_write(struct file *file, return user_len; out_parse_error: rc = -EINVAL; - pr_warning("%s is not a supported value for /proc/dasd/statistics\n", - str); + pr_warn("%s is not a supported value for /proc/dasd/statistics\n", str); out_error: vfree(buffer); return rc; #else - pr_warning("/proc/dasd/statistics: is not activated in this kernel\n"); + pr_warn("/proc/dasd/statistics: is not activated in this kernel\n"); return user_len; #endif /* CONFIG_DASD_PROFILE */ } diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index ce7b701..1bce9cf 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -738,15 +738,15 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch dev_info = dcssblk_get_device_by_name(local_buf); if (dev_info == NULL) { up_write(&dcssblk_devices_sem); - pr_warning("Device %s cannot be removed because it is not a " - "known device\n", local_buf); + pr_warn("Device %s cannot be removed because it is not a known device\n", + local_buf); rc = -ENODEV; goto out_buf; } if (atomic_read(&dev_info->use_count) != 0) { up_write(&dcssblk_devices_sem); - pr_warning("Device %s cannot be removed while it is in " - "use\n", local_buf); + pr_warn("Device %s cannot be removed while it is in use\n", + local_buf); rc = -EBUSY; goto out_buf; } @@ -850,9 +850,8 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio) case SEG_TYPE_SC: /* cannot write to these segments */ if (bio_data_dir(bio) == WRITE) { - pr_warning("Writing to %s failed because it " - "is a read-only device\n", - dev_name(&dev_info->dev)); + pr_warn("Writing to %s failed because it is a read-only device\n", + dev_name(&dev_info->dev)); goto fail; } } diff --git a/drivers/s390/char/monreader.c b/drivers/s390/char/monreader.c index fc94bfd..ebdeaa5 100644 --- a/drivers/s390/char/monreader.c +++ b/drivers/s390/char/monreader.c @@ -257,7 +257,7 @@ static void mon_iucv_message_pending(struct iucv_path *path, memcpy(&monpriv->msg_array[monpriv->write_index]->msg, msg, sizeof(*msg)); if (atomic_inc_return(&monpriv->msglim_count) == MON_MSGLIM) { - pr_warning("The read queue for monitor data is full\n"); + pr_warn("The read queue for monitor data is full\n"); monpriv->msg_array[monpriv->write_index]->msglim_reached = 1; } monpriv->write_index = (monpriv->write_index + 1) % MON_MSGLIM; @@ -342,8 +342,8 @@ static int mon_close(struct inode *inode, struct file *filp) if (monpriv->path) { rc = iucv_path_sever(monpriv->path, user_data_sever); if (rc) - pr_warning("Disconnecting the z/VM *MONITOR system " - "service failed with rc=%i\n", rc); + pr_warn("Disconnecting the z/VM *MONITOR system service failed with rc=%i\n", + rc); iucv_path_free(monpriv->path); } @@ -469,8 +469,8 @@ static int monreader_freeze(struct device *dev) if (monpriv->path) { rc = iucv_path_sever(monpriv->path, user_data_sever); if (rc) - pr_warning("Disconnecting the z/VM *MONITOR system " - "service failed with rc=%i\n", rc); + pr_warn("Disconnecting the z/VM *MONITOR system service failed with rc=%i\n", + rc); iucv_path_free(monpriv->path); } atomic_set(&monpriv->iucv_severed, 0); diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 806239c..d3947ea 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -67,8 +67,8 @@ int sclp_sync_request_timeout(sclp_cmdw_t cmd, void *sccb, int timeout) /* Check response. */ if (request->status != SCLP_REQ_DONE) { - pr_warning("sync request failed (cmd=0x%08x, " - "status=0x%02x)\n", cmd, request->status); + pr_warn("sync request failed (cmd=0x%08x, status=0x%02x)\n", + cmd, request->status); rc = -EIO; } out: @@ -122,8 +122,8 @@ int sclp_get_core_info(struct sclp_core_info *info) if (rc) goto out; if (sccb->header.response_code != 0x0010) { - pr_warning("readcpuinfo failed (response=0x%04x)\n", - sccb->header.response_code); + pr_warn("readcpuinfo failed (response=0x%04x)\n", + sccb->header.response_code); rc = -EIO; goto out; } @@ -160,9 +160,8 @@ static int do_core_configure(sclp_cmdw_t cmd) case 0x0120: break; default: - pr_warning("configure cpu failed (cmd=0x%08x, " - "response=0x%04x)\n", cmd, - sccb->header.response_code); + pr_warn("configure cpu failed (cmd=0x%08x, response=0x%04x)\n", + cmd, sccb->header.response_code); rc = -EIO; break; } @@ -230,9 +229,8 @@ static int do_assign_storage(sclp_cmdw_t cmd, u16 rn) case 0x0120: break; default: - pr_warning("assign storage failed (cmd=0x%08x, " - "response=0x%04x, rn=0x%04x)\n", cmd, - sccb->header.response_code, rn); + pr_warn("assign storage failed (cmd=0x%08x, response=0x%04x, rn=0x%04x)\n", + cmd, sccb->header.response_code, rn); rc = -EIO; break; } @@ -675,9 +673,8 @@ static int do_chp_configure(sclp_cmdw_t cmd) case 0x0450: break; default: - pr_warning("configure channel-path failed " - "(cmd=0x%08x, response=0x%04x)\n", cmd, - sccb->header.response_code); + pr_warn("configure channel-path failed (cmd=0x%08x, response=0x%04x)\n", + cmd, sccb->header.response_code); rc = -EIO; break; } @@ -744,8 +741,8 @@ int sclp_chp_read_info(struct sclp_chp_info *info) if (rc) goto out; if (sccb->header.response_code != 0x0010) { - pr_warning("read channel-path info failed " - "(response=0x%04x)\n", sccb->header.response_code); + pr_warn("read channel-path info failed (response=0x%04x)\n", + sccb->header.response_code); rc = -EIO; goto out; } diff --git a/drivers/s390/char/sclp_cpi_sys.c b/drivers/s390/char/sclp_cpi_sys.c index 2acea80..f344e5b 100644 --- a/drivers/s390/char/sclp_cpi_sys.c +++ b/drivers/s390/char/sclp_cpi_sys.c @@ -154,16 +154,14 @@ static int cpi_req(void) wait_for_completion(&completion); if (req->status != SCLP_REQ_DONE) { - pr_warning("request failed (status=0x%02x)\n", - req->status); + pr_warn("request failed (status=0x%02x)\n", req->status); rc = -EIO; goto out_free_req; } response = ((struct cpi_sccb *) req->sccb)->header.response_code; if (response != 0x0020) { - pr_warning("request failed with response code 0x%x\n", - response); + pr_warn("request failed with response code 0x%x\n", response); rc = -EIO; } diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index f3b5123..3c379da 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -699,8 +699,8 @@ tape_generic_remove(struct ccw_device *cdev) */ DBF_EVENT(3, "(%08x): Drive in use vanished!\n", device->cdev_id); - pr_warning("%s: A tape unit was detached while in " - "use\n", dev_name(&device->cdev->dev)); + pr_warn("%s: A tape unit was detached while in use\n", + dev_name(&device->cdev->dev)); tape_state_set(device, TS_NOT_OPER); __tape_discard_requests(device); spin_unlock_irq(get_ccwdev_lock(device->cdev)); diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index 799c152..e883063 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -343,8 +343,7 @@ static int vmlogrdr_open (struct inode *inode, struct file *filp) if (logptr->autorecording) { ret = vmlogrdr_recording(logptr,1,logptr->autopurge); if (ret) - pr_warning("vmlogrdr: failed to start " - "recording automatically\n"); + pr_warn("vmlogrdr: failed to start recording automatically\n"); } /* create connection to the system service */ @@ -396,8 +395,7 @@ static int vmlogrdr_release (struct inode *inode, struct file *filp) if (logptr->autorecording) { ret = vmlogrdr_recording(logptr,0,logptr->autopurge); if (ret) - pr_warning("vmlogrdr: failed to stop " - "recording automatically\n"); + pr_warn("vmlogrdr: failed to stop recording automatically\n"); } logptr->dev_in_use = 0; diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index 20314aa..9082476 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -51,9 +51,8 @@ static int blacklist_range(range_action action, unsigned int from_ssid, { if ((from_ssid > to_ssid) || ((from_ssid == to_ssid) && (from > to))) { if (msgtrigger) - pr_warning("0.%x.%04x to 0.%x.%04x is not a valid " - "range for cio_ignore\n", from_ssid, from, - to_ssid, to); + pr_warn("0.%x.%04x to 0.%x.%04x is not a valid range for cio_ignore\n", + from_ssid, from, to_ssid, to); return 1; } @@ -140,8 +139,8 @@ static int parse_busid(char *str, unsigned int *cssid, unsigned int *ssid, rc = 0; out: if (rc && msgtrigger) - pr_warning("%s is not a valid device for the cio_ignore " - "kernel parameter\n", str); + pr_warn("%s is not a valid device for the cio_ignore kernel parameter\n", + str); return rc; } diff --git a/drivers/s390/cio/ccwreq.c b/drivers/s390/cio/ccwreq.c index 79f5991..2782100 100644 --- a/drivers/s390/cio/ccwreq.c +++ b/drivers/s390/cio/ccwreq.c @@ -333,13 +333,12 @@ void ccw_request_timeout(struct ccw_device *cdev) for (chp = 0; chp < 8; chp++) { if ((0x80 >> chp) & sch->schib.pmcw.lpum) - pr_warning("%s: No interrupt was received within %lus " - "(CS=%02x, DS=%02x, CHPID=%x.%02x)\n", - dev_name(&cdev->dev), req->timeout / HZ, - scsw_cstat(&sch->schib.scsw), - scsw_dstat(&sch->schib.scsw), - sch->schid.cssid, - sch->schib.pmcw.chpid[chp]); + pr_warn("%s: No interrupt was received within %lus (CS=%02x, DS=%02x, CHPID=%x.%02x)\n", + dev_name(&cdev->dev), req->timeout / HZ, + scsw_cstat(&sch->schib.scsw), + scsw_dstat(&sch->schib.scsw), + sch->schid.cssid, + sch->schib.pmcw.chpid[chp]); } if (!ccwreq_next_path(cdev)) { diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 39a8ae5..de6fccc 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -656,7 +656,7 @@ struct subchannel *cio_probe_console(void) sch_no = cio_get_console_sch_no(); if (sch_no == -1) { - pr_warning("No CCW console was found\n"); + pr_warn("No CCW console was found\n"); return ERR_PTR(-ENODEV); } init_subchannel_id(&schid); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 6aae684..7ada078 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -364,11 +364,11 @@ int ccw_device_set_offline(struct ccw_device *cdev) cdev->private->state == DEV_STATE_DISCONNECTED)); /* Inform the user if set offline failed. */ if (cdev->private->state == DEV_STATE_BOXED) { - pr_warning("%s: The device entered boxed state while " - "being set offline\n", dev_name(&cdev->dev)); + pr_warn("%s: The device entered boxed state while being set offline\n", + dev_name(&cdev->dev)); } else if (cdev->private->state == DEV_STATE_NOT_OPER) { - pr_warning("%s: The device stopped operating while " - "being set offline\n", dev_name(&cdev->dev)); + pr_warn("%s: The device stopped operating while being set offline\n", + dev_name(&cdev->dev)); } /* Give up reference from ccw_device_set_online(). */ put_device(&cdev->dev); @@ -429,13 +429,11 @@ int ccw_device_set_online(struct ccw_device *cdev) spin_unlock_irq(cdev->ccwlock); /* Inform the user that set online failed. */ if (cdev->private->state == DEV_STATE_BOXED) { - pr_warning("%s: Setting the device online failed " - "because it is boxed\n", - dev_name(&cdev->dev)); + pr_warn("%s: Setting the device online failed because it is boxed\n", + dev_name(&cdev->dev)); } else if (cdev->private->state == DEV_STATE_NOT_OPER) { - pr_warning("%s: Setting the device online failed " - "because it is not operational\n", - dev_name(&cdev->dev)); + pr_warn("%s: Setting the device online failed because it is not operational\n", + dev_name(&cdev->dev)); } /* Give up online reference since onlining failed. */ put_device(&cdev->dev); @@ -619,9 +617,8 @@ initiate_logging(struct device *dev, struct device_attribute *attr, rc = chsc_siosl(sch->schid); if (rc < 0) { - pr_warning("Logging for subchannel 0.%x.%04x failed with " - "errno=%d\n", - sch->schid.ssid, sch->schid.sch_no, rc); + pr_warn("Logging for subchannel 0.%x.%04x failed with errno=%d\n", + sch->schid.ssid, sch->schid.sch_no, rc); return rc; } pr_notice("Logging for subchannel 0.%x.%04x was triggered\n", diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index 2f5b518..251db0a 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -1761,8 +1761,8 @@ lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd) lcs_schedule_recovery(card); break; case LCS_CMD_STOPLAN: - pr_warning("Stoplan for %s initiated by LGW.\n", - card->dev->name); + pr_warn("Stoplan for %s initiated by LGW\n", + card->dev->name); if (card->dev) netif_carrier_off(card->dev); break; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 7c8c68c..ac54433 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3624,7 +3624,7 @@ static int qeth_l3_register_notifiers(void) return rc; } #else - pr_warning("There is no IPv6 support for the layer 3 discipline\n"); + pr_warn("There is no IPv6 support for the layer 3 discipline\n"); #endif return 0; } |