diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-06-07 18:47:53 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-06-07 18:47:53 -0700 |
commit | aec040e29e804b40fa2934ec7a5be9f515f23098 (patch) | |
tree | c9afca2811091ceab7362a34fb22916fc881be10 /arch | |
parent | 8ea656bdc9d3c39e18f2bd5d8ffe0301fde64f72 (diff) | |
parent | 6c61cfe91be53b444abc1da2dbab14efa77706c0 (diff) | |
download | op-kernel-dev-aec040e29e804b40fa2934ec7a5be9f515f23098.zip op-kernel-dev-aec040e29e804b40fa2934ec7a5be9f515f23098.tar.gz |
Merge branch 'for-linus' of git://git390.marist.edu/pub/scm/linux-2.6
* 'for-linus' of git://git390.marist.edu/pub/scm/linux-2.6:
[S390] fix kvm defines for 31 bit compile
[S390] use generic RCU page-table freeing code
[S390] qdio: Split SBAL entry flags
[S390] kvm-s390: fix stfle facilities numbers >=64
[S390] kvm-s390: Fix host crash on misbehaving guests
Diffstat (limited to 'arch')
-rw-r--r-- | arch/s390/Kconfig | 1 | ||||
-rw-r--r-- | arch/s390/include/asm/pgalloc.h | 8 | ||||
-rw-r--r-- | arch/s390/include/asm/pgtable.h | 39 | ||||
-rw-r--r-- | arch/s390/include/asm/qdio.h | 119 | ||||
-rw-r--r-- | arch/s390/include/asm/tlb.h | 94 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.c | 1 | ||||
-rw-r--r-- | arch/s390/kvm/sie64a.S | 2 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 292 |
8 files changed, 223 insertions, 333 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9fab2aa..90d77bd 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -89,6 +89,7 @@ config S390 select HAVE_GET_USER_PAGES_FAST select HAVE_ARCH_MUTEX_CPU_RELAX select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 + select HAVE_RCU_TABLE_FREE if SMP select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index f6314af..38e71eb 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -17,15 +17,15 @@ #include <linux/gfp.h> #include <linux/mm.h> -#define check_pgt_cache() do {} while (0) - unsigned long *crst_table_alloc(struct mm_struct *); void crst_table_free(struct mm_struct *, unsigned long *); -void crst_table_free_rcu(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); -void page_table_free_rcu(struct mm_struct *, unsigned long *); +#ifdef CONFIG_HAVE_RCU_TABLE_FREE +void page_table_free_rcu(struct mmu_gather *, unsigned long *); +void __tlb_remove_table(void *_table); +#endif static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index e4efacf..801fbe1 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -293,19 +293,6 @@ extern unsigned long VMALLOC_START; * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid. */ -/* Page status table bits for virtualization */ -#define RCP_ACC_BITS 0xf000000000000000UL -#define RCP_FP_BIT 0x0800000000000000UL -#define RCP_PCL_BIT 0x0080000000000000UL -#define RCP_HR_BIT 0x0040000000000000UL -#define RCP_HC_BIT 0x0020000000000000UL -#define RCP_GR_BIT 0x0004000000000000UL -#define RCP_GC_BIT 0x0002000000000000UL - -/* User dirty / referenced bit for KVM's migration feature */ -#define KVM_UR_BIT 0x0000800000000000UL -#define KVM_UC_BIT 0x0000400000000000UL - #ifndef __s390x__ /* Bits in the segment table address-space-control-element */ @@ -325,6 +312,19 @@ extern unsigned long VMALLOC_START; #define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) +/* Page status table bits for virtualization */ +#define RCP_ACC_BITS 0xf0000000UL +#define RCP_FP_BIT 0x08000000UL +#define RCP_PCL_BIT 0x00800000UL +#define RCP_HR_BIT 0x00400000UL +#define RCP_HC_BIT 0x00200000UL +#define RCP_GR_BIT 0x00040000UL +#define RCP_GC_BIT 0x00020000UL + +/* User dirty / referenced bit for KVM's migration feature */ +#define KVM_UR_BIT 0x00008000UL +#define KVM_UC_BIT 0x00004000UL + #else /* __s390x__ */ /* Bits in the segment/region table address-space-control-element */ @@ -367,6 +367,19 @@ extern unsigned long VMALLOC_START; #define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ #define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ +/* Page status table bits for virtualization */ +#define RCP_ACC_BITS 0xf000000000000000UL +#define RCP_FP_BIT 0x0800000000000000UL +#define RCP_PCL_BIT 0x0080000000000000UL +#define RCP_HR_BIT 0x0040000000000000UL +#define RCP_HC_BIT 0x0020000000000000UL +#define RCP_GR_BIT 0x0004000000000000UL +#define RCP_GC_BIT 0x0002000000000000UL + +/* User dirty / referenced bit for KVM's migration feature */ +#define KVM_UR_BIT 0x0000800000000000UL +#define KVM_UC_BIT 0x0000400000000000UL + #endif /* __s390x__ */ /* diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 350e7ee..15c9762 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -139,110 +139,47 @@ struct slib { struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q]; } __attribute__ ((packed, aligned(2048))); -/** - * struct sbal_flags - storage block address list flags - * @last: last entry - * @cont: contiguous storage - * @frag: fragmentation - */ -struct sbal_flags { - u8 : 1; - u8 last : 1; - u8 cont : 1; - u8 : 1; - u8 frag : 2; - u8 : 2; -} __attribute__ ((packed)); - -#define SBAL_FLAGS_FIRST_FRAG 0x04000000UL -#define SBAL_FLAGS_MIDDLE_FRAG 0x08000000UL -#define SBAL_FLAGS_LAST_FRAG 0x0c000000UL -#define SBAL_FLAGS_LAST_ENTRY 0x40000000UL -#define SBAL_FLAGS_CONTIGUOUS 0x20000000UL +#define SBAL_EFLAGS_LAST_ENTRY 0x40 +#define SBAL_EFLAGS_CONTIGUOUS 0x20 +#define SBAL_EFLAGS_FIRST_FRAG 0x04 +#define SBAL_EFLAGS_MIDDLE_FRAG 0x08 +#define SBAL_EFLAGS_LAST_FRAG 0x0c +#define SBAL_EFLAGS_MASK 0x6f -#define SBAL_FLAGS0_DATA_CONTINUATION 0x20UL +#define SBAL_SFLAGS0_PCI_REQ 0x40 +#define SBAL_SFLAGS0_DATA_CONTINUATION 0x20 /* Awesome OpenFCP extensions */ -#define SBAL_FLAGS0_TYPE_STATUS 0x00UL -#define SBAL_FLAGS0_TYPE_WRITE 0x08UL -#define SBAL_FLAGS0_TYPE_READ 0x10UL -#define SBAL_FLAGS0_TYPE_WRITE_READ 0x18UL -#define SBAL_FLAGS0_MORE_SBALS 0x04UL -#define SBAL_FLAGS0_COMMAND 0x02UL -#define SBAL_FLAGS0_LAST_SBAL 0x00UL -#define SBAL_FLAGS0_ONLY_SBAL SBAL_FLAGS0_COMMAND -#define SBAL_FLAGS0_MIDDLE_SBAL SBAL_FLAGS0_MORE_SBALS -#define SBAL_FLAGS0_FIRST_SBAL SBAL_FLAGS0_MORE_SBALS | SBAL_FLAGS0_COMMAND -#define SBAL_FLAGS0_PCI 0x40 - -/** - * struct sbal_sbalf_0 - sbal flags for sbale 0 - * @pci: PCI indicator - * @cont: data continuation - * @sbtype: storage-block type (FCP) - */ -struct sbal_sbalf_0 { - u8 : 1; - u8 pci : 1; - u8 cont : 1; - u8 sbtype : 2; - u8 : 3; -} __attribute__ ((packed)); - -/** - * struct sbal_sbalf_1 - sbal flags for sbale 1 - * @key: storage key - */ -struct sbal_sbalf_1 { - u8 : 4; - u8 key : 4; -} __attribute__ ((packed)); - -/** - * struct sbal_sbalf_14 - sbal flags for sbale 14 - * @erridx: error index - */ -struct sbal_sbalf_14 { - u8 : 4; - u8 erridx : 4; -} __attribute__ ((packed)); - -/** - * struct sbal_sbalf_15 - sbal flags for sbale 15 - * @reason: reason for error state - */ -struct sbal_sbalf_15 { - u8 reason; -} __attribute__ ((packed)); - -/** - * union sbal_sbalf - storage block address list flags - * @i0: sbalf0 - * @i1: sbalf1 - * @i14: sbalf14 - * @i15: sblaf15 - * @value: raw value - */ -union sbal_sbalf { - struct sbal_sbalf_0 i0; - struct sbal_sbalf_1 i1; - struct sbal_sbalf_14 i14; - struct sbal_sbalf_15 i15; - u8 value; -}; +#define SBAL_SFLAGS0_TYPE_STATUS 0x00 +#define SBAL_SFLAGS0_TYPE_WRITE 0x08 +#define SBAL_SFLAGS0_TYPE_READ 0x10 +#define SBAL_SFLAGS0_TYPE_WRITE_READ 0x18 +#define SBAL_SFLAGS0_MORE_SBALS 0x04 +#define SBAL_SFLAGS0_COMMAND 0x02 +#define SBAL_SFLAGS0_LAST_SBAL 0x00 +#define SBAL_SFLAGS0_ONLY_SBAL SBAL_SFLAGS0_COMMAND +#define SBAL_SFLAGS0_MIDDLE_SBAL SBAL_SFLAGS0_MORE_SBALS +#define SBAL_SFLAGS0_FIRST_SBAL (SBAL_SFLAGS0_MORE_SBALS | SBAL_SFLAGS0_COMMAND) /** * struct qdio_buffer_element - SBAL entry - * @flags: flags + * @eflags: SBAL entry flags + * @scount: SBAL count + * @sflags: whole SBAL flags * @length: length * @addr: address */ struct qdio_buffer_element { - u32 flags; + u8 eflags; + /* private: */ + u8 res1; + /* public: */ + u8 scount; + u8 sflags; u32 length; #ifdef CONFIG_32BIT /* private: */ - void *reserved; + void *res2; /* public: */ #endif void *addr; diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 77eee54..c687a2c 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -26,67 +26,60 @@ #include <linux/swap.h> #include <asm/processor.h> #include <asm/pgalloc.h> -#include <asm/smp.h> #include <asm/tlbflush.h> struct mmu_gather { struct mm_struct *mm; +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + struct mmu_table_batch *batch; +#endif unsigned int fullmm; - unsigned int nr_ptes; - unsigned int nr_pxds; - unsigned int max; - void **array; - void *local[8]; + unsigned int need_flush; }; -static inline void __tlb_alloc_page(struct mmu_gather *tlb) -{ - unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); +#ifdef CONFIG_HAVE_RCU_TABLE_FREE +struct mmu_table_batch { + struct rcu_head rcu; + unsigned int nr; + void *tables[0]; +}; - if (addr) { - tlb->array = (void *) addr; - tlb->max = PAGE_SIZE / sizeof(void *); - } -} +#define MAX_TABLE_BATCH \ + ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) + +extern void tlb_table_flush(struct mmu_gather *tlb); +extern void tlb_remove_table(struct mmu_gather *tlb, void *table); +#endif static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush) { tlb->mm = mm; - tlb->max = ARRAY_SIZE(tlb->local); - tlb->array = tlb->local; tlb->fullmm = full_mm_flush; + tlb->need_flush = 0; +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + tlb->batch = NULL; +#endif if (tlb->fullmm) __tlb_flush_mm(mm); - else - __tlb_alloc_page(tlb); - tlb->nr_ptes = 0; - tlb->nr_pxds = tlb->max; } static inline void tlb_flush_mmu(struct mmu_gather *tlb) { - if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < tlb->max)) - __tlb_flush_mm(tlb->mm); - while (tlb->nr_ptes > 0) - page_table_free_rcu(tlb->mm, tlb->array[--tlb->nr_ptes]); - while (tlb->nr_pxds < tlb->max) - crst_table_free_rcu(tlb->mm, tlb->array[tlb->nr_pxds++]); + if (!tlb->need_flush) + return; + tlb->need_flush = 0; + __tlb_flush_mm(tlb->mm); +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + tlb_table_flush(tlb); +#endif } static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { tlb_flush_mmu(tlb); - - rcu_table_freelist_finish(); - - /* keep the page table cache within bounds */ - check_pgt_cache(); - - if (tlb->array != tlb->local) - free_pages((unsigned long) tlb->array, 0); } /* @@ -112,12 +105,11 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long address) { - if (!tlb->fullmm) { - tlb->array[tlb->nr_ptes++] = pte; - if (tlb->nr_ptes >= tlb->nr_pxds) - tlb_flush_mmu(tlb); - } else - page_table_free(tlb->mm, (unsigned long *) pte); +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + if (!tlb->fullmm) + return page_table_free_rcu(tlb, (unsigned long *) pte); +#endif + page_table_free(tlb->mm, (unsigned long *) pte); } /* @@ -133,12 +125,11 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, #ifdef __s390x__ if (tlb->mm->context.asce_limit <= (1UL << 31)) return; - if (!tlb->fullmm) { - tlb->array[--tlb->nr_pxds] = pmd; - if (tlb->nr_ptes >= tlb->nr_pxds) - tlb_flush_mmu(tlb); - } else - crst_table_free(tlb->mm, (unsigned long *) pmd); +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + if (!tlb->fullmm) + return tlb_remove_table(tlb, pmd); +#endif + crst_table_free(tlb->mm, (unsigned long *) pmd); #endif } @@ -155,12 +146,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, #ifdef __s390x__ if (tlb->mm->context.asce_limit <= (1UL << 42)) return; - if (!tlb->fullmm) { - tlb->array[--tlb->nr_pxds] = pud; - if (tlb->nr_ptes >= tlb->nr_pxds) - tlb_flush_mmu(tlb); - } else - crst_table_free(tlb->mm, (unsigned long *) pud); +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + if (!tlb->fullmm) + return tlb_remove_table(tlb, pud); +#endif + crst_table_free(tlb->mm, (unsigned long *) pud); #endif } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 30ca85c..67345ae 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -731,6 +731,7 @@ static int __init kvm_s390_init(void) } memcpy(facilities, S390_lowcore.stfle_fac_list, 16); facilities[0] &= 0xff00fff3f47c0000ULL; + facilities[1] &= 0x201c000000000000ULL; return 0; } diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S index ab0e041..5faa1b1 100644 --- a/arch/s390/kvm/sie64a.S +++ b/arch/s390/kvm/sie64a.S @@ -93,4 +93,6 @@ sie_err: .section __ex_table,"a" .quad sie_inst,sie_err + .quad sie_exit,sie_err + .quad sie_reenter,sie_err .previous diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index b09763f..37a23c22 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -24,94 +24,12 @@ #include <asm/tlbflush.h> #include <asm/mmu_context.h> -struct rcu_table_freelist { - struct rcu_head rcu; - struct mm_struct *mm; - unsigned int pgt_index; - unsigned int crst_index; - unsigned long *table[0]; -}; - -#define RCU_FREELIST_SIZE \ - ((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \ - / sizeof(unsigned long)) - -static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist); - -static void __page_table_free(struct mm_struct *mm, unsigned long *table); - -static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm) -{ - struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist); - struct rcu_table_freelist *batch = *batchp; - - if (batch) - return batch; - batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC); - if (batch) { - batch->mm = mm; - batch->pgt_index = 0; - batch->crst_index = RCU_FREELIST_SIZE; - *batchp = batch; - } - return batch; -} - -static void rcu_table_freelist_callback(struct rcu_head *head) -{ - struct rcu_table_freelist *batch = - container_of(head, struct rcu_table_freelist, rcu); - - while (batch->pgt_index > 0) - __page_table_free(batch->mm, batch->table[--batch->pgt_index]); - while (batch->crst_index < RCU_FREELIST_SIZE) - crst_table_free(batch->mm, batch->table[batch->crst_index++]); - free_page((unsigned long) batch); -} - -void rcu_table_freelist_finish(void) -{ - struct rcu_table_freelist **batchp = &get_cpu_var(rcu_table_freelist); - struct rcu_table_freelist *batch = *batchp; - - if (!batch) - goto out; - call_rcu(&batch->rcu, rcu_table_freelist_callback); - *batchp = NULL; -out: - put_cpu_var(rcu_table_freelist); -} - -static void smp_sync(void *arg) -{ -} - #ifndef CONFIG_64BIT #define ALLOC_ORDER 1 -#define TABLES_PER_PAGE 4 -#define FRAG_MASK 15UL -#define SECOND_HALVES 10UL - -void clear_table_pgstes(unsigned long *table) -{ - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); - memset(table + 256, 0, PAGE_SIZE/4); - clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); - memset(table + 768, 0, PAGE_SIZE/4); -} - +#define FRAG_MASK 0x0f #else #define ALLOC_ORDER 2 -#define TABLES_PER_PAGE 2 -#define FRAG_MASK 3UL -#define SECOND_HALVES 2UL - -void clear_table_pgstes(unsigned long *table) -{ - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); - memset(table + 256, 0, PAGE_SIZE/2); -} - +#define FRAG_MASK 0x03 #endif unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE; @@ -140,29 +58,6 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table) free_pages((unsigned long) table, ALLOC_ORDER); } -void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table) -{ - struct rcu_table_freelist *batch; - - preempt_disable(); - if (atomic_read(&mm->mm_users) < 2 && - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { - crst_table_free(mm, table); - goto out; - } - batch = rcu_table_freelist_get(mm); - if (!batch) { - smp_call_function(smp_sync, NULL, 1); - crst_table_free(mm, table); - goto out; - } - batch->table[--batch->crst_index] = table; - if (batch->pgt_index >= batch->crst_index) - rcu_table_freelist_finish(); -out: - preempt_enable(); -} - #ifdef CONFIG_64BIT int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) { @@ -238,124 +133,175 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) } #endif +static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) +{ + unsigned int old, new; + + do { + old = atomic_read(v); + new = old ^ bits; + } while (atomic_cmpxchg(v, old, new) != old); + return new; +} + /* * page table entry allocation/free routines. */ +#ifdef CONFIG_PGSTE +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) +{ + struct page *page; + unsigned long *table; + + page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + if (!page) + return NULL; + pgtable_page_ctor(page); + atomic_set(&page->_mapcount, 3); + table = (unsigned long *) page_to_phys(page); + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); + clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + return table; +} + +static inline void page_table_free_pgste(unsigned long *table) +{ + struct page *page; + + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + pgtable_page_ctor(page); + atomic_set(&page->_mapcount, -1); + __free_page(page); +} +#endif + unsigned long *page_table_alloc(struct mm_struct *mm) { struct page *page; unsigned long *table; - unsigned long bits; + unsigned int mask, bit; - bits = (mm->context.has_pgste) ? 3UL : 1UL; +#ifdef CONFIG_PGSTE + if (mm_has_pgste(mm)) + return page_table_alloc_pgste(mm); +#endif + /* Allocate fragments of a 4K page as 1K/2K page table */ spin_lock_bh(&mm->context.list_lock); - page = NULL; + mask = FRAG_MASK; if (!list_empty(&mm->context.pgtable_list)) { page = list_first_entry(&mm->context.pgtable_list, struct page, lru); - if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) - page = NULL; + table = (unsigned long *) page_to_phys(page); + mask = atomic_read(&page->_mapcount); + mask = mask | (mask >> 4); } - if (!page) { + if ((mask & FRAG_MASK) == FRAG_MASK) { spin_unlock_bh(&mm->context.list_lock); page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; pgtable_page_ctor(page); - page->flags &= ~FRAG_MASK; + atomic_set(&page->_mapcount, 1); table = (unsigned long *) page_to_phys(page); - if (mm->context.has_pgste) - clear_table_pgstes(table); - else - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); spin_lock_bh(&mm->context.list_lock); list_add(&page->lru, &mm->context.pgtable_list); + } else { + for (bit = 1; mask & bit; bit <<= 1) + table += PTRS_PER_PTE; + mask = atomic_xor_bits(&page->_mapcount, bit); + if ((mask & FRAG_MASK) == FRAG_MASK) + list_del(&page->lru); } - table = (unsigned long *) page_to_phys(page); - while (page->flags & bits) { - table += 256; - bits <<= 1; - } - page->flags |= bits; - if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) - list_move_tail(&page->lru, &mm->context.pgtable_list); spin_unlock_bh(&mm->context.list_lock); return table; } -static void __page_table_free(struct mm_struct *mm, unsigned long *table) +void page_table_free(struct mm_struct *mm, unsigned long *table) { struct page *page; - unsigned long bits; + unsigned int bit, mask; - bits = ((unsigned long) table) & 15; - table = (unsigned long *)(((unsigned long) table) ^ bits); +#ifdef CONFIG_PGSTE + if (mm_has_pgste(mm)) + return page_table_free_pgste(table); +#endif + /* Free 1K/2K page table fragment of a 4K page */ page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - page->flags ^= bits; - if (!(page->flags & FRAG_MASK)) { + bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); + spin_lock_bh(&mm->context.list_lock); + if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) + list_del(&page->lru); + mask = atomic_xor_bits(&page->_mapcount, bit); + if (mask & FRAG_MASK) + list_add(&page->lru, &mm->context.pgtable_list); + spin_unlock_bh(&mm->context.list_lock); + if (mask == 0) { pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); __free_page(page); } } -void page_table_free(struct mm_struct *mm, unsigned long *table) +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + +static void __page_table_free_rcu(void *table, unsigned bit) { struct page *page; - unsigned long bits; - bits = (mm->context.has_pgste) ? 3UL : 1UL; - bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); +#ifdef CONFIG_PGSTE + if (bit == FRAG_MASK) + return page_table_free_pgste(table); +#endif + /* Free 1K/2K page table fragment of a 4K page */ page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - spin_lock_bh(&mm->context.list_lock); - page->flags ^= bits; - if (page->flags & FRAG_MASK) { - /* Page now has some free pgtable fragments. */ - if (!list_empty(&page->lru)) - list_move(&page->lru, &mm->context.pgtable_list); - page = NULL; - } else - /* All fragments of the 4K page have been freed. */ - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - if (page) { + if (atomic_xor_bits(&page->_mapcount, bit) == 0) { pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); __free_page(page); } } -void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) +void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) { - struct rcu_table_freelist *batch; + struct mm_struct *mm; struct page *page; - unsigned long bits; + unsigned int bit, mask; - preempt_disable(); - if (atomic_read(&mm->mm_users) < 2 && - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { - page_table_free(mm, table); - goto out; - } - batch = rcu_table_freelist_get(mm); - if (!batch) { - smp_call_function(smp_sync, NULL, 1); - page_table_free(mm, table); - goto out; + mm = tlb->mm; +#ifdef CONFIG_PGSTE + if (mm_has_pgste(mm)) { + table = (unsigned long *) (__pa(table) | FRAG_MASK); + tlb_remove_table(tlb, table); + return; } - bits = (mm->context.has_pgste) ? 3UL : 1UL; - bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); +#endif + bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock_bh(&mm->context.list_lock); - /* Delayed freeing with rcu prevents reuse of pgtable fragments */ - list_del_init(&page->lru); + if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) + list_del(&page->lru); + mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4)); + if (mask & FRAG_MASK) + list_add_tail(&page->lru, &mm->context.pgtable_list); spin_unlock_bh(&mm->context.list_lock); - table = (unsigned long *)(((unsigned long) table) | bits); - batch->table[batch->pgt_index++] = table; - if (batch->pgt_index >= batch->crst_index) - rcu_table_freelist_finish(); -out: - preempt_enable(); + table = (unsigned long *) (__pa(table) | (bit << 4)); + tlb_remove_table(tlb, table); } +void __tlb_remove_table(void *_table) +{ + void *table = (void *)((unsigned long) _table & PAGE_MASK); + unsigned type = (unsigned long) _table & ~PAGE_MASK; + + if (type) + __page_table_free_rcu(table, type); + else + free_pages((unsigned long) table, ALLOC_ORDER); +} + +#endif + /* * switch on pgstes for its userspace process (for kvm) */ @@ -369,7 +315,7 @@ int s390_enable_sie(void) return -EINVAL; /* Do we have pgstes? if yes, we are done */ - if (tsk->mm->context.has_pgste) + if (mm_has_pgste(tsk->mm)) return 0; /* lets check if we are allowed to replace the mm */ |