diff options
-rw-r--r-- | arch/s390/include/asm/lowcore.h | 9 | ||||
-rw-r--r-- | arch/s390/include/asm/vdso.h | 15 | ||||
-rw-r--r-- | arch/s390/kernel/asm-offsets.c | 5 | ||||
-rw-r--r-- | arch/s390/kernel/entry64.S | 45 | ||||
-rw-r--r-- | arch/s390/kernel/head64.S | 2 | ||||
-rw-r--r-- | arch/s390/kernel/setup.c | 2 | ||||
-rw-r--r-- | arch/s390/kernel/smp.c | 9 | ||||
-rw-r--r-- | arch/s390/kernel/vdso.c | 123 | ||||
-rw-r--r-- | arch/s390/kernel/vdso64/clock_getres.S | 5 | ||||
-rw-r--r-- | arch/s390/kernel/vdso64/clock_gettime.S | 39 |
10 files changed, 222 insertions, 32 deletions
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index a547817..ffdef5f 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -106,8 +106,10 @@ #define __LC_IPLDEV 0xDB8 #define __LC_CURRENT 0xDD8 #define __LC_INT_CLOCK 0xDE8 +#define __LC_VDSO_PER_CPU 0xE38 #endif /* __s390x__ */ +#define __LC_PASTE 0xE40 #define __LC_PANIC_MAGIC 0xE00 #ifndef __s390x__ @@ -381,7 +383,12 @@ struct _lowcore /* whether the kernel died with panic() or not */ __u32 panic_magic; /* 0xe00 */ - __u8 pad13[0x11b8-0xe04]; /* 0xe04 */ + /* Per cpu primary space access list */ + __u8 pad_0xe04[0xe3c-0xe04]; /* 0xe04 */ + __u32 vdso_per_cpu_data; /* 0xe3c */ + __u32 paste[16]; /* 0xe40 */ + + __u8 pad13[0x11b8-0xe80]; /* 0xe80 */ /* 64 bit extparam used for pfault, diag 250 etc */ __u64 ext_params2; /* 0x11B8 */ diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index a44f4fe..7bdd7c8 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -12,9 +12,9 @@ #ifndef __ASSEMBLY__ /* - * Note about this structure: + * Note about the vdso_data and vdso_per_cpu_data structures: * - * NEVER USE THIS IN USERSPACE CODE DIRECTLY. The layout of this + * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the * structure is supposed to be known only to the function in the vdso * itself and may change without notice. */ @@ -28,10 +28,21 @@ struct vdso_data { __u64 wtom_clock_nsec; /* 0x28 */ __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ __u32 tz_dsttime; /* Type of dst correction 0x34 */ + __u32 ectg_available; +}; + +struct vdso_per_cpu_data { + __u64 ectg_timer_base; + __u64 ectg_user_time; }; extern struct vdso_data *vdso_data; +#ifdef CONFIG_64BIT +int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore); +void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore); +#endif + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index e641f60..67a6001 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -48,6 +48,11 @@ int main(void) DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); + DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); + DEFINE(__VDSO_ECTG_BASE, + offsetof(struct vdso_per_cpu_data, ectg_timer_base)); + DEFINE(__VDSO_ECTG_USER, + offsetof(struct vdso_per_cpu_data, ectg_user_time)); /* constants used by the vdso */ DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index ae83c19..c6fbde1 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -177,8 +177,11 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ .if !\sync ni \psworg+1,0xfd # clear wait state bit .endif - lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r14,%r15,SP_R14(%r15) # load grps 14-15 of user lpswe \psworg # back to caller .endm @@ -980,23 +983,23 @@ cleanup_sysc_return: cleanup_sysc_leave: clc 8(8,%r12),BASED(cleanup_sysc_leave_insn) - je 2f - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER + je 3f clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8) - je 2f - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) + jhe 0f + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER +0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) cghi %r12,__LC_MCK_OLD_PSW - jne 0f + jne 1f mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 1f -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -1: lmg %r0,%r11,SP_R0(%r15) + j 2f +1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) +2: lmg %r0,%r11,SP_R0(%r15) lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +3: la %r12,__LC_RETURN_PSW br %r14 cleanup_sysc_leave_insn: .quad sysc_done - 4 - .quad sysc_done - 8 + .quad sysc_done - 16 cleanup_io_return: mvc __LC_RETURN_PSW(8),0(%r12) @@ -1006,23 +1009,23 @@ cleanup_io_return: cleanup_io_leave: clc 8(8,%r12),BASED(cleanup_io_leave_insn) - je 2f - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER + je 3f clc 8(8,%r12),BASED(cleanup_io_leave_insn+8) - je 2f - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) + jhe 0f + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER +0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) cghi %r12,__LC_MCK_OLD_PSW - jne 0f + jne 1f mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 1f -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -1: lmg %r0,%r11,SP_R0(%r15) + j 2f +1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) +2: lmg %r0,%r11,SP_R0(%r15) lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +3: la %r12,__LC_RETURN_PSW br %r14 cleanup_io_leave_insn: .quad io_done - 4 - .quad io_done - 8 + .quad io_done - 16 /* * Integer constants diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 3ccd36b..f9f70aa 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -87,6 +87,8 @@ startup_continue: lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12) + lghi %r0,__LC_PASTE + stg %r0,__LC_VDSO_PER_CPU # # Setup stack # diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b7a1efd..d825f49 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -427,6 +427,8 @@ setup_lowcore(void) /* enable extended save area */ __ctl_set_bit(14, 29); } +#else + lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; #endif set_prefix((u32)(unsigned long) lc); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 3979a6f..b3461e8 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -47,6 +47,7 @@ #include <asm/lowcore.h> #include <asm/sclp.h> #include <asm/cpu.h> +#include <asm/vdso.h> #include "entry.h" /* @@ -506,6 +507,9 @@ static int __cpuinit smp_alloc_lowcore(int cpu) goto out; lowcore->extended_save_area_addr = (u32) save_area; } +#else + if (vdso_alloc_per_cpu(cpu, lowcore)) + goto out; #endif lowcore_ptr[cpu] = lowcore; return 0; @@ -528,6 +532,8 @@ static void smp_free_lowcore(int cpu) #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) free_page((unsigned long) lowcore->extended_save_area_addr); +#else + vdso_free_per_cpu(cpu, lowcore); #endif free_page(lowcore->panic_stack - PAGE_SIZE); free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER); @@ -670,6 +676,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order); panic_stack = __get_free_page(GFP_KERNEL); async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); + BUG_ON(!lowcore || !panic_stack || !async_stack); #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) save_area = get_zeroed_page(GFP_KERNEL); @@ -683,6 +690,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) lowcore->extended_save_area_addr = (u32) save_area; +#else + BUG_ON(vdso_alloc_per_cpu(smp_processor_id(), lowcore)); #endif set_prefix((u32)(unsigned long) lowcore); local_mcck_enable(); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 10a6cce..25a6a82 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -31,9 +31,6 @@ #include <asm/sections.h> #include <asm/vdso.h> -/* Max supported size for symbol names */ -#define MAX_SYMNAME 64 - #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) extern char vdso32_start, vdso32_end; static void *vdso32_kbase = &vdso32_start; @@ -71,6 +68,119 @@ static union { struct vdso_data *vdso_data = &vdso_data_store.data; /* + * Setup vdso data page. + */ +static void vdso_init_data(struct vdso_data *vd) +{ + unsigned int facility_list; + + facility_list = stfl(); + vd->ectg_available = switch_amode && (facility_list & 1); +} + +#ifdef CONFIG_64BIT +/* + * Setup per cpu vdso data page. + */ +static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd) +{ +} + +/* + * Allocate/free per cpu vdso data. + */ +#ifdef CONFIG_64BIT +#define SEGMENT_ORDER 2 +#else +#define SEGMENT_ORDER 1 +#endif + +int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + int i; + + lowcore->vdso_per_cpu_data = __LC_PASTE; + + if (!switch_amode || !vdso_enabled) + return 0; + + segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); + page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA); + page_frame = get_zeroed_page(GFP_KERNEL); + if (!segment_table || !page_table || !page_frame) + goto out; + + clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, + PAGE_SIZE << SEGMENT_ORDER); + clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY, + 256*sizeof(unsigned long)); + + *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; + *(unsigned long *) page_table = _PAGE_RO + page_frame; + + psal = (u32 *) (page_table + 256*sizeof(unsigned long)); + aste = psal + 32; + + for (i = 4; i < 32; i += 4) + psal[i] = 0x80000000; + + lowcore->paste[4] = (u32)(addr_t) psal; + psal[0] = 0x20000000; + psal[2] = (u32)(addr_t) aste; + *(unsigned long *) (aste + 2) = segment_table + + _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; + aste[4] = (u32)(addr_t) psal; + lowcore->vdso_per_cpu_data = page_frame; + + vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame); + return 0; + +out: + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); + return -ENOMEM; +} + +#ifdef CONFIG_HOTPLUG_CPU +void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + + if (!switch_amode || !vdso_enabled) + return; + + psal = (u32 *)(addr_t) lowcore->paste[4]; + aste = (u32 *)(addr_t) psal[2]; + segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK; + page_table = *(unsigned long *) segment_table; + page_frame = *(unsigned long *) page_table; + + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +static void __vdso_init_cr5(void *dummy) +{ + unsigned long cr5; + + cr5 = offsetof(struct _lowcore, paste); + __ctl_load(cr5, 5, 5); +} + +static void vdso_init_cr5(void) +{ + if (switch_amode && vdso_enabled) + on_each_cpu(__vdso_init_cr5, NULL, 1); +} +#endif /* CONFIG_64BIT */ + +/* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree */ @@ -172,6 +282,9 @@ static int __init vdso_init(void) { int i; + if (!vdso_enabled) + return 0; + vdso_init_data(vdso_data); #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) /* Calculate the size of the 32 bit vDSO */ vdso32_pages = ((&vdso32_end - &vdso32_start @@ -208,6 +321,10 @@ static int __init vdso_init(void) } vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); vdso64_pagelist[vdso64_pages] = NULL; +#ifndef CONFIG_SMP + BUG_ON(vdso_alloc_per_cpu(0, S390_lowcore)); +#endif + vdso_init_cr5(); #endif /* CONFIG_64BIT */ get_page(virt_to_page(vdso_data)); diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 488e31a..9ce8caa 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -22,7 +22,12 @@ __kernel_clock_getres: cghi %r2,CLOCK_REALTIME je 0f cghi %r2,CLOCK_MONOTONIC + je 0f + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ jne 2f + larl %r5,_vdso_data + icm %r0,15,__LC_ECTG_OK(%r5) + jz 2f 0: ltgr %r3,%r3 jz 1f /* res == NULL */ larl %r1,3f diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 738a410..79dbfee 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -22,8 +22,10 @@ __kernel_clock_gettime: larl %r5,_vdso_data cghi %r2,CLOCK_REALTIME je 4f + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ + je 9f cghi %r2,CLOCK_MONOTONIC - jne 9f + jne 12f /* CLOCK_MONOTONIC */ ltgr %r3,%r3 @@ -42,7 +44,7 @@ __kernel_clock_gettime: alg %r0,__VDSO_WTOM_SEC(%r5) clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ jne 0b - larl %r5,10f + larl %r5,13f 1: clg %r1,0(%r5) jl 2f slg %r1,0(%r5) @@ -68,7 +70,7 @@ __kernel_clock_gettime: lg %r0,__VDSO_XTIME_SEC(%r5) clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ jne 5b - larl %r5,10f + larl %r5,13f 6: clg %r1,0(%r5) jl 7f slg %r1,0(%r5) @@ -79,11 +81,38 @@ __kernel_clock_gettime: 8: lghi %r2,0 br %r14 + /* CLOCK_THREAD_CPUTIME_ID for this thread */ +9: icm %r0,15,__VDSO_ECTG_OK(%r5) + jz 12f + ear %r2,%a4 + llilh %r4,0x0100 + sar %a4,%r4 + lghi %r4,0 + sacf 512 /* Magic ectg instruction */ + .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 + sacf 0 + sar %a4,%r2 + algr %r1,%r0 /* r1 = cputime as TOD value */ + mghi %r1,1000 /* convert to nanoseconds */ + srlg %r1,%r1,12 /* r1 = cputime in nanosec */ + lgr %r4,%r1 + larl %r5,13f + srlg %r1,%r1,9 /* divide by 1000000000 */ + mlg %r0,8(%r5) + srlg %r0,%r0,11 /* r0 = tv_sec */ + stg %r0,0(%r3) + msg %r0,0(%r5) /* calculate tv_nsec */ + slgr %r4,%r0 /* r4 = tv_nsec */ + stg %r4,8(%r3) + lghi %r2,0 + br %r14 + /* Fallback to system call */ -9: lghi %r1,__NR_clock_gettime +12: lghi %r1,__NR_clock_gettime svc 0 br %r14 -10: .quad 1000000000 +13: .quad 1000000000 +14: .quad 19342813113834067 .cfi_endproc .size __kernel_clock_gettime,.-__kernel_clock_gettime |