From 210b1607012cc9034841a393e0591b2c86d9e26c Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 19 Sep 2013 16:26:18 +0200 Subject: KVM: s390: Removed SIE_INTERCEPT_UCONTROL The SIE_INTERCEPT_UCONTROL can be removed by moving the related code from kvm_arch_vcpu_ioctl_run() to vcpu_post_run(). Signed-off-by: Thomas Huth Acked-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/kvm-s390.c | 24 ++++++------------------ arch/s390/kvm/kvm-s390.h | 2 -- 2 files changed, 6 insertions(+), 20 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 569494e..7f47835 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -732,14 +732,12 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) if (exit_reason >= 0) { rc = 0; - } else { - if (kvm_is_ucontrol(vcpu->kvm)) { - rc = SIE_INTERCEPT_UCONTROL; - } else { - VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); - trace_kvm_s390_sie_fault(vcpu); - rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - } + } else if (kvm_is_ucontrol(vcpu->kvm)) { + vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; + vcpu->run->s390_ucontrol.trans_exc_code = + current->thread.gmap_addr; + vcpu->run->s390_ucontrol.pgm_code = 0x10; + rc = -EREMOTE; } memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); @@ -833,16 +831,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rc = -EINTR; } -#ifdef CONFIG_KVM_S390_UCONTROL - if (rc == SIE_INTERCEPT_UCONTROL) { - kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL; - kvm_run->s390_ucontrol.trans_exc_code = - current->thread.gmap_addr; - kvm_run->s390_ucontrol.pgm_code = 0x10; - rc = 0; - } -#endif - if (rc == -EOPNOTSUPP) { /* intercept cannot be handled in-kernel, prepare kvm-run */ kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index b44912a..aad541f 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -27,8 +27,6 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); /* declare vfacilities extern */ extern unsigned long *vfacilities; -/* negativ values are error codes, positive values for internal conditions */ -#define SIE_INTERCEPT_UCONTROL (1<<0) int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ -- cgit v1.1 From ac5b03420150241dc2db3cb4aa4f58c1e7e4640f Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 7 Oct 2013 17:50:22 +0200 Subject: KVM: s390: Removed VIRTIODESCSPACE VIRTIODESCSPACE is completely unused nowadays and thus can be removed without any problems. Signed-off-by: Thomas Huth Acked-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/kvm-s390.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index aad541f..fcd25b4 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -19,9 +19,6 @@ #include #include -/* The current code can have up to 256 pages for virtio */ -#define VIRTIODESCSPACE (256ul * 4096ul) - typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); /* declare vfacilities extern */ -- cgit v1.1 From f092669e743048f50c714a1af7f8e3478d7b9e1b Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 9 Oct 2013 14:15:54 +0200 Subject: KVM: s390: Fix access to CR6 in TPI handler The TPI handler currently uses vcpu->run->s.regs.crs[6] to get the current value of CR6. I think this is wrong, because vcpu->run->s.regs.crs is only updated when kvm_arch_vcpu_ioctl_run() drops back to userspace. So let's change the TPI handler to use vcpu->arch.sie_block->gcr[6] instead. Signed-off-by: Thomas Huth Acked-by: Christian Borntraeger Signed-off-by: Cornelia Huck --- arch/s390/kvm/priv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 2440602..b18fe52 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -197,7 +197,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu) if (addr & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); cc = 0; - inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); + inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0); if (!inti) goto no_interrupt; cc = 1; -- cgit v1.1 From c95221f69dfa5d3696b2b91374cbd7e5897657c5 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 9 Oct 2013 16:49:03 +0200 Subject: KVM: s390: Do not set CC3 for EQBS and SQBS The EQBS and SQBS instructions do not set CC3 for invalid channels, but should throw an operation exception instead when not available. Thus they should not be handled by the handle_io_inst() wrapper but drop to userspace instead (which will then inject the operation exception). Signed-off-by: Thomas Huth Acked-by: Christian Borntraeger Signed-off-by: Cornelia Huck --- arch/s390/kvm/priv.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index b18fe52..05537ab 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -638,7 +638,6 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) static const intercept_handler_t b9_handlers[256] = { [0x8d] = handle_epsw, - [0x9c] = handle_io_inst, [0xaf] = handle_pfmf, }; @@ -731,7 +730,6 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) static const intercept_handler_t eb_handlers[256] = { [0x2f] = handle_lctlg, - [0x8a] = handle_io_inst, }; int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) -- cgit v1.1 From e879892c725217a4af1012f31ae56be762473216 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 6 Nov 2013 15:46:33 +0100 Subject: KVM: s390: Always store status during SIGP STOP_AND_STORE_STATUS The SIGP order STOP_AND_STORE_STATUS is defined to stop a CPU and store its status. However, we only stored the status if the CPU was still running, so make sure that the status is now also stored if the CPU was already stopped. This fixes the problem that the CPU information was not stored correctly in kdump files, rendering them unreadable. Signed-off-by: Thomas Huth Acked-by: Christian Borntraeger Cc: stable@vger.kernel.org Signed-off-by: Cornelia Huck --- arch/s390/kvm/kvm-s390.c | 25 +++++++++++++++---------- arch/s390/kvm/kvm-s390.h | 4 ++-- arch/s390/kvm/sigp.c | 15 ++++++++++++++- 3 files changed, 31 insertions(+), 13 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7f47835..55eb8de 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -873,7 +873,7 @@ static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from, * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit * KVM_S390_STORE_STATUS_PREFIXED: -> prefix */ -int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr) { unsigned char archmode = 1; int prefix; @@ -891,15 +891,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) } else prefix = 0; - /* - * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy - * copying in vcpu load/put. Lets update our copies before we save - * it into the save area - */ - save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); - save_fp_regs(vcpu->arch.guest_fpregs.fprs); - save_access_regs(vcpu->run->s.regs.acrs); - if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs), vcpu->arch.guest_fpregs.fprs, 128, prefix)) return -EFAULT; @@ -944,6 +935,20 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) return 0; } +int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +{ + /* + * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy + * copying in vcpu load/put. Lets update our copies before we save + * it into the save area + */ + save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); + save_fp_regs(vcpu->arch.guest_fpregs.fprs); + save_access_regs(vcpu->run->s.regs.acrs); + + return kvm_s390_store_status_unloaded(vcpu, addr); +} + static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, struct kvm_enable_cap *cap) { diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index fcd25b4..36f6b18 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -145,8 +145,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, - unsigned long addr); +int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); +int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); void s390_vcpu_block(struct kvm_vcpu *vcpu); void s390_vcpu_unblock(struct kvm_vcpu *vcpu); void exit_sie(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index bec398c..6805601 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -130,6 +130,7 @@ unlock: static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) { struct kvm_s390_interrupt_info *inti; + int rc = SIGP_CC_ORDER_CODE_ACCEPTED; inti = kzalloc(sizeof(*inti), GFP_ATOMIC); if (!inti) @@ -139,6 +140,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) spin_lock_bh(&li->lock); if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { kfree(inti); + if ((action & ACTION_STORE_ON_STOP) != 0) + rc = -ESHUTDOWN; goto out; } list_add_tail(&inti->list, &li->list); @@ -150,7 +153,7 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) out: spin_unlock_bh(&li->lock); - return SIGP_CC_ORDER_CODE_ACCEPTED; + return rc; } static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) @@ -174,6 +177,16 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) unlock: spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); + + if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) { + /* If the CPU has already been stopped, we still have + * to save the status when doing stop-and-store. This + * has to be done after unlocking all spinlocks. */ + struct kvm_vcpu *dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + rc = kvm_s390_store_status_unloaded(dst_vcpu, + KVM_S390_STORE_STATUS_NOADDR); + } + return rc; } -- cgit v1.1 From 178bd789775ab29233e0553155253ec8d73af71f Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 13 Nov 2013 20:28:18 +0100 Subject: KVM: s390: Fix clock comparator field for STORE STATUS Only the most 7 significant bytes of the clock comparator must be saved to the status area, and the byte at offset 304 has to be zero. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/kvm-s390.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 55eb8de..1bb1dda 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -877,6 +877,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr) { unsigned char archmode = 1; int prefix; + u64 clkcomp; if (addr == KVM_S390_STORE_STATUS_NOADDR) { if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1)) @@ -920,8 +921,9 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr) &vcpu->arch.sie_block->cputm, 8, prefix)) return -EFAULT; + clkcomp = vcpu->arch.sie_block->ckc >> 8; if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp), - &vcpu->arch.sie_block->ckc, 8, prefix)) + &clkcomp, 8, prefix)) return -EFAULT; if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs), -- cgit v1.1 From 743db27c526e0f31cc507959d662e97e2048a86f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 11 Nov 2013 13:56:47 +0100 Subject: KVM: s390: fix diagnose code extraction The diagnose code to be used is the contents of the base register (if not zero), plus the displacement. The current code ignores the base register contents. So let's fix that... Reviewed-by: Cornelia Huck Cc: stable@vger.kernel.org Signed-off-by: Heiko Carstens Signed-off-by: Cornelia Huck --- arch/s390/kvm/diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 78d967f..5ff29be 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -137,7 +137,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) { - int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; + int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); -- cgit v1.1 From 00e9e435f97b409db8986f9cd35d126ae2d02a0c Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 13 Nov 2013 20:48:51 +0100 Subject: KVM: s390: Add SIGP store-status-at-address order The STORE STATUS AT ADDRESS order of SIGP was still missing. Now it is supported, using the common kvm_s390_store_status() function. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/sigp.c | 35 +++++++++++++++++++++++++++++++++++ arch/s390/kvm/trace.h | 1 + 2 files changed, 36 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 6805601..c137ed3 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -275,6 +275,37 @@ out_fi: return rc; } +static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, + u32 addr, u64 *reg) +{ + struct kvm_vcpu *dst_vcpu = NULL; + int flags; + int rc; + + if (cpu_id < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + + spin_lock_bh(&dst_vcpu->arch.local_int.lock); + flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); + spin_unlock_bh(&dst_vcpu->arch.local_int.lock); + if (!(flags & CPUSTAT_STOPPED)) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INCORRECT_STATE; + return SIGP_CC_STATUS_STORED; + } + + addr &= 0x7ffffe00; + rc = kvm_s390_store_status_unloaded(dst_vcpu, addr); + if (rc == -EFAULT) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INVALID_PARAMETER; + rc = SIGP_CC_STATUS_STORED; + } + return rc; +} + static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) { @@ -379,6 +410,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP | ACTION_STOP_ON_STOP); break; + case SIGP_STORE_STATUS_AT_ADDRESS: + rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter, + &vcpu->run->s.regs.gprs[r1]); + break; case SIGP_SET_ARCHITECTURE: vcpu->stat.instruction_sigp_arch++; rc = __sigp_set_arch(vcpu, parameter); diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index 0c991c6..3db76b2 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -175,6 +175,7 @@ TRACE_EVENT(kvm_s390_intercept_validity, {SIGP_STOP_AND_STORE_STATUS, "stop and store status"}, \ {SIGP_SET_ARCHITECTURE, "set architecture"}, \ {SIGP_SET_PREFIX, "set prefix"}, \ + {SIGP_STORE_STATUS_AT_ADDRESS, "store status at addr"}, \ {SIGP_SENSE_RUNNING, "sense running"}, \ {SIGP_RESTART, "restart"} -- cgit v1.1 From 36daca9bb36f0395755817d1b0c45ab6fbf0441b Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 14 Nov 2013 11:08:20 +0100 Subject: KVM: s390: Removed kvm_s390_inject_sigp_stop() The function kvm_s390_inject_sigp_stop() as been unused since the removal of the old mmu reload code and thus can be removed safely. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/kvm-s390.h | 1 - arch/s390/kvm/sigp.c | 6 ------ 2 files changed, 7 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 36f6b18..095cf51 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -128,7 +128,6 @@ int __must_check kvm_s390_inject_vm(struct kvm *kvm, int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt *s390int); int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); -int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index c137ed3..c370058 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -190,12 +190,6 @@ unlock: return rc; } -int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action) -{ - struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - return __inject_sigp_stop(li, action); -} - static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) { int rc; -- cgit v1.1 From 949c007acd8b6887cf5f3ac86512a7b12fa245dc Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 26 Nov 2013 12:27:16 +0100 Subject: KVM: s390: Use helper function to set CC in SIGP handler We've got a helper function for setting the condition code now, so let's use it in the SIGP handler, too. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/sigp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index c370058..bc0d85a 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -435,7 +435,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) if (rc < 0) return rc; - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44; + kvm_s390_set_psw_cc(vcpu, rc); return 0; } -- cgit v1.1 From b13d3580ee47ba3b2814e90b8a9b8241f7a4ba83 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 21 Nov 2013 16:01:48 +0100 Subject: KVM: s390: Add the SIGP order CONDITIONAL EMERGENCY SIGNAL This patch adds the missing SIGP order "conditional emergency signal" by calling the "emergency signal" SIGP handler if the required conditions are met. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/include/asm/sigp.h | 1 + arch/s390/kvm/sigp.c | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index 5a87d16..c002cd5 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -12,6 +12,7 @@ #define SIGP_SET_PREFIX 13 #define SIGP_STORE_STATUS_AT_ADDRESS 14 #define SIGP_SET_ARCHITECTURE 18 +#define SIGP_COND_EMERGENCY_SIGNAL 19 #define SIGP_SENSE_RUNNING 21 /* SIGP condition codes */ diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index bc0d85a..eee1402 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -1,7 +1,7 @@ /* * handling interprocessor communication * - * Copyright IBM Corp. 2008, 2009 + * Copyright IBM Corp. 2008, 2013 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -89,6 +89,37 @@ unlock: return rc; } +static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, + u16 asn, u64 *reg) +{ + struct kvm_vcpu *dst_vcpu = NULL; + const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT; + u16 p_asn, s_asn; + psw_t *psw; + u32 flags; + + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags); + psw = &dst_vcpu->arch.sie_block->gpsw; + p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */ + s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */ + + /* Deliver the emergency signal? */ + if (!(flags & CPUSTAT_STOPPED) + || (psw->mask & psw_int_mask) != psw_int_mask + || ((flags & CPUSTAT_WAIT) && psw->addr != 0) + || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) { + return __sigp_emergency(vcpu, cpu_addr); + } else { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INCORRECT_STATE; + return SIGP_CC_STATUS_STORED; + } +} + static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) { struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; @@ -417,6 +448,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, &vcpu->run->s.regs.gprs[r1]); break; + case SIGP_COND_EMERGENCY_SIGNAL: + rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter, + &vcpu->run->s.regs.gprs[r1]); + break; case SIGP_SENSE_RUNNING: vcpu->stat.instruction_sigp_sense_running++; rc = __sigp_sense_running(vcpu, cpu_addr, -- cgit v1.1 From cc92d6dea11cd43842e20cd05c066963de586417 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 27 Nov 2013 11:47:10 +0100 Subject: KVM: s390: Reworked SIGP RESTART order When SIGP RESTART detected an illegal CPU address, there is no need to drop to userspace, we can return CC3 to the guest directly instead. Also renamed __sigp_restart() to sigp_check_callable() (since this is a better description of what the function is really doing) and moved a string specific to RESTART to the calling place instead, so that this function gets usable by other SIGP orders, too. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/sigp.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index eee1402..509547d 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -363,7 +363,8 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, return rc; } -static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr) +/* Test whether the destination CPU is available and not busy */ +static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) { struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li; @@ -382,9 +383,6 @@ static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr) spin_lock_bh(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) rc = SIGP_CC_BUSY; - else - VCPU_EVENT(vcpu, 4, "sigp restart %x to handle userspace", - cpu_addr); spin_unlock_bh(&li->lock); out: spin_unlock(&fi->lock); @@ -459,10 +457,15 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) break; case SIGP_RESTART: vcpu->stat.instruction_sigp_restart++; - rc = __sigp_restart(vcpu, cpu_addr); - if (rc == SIGP_CC_BUSY) - break; - /* user space must know about restart */ + rc = sigp_check_callable(vcpu, cpu_addr); + if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) { + VCPU_EVENT(vcpu, 4, + "sigp restart %x to handle userspace", + cpu_addr); + /* user space must know about restart */ + rc = -EOPNOTSUPP; + } + break; default: return -EOPNOTSUPP; } -- cgit v1.1 From 58bc33b2b700f8524772f3fc20272da2187060c8 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 3 Dec 2013 12:54:55 +0100 Subject: KVM: s390: SIGP START has to report BUSY while stopping a CPU Just like the RESTART order, the START order also has to report BUSY while a STOP request is pending, to avoid that the START might be ignored due to a race condition between the STOP and the START order. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/include/asm/sigp.h | 1 + arch/s390/kvm/sigp.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index c002cd5..d091aa1 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -5,6 +5,7 @@ #define SIGP_SENSE 1 #define SIGP_EXTERNAL_CALL 2 #define SIGP_EMERGENCY_SIGNAL 3 +#define SIGP_START 4 #define SIGP_STOP 5 #define SIGP_RESTART 6 #define SIGP_STOP_AND_STORE_STATUS 9 diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 509547d..87c2b3a 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -455,6 +455,11 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_sense_running(vcpu, cpu_addr, &vcpu->run->s.regs.gprs[r1]); break; + case SIGP_START: + rc = sigp_check_callable(vcpu, cpu_addr); + if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) + rc = -EOPNOTSUPP; /* Handle START in user space */ + break; case SIGP_RESTART: vcpu->stat.instruction_sigp_restart++; rc = sigp_check_callable(vcpu, cpu_addr); -- cgit v1.1 From ff1f3cb4b3ac5d039f02679f34cb1498d110d241 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Mon, 9 Dec 2013 18:30:01 +0100 Subject: KVM: s390: ioeventfd: ignore leftmost bits The diagnose 500 subcode 3 contains the 32 bit subchannel id in bits 32-63 (counting from the left). As for other I/O instructions, bits 0-31 should be ignored and thus not be passed to kvm_io_bus_write_cookie(). This fixes a bug where the guest passed non-zero bits 0-31 which the host tried to interpret, leading to ioeventfd notification failures. Cc: stable@vger.kernel.org Signed-off-by: Dominik Dingel Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 5ff29be..8216c0e 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -121,7 +121,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) * - gpr 4 contains the index on the bus (optionally) */ ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, - vcpu->run->s.regs.gprs[2], + vcpu->run->s.regs.gprs[2] & 0xffffffff, 8, &vcpu->run->s.regs.gprs[3], vcpu->run->s.regs.gprs[4]); -- cgit v1.1 From dea24190fbfb340dea5224e32161955514bef31b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 3 Dec 2013 10:06:29 +0100 Subject: s390/smp: only send external call ipi if needed If the per cpu ec_mask bit of the receiving cpu is already set there is no need to send an ipi, since a different cpu has already sent an ipi and the receiving cpu has not yet executed the external call ipi handler. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/smp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index dc4a534..86b2913 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -159,9 +159,9 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) { int order; - set_bit(ec_bit, &pcpu->ec_mask); - order = pcpu_running(pcpu) ? - SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL; + if (test_and_set_bit(ec_bit, &pcpu->ec_mask)) + return; + order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL; pcpu_sigp_retry(pcpu, order, 0); } -- cgit v1.1 From 1c182a628075af7431edb8155601740867b5ae51 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 3 Dec 2013 11:09:10 +0100 Subject: s390/ptrace: simplify enable/disable single step The user_enable_single_step() and user_disable_sindle_step() functions are always called on the inferior, never for the currently active process. Remove the unnecessary check for the current process and the update_cr_regs() call from the enable/disable functions. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ptrace.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index e65c91c..c369a26 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -107,15 +107,11 @@ void update_cr_regs(struct task_struct *task) void user_enable_single_step(struct task_struct *task) { set_tsk_thread_flag(task, TIF_SINGLE_STEP); - if (task == current) - update_cr_regs(task); } void user_disable_single_step(struct task_struct *task) { clear_tsk_thread_flag(task, TIF_SINGLE_STEP); - if (task == current) - update_cr_regs(task); } /* -- cgit v1.1 From c63badebfebacdba827ab1cc1d420fc81bd8d818 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 3 Dec 2013 14:57:18 +0100 Subject: s390: optimize control register update It is less expensive to update control registers 0 and 2 with two individual stctg/lctlg instructions as with a single one that spans control register 0, 1 and 2. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ptrace.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index c369a26..f6be608 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -56,25 +56,26 @@ void update_cr_regs(struct task_struct *task) #ifdef CONFIG_64BIT /* Take care of the enable/disable of transactional execution. */ if (MACHINE_HAS_TE) { - unsigned long cr[3], cr_new[3]; + unsigned long cr, cr_new; - __ctl_store(cr, 0, 2); - cr_new[1] = cr[1]; + __ctl_store(cr, 0, 0); /* Set or clear transaction execution TXC bit 8. */ + cr_new = cr | (1UL << 55); if (task->thread.per_flags & PER_FLAG_NO_TE) - cr_new[0] = cr[0] & ~(1UL << 55); - else - cr_new[0] = cr[0] | (1UL << 55); + cr_new &= ~(1UL << 55); + if (cr_new != cr) + __ctl_load(cr, 0, 0); /* Set or clear transaction execution TDC bits 62 and 63. */ - cr_new[2] = cr[2] & ~3UL; + __ctl_store(cr, 2, 2); + cr_new = cr & ~3UL; if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) - cr_new[2] |= 1UL; + cr_new |= 1UL; else - cr_new[2] |= 2UL; + cr_new |= 2UL; } - if (memcmp(&cr_new, &cr, sizeof(cr))) - __ctl_load(cr_new, 0, 2); + if (cr_new != cr) + __ctl_load(cr_new, 2, 2); } #endif /* Copy user specified PER registers */ -- cgit v1.1 From 96619fc1b3d06703113ab4c5cd8c15f35a42dc99 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 5 Dec 2013 12:42:09 +0100 Subject: s390/smp: reduce memory consumption of pcpu_devices array Remove the embedded struct cpu from struct pcpu and replace it with a pointer instead. The struct cpu now gets allocated when a new cpu gets detected. The size of the pcpu_devices array (NR_CPUS * sizeof(struct pcpu)) gets reduced by nearly 120KB. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/smp.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 86b2913..8ceefc9 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -59,7 +59,7 @@ enum { }; struct pcpu { - struct cpu cpu; + struct cpu *cpu; struct _lowcore *lowcore; /* lowcore page(s) for the cpu */ unsigned long async_stack; /* async stack for the cpu */ unsigned long panic_stack; /* panic stack for the cpu */ @@ -958,7 +958,7 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; - struct cpu *c = &pcpu_devices[cpu].cpu; + struct cpu *c = pcpu_devices[cpu].cpu; struct device *s = &c->dev; int err = 0; @@ -975,10 +975,15 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action, static int smp_add_present_cpu(int cpu) { - struct cpu *c = &pcpu_devices[cpu].cpu; - struct device *s = &c->dev; + struct device *s; + struct cpu *c; int rc; + c = kzalloc(sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + pcpu_devices[cpu].cpu = c; + s = &c->dev; c->hotpluggable = 1; rc = register_cpu(c, cpu); if (rc) -- cgit v1.1 From 41932bc1c86e527f866acfcd26506da3bd20509b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 10 Dec 2013 16:18:07 +0100 Subject: s390/compat: correct check for EFAULT in rt-signal frame creation The return code of the __put_user call to store the rt_sigreturn system call to the user stack if not properly checked, the err variable is only checked before to the __put_user. Use an if statement instead. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_signal.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 95e7ba0..8b84bc3 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -412,8 +412,9 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; - err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __force __user *)(frame->retcode)); + if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, + (u16 __force __user *)(frame->retcode))) + goto give_sigsegv; } /* Set up backchain. */ -- cgit v1.1 From 52733e0152dad719ed6374b56fd1c33e784e44b3 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 5 Dec 2013 19:28:39 +0100 Subject: s390/sclp_early: Add function to detect sclp console capabilities Add SCLP console detect functions to encapsulate detection of SCLP console capabilities, for example, VT220 support. Reuse the sclp_send/receive masks that were stored by the most recent sclp_set_event_mask() call to prevent unnecessary SCLP calls. Signed-off-by: Hendrik Brueckner Reviewed-by: Michael Holzheu Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/sclp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 2f39095..220e171 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -52,8 +52,8 @@ int sclp_chp_configure(struct chp_id chpid); int sclp_chp_deconfigure(struct chp_id chpid); int sclp_chp_read_info(struct sclp_chp_info *info); void sclp_get_ipl_info(struct sclp_ipl_info *info); -bool sclp_has_linemode(void); -bool sclp_has_vt220(void); +bool __init sclp_has_linemode(void); +bool __init sclp_has_vt220(void); int sclp_pci_configure(u32 fid); int sclp_pci_deconfigure(u32 fid); int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode); -- cgit v1.1 From cf48ad83278aad39d4a0158cd085b6038b2941e6 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 11 Dec 2013 12:15:52 +0100 Subject: s390/oprofile: move hwsampler interfaces to cpu_mf.h Extract and move the oprofile hwsampler data structures and interfaces to the cpu_mf.h header file which contains common interface definitions for the various CPU-measurement facilities. This change is necessary for a new perf PMU. Few interface names have been revised to fit to the latest CPU-measurement facilities documentation. Also declare the data structures as __packed and correct checkpatch findings. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf.h | 138 +++++++++++++++++++++++++++++++++++++++++ arch/s390/oprofile/hwsampler.c | 67 +------------------- arch/s390/oprofile/hwsampler.h | 52 +--------------- 3 files changed, 142 insertions(+), 115 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index c879fad..f6dddea 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -56,6 +56,78 @@ struct cpumf_ctr_info { u32 reserved2[12]; } __packed; +/* QUERY SAMPLING INFORMATION block */ +struct hws_qsi_info_block { /* Bit(s) */ + unsigned int b0_13:14; /* 0-13: zeros */ + unsigned int as:1; /* 14: sampling authorisation control*/ + unsigned int b15_21:7; /* 15-21: zeros */ + unsigned int es:1; /* 22: sampling enable control */ + unsigned int b23_29:7; /* 23-29: zeros */ + unsigned int cs:1; /* 30: sampling activation control */ + unsigned int:1; /* 31: reserved */ + unsigned int bsdes:16; /* 4-5: size of basic sampling entry */ + unsigned int dsdes:16; /* 6-7: size of diagnostic sampling entry */ + unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */ + unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ + unsigned long tear; /* 24-31: TEAR contents */ + unsigned long dear; /* 32-39: DEAR contents */ + unsigned int rsvrd0; /* 40-43: reserved */ + unsigned int cpu_speed; /* 44-47: CPU speed */ + unsigned long long rsvrd1; /* 48-55: reserved */ + unsigned long long rsvrd2; /* 56-63: reserved */ +} __packed; + +/* SET SAMPLING CONTROLS request block */ +struct hws_lsctl_request_block { + unsigned int s:1; /* 0: maximum buffer indicator */ + unsigned int h:1; /* 1: part. level reserved for VM use*/ + unsigned long long b2_53:52;/* 2-53: zeros */ + unsigned int es:1; /* 54: sampling enable control */ + unsigned int b55_61:7; /* 55-61: - zeros */ + unsigned int cs:1; /* 62: sampling activation control */ + unsigned int b63:1; /* 63: zero */ + unsigned long interval; /* 8-15: sampling interval */ + unsigned long tear; /* 16-23: TEAR contents */ + unsigned long dear; /* 24-31: DEAR contents */ + /* 32-63: */ + unsigned long rsvrd1; /* reserved */ + unsigned long rsvrd2; /* reserved */ + unsigned long rsvrd3; /* reserved */ + unsigned long rsvrd4; /* reserved */ +} __packed; + + +struct hws_data_entry { + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int R:4; /* 16-19 reserved */ + unsigned int U:4; /* 20-23 Number of unique instruct. */ + unsigned int z:2; /* zeros */ + unsigned int T:1; /* 26 PSW DAT mode */ + unsigned int W:1; /* 27 PSW wait state */ + unsigned int P:1; /* 28 PSW Problem state */ + unsigned int AS:2; /* 29-30 PSW address-space control */ + unsigned int I:1; /* 31 entry valid or invalid */ + unsigned int:16; + unsigned int prim_asn:16; /* primary ASN */ + unsigned long long ia; /* Instruction Address */ + unsigned long long gpp; /* Guest Program Parameter */ + unsigned long long hpp; /* Host Program Parameter */ +} __packed; + +struct hws_trailer_entry { + unsigned int f:1; /* 0 - Block Full Indicator */ + unsigned int a:1; /* 1 - Alert request control */ + unsigned int t:1; /* 2 - Timestamp format */ + unsigned long long:61; /* 3 - 63: Reserved */ + unsigned long long overflow; /* 64 - sample Overflow count */ + unsigned long long timestamp; /* 16 - time-stamp */ + unsigned long long timestamp1; /* */ + unsigned long long reserved1; /* 32 -Reserved */ + unsigned long long reserved2; /* */ + unsigned long long progusage1; /* 48 - reserved for programming use */ + unsigned long long progusage2; /* */ +} __packed; + /* Query counter information */ static inline int qctri(struct cpumf_ctr_info *info) { @@ -99,4 +171,70 @@ static inline int ecctr(u64 ctr, u64 *val) return cc; } +/* Query sampling information */ +static inline int qsi(struct hws_qsi_info_block *info) +{ + int cc; + cc = 1; + + asm volatile( + "0: .insn s,0xb2860000,0(%1)\n" + "1: lhi %0,0\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "=d" (cc), "+a" (info) + : "m" (*info) + : "cc", "memory"); + + return cc ? -EINVAL : 0; +} + +/* Load sampling controls */ +static inline int lsctl(struct hws_lsctl_request_block *req) +{ + int cc; + + cc = 1; + asm volatile( + "0: .insn s,0xb2870000,0(%1)\n" + "1: ipm %0\n" + " srl %0,28\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "+d" (cc), "+a" (req) + : "m" (*req) + : "cc", "memory"); + + return cc ? -EINVAL : 0; +} + +/* Sampling control helper functions */ + +#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL +#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL + +/* Return pointer to trailer entry of an sample data block */ +static inline unsigned long *trailer_entry_ptr(unsigned long v) +{ + void *ret; + + ret = (void *) v; + ret += PAGE_SIZE; + ret -= sizeof(struct hws_trailer_entry); + + return (unsigned long *) ret; +} + +/* Return if the entry in the sample data block table (sdbt) + * is a link to the next sdbt */ +static inline int is_link_entry(unsigned long *s) +{ + return *s & 0x1ul ? 1 : 0; +} + +/* Return pointer to the linked sdbt */ +static inline unsigned long *get_next_sdbt(unsigned long *s) +{ + return (unsigned long *) (*s & ~0x1ul); +} #endif /* _ASM_S390_CPU_MF_H */ diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 231ceca..bbca76a 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -26,9 +26,6 @@ #define MAX_NUM_SDB 511 #define MIN_NUM_SDB 1 -#define ALERT_REQ_MASK 0x4000000000000000ul -#define BUFFER_FULL_MASK 0x8000000000000000ul - DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); struct hws_execute_parms { @@ -65,43 +62,6 @@ static unsigned long interval; static unsigned long min_sampler_rate; static unsigned long max_sampler_rate; -static int ssctl(void *buffer) -{ - int cc; - - /* set in order to detect a program check */ - cc = 1; - - asm volatile( - "0: .insn s,0xB2870000,0(%1)\n" - "1: ipm %0\n" - " srl %0,28\n" - "2:\n" - EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) - : "+d" (cc), "+a" (buffer) - : "m" (*((struct hws_ssctl_request_block *)buffer)) - : "cc", "memory"); - - return cc ? -EINVAL : 0 ; -} - -static int qsi(void *buffer) -{ - int cc; - cc = 1; - - asm volatile( - "0: .insn s,0xB2860000,0(%1)\n" - "1: lhi %0,0\n" - "2:\n" - EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) - : "=d" (cc), "+a" (buffer) - : "m" (*((struct hws_qsi_info_block *)buffer)) - : "cc", "memory"); - - return cc ? -EINVAL : 0; -} - static void execute_qsi(void *parms) { struct hws_execute_parms *ep = parms; @@ -113,7 +73,7 @@ static void execute_ssctl(void *parms) { struct hws_execute_parms *ep = parms; - ep->rc = ssctl(ep->buffer); + ep->rc = lsctl(ep->buffer); } static int smp_ctl_ssctl_stop(int cpu) @@ -214,17 +174,6 @@ static int smp_ctl_qsi(int cpu) return ep.rc; } -static inline unsigned long *trailer_entry_ptr(unsigned long v) -{ - void *ret; - - ret = (void *)v; - ret += PAGE_SIZE; - ret -= sizeof(struct hws_trailer_entry); - - return (unsigned long *) ret; -} - static void hws_ext_handler(struct ext_code ext_code, unsigned int param32, unsigned long param64) { @@ -256,16 +205,6 @@ static void init_all_cpu_buffers(void) } } -static int is_link_entry(unsigned long *s) -{ - return *s & 0x1ul ? 1 : 0; -} - -static unsigned long *get_next_sdbt(unsigned long *s) -{ - return (unsigned long *) (*s & ~0x1ul); -} - static int prepare_cpu_buffers(void) { int cpu; @@ -353,7 +292,7 @@ static int allocate_sdbt(int cpu) } *sdbt = sdb; trailer = trailer_entry_ptr(*sdbt); - *trailer = ALERT_REQ_MASK; + *trailer = SDB_TE_ALERT_REQ_MASK; sdbt++; mutex_unlock(&hws_sem_oom); } @@ -829,7 +768,7 @@ static void worker_on_interrupt(unsigned int cpu) trailer = trailer_entry_ptr(*sdbt); /* leave loop if no more work to do */ - if (!(*trailer & BUFFER_FULL_MASK)) { + if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) { done = 1; if (!hws_flush_all) continue; diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h index 0022e1e..a483d06 100644 --- a/arch/s390/oprofile/hwsampler.h +++ b/arch/s390/oprofile/hwsampler.h @@ -9,27 +9,7 @@ #define HWSAMPLER_H_ #include - -struct hws_qsi_info_block /* QUERY SAMPLING information block */ -{ /* Bit(s) */ - unsigned int b0_13:14; /* 0-13: zeros */ - unsigned int as:1; /* 14: sampling authorisation control*/ - unsigned int b15_21:7; /* 15-21: zeros */ - unsigned int es:1; /* 22: sampling enable control */ - unsigned int b23_29:7; /* 23-29: zeros */ - unsigned int cs:1; /* 30: sampling activation control */ - unsigned int:1; /* 31: reserved */ - unsigned int bsdes:16; /* 4-5: size of sampling entry */ - unsigned int:16; /* 6-7: reserved */ - unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */ - unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ - unsigned long tear; /* 24-31: TEAR contents */ - unsigned long dear; /* 32-39: DEAR contents */ - unsigned int rsvrd0; /* 40-43: reserved */ - unsigned int cpu_speed; /* 44-47: CPU speed */ - unsigned long long rsvrd1; /* 48-55: reserved */ - unsigned long long rsvrd2; /* 56-63: reserved */ -}; +#include struct hws_ssctl_request_block /* SET SAMPLING CONTROLS req block */ { /* bytes 0 - 7 Bit(s) */ @@ -68,36 +48,6 @@ struct hws_cpu_buffer { unsigned int stop_mode:1; }; -struct hws_data_entry { - unsigned int def:16; /* 0-15 Data Entry Format */ - unsigned int R:4; /* 16-19 reserved */ - unsigned int U:4; /* 20-23 Number of unique instruct. */ - unsigned int z:2; /* zeros */ - unsigned int T:1; /* 26 PSW DAT mode */ - unsigned int W:1; /* 27 PSW wait state */ - unsigned int P:1; /* 28 PSW Problem state */ - unsigned int AS:2; /* 29-30 PSW address-space control */ - unsigned int I:1; /* 31 entry valid or invalid */ - unsigned int:16; - unsigned int prim_asn:16; /* primary ASN */ - unsigned long long ia; /* Instruction Address */ - unsigned long long gpp; /* Guest Program Parameter */ - unsigned long long hpp; /* Host Program Parameter */ -}; - -struct hws_trailer_entry { - unsigned int f:1; /* 0 - Block Full Indicator */ - unsigned int a:1; /* 1 - Alert request control */ - unsigned long:62; /* 2 - 63: Reserved */ - unsigned long overflow; /* 64 - sample Overflow count */ - unsigned long timestamp; /* 16 - time-stamp */ - unsigned long timestamp1; /* */ - unsigned long reserved1; /* 32 -Reserved */ - unsigned long reserved2; /* */ - unsigned long progusage1; /* 48 - reserved for programming use */ - unsigned long progusage2; /* */ -}; - int hwsampler_setup(void); int hwsampler_shutdown(void); int hwsampler_allocate(unsigned long sdbt, unsigned long sdb); -- cgit v1.1 From c716832513f30430179b60ac5ffd203c53f7eb40 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 11 Dec 2013 12:44:40 +0100 Subject: s390/cpum_cf: Export event names in sysfs Provide PMU event attributes for supported counters and export their symbolic names to the sysfs "events" directory. See the /sys/devices/cpum_cf/events/ directory for a list of available counters. Note that you might require counter set authorizations for the LPAR to use them. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/perf_event.h | 23 ++- arch/s390/kernel/Makefile | 3 +- arch/s390/kernel/perf_cpum_cf.c | 1 + arch/s390/kernel/perf_cpum_cf_events.c | 322 +++++++++++++++++++++++++++++++++ arch/s390/kernel/perf_event.c | 12 ++ 5 files changed, 358 insertions(+), 3 deletions(-) create mode 100644 arch/s390/kernel/perf_cpum_cf_events.c (limited to 'arch/s390') diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 1141fb3..3418502 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -1,11 +1,18 @@ /* * Performance event support - s390 specific definitions. * - * Copyright IBM Corp. 2009, 2012 + * Copyright IBM Corp. 2009, 2013 * Author(s): Martin Schwidefsky * Hendrik Brueckner */ +#ifndef _ASM_S390_PERF_EVENT_H +#define _ASM_S390_PERF_EVENT_H + +#ifdef CONFIG_64BIT + +#include +#include #include /* CPU-measurement counter facility */ @@ -15,7 +22,18 @@ #define PMU_F_RESERVED 0x1000 #define PMU_F_ENABLED 0x2000 -#ifdef CONFIG_64BIT +/* Perf defintions for PMU event attributes in sysfs */ +extern __init const struct attribute_group **cpumf_cf_event_group(void); +extern ssize_t cpumf_events_sysfs_show(struct device *dev, + struct device_attribute *attr, + char *page); +#define EVENT_VAR(_cat, _name) event_attr_##_cat##_##_name +#define EVENT_PTR(_cat, _name) (&EVENT_VAR(_cat, _name).attr.attr) + +#define CPUMF_EVENT_ATTR(cat, name, id) \ + PMU_EVENT_ATTR(name, EVENT_VAR(cat, name), id, cpumf_events_sysfs_show) +#define CPUMF_EVENT_PTR(cat, name) EVENT_PTR(cat, name) + /* Perf callbacks */ struct pt_regs; @@ -24,3 +42,4 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) #endif /* CONFIG_64BIT */ +#endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 2403303..9f1e2ad 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -60,7 +60,8 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o ifdef CONFIG_64BIT -obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o \ + perf_cpum_cf_events.o obj-y += runtime_instr.o cache.o endif diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 1105502..f51214c 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -680,6 +680,7 @@ static int __init cpumf_pmu_init(void) goto out; } + cpumf_pmu.attr_groups = cpumf_cf_event_group(); rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); if (rc) { pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c new file mode 100644 index 0000000..4554a4b --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -0,0 +1,322 @@ +/* + * Perf PMU sysfs events attributes for available CPU-measurement counters + * + */ + +#include +#include + + +/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */ + +CPUMF_EVENT_ATTR(cf, CPU_CYCLES, 0x0000); +CPUMF_EVENT_ATTR(cf, INSTRUCTIONS, 0x0001); +CPUMF_EVENT_ATTR(cf, L1I_DIR_WRITES, 0x0002); +CPUMF_EVENT_ATTR(cf, L1I_PENALTY_CYCLES, 0x0003); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_CPU_CYCLES, 0x0020); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_INSTRUCTIONS, 0x0021); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025); +CPUMF_EVENT_ATTR(cf, L1D_DIR_WRITES, 0x0004); +CPUMF_EVENT_ATTR(cf, L1D_PENALTY_CYCLES, 0x0005); +CPUMF_EVENT_ATTR(cf, PRNG_FUNCTIONS, 0x0040); +CPUMF_EVENT_ATTR(cf, PRNG_CYCLES, 0x0041); +CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_FUNCTIONS, 0x0042); +CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_CYCLES, 0x0043); +CPUMF_EVENT_ATTR(cf, SHA_FUNCTIONS, 0x0044); +CPUMF_EVENT_ATTR(cf, SHA_CYCLES, 0x0045); +CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_FUNCTIONS, 0x0046); +CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_CYCLES, 0x0047); +CPUMF_EVENT_ATTR(cf, DEA_FUNCTIONS, 0x0048); +CPUMF_EVENT_ATTR(cf, DEA_CYCLES, 0x0049); +CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_FUNCTIONS, 0x004a); +CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_CYCLES, 0x004b); +CPUMF_EVENT_ATTR(cf, AES_FUNCTIONS, 0x004c); +CPUMF_EVENT_ATTR(cf, AES_CYCLES, 0x004d); +CPUMF_EVENT_ATTR(cf, AES_BLOCKED_FUNCTIONS, 0x004e); +CPUMF_EVENT_ATTR(cf, AES_BLOCKED_CYCLES, 0x004f); +CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082); +CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089); +CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091); +CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092); +CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093); +CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083); +CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098); +CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080); +CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081); +CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082); +CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1); +CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3); + +static struct attribute *cpumcf_pmu_event_attr[] = { + CPUMF_EVENT_PTR(cf, CPU_CYCLES), + CPUMF_EVENT_PTR(cf, INSTRUCTIONS), + CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf, L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_CPU_CYCLES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf, L1D_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, PRNG_FUNCTIONS), + CPUMF_EVENT_PTR(cf, PRNG_CYCLES), + CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf, SHA_FUNCTIONS), + CPUMF_EVENT_PTR(cf, SHA_CYCLES), + CPUMF_EVENT_PTR(cf, SHA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, SHA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf, DEA_FUNCTIONS), + CPUMF_EVENT_PTR(cf, DEA_CYCLES), + CPUMF_EVENT_PTR(cf, DEA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, DEA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf, AES_FUNCTIONS), + CPUMF_EVENT_PTR(cf, AES_CYCLES), + CPUMF_EVENT_PTR(cf, AES_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, AES_BLOCKED_CYCLES), + NULL, +}; + +static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES), + CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT), + NULL, +}; + +static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES), + NULL, +}; + +static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL), + NULL, +}; + +/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ + +static struct attribute_group cpumsf_pmu_events_group = { + .name = "events", + .attrs = cpumcf_pmu_event_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cpumsf_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cpumsf_pmu_format_group = { + .name = "format", + .attrs = cpumsf_pmu_format_attr, +}; + +static const struct attribute_group *cpumsf_pmu_attr_groups[] = { + &cpumsf_pmu_events_group, + &cpumsf_pmu_format_group, + NULL, +}; + + +static __init struct attribute **merge_attr(struct attribute **a, + struct attribute **b) +{ + struct attribute **new; + int j, i; + + for (j = 0; a[j]; j++) + ; + for (i = 0; b[i]; i++) + j++; + j++; + + new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL); + if (!new) + return NULL; + j = 0; + for (i = 0; a[i]; i++) + new[j++] = a[i]; + for (i = 0; b[i]; i++) + new[j++] = b[i]; + new[j] = NULL; + + return new; +} + +__init const struct attribute_group **cpumf_cf_event_group(void) +{ + struct attribute **combined, **model; + struct cpuid cpu_id; + + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { + case 0x2097: + case 0x2098: + model = cpumcf_z10_pmu_event_attr; + break; + case 0x2817: + case 0x2818: + model = cpumcf_z196_pmu_event_attr; + break; + case 0x2827: + case 0x2828: + model = cpumcf_zec12_pmu_event_attr; + break; + default: + model = NULL; + break; + }; + + if (!model) + goto out; + + combined = merge_attr(cpumcf_pmu_event_attr, model); + if (combined) + cpumsf_pmu_events_group.attrs = combined; +out: + return cpumsf_pmu_attr_groups; +} diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 2343c21..4c1d336 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -172,3 +173,14 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, __store_trace(entry, head, S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); } + +/* Perf defintions for PMU event attributes in sysfs */ +ssize_t cpumf_events_sysfs_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sprintf(page, "event=0x%04llx,name=%s\n", + pmu_attr->id, attr->attr.name); +} -- cgit v1.1 From 8c069ff4bd6063a3f15e606c882e03f75c7e7711 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 16:32:47 +0100 Subject: s390/perf: add support for the CPU-Measurement Sampling Facility Introduce a perf PMU, "cpum_sf", to support the CPU-Measurement Sampling Facility. You can control the sampling facility through this perf PMU interfaces. Perf sampling events are created for hardware samples. For details about the CPU-Measurement Sampling Facility, see "The Load-Program-Parameter and the CPU-Measurement Facilities" (SA23-2260). Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf.h | 14 + arch/s390/include/asm/perf_event.h | 17 +- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/perf_cpum_sf.c | 1024 ++++++++++++++++++++++++++++++++++++ arch/s390/kernel/perf_event.c | 42 +- 5 files changed, 1086 insertions(+), 13 deletions(-) create mode 100644 arch/s390/kernel/perf_cpum_sf.c (limited to 'arch/s390') diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index f6dddea..d707abc 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -210,6 +210,20 @@ static inline int lsctl(struct hws_lsctl_request_block *req) /* Sampling control helper functions */ +#include + +static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi, + unsigned long freq) +{ + return (USEC_PER_SEC / freq) * qsi->cpu_speed; +} + +static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi, + unsigned long rate) +{ + return USEC_PER_SEC * qsi->cpu_speed / rate; +} + #define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL #define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 3418502..b4eea25 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -15,12 +15,13 @@ #include #include -/* CPU-measurement counter facility */ -#define PERF_CPUM_CF_MAX_CTR 256 - /* Per-CPU flags for PMU states */ #define PMU_F_RESERVED 0x1000 #define PMU_F_ENABLED 0x2000 +#define PMU_F_IN_USE 0x4000 +#define PMU_F_ERR_IBE 0x0100 +#define PMU_F_ERR_LSDA 0x0200 +#define PMU_F_ERR_MASK (PMU_F_ERR_IBE|PMU_F_ERR_LSDA) /* Perf defintions for PMU event attributes in sysfs */ extern __init const struct attribute_group **cpumf_cf_event_group(void); @@ -41,5 +42,15 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) +/* Perf PMU definitions for the counter facility */ +#define PERF_CPUM_CF_MAX_CTR 256 + +/* Perf PMU definitions for the sampling facility */ +#define PERF_CPUM_SF_MAX_CTR 1 +#define PERF_EVENT_CPUM_SF 0xB0000UL /* Raw event ID */ + +#define TEAR_REG(hwc) ((hwc)->last_tag) +#define SAMPL_RATE(hwc) ((hwc)->event_base) + #endif /* CONFIG_64BIT */ #endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 9f1e2ad..1b3ac09 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -60,7 +60,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o ifdef CONFIG_64BIT -obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o \ +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o \ perf_cpum_cf_events.o obj-y += runtime_instr.o cache.o endif diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c new file mode 100644 index 0000000..141eca0 --- /dev/null +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -0,0 +1,1024 @@ +/* + * Performance event support for the System z CPU-measurement Sampling Facility + * + * Copyright IBM Corp. 2013 + * Author(s): Hendrik Brueckner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#define KMSG_COMPONENT "cpum_sf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Minimum number of sample-data-block-tables: + * At least one table is required for the sampling buffer structure. + * A single table contains up to 511 pointers to sample-data-blocks. + */ +#define CPUM_SF_MIN_SDBT 1 + +/* Minimum number of sample-data-blocks: + * The minimum designates a single page for sample-data-block, i.e., + * up to 126 sample-data-blocks with a size of 32 bytes (bsdes). + */ +#define CPUM_SF_MIN_SDB 126 + +/* Maximum number of sample-data-blocks: + * The maximum number designates approx. 256K per CPU including + * the given number of sample-data-blocks and taking the number + * of sample-data-block tables into account. + * + * Later, this number can be increased for extending the sampling + * buffer, for example, by factor 2 (512K) or 4 (1M). + */ +#define CPUM_SF_MAX_SDB 6471 + +struct sf_buffer { + unsigned long sdbt; /* Sample-data-block-table origin */ + /* buffer characteristics (required for buffer increments) */ + unsigned long num_sdb; /* Number of sample-data-blocks */ + unsigned long tail; /* last sample-data-block-table */ +}; + +struct cpu_hw_sf { + /* CPU-measurement sampling information block */ + struct hws_qsi_info_block qsi; + struct hws_lsctl_request_block lsctl; + struct sf_buffer sfb; /* Sampling buffer */ + unsigned int flags; /* Status flags */ + struct perf_event *event; /* Scheduled perf event */ +}; +static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); + +/* Debug feature */ +static debug_info_t *sfdbg; + +/* + * sf_buffer_available() - Check for an allocated sampling buffer + */ +static int sf_buffer_available(struct cpu_hw_sf *cpuhw) +{ + return (cpuhw->sfb.sdbt) ? 1 : 0; +} + +/* + * deallocate sampling facility buffer + */ +static void free_sampling_buffer(struct sf_buffer *sfb) +{ + unsigned long sdbt, *curr; + + if (!sfb->sdbt) + return; + + sdbt = sfb->sdbt; + curr = (unsigned long *) sdbt; + + /* we'll free the SDBT after all SDBs are processed... */ + while (1) { + if (!*curr || !sdbt) + break; + + /* watch for link entry reset if found */ + if (is_link_entry(curr)) { + curr = get_next_sdbt(curr); + if (sdbt) + free_page(sdbt); + + /* we are done if we reach the origin */ + if ((unsigned long) curr == sfb->sdbt) + break; + else + sdbt = (unsigned long) curr; + } else { + /* process SDB pointer */ + if (*curr) { + free_page(*curr); + curr++; + } + } + } + + debug_sprintf_event(sfdbg, 5, + "free_sampling_buffer: freed sdbt=%0lx\n", sfb->sdbt); + memset(sfb, 0, sizeof(*sfb)); +} + +/* + * allocate_sampling_buffer() - allocate sampler memory + * + * Allocates and initializes a sampling buffer structure using the + * specified number of sample-data-blocks (SDB). For each allocation, + * a 4K page is used. The number of sample-data-block-tables (SDBT) + * are calculated from SDBs. + * Also set the ALERT_REQ mask in each SDBs trailer. + * + * Returns zero on success, non-zero otherwise. + */ +static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) +{ + int j, k, rc; + unsigned long *sdbt, *tail, *trailer; + unsigned long sdb; + unsigned long num_sdbt, sdb_per_table; + + if (sfb->sdbt) + return -EINVAL; + sfb->num_sdb = 0; + + /* Compute the number of required sample-data-block-tables (SDBT) */ + num_sdbt = num_sdb / ((PAGE_SIZE - 8) / 8); + if (num_sdbt < CPUM_SF_MIN_SDBT) + num_sdbt = CPUM_SF_MIN_SDBT; + sdb_per_table = (PAGE_SIZE - 8) / 8; + + debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: num_sdbt=%lu " + "num_sdb=%lu sdb_per_table=%lu\n", + num_sdbt, num_sdb, sdb_per_table); + sdbt = NULL; + tail = sdbt; + + for (j = 0; j < num_sdbt; j++) { + sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); + if (!sdbt) { + rc = -ENOMEM; + goto allocate_sdbt_error; + } + + /* save origin of sample-data-block-table */ + if (!sfb->sdbt) + sfb->sdbt = (unsigned long) sdbt; + + /* link current page to tail of chain */ + if (tail) + *tail = (unsigned long)(void *) sdbt + 1; + + for (k = 0; k < num_sdb && k < sdb_per_table; k++) { + /* get and set SDB page */ + sdb = get_zeroed_page(GFP_KERNEL); + if (!sdb) { + rc = -ENOMEM; + goto allocate_sdbt_error; + } + *sdbt = sdb; + trailer = trailer_entry_ptr(*sdbt); + *trailer = SDB_TE_ALERT_REQ_MASK; + sdbt++; + } + num_sdb -= k; + sfb->num_sdb += k; /* count allocated sdb's */ + tail = sdbt; + } + + rc = 0; + if (tail) + *tail = sfb->sdbt + 1; + sfb->tail = (unsigned long) (void *)tail; + +allocate_sdbt_error: + if (rc) + free_sampling_buffer(sfb); + else + debug_sprintf_event(sfdbg, 4, + "alloc_sampling_buffer: tear=%0lx dear=%0lx\n", + sfb->sdbt, *(unsigned long *) sfb->sdbt); + return rc; +} + +static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hwc) +{ + unsigned long n_sdb, freq; + unsigned long factor; + + /* Calculate sampling buffers using 4K pages + * + * 1. Use frequency as input. The samping buffer is designed for + * a complete second. This can be adjusted through the "factor" + * variable. + * In any case, alloc_sampling_buffer() sets the Alert Request + * Control indicator to trigger measurement-alert to harvest + * sample-data-blocks (sdb). + * + * 2. Compute the number of sample-data-blocks and ensure a minimum + * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not + * exceed CPUM_SF_MAX_SDB. See also the remarks for these + * symbolic constants. + * + * 3. Compute number of pages used for the sample-data-block-table + * and ensure a minimum of CPUM_SF_MIN_SDBT (at minimum one table + * to manage up to 511 sample-data-blocks). + */ + freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); + factor = 1; + n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / cpuhw->qsi.bsdes)); + if (n_sdb < CPUM_SF_MIN_SDB) + n_sdb = CPUM_SF_MIN_SDB; + + /* Return if there is already a sampling buffer allocated. + * XXX Remove this later and check number of available and + * required sdb's and, if necessary, increase the sampling buffer. + */ + if (sf_buffer_available(cpuhw)) + return 0; + + debug_sprintf_event(sfdbg, 3, + "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%i cpuhw=%p\n", + SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw); + + return alloc_sampling_buffer(&cpuhw->sfb, + min_t(unsigned long, n_sdb, CPUM_SF_MAX_SDB)); +} + + +/* Number of perf events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * sf_disable() - Switch off sampling facility + */ +static int sf_disable(void) +{ + struct hws_lsctl_request_block sreq; + + memset(&sreq, 0, sizeof(sreq)); + return lsctl(&sreq); +} + + +#define PMC_INIT 0 +#define PMC_RELEASE 1 +static void setup_pmc_cpu(void *flags) +{ + int err; + struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf); + + /* XXX Improve error handling and pass a flag in the *flags + * variable to indicate failures. Alternatively, ignore + * (print) errors here and let the PMU functions fail if + * the per-cpu PMU_F_RESERVED flag is not. + */ + err = 0; + switch (*((int *) flags)) { + case PMC_INIT: + memset(cpusf, 0, sizeof(*cpusf)); + err = qsi(&cpusf->qsi); + if (err) + break; + cpusf->flags |= PMU_F_RESERVED; + err = sf_disable(); + if (err) + pr_err("Switching off the sampling facility failed " + "with rc=%i\n", err); + debug_sprintf_event(sfdbg, 5, + "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf); + break; + case PMC_RELEASE: + cpusf->flags &= ~PMU_F_RESERVED; + err = sf_disable(); + if (err) { + pr_err("Switching off the sampling facility failed " + "with rc=%i\n", err); + } else { + if (cpusf->sfb.sdbt) + free_sampling_buffer(&cpusf->sfb); + } + debug_sprintf_event(sfdbg, 5, + "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); + break; + } +} + +static void release_pmc_hardware(void) +{ + int flags = PMC_RELEASE; + + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + on_each_cpu(setup_pmc_cpu, &flags, 1); +} + +static int reserve_pmc_hardware(void) +{ + int flags = PMC_INIT; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + + return 0; +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + /* Release PMC if this is the last perf event */ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +static void hw_init_period(struct hw_perf_event *hwc, u64 period) +{ + hwc->sample_period = period; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); +} + +static void hw_reset_registers(struct hw_perf_event *hwc, + unsigned long sdbt_origin) +{ + TEAR_REG(hwc) = sdbt_origin; /* (re)set to first sdb table */ +} + +static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, + unsigned long rate) +{ + if (rate < si->min_sampl_rate) + return si->min_sampl_rate; + if (rate > si->max_sampl_rate) + return si->max_sampl_rate; + return rate; +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct cpu_hw_sf *cpuhw; + struct hws_qsi_info_block si; + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + unsigned long rate; + int cpu, err; + + /* Reserve CPU-measurement sampling facility */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + event->destroy = hw_perf_event_destroy; + + if (err) + goto out; + + /* Access per-CPU sampling information (query sampling info) */ + /* + * The event->cpu value can be -1 to count on every CPU, for example, + * when attaching to a task. If this is specified, use the query + * sampling info from the current CPU, otherwise use event->cpu to + * retrieve the per-CPU information. + * Later, cpuhw indicates whether to allocate sampling buffers for a + * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL). + */ + memset(&si, 0, sizeof(si)); + cpuhw = NULL; + if (event->cpu == -1) + qsi(&si); + else { + /* Event is pinned to a particular CPU, retrieve the per-CPU + * sampling structure for accessing the CPU-specific QSI. + */ + cpuhw = &per_cpu(cpu_hw_sf, event->cpu); + si = cpuhw->qsi; + } + + /* Check sampling facility authorization and, if not authorized, + * fall back to other PMUs. It is safe to check any CPU because + * the authorization is identical for all configured CPUs. + */ + if (!si.as) { + err = -ENOENT; + goto out; + } + + /* The sampling information (si) contains information about the + * min/max sampling intervals and the CPU speed. So calculate the + * correct sampling interval and avoid the whole period adjust + * feedback loop. + */ + rate = 0; + if (attr->freq) { + rate = freq_to_sample_rate(&si, attr->sample_freq); + rate = hw_limit_rate(&si, rate); + attr->freq = 0; + attr->sample_period = rate; + } else { + /* The min/max sampling rates specifies the valid range + * of sample periods. If the specified sample period is + * out of range, limit the period to the range boundary. + */ + rate = hw_limit_rate(&si, hwc->sample_period); + + /* The perf core maintains a maximum sample rate that is + * configurable through the sysctl interface. Ensure the + * sampling rate does not exceed this value. This also helps + * to avoid throttling when pushing samples with + * perf_event_overflow(). + */ + if (sample_rate_to_freq(&si, rate) > + sysctl_perf_event_sample_rate) { + err = -EINVAL; + debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n"); + goto out; + } + } + SAMPL_RATE(hwc) = rate; + hw_init_period(hwc, SAMPL_RATE(hwc)); + + /* Allocate the per-CPU sampling buffer using the CPU information + * from the event. If the event is not pinned to a particular + * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling + * buffers for each online CPU. + */ + if (cpuhw) + /* Event is pinned to a particular CPU */ + err = allocate_sdbt(cpuhw, hwc); + else { + /* Event is not pinned, allocate sampling buffer on + * each online CPU + */ + for_each_online_cpu(cpu) { + cpuhw = &per_cpu(cpu_hw_sf, cpu); + err = allocate_sdbt(cpuhw, hwc); + if (err) + break; + } + } +out: + return err; +} + +static int cpumsf_pmu_event_init(struct perf_event *event) +{ + int err; + + if (event->attr.type != PERF_TYPE_RAW) + return -ENOENT; + + if (event->attr.config != PERF_EVENT_CPUM_SF) + return -ENOENT; + + if (event->cpu >= nr_cpumask_bits || + (event->cpu >= 0 && !cpu_online(event->cpu))) + return -ENODEV; + + err = __hw_perf_event_init(event); + if (unlikely(err)) + if (event->destroy) + event->destroy(event); + return err; +} + +static void cpumsf_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + int err; + + if (cpuhw->flags & PMU_F_ENABLED) + return; + + if (cpuhw->flags & PMU_F_ERR_MASK) + return; + + cpuhw->flags |= PMU_F_ENABLED; + barrier(); + + err = lsctl(&cpuhw->lsctl); + if (err) { + cpuhw->flags &= ~PMU_F_ENABLED; + pr_err("Loading sampling controls failed: op=%i err=%i\n", + 1, err); + return; + } + + debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i tear=%p dear=%p\n", + cpuhw->lsctl.es, cpuhw->lsctl.cs, + (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear); +} + +static void cpumsf_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct hws_lsctl_request_block inactive; + struct hws_qsi_info_block si; + int err; + + if (!(cpuhw->flags & PMU_F_ENABLED)) + return; + + if (cpuhw->flags & PMU_F_ERR_MASK) + return; + + /* Switch off sampling activation control */ + inactive = cpuhw->lsctl; + inactive.cs = 0; + + err = lsctl(&inactive); + if (err) { + pr_err("Loading sampling controls failed: op=%i err=%i\n", + 2, err); + return; + } + + /* Save state of TEAR and DEAR register contents */ + if (!qsi(&si)) { + /* TEAR/DEAR values are valid only if the sampling facility is + * enabled. Note that cpumsf_pmu_disable() might be called even + * for a disabled sampling facility because cpumsf_pmu_enable() + * controls the enable/disable state. + */ + if (si.es) { + cpuhw->lsctl.tear = si.tear; + cpuhw->lsctl.dear = si.dear; + } + } else + debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: " + "qsi() failed with err=%i\n", err); + + cpuhw->flags &= ~PMU_F_ENABLED; +} + +/* perf_push_sample() - Push samples to perf + * @event: The perf event + * @sample: Hardware sample data + * + * Use the hardware sample data to create perf event sample. The sample + * is the pushed to the event subsystem and the function checks for + * possible event overflows. If an event overflow occurs, the PMU is + * stopped. + * + * Return non-zero if an event overflow occurred. + */ +static int perf_push_sample(struct perf_event *event, + struct hws_data_entry *sample) +{ + int overflow; + struct pt_regs regs; + struct perf_sample_data data; + + /* Skip samples that are invalid or for which the instruction address + * is not predictable. For the latter, the wait-state bit is set. + */ + if (sample->I || sample->W) + return 0; + + perf_sample_data_init(&data, 0, event->hw.last_period); + + memset(®s, 0, sizeof(regs)); + regs.psw.addr = sample->ia; + if (sample->T) + regs.psw.mask |= PSW_MASK_DAT; + if (sample->W) + regs.psw.mask |= PSW_MASK_WAIT; + if (sample->P) + regs.psw.mask |= PSW_MASK_PSTATE; + switch (sample->AS) { + case 0x0: + regs.psw.mask |= PSW_ASC_PRIMARY; + break; + case 0x1: + regs.psw.mask |= PSW_ASC_ACCREG; + break; + case 0x2: + regs.psw.mask |= PSW_ASC_SECONDARY; + break; + case 0x3: + regs.psw.mask |= PSW_ASC_HOME; + break; + } + + overflow = 0; + if (perf_event_overflow(event, &data, ®s)) { + overflow = 1; + event->pmu->stop(event, 0); + debug_sprintf_event(sfdbg, 4, "perf_push_sample: PMU stopped" + " because of an event overflow\n"); + } + perf_event_update_userpage(event); + + return overflow; +} + +static void perf_event_count_update(struct perf_event *event, u64 count) +{ + local64_add(count, &event->count); +} + +/* hw_collect_samples() - Walk through a sample-data-block and collect samples + * @event: The perf event + * @sdbt: Sample-data-block table + * @overflow: Event overflow counter + * + * Walks through a sample-data-block and collects hardware sample-data that is + * pushed to the perf event subsystem. The overflow reports the number of + * samples that has been discarded due to an event overflow. + */ +static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, + unsigned long long *overflow) +{ + struct hws_data_entry *sample; + unsigned long *trailer; + + trailer = trailer_entry_ptr(*sdbt); + sample = (struct hws_data_entry *) *sdbt; + while ((unsigned long *) sample < trailer) { + /* Check for an empty sample */ + if (!sample->def) + break; + + /* Update perf event period */ + perf_event_count_update(event, SAMPL_RATE(&event->hw)); + + /* Check for basic sampling mode */ + if (sample->def == 0x0001) { + /* If an event overflow occurred, the PMU is stopped to + * throttle event delivery. Remaining sample data is + * discarded. + */ + if (!*overflow) + *overflow = perf_push_sample(event, sample); + else + /* Count discarded samples */ + *overflow += 1; + } else + /* Sample slot is not yet written or other record */ + debug_sprintf_event(sfdbg, 5, "hw_collect_samples: " + "Unknown sample data entry format:" + " %i\n", sample->def); + + /* Reset sample slot and advance to next sample */ + sample->def = 0; + sample++; + } +} + +/* hw_perf_event_update() - Process sampling buffer + * @event: The perf event + * @flush_all: Flag to also flush partially filled sample-data-blocks + * + * Processes the sampling buffer and create perf event samples. + * The sampling buffer position are retrieved and saved in the TEAR_REG + * register of the specified perf event. + * + * Only full sample-data-blocks are processed. Specify the flash_all flag + * to also walk through partially filled sample-data-blocks. + * + */ +static void hw_perf_event_update(struct perf_event *event, int flush_all) +{ + struct hw_perf_event *hwc = &event->hw; + struct hws_trailer_entry *te; + unsigned long *sdbt; + unsigned long long event_overflow, sampl_overflow; + int done; + + sdbt = (unsigned long *) TEAR_REG(hwc); + done = event_overflow = sampl_overflow = 0; + while (!done) { + /* Get the trailer entry of the sample-data-block */ + te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); + + /* Leave loop if no more work to do (block full indicator) */ + if (!te->f) { + done = 1; + if (!flush_all) + break; + } + + /* Check sample overflow count */ + if (te->overflow) { + /* Increment sample overflow counter */ + sampl_overflow += te->overflow; + + /* XXX: If an sample overflow occurs, increase the + * sampling buffer. Set a "realloc" flag because + * the sampler must be re-enabled for changing + * the sample-data-block-table content. + */ + } + + /* Timestamps are valid for full sample-data-blocks only */ + debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " + "overflow=%llu timestamp=0x%llx\n", + sdbt, te->overflow, + (te->f) ? te->timestamp : 0ULL); + + /* Collect all samples from a single sample-data-block and + * flag if an (perf) event overflow happened. If so, the PMU + * is stopped and remaining samples will be discarded. + */ + hw_collect_samples(event, sdbt, &event_overflow); + + /* Reset trailer */ + xchg(&te->overflow, 0); + xchg((unsigned char *) te, 0x40); + + /* Advance to next sample-data-block */ + sdbt++; + if (is_link_entry(sdbt)) + sdbt = get_next_sdbt(sdbt); + + /* Update event hardware registers */ + TEAR_REG(hwc) = (unsigned long) sdbt; + + /* Stop processing sample-data if all samples of the current + * sample-data-block were flushed even if it was not full. + */ + if (flush_all && done) + break; + + /* If an event overflow happened, discard samples by + * processing any remaining sample-data-blocks. + */ + if (event_overflow) + flush_all = 1; + } + + if (sampl_overflow || event_overflow) + debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " + "overflow stats: sample=%llu event=%llu\n", + sampl_overflow, event_overflow); +} + +static void cpumsf_pmu_read(struct perf_event *event) +{ + /* Nothing to do ... updates are interrupt-driven */ +} + +/* Activate sampling control. + * Next call of pmu_enable() starts sampling. + */ +static void cpumsf_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + perf_pmu_disable(event->pmu); + event->hw.state = 0; + cpuhw->lsctl.cs = 1; + perf_pmu_enable(event->pmu); +} + +/* Deactivate sampling control. + * Next call of pmu_enable() stops sampling. + */ +static void cpumsf_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + + if (event->hw.state & PERF_HES_STOPPED) + return; + + perf_pmu_disable(event->pmu); + cpuhw->lsctl.cs = 0; + event->hw.state |= PERF_HES_STOPPED; + + if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { + hw_perf_event_update(event, 1); + event->hw.state |= PERF_HES_UPTODATE; + } + perf_pmu_enable(event->pmu); +} + +static int cpumsf_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + int err; + + if (cpuhw->flags & PMU_F_IN_USE) + return -EAGAIN; + + if (!cpuhw->sfb.sdbt) + return -EINVAL; + + err = 0; + perf_pmu_disable(event->pmu); + + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + /* Set up sampling controls. Always program the sampling register + * using the SDB-table start. Reset TEAR_REG event hardware register + * that is used by hw_perf_event_update() to store the sampling buffer + * position after samples have been flushed. + */ + cpuhw->lsctl.s = 0; + cpuhw->lsctl.h = 1; + cpuhw->lsctl.tear = cpuhw->sfb.sdbt; + cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; + cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); + hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); + + /* Ensure sampling functions are in the disabled state. If disabled, + * switch on sampling enable control. */ + if (WARN_ON_ONCE(cpuhw->lsctl.es == 1)) { + err = -EAGAIN; + goto out; + } + cpuhw->lsctl.es = 1; + + /* Set in_use flag and store event */ + event->hw.idx = 0; /* only one sampling event per CPU supported */ + cpuhw->event = event; + cpuhw->flags |= PMU_F_IN_USE; + + if (flags & PERF_EF_START) + cpumsf_pmu_start(event, PERF_EF_RELOAD); +out: + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + return err; +} + +static void cpumsf_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + + perf_pmu_disable(event->pmu); + cpumsf_pmu_stop(event, PERF_EF_UPDATE); + + cpuhw->lsctl.es = 0; + cpuhw->flags &= ~PMU_F_IN_USE; + cpuhw->event = NULL; + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); +} + +static int cpumsf_pmu_event_idx(struct perf_event *event) +{ + return event->hw.idx; +} + +CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); + +static struct attribute *cpumsf_pmu_events_attr[] = { + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), + NULL, +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cpumsf_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cpumsf_pmu_events_group = { + .name = "events", + .attrs = cpumsf_pmu_events_attr, +}; +static struct attribute_group cpumsf_pmu_format_group = { + .name = "format", + .attrs = cpumsf_pmu_format_attr, +}; +static const struct attribute_group *cpumsf_pmu_attr_groups[] = { + &cpumsf_pmu_events_group, + &cpumsf_pmu_format_group, + NULL, +}; + +static struct pmu cpumf_sampling = { + .pmu_enable = cpumsf_pmu_enable, + .pmu_disable = cpumsf_pmu_disable, + + .event_init = cpumsf_pmu_event_init, + .add = cpumsf_pmu_add, + .del = cpumsf_pmu_del, + + .start = cpumsf_pmu_start, + .stop = cpumsf_pmu_stop, + .read = cpumsf_pmu_read, + + .event_idx = cpumsf_pmu_event_idx, + .attr_groups = cpumsf_pmu_attr_groups, +}; + +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_hw_sf *cpuhw; + + if (!(alert & CPU_MF_INT_SF_MASK)) + return; + inc_irq_stat(IRQEXT_CMS); + cpuhw = &__get_cpu_var(cpu_hw_sf); + + /* Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. */ + if (!(cpuhw->flags & PMU_F_RESERVED)) + return; + + /* The processing below must take care of multiple alert events that + * might be indicated concurrently. */ + + /* Program alert request */ + if (alert & CPU_MF_INT_SF_PRA) { + if (cpuhw->flags & PMU_F_IN_USE) + hw_perf_event_update(cpuhw->event, 0); + else + WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE)); + } + + /* Report measurement alerts only for non-PRA codes */ + if (alert != CPU_MF_INT_SF_PRA) + debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert); + + /* Sampling authorization change request */ + if (alert & CPU_MF_INT_SF_SACA) + qsi(&cpuhw->qsi); + + /* Loss of sample data due to high-priority machine activities */ + if (alert & CPU_MF_INT_SF_LSDA) { + pr_err("Sample data was lost\n"); + cpuhw->flags |= PMU_F_ERR_LSDA; + sf_disable(); + } + + /* Invalid sampling buffer entry */ + if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) { + pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n", + alert); + cpuhw->flags |= PMU_F_ERR_IBE; + sf_disable(); + } +} + +static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long) hcpu; + int flags; + + /* Ignore the notification if no events are scheduled on the PMU. + * This might be racy... + */ + if (!atomic_read(&num_events)) + return NOTIFY_OK; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + flags = PMC_INIT; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + case CPU_DOWN_PREPARE: + flags = PMC_RELEASE; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static int __init init_cpum_sampling_pmu(void) +{ + int err; + + if (!cpum_sf_avail()) + return -ENODEV; + + sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); + if (!sfdbg) + pr_err("Registering for s390dbf failed\n"); + debug_register_view(sfdbg, &debug_sprintf_view); + + err = register_external_interrupt(0x1407, cpumf_measurement_alert); + if (err) { + pr_err("Failed to register for CPU-measurement alerts\n"); + goto out; + } + + err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW); + if (err) { + pr_err("Failed to register cpum_sf pmu\n"); + unregister_external_interrupt(0x1407, cpumf_measurement_alert); + goto out; + } + perf_cpu_notifier(cpumf_pmu_notifier); +out: + return err; +} +arch_initcall(init_cpum_sampling_pmu); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 4c1d336..b9843ba 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,8 @@ int perf_num_counters(void) if (cpum_cf_avail()) num += PERF_CPUM_CF_MAX_CTR; + if (cpum_sf_avail()) + num += PERF_CPUM_SF_MAX_CTR; return num; } @@ -93,24 +96,45 @@ unsigned long perf_misc_flags(struct pt_regs *regs) : PERF_RECORD_MISC_KERNEL; } -void perf_event_print_debug(void) +void print_debug_cf(void) { struct cpumf_ctr_info cf_info; - unsigned long flags; - int cpu; - - if (!cpum_cf_avail()) - return; - - local_irq_save(flags); + int cpu = smp_processor_id(); - cpu = smp_processor_id(); memset(&cf_info, 0, sizeof(cf_info)); if (!qctri(&cf_info)) pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n", cpu, cf_info.cfvn, cf_info.csvn, cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl); +} + +static void print_debug_sf(void) +{ + struct hws_qsi_info_block si; + int cpu = smp_processor_id(); + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) { + pr_err("CPU[%i]: CPM_SF: qsi failed\n"); + return; + } + + pr_info("CPU[%i]: CPM_SF: as=%i es=%i cs=%i bsdes=%i dsdes=%i" + " min=%i max=%i cpu_speed=%i tear=%p dear=%p\n", + cpu, si.as, si.es, si.cs, si.bsdes, si.dsdes, + si.min_sampl_rate, si.max_sampl_rate, si.cpu_speed, + si.tear, si.dear); +} + +void perf_event_print_debug(void) +{ + unsigned long flags; + local_irq_save(flags); + if (cpum_cf_avail()) + print_debug_cf(); + if (cpum_sf_avail()) + print_debug_sf(); local_irq_restore(flags); } -- cgit v1.1 From 55baa2f831ae4a41da9617ab9e7cef5ebc991ec9 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 16:47:00 +0100 Subject: s390/perf: Improve PMU selection for PERF_COUNT_HW_CPU_CYCLES events The cpum_cf (counter facility) PMU does not support sampling events. With cpum_sf (sampling facility), a PMU for sampling CPU cycles is available. Make cpum_sf the "default" PMU for PERF_COUNT_HW_CPU_CYCLES sampling events but use the more precise cpum_cf PMU for non-sampling events. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_sf.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 141eca0..52bf36e 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -468,11 +468,29 @@ static int cpumsf_pmu_event_init(struct perf_event *event) { int err; - if (event->attr.type != PERF_TYPE_RAW) - return -ENOENT; - - if (event->attr.config != PERF_EVENT_CPUM_SF) + /* No support for taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + if (event->attr.config != PERF_EVENT_CPUM_SF) + return -ENOENT; + break; + case PERF_TYPE_HARDWARE: + /* Support sampling of CPU cycles in addition to the + * counter facility. However, the counter facility + * is more precise and, hence, restrict this PMU to + * sampling events only. + */ + if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES) + return -ENOENT; + if (!is_sampling_event(event)) + return -ENOENT; + break; + default: return -ENOENT; + } if (event->cpu >= nr_cpumask_bits || (event->cpu >= 0 && !cpu_online(event->cpu))) -- cgit v1.1 From e28bb79d9935293a8eea5f3c771fde89db645ba7 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 16:52:48 +0100 Subject: s390/perf,oprofile: Share sampling facility Introduce reserve/release functions to share the sampling facility between perf and oprofile. Also improve error handling for the sampling facility support in perf. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/perf_event.h | 4 ++++ arch/s390/kernel/perf_cpum_sf.c | 17 ++++++++++++----- arch/s390/kernel/perf_event.c | 30 ++++++++++++++++++++++++++++++ arch/s390/oprofile/hwsampler.c | 7 +++++++ arch/s390/oprofile/init.c | 23 ++++++++++++++++++++--- 5 files changed, 73 insertions(+), 8 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index b4eea25..23d2dfa 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -52,5 +52,9 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define TEAR_REG(hwc) ((hwc)->last_tag) #define SAMPL_RATE(hwc) ((hwc)->event_base) +/* Perf hardware reserve and release functions */ +int perf_reserve_sampling(void); +void perf_release_sampling(void); + #endif /* CONFIG_64BIT */ #endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 52bf36e..ae5e019 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -260,16 +260,12 @@ static int sf_disable(void) #define PMC_INIT 0 #define PMC_RELEASE 1 +#define PMC_FAILURE 2 static void setup_pmc_cpu(void *flags) { int err; struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf); - /* XXX Improve error handling and pass a flag in the *flags - * variable to indicate failures. Alternatively, ignore - * (print) errors here and let the PMU functions fail if - * the per-cpu PMU_F_RESERVED flag is not. - */ err = 0; switch (*((int *) flags)) { case PMC_INIT: @@ -299,6 +295,8 @@ static void setup_pmc_cpu(void *flags) "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); break; } + if (err) + *((int *) flags) |= PMC_FAILURE; } static void release_pmc_hardware(void) @@ -307,13 +305,22 @@ static void release_pmc_hardware(void) irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); on_each_cpu(setup_pmc_cpu, &flags, 1); + perf_release_sampling(); } static int reserve_pmc_hardware(void) { int flags = PMC_INIT; + int err; + err = perf_reserve_sampling(); + if (err) + return err; on_each_cpu(setup_pmc_cpu, &flags, 1); + if (flags & PMC_FAILURE) { + release_pmc_hardware(); + return -ENODEV; + } irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); return 0; diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index b9843ba..4edcdfa 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -208,3 +208,33 @@ ssize_t cpumf_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%04llx,name=%s\n", pmu_attr->id, attr->attr.name); } + +/* Reserve/release functions for sharing perf hardware */ +static DEFINE_SPINLOCK(perf_hw_owner_lock); +static void *perf_sampling_owner; + +int perf_reserve_sampling(void) +{ + int err; + + err = 0; + spin_lock(&perf_hw_owner_lock); + if (perf_sampling_owner) { + pr_warn("The sampling facility is already reserved by %p\n", + perf_sampling_owner); + err = -EBUSY; + } else + perf_sampling_owner = __builtin_return_address(0); + spin_unlock(&perf_hw_owner_lock); + return err; +} +EXPORT_SYMBOL(perf_reserve_sampling); + +void perf_release_sampling(void) +{ + spin_lock(&perf_hw_owner_lock); + WARN_ON(!perf_sampling_owner); + perf_sampling_owner = NULL; + spin_unlock(&perf_hw_owner_lock); +} +EXPORT_SYMBOL(perf_release_sampling); diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index bbca76a..eb09587 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -41,6 +41,7 @@ static DEFINE_MUTEX(hws_sem_oom); static unsigned char hws_flush_all; static unsigned int hws_oom; +static unsigned int hws_alert; static struct workqueue_struct *hws_wq; static unsigned int hws_state; @@ -182,6 +183,9 @@ static void hws_ext_handler(struct ext_code ext_code, if (!(param32 & CPU_MF_INT_SF_MASK)) return; + if (!hws_alert) + return; + inc_irq_stat(IRQEXT_CMS); atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); @@ -941,6 +945,7 @@ int hwsampler_deallocate(void) goto deallocate_exit; irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + hws_alert = 0; deallocate_sdbt(); hws_state = HWS_DEALLOCATED; @@ -1055,6 +1060,7 @@ int hwsampler_shutdown(void) if (hws_state == HWS_STOPPED) { irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + hws_alert = 0; deallocate_sdbt(); } if (hws_wq) { @@ -1129,6 +1135,7 @@ start_all_exit: hws_oom = 1; hws_flush_all = 0; /* now let them in, 1407 CPUMF external interrupts */ + hws_alert = 1; irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); return 0; diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 04e1b6a..9ffe645 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -67,6 +68,21 @@ module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" "(report cpu_type \"timer\""); +static int __oprofile_hwsampler_start(void) +{ + int retval; + + retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); + if (retval) + return retval; + + retval = hwsampler_start_all(oprofile_hw_interval); + if (retval) + hwsampler_deallocate(); + + return retval; +} + static int oprofile_hwsampler_start(void) { int retval; @@ -76,13 +92,13 @@ static int oprofile_hwsampler_start(void) if (!hwsampler_running) return timer_ops.start(); - retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); + retval = perf_reserve_sampling(); if (retval) return retval; - retval = hwsampler_start_all(oprofile_hw_interval); + retval = __oprofile_hwsampler_start(); if (retval) - hwsampler_deallocate(); + perf_release_sampling(); return retval; } @@ -96,6 +112,7 @@ static void oprofile_hwsampler_stop(void) hwsampler_stop_all(); hwsampler_deallocate(); + perf_release_sampling(); return; } -- cgit v1.1 From 99c64b6679c41d8238b154c1a462724d7101765c Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 13 Dec 2013 12:38:39 +0100 Subject: s390/perf: Add service level information for CPU-Measurement Facilities Register a service level handler to report information about available CPU-Measurement facilities. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_event.c | 58 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 4edcdfa..3bd2bf0 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -16,17 +16,19 @@ #include #include #include +#include #include #include #include #include #include #include +#include const char *perf_pmu_name(void) { if (cpum_cf_avail() || cpum_sf_avail()) - return "CPU-measurement facilities (CPUMF)"; + return "CPU-Measurement Facilities (CPU-MF)"; return "pmu"; } EXPORT_SYMBOL(perf_pmu_name); @@ -138,6 +140,60 @@ void perf_event_print_debug(void) local_irq_restore(flags); } +/* Service level infrastructure */ +static void sl_print_counter(struct seq_file *m) +{ + struct cpumf_ctr_info ci; + + memset(&ci, 0, sizeof(ci)); + if (qctri(&ci)) + return; + + seq_printf(m, "CPU-MF: Counter facility: version=%u.%u " + "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl); +} + +static void sl_print_sampling(struct seq_file *m) +{ + struct hws_qsi_info_block si; + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) + return; + + if (!si.as && !si.ad) + return; + + seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu" + " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate, + si.cpu_speed); + if (si.as) + seq_printf(m, "CPU-MF: Sampling facility: mode=basic" + " sample_size=%u\n", si.bsdes); + if (si.ad) + seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic" + " sample_size=%u\n", si.dsdes); +} + +static void service_level_perf_print(struct seq_file *m, + struct service_level *sl) +{ + if (cpum_cf_avail()) + sl_print_counter(m); + if (cpum_sf_avail()) + sl_print_sampling(m); +} + +static struct service_level service_level_perf = { + .seq_print = service_level_perf_print, +}; + +static int __init service_level_perf_register(void) +{ + return register_service_level(&service_level_perf); +} +arch_initcall(service_level_perf_register); + /* See also arch/s390/kernel/traps.c */ static unsigned long __store_trace(struct perf_callchain_entry *entry, unsigned long sp, -- cgit v1.1 From aa3b7c296732b4351dfdbfe70be6b38a0882be14 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 12 Dec 2013 17:48:32 +0100 Subject: s390/pci: prevent inadvertently triggered bus scans Initialization and scanning of the pci bus is omitted on older machines without pci support or if pci=off was specified. Remember the fact that we ran without pci support and prevent further bus scans during resume from hibernate or after receiving hotplug notifications. Reported-by: Stefan Haberland Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 1 + arch/s390/pci/pci.c | 10 +++++++++- arch/s390/pci/pci_event.c | 18 ++++++++++++++---- 3 files changed, 24 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index c129ab2..2583466 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -144,6 +144,7 @@ int clp_disable_fh(struct zpci_dev *); void zpci_event_error(void *); void zpci_event_availability(void *); void zpci_rescan(void); +bool zpci_is_enabled(void); #else /* CONFIG_PCI */ static inline void zpci_event_error(void *e) {} static inline void zpci_event_availability(void *e) {} diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index bf7c73d..e3265b5 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -920,6 +920,7 @@ static void zpci_mem_exit(void) } static unsigned int s390_pci_probe; +static unsigned int s390_pci_initialized; char * __init pcibios_setup(char *str) { @@ -930,6 +931,11 @@ char * __init pcibios_setup(char *str) return str; } +bool zpci_is_enabled(void) +{ + return s390_pci_initialized; +} + static int __init pci_base_init(void) { int rc; @@ -961,6 +967,7 @@ static int __init pci_base_init(void) if (rc) goto out_find; + s390_pci_initialized = 1; return 0; out_find: @@ -978,5 +985,6 @@ subsys_initcall_sync(pci_base_init); void zpci_rescan(void) { - clp_rescan_pci_devices_simple(); + if (zpci_is_enabled()) + clp_rescan_pci_devices_simple(); } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 800f064..228787a 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -43,9 +43,8 @@ struct zpci_ccdf_avail { u16 pec; /* PCI event code */ } __packed; -void zpci_event_error(void *data) +static void __zpci_event_error(struct zpci_ccdf_err *ccdf) { - struct zpci_ccdf_err *ccdf = data; struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); zpci_err("error CCDF:\n"); @@ -58,9 +57,14 @@ void zpci_event_error(void *data) pci_name(zdev->pdev), ccdf->pec, ccdf->fid); } -void zpci_event_availability(void *data) +void zpci_event_error(void *data) +{ + if (zpci_is_enabled()) + __zpci_event_error(data); +} + +static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { - struct zpci_ccdf_avail *ccdf = data; struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); struct pci_dev *pdev = zdev ? zdev->pdev : NULL; int ret; @@ -115,3 +119,9 @@ void zpci_event_availability(void *data) break; } } + +void zpci_event_availability(void *data) +{ + if (zpci_is_enabled()) + __zpci_event_availability(data); +} -- cgit v1.1 From 704268925d32a0457202371a61580af76b94c53a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 12 Dec 2013 17:50:53 +0100 Subject: s390/pci: fix removal of nonexistent pci bus If we remove a pci bus after receiving a hotplug notification we need to check if the bus is actually present (creation of the pci bus during an earlier notification may have been failed). Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_event.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 228787a..65ea105 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -112,6 +112,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) clp_rescan_pci_devices(); break; case 0x0308: /* Standby -> Reserved */ + if (!zdev) + break; pci_stop_root_bus(zdev->bus); pci_remove_root_bus(zdev->bus); break; -- cgit v1.1 From 0c0c2776926018e7560b99e921467aea1115d03b Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 12 Dec 2013 17:53:11 +0100 Subject: s390/pci: set error state for unavailable functions If we receive a notification that a pci function became unavailable we clean up by removing the pci device. This can confuse the driver since the function is already unaccessible. Improve this situation by setting an appropriate error_state. Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_event.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 65ea105..7fc4c2c 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -102,8 +102,12 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; case 0x0304: /* Configured -> Standby */ - if (pdev) + if (pdev) { + /* Give the driver a hint that the function is + * already unusable. */ + pdev->error_state = pci_channel_io_perm_failure; pci_stop_and_remove_bus_device(pdev); + } zpci_disable_device(zdev); zdev->state = ZPCI_FN_STATE_STANDBY; -- cgit v1.1 From f7038b7c3f4924b18390c51c1ec1e49287cc87db Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 12 Dec 2013 17:53:59 +0100 Subject: s390/pci/dma: fix accounting of allocated_pages allocated_pages sometimes are increased even if s390_dma_alloc fails also this value is never decreased even if s390_dma_free is called. This patch fixes these bugs. Also remove the atomic64_t casts (the members are already of this type). Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_dma.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 9b83d08..60c11a6 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -285,7 +285,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, flags |= ZPCI_TABLE_PROTECTED; if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) { - atomic64_add(nr_pages, (atomic64_t *) &zdev->fmb->mapped_pages); + atomic64_add(nr_pages, &zdev->fmb->mapped_pages); return dma_addr + (offset & ~PAGE_MASK); } @@ -313,7 +313,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, zpci_err_hex(&dma_addr, sizeof(dma_addr)); } - atomic64_add(npages, (atomic64_t *) &zdev->fmb->unmapped_pages); + atomic64_add(npages, &zdev->fmb->unmapped_pages); iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; dma_free_iommu(zdev, iommu_page_index, npages); } @@ -332,7 +332,6 @@ static void *s390_dma_alloc(struct device *dev, size_t size, if (!page) return NULL; - atomic64_add(size / PAGE_SIZE, (atomic64_t *) &zdev->fmb->allocated_pages); pa = page_to_phys(page); memset((void *) pa, 0, size); @@ -343,6 +342,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size, return NULL; } + atomic64_add(size / PAGE_SIZE, &zdev->fmb->allocated_pages); if (dma_handle) *dma_handle = map; return (void *) pa; @@ -352,8 +352,11 @@ static void s390_dma_free(struct device *dev, size_t size, void *pa, dma_addr_t dma_handle, struct dma_attrs *attrs) { - s390_dma_unmap_pages(dev, dma_handle, PAGE_ALIGN(size), - DMA_BIDIRECTIONAL, NULL); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + + size = PAGE_ALIGN(size); + atomic64_sub(size / PAGE_SIZE, &zdev->fmb->allocated_pages); + s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); free_pages((unsigned long) pa, get_order(size)); } -- cgit v1.1 From 257608fb4112b4cabefd9e33a4fc2df6b64dca6a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 12 Dec 2013 17:55:22 +0100 Subject: s390/pci: reenable per default HW, FW and Linux support is in a better shape now - let's reenable pci bus probing per default. Acked-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index e3265b5..0820362 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -919,13 +919,13 @@ static void zpci_mem_exit(void) kmem_cache_destroy(zdev_fmb_cache); } -static unsigned int s390_pci_probe; +static unsigned int s390_pci_probe = 1; static unsigned int s390_pci_initialized; char * __init pcibios_setup(char *str) { - if (!strcmp(str, "on")) { - s390_pci_probe = 1; + if (!strcmp(str, "off")) { + s390_pci_probe = 0; return NULL; } return str; -- cgit v1.1 From 69f239ed335a4b03265cae3ca930f3f166e42e35 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 17:03:48 +0100 Subject: s390/cpum_sf: Dynamically extend the sampling buffer if overflows occur Improve the sampling buffer allocation and add a function to reallocate and increase the sampling buffer structure. The number of allocated buffer elements (sample-data-blocks) are accounted. You can control the minimum and maximum number these sample-data-blocks through the cpum_sfb_size kernel parameter. The number hardware sample overflows (if any) are also accounted and stored per perf event. During the PMU disable/enable calls, the accumulated overflow counter is analyzed and, if necessary, the sampling buffer is dynamically increased. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/perf_event.h | 4 + arch/s390/kernel/perf_cpum_sf.c | 527 ++++++++++++++++++++++++++++--------- 2 files changed, 411 insertions(+), 120 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 23d2dfa..99d7f4e 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -49,6 +49,10 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define PERF_CPUM_SF_MAX_CTR 1 #define PERF_EVENT_CPUM_SF 0xB0000UL /* Raw event ID */ +#define REG_NONE 0 +#define REG_OVERFLOW 1 +#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config) +#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc) #define TEAR_REG(hwc) ((hwc)->last_tag) #define SAMPL_RATE(hwc) ((hwc)->event_base) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index ae5e019..ea16560 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include #include @@ -26,34 +28,54 @@ * At least one table is required for the sampling buffer structure. * A single table contains up to 511 pointers to sample-data-blocks. */ -#define CPUM_SF_MIN_SDBT 1 +#define CPUM_SF_MIN_SDBT 1 -/* Minimum number of sample-data-blocks: - * The minimum designates a single page for sample-data-block, i.e., - * up to 126 sample-data-blocks with a size of 32 bytes (bsdes). +/* Number of sample-data-blocks per sample-data-block-table (SDBT): + * The table contains SDB origin (8 bytes) and one SDBT origin that + * points to the next table. */ -#define CPUM_SF_MIN_SDB 126 +#define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8) -/* Maximum number of sample-data-blocks: - * The maximum number designates approx. 256K per CPU including - * the given number of sample-data-blocks and taking the number - * of sample-data-block tables into account. +/* Maximum page offset for an SDBT table-link entry: + * If this page offset is reached, a table-link entry to the next SDBT + * must be added. + */ +#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8) +static inline int require_table_link(const void *sdbt) +{ + return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET; +} + +/* Minimum and maximum sampling buffer sizes: + * + * This number represents the maximum size of the sampling buffer + * taking the number of sample-data-block-tables into account. * - * Later, this number can be increased for extending the sampling - * buffer, for example, by factor 2 (512K) or 4 (1M). + * Sampling buffer size Buffer characteristics + * --------------------------------------------------- + * 64KB == 16 pages (4KB per page) + * 1 page for SDB-tables + * 15 pages for SDBs + * + * 32MB == 8192 pages (4KB per page) + * 16 pages for SDB-tables + * 8176 pages for SDBs */ -#define CPUM_SF_MAX_SDB 6471 +static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15; +static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176; struct sf_buffer { - unsigned long sdbt; /* Sample-data-block-table origin */ + unsigned long *sdbt; /* Sample-data-block-table origin */ /* buffer characteristics (required for buffer increments) */ - unsigned long num_sdb; /* Number of sample-data-blocks */ - unsigned long tail; /* last sample-data-block-table */ + unsigned long num_sdb; /* Number of sample-data-blocks */ + unsigned long num_sdbt; /* Number of sample-data-block-tables */ + unsigned long *tail; /* last sample-data-block-table */ }; struct cpu_hw_sf { /* CPU-measurement sampling information block */ struct hws_qsi_info_block qsi; + /* CPU-measurement sampling control block */ struct hws_lsctl_request_block lsctl; struct sf_buffer sfb; /* Sampling buffer */ unsigned int flags; /* Status flags */ @@ -65,11 +87,22 @@ static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); static debug_info_t *sfdbg; /* + * sf_disable() - Switch off sampling facility + */ +static int sf_disable(void) +{ + struct hws_lsctl_request_block sreq; + + memset(&sreq, 0, sizeof(sreq)); + return lsctl(&sreq); +} + +/* * sf_buffer_available() - Check for an allocated sampling buffer */ static int sf_buffer_available(struct cpu_hw_sf *cpuhw) { - return (cpuhw->sfb.sdbt) ? 1 : 0; + return !!cpuhw->sfb.sdbt; } /* @@ -77,32 +110,32 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw) */ static void free_sampling_buffer(struct sf_buffer *sfb) { - unsigned long sdbt, *curr; + unsigned long *sdbt, *curr; if (!sfb->sdbt) return; sdbt = sfb->sdbt; - curr = (unsigned long *) sdbt; + curr = sdbt; - /* we'll free the SDBT after all SDBs are processed... */ + /* Free the SDBT after all SDBs are processed... */ while (1) { if (!*curr || !sdbt) break; - /* watch for link entry reset if found */ + /* Process table-link entries */ if (is_link_entry(curr)) { curr = get_next_sdbt(curr); if (sdbt) - free_page(sdbt); + free_page((unsigned long) sdbt); - /* we are done if we reach the origin */ - if ((unsigned long) curr == sfb->sdbt) + /* If the origin is reached, sampling buffer is freed */ + if (curr == sfb->sdbt) break; else - sdbt = (unsigned long) curr; + sdbt = curr; } else { - /* process SDB pointer */ + /* Process SDB pointer */ if (*curr) { free_page(*curr); curr++; @@ -111,10 +144,106 @@ static void free_sampling_buffer(struct sf_buffer *sfb) } debug_sprintf_event(sfdbg, 5, - "free_sampling_buffer: freed sdbt=%0lx\n", sfb->sdbt); + "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt); memset(sfb, 0, sizeof(*sfb)); } +static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) +{ + unsigned long sdb, *trailer; + + /* Allocate and initialize sample-data-block */ + sdb = get_zeroed_page(gfp_flags); + if (!sdb) + return -ENOMEM; + trailer = trailer_entry_ptr(sdb); + *trailer = SDB_TE_ALERT_REQ_MASK; + + /* Link SDB into the sample-data-block-table */ + *sdbt = sdb; + + return 0; +} + +/* + * realloc_sampling_buffer() - extend sampler memory + * + * Allocates new sample-data-blocks and adds them to the specified sampling + * buffer memory. + * + * Important: This modifies the sampling buffer and must be called when the + * sampling facility is disabled. + * + * Returns zero on success, non-zero otherwise. + */ +static int realloc_sampling_buffer(struct sf_buffer *sfb, + unsigned long num_sdb, gfp_t gfp_flags) +{ + int i, rc; + unsigned long *new, *tail; + + if (!sfb->sdbt || !sfb->tail) + return -EINVAL; + + if (!is_link_entry(sfb->tail)) + return -EINVAL; + + /* Append to the existing sampling buffer, overwriting the table-link + * register. + * The tail variables always points to the "tail" (last and table-link) + * entry in an SDB-table. + */ + tail = sfb->tail; + + /* Do a sanity check whether the table-link entry points to + * the sampling buffer origin. + */ + if (sfb->sdbt != get_next_sdbt(tail)) { + debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: " + "sampling buffer is not linked: origin=%p" + "tail=%p\n", + (void *) sfb->sdbt, (void *) tail); + return -EINVAL; + } + + /* Allocate remaining SDBs */ + rc = 0; + for (i = 0; i < num_sdb; i++) { + /* Allocate a new SDB-table if it is full. */ + if (require_table_link(tail)) { + new = (unsigned long *) get_zeroed_page(gfp_flags); + if (!new) { + rc = -ENOMEM; + break; + } + sfb->num_sdbt++; + /* Link current page to tail of chain */ + *tail = (unsigned long)(void *) new + 1; + tail = new; + } + + /* Allocate a new sample-data-block. + * If there is not enough memory, stop the realloc process + * and simply use what was allocated. If this is a temporary + * issue, a new realloc call (if required) might succeed. + */ + rc = alloc_sample_data_block(tail, gfp_flags); + if (rc) + break; + sfb->num_sdb++; + tail++; + } + + /* Link sampling buffer to its origin */ + *tail = (unsigned long) sfb->sdbt + 1; + sfb->tail = tail; + + debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer" + " settings: sdbt=%lu sdb=%lu\n", + sfb->num_sdbt, sfb->num_sdb); + return rc; +} + /* * allocate_sampling_buffer() - allocate sampler memory * @@ -128,75 +257,74 @@ static void free_sampling_buffer(struct sf_buffer *sfb) */ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) { - int j, k, rc; - unsigned long *sdbt, *tail, *trailer; - unsigned long sdb; - unsigned long num_sdbt, sdb_per_table; + int rc; if (sfb->sdbt) return -EINVAL; + + /* Allocate the sample-data-block-table origin */ + sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); + if (!sfb->sdbt) + return -ENOMEM; sfb->num_sdb = 0; + sfb->num_sdbt = 1; - /* Compute the number of required sample-data-block-tables (SDBT) */ - num_sdbt = num_sdb / ((PAGE_SIZE - 8) / 8); - if (num_sdbt < CPUM_SF_MIN_SDBT) - num_sdbt = CPUM_SF_MIN_SDBT; - sdb_per_table = (PAGE_SIZE - 8) / 8; - - debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: num_sdbt=%lu " - "num_sdb=%lu sdb_per_table=%lu\n", - num_sdbt, num_sdb, sdb_per_table); - sdbt = NULL; - tail = sdbt; - - for (j = 0; j < num_sdbt; j++) { - sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); - if (!sdbt) { - rc = -ENOMEM; - goto allocate_sdbt_error; - } + /* Link the table origin to point to itself to prepare for + * realloc_sampling_buffer() invocation. + */ + sfb->tail = sfb->sdbt; + *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1; - /* save origin of sample-data-block-table */ - if (!sfb->sdbt) - sfb->sdbt = (unsigned long) sdbt; + /* Allocate requested number of sample-data-blocks */ + rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL); + if (rc) { + free_sampling_buffer(sfb); + debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: " + "realloc_sampling_buffer failed with rc=%i\n", rc); + } else + debug_sprintf_event(sfdbg, 4, + "alloc_sampling_buffer: tear=%p dear=%p\n", + sfb->sdbt, (void *) *sfb->sdbt); + return rc; +} - /* link current page to tail of chain */ - if (tail) - *tail = (unsigned long)(void *) sdbt + 1; +static void sfb_set_limits(unsigned long min, unsigned long max) +{ + CPUM_SF_MIN_SDB = min; + CPUM_SF_MAX_SDB = max; +} - for (k = 0; k < num_sdb && k < sdb_per_table; k++) { - /* get and set SDB page */ - sdb = get_zeroed_page(GFP_KERNEL); - if (!sdb) { - rc = -ENOMEM; - goto allocate_sdbt_error; - } - *sdbt = sdb; - trailer = trailer_entry_ptr(*sdbt); - *trailer = SDB_TE_ALERT_REQ_MASK; - sdbt++; - } - num_sdb -= k; - sfb->num_sdb += k; /* count allocated sdb's */ - tail = sdbt; - } +static unsigned long sfb_pending_allocs(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + if (!sfb->sdbt) + return SFB_ALLOC_REG(hwc); + if (SFB_ALLOC_REG(hwc) > sfb->num_sdb) + return SFB_ALLOC_REG(hwc) - sfb->num_sdb; + return 0; +} - rc = 0; - if (tail) - *tail = sfb->sdbt + 1; - sfb->tail = (unsigned long) (void *)tail; +static int sfb_has_pending_allocs(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + return sfb_pending_allocs(sfb, hwc) > 0; +} -allocate_sdbt_error: - if (rc) - free_sampling_buffer(sfb); - else - debug_sprintf_event(sfdbg, 4, - "alloc_sampling_buffer: tear=%0lx dear=%0lx\n", - sfb->sdbt, *(unsigned long *) sfb->sdbt); - return rc; +static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc) +{ + /* Limit the number SDBs to not exceed the maximum */ + num = min_t(unsigned long, num, CPUM_SF_MAX_SDB - SFB_ALLOC_REG(hwc)); + if (num) + SFB_ALLOC_REG(hwc) += num; } -static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hwc) +static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) +{ + SFB_ALLOC_REG(hwc) = 0; + sfb_account_allocs(num, hwc); +} + +static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) { unsigned long n_sdb, freq; unsigned long factor; @@ -225,39 +353,133 @@ static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hw if (n_sdb < CPUM_SF_MIN_SDB) n_sdb = CPUM_SF_MIN_SDB; - /* Return if there is already a sampling buffer allocated. - * XXX Remove this later and check number of available and - * required sdb's and, if necessary, increase the sampling buffer. + /* If there is already a sampling buffer allocated, it is very likely + * that the sampling facility is enabled too. If the event to be + * initialized requires a greater sampling buffer, the allocation must + * be postponed. Changing the sampling buffer requires the sampling + * facility to be in the disabled state. So, account the number of + * required SDBs and let cpumsf_pmu_enable() resize the buffer just + * before the event is started. */ + sfb_init_allocs(n_sdb, hwc); if (sf_buffer_available(cpuhw)) return 0; debug_sprintf_event(sfdbg, 3, - "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%i cpuhw=%p\n", + "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%lu cpuhw=%p\n", SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw); return alloc_sampling_buffer(&cpuhw->sfb, - min_t(unsigned long, n_sdb, CPUM_SF_MAX_SDB)); + sfb_pending_allocs(&cpuhw->sfb, hwc)); } +static unsigned long min_percent(unsigned int percent, unsigned long base, + unsigned long min) +{ + return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100)); +} -/* Number of perf events counting hardware events */ -static atomic_t num_events; -/* Used to avoid races in calling reserve/release_cpumf_hardware */ -static DEFINE_MUTEX(pmc_reserve_mutex); +static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base) +{ + /* Use a percentage-based approach to extend the sampling facility + * buffer. Accept up to 5% sample data loss. + * Vary the extents between 1% to 5% of the current number of + * sample-data-blocks. + */ + if (ratio <= 5) + return 0; + if (ratio <= 25) + return min_percent(1, base, 1); + if (ratio <= 50) + return min_percent(1, base, 1); + if (ratio <= 75) + return min_percent(2, base, 2); + if (ratio <= 100) + return min_percent(3, base, 3); + if (ratio <= 250) + return min_percent(4, base, 4); + + return min_percent(5, base, 8); +} -/* - * sf_disable() - Switch off sampling facility +static void sfb_account_overflows(struct cpu_hw_sf *cpuhw, + struct hw_perf_event *hwc) +{ + unsigned long ratio, num; + + if (!OVERFLOW_REG(hwc)) + return; + + /* The sample_overflow contains the average number of sample data + * that has been lost because sample-data-blocks were full. + * + * Calculate the total number of sample data entries that has been + * discarded. Then calculate the ratio of lost samples to total samples + * per second in percent. + */ + ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb, + sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc))); + + /* Compute number of sample-data-blocks */ + num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb); + if (num) + sfb_account_allocs(num, hwc); + + debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu" + " num=%lu\n", OVERFLOW_REG(hwc), ratio, num); + OVERFLOW_REG(hwc) = 0; +} + +/* extend_sampling_buffer() - Extend sampling buffer + * @sfb: Sampling buffer structure (for local CPU) + * @hwc: Perf event hardware structure + * + * Use this function to extend the sampling buffer based on the overflow counter + * and postponed allocation extents stored in the specified Perf event hardware. + * + * Important: This function disables the sampling facility in order to safely + * change the sampling buffer structure. Do not call this function + * when the PMU is active. */ -static int sf_disable(void) +static void extend_sampling_buffer(struct sf_buffer *sfb, + struct hw_perf_event *hwc) { - struct hws_lsctl_request_block sreq; + unsigned long num, num_old; + int rc; - memset(&sreq, 0, sizeof(sreq)); - return lsctl(&sreq); + num = sfb_pending_allocs(sfb, hwc); + if (!num) + return; + num_old = sfb->num_sdb; + + /* Disable the sampling facility to reset any states and also + * clear pending measurement alerts. + */ + sf_disable(); + + /* Extend the sampling buffer. + * This memory allocation typically happens in an atomic context when + * called by perf. Because this is a reallocation, it is fine if the + * new SDB-request cannot be satisfied immediately. + */ + rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC); + if (rc) + debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc " + "failed with rc=%i\n", rc); + + if (sfb_has_pending_allocs(sfb, hwc)) + debug_sprintf_event(sfdbg, 5, "sfb: extend: " + "req=%lu alloc=%lu remaining=%lu\n", + num, sfb->num_sdb - num_old, + sfb_pending_allocs(sfb, hwc)); } +/* Number of perf events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + #define PMC_INIT 0 #define PMC_RELEASE 1 #define PMC_FAILURE 2 @@ -345,19 +567,17 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period) } static void hw_reset_registers(struct hw_perf_event *hwc, - unsigned long sdbt_origin) + unsigned long *sdbt_origin) { - TEAR_REG(hwc) = sdbt_origin; /* (re)set to first sdb table */ + /* (Re)set to first sample-data-block-table */ + TEAR_REG(hwc) = (unsigned long) sdbt_origin; } static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, unsigned long rate) { - if (rate < si->min_sampl_rate) - return si->min_sampl_rate; - if (rate > si->max_sampl_rate) - return si->max_sampl_rate; - return rate; + return clamp_t(unsigned long, rate, + si->min_sampl_rate, si->max_sampl_rate); } static int __hw_perf_event_init(struct perf_event *event) @@ -448,6 +668,10 @@ static int __hw_perf_event_init(struct perf_event *event) SAMPL_RATE(hwc) = rate; hw_init_period(hwc, SAMPL_RATE(hwc)); + /* Initialize sample data overflow accounting */ + hwc->extra_reg.reg = REG_OVERFLOW; + OVERFLOW_REG(hwc) = 0; + /* Allocate the per-CPU sampling buffer using the CPU information * from the event. If the event is not pinned to a particular * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling @@ -513,6 +737,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event) static void cpumsf_pmu_enable(struct pmu *pmu) { struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct hw_perf_event *hwc; int err; if (cpuhw->flags & PMU_F_ENABLED) @@ -521,6 +746,26 @@ static void cpumsf_pmu_enable(struct pmu *pmu) if (cpuhw->flags & PMU_F_ERR_MASK) return; + /* Check whether to extent the sampling buffer. + * + * Two conditions trigger an increase of the sampling buffer for a + * perf event: + * 1. Postponed buffer allocations from the event initialization. + * 2. Sampling overflows that contribute to pending allocations. + * + * Note that the extend_sampling_buffer() function disables the sampling + * facility, but it can be fully re-enabled using sampling controls that + * have been saved in cpumsf_pmu_disable(). + */ + if (cpuhw->event) { + hwc = &cpuhw->event->hw; + /* Account number of overflow-designated buffer extents */ + sfb_account_overflows(cpuhw, hwc); + if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) + extend_sampling_buffer(&cpuhw->sfb, hwc); + } + + /* (Re)enable the PMU and sampling facility */ cpuhw->flags |= PMU_F_ENABLED; barrier(); @@ -632,8 +877,6 @@ static int perf_push_sample(struct perf_event *event, if (perf_event_overflow(event, &data, ®s)) { overflow = 1; event->pmu->stop(event, 0); - debug_sprintf_event(sfdbg, 4, "perf_push_sample: PMU stopped" - " because of an event overflow\n"); } perf_event_update_userpage(event); @@ -710,11 +953,11 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) struct hw_perf_event *hwc = &event->hw; struct hws_trailer_entry *te; unsigned long *sdbt; - unsigned long long event_overflow, sampl_overflow; + unsigned long long event_overflow, sampl_overflow, num_sdb; int done; sdbt = (unsigned long *) TEAR_REG(hwc); - done = event_overflow = sampl_overflow = 0; + done = event_overflow = sampl_overflow = num_sdb = 0; while (!done) { /* Get the trailer entry of the sample-data-block */ te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); @@ -726,17 +969,13 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) break; } - /* Check sample overflow count */ - if (te->overflow) { - /* Increment sample overflow counter */ - sampl_overflow += te->overflow; - - /* XXX: If an sample overflow occurs, increase the - * sampling buffer. Set a "realloc" flag because - * the sampler must be re-enabled for changing - * the sample-data-block-table content. + /* Check the sample overflow count */ + if (te->overflow) + /* Account sample overflows and, if a particular limit + * is reached, extend the sampling buffer. + * For details, see sfb_account_overflows(). */ - } + sampl_overflow += te->overflow; /* Timestamps are valid for full sample-data-blocks only */ debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " @@ -749,6 +988,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) * is stopped and remaining samples will be discarded. */ hw_collect_samples(event, sdbt, &event_overflow); + num_sdb++; /* Reset trailer */ xchg(&te->overflow, 0); @@ -775,6 +1015,10 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) flush_all = 1; } + /* Account sample overflows in the event hardware structure */ + if (sampl_overflow) + OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + + sampl_overflow, 1 + num_sdb); if (sampl_overflow || event_overflow) debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " "overflow stats: sample=%llu event=%llu\n", @@ -849,7 +1093,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) */ cpuhw->lsctl.s = 0; cpuhw->lsctl.h = 1; - cpuhw->lsctl.tear = cpuhw->sfb.sdbt; + cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); @@ -1018,6 +1262,48 @@ static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self, return NOTIFY_OK; } +static int param_get_sfb_size(char *buffer, const struct kernel_param *kp) +{ + if (!cpum_sf_avail()) + return -ENODEV; + return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); +} + +static int param_set_sfb_size(const char *val, const struct kernel_param *kp) +{ + int rc; + unsigned long min, max; + + if (!cpum_sf_avail()) + return -ENODEV; + if (!val || !strlen(val)) + return -EINVAL; + + /* Valid parameter values: "min,max" or "max" */ + min = CPUM_SF_MIN_SDB; + max = CPUM_SF_MAX_SDB; + if (strchr(val, ',')) + rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL; + else + rc = kstrtoul(val, 10, &max); + + if (min < 2 || min >= max || max > get_num_physpages()) + rc = -EINVAL; + if (rc) + return rc; + + sfb_set_limits(min, max); + pr_info("Changed sampling buffer settings: min=%lu max=%lu\n", + CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); + return 0; +} + +#define param_check_sfb_size(name, p) __param_check(name, p, void) +static struct kernel_param_ops param_ops_sfb_size = { + .set = param_set_sfb_size, + .get = param_get_sfb_size, +}; + static int __init init_cpum_sampling_pmu(void) { int err; @@ -1047,3 +1333,4 @@ out: return err; } arch_initcall(init_cpum_sampling_pmu); +core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); -- cgit v1.1 From fcc77f507333776eaa336ab4ff49c23422f53703 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 17:26:51 +0100 Subject: s390/cpum_sf: Atomically reset trailer entry fields of sample-data-blocks Ensure to reset the sample-data-block full indicator and the overflow counter at the same time. This must be done atomically because the sampling hardware is still active while full sample-data-block is processed. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf.h | 13 +++++++++---- arch/s390/kernel/perf_cpum_sf.c | 12 ++++++++---- 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index d707abc..b0b3059 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -115,10 +115,15 @@ struct hws_data_entry { } __packed; struct hws_trailer_entry { - unsigned int f:1; /* 0 - Block Full Indicator */ - unsigned int a:1; /* 1 - Alert request control */ - unsigned int t:1; /* 2 - Timestamp format */ - unsigned long long:61; /* 3 - 63: Reserved */ + union { + struct { + unsigned int f:1; /* 0 - Block Full Indicator */ + unsigned int a:1; /* 1 - Alert request control */ + unsigned int t:1; /* 2 - Timestamp format */ + unsigned long long:61; /* 3 - 63: Reserved */ + }; + unsigned long long flags; /* 0 - 63: All indicators */ + }; unsigned long long overflow; /* 64 - sample Overflow count */ unsigned long long timestamp; /* 16 - time-stamp */ unsigned long long timestamp1; /* */ diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index ea16560..9202f28 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -953,7 +953,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) struct hw_perf_event *hwc = &event->hw; struct hws_trailer_entry *te; unsigned long *sdbt; - unsigned long long event_overflow, sampl_overflow, num_sdb; + unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; int done; sdbt = (unsigned long *) TEAR_REG(hwc); @@ -990,9 +990,13 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) hw_collect_samples(event, sdbt, &event_overflow); num_sdb++; - /* Reset trailer */ - xchg(&te->overflow, 0); - xchg((unsigned char *) te, 0x40); + /* Reset trailer (using compare-double-and-swap) */ + do { + te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; + te_flags |= SDB_TE_ALERT_REQ_MASK; + } while (!cmpxchg_double(&te->flags, &te->overflow, + te->flags, te->overflow, + te_flags, 0ULL)); /* Advance to next sample-data-block */ sdbt++; -- cgit v1.1 From 443d4beb823d4dccaaf964b59df9dd38b4d6aae7 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 17:38:50 +0100 Subject: s390/cpum_sf: Add helper to read TOD from trailer entries The trailer entry contains a timestamp of the time when the sample-data-block became full. The timestamp specifies a TOD (time-of-day) value in either the STCK or STCKE format. Provide a helper function to return the TOD value depending on the setting of time format indicator. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf.h | 14 ++++++++++++-- arch/s390/kernel/perf_cpum_sf.c | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index b0b3059..09dc5fa 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -125,8 +125,7 @@ struct hws_trailer_entry { unsigned long long flags; /* 0 - 63: All indicators */ }; unsigned long long overflow; /* 64 - sample Overflow count */ - unsigned long long timestamp; /* 16 - time-stamp */ - unsigned long long timestamp1; /* */ + unsigned char timestamp[16]; /* 16 - 31 timestamp */ unsigned long long reserved1; /* 32 -Reserved */ unsigned long long reserved2; /* */ unsigned long long progusage1; /* 48 - reserved for programming use */ @@ -232,6 +231,17 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi, #define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL #define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL +/* Return TOD timestamp contained in an trailer entry */ +static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te) +{ + /* TOD in STCKE format */ + if (te->t) + return *((unsigned long long *) &te->timestamp[1]); + + /* TOD in STCK format */ + return *((unsigned long long *) &te->timestamp[0]); +} + /* Return pointer to trailer entry of an sample data block */ static inline unsigned long *trailer_entry_ptr(unsigned long v) { diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 9202f28..3ab7e67 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -981,7 +981,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " "overflow=%llu timestamp=0x%llx\n", sdbt, te->overflow, - (te->f) ? te->timestamp : 0ULL); + (te->f) ? trailer_timestamp(te) : 0ULL); /* Collect all samples from a single sample-data-block and * flag if an (perf) event overflow happened. If so, the PMU -- cgit v1.1 From 443e802bab16916f9a51a34f2213f4dee6e8762c Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 17:54:57 +0100 Subject: s390/cpum_sf: Detect KVM guest samples The host-program-parameter (hpp) value of basic sample-data-entries designates a SIE control block that is set by the LPP instruction in sie64a(). Non-zero values indicate guest samples, a value of zero indicates a host sample. For perf samples, host and guest samples are distinguished using particular PERF_MISC_* flags. The perf layer calls perf_misc_flags() to set the flags based on the pt_regs content. For each sample-data-entry, the cpum_sf PMU creates a pt_regs structure with the sample-data information. An additional flag structure is added to easily distinguish between host and guest samples. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/perf_event.h | 6 ++++++ arch/s390/kernel/perf_cpum_sf.c | 20 ++++++++++++++++++++ arch/s390/kernel/perf_event.c | 25 ++++++++++++++++++++++++- 3 files changed, 50 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 99d7f4e..7667bde 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -42,6 +42,12 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) +/* Perf pt_regs extension for sample-data-entry indicators */ +struct perf_sf_sde_regs { + unsigned char in_guest:1; /* guest sample */ + unsigned long reserved:63; /* reserved */ +}; + /* Perf PMU definitions for the counter facility */ #define PERF_CPUM_CF_MAX_CTR 256 diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 3ab7e67..d611fac 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -840,6 +840,7 @@ static int perf_push_sample(struct perf_event *event, { int overflow; struct pt_regs regs; + struct perf_sf_sde_regs *sde_regs; struct perf_sample_data data; /* Skip samples that are invalid or for which the instruction address @@ -850,7 +851,16 @@ static int perf_push_sample(struct perf_event *event, perf_sample_data_init(&data, 0, event->hw.last_period); + /* Setup pt_regs to look like an CPU-measurement external interrupt + * using the Program Request Alert code. The regs.int_parm_long + * field which is unused contains additional sample-data-entry related + * indicators. + */ memset(®s, 0, sizeof(regs)); + regs.int_code = 0x1407; + regs.int_parm = CPU_MF_INT_SF_PRA; + sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long; + regs.psw.addr = sample->ia; if (sample->T) regs.psw.mask |= PSW_MASK_DAT; @@ -873,6 +883,16 @@ static int perf_push_sample(struct perf_event *event, break; } + /* The host-program-parameter (hpp) contains the sie control + * block that is set by sie64a() in entry64.S. Check if hpp + * refers to a valid control block and set sde_regs flags + * accordingly. This would allow to use hpp values for other + * purposes too. + * For now, simply use a non-zero value as guest indicator. + */ + if (sample->hpp) + sde_regs->in_guest = 1; + overflow = 0; if (perf_event_overflow(event, &data, ®s)) { overflow = 1; diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 3bd2bf0..60a6826 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -1,7 +1,7 @@ /* * Performance event support for s390x * - * Copyright IBM Corp. 2012 + * Copyright IBM Corp. 2012, 2013 * Author(s): Hendrik Brueckner * * This program is free software; you can redistribute it and/or modify @@ -89,8 +89,31 @@ static unsigned long perf_misc_guest_flags(struct pt_regs *regs) : PERF_RECORD_MISC_GUEST_KERNEL; } +static unsigned long perf_misc_flags_sf(struct pt_regs *regs) +{ + struct perf_sf_sde_regs *sde_regs; + unsigned long flags; + + sde_regs = (struct perf_sf_sde_regs *) ®s->int_parm_long; + if (sde_regs->in_guest) + flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; + else + flags = user_mode(regs) ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; + return flags; +} + unsigned long perf_misc_flags(struct pt_regs *regs) { + /* Check if the cpum_sf PMU has created the pt_regs structure. + * In this case, perf misc flags can be easily extracted. Otherwise, + * do regular checks on the pt_regs content. + */ + if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA) + if (!regs->gprs[15]) + return perf_misc_flags_sf(regs); + if (is_in_guest(regs)) return perf_misc_guest_flags(regs); -- cgit v1.1 From dd127b3b977b81eab58d1d7ee037195cf0bbeba7 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 18:05:20 +0100 Subject: s390/cpum_sf: Filter perf events based event->attr.exclude_* settings Introduce the perf_exclude_event() function to filter perf samples according to event->attr.exclude_* settings. During event initialization, reset event exclude settings that are not supported. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_sf.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index d611fac..28fa2f2 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -723,10 +723,19 @@ static int cpumsf_pmu_event_init(struct perf_event *event) return -ENOENT; } + /* Check online status of the CPU to which the event is pinned */ if (event->cpu >= nr_cpumask_bits || (event->cpu >= 0 && !cpu_online(event->cpu))) return -ENODEV; + /* Force reset of idle/hv excludes regardless of what the + * user requested. + */ + if (event->attr.exclude_hv) + event->attr.exclude_hv = 0; + if (event->attr.exclude_idle) + event->attr.exclude_idle = 0; + err = __hw_perf_event_init(event); if (unlikely(err)) if (event->destroy) @@ -824,6 +833,29 @@ static void cpumsf_pmu_disable(struct pmu *pmu) cpuhw->flags &= ~PMU_F_ENABLED; } +/* perf_exclude_event() - Filter event + * @event: The perf event + * @regs: pt_regs structure + * @sde_regs: Sample-data-entry (sde) regs structure + * + * Filter perf events according to their exclude specification. + * + * Return non-zero if the event shall be excluded. + */ +static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, + struct perf_sf_sde_regs *sde_regs) +{ + if (event->attr.exclude_user && user_mode(regs)) + return 1; + if (event->attr.exclude_kernel && !user_mode(regs)) + return 1; + if (event->attr.exclude_guest && sde_regs->in_guest) + return 1; + if (event->attr.exclude_host && !sde_regs->in_guest) + return 1; + return 0; +} + /* perf_push_sample() - Push samples to perf * @event: The perf event * @sample: Hardware sample data @@ -894,12 +926,14 @@ static int perf_push_sample(struct perf_event *event, sde_regs->in_guest = 1; overflow = 0; + if (perf_exclude_event(event, ®s, sde_regs)) + goto out; if (perf_event_overflow(event, &data, ®s)) { overflow = 1; event->pmu->stop(event, 0); } perf_event_update_userpage(event); - +out: return overflow; } -- cgit v1.1 From 7e75fc3ff4cffd90684816d69838f8730ac3e072 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 13 Dec 2013 11:42:44 +0100 Subject: s390/cpum_sf: Add raw data sampling to support the diagnostic-sampling function Also support the diagnostic-sampling function in addition to the basic-sampling function. Diagnostic-sampling data entries contain hardware model specific sampling data and additional programs are required to analyze the data. To deliver diagnostic-sampling, as well, as basis-sampling data entries to user space, introduce support for sampling "raw data". If this particular perf sampling type (PERF_SAMPLE_RAW) is used, sampling data entries are copied to user space. External programs can then analyze these data. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf.h | 40 ++-- arch/s390/include/asm/perf_event.h | 28 ++- arch/s390/kernel/perf_cpum_sf.c | 380 ++++++++++++++++++++++++++++++------- arch/s390/kernel/perf_event.c | 21 +- arch/s390/oprofile/hwsampler.c | 4 +- 5 files changed, 377 insertions(+), 96 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 09dc5fa..cb700d5 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -59,13 +59,15 @@ struct cpumf_ctr_info { /* QUERY SAMPLING INFORMATION block */ struct hws_qsi_info_block { /* Bit(s) */ unsigned int b0_13:14; /* 0-13: zeros */ - unsigned int as:1; /* 14: sampling authorisation control*/ - unsigned int b15_21:7; /* 15-21: zeros */ - unsigned int es:1; /* 22: sampling enable control */ - unsigned int b23_29:7; /* 23-29: zeros */ - unsigned int cs:1; /* 30: sampling activation control */ - unsigned int:1; /* 31: reserved */ - unsigned int bsdes:16; /* 4-5: size of basic sampling entry */ + unsigned int as:1; /* 14: basic-sampling authorization */ + unsigned int ad:1; /* 15: diag-sampling authorization */ + unsigned int b16_21:6; /* 16-21: zeros */ + unsigned int es:1; /* 22: basic-sampling enable control */ + unsigned int ed:1; /* 23: diag-sampling enable control */ + unsigned int b24_29:6; /* 24-29: zeros */ + unsigned int cs:1; /* 30: basic-sampling activation control */ + unsigned int cd:1; /* 31: diag-sampling activation control */ + unsigned int bsdes:16; /* 4-5: size of basic sampling entry */ unsigned int dsdes:16; /* 6-7: size of diagnostic sampling entry */ unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */ unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ @@ -82,10 +84,11 @@ struct hws_lsctl_request_block { unsigned int s:1; /* 0: maximum buffer indicator */ unsigned int h:1; /* 1: part. level reserved for VM use*/ unsigned long long b2_53:52;/* 2-53: zeros */ - unsigned int es:1; /* 54: sampling enable control */ - unsigned int b55_61:7; /* 55-61: - zeros */ - unsigned int cs:1; /* 62: sampling activation control */ - unsigned int b63:1; /* 63: zero */ + unsigned int es:1; /* 54: basic-sampling enable control */ + unsigned int ed:1; /* 55: diag-sampling enable control */ + unsigned int b56_61:6; /* 56-61: - zeros */ + unsigned int cs:1; /* 62: basic-sampling activation control */ + unsigned int cd:1; /* 63: diag-sampling activation control */ unsigned long interval; /* 8-15: sampling interval */ unsigned long tear; /* 16-23: TEAR contents */ unsigned long dear; /* 24-31: DEAR contents */ @@ -96,8 +99,7 @@ struct hws_lsctl_request_block { unsigned long rsvrd4; /* reserved */ } __packed; - -struct hws_data_entry { +struct hws_basic_entry { unsigned int def:16; /* 0-15 Data Entry Format */ unsigned int R:4; /* 16-19 reserved */ unsigned int U:4; /* 20-23 Number of unique instruct. */ @@ -114,6 +116,18 @@ struct hws_data_entry { unsigned long long hpp; /* Host Program Parameter */ } __packed; +struct hws_diag_entry { + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int R:14; /* 16-19 and 20-30 reserved */ + unsigned int I:1; /* 31 entry valid or invalid */ + u8 data[]; /* Machine-dependent sample data */ +} __packed; + +struct hws_combined_entry { + struct hws_basic_entry basic; /* Basic-sampling data entry */ + struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ +} __packed; + struct hws_trailer_entry { union { struct { diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 7667bde..bd4573f 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -52,15 +52,39 @@ struct perf_sf_sde_regs { #define PERF_CPUM_CF_MAX_CTR 256 /* Perf PMU definitions for the sampling facility */ -#define PERF_CPUM_SF_MAX_CTR 1 -#define PERF_EVENT_CPUM_SF 0xB0000UL /* Raw event ID */ +#define PERF_CPUM_SF_MAX_CTR 2 +#define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */ +#define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */ +#define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */ +#define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */ +#define PERF_CPUM_SF_MODE_MASK (PERF_CPUM_SF_BASIC_MODE| \ + PERF_CPUM_SF_DIAG_MODE) #define REG_NONE 0 #define REG_OVERFLOW 1 #define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config) #define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc) +#define RAWSAMPLE_REG(hwc) ((hwc)->config) #define TEAR_REG(hwc) ((hwc)->last_tag) #define SAMPL_RATE(hwc) ((hwc)->event_base) +#define SAMPL_FLAGS(hwc) ((hwc)->config_base) +#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE) + +/* Structure for sampling data entries to be passed as perf raw sample data + * to user space. Note that raw sample data must be aligned and, thus, might + * be padded with zeros. + */ +struct sf_raw_sample { +#define SF_RAW_SAMPLE_BASIC PERF_CPUM_SF_BASIC_MODE +#define SF_RAW_SAMPLE_DIAG PERF_CPUM_SF_DIAG_MODE + u64 format; + u32 size; /* Size of sf_raw_sample */ + u16 bsdes; /* Basic-sampling data entry size */ + u16 dsdes; /* Diagnostic-sampling data entry size */ + struct hws_basic_entry basic; /* Basic-sampling data entry */ + struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ + u8 padding[]; /* Padding to next multiple of 8 */ +} __packed; /* Perf hardware reserve and release functions */ int perf_reserve_sampling(void); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 28fa2f2..b4ec058 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -31,8 +32,8 @@ #define CPUM_SF_MIN_SDBT 1 /* Number of sample-data-blocks per sample-data-block-table (SDBT): - * The table contains SDB origin (8 bytes) and one SDBT origin that - * points to the next table. + * A table contains SDB pointers (8 bytes) and one table-link entry + * that points to the origin of the next SDBT. */ #define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8) @@ -48,8 +49,11 @@ static inline int require_table_link(const void *sdbt) /* Minimum and maximum sampling buffer sizes: * - * This number represents the maximum size of the sampling buffer - * taking the number of sample-data-block-tables into account. + * This number represents the maximum size of the sampling buffer taking + * the number of sample-data-block-tables into account. Note that these + * numbers apply to the basic-sampling function only. + * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if + * the diagnostic-sampling function is active. * * Sampling buffer size Buffer characteristics * --------------------------------------------------- @@ -63,6 +67,7 @@ static inline int require_table_link(const void *sdbt) */ static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15; static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176; +static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1; struct sf_buffer { unsigned long *sdbt; /* Sample-data-block-table origin */ @@ -290,8 +295,20 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) static void sfb_set_limits(unsigned long min, unsigned long max) { + struct hws_qsi_info_block si; + CPUM_SF_MIN_SDB = min; CPUM_SF_MAX_SDB = max; + + memset(&si, 0, sizeof(si)); + if (!qsi(&si)) + CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes); +} + +static unsigned long sfb_max_limit(struct hw_perf_event *hwc) +{ + return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR + : CPUM_SF_MAX_SDB; } static unsigned long sfb_pending_allocs(struct sf_buffer *sfb, @@ -312,8 +329,8 @@ static int sfb_has_pending_allocs(struct sf_buffer *sfb, static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc) { - /* Limit the number SDBs to not exceed the maximum */ - num = min_t(unsigned long, num, CPUM_SF_MAX_SDB - SFB_ALLOC_REG(hwc)); + /* Limit the number of SDBs to not exceed the maximum */ + num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc)); if (num) SFB_ALLOC_REG(hwc) += num; } @@ -324,32 +341,89 @@ static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) sfb_account_allocs(num, hwc); } -static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) +static size_t event_sample_size(struct hw_perf_event *hwc) +{ + struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); + size_t sample_size; + + /* The sample size depends on the sampling function: The basic-sampling + * function must be always enabled, diagnostic-sampling function is + * optional. + */ + sample_size = sfr->bsdes; + if (SAMPL_DIAG_MODE(hwc)) + sample_size += sfr->dsdes; + + return sample_size; +} + +static void deallocate_buffers(struct cpu_hw_sf *cpuhw) +{ + if (cpuhw->sfb.sdbt) + free_sampling_buffer(&cpuhw->sfb); +} + +static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) { - unsigned long n_sdb, freq; - unsigned long factor; + unsigned long n_sdb, freq, factor; + size_t sfr_size, sample_size; + struct sf_raw_sample *sfr; + + /* Allocate raw sample buffer + * + * The raw sample buffer is used to temporarily store sampling data + * entries for perf raw sample processing. The buffer size mainly + * depends on the size of diagnostic-sampling data entries which is + * machine-specific. The exact size calculation includes: + * 1. The first 4 bytes of diagnostic-sampling data entries are + * already reflected in the sf_raw_sample structure. Subtract + * these bytes. + * 2. The perf raw sample data must be 8-byte aligned (u64) and + * perf's internal data size must be considered too. So add + * an additional u32 for correct alignment and subtract before + * allocating the buffer. + * 3. Store the raw sample buffer pointer in the perf event + * hardware structure. + */ + sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) + + sizeof(u32), sizeof(u64)); + sfr_size -= sizeof(u32); + sfr = kzalloc(sfr_size, GFP_KERNEL); + if (!sfr) + return -ENOMEM; + sfr->size = sfr_size; + sfr->bsdes = cpuhw->qsi.bsdes; + sfr->dsdes = cpuhw->qsi.dsdes; + RAWSAMPLE_REG(hwc) = (unsigned long) sfr; /* Calculate sampling buffers using 4K pages * - * 1. Use frequency as input. The samping buffer is designed for - * a complete second. This can be adjusted through the "factor" - * variable. + * 1. Determine the sample data size which depends on the used + * sampling functions, for example, basic-sampling or + * basic-sampling with diagnostic-sampling. + * + * 2. Use the sampling frequency as input. The sampling buffer is + * designed for almost one second. This can be adjusted through + * the "factor" variable. * In any case, alloc_sampling_buffer() sets the Alert Request - * Control indicator to trigger measurement-alert to harvest + * Control indicator to trigger a measurement-alert to harvest * sample-data-blocks (sdb). * - * 2. Compute the number of sample-data-blocks and ensure a minimum + * 3. Compute the number of sample-data-blocks and ensure a minimum * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not - * exceed CPUM_SF_MAX_SDB. See also the remarks for these - * symbolic constants. + * exceed a "calculated" maximum. The symbolic maximum is + * designed for basic-sampling only and needs to be increased if + * diagnostic-sampling is active. + * See also the remarks for these symbolic constants. * - * 3. Compute number of pages used for the sample-data-block-table - * and ensure a minimum of CPUM_SF_MIN_SDBT (at minimum one table - * to manage up to 511 sample-data-blocks). + * 4. Compute the number of sample-data-block-tables (SDBT) and + * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up + * to 511 SDBs). */ + sample_size = event_sample_size(hwc); freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); factor = 1; - n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / cpuhw->qsi.bsdes)); + n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size)); if (n_sdb < CPUM_SF_MIN_SDB) n_sdb = CPUM_SF_MIN_SDB; @@ -366,8 +440,10 @@ static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) return 0; debug_sprintf_event(sfdbg, 3, - "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%lu cpuhw=%p\n", - SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw); + "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu" + " sample_size=%lu cpuhw=%p\n", + SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc), + sample_size, cpuhw); return alloc_sampling_buffer(&cpuhw->sfb, sfb_pending_allocs(&cpuhw->sfb, hwc)); @@ -509,10 +585,8 @@ static void setup_pmc_cpu(void *flags) if (err) { pr_err("Switching off the sampling facility failed " "with rc=%i\n", err); - } else { - if (cpusf->sfb.sdbt) - free_sampling_buffer(&cpusf->sfb); - } + } else + deallocate_buffers(cpusf); debug_sprintf_event(sfdbg, 5, "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); break; @@ -550,6 +624,10 @@ static int reserve_pmc_hardware(void) static void hw_perf_event_destroy(struct perf_event *event) { + /* Free raw sample buffer */ + if (RAWSAMPLE_REG(&event->hw)) + kfree((void *) RAWSAMPLE_REG(&event->hw)); + /* Release PMC if this is the last perf event */ if (!atomic_add_unless(&num_events, -1, 1)) { mutex_lock(&pmc_reserve_mutex); @@ -569,8 +647,15 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period) static void hw_reset_registers(struct hw_perf_event *hwc, unsigned long *sdbt_origin) { + struct sf_raw_sample *sfr; + /* (Re)set to first sample-data-block-table */ TEAR_REG(hwc) = (unsigned long) sdbt_origin; + + /* (Re)set raw sampling buffer register */ + sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); + memset(&sfr->basic, 0, sizeof(sfr->basic)); + memset(&sfr->diag, 0, sfr->dsdes); } static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, @@ -634,6 +719,20 @@ static int __hw_perf_event_init(struct perf_event *event) goto out; } + /* Always enable basic sampling */ + SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; + + /* Check if diagnostic sampling is requested. Deny if the required + * sampling authorization is missing. + */ + if (attr->config == PERF_EVENT_CPUM_SF_DIAG) { + if (!si.ad) { + err = -EPERM; + goto out; + } + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE; + } + /* The sampling information (si) contains information about the * min/max sampling intervals and the CPU speed. So calculate the * correct sampling interval and avoid the whole period adjust @@ -679,14 +778,14 @@ static int __hw_perf_event_init(struct perf_event *event) */ if (cpuhw) /* Event is pinned to a particular CPU */ - err = allocate_sdbt(cpuhw, hwc); + err = allocate_buffers(cpuhw, hwc); else { /* Event is not pinned, allocate sampling buffer on * each online CPU */ for_each_online_cpu(cpu) { cpuhw = &per_cpu(cpu_hw_sf, cpu); - err = allocate_sdbt(cpuhw, hwc); + err = allocate_buffers(cpuhw, hwc); if (err) break; } @@ -705,7 +804,8 @@ static int cpumsf_pmu_event_init(struct perf_event *event) switch (event->attr.type) { case PERF_TYPE_RAW: - if (event->attr.config != PERF_EVENT_CPUM_SF) + if ((event->attr.config != PERF_EVENT_CPUM_SF) && + (event->attr.config != PERF_EVENT_CPUM_SF_DIAG)) return -ENOENT; break; case PERF_TYPE_HARDWARE: @@ -786,8 +886,9 @@ static void cpumsf_pmu_enable(struct pmu *pmu) return; } - debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i tear=%p dear=%p\n", - cpuhw->lsctl.es, cpuhw->lsctl.cs, + debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " + "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, + cpuhw->lsctl.ed, cpuhw->lsctl.cd, (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear); } @@ -807,6 +908,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) /* Switch off sampling activation control */ inactive = cpuhw->lsctl; inactive.cs = 0; + inactive.cd = 0; err = lsctl(&inactive); if (err) { @@ -867,21 +969,19 @@ static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, * * Return non-zero if an event overflow occurred. */ -static int perf_push_sample(struct perf_event *event, - struct hws_data_entry *sample) +static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) { int overflow; struct pt_regs regs; struct perf_sf_sde_regs *sde_regs; struct perf_sample_data data; + struct perf_raw_record raw; - /* Skip samples that are invalid or for which the instruction address - * is not predictable. For the latter, the wait-state bit is set. - */ - if (sample->I || sample->W) - return 0; - + /* Setup perf sample */ perf_sample_data_init(&data, 0, event->hw.last_period); + raw.size = sfr->size; + raw.data = sfr; + data.raw = &raw; /* Setup pt_regs to look like an CPU-measurement external interrupt * using the Program Request Alert code. The regs.int_parm_long @@ -893,14 +993,14 @@ static int perf_push_sample(struct perf_event *event, regs.int_parm = CPU_MF_INT_SF_PRA; sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long; - regs.psw.addr = sample->ia; - if (sample->T) + regs.psw.addr = sfr->basic.ia; + if (sfr->basic.T) regs.psw.mask |= PSW_MASK_DAT; - if (sample->W) + if (sfr->basic.W) regs.psw.mask |= PSW_MASK_WAIT; - if (sample->P) + if (sfr->basic.P) regs.psw.mask |= PSW_MASK_PSTATE; - switch (sample->AS) { + switch (sfr->basic.AS) { case 0x0: regs.psw.mask |= PSW_ASC_PRIMARY; break; @@ -922,7 +1022,7 @@ static int perf_push_sample(struct perf_event *event, * purposes too. * For now, simply use a non-zero value as guest indicator. */ - if (sample->hpp) + if (sfr->basic.hpp) sde_regs->in_guest = 1; overflow = 0; @@ -942,51 +1042,155 @@ static void perf_event_count_update(struct perf_event *event, u64 count) local64_add(count, &event->count); } +static int sample_format_is_valid(struct hws_combined_entry *sample, + unsigned int flags) +{ + if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) + /* Only basic-sampling data entries with data-entry-format + * version of 0x0001 can be processed. + */ + if (sample->basic.def != 0x0001) + return 0; + if (flags & PERF_CPUM_SF_DIAG_MODE) + /* The data-entry-format number of diagnostic-sampling data + * entries can vary. Because diagnostic data is just passed + * through, do only a sanity check on the DEF. + */ + if (sample->diag.def < 0x8001) + return 0; + return 1; +} + +static int sample_is_consistent(struct hws_combined_entry *sample, + unsigned long flags) +{ + /* This check applies only to basic-sampling data entries of potentially + * combined-sampling data entries. Invalid entries cannot be processed + * by the PMU and, thus, do not deliver an associated + * diagnostic-sampling data entry. + */ + if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE))) + return 0; + /* + * Samples are skipped, if they are invalid or for which the + * instruction address is not predictable, i.e., the wait-state bit is + * set. + */ + if (sample->basic.I || sample->basic.W) + return 0; + return 1; +} + +static void reset_sample_slot(struct hws_combined_entry *sample, + unsigned long flags) +{ + if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) + sample->basic.def = 0; + if (flags & PERF_CPUM_SF_DIAG_MODE) + sample->diag.def = 0; +} + +static void sfr_store_sample(struct sf_raw_sample *sfr, + struct hws_combined_entry *sample) +{ + if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE)) + sfr->basic = sample->basic; + if (sfr->format & PERF_CPUM_SF_DIAG_MODE) + memcpy(&sfr->diag, &sample->diag, sfr->dsdes); +} + +static void debug_sample_entry(struct hws_combined_entry *sample, + struct hws_trailer_entry *te, + unsigned long flags) +{ + debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " + "sampling data entry: te->f=%i basic.def=%04x (%p)" + " diag.def=%04x (%p)\n", te->f, + sample->basic.def, &sample->basic, + (flags & PERF_CPUM_SF_DIAG_MODE) + ? sample->diag.def : 0xFFFF, + (flags & PERF_CPUM_SF_DIAG_MODE) + ? &sample->diag : NULL); +} + /* hw_collect_samples() - Walk through a sample-data-block and collect samples * @event: The perf event * @sdbt: Sample-data-block table * @overflow: Event overflow counter * - * Walks through a sample-data-block and collects hardware sample-data that is - * pushed to the perf event subsystem. The overflow reports the number of - * samples that has been discarded due to an event overflow. + * Walks through a sample-data-block and collects sampling data entries that are + * then pushed to the perf event subsystem. Depending on the sampling function, + * there can be either basic-sampling or combined-sampling data entries. A + * combined-sampling data entry consists of a basic- and a diagnostic-sampling + * data entry. The sampling function is determined by the flags in the perf + * event hardware structure. The function always works with a combined-sampling + * data entry but ignores the the diagnostic portion if it is not available. + * + * Note that the implementation focuses on basic-sampling data entries and, if + * such an entry is not valid, the entire combined-sampling data entry is + * ignored. + * + * The overflow variables counts the number of samples that has been discarded + * due to a perf event overflow. */ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, unsigned long long *overflow) { - struct hws_data_entry *sample; - unsigned long *trailer; + unsigned long flags = SAMPL_FLAGS(&event->hw); + struct hws_combined_entry *sample; + struct hws_trailer_entry *te; + struct sf_raw_sample *sfr; + size_t sample_size; + + /* Prepare and initialize raw sample data */ + sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw); + sfr->format = flags & PERF_CPUM_SF_MODE_MASK; - trailer = trailer_entry_ptr(*sdbt); - sample = (struct hws_data_entry *) *sdbt; - while ((unsigned long *) sample < trailer) { + sample_size = event_sample_size(&event->hw); + te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); + sample = (struct hws_combined_entry *) *sdbt; + while ((unsigned long *) sample < (unsigned long *) te) { /* Check for an empty sample */ - if (!sample->def) + if (!sample->basic.def) break; /* Update perf event period */ perf_event_count_update(event, SAMPL_RATE(&event->hw)); - /* Check for basic sampling mode */ - if (sample->def == 0x0001) { + /* Check sampling data entry */ + if (sample_format_is_valid(sample, flags)) { /* If an event overflow occurred, the PMU is stopped to * throttle event delivery. Remaining sample data is * discarded. */ - if (!*overflow) - *overflow = perf_push_sample(event, sample); - else + if (!*overflow) { + if (sample_is_consistent(sample, flags)) { + /* Deliver sample data to perf */ + sfr_store_sample(sfr, sample); + *overflow = perf_push_sample(event, sfr); + } + } else /* Count discarded samples */ *overflow += 1; - } else - /* Sample slot is not yet written or other record */ - debug_sprintf_event(sfdbg, 5, "hw_collect_samples: " - "Unknown sample data entry format:" - " %i\n", sample->def); + } else { + debug_sample_entry(sample, te, flags); + /* Sample slot is not yet written or other record. + * + * This condition can occur if the buffer was reused + * from a combined basic- and diagnostic-sampling. + * If only basic-sampling is then active, entries are + * written into the larger diagnostic entries. + * This is typically the case for sample-data-blocks + * that are not full. Stop processing if the first + * invalid format was detected. + */ + if (!te->f) + break; + } /* Reset sample slot and advance to next sample */ - sample->def = 0; - sample++; + reset_sample_slot(sample, flags); + sample += sample_size; } } @@ -1104,6 +1308,8 @@ static void cpumsf_pmu_start(struct perf_event *event, int flags) perf_pmu_disable(event->pmu); event->hw.state = 0; cpuhw->lsctl.cs = 1; + if (SAMPL_DIAG_MODE(&event->hw)) + cpuhw->lsctl.cd = 1; perf_pmu_enable(event->pmu); } @@ -1119,6 +1325,7 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags) perf_pmu_disable(event->pmu); cpuhw->lsctl.cs = 0; + cpuhw->lsctl.cd = 0; event->hw.state |= PERF_HES_STOPPED; if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { @@ -1158,11 +1365,13 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) /* Ensure sampling functions are in the disabled state. If disabled, * switch on sampling enable control. */ - if (WARN_ON_ONCE(cpuhw->lsctl.es == 1)) { + if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) { err = -EAGAIN; goto out; } cpuhw->lsctl.es = 1; + if (SAMPL_DIAG_MODE(&event->hw)) + cpuhw->lsctl.ed = 1; /* Set in_use flag and store event */ event->hw.idx = 0; /* only one sampling event per CPU supported */ @@ -1185,6 +1394,7 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags) cpumsf_pmu_stop(event, PERF_EF_UPDATE); cpuhw->lsctl.es = 0; + cpuhw->lsctl.ed = 0; cpuhw->flags &= ~PMU_F_IN_USE; cpuhw->event = NULL; @@ -1198,9 +1408,11 @@ static int cpumsf_pmu_event_idx(struct perf_event *event) } CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); +CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); static struct attribute *cpumsf_pmu_events_attr[] = { CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG), NULL, }; @@ -1351,8 +1563,9 @@ static int param_set_sfb_size(const char *val, const struct kernel_param *kp) return rc; sfb_set_limits(min, max); - pr_info("Changed sampling buffer settings: min=%lu max=%lu\n", - CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); + pr_info("The sampling buffer limits have changed to: " + "min=%lu max=%lu (diag=x%lu)\n", + CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR); return 0; } @@ -1362,13 +1575,38 @@ static struct kernel_param_ops param_ops_sfb_size = { .get = param_get_sfb_size, }; +#define RS_INIT_FAILURE_QSI 0x0001 +#define RS_INIT_FAILURE_BSDES 0x0002 +#define RS_INIT_FAILURE_ALRT 0x0003 +#define RS_INIT_FAILURE_PERF 0x0004 +static void __init pr_cpumsf_err(unsigned int reason) +{ + pr_err("Sampling facility support for perf is not available: " + "reason=%04x\n", reason); +} + static int __init init_cpum_sampling_pmu(void) { + struct hws_qsi_info_block si; int err; if (!cpum_sf_avail()) return -ENODEV; + memset(&si, 0, sizeof(si)); + if (qsi(&si)) { + pr_cpumsf_err(RS_INIT_FAILURE_QSI); + return -ENODEV; + } + + if (si.bsdes != sizeof(struct hws_basic_entry)) { + pr_cpumsf_err(RS_INIT_FAILURE_BSDES); + return -EINVAL; + } + + if (si.ad) + sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); + sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); if (!sfdbg) pr_err("Registering for s390dbf failed\n"); @@ -1376,13 +1614,13 @@ static int __init init_cpum_sampling_pmu(void) err = register_external_interrupt(0x1407, cpumf_measurement_alert); if (err) { - pr_err("Failed to register for CPU-measurement alerts\n"); + pr_cpumsf_err(RS_INIT_FAILURE_ALRT); goto out; } err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW); if (err) { - pr_err("Failed to register cpum_sf pmu\n"); + pr_cpumsf_err(RS_INIT_FAILURE_PERF); unregister_external_interrupt(0x1407, cpumf_measurement_alert); goto out; } diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 60a6826..91aa215 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -139,16 +139,21 @@ static void print_debug_sf(void) int cpu = smp_processor_id(); memset(&si, 0, sizeof(si)); - if (qsi(&si)) { - pr_err("CPU[%i]: CPM_SF: qsi failed\n"); + if (qsi(&si)) return; - } - pr_info("CPU[%i]: CPM_SF: as=%i es=%i cs=%i bsdes=%i dsdes=%i" - " min=%i max=%i cpu_speed=%i tear=%p dear=%p\n", - cpu, si.as, si.es, si.cs, si.bsdes, si.dsdes, - si.min_sampl_rate, si.max_sampl_rate, si.cpu_speed, - si.tear, si.dear); + pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%i max=%i cpu_speed=%i\n", + cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate, + si.cpu_speed); + + if (si.as) + pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i" + " bsdes=%i tear=%p dear=%p\n", cpu, + si.as, si.es, si.cs, si.bsdes, si.tear, si.dear); + if (si.ad) + pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i" + " dsdes=%i tear=%p dear=%p\n", cpu, + si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear); } void perf_event_print_debug(void) diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index eb09587..a32c967 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -799,7 +799,7 @@ static void worker_on_interrupt(unsigned int cpu) static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, unsigned long *dear) { - struct hws_data_entry *sample_data_ptr; + struct hws_basic_entry *sample_data_ptr; unsigned long *trailer; trailer = trailer_entry_ptr(*sdbt); @@ -809,7 +809,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, trailer = dear; } - sample_data_ptr = (struct hws_data_entry *)(*sdbt); + sample_data_ptr = (struct hws_basic_entry *)(*sdbt); while ((unsigned long *)sample_data_ptr < trailer) { struct pt_regs *regs = NULL; -- cgit v1.1 From d7528862cf035994972c2c6f42c927db78f2f3a2 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 13 Dec 2013 12:45:01 +0100 Subject: s390/cpum_sf: Add flag to process full SDBs only Add the PERF_CPUM_SF_FULL_BLOCKS flag to process only sample-data-blocks that have the block-full-indicator bit set. Sample-data-blocks that are partially filled are discarded. Use this flag if the sampling buffer is likely to be shared among perf events that use different sampling modes. In such environments, flushing sample-data-blocks that are not completely filled, might cause invalid-data-formats. Setting PERF_CPUM_SF_FULL_BLOCKS prevents potentially invalid sampling data to be processed but, in contrast, also discards valid samples in partially filled sample-data-blocks. Note that sample-data-blocks might not become full for small sampling frequencies or for workload that is scheduled for tiny intervals. To sample with the PERF_CPUM_SF_FULL_BLOCKS flag, set the perf->attr.config1 to 0x0004. For example: perf record -e cpum_sf/config=0xB000,config1=0x0004/ Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/perf_event.h | 2 ++ arch/s390/kernel/perf_cpum_sf.c | 13 +++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index bd4573f..159a8ec 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -59,6 +59,7 @@ struct perf_sf_sde_regs { #define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */ #define PERF_CPUM_SF_MODE_MASK (PERF_CPUM_SF_BASIC_MODE| \ PERF_CPUM_SF_DIAG_MODE) +#define PERF_CPUM_SF_FULL_BLOCKS 0x0004 /* Process full SDBs only */ #define REG_NONE 0 #define REG_OVERFLOW 1 @@ -69,6 +70,7 @@ struct perf_sf_sde_regs { #define SAMPL_RATE(hwc) ((hwc)->event_base) #define SAMPL_FLAGS(hwc) ((hwc)->config_base) #define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE) +#define SDB_FULL_BLOCKS(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS) /* Structure for sampling data entries to be passed as perf raw sample data * to user space. Note that raw sample data must be aligned and, thus, might diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index b4ec058..3c3bc8d 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -733,6 +733,10 @@ static int __hw_perf_event_init(struct perf_event *event) SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE; } + /* Check and set other sampling flags */ + if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS) + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS; + /* The sampling information (si) contains information about the * min/max sampling intervals and the CPU speed. So calculate the * correct sampling interval and avoid the whole period adjust @@ -1203,8 +1207,10 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, * register of the specified perf event. * * Only full sample-data-blocks are processed. Specify the flash_all flag - * to also walk through partially filled sample-data-blocks. - * + * to also walk through partially filled sample-data-blocks. It is ignored + * if PERF_CPUM_SF_FULL_BLOCKS is set. The PERF_CPUM_SF_FULL_BLOCKS flag + * enforces the processing of full sample-data-blocks only (trailer entries + * with the block-full-indicator bit set). */ static void hw_perf_event_update(struct perf_event *event, int flush_all) { @@ -1214,6 +1220,9 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; int done; + if (flush_all && SDB_FULL_BLOCKS(hwc)) + flush_all = 0; + sdbt = (unsigned long *) TEAR_REG(hwc); done = event_overflow = sampl_overflow = num_sdb = 0; while (!done) { -- cgit v1.1 From 61aa4884b70cdf3b2d373e18ebbbada43789eade Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 1 Nov 2013 10:08:20 +0100 Subject: s390: use IS_ENABLED to check if a CONFIG is set to y or m This is shorter and should be used instead of the longer form which checks for both possible config options. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry64.S | 6 +++--- arch/s390/kernel/perf_event.c | 2 +- arch/s390/kernel/s390_ksyms.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index e5b43c9..9532fe2 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -74,7 +74,7 @@ _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ .endm .macro LPP newpp -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +#if IS_ENABLED(CONFIG_KVM) tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP jz .+8 .insn s,0xb2800000,\newpp @@ -82,7 +82,7 @@ _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ .endm .macro HANDLE_SIE_INTERCEPT scratch,reason -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +#if IS_ENABLED(CONFIG_KVM) tmhh %r8,0x0001 # interrupting from user ? jnz .+62 lgr \scratch,%r9 @@ -946,7 +946,7 @@ cleanup_idle_insn: .quad __critical_end - __critical_start -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +#if IS_ENABLED(CONFIG_KVM) /* * sie64a calling convention: * %r2 pointer to sie control block diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 91aa215..a76d602 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -60,7 +60,7 @@ static bool is_in_guest(struct pt_regs *regs) { if (user_mode(regs)) return false; -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +#if IS_ENABLED(CONFIG_KVM) return instruction_pointer(regs) == (unsigned long) &sie_exit; #else return false; diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 3bac589..9f60467 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -5,7 +5,7 @@ #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); #endif -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +#if IS_ENABLED(CONFIG_KVM) EXPORT_SYMBOL(sie64a); EXPORT_SYMBOL(sie_exit); #endif -- cgit v1.1 From e3fec2f74f7f90d2149a24243a4d040caabe6f30 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Dec 2013 21:26:19 -0500 Subject: lib: Add missing arch generic-y entries for asm-generic/hash.h Reported-by: Stephen Rothwell Signed-off-by: David S. Miller --- arch/s390/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 7a5288f..8386a4a 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += clkdev.h generic-y += trace_clock.h generic-y += preempt.h +generic-y += hash.h -- cgit v1.1 From d80512f87474f2dfd67ef931737659acce20fe69 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 16 Dec 2013 14:31:26 +0100 Subject: s390/smp: improve setup of possible cpu mask Since under z/VM we cannot have more than 64 cpus, make sure the cpu_possible_mask does not contain more bits. This avoids wasting memory for dynamic per-cpu allocations if CONFIG_NR_CPUS is larger than 64. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 - arch/s390/include/asm/smp.h | 2 ++ arch/s390/kernel/setup.c | 1 + arch/s390/kernel/smp.c | 25 ++++++++++++++++--------- 4 files changed, 19 insertions(+), 10 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 1e1a03d..e9f3125 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -135,7 +135,6 @@ config S390 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 if 32BIT select HAVE_VIRT_CPU_ACCOUNTING - select INIT_ALL_POSSIBLE select KTIME_SCALAR if 32BIT select MODULES_USE_ELF_RELA select OLD_SIGACTION diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index ac9bed8..1607793 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -31,6 +31,7 @@ extern void smp_yield(void); extern void smp_stop_cpu(void); extern void smp_cpu_set_polarization(int cpu, int val); extern int smp_cpu_get_polarization(int cpu); +extern void smp_fill_possible_mask(void); #else /* CONFIG_SMP */ @@ -50,6 +51,7 @@ static inline int smp_vcpu_scheduled(int cpu) { return 1; } static inline void smp_yield_cpu(int cpu) { } static inline void smp_yield(void) { } static inline void smp_stop_cpu(void) { } +static inline void smp_fill_possible_mask(void) { } #endif /* CONFIG_SMP */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 4444875..0f3d44e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1023,6 +1023,7 @@ void __init setup_arch(char **cmdline_p) setup_vmcoreinfo(); setup_lowcore(); + smp_fill_possible_mask(); cpu_init(); s390_init_cpu_topology(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index dc4a534..9587047 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -721,18 +721,14 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) return 0; } -static int __init setup_possible_cpus(char *s) -{ - int max, cpu; +static unsigned int setup_possible_cpus __initdata; - if (kstrtoint(s, 0, &max) < 0) - return 0; - init_cpu_possible(cpumask_of(0)); - for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++) - set_cpu_possible(cpu, true); +static int __init _setup_possible_cpus(char *s) +{ + get_option(&s, &setup_possible_cpus); return 0; } -early_param("possible_cpus", setup_possible_cpus); +early_param("possible_cpus", _setup_possible_cpus); #ifdef CONFIG_HOTPLUG_CPU @@ -775,6 +771,17 @@ void __noreturn cpu_die(void) #endif /* CONFIG_HOTPLUG_CPU */ +void __init smp_fill_possible_mask(void) +{ + unsigned int possible, cpu; + + possible = setup_possible_cpus; + if (!possible) + possible = MACHINE_IS_VM ? 64 : nr_cpu_ids; + for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) + set_cpu_possible(cpu, true); +} + void __init smp_prepare_cpus(unsigned int max_cpus) { /* request the 0x1201 emergency signal external interrupt */ -- cgit v1.1 From 9efe4f2992025c3a4027c60bf36ae9d710ca3781 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 17 Dec 2013 13:41:31 +0100 Subject: s390/mm: optimize randomize_et_dyn for !PF_RANDOMIZE Skip the call to brk_rnd() if the PF_RANDOMIZE flag is not set for the process. This avoids the costly get_random_int() call. Modify arch_randomize_brk() as well to make it look like randomize_et_dyn(). Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/process.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 7ed0d4e..dd14532 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -261,20 +261,18 @@ static inline unsigned long brk_rnd(void) unsigned long arch_randomize_brk(struct mm_struct *mm) { - unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd()); + unsigned long ret; - if (ret < mm->brk) - return mm->brk; - return ret; + ret = PAGE_ALIGN(mm->brk + brk_rnd()); + return (ret > mm->brk) ? ret : mm->brk; } unsigned long randomize_et_dyn(unsigned long base) { - unsigned long ret = PAGE_ALIGN(base + brk_rnd()); + unsigned long ret; if (!(current->flags & PF_RANDOMIZE)) return base; - if (ret < base) - return base; - return ret; + ret = PAGE_ALIGN(base + brk_rnd()); + return (ret > base) ? ret : base; } -- cgit v1.1 From 91f3e3eaba4413e76ce8e12e3ef10525a889142f Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Wed, 20 Nov 2013 10:47:13 +0100 Subject: s390/zcrypt: add support for EP11 coprocessor cards This feature extends the generic cryptographic device driver (zcrypt) with a new capability to service EP11 requests for the Crypto Express4S card in EP11 (Enterprise PKCS#11 mode) coprocessor mode. Signed-off-by: Ingo Tuchscherer Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/zcrypt.h | 65 +++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index e83fc11..f2b18ea 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -154,6 +154,67 @@ struct ica_xcRB { unsigned short priority_window; unsigned int status; } __attribute__((packed)); + +/** + * struct ep11_cprb - EP11 connectivity programming request block + * @cprb_len: CPRB header length [0x0020] + * @cprb_ver_id: CPRB version id. [0x04] + * @pad_000: Alignment pad bytes + * @flags: Admin cmd [0x80] or functional cmd [0x00] + * @func_id: Function id / subtype [0x5434] + * @source_id: Source id [originator id] + * @target_id: Target id [usage/ctrl domain id] + * @ret_code: Return code + * @reserved1: Reserved + * @reserved2: Reserved + * @payload_len: Payload length + */ +struct ep11_cprb { + uint16_t cprb_len; + unsigned char cprb_ver_id; + unsigned char pad_000[2]; + unsigned char flags; + unsigned char func_id[2]; + uint32_t source_id; + uint32_t target_id; + uint32_t ret_code; + uint32_t reserved1; + uint32_t reserved2; + uint32_t payload_len; +} __attribute__((packed)); + +/** + * struct ep11_target_dev - EP11 target device list + * @ap_id: AP device id + * @dom_id: Usage domain id + */ +struct ep11_target_dev { + uint16_t ap_id; + uint16_t dom_id; +}; + +/** + * struct ep11_urb - EP11 user request block + * @targets_num: Number of target adapters + * @targets: Addr to target adapter list + * @weight: Level of request priority + * @req_no: Request id/number + * @req_len: Request length + * @req: Addr to request block + * @resp_len: Response length + * @resp: Addr to response block + */ +struct ep11_urb { + uint16_t targets_num; + uint64_t targets; + uint64_t weight; + uint64_t req_no; + uint64_t req_len; + uint64_t req; + uint64_t resp_len; + uint64_t resp; +} __attribute__((packed)); + #define AUTOSELECT ((unsigned int)0xFFFFFFFF) #define ZCRYPT_IOCTL_MAGIC 'z' @@ -183,6 +244,9 @@ struct ica_xcRB { * ZSECSENDCPRB * Send an arbitrary CPRB to a crypto card. * + * ZSENDEP11CPRB + * Send an arbitrary EP11 CPRB to an EP11 coprocessor crypto card. + * * Z90STAT_STATUS_MASK * Return an 64 element array of unsigned chars for the status of * all devices. @@ -256,6 +320,7 @@ struct ica_xcRB { #define ICARSAMODEXPO _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0) #define ICARSACRT _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0) #define ZSECSENDCPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0) +#define ZSENDEP11CPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0) /* New status calls */ #define Z90STAT_TOTALCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int) -- cgit v1.1 From fed286110f4bab01f93f06c32951fbc120fb71b1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Dec 2013 11:28:59 +0100 Subject: crypto: arch - use crypto_memneq instead of memcmp Replace remaining occurences (just as we did in crypto/) under arch/*/crypto/ that make use of memcmp() for comparing keys or authentication tags for usage with crypto_memneq(). It can simply be used as a drop-in replacement for the normal memcmp(). Signed-off-by: Daniel Borkmann Cc: James Yonan Signed-off-by: Herbert Xu --- arch/s390/crypto/des_s390.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index bcca01c..200f2a1 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -237,9 +237,9 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key, struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); u32 *flags = &tfm->crt_flags; - if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && - memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], - DES_KEY_SIZE)) && + if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && + crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], + DES_KEY_SIZE)) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { *flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; -- cgit v1.1 From a384c8924a8be3f5cf7dac06c04e9dd4a78b41df Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 16 Dec 2013 09:34:54 +0100 Subject: s390/PCI: Fix single MSI only check Multiple MSIs have never been supported on s390 architecture, but the platform code fails to report single MSI only. Signed-off-by: Alexander Gordeev Signed-off-by: Bjorn Helgaas Acked-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index bf7c73d..6f3788e 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -409,6 +409,8 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI) return -EINVAL; + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX); msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI); -- cgit v1.1 From 57b7cb024422bbe85366ef28f5e192997bd16943 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 16 Dec 2013 09:34:55 +0100 Subject: s390/PCI: Remove superfluous check of MSI type arch_setup_msi_irqs() hook can only be called from the generic MSI code which ensures correct MSI type parameter. Signed-off-by: Alexander Gordeev Signed-off-by: Bjorn Helgaas Acked-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 6f3788e..4859c40 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -407,8 +407,6 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) struct msi_msg msg; int rc; - if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI) - return -EINVAL; if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX); -- cgit v1.1 From fcf2f402937a6696f6fa2a1aa882c5075e5fac34 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 18 Dec 2013 16:46:02 +0100 Subject: s390/pci: obtain function handle in hotplug notifier When using the CLP interface to enable or disable a pci device a valid function handle needs to be delivered. So far our assumption was that we always have an up-to-date version of the function handle (since it doesn't change when the device is in use). This assumption is incorrect if the pci device is enabled or disabled outside of our control. When we are notified about such a change we already receive the new function handle. Just use it. Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_event.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 800f064..0696072 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -75,6 +75,7 @@ void zpci_event_availability(void *data) if (!zdev || zdev->state == ZPCI_FN_STATE_CONFIGURED) break; zdev->state = ZPCI_FN_STATE_CONFIGURED; + zdev->fh = ccdf->fh; ret = zpci_enable_device(zdev); if (ret) break; @@ -101,6 +102,7 @@ void zpci_event_availability(void *data) if (pdev) pci_stop_and_remove_bus_device(pdev); + zdev->fh = ccdf->fh; zpci_disable_device(zdev); zdev->state = ZPCI_FN_STATE_STANDBY; break; -- cgit v1.1 From 47933ad41a86a4a9b50bed7c9b9bd2ba242aac63 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Nov 2013 14:57:36 +0100 Subject: arch: Introduce smp_load_acquire(), smp_store_release() A number of situations currently require the heavyweight smp_mb(), even though there is no need to order prior stores against later loads. Many architectures have much cheaper ways to handle these situations, but the Linux kernel currently has no portable way to make use of them. This commit therefore supplies smp_load_acquire() and smp_store_release() to remedy this situation. The new smp_load_acquire() primitive orders the specified load against any subsequent reads or writes, while the new smp_store_release() primitive orders the specifed store against any prior reads or writes. These primitives allow array-based circular FIFOs to be implemented without an smp_mb(), and also allow a theoretical hole in rcu_assign_pointer() to be closed at no additional expense on most architectures. In addition, the RCU experience transitioning from explicit smp_read_barrier_depends() and smp_wmb() to rcu_dereference() and rcu_assign_pointer(), respectively resulted in substantial improvements in readability. It therefore seems likely that replacing other explicit barriers with smp_load_acquire() and smp_store_release() will provide similar benefits. It appears that roughly half of the explicit barriers in core kernel code might be so replaced. [Changelog by PaulMck] Reviewed-by: "Paul E. McKenney" Signed-off-by: Peter Zijlstra Acked-by: Will Deacon Cc: Benjamin Herrenschmidt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Michael Ellerman Cc: Michael Neuling Cc: Russell King Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Victor Kaplansky Cc: Tony Luck Cc: Oleg Nesterov Link: http://lkml.kernel.org/r/20131213150640.908486364@infradead.org Signed-off-by: Ingo Molnar --- arch/s390/include/asm/barrier.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 16760ee..578680f 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -32,4 +32,19 @@ #define set_mb(var, value) do { var = value; mb(); } while (0) +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ___p1; \ +}) + #endif /* __ASM_BARRIER_H */ -- cgit v1.1 From 075dfd82102d2048e43e1cbf48d558d915c50072 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Jan 2014 13:35:16 +0100 Subject: s390/compat: fix PSW32_USER_BITS definition PSW32_USER_BITS should define the primary address space for user space instead of the home address space. Symptom of this bug is that gdb doesn't work in compat mode. The bug was introduced with e258d719ff28 "s390/uaccess: always run the kernel in home space" and f26946d7ecad "s390/compat: make psw32_user_bits a constant value again". Cc: stable@vger.kernel.org # v3.13+ Reported-by: Andreas Arnez Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/compat.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 4bf9da0..5d7e8cf 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -38,7 +38,8 @@ #define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \ PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \ - PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | PSW32_ASC_HOME) + PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \ + PSW32_ASC_PRIMARY) #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "s390\0\0\0\0" -- cgit v1.1 From 28aa39b853fc889e672b73f69ce591d15e6306b0 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 13 Jan 2014 16:02:28 +0100 Subject: s390: delete new instances of __cpuinit usage The patch "s390/perf: add support for the CPU-Measurement Sampling Facility" added a new instance of the __cpuinit macro usage. We removed this a couple versions ago; we now want to remove the compat no-op stubs. Introducing new users is not what we want to see at this point in time, as it will break once the stubs are gone. Cc: Hendrik Brueckner Signed-off-by: Paul Gortmaker Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_sf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 3c3bc8d..6c0d298 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1512,8 +1512,8 @@ static void cpumf_measurement_alert(struct ext_code ext_code, } } -static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) +static int cpumf_pmu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) { unsigned int cpu = (long) hcpu; int flags; -- cgit v1.1 From 1c59a861d6982edf3f9905ad2098575336ae904d Mon Sep 17 00:00:00 2001 From: Eugene Crosser Date: Wed, 24 Apr 2013 12:00:23 +0200 Subject: s390/qdio: bridgeport support - CHSC part Introduce function for the "Perform network-subchannel operation" CHSC command with operation code "bridgeport information", and bit definitions for "characteristics" pertaning to this command. Signed-off-by: Eugene Crosser Reviewed-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/css_chars.h | 2 ++ arch/s390/include/asm/qdio.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h index 7e1c917..09d1dd4 100644 --- a/arch/s390/include/asm/css_chars.h +++ b/arch/s390/include/asm/css_chars.h @@ -29,6 +29,8 @@ struct css_general_char { u32 fcx : 1; /* bit 88 */ u32 : 19; u32 alt_ssi : 1; /* bit 108 */ + u32:1; + u32 narf:1; /* bit 110 */ } __packed; extern struct css_general_char css_general_characteristics; diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 57d0d7e..0a1abf1 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -378,6 +378,34 @@ struct qdio_initialize { struct qdio_outbuf_state *output_sbal_state_array; }; +/** + * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc() + * @l3_ipv6_addr: entry contains IPv6 address + * @l3_ipv4_addr: entry contains IPv4 address + * @l2_addr_lnid: entry contains MAC address and VLAN ID + */ +enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid}; + +/** + * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc() + * @nit: Network interface token + * @addr: Address of one of the three types + * + * The struct is passed to the callback function by qdio_brinfo_desc() + */ +struct qdio_brinfo_entry_l3_ipv6 { + u64 nit; + struct { unsigned char _s6_addr[16]; } addr; +} __packed; +struct qdio_brinfo_entry_l3_ipv4 { + u64 nit; + struct { uint32_t _s_addr; } addr; +} __packed; +struct qdio_brinfo_entry_l2 { + u64 nit; + struct { u8 mac[6]; u16 lnid; } addr_lnid; +} __packed; + #define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ #define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */ #define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */ @@ -399,5 +427,10 @@ extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *); extern int qdio_shutdown(struct ccw_device *, int); extern int qdio_free(struct ccw_device *); extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *); +extern int qdio_pnso_brinfo(struct subchannel_id schid, + int cnc, u16 *response, + void (*cb)(void *priv, enum qdio_brinfo_entry_type type, + void *entry), + void *priv); #endif /* __QDIO_H__ */ -- cgit v1.1 From 59b55a4df24e2f105c87721bec3e6c80c3d7b20b Mon Sep 17 00:00:00 2001 From: Eugene Crosser Date: Tue, 14 Jan 2014 15:54:12 +0100 Subject: s390/qdio: bridgeport support - CHSC part Introduce function for the "Perform network-subchannel operation" CHSC command with operation code "bridgeport information", and bit definitions for "characteristics" pertaning to this command. Signed-off-by: Eugene Crosser Signed-off-by: Frank Blaschka Reviewed-by: Sebastian Ott Signed-off-by: David S. Miller --- arch/s390/include/asm/css_chars.h | 2 ++ arch/s390/include/asm/qdio.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h index 7e1c917..09d1dd4 100644 --- a/arch/s390/include/asm/css_chars.h +++ b/arch/s390/include/asm/css_chars.h @@ -29,6 +29,8 @@ struct css_general_char { u32 fcx : 1; /* bit 88 */ u32 : 19; u32 alt_ssi : 1; /* bit 108 */ + u32:1; + u32 narf:1; /* bit 110 */ } __packed; extern struct css_general_char css_general_characteristics; diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 57d0d7e..0a1abf1 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -378,6 +378,34 @@ struct qdio_initialize { struct qdio_outbuf_state *output_sbal_state_array; }; +/** + * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc() + * @l3_ipv6_addr: entry contains IPv6 address + * @l3_ipv4_addr: entry contains IPv4 address + * @l2_addr_lnid: entry contains MAC address and VLAN ID + */ +enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid}; + +/** + * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc() + * @nit: Network interface token + * @addr: Address of one of the three types + * + * The struct is passed to the callback function by qdio_brinfo_desc() + */ +struct qdio_brinfo_entry_l3_ipv6 { + u64 nit; + struct { unsigned char _s6_addr[16]; } addr; +} __packed; +struct qdio_brinfo_entry_l3_ipv4 { + u64 nit; + struct { uint32_t _s_addr; } addr; +} __packed; +struct qdio_brinfo_entry_l2 { + u64 nit; + struct { u8 mac[6]; u16 lnid; } addr_lnid; +} __packed; + #define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ #define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */ #define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */ @@ -399,5 +427,10 @@ extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *); extern int qdio_shutdown(struct ccw_device *, int); extern int qdio_free(struct ccw_device *); extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *); +extern int qdio_pnso_brinfo(struct subchannel_id schid, + int cnc, u16 *response, + void (*cb)(void *priv, enum qdio_brinfo_entry_type type, + void *entry), + void *priv); #endif /* __QDIO_H__ */ -- cgit v1.1 From aee636c4809fa54848ff07a899b326eb1f9987a2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Jan 2014 06:50:07 -0800 Subject: bpf: do not use reciprocal divide At first Jakub Zawadzki noticed that some divisions by reciprocal_divide were not correct. (off by one in some cases) http://www.wireshark.org/~darkjames/reciprocal-buggy.c He could also show this with BPF: http://www.wireshark.org/~darkjames/set-and-dump-filter-k-bug.c The reciprocal divide in linux kernel is not generic enough, lets remove its use in BPF, as it is not worth the pain with current cpus. Signed-off-by: Eric Dumazet Reported-by: Jakub Zawadzki Cc: Mircea Gherzan Cc: Daniel Borkmann Cc: Hannes Frederic Sowa Cc: Matt Evans Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: David S. Miller Signed-off-by: David S. Miller --- arch/s390/net/bpf_jit_comp.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 16871da..fc0fa77 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -371,11 +371,13 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* dr %r4,%r12 */ EMIT2(0x1d4c); break; - case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K) */ - /* m %r4,(%r13) */ - EMIT4_DISP(0x5c40d000, EMIT_CONST(K)); - /* lr %r5,%r4 */ - EMIT2(0x1854); + case BPF_S_ALU_DIV_K: /* A /= K */ + if (K == 1) + break; + /* lhi %r4,0 */ + EMIT4(0xa7480000); + /* d %r4,(%r13) */ + EMIT4_DISP(0x5d40d000, EMIT_CONST(K)); break; case BPF_S_ALU_MOD_X: /* A %= X */ jit->seen |= SEEN_XREG | SEEN_RET0; @@ -391,6 +393,11 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, EMIT2(0x1854); break; case BPF_S_ALU_MOD_K: /* A %= K */ + if (K == 1) { + /* lhi %r5,0 */ + EMIT4(0xa7580000); + break; + } /* lhi %r4,0 */ EMIT4(0xa7480000); /* d %r4,(%r13) */ -- cgit v1.1 From b4a960159e6f5254ac3c95dd183789f402431977 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 13 Dec 2013 12:53:42 +0100 Subject: s390: Fix misspellings using 'codespell' tool Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/qdio.h | 2 +- arch/s390/kernel/entry64.S | 2 +- arch/s390/kernel/setup.c | 2 +- arch/s390/kvm/priv.c | 2 +- arch/s390/lib/uaccess_pt.c | 4 ++-- arch/s390/mm/pgtable.c | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 0a1abf1..d786c63 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -336,7 +336,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, #define QDIO_FLAG_CLEANUP_USING_HALT 0x02 /** - * struct qdio_initialize - qdio initalization data + * struct qdio_initialize - qdio initialization data * @cdev: associated ccw device * @q_format: queue format * @adapter_name: name for the adapter diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 9532fe2..384e609 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -975,7 +975,7 @@ sie_done: lctlg %c1,%c1,__LC_USER_ASCE # load primary asce # some program checks are suppressing. C code (e.g. do_protection_exception) # will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other -# instructions beween sie64a and sie_done should not cause program +# instructions between sie64a and sie_done should not cause program # interrupts. So lets use a nop (47 00 00 00) as a landing pad. # See also HANDLE_SIE_INTERCEPT rewind_pad: diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 4444875..36e81d7 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -373,7 +373,7 @@ static void __init setup_lowcore(void) /* * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant - * restart data to the absolute zero lowcore. This is necesary if + * restart data to the absolute zero lowcore. This is necessary if * PSW restart is done on an offline CPU that has lowcore zero. */ lc->restart_stack = (unsigned long) restart_stack; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 2440602..d101dae 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -275,7 +275,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } else { /* - * Set condition code 3 to stop the guest from issueing channel + * Set condition code 3 to stop the guest from issuing channel * I/O instructions. */ kvm_s390_set_psw_cc(vcpu, 3); diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index dbdab3e..0632dc5 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -74,8 +74,8 @@ static size_t copy_in_kernel(size_t count, void __user *to, /* * Returns kernel address for user virtual address. If the returned address is - * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occured and the address - * contains the (negative) exception code. + * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occurred and the + * address contains the (negative) exception code. */ #ifdef CONFIG_64BIT diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index e794c88..3584ed9 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -293,7 +293,7 @@ static int gmap_alloc_table(struct gmap *gmap, * @addr: address in the guest address space * @len: length of the memory area to unmap * - * Returns 0 if the unmap succeded, -EINVAL if not. + * Returns 0 if the unmap succeeded, -EINVAL if not. */ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) { @@ -344,7 +344,7 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment); * @from: source address in the parent address space * @to: target address in the guest address space * - * Returns 0 if the mmap succeded, -EINVAL or -ENOMEM if not. + * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. */ int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long len) -- cgit v1.1 From f85168e4d96b31b09ecf09a679820b031224e69e Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 8 Jan 2014 16:45:39 +0100 Subject: s390/cpum_sf: fix printk format warnings Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_event.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index a76d602..5d2dfa3 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -142,17 +142,17 @@ static void print_debug_sf(void) if (qsi(&si)) return; - pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%i max=%i cpu_speed=%i\n", + pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n", cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate, si.cpu_speed); if (si.as) pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i" - " bsdes=%i tear=%p dear=%p\n", cpu, + " bsdes=%i tear=%016lx dear=%016lx\n", cpu, si.as, si.es, si.cs, si.bsdes, si.tear, si.dear); if (si.ad) pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i" - " dsdes=%i tear=%p dear=%p\n", cpu, + " dsdes=%i tear=%016lx dear=%016lx\n", cpu, si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear); } -- cgit v1.1 From 7feb6bb8e6dbd129c11fc93bf206daa156bf1c0f Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Fri, 28 Jun 2013 13:30:24 +0200 Subject: KVM: s390: enable Transactional Execution This patch enables transactional execution for KVM guests on s390 systems zec12 or later. We rework the allocation of the page containing the sie_block to also back the Interception Transaction Diagnostic Block. If available the TE facilities will be enabled. Setting bit 73 and 50 in vfacilities bitmask reveals the HW facilities Transactional Memory and Constraint Transactional Memory respectively to the KVM guest. Furthermore, the patch restores the Program-Interruption TDB from the Interception TDB in case a program interception has occurred and the ITDB has a valid format. Signed-off-by: Michael Mueller Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 15 ++++++++++++++- arch/s390/kvm/intercept.c | 11 +++++++++++ arch/s390/kvm/kvm-s390.c | 17 +++++++++++------ arch/s390/kvm/kvm-s390.h | 6 ++++++ 4 files changed, 42 insertions(+), 7 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d5bc375..eef3dd3 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -106,9 +106,22 @@ struct kvm_s390_sie_block { __u64 gbea; /* 0x0180 */ __u8 reserved188[24]; /* 0x0188 */ __u32 fac; /* 0x01a0 */ - __u8 reserved1a4[92]; /* 0x01a4 */ + __u8 reserved1a4[68]; /* 0x01a4 */ + __u64 itdba; /* 0x01e8 */ + __u8 reserved1f0[16]; /* 0x01f0 */ } __attribute__((packed)); +struct kvm_s390_itdb { + __u8 data[256]; +} __packed; + +struct sie_page { + struct kvm_s390_sie_block sie_block; + __u8 reserved200[1024]; /* 0x0200 */ + struct kvm_s390_itdb itdb; /* 0x0600 */ + __u8 reserved700[2304]; /* 0x0700 */ +} __packed; + struct kvm_vcpu_stat { u32 exit_userspace; u32 exit_null; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 5ddbbde..eeb1ac7 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -112,6 +112,17 @@ static int handle_instruction(struct kvm_vcpu *vcpu) static int handle_prog(struct kvm_vcpu *vcpu) { vcpu->stat.exit_program_interruption++; + + /* Restore ITDB to Program-Interruption TDB in guest memory */ + if (IS_TE_ENABLED(vcpu) && + !(current->thread.per_flags & PER_FLAG_NO_TE) && + IS_ITDB_VALID(vcpu)) { + copy_to_guest(vcpu, TDB_ADDR, vcpu->arch.sie_block->itdba, + sizeof(struct kvm_s390_itdb)); + memset((void *) vcpu->arch.sie_block->itdba, 0, + sizeof(struct kvm_s390_itdb)); + } + trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc); return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1bb1dda..0084c2c2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -395,6 +395,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) CPUSTAT_STOPPED | CPUSTAT_GED); vcpu->arch.sie_block->ecb = 6; + if (test_vfacility(50) && test_vfacility(73)) + vcpu->arch.sie_block->ecb |= 0x10; + vcpu->arch.sie_block->ecb2 = 8; vcpu->arch.sie_block->eca = 0xC1002001U; vcpu->arch.sie_block->fac = (int) (long) vfacilities; @@ -411,6 +414,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; + struct sie_page *sie_page; int rc = -EINVAL; if (id >= KVM_MAX_VCPUS) @@ -422,12 +426,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, if (!vcpu) goto out; - vcpu->arch.sie_block = (struct kvm_s390_sie_block *) - get_zeroed_page(GFP_KERNEL); - - if (!vcpu->arch.sie_block) + sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); + if (!sie_page) goto out_free_cpu; + vcpu->arch.sie_block = &sie_page->sie_block; + vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; + vcpu->arch.sie_block->icpua = id; if (!kvm_is_ucontrol(kvm)) { if (!kvm->arch.sca) { @@ -1178,8 +1183,8 @@ static int __init kvm_s390_init(void) return -ENOMEM; } memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); - vfacilities[0] &= 0xff82fff3f47c0000UL; - vfacilities[1] &= 0x001c000000000000UL; + vfacilities[0] &= 0xff82fff3f47c2000UL; + vfacilities[1] &= 0x005c000000000000UL; return 0; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 095cf51..f9559b0 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -26,6 +26,12 @@ extern unsigned long *vfacilities; int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); +/* Transactional Memory Execution related macros */ +#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10)) +#define TDB_ADDR 0x1800UL +#define TDB_FORMAT1 1 +#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) + #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ do { \ debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \ -- cgit v1.1 From d208c79d63e06457eef077af770d23dc4cde4d43 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 12 Dec 2013 13:40:40 +0100 Subject: KVM: s390: Enable the LPP facility for guests The Load-Program-Parameter Facility is available for guests without any further ado, so we should indicate its availability by setting facility bit 40 if it is supported by the host. Signed-off-by: Thomas Huth Reviewed-by: Michael Mueller Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0084c2c2..597114b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1183,7 +1183,7 @@ static int __init kvm_s390_init(void) return -ENOMEM; } memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); - vfacilities[0] &= 0xff82fff3f47c2000UL; + vfacilities[0] &= 0xff82fff3f4fc2000UL; vfacilities[1] &= 0x005c000000000000UL; return 0; } -- cgit v1.1 From 3af57f78c38131b7a66e2b01e06fdacae01992a3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 17 Jan 2014 09:37:15 +0100 Subject: s390/bpf,jit: fix 32 bit divisions, use unsigned divide instructions The s390 bpf jit compiler emits the signed divide instructions "dr" and "d" for unsigned divisions. This can cause problems: the dividend will be zero extended to a 64 bit value and the divisor is the 32 bit signed value as specified A or X accumulator, even though A and X are supposed to be treated as unsigned values. The divide instrunctions will generate an exception if the result cannot be expressed with a 32 bit signed value. This is the case if e.g. the dividend is 0xffffffff and the divisor either 1 or also 0xffffffff (signed: -1). To avoid all these issues simply use unsigned divide instructions. Signed-off-by: Heiko Carstens Signed-off-by: David S. Miller --- arch/s390/net/bpf_jit_comp.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index fc0fa77..708d60e 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -368,16 +368,16 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg)); /* lhi %r4,0 */ EMIT4(0xa7480000); - /* dr %r4,%r12 */ - EMIT2(0x1d4c); + /* dlr %r4,%r12 */ + EMIT4(0xb997004c); break; case BPF_S_ALU_DIV_K: /* A /= K */ if (K == 1) break; /* lhi %r4,0 */ EMIT4(0xa7480000); - /* d %r4,(%r13) */ - EMIT4_DISP(0x5d40d000, EMIT_CONST(K)); + /* dl %r4,(%r13) */ + EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K)); break; case BPF_S_ALU_MOD_X: /* A %= X */ jit->seen |= SEEN_XREG | SEEN_RET0; @@ -387,8 +387,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg)); /* lhi %r4,0 */ EMIT4(0xa7480000); - /* dr %r4,%r12 */ - EMIT2(0x1d4c); + /* dlr %r4,%r12 */ + EMIT4(0xb997004c); /* lr %r5,%r4 */ EMIT2(0x1854); break; @@ -400,8 +400,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, } /* lhi %r4,0 */ EMIT4(0xa7480000); - /* d %r4,(%r13) */ - EMIT4_DISP(0x5d40d000, EMIT_CONST(K)); + /* dl %r4,(%r13) */ + EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K)); /* lr %r5,%r4 */ EMIT2(0x1854); break; -- cgit v1.1 From ea02f9411d9faa3553ed09ce0ec9f00ceae9885e Mon Sep 17 00:00:00 2001 From: Michal Sekletar Date: Fri, 17 Jan 2014 17:09:45 +0100 Subject: net: introduce SO_BPF_EXTENSIONS For user space packet capturing libraries such as libpcap, there's currently only one way to check which BPF extensions are supported by the kernel, that is, commit aa1113d9f85d ("net: filter: return -EINVAL if BPF_S_ANC* operation is not supported"). For querying all extensions at once this might be rather inconvenient. Therefore, this patch introduces a new option which can be used as an argument for getsockopt(), and allows one to obtain information about which BPF extensions are supported by the current kernel. As David Miller suggests, we do not need to define any bits right now and status quo can just return 0 in order to state that this versions supports SKF_AD_PROTOCOL up to SKF_AD_PAY_OFFSET. Later additions to BPF extensions need to add their bits to the bpf_tell_extensions() function, as documented in the comment. Signed-off-by: Michal Sekletar Cc: David Miller Reviewed-by: Daniel Borkmann Signed-off-by: David S. Miller --- arch/s390/include/uapi/asm/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index c286c2e..e031332 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -84,4 +84,6 @@ #define SO_MAX_PACING_RATE 47 +#define SO_BPF_EXTENSIONS 48 + #endif /* _ASM_SOCKET_H */ -- cgit v1.1 From 699bde3b6c95319749a8e1b7aa2b3f6bee84bff8 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 20 Jan 2014 12:34:13 +0100 Subject: KVM: s390: Fix memory access error detection Seems that commit 210b1607012cc9034841a393e0591b2c86d9e26c (KVM: s390: Removed SIE_INTERCEPT_UCONTROL) lost a hunk when we reworked our patch queue to rework the async_fp code. We now ignore faults on the sie instruction (guest accesses non-existing memory) instead of sending a fault into the guest. This leads to hang situations with the old virtio transport that checks for descriptor memory after guest memory. Instead of bailing out this code now goes wild... Lets re-add the check. Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1bb1dda..7635c00 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -738,6 +738,10 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) current->thread.gmap_addr; vcpu->run->s390_ucontrol.pgm_code = 0x10; rc = -EREMOTE; + } else { + VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); + trace_kvm_s390_sie_fault(vcpu); + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); -- cgit v1.1 From 38ea1f358b3b7dd4b965ef992894b5857471dc45 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 21 Jan 2014 14:03:58 +0100 Subject: s390/32bit: fix cmpxchg64 Fix broken inline assembly contraints for cmpxchg64 on 32bit. Fixes this crash: specification exception: 0006 [#1] SMP CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.13.0 #4 task: 005a16c8 ti: 00592000 task.ti: 00592000 Krnl PSW : 070ce000 8029abd6 (lockref_get+0x3e/0x9c) ... Krnl Code: 8029abcc: a71a0001 ahi %r1,1 8029abd0: 1852 lr %r5,%r2 #8029abd2: bb40f064 cds %r4,%r0,100(%r15) >8029abd6: 1943 cr %r4,%r3 8029abd8: 1815 lr %r1,%r5 Call Trace: ([<0000000078e01870>] 0x78e01870) [<000000000021105a>] sysfs_mount+0xd2/0x1c8 [<00000000001b551e>] mount_fs+0x3a/0x134 [<00000000001ce768>] vfs_kern_mount+0x44/0x11c [<00000000001ce864>] kern_mount_data+0x24/0x3c [<00000000005cc4b8>] sysfs_init+0x74/0xd4 [<00000000005cb5b4>] mnt_init+0xe0/0x1fc [<00000000005cb16a>] vfs_caches_init+0xb6/0x14c [<00000000005be794>] start_kernel+0x318/0x33c [<000000000010001c>] _stext+0x1c/0x80 Reported-by: Mike Frysinger Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cmpxchg.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 0f636cb..4236408 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -185,11 +185,12 @@ static inline unsigned long long __cmpxchg64(void *ptr, { register_pair rp_old = {.pair = old}; register_pair rp_new = {.pair = new}; + unsigned long long *ullptr = ptr; asm volatile( " cds %0,%2,%1" - : "+&d" (rp_old), "=Q" (ptr) - : "d" (rp_new), "Q" (ptr) + : "+d" (rp_old), "+Q" (*ullptr) + : "d" (rp_new) : "memory", "cc"); return rp_old.pair; } -- cgit v1.1 From 05e9181bdba4b1eb8f8eac5fd925df5223d16308 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 21 Jan 2014 14:08:38 +0100 Subject: s390/zfcpdump: make zfcpdump depend on 64BIT Get rid of this link error: arch/s390/built-in.o: In function `smp_prepare_cpus': (.init.text+0x301e): undefined reference to `dump_save_area_create' Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e9f3125..40f0c69 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -596,7 +596,7 @@ config CRASH_DUMP config ZFCPDUMP def_bool n prompt "zfcpdump support" - depends on SMP + depends on 64BIT && SMP help Select this option if you want to build an zfcpdump enabled kernel. Refer to for more details on this. -- cgit v1.1 From b03b467944b3e88a36a33b5429425c42dbd5b8a0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 17 Jan 2014 13:12:34 +0100 Subject: s390/uaccess: test if current->mm is set before walking page tables If get_fs() == USER_DS we better test if current->mm is not zero before walking page tables. The page table walk code would try to lock mm->page_table_lock, however if mm is zero this might crash. Now it is arguably incorrect trying to access userspace if current->mm is zero, however we have seen that and s390 would be the only architecture which would crash in such a case. So we better make the page table walk code a bit more robust and report always a fault instead. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/lib/uaccess_pt.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 0632dc5..5b77c34 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -153,6 +153,8 @@ static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr, unsigned long offset, done, size, kaddr; void *from, *to; + if (!mm) + return n; done = 0; retry: spin_lock(&mm->page_table_lock); @@ -262,6 +264,8 @@ static size_t strnlen_user_pt(size_t count, const char __user *src) return 0; if (segment_eq(get_fs(), KERNEL_DS)) return strnlen_kernel(count, src); + if (!mm) + return 0; done = 0; retry: spin_lock(&mm->page_table_lock); @@ -323,6 +327,8 @@ static size_t copy_in_user_pt(size_t n, void __user *to, if (segment_eq(get_fs(), KERNEL_DS)) return copy_in_kernel(n, to, from); + if (!mm) + return n; done = 0; retry: spin_lock(&mm->page_table_lock); @@ -411,6 +417,8 @@ int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) if (segment_eq(get_fs(), KERNEL_DS)) return __futex_atomic_op_pt(op, uaddr, oparg, old); + if (unlikely(!current->mm)) + return -EFAULT; spin_lock(¤t->mm->page_table_lock); uaddr = (u32 __force __user *) __dat_user_addr((__force unsigned long) uaddr, 1); @@ -448,6 +456,8 @@ int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, if (segment_eq(get_fs(), KERNEL_DS)) return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); + if (unlikely(!current->mm)) + return -EFAULT; spin_lock(¤t->mm->page_table_lock); uaddr = (u32 __force __user *) __dat_user_addr((__force unsigned long) uaddr, 1); -- cgit v1.1 From 0ff2fe5236a8b07742ae8f5d6f16bed2e31a1cf1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jan 2014 17:07:42 +0100 Subject: s390/uaccess: remove dead extern declarations, make functions static Remove some dead uaccess extern declarations and also make some functions static, since they are only used locally. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/lib/uaccess.h | 9 --------- arch/s390/lib/uaccess_pt.c | 4 ++-- 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h index 315dbe0..b1a2217 100644 --- a/arch/s390/lib/uaccess.h +++ b/arch/s390/lib/uaccess.h @@ -6,15 +6,6 @@ #ifndef __ARCH_S390_LIB_UACCESS_H #define __ARCH_S390_LIB_UACCESS_H -extern size_t copy_from_user_std(size_t, const void __user *, void *); -extern size_t copy_to_user_std(size_t, void __user *, const void *); -extern size_t strnlen_user_std(size_t, const char __user *); -extern size_t strncpy_from_user_std(size_t, const char __user *, char *); -extern int futex_atomic_cmpxchg_std(u32 *, u32 __user *, u32, u32); -extern int futex_atomic_op_std(int, u32 __user *, int, int *); - -extern size_t copy_from_user_pt(size_t, const void __user *, void *); -extern size_t copy_to_user_pt(size_t, void __user *, const void *); extern int futex_atomic_op_pt(int, u32 __user *, int, int *); extern int futex_atomic_cmpxchg_pt(u32 *, u32 __user *, u32, u32); diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 5b77c34..61ebcc9 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -211,7 +211,7 @@ fault: return 0; } -size_t copy_from_user_pt(size_t n, const void __user *from, void *to) +static size_t copy_from_user_pt(size_t n, const void __user *from, void *to) { size_t rc; @@ -223,7 +223,7 @@ size_t copy_from_user_pt(size_t n, const void __user *from, void *to) return rc; } -size_t copy_to_user_pt(size_t n, void __user *to, const void *from) +static size_t copy_to_user_pt(size_t n, void __user *to, const void *from) { if (segment_eq(get_fs(), KERNEL_DS)) return copy_in_kernel(n, to, (void __user *) from); -- cgit v1.1 From 4e078146dff728f4865270a47710d57797e81eb6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 21 Jan 2014 17:31:10 +0100 Subject: s390/uapi: fix struct statfs64 definition With b8668fd0a7e1b59f "s390/uapi: change struct statfs[64] member types to unsigned values" the size of a couple of struct statfs64 member got incorrectly changed from 64 to 32 bit for 32 bit builds. Fix this by changing the type of couple of struct statfs64 members from unsigned long to unsigned long long. The definition of struct compat_statfs64 was correct however. Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/statfs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/uapi/asm/statfs.h b/arch/s390/include/uapi/asm/statfs.h index a61d538..471eb09 100644 --- a/arch/s390/include/uapi/asm/statfs.h +++ b/arch/s390/include/uapi/asm/statfs.h @@ -35,11 +35,11 @@ struct statfs { struct statfs64 { unsigned int f_type; unsigned int f_bsize; - unsigned long f_blocks; - unsigned long f_bfree; - unsigned long f_bavail; - unsigned long f_files; - unsigned long f_ffree; + unsigned long long f_blocks; + unsigned long long f_bfree; + unsigned long long f_bavail; + unsigned long long f_files; + unsigned long long f_ffree; __kernel_fsid_t f_fsid; unsigned int f_namelen; unsigned int f_frsize; -- cgit v1.1 From fded4329da9e8486b434d6c4aceab1ab3f433d8a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 22 Jan 2014 12:52:38 +0100 Subject: s390: wire up sys_sched_setattr/sys_sched_getattr Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/unistd.h | 2 ++ arch/s390/kernel/compat_wrapper.S | 11 +++++++++++ arch/s390/kernel/syscalls.S | 2 ++ 3 files changed, 15 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 864f693..5eb5c9d 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -280,6 +280,8 @@ #define __NR_s390_runtime_instr 342 #define __NR_kcmp 343 #define __NR_finit_module 344 +#define __NR_sched_setattr 345 +#define __NR_sched_getattr 346 #define NR_syscalls 345 /* diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 9cb1b97..59c8efc 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1412,3 +1412,14 @@ ENTRY(sys_finit_module_wrapper) llgtr %r3,%r3 # const char __user * lgfr %r4,%r4 # int jg sys_finit_module + +ENTRY(sys_sched_setattr_wrapper) + lgfr %r2,%r2 # pid_t + llgtr %r3,%r3 # struct sched_attr __user * + jg sys_sched_setattr + +ENTRY(sys_sched_getattr_wrapper) + lgfr %r2,%r2 # pid_t + llgtr %r3,%r3 # const char __user * + llgfr %r3,%r3 # unsigned int + jg sys_sched_getattr diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 913410b..1439921 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -353,3 +353,5 @@ SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,sys_s390_runtime_instr_wrapper) SYSCALL(sys_kcmp,sys_kcmp,sys_kcmp_wrapper) SYSCALL(sys_finit_module,sys_finit_module,sys_finit_module_wrapper) +SYSCALL(sys_sched_setattr,sys_sched_setattr,sys_sched_setattr_wrapper) /* 345 */ +SYSCALL(sys_sched_getattr,sys_sched_getattr,sys_sched_getattr_wrapper) -- cgit v1.1 From 4a474157747ab7c4432ac269247e0e0e15f85584 Mon Sep 17 00:00:00 2001 From: Robert Graffham Date: Thu, 23 Jan 2014 15:55:29 -0800 Subject: Kconfig: update flightly outdated CONFIG_SMP documentation Remove an outdated reference to "most personal computers" having only one CPU, and change the use of "singleprocessor" and "single processor" in CONFIG_SMP's documentation to "uniprocessor" across all arches where that documentation is present. Signed-off-by: Robert Graffham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e9f3125..4f858f7 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -334,10 +334,10 @@ config SMP a system with only one CPU, like most personal computers, say N. If you have a system with more than one CPU, say Y. - If you say N here, the kernel will run on single and multiprocessor + If you say N here, the kernel will run on uni- and multiprocessor machines, but will use only one CPU of a multiprocessor machine. If you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel + uniprocessor machines. On a uniprocessor machine, the kernel will run faster if you say N here. See also the SMP-HOWTO available at -- cgit v1.1 From 07be0382097027cde68d9268cc628741069b5762 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 24 Jan 2014 09:18:52 +0100 Subject: s390/hypfs: add interface for diagnose 0x304 To provide access to the set-partition-resource-parameter interface to user space add a new attribute to hypfs/debugfs: * s390_hypsfs/diag_304 The data for the query-partition-resource-parameters command can be access by a read on the attribute. All other diagnose 0x304 requests need to be submitted via ioctl with CAP_SYS_ADMIN rights. Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/Makefile | 2 +- arch/s390/hypfs/hypfs.h | 7 ++ arch/s390/hypfs/hypfs_dbfs.c | 16 +++++ arch/s390/hypfs/hypfs_sprp.c | 141 +++++++++++++++++++++++++++++++++++++ arch/s390/hypfs/inode.c | 15 ++-- arch/s390/include/asm/sclp.h | 1 + arch/s390/include/uapi/asm/hypfs.h | 25 +++++++ 7 files changed, 202 insertions(+), 5 deletions(-) create mode 100644 arch/s390/hypfs/hypfs_sprp.c create mode 100644 arch/s390/include/uapi/asm/hypfs.h (limited to 'arch/s390') diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile index 2e671d5..06f8d95 100644 --- a/arch/s390/hypfs/Makefile +++ b/arch/s390/hypfs/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o -s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o +s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o hypfs_sprp.o diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h index 79f2ac5..b34b5ab 100644 --- a/arch/s390/hypfs/hypfs.h +++ b/arch/s390/hypfs/hypfs.h @@ -13,6 +13,7 @@ #include #include #include +#include #define REG_FILE_MODE 0440 #define UPDATE_FILE_MODE 0220 @@ -36,6 +37,10 @@ extern int hypfs_vm_init(void); extern void hypfs_vm_exit(void); extern int hypfs_vm_create_files(struct dentry *root); +/* Set Partition-Resource Parameter */ +int hypfs_sprp_init(void); +void hypfs_sprp_exit(void); + /* debugfs interface */ struct hypfs_dbfs_file; @@ -52,6 +57,8 @@ struct hypfs_dbfs_file { int (*data_create)(void **data, void **data_free_ptr, size_t *size); void (*data_free)(const void *buf_free_ptr); + long (*unlocked_ioctl) (struct file *, unsigned int, + unsigned long); /* Private data for hypfs_dbfs.c */ struct hypfs_dbfs_data *data; diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index 17ab8b7..2badf2b 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -81,9 +81,25 @@ static ssize_t dbfs_read(struct file *file, char __user *buf, return rc; } +static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct hypfs_dbfs_file *df; + long rc; + + df = file->f_path.dentry->d_inode->i_private; + mutex_lock(&df->lock); + if (df->unlocked_ioctl) + rc = df->unlocked_ioctl(file, cmd, arg); + else + rc = -ENOTTY; + mutex_unlock(&df->lock); + return rc; +} + static const struct file_operations dbfs_ops = { .read = dbfs_read, .llseek = no_llseek, + .unlocked_ioctl = dbfs_ioctl, }; int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df) diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c new file mode 100644 index 0000000..f043c3c --- /dev/null +++ b/arch/s390/hypfs/hypfs_sprp.c @@ -0,0 +1,141 @@ +/* + * Hypervisor filesystem for Linux on s390. + * Set Partition-Resource Parameter interface. + * + * Copyright IBM Corp. 2013 + * Author(s): Martin Schwidefsky + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "hypfs.h" + +#define DIAG304_SET_WEIGHTS 0 +#define DIAG304_QUERY_PRP 1 +#define DIAG304_SET_CAPPING 2 + +#define DIAG304_CMD_MAX 2 + +static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd) +{ + register unsigned long _data asm("2") = (unsigned long) data; + register unsigned long _rc asm("3"); + register unsigned long _cmd asm("4") = cmd; + + asm volatile("diag %1,%2,0x304\n" + : "=d" (_rc) : "d" (_data), "d" (_cmd) : "memory"); + + return _rc; +} + +static void hypfs_sprp_free(const void *data) +{ + free_page((unsigned long) data); +} + +static int hypfs_sprp_create(void **data_ptr, void **free_ptr, size_t *size) +{ + unsigned long rc; + void *data; + + data = (void *) get_zeroed_page(GFP_KERNEL); + if (!data) + return -ENOMEM; + rc = hypfs_sprp_diag304(data, DIAG304_QUERY_PRP); + if (rc != 1) { + *data_ptr = *free_ptr = NULL; + *size = 0; + free_page((unsigned long) data); + return -EIO; + } + *data_ptr = *free_ptr = data; + *size = PAGE_SIZE; + return 0; +} + +static int __hypfs_sprp_ioctl(void __user *user_area) +{ + struct hypfs_diag304 diag304; + unsigned long cmd; + void __user *udata; + void *data; + int rc; + + if (copy_from_user(&diag304, user_area, sizeof(diag304))) + return -EFAULT; + if ((diag304.args[0] >> 8) != 0 || diag304.args[1] > DIAG304_CMD_MAX) + return -EINVAL; + + data = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!data) + return -ENOMEM; + + udata = (void __user *)(unsigned long) diag304.data; + if (diag304.args[1] == DIAG304_SET_WEIGHTS || + diag304.args[1] == DIAG304_SET_CAPPING) + if (copy_from_user(data, udata, PAGE_SIZE)) { + rc = -EFAULT; + goto out; + } + + cmd = *(unsigned long *) &diag304.args[0]; + diag304.rc = hypfs_sprp_diag304(data, cmd); + + if (diag304.args[1] == DIAG304_QUERY_PRP) + if (copy_to_user(udata, data, PAGE_SIZE)) { + rc = -EFAULT; + goto out; + } + + rc = copy_to_user(user_area, &diag304, sizeof(diag304)) ? -EFAULT : 0; +out: + free_page((unsigned long) data); + return rc; +} + +static long hypfs_sprp_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + void __user *argp; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (is_compat_task()) + argp = compat_ptr(arg); + else + argp = (void __user *) arg; + switch (cmd) { + case HYPFS_DIAG304: + return __hypfs_sprp_ioctl(argp); + default: /* unknown ioctl number */ + return -ENOTTY; + } + return 0; +} + +static struct hypfs_dbfs_file hypfs_sprp_file = { + .name = "diag_304", + .data_create = hypfs_sprp_create, + .data_free = hypfs_sprp_free, + .unlocked_ioctl = hypfs_sprp_ioctl, +}; + +int hypfs_sprp_init(void) +{ + if (!sclp_has_sprp()) + return 0; + return hypfs_dbfs_create_file(&hypfs_sprp_file); +} + +void hypfs_sprp_exit(void) +{ + if (!sclp_has_sprp()) + return; + hypfs_dbfs_remove_file(&hypfs_sprp_file); +} diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index ddfe09b..c952b98 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -478,10 +478,14 @@ static int __init hypfs_init(void) rc = -ENODATA; goto fail_hypfs_diag_exit; } + if (hypfs_sprp_init()) { + rc = -ENODATA; + goto fail_hypfs_vm_exit; + } s390_kobj = kobject_create_and_add("s390", hypervisor_kobj); if (!s390_kobj) { rc = -ENOMEM; - goto fail_hypfs_vm_exit; + goto fail_hypfs_sprp_exit; } rc = register_filesystem(&hypfs_type); if (rc) @@ -490,6 +494,8 @@ static int __init hypfs_init(void) fail_filesystem: kobject_put(s390_kobj); +fail_hypfs_sprp_exit: + hypfs_sprp_exit(); fail_hypfs_vm_exit: hypfs_vm_exit(); fail_hypfs_diag_exit: @@ -502,11 +508,12 @@ fail_dbfs_exit: static void __exit hypfs_exit(void) { - hypfs_diag_exit(); - hypfs_vm_exit(); - hypfs_dbfs_exit(); unregister_filesystem(&hypfs_type); kobject_put(s390_kobj); + hypfs_sprp_exit(); + hypfs_vm_exit(); + hypfs_diag_exit(); + hypfs_dbfs_exit(); } module_init(hypfs_init) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 220e171..abaca22 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -54,6 +54,7 @@ int sclp_chp_read_info(struct sclp_chp_info *info); void sclp_get_ipl_info(struct sclp_ipl_info *info); bool __init sclp_has_linemode(void); bool __init sclp_has_vt220(void); +bool sclp_has_sprp(void); int sclp_pci_configure(u32 fid); int sclp_pci_deconfigure(u32 fid); int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode); diff --git a/arch/s390/include/uapi/asm/hypfs.h b/arch/s390/include/uapi/asm/hypfs.h new file mode 100644 index 0000000..37998b4 --- /dev/null +++ b/arch/s390/include/uapi/asm/hypfs.h @@ -0,0 +1,25 @@ +/* + * IOCTL interface for hypfs + * + * Copyright IBM Corp. 2013 + * + * Author: Martin Schwidefsky + */ + +#ifndef _ASM_HYPFS_CTL_H +#define _ASM_HYPFS_CTL_H + +#include + +struct hypfs_diag304 { + __u32 args[2]; + __u64 data; + __u64 rc; +} __attribute__((packed)); + +#define HYPFS_IOCTL_MAGIC 0x10 + +#define HYPFS_DIAG304 \ + _IOWR(HYPFS_IOCTL_MAGIC, 0x20, struct hypfs_diag304) + +#endif -- cgit v1.1 From b7c5b1aa2836c933ab03f90391619ebdc9112e46 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 28 Jan 2014 18:40:12 +0100 Subject: s390/appldata: restore missing init_virt_timer() Commit 27f6b416 "s390/vtimer: rework virtual timer interface" removed the call to init_virt_timer() by mistake, which is added again by this patch. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/appldata/appldata_base.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390') diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 4c4a1ce..47c8630 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -529,6 +529,7 @@ static int __init appldata_init(void) { int rc; + init_virt_timer(&appldata_timer); appldata_timer.function = appldata_timer_function; appldata_timer.data = (unsigned long) &appldata_work; -- cgit v1.1 From 49382d93852f1ba4a4fbbce20d094f600cc8aff8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 29 Jan 2014 14:05:47 -0800 Subject: s390/compat: change parameter types from unsigned long to compat_ulong_t Change parameter types of s390's compat ipc syscall from unsigned long to compat_ulong_t to enforce zero extension of these parameters. This is not really a bug, since s390_ipc compat syscall is only a wrapper to the generic compat_sys_ipc() syscall, which performs correct zero and sign extension. This was introduced with commit 56e41d3c5aa8 ("merge compat sys_ipc instances"). Signed-off-by: Heiko Carstens Cc: Al Viro Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Hendrik Brueckner Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/kernel/compat_linux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index e030d2b..db02052 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -286,8 +286,8 @@ asmlinkage long sys32_getegid16(void) } #ifdef CONFIG_SYSVIPC -COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, - unsigned long, third, compat_uptr_t, ptr) +COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second, + compat_ulong_t, third, compat_uptr_t, ptr) { if (call >> 16) /* hack for backward compatibility */ return -EINVAL; -- cgit v1.1 From 0519e9ad89e5cd6e6b08398f57c6a71d9580564c Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 16 Jan 2014 16:01:11 +0100 Subject: crypto: s390 - fix concurrency issue in aes-ctr mode The aes-ctr mode uses one preallocated page without any concurrency protection. When multiple threads run aes-ctr encryption or decryption this can lead to data corruption. The patch introduces locking for the page and a fallback solution with slower en/decryption performance in concurrency situations. Cc: stable@vger.kernel.org Signed-off-by: Harald Freudenberger Signed-off-by: Herbert Xu --- arch/s390/crypto/aes_s390.c | 65 ++++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 19 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index b3feabd..cf3c008 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "crypt_s390.h" #define AES_KEYLEN_128 1 @@ -32,6 +33,7 @@ #define AES_KEYLEN_256 4 static u8 *ctrblk; +static DEFINE_SPINLOCK(ctrblk_lock); static char keylen_flag; struct s390_aes_ctx { @@ -758,43 +760,67 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return aes_set_key(tfm, in_key, key_len); } +static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes) +{ + unsigned int i, n; + + /* only use complete blocks, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1); + for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) { + memcpy(ctrptr + i, ctrptr + i - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(ctrptr + i, AES_BLOCK_SIZE); + } + return n; +} + static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, struct s390_aes_ctx *sctx, struct blkcipher_walk *walk) { int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); - unsigned int i, n, nbytes; - u8 buf[AES_BLOCK_SIZE]; - u8 *out, *in; + unsigned int n, nbytes; + u8 buf[AES_BLOCK_SIZE], ctrbuf[AES_BLOCK_SIZE]; + u8 *out, *in, *ctrptr = ctrbuf; if (!walk->nbytes) return ret; - memcpy(ctrblk, walk->iv, AES_BLOCK_SIZE); + if (spin_trylock(&ctrblk_lock)) + ctrptr = ctrblk; + + memcpy(ctrptr, walk->iv, AES_BLOCK_SIZE); while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { out = walk->dst.virt.addr; in = walk->src.virt.addr; while (nbytes >= AES_BLOCK_SIZE) { - /* only use complete blocks, max. PAGE_SIZE */ - n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : - nbytes & ~(AES_BLOCK_SIZE - 1); - for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) { - memcpy(ctrblk + i, ctrblk + i - AES_BLOCK_SIZE, - AES_BLOCK_SIZE); - crypto_inc(ctrblk + i, AES_BLOCK_SIZE); - } - ret = crypt_s390_kmctr(func, sctx->key, out, in, n, ctrblk); - if (ret < 0 || ret != n) + if (ctrptr == ctrblk) + n = __ctrblk_init(ctrptr, nbytes); + else + n = AES_BLOCK_SIZE; + ret = crypt_s390_kmctr(func, sctx->key, out, in, + n, ctrptr); + if (ret < 0 || ret != n) { + if (ctrptr == ctrblk) + spin_unlock(&ctrblk_lock); return -EIO; + } if (n > AES_BLOCK_SIZE) - memcpy(ctrblk, ctrblk + n - AES_BLOCK_SIZE, + memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE, AES_BLOCK_SIZE); - crypto_inc(ctrblk, AES_BLOCK_SIZE); + crypto_inc(ctrptr, AES_BLOCK_SIZE); out += n; in += n; nbytes -= n; } ret = blkcipher_walk_done(desc, walk, nbytes); } + if (ctrptr == ctrblk) { + if (nbytes) + memcpy(ctrbuf, ctrptr, AES_BLOCK_SIZE); + else + memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE); + spin_unlock(&ctrblk_lock); + } /* * final block may be < AES_BLOCK_SIZE, copy only nbytes */ @@ -802,14 +828,15 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, out = walk->dst.virt.addr; in = walk->src.virt.addr; ret = crypt_s390_kmctr(func, sctx->key, buf, in, - AES_BLOCK_SIZE, ctrblk); + AES_BLOCK_SIZE, ctrbuf); if (ret < 0 || ret != AES_BLOCK_SIZE) return -EIO; memcpy(out, buf, nbytes); - crypto_inc(ctrblk, AES_BLOCK_SIZE); + crypto_inc(ctrbuf, AES_BLOCK_SIZE); ret = blkcipher_walk_done(desc, walk, 0); + memcpy(walk->iv, ctrbuf, AES_BLOCK_SIZE); } - memcpy(walk->iv, ctrblk, AES_BLOCK_SIZE); + return ret; } -- cgit v1.1 From adc3fcf1552b6e406d172fd9690bbd1395053d13 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 22 Jan 2014 13:00:04 +0100 Subject: crypto: s390 - fix des and des3_ede cbc concurrency issue In s390 des and des3_ede cbc mode the iv value is not protected against concurrency access and modifications from another running en/decrypt operation which is using the very same tfm struct instance. This fix copies the iv to the local stack before the crypto operation and stores the value back when done. Cc: stable@vger.kernel.org Signed-off-by: Harald Freudenberger Signed-off-by: Herbert Xu --- arch/s390/crypto/des_s390.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 200f2a1..82a0a2a 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -105,29 +105,35 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, } static int cbc_desall_crypt(struct blkcipher_desc *desc, long func, - u8 *iv, struct blkcipher_walk *walk) + struct blkcipher_walk *walk) { + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); int ret = blkcipher_walk_virt(desc, walk); unsigned int nbytes = walk->nbytes; + struct { + u8 iv[DES_BLOCK_SIZE]; + u8 key[DES3_KEY_SIZE]; + } param; if (!nbytes) goto out; - memcpy(iv, walk->iv, DES_BLOCK_SIZE); + memcpy(param.iv, walk->iv, DES_BLOCK_SIZE); + memcpy(param.key, ctx->key, DES3_KEY_SIZE); do { /* only use complete blocks */ unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1); u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = crypt_s390_kmc(func, iv, out, in, n); + ret = crypt_s390_kmc(func, ¶m, out, in, n); if (ret < 0 || ret != n) return -EIO; nbytes &= DES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, walk, nbytes); } while ((nbytes = walk->nbytes)); - memcpy(walk->iv, iv, DES_BLOCK_SIZE); + memcpy(walk->iv, param.iv, DES_BLOCK_SIZE); out: return ret; @@ -179,22 +185,20 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, ctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, &walk); } static int cbc_des_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, ctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, &walk); } static struct crypto_alg cbc_des_alg = { @@ -327,22 +331,20 @@ static int cbc_des3_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, ctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, &walk); } static int cbc_des3_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, ctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, &walk); } static struct crypto_alg cbc_des3_alg = { -- cgit v1.1 From ee97dc7db4cbda33e4241c2d85b42d1835bc8a35 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 22 Jan 2014 13:01:33 +0100 Subject: crypto: s390 - fix des and des3_ede ctr concurrency issue In s390 des and 3des ctr mode there is one preallocated page used to speed up the en/decryption. This page is not protected against concurrent usage and thus there is a potential of data corruption with multiple threads. The fix introduces locking/unlocking the ctr page and a slower fallback solution at concurrency situations. Cc: stable@vger.kernel.org Signed-off-by: Harald Freudenberger Signed-off-by: Herbert Xu --- arch/s390/crypto/des_s390.c | 69 +++++++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 21 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 82a0a2a..0a5aac8 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -25,6 +25,7 @@ #define DES3_KEY_SIZE (3 * DES_KEY_SIZE) static u8 *ctrblk; +static DEFINE_SPINLOCK(ctrblk_lock); struct s390_des_ctx { u8 iv[DES_BLOCK_SIZE]; @@ -368,54 +369,80 @@ static struct crypto_alg cbc_des3_alg = { } }; +static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes) +{ + unsigned int i, n; + + /* align to block size, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(DES_BLOCK_SIZE - 1); + for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) { + memcpy(ctrptr + i, ctrptr + i - DES_BLOCK_SIZE, DES_BLOCK_SIZE); + crypto_inc(ctrptr + i, DES_BLOCK_SIZE); + } + return n; +} + static int ctr_desall_crypt(struct blkcipher_desc *desc, long func, - struct s390_des_ctx *ctx, struct blkcipher_walk *walk) + struct s390_des_ctx *ctx, + struct blkcipher_walk *walk) { int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE); - unsigned int i, n, nbytes; - u8 buf[DES_BLOCK_SIZE]; - u8 *out, *in; + unsigned int n, nbytes; + u8 buf[DES_BLOCK_SIZE], ctrbuf[DES_BLOCK_SIZE]; + u8 *out, *in, *ctrptr = ctrbuf; + + if (!walk->nbytes) + return ret; - memcpy(ctrblk, walk->iv, DES_BLOCK_SIZE); + if (spin_trylock(&ctrblk_lock)) + ctrptr = ctrblk; + + memcpy(ctrptr, walk->iv, DES_BLOCK_SIZE); while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { out = walk->dst.virt.addr; in = walk->src.virt.addr; while (nbytes >= DES_BLOCK_SIZE) { - /* align to block size, max. PAGE_SIZE */ - n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : - nbytes & ~(DES_BLOCK_SIZE - 1); - for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) { - memcpy(ctrblk + i, ctrblk + i - DES_BLOCK_SIZE, - DES_BLOCK_SIZE); - crypto_inc(ctrblk + i, DES_BLOCK_SIZE); - } - ret = crypt_s390_kmctr(func, ctx->key, out, in, n, ctrblk); - if (ret < 0 || ret != n) + if (ctrptr == ctrblk) + n = __ctrblk_init(ctrptr, nbytes); + else + n = DES_BLOCK_SIZE; + ret = crypt_s390_kmctr(func, ctx->key, out, in, + n, ctrptr); + if (ret < 0 || ret != n) { + if (ctrptr == ctrblk) + spin_unlock(&ctrblk_lock); return -EIO; + } if (n > DES_BLOCK_SIZE) - memcpy(ctrblk, ctrblk + n - DES_BLOCK_SIZE, + memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE, DES_BLOCK_SIZE); - crypto_inc(ctrblk, DES_BLOCK_SIZE); + crypto_inc(ctrptr, DES_BLOCK_SIZE); out += n; in += n; nbytes -= n; } ret = blkcipher_walk_done(desc, walk, nbytes); } - + if (ctrptr == ctrblk) { + if (nbytes) + memcpy(ctrbuf, ctrptr, DES_BLOCK_SIZE); + else + memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE); + spin_unlock(&ctrblk_lock); + } /* final block may be < DES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { out = walk->dst.virt.addr; in = walk->src.virt.addr; ret = crypt_s390_kmctr(func, ctx->key, buf, in, - DES_BLOCK_SIZE, ctrblk); + DES_BLOCK_SIZE, ctrbuf); if (ret < 0 || ret != DES_BLOCK_SIZE) return -EIO; memcpy(out, buf, nbytes); - crypto_inc(ctrblk, DES_BLOCK_SIZE); + crypto_inc(ctrbuf, DES_BLOCK_SIZE); ret = blkcipher_walk_done(desc, walk, 0); + memcpy(walk->iv, ctrbuf, DES_BLOCK_SIZE); } - memcpy(walk->iv, ctrblk, DES_BLOCK_SIZE); return ret; } -- cgit v1.1 From d7736ff5be31edaa4fe5ab62810c64529a24b149 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 30 Jan 2014 16:14:02 +0100 Subject: s390/dump: Fix dump memory detection Dumps created by kdump or zfcpdump can contain invalid memory holes when dumping z/VM systems that have memory pressure. For example: # zgetdump -i /proc/vmcore. Memory map: 0000000000000000 - 0000000000bfffff (12 MB) 0000000000e00000 - 00000000014fffff (7 MB) 000000000bd00000 - 00000000f3bfffff (3711 MB) The memory detection function find_memory_chunks() issues tprot to find valid memory chunks. In case of CMM it can happen that pages are marked as unstable via set_page_unstable() in arch_free_page(). If z/VM has released that pages, tprot returns -EFAULT and indicates a memory hole. So fix this and switch off CMM in case of kdump or zfcpdump. Cc: Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/mm/page-states.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index a90d45e..27c50f4 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #define ESSA_SET_STABLE 1 #define ESSA_SET_UNUSED 2 @@ -41,6 +43,14 @@ void __init cmma_init(void) if (!cmma_flag) return; + /* + * Disable CMM for dump, otherwise the tprot based memory + * detection can fail because of unstable pages. + */ + if (OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP) { + cmma_flag = 0; + return; + } asm volatile( " .insn rrf,0xb9ab0000,%1,%1,0,0\n" "0: la %0,0\n" -- cgit v1.1 From 8d7f6690cedb83456edd41c9bd583783f0703bf0 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 3 Feb 2014 17:37:15 +0100 Subject: s390: fix kernel crash due to linkage stack instructions The kernel currently crashes with a low-address-protection exception if a user space process executes an instruction that tries to use the linkage stack. Set the base-ASTE origin and the subspace-ASTE origin of the dispatchable-unit-control-table to point to a dummy ASTE. Set up control register 15 to point to an empty linkage stack with no room left. A user space process with a linkage stack instruction will still crash but with a different exception which is correctly translated to a segmentation fault instead of a kernel oops. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head64.S | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index b9e25ae..d7c0050 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -59,7 +59,7 @@ ENTRY(startup_continue) .quad 0 # cr12: tracing off .quad 0 # cr13: home space segment table .quad 0xc0000000 # cr14: machine check handling off - .quad 0 # cr15: linkage stack operations + .quad .Llinkage_stack # cr15: linkage stack operations .Lpcmsk:.quad 0x0000000180000000 .L4malign:.quad 0xffffffffffc00000 .Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8 @@ -67,12 +67,15 @@ ENTRY(startup_continue) .Lparmaddr: .quad PARMAREA .align 64 -.Lduct: .long 0,0,0,0,.Lduald,0,0,0 +.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0 .long 0,0,0,0,0,0,0,0 +.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0 .align 128 .Lduald:.rept 8 .long 0x80000000,0,0,0 # invalid access-list entries .endr +.Llinkage_stack: + .long 0,0,0x89000000,0,0,0,0x8a000000,0 ENTRY(_ehead) -- cgit v1.1