diff options
40 files changed, 388 insertions, 214 deletions
diff --git a/Documentation/powerpc/transactional_memory.txt b/Documentation/powerpc/transactional_memory.txt index c907be4..dc23e58 100644 --- a/Documentation/powerpc/transactional_memory.txt +++ b/Documentation/powerpc/transactional_memory.txt @@ -147,6 +147,25 @@ Example signal handler: fix_the_problem(ucp->dar); } +When in an active transaction that takes a signal, we need to be careful with +the stack. It's possible that the stack has moved back up after the tbegin. +The obvious case here is when the tbegin is called inside a function that +returns before a tend. In this case, the stack is part of the checkpointed +transactional memory state. If we write over this non transactionally or in +suspend, we are in trouble because if we get a tm abort, the program counter and +stack pointer will be back at the tbegin but our in memory stack won't be valid +anymore. + +To avoid this, when taking a signal in an active transaction, we need to use +the stack pointer from the checkpointed state, rather than the speculated +state. This ensures that the signal context (written tm suspended) will be +written below the stack required for the rollback. The transaction is aborted +becuase of the treclaim, so any memory written between the tbegin and the +signal will be rolled back anyway. + +For signals taken in non-TM or suspended mode, we use the +normal/non-checkpointed stack pointer. + Failure cause codes used by kernel ================================== @@ -155,14 +174,18 @@ These are defined in <asm/reg.h>, and distinguish different reasons why the kernel aborted a transaction: TM_CAUSE_RESCHED Thread was rescheduled. + TM_CAUSE_TLBI Software TLB invalide. TM_CAUSE_FAC_UNAV FP/VEC/VSX unavailable trap. TM_CAUSE_SYSCALL Currently unused; future syscalls that must abort transactions for consistency will use this. TM_CAUSE_SIGNAL Signal delivered. TM_CAUSE_MISC Currently unused. + TM_CAUSE_ALIGNMENT Alignment fault. + TM_CAUSE_EMULATE Emulation that touched memory. -These can be checked by the user program's abort handler as TEXASR[0:7]. - +These can be checked by the user program's abort handler as TEXASR[0:7]. If +bit 7 is set, it indicates that the error is consider persistent. For example +a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not.q GDB === diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index cf4df8e..0c7f2bf 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -264,6 +264,7 @@ #define H_GET_MPP 0x2D4 #define H_HOME_NODE_ASSOCIATIVITY 0x2EC #define H_BEST_ENERGY 0x2F4 +#define H_XIRR_X 0x2FC #define H_RANDOM 0x300 #define H_COP 0x304 #define H_GET_MPP_X 0x314 diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index cea8496..2f1b6c5 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -523,6 +523,17 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,946) #define PPC440EP_ERR42 #endif +/* The following stops all load and store data streams associated with stream + * ID (ie. streams created explicitly). The embedded and server mnemonics for + * dcbt are different so we use machine "power4" here explicitly. + */ +#define DCBT_STOP_ALL_STREAM_IDS(scratch) \ +.machine push ; \ +.machine "power4" ; \ + lis scratch,0x60000000@h; \ + dcbt r0,scratch,0b01010; \ +.machine pop + /* * toreal/fromreal/tophys/tovirt macros. 32-bit BookE makes them * keep the address intact to be compatible with code shared with diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 594db6b..14a6583 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -409,21 +409,16 @@ static inline void prefetchw(const void *x) #endif #ifdef CONFIG_PPC64 -static inline unsigned long get_clean_sp(struct pt_regs *regs, int is_32) +static inline unsigned long get_clean_sp(unsigned long sp, int is_32) { - unsigned long sp; - if (is_32) - sp = regs->gpr[1] & 0x0ffffffffUL; - else - sp = regs->gpr[1]; - + return sp & 0x0ffffffffUL; return sp; } #else -static inline unsigned long get_clean_sp(struct pt_regs *regs, int is_32) +static inline unsigned long get_clean_sp(unsigned long sp, int is_32) { - return regs->gpr[1]; + return sp; } #endif diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index a613651..4a9e408 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -111,17 +111,6 @@ #define MSR_TM_TRANSACTIONAL(x) (((x) & MSR_TS_MASK) == MSR_TS_T) #define MSR_TM_SUSPENDED(x) (((x) & MSR_TS_MASK) == MSR_TS_S) -/* Reason codes describing kernel causes for transaction aborts. By - convention, bit0 is copied to TEXASR[56] (IBM bit 7) which is set if - the failure is persistent. -*/ -#define TM_CAUSE_RESCHED 0xfe -#define TM_CAUSE_TLBI 0xfc -#define TM_CAUSE_FAC_UNAV 0xfa -#define TM_CAUSE_SYSCALL 0xf9 /* Persistent */ -#define TM_CAUSE_MISC 0xf6 -#define TM_CAUSE_SIGNAL 0xf4 - #if defined(CONFIG_PPC_BOOK3S_64) #define MSR_64BIT MSR_SF diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h index fbe66c4..9322c28 100644 --- a/arch/powerpc/include/asm/signal.h +++ b/arch/powerpc/include/asm/signal.h @@ -3,5 +3,8 @@ #define __ARCH_HAS_SA_RESTORER #include <uapi/asm/signal.h> +#include <uapi/asm/ptrace.h> + +extern unsigned long get_tm_stackpointer(struct pt_regs *regs); #endif /* _ASM_POWERPC_SIGNAL_H */ diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h index 4b4449a..9dfbc34 100644 --- a/arch/powerpc/include/asm/tm.h +++ b/arch/powerpc/include/asm/tm.h @@ -5,6 +5,8 @@ * Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation. */ +#include <uapi/asm/tm.h> + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM extern void do_load_up_transact_fpu(struct thread_struct *thread); extern void do_load_up_transact_altivec(struct thread_struct *thread); diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index f7bca63..5182c86 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -40,6 +40,7 @@ header-y += statfs.h header-y += swab.h header-y += termbits.h header-y += termios.h +header-y += tm.h header-y += types.h header-y += ucontext.h header-y += unistd.h diff --git a/arch/powerpc/include/uapi/asm/tm.h b/arch/powerpc/include/uapi/asm/tm.h new file mode 100644 index 0000000..85059a0 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/tm.h @@ -0,0 +1,18 @@ +#ifndef _ASM_POWERPC_TM_H +#define _ASM_POWERPC_TM_H + +/* Reason codes describing kernel causes for transaction aborts. By + * convention, bit0 is copied to TEXASR[56] (IBM bit 7) which is set if + * the failure is persistent. PAPR saves 0xff-0xe0 for the hypervisor. + */ +#define TM_CAUSE_PERSISTENT 0x01 +#define TM_CAUSE_RESCHED 0xde +#define TM_CAUSE_TLBI 0xdc +#define TM_CAUSE_FAC_UNAV 0xda +#define TM_CAUSE_SYSCALL 0xd8 /* future use */ +#define TM_CAUSE_MISC 0xd6 /* future use */ +#define TM_CAUSE_SIGNAL 0xd4 +#define TM_CAUSE_ALIGNMENT 0xd2 +#define TM_CAUSE_EMULATE 0xd0 + +#endif diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c60bbec..1f0937d 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -453,7 +453,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .oprofile_type = PPC_OPROFILE_POWER4, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", + .oprofile_cpu_type = 0, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .platform = "power8", @@ -482,7 +482,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER7+ (raw)", .cpu_features = CPU_FTRS_POWER7, .cpu_user_features = COMMON_USER_POWER7, - .cpu_user_features = COMMON_USER2_POWER7, + .cpu_user_features2 = COMMON_USER2_POWER7, .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, @@ -506,7 +506,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power8", + .oprofile_cpu_type = 0, .oprofile_type = PPC_OPROFILE_POWER4, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index d22e73e..22b45a4 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -849,7 +849,7 @@ resume_kernel: /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ CURRENT_THREAD_INFO(r9, r1) lwz r8,TI_FLAGS(r9) - andis. r8,r8,_TIF_EMULATE_STACK_STORE@h + andis. r0,r8,_TIF_EMULATE_STACK_STORE@h beq+ 1f addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 0e9095e..246b11c 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -501,6 +501,13 @@ BEGIN_FTR_SECTION ldarx r6,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS) +#ifdef CONFIG_PPC_BOOK3S +/* Cancel all explict user streams as they will have no use after context + * switch and will stop the HW from creating streams itself + */ + DCBT_STOP_ALL_STREAM_IDS(r6) +#endif + addi r6,r4,-THREAD /* Convert THREAD to 'current' */ std r6,PACACURRENT(r13) /* Set new 'current' */ diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index e9acf50..7f2273c 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -657,15 +657,6 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar, * ranges. However, some machines (thanks Apple !) tend to split their * space into lots of small contiguous ranges. So we have to coalesce. * - * - We can only cope with all memory ranges having the same offset - * between CPU addresses and PCI addresses. Unfortunately, some bridges - * are setup for a large 1:1 mapping along with a small "window" which - * maps PCI address 0 to some arbitrary high address of the CPU space in - * order to give access to the ISA memory hole. - * The way out of here that I've chosen for now is to always set the - * offset based on the first resource found, then override it if we - * have a different offset and the previous was set by an ISA hole. - * * - Some busses have IO space not starting at 0, which causes trouble with * the way we do our IO resource renumbering. The code somewhat deals with * it for 64 bits but I would expect problems on 32 bits. @@ -680,10 +671,9 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose, int rlen; int pna = of_n_addr_cells(dev); int np = pna + 5; - int memno = 0, isa_hole = -1; + int memno = 0; u32 pci_space; unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size; - unsigned long long isa_mb = 0; struct resource *res; printk(KERN_INFO "PCI host bridge %s %s ranges:\n", @@ -777,8 +767,6 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose, } /* Handles ISA memory hole space here */ if (pci_addr == 0) { - isa_mb = cpu_addr; - isa_hole = memno; if (primary || isa_mem_base == 0) isa_mem_base = cpu_addr; hose->isa_mem_phys = cpu_addr; diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 577a8aa..457e97a 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -18,6 +18,7 @@ #include <asm/uaccess.h> #include <asm/unistd.h> #include <asm/debug.h> +#include <asm/tm.h> #include "signal.h" @@ -30,13 +31,13 @@ int show_unhandled_signals = 1; /* * Allocate space for the signal frame */ -void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, +void __user * get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size, int is_32) { unsigned long oldsp, newsp; /* Default to using normal stack */ - oldsp = get_clean_sp(regs, is_32); + oldsp = get_clean_sp(sp, is_32); /* Check for alt stack */ if ((ka->sa.sa_flags & SA_ONSTACK) && @@ -175,3 +176,38 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) user_enter(); } + +unsigned long get_tm_stackpointer(struct pt_regs *regs) +{ + /* When in an active transaction that takes a signal, we need to be + * careful with the stack. It's possible that the stack has moved back + * up after the tbegin. The obvious case here is when the tbegin is + * called inside a function that returns before a tend. In this case, + * the stack is part of the checkpointed transactional memory state. + * If we write over this non transactionally or in suspend, we are in + * trouble because if we get a tm abort, the program counter and stack + * pointer will be back at the tbegin but our in memory stack won't be + * valid anymore. + * + * To avoid this, when taking a signal in an active transaction, we + * need to use the stack pointer from the checkpointed state, rather + * than the speculated state. This ensures that the signal context + * (written tm suspended) will be written below the stack required for + * the rollback. The transaction is aborted becuase of the treclaim, + * so any memory written between the tbegin and the signal will be + * rolled back anyway. + * + * For signals taken in non-TM or suspended mode, we use the + * normal/non-checkpointed stack pointer. + */ + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(regs->msr)) { + tm_enable(); + tm_reclaim(¤t->thread, regs->msr, TM_CAUSE_SIGNAL); + if (MSR_TM_TRANSACTIONAL(regs->msr)) + return current->thread.ckpt_regs.gpr[1]; + } +#endif + return regs->gpr[1]; +} diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index ec84c90..c69b9ae 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -12,7 +12,7 @@ extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); -extern void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, +extern void __user * get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size, int is_32); extern int handle_signal32(unsigned long sig, struct k_sigaction *ka, diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 95068bf..201385c 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -503,12 +503,6 @@ static int save_tm_user_regs(struct pt_regs *regs, { unsigned long msr = regs->msr; - /* tm_reclaim rolls back all reg states, updating thread.ckpt_regs, - * thread.transact_fpr[], thread.transact_vr[], etc. - */ - tm_enable(); - tm_reclaim(¤t->thread, msr, TM_CAUSE_SIGNAL); - /* Make sure floating point registers are stored in regs */ flush_fp_to_thread(current); @@ -965,7 +959,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, /* Set up Signal Frame */ /* Put a Real Time Context onto stack */ - rt_sf = get_sigframe(ka, regs, sizeof(*rt_sf), 1); + rt_sf = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*rt_sf), 1); addr = rt_sf; if (unlikely(rt_sf == NULL)) goto badframe; @@ -1403,7 +1397,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, unsigned long tramp; /* Set up Signal Frame */ - frame = get_sigframe(ka, regs, sizeof(*frame), 1); + frame = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*frame), 1); if (unlikely(frame == NULL)) goto badframe; sc = (struct sigcontext __user *) &frame->sctx; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index c179428..3459473 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -154,11 +154,12 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, * As above, but Transactional Memory is in use, so deliver sigcontexts * containing checkpointed and transactional register states. * - * To do this, we treclaim to gather both sets of registers and set up the - * 'normal' sigcontext registers with rolled-back register values such that a - * simple signal handler sees a correct checkpointed register state. - * If interested, a TM-aware sighandler can examine the transactional registers - * in the 2nd sigcontext to determine the real origin of the signal. + * To do this, we treclaim (done before entering here) to gather both sets of + * registers and set up the 'normal' sigcontext registers with rolled-back + * register values such that a simple signal handler sees a correct + * checkpointed register state. If interested, a TM-aware sighandler can + * examine the transactional registers in the 2nd sigcontext to determine the + * real origin of the signal. */ static long setup_tm_sigcontexts(struct sigcontext __user *sc, struct sigcontext __user *tm_sc, @@ -184,16 +185,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, BUG_ON(!MSR_TM_ACTIVE(regs->msr)); - /* tm_reclaim rolls back all reg states, saving checkpointed (older) - * GPRs to thread.ckpt_regs and (if used) FPRs to (newer) - * thread.transact_fp and/or VRs to (newer) thread.transact_vr. - * THEN we save out FP/VRs, if necessary, to the checkpointed (older) - * thread.fr[]/vr[]s. The transactional (newer) GPRs are on the - * stack, in *regs. - */ - tm_enable(); - tm_reclaim(¤t->thread, msr, TM_CAUSE_SIGNAL); - flush_fp_to_thread(current); #ifdef CONFIG_ALTIVEC @@ -711,7 +702,7 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, unsigned long newsp = 0; long err = 0; - frame = get_sigframe(ka, regs, sizeof(*frame), 0); + frame = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*frame), 0); if (unlikely(frame == NULL)) goto badframe; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index a7a648f..f18c79c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -53,6 +53,7 @@ #ifdef CONFIG_PPC64 #include <asm/firmware.h> #include <asm/processor.h> +#include <asm/tm.h> #endif #include <asm/kexec.h> #include <asm/ppc-opcode.h> @@ -932,6 +933,28 @@ static int emulate_isel(struct pt_regs *regs, u32 instword) return 0; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static inline bool tm_abort_check(struct pt_regs *regs, int cause) +{ + /* If we're emulating a load/store in an active transaction, we cannot + * emulate it as the kernel operates in transaction suspended context. + * We need to abort the transaction. This creates a persistent TM + * abort so tell the user what caused it with a new code. + */ + if (MSR_TM_TRANSACTIONAL(regs->msr)) { + tm_enable(); + tm_abort(cause); + return true; + } + return false; +} +#else +static inline bool tm_abort_check(struct pt_regs *regs, int reason) +{ + return false; +} +#endif + static int emulate_instruction(struct pt_regs *regs) { u32 instword; @@ -971,6 +994,9 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulate load/store string insn. */ if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) { + if (tm_abort_check(regs, + TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT)) + return -EINVAL; PPC_WARN_EMULATED(string, regs); return emulate_string_inst(regs, instword); } @@ -1148,6 +1174,9 @@ void alignment_exception(struct pt_regs *regs) if (!arch_irq_disabled_regs(regs)) local_irq_enable(); + if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT)) + goto bail; + /* we don't implement logging of alignment exceptions */ if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS)) fixed = fix_alignment(regs); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9de24f8..550f592 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -562,6 +562,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) case H_CPPR: case H_EOI: case H_IPI: + case H_IPOLL: + case H_XIRR_X: if (kvmppc_xics_enabled(vcpu)) { ret = kvmppc_xics_hcall(vcpu, req); break; diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index b24309c..da0e0bc 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -257,6 +257,8 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) case H_CPPR: case H_EOI: case H_IPI: + case H_IPOLL: + case H_XIRR_X: if (kvmppc_xics_enabled(vcpu)) return kvmppc_h_pr_xics_hcall(vcpu, cmd); break; diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index f7a1037..94c1dd4 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -650,6 +650,23 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, return H_SUCCESS; } +static int kvmppc_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server) +{ + union kvmppc_icp_state state; + struct kvmppc_icp *icp; + + icp = vcpu->arch.icp; + if (icp->server_num != server) { + icp = kvmppc_xics_find_server(vcpu->kvm, server); + if (!icp) + return H_PARAMETER; + } + state = ACCESS_ONCE(icp->state); + kvmppc_set_gpr(vcpu, 4, ((u32)state.cppr << 24) | state.xisr); + kvmppc_set_gpr(vcpu, 5, state.mfrr); + return H_SUCCESS; +} + static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) { union kvmppc_icp_state old_state, new_state; @@ -787,6 +804,18 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) if (!xics || !vcpu->arch.icp) return H_HARDWARE; + /* These requests don't have real-mode implementations at present */ + switch (req) { + case H_XIRR_X: + res = kvmppc_h_xirr(vcpu); + kvmppc_set_gpr(vcpu, 4, res); + kvmppc_set_gpr(vcpu, 5, get_tb()); + return rc; + case H_IPOLL: + rc = kvmppc_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4)); + return rc; + } + /* Check for real mode returning too hard */ if (xics->real_mode) return kvmppc_xics_rm_complete(vcpu, req); diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S index 0ef75bf..395c594 100644 --- a/arch/powerpc/lib/copypage_power7.S +++ b/arch/powerpc/lib/copypage_power7.S @@ -28,13 +28,14 @@ _GLOBAL(copypage_power7) * aligned we don't need to clear the bottom 7 bits of either * address. */ - ori r9,r3,1 /* stream=1 */ + ori r9,r3,1 /* stream=1 => to */ #ifdef CONFIG_PPC_64K_PAGES - lis r7,0x0E01 /* depth=7, units=512 */ + lis r7,0x0E01 /* depth=7 + * units/cachelines=512 */ #else lis r7,0x0E00 /* depth=7 */ - ori r7,r7,0x1000 /* units=32 */ + ori r7,r7,0x1000 /* units/cachelines=32 */ #endif ori r10,r7,1 /* stream=1 */ @@ -43,12 +44,14 @@ _GLOBAL(copypage_power7) .machine push .machine "power4" - dcbt r0,r4,0b01000 - dcbt r0,r7,0b01010 - dcbtst r0,r9,0b01000 - dcbtst r0,r10,0b01010 + /* setup read stream 0 */ + dcbt r0,r4,0b01000 /* addr from */ + dcbt r0,r7,0b01010 /* length and depth from */ + /* setup write stream 1 */ + dcbtst r0,r9,0b01000 /* addr to */ + dcbtst r0,r10,0b01010 /* length and depth to */ eieio - dcbt r0,r8,0b01010 /* GO */ + dcbt r0,r8,0b01010 /* all streams GO */ .machine pop #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 0d24ff1..d1f1179 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -318,12 +318,14 @@ err1; stb r0,0(r3) .machine push .machine "power4" - dcbt r0,r6,0b01000 - dcbt r0,r7,0b01010 - dcbtst r0,r9,0b01000 - dcbtst r0,r10,0b01010 + /* setup read stream 0 */ + dcbt r0,r6,0b01000 /* addr from */ + dcbt r0,r7,0b01010 /* length and depth from */ + /* setup write stream 1 */ + dcbtst r0,r9,0b01000 /* addr to */ + dcbtst r0,r10,0b01010 /* length and depth to */ eieio - dcbt r0,r8,0b01010 /* GO */ + dcbt r0,r8,0b01010 /* all streams GO */ .machine pop beq cr1,.Lunwind_stack_nonvmx_copy diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 6a2aead..4c122c3 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -336,11 +336,18 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, hpte_v = hptep->v; actual_psize = hpte_actual_psize(hptep, psize); + /* + * We need to invalidate the TLB always because hpte_remove doesn't do + * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less + * random entry from it. When we do that we don't invalidate the TLB + * (hpte_remove) because we assume the old translation is still + * technically "valid". + */ if (actual_psize < 0) { - native_unlock_hpte(hptep); - return -1; + actual_psize = psize; + ret = -1; + goto err_out; } - /* Even if we miss, we need to invalidate the TLB */ if (!HPTE_V_COMPARE(hpte_v, want_v)) { DBG_LOW(" -> miss\n"); ret = -1; @@ -350,6 +357,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)); } +err_out: native_unlock_hpte(hptep); /* Ensure it is out of the tlb too. */ @@ -409,7 +417,7 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, hptep = htab_address + slot; actual_psize = hpte_actual_psize(hptep, psize); if (actual_psize < 0) - return; + actual_psize = psize; /* Update the HPTE */ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | @@ -437,21 +445,27 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, hpte_v = hptep->v; actual_psize = hpte_actual_psize(hptep, psize); + /* + * We need to invalidate the TLB always because hpte_remove doesn't do + * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less + * random entry from it. When we do that we don't invalidate the TLB + * (hpte_remove) because we assume the old translation is still + * technically "valid". + */ if (actual_psize < 0) { + actual_psize = psize; native_unlock_hpte(hptep); - local_irq_restore(flags); - return; + goto err_out; } - /* Even if we miss, we need to invalidate the TLB */ if (!HPTE_V_COMPARE(hpte_v, want_v)) native_unlock_hpte(hptep); else /* Invalidate the hpte. NOTE: this also unlocks it */ hptep->v = 0; +err_out: /* Invalidate the TLB */ tlbie(vpn, psize, actual_psize, ssize, local); - local_irq_restore(flags); } diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 426180b..845c867 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -110,7 +110,7 @@ static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} static bool regs_use_siar(struct pt_regs *regs) { - return !!(regs->result & 1); + return !!regs->result; } /* @@ -136,22 +136,30 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) * If we're not doing instruction sampling, give them the SDAR * (sampled data address). If we are doing instruction sampling, then * only give them the SDAR if it corresponds to the instruction - * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC or - * the [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA. + * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the + * [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER. */ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { unsigned long mmcra = regs->dsisr; - unsigned long sdsync; + bool sdar_valid; - if (ppmu->flags & PPMU_SIAR_VALID) - sdsync = POWER7P_MMCRA_SDAR_VALID; - else if (ppmu->flags & PPMU_ALT_SIPR) - sdsync = POWER6_MMCRA_SDSYNC; - else - sdsync = MMCRA_SDSYNC; + if (ppmu->flags & PPMU_HAS_SIER) + sdar_valid = regs->dar & SIER_SDAR_VALID; + else { + unsigned long sdsync; + + if (ppmu->flags & PPMU_SIAR_VALID) + sdsync = POWER7P_MMCRA_SDAR_VALID; + else if (ppmu->flags & PPMU_ALT_SIPR) + sdsync = POWER6_MMCRA_SDSYNC; + else + sdsync = MMCRA_SDSYNC; + + sdar_valid = mmcra & sdsync; + } - if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) + if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid) *addrp = mfspr(SPRN_SDAR); } @@ -181,11 +189,6 @@ static bool regs_sipr(struct pt_regs *regs) return !!(regs->dsisr & sipr); } -static bool regs_no_sipr(struct pt_regs *regs) -{ - return !!(regs->result & 2); -} - static inline u32 perf_flags_from_msr(struct pt_regs *regs) { if (regs->msr & MSR_PR) @@ -208,7 +211,7 @@ static inline u32 perf_get_misc_flags(struct pt_regs *regs) * SIAR which should give slightly more reliable * results */ - if (regs_no_sipr(regs)) { + if (ppmu->flags & PPMU_NO_SIPR) { unsigned long siar = mfspr(SPRN_SIAR); if (siar >= PAGE_OFFSET) return PERF_RECORD_MISC_KERNEL; @@ -239,22 +242,9 @@ static inline void perf_read_regs(struct pt_regs *regs) int use_siar; regs->dsisr = mmcra; - regs->result = 0; - - if (ppmu->flags & PPMU_NO_SIPR) - regs->result |= 2; - - /* - * On power8 if we're in random sampling mode, the SIER is updated. - * If we're in continuous sampling mode, we don't have SIPR. - */ - if (ppmu->flags & PPMU_HAS_SIER) { - if (marked) - regs->dar = mfspr(SPRN_SIER); - else - regs->result |= 2; - } + if (ppmu->flags & PPMU_HAS_SIER) + regs->dar = mfspr(SPRN_SIER); /* * If this isn't a PMU exception (eg a software event) the SIAR is @@ -279,12 +269,12 @@ static inline void perf_read_regs(struct pt_regs *regs) use_siar = 1; else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING)) use_siar = 0; - else if (!regs_no_sipr(regs) && regs_sipr(regs)) + else if (!(ppmu->flags & PPMU_NO_SIPR) && regs_sipr(regs)) use_siar = 0; else use_siar = 1; - regs->result |= use_siar; + regs->result = use_siar; } /* @@ -308,8 +298,13 @@ static inline int siar_valid(struct pt_regs *regs) unsigned long mmcra = regs->dsisr; int marked = mmcra & MMCRA_SAMPLE_ENABLE; - if ((ppmu->flags & PPMU_SIAR_VALID) && marked) - return mmcra & POWER7P_MMCRA_SIAR_VALID; + if (marked) { + if (ppmu->flags & PPMU_HAS_SIER) + return regs->dar & SIER_SIAR_VALID; + + if (ppmu->flags & PPMU_SIAR_VALID) + return mmcra & POWER7P_MMCRA_SIAR_VALID; + } return 1; } diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 023b288..4459eff 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -19,6 +19,8 @@ config PPC_PSERIES select ZLIB_DEFLATE select PPC_DOORBELL select HAVE_CONTEXT_TRACKING + select HOTPLUG if SMP + select HOTPLUG_CPU if SMP default y config PPC_SPLPAR diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 0a13ecb..3cc2f91 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -54,7 +54,7 @@ static DEFINE_RAW_SPINLOCK(mpic_lock); #ifdef CONFIG_PPC32 /* XXX for now */ #ifdef CONFIG_IRQ_ALL_CPUS -#define distribute_irqs (!(mpic->flags & MPIC_SINGLE_DEST_CPU)) +#define distribute_irqs (1) #else #define distribute_irqs (0) #endif @@ -1703,7 +1703,7 @@ void mpic_setup_this_cpu(void) * it differently, then we should make sure we also change the default * values of irq_desc[].affinity in irq.c. */ - if (distribute_irqs) { + if (distribute_irqs && !(mpic->flags & MPIC_SINGLE_DEST_CPU)) { for (i = 0; i < mpic->num_sources ; i++) mpic_irq_write(i, MPIC_INFO(IRQ_DESTINATION), mpic_irq_read(i, MPIC_INFO(IRQ_DESTINATION)) | msk); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index b08ca7a..3f3f041 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2227,6 +2227,27 @@ static void srpt_close_ch(struct srpt_rdma_ch *ch) } /** + * srpt_shutdown_session() - Whether or not a session may be shut down. + */ +static int srpt_shutdown_session(struct se_session *se_sess) +{ + struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr; + unsigned long flags; + + spin_lock_irqsave(&ch->spinlock, flags); + if (ch->in_shutdown) { + spin_unlock_irqrestore(&ch->spinlock, flags); + return true; + } + + ch->in_shutdown = true; + target_sess_cmd_list_set_waiting(se_sess); + spin_unlock_irqrestore(&ch->spinlock, flags); + + return true; +} + +/** * srpt_drain_channel() - Drain a channel by resetting the IB queue pair. * @cm_id: Pointer to the CM ID of the channel to be drained. * @@ -2264,6 +2285,9 @@ static void srpt_drain_channel(struct ib_cm_id *cm_id) spin_unlock_irq(&sdev->spinlock); if (do_reset) { + if (ch->sess) + srpt_shutdown_session(ch->sess); + ret = srpt_ch_qp_err(ch); if (ret < 0) printk(KERN_ERR "Setting queue pair in error state" @@ -2328,7 +2352,7 @@ static void srpt_release_channel_work(struct work_struct *w) se_sess = ch->sess; BUG_ON(!se_sess); - target_wait_for_sess_cmds(se_sess, 0); + target_wait_for_sess_cmds(se_sess); transport_deregister_session_configfs(se_sess); transport_deregister_session(se_sess); @@ -3467,14 +3491,6 @@ static void srpt_release_cmd(struct se_cmd *se_cmd) } /** - * srpt_shutdown_session() - Whether or not a session may be shut down. - */ -static int srpt_shutdown_session(struct se_session *se_sess) -{ - return true; -} - -/** * srpt_close_session() - Forcibly close a session. * * Callback function invoked by the TCM core to clean up sessions associated diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 4caf55c..3dae156 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -325,6 +325,7 @@ struct srpt_rdma_ch { u8 sess_name[36]; struct work_struct release_work; struct completion *release_done; + bool in_shutdown; }; /** diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index d182c96..7a3870f 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1370,7 +1370,7 @@ static void tcm_qla2xxx_free_session(struct qla_tgt_sess *sess) dump_stack(); return; } - target_wait_for_sess_cmds(se_sess, 0); + target_wait_for_sess_cmds(se_sess); transport_deregister_session_configfs(sess->se_sess); transport_deregister_session(sess->se_sess); diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 262ef1f..d7705e5 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -651,7 +651,7 @@ static int iscsit_add_reject( cmd->buf_ptr = kmemdup(buf, ISCSI_HDR_LEN, GFP_KERNEL); if (!cmd->buf_ptr) { pr_err("Unable to allocate memory for cmd->buf_ptr\n"); - iscsit_release_cmd(cmd); + iscsit_free_cmd(cmd, false); return -1; } @@ -697,7 +697,7 @@ int iscsit_add_reject_from_cmd( cmd->buf_ptr = kmemdup(buf, ISCSI_HDR_LEN, GFP_KERNEL); if (!cmd->buf_ptr) { pr_err("Unable to allocate memory for cmd->buf_ptr\n"); - iscsit_release_cmd(cmd); + iscsit_free_cmd(cmd, false); return -1; } @@ -1743,7 +1743,7 @@ int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, return 0; out: if (cmd) - iscsit_release_cmd(cmd); + iscsit_free_cmd(cmd, false); ping_out: kfree(ping_data); return ret; @@ -2251,7 +2251,7 @@ iscsit_handle_logout_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (conn->conn_state != TARG_CONN_STATE_LOGGED_IN) { pr_err("Received logout request on connection that" " is not in logged in state, ignoring request.\n"); - iscsit_release_cmd(cmd); + iscsit_free_cmd(cmd, false); return 0; } @@ -3665,7 +3665,7 @@ iscsit_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state list_del(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); - iscsit_free_cmd(cmd); + iscsit_free_cmd(cmd, false); break; case ISTATE_SEND_NOPIN_WANT_RESPONSE: iscsit_mod_nopin_response_timer(conn); @@ -4122,7 +4122,7 @@ static void iscsit_release_commands_from_conn(struct iscsi_conn *conn) iscsit_increment_maxcmdsn(cmd, sess); - iscsit_free_cmd(cmd); + iscsit_free_cmd(cmd, true); spin_lock_bh(&conn->cmd_lock); } diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c index ba6091b..45a5afd 100644 --- a/drivers/target/iscsi/iscsi_target_erl2.c +++ b/drivers/target/iscsi/iscsi_target_erl2.c @@ -143,7 +143,7 @@ void iscsit_free_connection_recovery_entires(struct iscsi_session *sess) list_del(&cmd->i_conn_node); cmd->conn = NULL; spin_unlock(&cr->conn_recovery_cmd_lock); - iscsit_free_cmd(cmd); + iscsit_free_cmd(cmd, true); spin_lock(&cr->conn_recovery_cmd_lock); } spin_unlock(&cr->conn_recovery_cmd_lock); @@ -165,7 +165,7 @@ void iscsit_free_connection_recovery_entires(struct iscsi_session *sess) list_del(&cmd->i_conn_node); cmd->conn = NULL; spin_unlock(&cr->conn_recovery_cmd_lock); - iscsit_free_cmd(cmd); + iscsit_free_cmd(cmd, true); spin_lock(&cr->conn_recovery_cmd_lock); } spin_unlock(&cr->conn_recovery_cmd_lock); @@ -248,7 +248,7 @@ void iscsit_discard_cr_cmds_by_expstatsn( iscsit_remove_cmd_from_connection_recovery(cmd, sess); spin_unlock(&cr->conn_recovery_cmd_lock); - iscsit_free_cmd(cmd); + iscsit_free_cmd(cmd, true); spin_lock(&cr->conn_recovery_cmd_lock); } spin_unlock(&cr->conn_recovery_cmd_lock); @@ -302,7 +302,7 @@ int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *conn) list_del(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); - iscsit_free_cmd(cmd); + iscsit_free_cmd(cmd, true); spin_lock_bh(&conn->cmd_lock); } spin_unlock_bh(&conn->cmd_lock); @@ -355,7 +355,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) list_del(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); - iscsit_free_cmd(cmd); + iscsit_free_cmd(cmd, true); spin_lock_bh(&conn->cmd_lock); continue; } @@ -375,7 +375,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) iscsi_sna_gte(cmd->cmd_sn, conn->sess->exp_cmd_sn)) { list_del(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); - iscsit_free_cmd(cmd); + iscsit_free_cmd(cmd, true); spin_lock_bh(&conn->cmd_lock); continue; } diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index c2185fc..e382221 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -758,9 +758,9 @@ static int iscsi_add_notunderstood_response( } INIT_LIST_HEAD(&extra_response->er_list); - strncpy(extra_response->key, key, strlen(key) + 1); - strncpy(extra_response->value, NOTUNDERSTOOD, - strlen(NOTUNDERSTOOD) + 1); + strlcpy(extra_response->key, key, sizeof(extra_response->key)); + strlcpy(extra_response->value, NOTUNDERSTOOD, + sizeof(extra_response->value)); list_add_tail(&extra_response->er_list, ¶m_list->extra_response_list); @@ -1629,8 +1629,6 @@ int iscsi_decode_text_input( if (phase & PHASE_SECURITY) { if (iscsi_check_for_auth_key(key) > 0) { - char *tmpptr = key + strlen(key); - *tmpptr = '='; kfree(tmpbuf); return 1; } diff --git a/drivers/target/iscsi/iscsi_target_parameters.h b/drivers/target/iscsi/iscsi_target_parameters.h index 915b067..a47046a 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.h +++ b/drivers/target/iscsi/iscsi_target_parameters.h @@ -1,8 +1,10 @@ #ifndef ISCSI_PARAMETERS_H #define ISCSI_PARAMETERS_H +#include <scsi/iscsi_proto.h> + struct iscsi_extra_response { - char key[64]; + char key[KEY_MAXLEN]; char value[32]; struct list_head er_list; } ____cacheline_aligned; diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 2cc6c9a..08a3bac 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -676,40 +676,56 @@ void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *conn) void iscsit_release_cmd(struct iscsi_cmd *cmd) { - struct iscsi_conn *conn = cmd->conn; - - iscsit_free_r2ts_from_list(cmd); - iscsit_free_all_datain_reqs(cmd); - kfree(cmd->buf_ptr); kfree(cmd->pdu_list); kfree(cmd->seq_list); kfree(cmd->tmr_req); kfree(cmd->iov_data); - if (conn) { + kmem_cache_free(lio_cmd_cache, cmd); +} + +static void __iscsit_free_cmd(struct iscsi_cmd *cmd, bool scsi_cmd, + bool check_queues) +{ + struct iscsi_conn *conn = cmd->conn; + + if (scsi_cmd) { + if (cmd->data_direction == DMA_TO_DEVICE) { + iscsit_stop_dataout_timer(cmd); + iscsit_free_r2ts_from_list(cmd); + } + if (cmd->data_direction == DMA_FROM_DEVICE) + iscsit_free_all_datain_reqs(cmd); + } + + if (conn && check_queues) { iscsit_remove_cmd_from_immediate_queue(cmd, conn); iscsit_remove_cmd_from_response_queue(cmd, conn); } - - kmem_cache_free(lio_cmd_cache, cmd); } -void iscsit_free_cmd(struct iscsi_cmd *cmd) +void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) { + struct se_cmd *se_cmd = NULL; + int rc; /* * Determine if a struct se_cmd is associated with * this struct iscsi_cmd. */ switch (cmd->iscsi_opcode) { case ISCSI_OP_SCSI_CMD: - if (cmd->data_direction == DMA_TO_DEVICE) - iscsit_stop_dataout_timer(cmd); + se_cmd = &cmd->se_cmd; + __iscsit_free_cmd(cmd, true, shutdown); /* * Fallthrough */ case ISCSI_OP_SCSI_TMFUNC: - transport_generic_free_cmd(&cmd->se_cmd, 1); + rc = transport_generic_free_cmd(&cmd->se_cmd, 1); + if (!rc && shutdown && se_cmd && se_cmd->se_sess) { + __iscsit_free_cmd(cmd, true, shutdown); + target_put_sess_cmd(se_cmd->se_sess, se_cmd); + } break; case ISCSI_OP_REJECT: /* @@ -718,11 +734,19 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd) * associated cmd->se_cmd needs to be released. */ if (cmd->se_cmd.se_tfo != NULL) { - transport_generic_free_cmd(&cmd->se_cmd, 1); + se_cmd = &cmd->se_cmd; + __iscsit_free_cmd(cmd, true, shutdown); + + rc = transport_generic_free_cmd(&cmd->se_cmd, 1); + if (!rc && shutdown && se_cmd->se_sess) { + __iscsit_free_cmd(cmd, true, shutdown); + target_put_sess_cmd(se_cmd->se_sess, se_cmd); + } break; } /* Fall-through */ default: + __iscsit_free_cmd(cmd, false, shutdown); cmd->release_cmd(cmd); break; } diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h index 4f8e01a..a442265 100644 --- a/drivers/target/iscsi/iscsi_target_util.h +++ b/drivers/target/iscsi/iscsi_target_util.h @@ -29,7 +29,7 @@ extern void iscsit_remove_cmd_from_tx_queues(struct iscsi_cmd *, struct iscsi_co extern bool iscsit_conn_all_queues_empty(struct iscsi_conn *); extern void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *); extern void iscsit_release_cmd(struct iscsi_cmd *); -extern void iscsit_free_cmd(struct iscsi_cmd *); +extern void iscsit_free_cmd(struct iscsi_cmd *, bool); extern int iscsit_check_session_usage_count(struct iscsi_session *); extern void iscsit_dec_session_usage_count(struct iscsi_session *); extern void iscsit_inc_session_usage_count(struct iscsi_session *); diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 1b1d544..b11890d 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -153,6 +153,7 @@ static int fd_configure_device(struct se_device *dev) struct request_queue *q = bdev_get_queue(inode->i_bdev); unsigned long long dev_size; + fd_dev->fd_block_size = bdev_logical_block_size(inode->i_bdev); /* * Determine the number of bytes from i_size_read() minus * one (1) logical sector from underlying struct block_device @@ -199,6 +200,7 @@ static int fd_configure_device(struct se_device *dev) goto fail; } + fd_dev->fd_block_size = FD_BLOCKSIZE; /* * Limit UNMAP emulation to 8k Number of LBAs (NoLB) */ @@ -217,9 +219,7 @@ static int fd_configure_device(struct se_device *dev) dev->dev_attrib.max_write_same_len = 0x1000; } - fd_dev->fd_block_size = dev->dev_attrib.hw_block_size; - - dev->dev_attrib.hw_block_size = FD_BLOCKSIZE; + dev->dev_attrib.hw_block_size = fd_dev->fd_block_size; dev->dev_attrib.hw_max_sectors = FD_MAX_SECTORS; dev->dev_attrib.hw_queue_depth = FD_MAX_DEVICE_QUEUE_DEPTH; @@ -694,11 +694,12 @@ static sector_t fd_get_blocks(struct se_device *dev) * to handle underlying block_device resize operations. */ if (S_ISBLK(i->i_mode)) - dev_size = (i_size_read(i) - fd_dev->fd_block_size); + dev_size = i_size_read(i); else dev_size = fd_dev->fd_dev_size; - return div_u64(dev_size, dev->dev_attrib.block_size); + return div_u64(dev_size - dev->dev_attrib.block_size, + dev->dev_attrib.block_size); } static struct sbc_ops fd_sbc_ops = { diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 4a79336..21e3158 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -65,7 +65,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd); static void transport_handle_queue_full(struct se_cmd *cmd, struct se_device *dev); static int transport_generic_get_mem(struct se_cmd *cmd); -static void transport_put_cmd(struct se_cmd *cmd); +static int transport_put_cmd(struct se_cmd *cmd); static void target_complete_ok_work(struct work_struct *work); int init_se_kmem_caches(void) @@ -221,6 +221,7 @@ struct se_session *transport_init_session(void) INIT_LIST_HEAD(&se_sess->sess_list); INIT_LIST_HEAD(&se_sess->sess_acl_list); INIT_LIST_HEAD(&se_sess->sess_cmd_list); + INIT_LIST_HEAD(&se_sess->sess_wait_list); spin_lock_init(&se_sess->sess_cmd_lock); kref_init(&se_sess->sess_kref); @@ -1943,7 +1944,7 @@ static inline void transport_free_pages(struct se_cmd *cmd) * This routine unconditionally frees a command, and reference counting * or list removal must be done in the caller. */ -static void transport_release_cmd(struct se_cmd *cmd) +static int transport_release_cmd(struct se_cmd *cmd) { BUG_ON(!cmd->se_tfo); @@ -1955,11 +1956,11 @@ static void transport_release_cmd(struct se_cmd *cmd) * If this cmd has been setup with target_get_sess_cmd(), drop * the kref and call ->release_cmd() in kref callback. */ - if (cmd->check_release != 0) { - target_put_sess_cmd(cmd->se_sess, cmd); - return; - } + if (cmd->check_release != 0) + return target_put_sess_cmd(cmd->se_sess, cmd); + cmd->se_tfo->release_cmd(cmd); + return 1; } /** @@ -1968,7 +1969,7 @@ static void transport_release_cmd(struct se_cmd *cmd) * * This routine releases our reference to the command and frees it if possible. */ -static void transport_put_cmd(struct se_cmd *cmd) +static int transport_put_cmd(struct se_cmd *cmd) { unsigned long flags; @@ -1976,7 +1977,7 @@ static void transport_put_cmd(struct se_cmd *cmd) if (atomic_read(&cmd->t_fe_count) && !atomic_dec_and_test(&cmd->t_fe_count)) { spin_unlock_irqrestore(&cmd->t_state_lock, flags); - return; + return 0; } if (cmd->transport_state & CMD_T_DEV_ACTIVE) { @@ -1986,8 +1987,7 @@ static void transport_put_cmd(struct se_cmd *cmd) spin_unlock_irqrestore(&cmd->t_state_lock, flags); transport_free_pages(cmd); - transport_release_cmd(cmd); - return; + return transport_release_cmd(cmd); } void *transport_kmap_data_sg(struct se_cmd *cmd) @@ -2152,13 +2152,15 @@ static void transport_write_pending_qf(struct se_cmd *cmd) } } -void transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) +int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) { + int ret = 0; + if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD)) { if (wait_for_tasks && (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) transport_wait_for_tasks(cmd); - transport_release_cmd(cmd); + ret = transport_release_cmd(cmd); } else { if (wait_for_tasks) transport_wait_for_tasks(cmd); @@ -2166,8 +2168,9 @@ void transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) if (cmd->se_lun) transport_lun_remove_cmd(cmd); - transport_put_cmd(cmd); + ret = transport_put_cmd(cmd); } + return ret; } EXPORT_SYMBOL(transport_generic_free_cmd); @@ -2250,11 +2253,14 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess) unsigned long flags; spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); - - WARN_ON(se_sess->sess_tearing_down); + if (se_sess->sess_tearing_down) { + spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + return; + } se_sess->sess_tearing_down = 1; + list_splice_init(&se_sess->sess_cmd_list, &se_sess->sess_wait_list); - list_for_each_entry(se_cmd, &se_sess->sess_cmd_list, se_cmd_list) + list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list) se_cmd->cmd_wait_set = 1; spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); @@ -2263,44 +2269,32 @@ EXPORT_SYMBOL(target_sess_cmd_list_set_waiting); /* target_wait_for_sess_cmds - Wait for outstanding descriptors * @se_sess: session to wait for active I/O - * @wait_for_tasks: Make extra transport_wait_for_tasks call */ -void target_wait_for_sess_cmds( - struct se_session *se_sess, - int wait_for_tasks) +void target_wait_for_sess_cmds(struct se_session *se_sess) { struct se_cmd *se_cmd, *tmp_cmd; - bool rc = false; + unsigned long flags; list_for_each_entry_safe(se_cmd, tmp_cmd, - &se_sess->sess_cmd_list, se_cmd_list) { + &se_sess->sess_wait_list, se_cmd_list) { list_del(&se_cmd->se_cmd_list); pr_debug("Waiting for se_cmd: %p t_state: %d, fabric state:" " %d\n", se_cmd, se_cmd->t_state, se_cmd->se_tfo->get_cmd_state(se_cmd)); - if (wait_for_tasks) { - pr_debug("Calling transport_wait_for_tasks se_cmd: %p t_state: %d," - " fabric state: %d\n", se_cmd, se_cmd->t_state, - se_cmd->se_tfo->get_cmd_state(se_cmd)); - - rc = transport_wait_for_tasks(se_cmd); - - pr_debug("After transport_wait_for_tasks se_cmd: %p t_state: %d," - " fabric state: %d\n", se_cmd, se_cmd->t_state, - se_cmd->se_tfo->get_cmd_state(se_cmd)); - } - - if (!rc) { - wait_for_completion(&se_cmd->cmd_wait_comp); - pr_debug("After cmd_wait_comp: se_cmd: %p t_state: %d" - " fabric state: %d\n", se_cmd, se_cmd->t_state, - se_cmd->se_tfo->get_cmd_state(se_cmd)); - } + wait_for_completion(&se_cmd->cmd_wait_comp); + pr_debug("After cmd_wait_comp: se_cmd: %p t_state: %d" + " fabric state: %d\n", se_cmd, se_cmd->t_state, + se_cmd->se_tfo->get_cmd_state(se_cmd)); se_cmd->se_tfo->release_cmd(se_cmd); } + + spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); + WARN_ON(!list_empty(&se_sess->sess_cmd_list)); + spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + } EXPORT_SYMBOL(target_wait_for_sess_cmds); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index e773dfa..4ea4f98 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -543,6 +543,7 @@ struct se_session { struct list_head sess_list; struct list_head sess_acl_list; struct list_head sess_cmd_list; + struct list_head sess_wait_list; spinlock_t sess_cmd_lock; struct kref sess_kref; }; diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index ba3471b..1dcce9c 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -114,7 +114,7 @@ sense_reason_t transport_generic_new_cmd(struct se_cmd *); void target_execute_cmd(struct se_cmd *cmd); -void transport_generic_free_cmd(struct se_cmd *, int); +int transport_generic_free_cmd(struct se_cmd *, int); bool transport_wait_for_tasks(struct se_cmd *); int transport_check_aborted_status(struct se_cmd *, int); @@ -123,7 +123,7 @@ int transport_send_check_condition_and_sense(struct se_cmd *, int target_get_sess_cmd(struct se_session *, struct se_cmd *, bool); int target_put_sess_cmd(struct se_session *, struct se_cmd *); void target_sess_cmd_list_set_waiting(struct se_session *); -void target_wait_for_sess_cmds(struct se_session *, int); +void target_wait_for_sess_cmds(struct se_session *); int core_alua_check_nonop_delay(struct se_cmd *); |