diff options
Diffstat (limited to 'drivers/misc/sgi-gru')
-rw-r--r-- | drivers/misc/sgi-gru/gru.h | 11 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/gru_instructions.h | 144 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grufault.c | 311 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grufile.c | 290 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/gruhandles.c | 70 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/gruhandles.h | 37 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grukdump.c | 13 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grukservices.c | 211 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grukservices.h | 14 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grulib.h | 21 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grumain.c | 228 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/gruprocfs.c | 42 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grutables.h | 75 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grutlbpurge.c | 14 |
14 files changed, 1045 insertions, 436 deletions
diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h index f93f03a..3ad76cd 100644 --- a/drivers/misc/sgi-gru/gru.h +++ b/drivers/misc/sgi-gru/gru.h @@ -53,6 +53,17 @@ struct gru_chiplet_info { int free_user_cbr; }; +/* + * Statictics kept for each context. + */ +struct gru_gseg_statistics { + unsigned long fmm_tlbmiss; + unsigned long upm_tlbmiss; + unsigned long tlbdropin; + unsigned long context_stolen; + unsigned long reserved[10]; +}; + /* Flags for GRU options on the gru_create_context() call */ /* Select one of the follow 4 options to specify how TLB misses are handled */ #define GRU_OPT_MISS_DEFAULT 0x0000 /* Use default mode */ diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h index 3c9c066..d95587c 100644 --- a/drivers/misc/sgi-gru/gru_instructions.h +++ b/drivers/misc/sgi-gru/gru_instructions.h @@ -34,17 +34,17 @@ extern void gru_wait_abort_proc(void *cb); #include <asm/intrinsics.h> #define __flush_cache(p) ia64_fc((unsigned long)p) /* Use volatile on IA64 to ensure ordering via st4.rel */ -#define gru_ordered_store_int(p, v) \ +#define gru_ordered_store_ulong(p, v) \ do { \ barrier(); \ - *((volatile int *)(p)) = v; /* force st.rel */ \ + *((volatile unsigned long *)(p)) = v; /* force st.rel */ \ } while (0) #elif defined(CONFIG_X86_64) #define __flush_cache(p) clflush(p) -#define gru_ordered_store_int(p, v) \ +#define gru_ordered_store_ulong(p, v) \ do { \ barrier(); \ - *(int *)p = v; \ + *(unsigned long *)p = v; \ } while (0) #else #error "Unsupported architecture" @@ -129,8 +129,13 @@ struct gru_instruction_bits { */ struct gru_instruction { /* DW 0 */ - unsigned int op32; /* icmd,xtype,iaa0,ima,opc */ - unsigned int tri0; + union { + unsigned long op64; /* icmd,xtype,iaa0,ima,opc,tri0 */ + struct { + unsigned int op32; + unsigned int tri0; + }; + }; unsigned long tri1_bufsize; /* DW 1 */ unsigned long baddr0; /* DW 2 */ unsigned long nelem; /* DW 3 */ @@ -140,7 +145,7 @@ struct gru_instruction { unsigned long avalue; /* DW 7 */ }; -/* Some shifts and masks for the low 32 bits of a GRU command */ +/* Some shifts and masks for the low 64 bits of a GRU command */ #define GRU_CB_ICMD_SHFT 0 #define GRU_CB_ICMD_MASK 0x1 #define GRU_CB_XTYPE_SHFT 8 @@ -155,6 +160,10 @@ struct gru_instruction { #define GRU_CB_OPC_MASK 0xff #define GRU_CB_EXOPC_SHFT 24 #define GRU_CB_EXOPC_MASK 0xff +#define GRU_IDEF2_SHFT 32 +#define GRU_IDEF2_MASK 0x3ffff +#define GRU_ISTATUS_SHFT 56 +#define GRU_ISTATUS_MASK 0x3 /* GRU instruction opcodes (opc field) */ #define OP_NOP 0x00 @@ -256,6 +265,7 @@ struct gru_instruction { #define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 16) #define CBE_CAUSE_RA_RESPONSE_DATA_ERROR (1 << 17) #define CBE_CAUSE_HA_RESPONSE_DATA_ERROR (1 << 18) +#define CBE_CAUSE_FORCED_ERROR (1 << 19) /* CBE cbrexecstatus bits */ #define CBR_EXS_ABORT_OCC_BIT 0 @@ -264,13 +274,15 @@ struct gru_instruction { #define CBR_EXS_QUEUED_BIT 3 #define CBR_EXS_TLB_INVAL_BIT 4 #define CBR_EXS_EXCEPTION_BIT 5 +#define CBR_EXS_CB_INT_PENDING_BIT 6 #define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT) #define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT) #define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT) #define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT) -#define CBR_TLB_INVAL (1 << CBR_EXS_TLB_INVAL_BIT) +#define CBR_EXS_TLB_INVAL (1 << CBR_EXS_TLB_INVAL_BIT) #define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT) +#define CBR_EXS_CB_INT_PENDING (1 << CBR_EXS_CB_INT_PENDING_BIT) /* * Exceptions are retried for the following cases. If any OTHER bits are set @@ -296,12 +308,14 @@ union gru_mesqhead { /* Generate the low word of a GRU instruction */ -static inline unsigned int -__opword(unsigned char opcode, unsigned char exopc, unsigned char xtype, +static inline unsigned long +__opdword(unsigned char opcode, unsigned char exopc, unsigned char xtype, unsigned char iaa0, unsigned char iaa1, - unsigned char ima) + unsigned long idef2, unsigned char ima) { return (1 << GRU_CB_ICMD_SHFT) | + ((unsigned long)CBS_ACTIVE << GRU_ISTATUS_SHFT) | + (idef2<< GRU_IDEF2_SHFT) | (iaa0 << GRU_CB_IAA0_SHFT) | (iaa1 << GRU_CB_IAA1_SHFT) | (ima << GRU_CB_IMA_SHFT) | @@ -319,12 +333,13 @@ static inline void gru_flush_cache(void *p) } /* - * Store the lower 32 bits of the command including the "start" bit. Then + * Store the lower 64 bits of the command including the "start" bit. Then * start the instruction executing. */ -static inline void gru_start_instruction(struct gru_instruction *ins, int op32) +static inline void gru_start_instruction(struct gru_instruction *ins, unsigned long op64) { - gru_ordered_store_int(ins, op32); + gru_ordered_store_ulong(ins, op64); + mb(); gru_flush_cache(ins); } @@ -340,6 +355,30 @@ static inline void gru_start_instruction(struct gru_instruction *ins, int op32) * - nelem and stride are in elements * - tri0/tri1 is in bytes for the beginning of the data segment. */ +static inline void gru_vload_phys(void *cb, unsigned long gpa, + unsigned int tri0, int iaa, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62); + ins->nelem = 1; + ins->op1_stride = 1; + gru_start_instruction(ins, __opdword(OP_VLOAD, 0, XTYPE_DW, iaa, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + +static inline void gru_vstore_phys(void *cb, unsigned long gpa, + unsigned int tri0, int iaa, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62); + ins->nelem = 1; + ins->op1_stride = 1; + gru_start_instruction(ins, __opdword(OP_VSTORE, 0, XTYPE_DW, iaa, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + static inline void gru_vload(void *cb, unsigned long mem_addr, unsigned int tri0, unsigned char xtype, unsigned long nelem, unsigned long stride, unsigned long hints) @@ -348,10 +387,9 @@ static inline void gru_vload(void *cb, unsigned long mem_addr, ins->baddr0 = (long)mem_addr; ins->nelem = nelem; - ins->tri0 = tri0; ins->op1_stride = stride; - gru_start_instruction(ins, __opword(OP_VLOAD, 0, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_VLOAD, 0, xtype, IAA_RAM, 0, + (unsigned long)tri0, CB_IMA(hints))); } static inline void gru_vstore(void *cb, unsigned long mem_addr, @@ -362,10 +400,9 @@ static inline void gru_vstore(void *cb, unsigned long mem_addr, ins->baddr0 = (long)mem_addr; ins->nelem = nelem; - ins->tri0 = tri0; ins->op1_stride = stride; - gru_start_instruction(ins, __opword(OP_VSTORE, 0, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_VSTORE, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); } static inline void gru_ivload(void *cb, unsigned long mem_addr, @@ -376,10 +413,9 @@ static inline void gru_ivload(void *cb, unsigned long mem_addr, ins->baddr0 = (long)mem_addr; ins->nelem = nelem; - ins->tri0 = tri0; ins->tri1_bufsize = tri1; - gru_start_instruction(ins, __opword(OP_IVLOAD, 0, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_IVLOAD, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); } static inline void gru_ivstore(void *cb, unsigned long mem_addr, @@ -390,10 +426,9 @@ static inline void gru_ivstore(void *cb, unsigned long mem_addr, ins->baddr0 = (long)mem_addr; ins->nelem = nelem; - ins->tri0 = tri0; ins->tri1_bufsize = tri1; - gru_start_instruction(ins, __opword(OP_IVSTORE, 0, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_IVSTORE, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); } static inline void gru_vset(void *cb, unsigned long mem_addr, @@ -406,8 +441,8 @@ static inline void gru_vset(void *cb, unsigned long mem_addr, ins->op2_value_baddr1 = value; ins->nelem = nelem; ins->op1_stride = stride; - gru_start_instruction(ins, __opword(OP_VSET, 0, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_VSET, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); } static inline void gru_ivset(void *cb, unsigned long mem_addr, @@ -420,8 +455,8 @@ static inline void gru_ivset(void *cb, unsigned long mem_addr, ins->op2_value_baddr1 = value; ins->nelem = nelem; ins->tri1_bufsize = tri1; - gru_start_instruction(ins, __opword(OP_IVSET, 0, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_IVSET, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); } static inline void gru_vflush(void *cb, unsigned long mem_addr, @@ -433,15 +468,15 @@ static inline void gru_vflush(void *cb, unsigned long mem_addr, ins->baddr0 = (long)mem_addr; ins->op1_stride = stride; ins->nelem = nelem; - gru_start_instruction(ins, __opword(OP_VFLUSH, 0, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_VFLUSH, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); } static inline void gru_nop(void *cb, int hints) { struct gru_instruction *ins = (void *)cb; - gru_start_instruction(ins, __opword(OP_NOP, 0, 0, 0, 0, CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_NOP, 0, 0, 0, 0, 0, CB_IMA(hints))); } @@ -455,10 +490,9 @@ static inline void gru_bcopy(void *cb, const unsigned long src, ins->baddr0 = (long)src; ins->op2_value_baddr1 = (long)dest; ins->nelem = nelem; - ins->tri0 = tri0; ins->tri1_bufsize = bufsize; - gru_start_instruction(ins, __opword(OP_BCOPY, 0, xtype, IAA_RAM, - IAA_RAM, CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_BCOPY, 0, xtype, IAA_RAM, + IAA_RAM, tri0, CB_IMA(hints))); } static inline void gru_bstore(void *cb, const unsigned long src, @@ -470,9 +504,8 @@ static inline void gru_bstore(void *cb, const unsigned long src, ins->baddr0 = (long)src; ins->op2_value_baddr1 = (long)dest; ins->nelem = nelem; - ins->tri0 = tri0; - gru_start_instruction(ins, __opword(OP_BSTORE, 0, xtype, 0, IAA_RAM, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_BSTORE, 0, xtype, 0, IAA_RAM, + tri0, CB_IMA(hints))); } static inline void gru_gamir(void *cb, int exopc, unsigned long src, @@ -481,8 +514,8 @@ static inline void gru_gamir(void *cb, int exopc, unsigned long src, struct gru_instruction *ins = (void *)cb; ins->baddr0 = (long)src; - gru_start_instruction(ins, __opword(OP_GAMIR, exopc, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_GAMIR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); } static inline void gru_gamirr(void *cb, int exopc, unsigned long src, @@ -491,8 +524,8 @@ static inline void gru_gamirr(void *cb, int exopc, unsigned long src, struct gru_instruction *ins = (void *)cb; ins->baddr0 = (long)src; - gru_start_instruction(ins, __opword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); } static inline void gru_gamer(void *cb, int exopc, unsigned long src, @@ -505,8 +538,8 @@ static inline void gru_gamer(void *cb, int exopc, unsigned long src, ins->baddr0 = (long)src; ins->op1_stride = operand1; ins->op2_value_baddr1 = operand2; - gru_start_instruction(ins, __opword(OP_GAMER, exopc, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_GAMER, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); } static inline void gru_gamerr(void *cb, int exopc, unsigned long src, @@ -518,8 +551,8 @@ static inline void gru_gamerr(void *cb, int exopc, unsigned long src, ins->baddr0 = (long)src; ins->op1_stride = operand1; ins->op2_value_baddr1 = operand2; - gru_start_instruction(ins, __opword(OP_GAMERR, exopc, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_GAMERR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); } static inline void gru_gamxr(void *cb, unsigned long src, @@ -529,8 +562,8 @@ static inline void gru_gamxr(void *cb, unsigned long src, ins->baddr0 = (long)src; ins->nelem = 4; - gru_start_instruction(ins, __opword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW, - IAA_RAM, 0, CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW, + IAA_RAM, 0, 0, CB_IMA(hints))); } static inline void gru_mesq(void *cb, unsigned long queue, @@ -541,9 +574,8 @@ static inline void gru_mesq(void *cb, unsigned long queue, ins->baddr0 = (long)queue; ins->nelem = nelem; - ins->tri0 = tri0; - gru_start_instruction(ins, __opword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0, + tri0, CB_IMA(hints))); } static inline unsigned long gru_get_amo_value(void *cb) @@ -662,6 +694,14 @@ static inline void gru_wait_abort(void *cb) gru_wait_abort_proc(cb); } +/* + * Get a pointer to the start of a gseg + * p - Any valid pointer within the gseg + */ +static inline void *gru_get_gseg_pointer (void *p) +{ + return (void *)((unsigned long)p & ~(GRU_GSEG_PAGESIZE - 1)); +} /* * Get a pointer to a control block diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index 679e017..38657cd 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -40,6 +40,12 @@ #include "gru_instructions.h" #include <asm/uv/uv_hub.h> +/* Return codes for vtop functions */ +#define VTOP_SUCCESS 0 +#define VTOP_INVALID -1 +#define VTOP_RETRY -2 + + /* * Test if a physical address is a valid GRU GSEG address */ @@ -90,19 +96,22 @@ static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - struct gru_thread_state *gts = NULL; + struct gru_thread_state *gts = ERR_PTR(-EINVAL); down_write(&mm->mmap_sem); vma = gru_find_vma(vaddr); - if (vma) - gts = gru_alloc_thread_state(vma, TSID(vaddr, vma)); - if (gts) { - mutex_lock(>s->ts_ctxlock); - downgrade_write(&mm->mmap_sem); - } else { - up_write(&mm->mmap_sem); - } + if (!vma) + goto err; + gts = gru_alloc_thread_state(vma, TSID(vaddr, vma)); + if (IS_ERR(gts)) + goto err; + mutex_lock(>s->ts_ctxlock); + downgrade_write(&mm->mmap_sem); + return gts; + +err: + up_write(&mm->mmap_sem); return gts; } @@ -122,39 +131,15 @@ static void gru_unlock_gts(struct gru_thread_state *gts) * is necessary to prevent the user from seeing a stale cb.istatus that will * change as soon as the TFH restart is complete. Races may cause an * occasional failure to clear the cb.istatus, but that is ok. - * - * If the cb address is not valid (should not happen, but...), nothing - * bad will happen.. The get_user()/put_user() will fail but there - * are no bad side-effects. */ -static void gru_cb_set_istatus_active(unsigned long __user *cb) +static void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk) { - union { - struct gru_instruction_bits bits; - unsigned long dw; - } u; - - if (cb) { - get_user(u.dw, cb); - u.bits.istatus = CBS_ACTIVE; - put_user(u.dw, cb); + if (cbk) { + cbk->istatus = CBS_ACTIVE; } } /* - * Convert a interrupt IRQ to a pointer to the GRU GTS that caused the - * interrupt. Interrupts are always sent to a cpu on the blade that contains the - * GRU (except for headless blades which are not currently supported). A blade - * has N grus; a block of N consecutive IRQs is assigned to the GRUs. The IRQ - * number uniquely identifies the GRU chiplet on the local blade that caused the - * interrupt. Always called in interrupt context. - */ -static inline struct gru_state *irq_to_gru(int irq) -{ - return &gru_base[uv_numa_blade_id()]->bs_grus[irq - IRQ_GRU]; -} - -/* * Read & clear a TFM * * The GRU has an array of fault maps. A map is private to a cpu @@ -207,10 +192,11 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma, { struct page *page; - /* ZZZ Need to handle HUGE pages */ - if (is_vm_hugetlb_page(vma)) - return -EFAULT; +#ifdef CONFIG_HUGETLB_PAGE + *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT; +#else *pageshift = PAGE_SHIFT; +#endif if (get_user_pages (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0) return -EFAULT; @@ -268,7 +254,6 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, return 0; err: - local_irq_enable(); return 1; } @@ -301,14 +286,69 @@ static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr, paddr = paddr & ~((1UL << ps) - 1); *gpa = uv_soc_phys_ram_to_gpa(paddr); *pageshift = ps; - return 0; + return VTOP_SUCCESS; inval: - return -1; + return VTOP_INVALID; upm: - return -2; + return VTOP_RETRY; +} + + +/* + * Flush a CBE from cache. The CBE is clean in the cache. Dirty the + * CBE cacheline so that the line will be written back to home agent. + * Otherwise the line may be silently dropped. This has no impact + * except on performance. + */ +static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe) +{ + if (unlikely(cbe)) { + cbe->cbrexecstatus = 0; /* make CL dirty */ + gru_flush_cache(cbe); + } } +/* + * Preload the TLB with entries that may be required. Currently, preloading + * is implemented only for BCOPY. Preload <tlb_preload_count> pages OR to + * the end of the bcopy tranfer, whichever is smaller. + */ +static void gru_preload_tlb(struct gru_state *gru, + struct gru_thread_state *gts, int atomic, + unsigned long fault_vaddr, int asid, int write, + unsigned char tlb_preload_count, + struct gru_tlb_fault_handle *tfh, + struct gru_control_block_extended *cbe) +{ + unsigned long vaddr = 0, gpa; + int ret, pageshift; + + if (cbe->opccpy != OP_BCOPY) + return; + + if (fault_vaddr == cbe->cbe_baddr0) + vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1; + else if (fault_vaddr == cbe->cbe_baddr1) + vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1; + + fault_vaddr &= PAGE_MASK; + vaddr &= PAGE_MASK; + vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE); + + while (vaddr > fault_vaddr) { + ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); + if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write, + GRU_PAGESIZE(pageshift))) + return; + gru_dbg(grudev, + "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n", + atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, + vaddr, asid, write, pageshift, gpa); + vaddr -= PAGE_SIZE; + STAT(tlb_preload_page); + } +} /* * Drop a TLB entry into the GRU. The fault is described by info in an TFH. @@ -320,11 +360,14 @@ upm: * < 0 = error code * */ -static int gru_try_dropin(struct gru_thread_state *gts, +static int gru_try_dropin(struct gru_state *gru, + struct gru_thread_state *gts, struct gru_tlb_fault_handle *tfh, - unsigned long __user *cb) + struct gru_instruction_bits *cbk) { - int pageshift = 0, asid, write, ret, atomic = !cb; + struct gru_control_block_extended *cbe = NULL; + unsigned char tlb_preload_count = gts->ts_tlb_preload_count; + int pageshift = 0, asid, write, ret, atomic = !cbk, indexway; unsigned long gpa = 0, vaddr = 0; /* @@ -335,24 +378,34 @@ static int gru_try_dropin(struct gru_thread_state *gts, */ /* + * Prefetch the CBE if doing TLB preloading + */ + if (unlikely(tlb_preload_count)) { + cbe = gru_tfh_to_cbe(tfh); + prefetchw(cbe); + } + + /* * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call. * Might be a hardware race OR a stupid user. Ignore FMM because FMM * is a transient state. */ if (tfh->status != TFHSTATUS_EXCEPTION) { gru_flush_cache(tfh); + sync_core(); if (tfh->status != TFHSTATUS_EXCEPTION) goto failnoexception; STAT(tfh_stale_on_fault); } if (tfh->state == TFHSTATE_IDLE) goto failidle; - if (tfh->state == TFHSTATE_MISS_FMM && cb) + if (tfh->state == TFHSTATE_MISS_FMM && cbk) goto failfmm; write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0; vaddr = tfh->missvaddr; asid = tfh->missasid; + indexway = tfh->indexway; if (asid == 0) goto failnoasid; @@ -366,41 +419,51 @@ static int gru_try_dropin(struct gru_thread_state *gts, goto failactive; ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); - if (ret == -1) + if (ret == VTOP_INVALID) goto failinval; - if (ret == -2) + if (ret == VTOP_RETRY) goto failupm; if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) { gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift); - if (atomic || !gru_update_cch(gts, 0)) { + if (atomic || !gru_update_cch(gts)) { gts->ts_force_cch_reload = 1; goto failupm; } } - gru_cb_set_istatus_active(cb); + + if (unlikely(cbe) && pageshift == PAGE_SHIFT) { + gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe); + gru_flush_cache_cbe(cbe); + } + + gru_cb_set_istatus_active(cbk); + gts->ustats.tlbdropin++; tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write, GRU_PAGESIZE(pageshift)); - STAT(tlb_dropin); gru_dbg(grudev, - "%s: tfh 0x%p, vaddr 0x%lx, asid 0x%x, ps %d, gpa 0x%lx\n", - ret ? "non-atomic" : "atomic", tfh, vaddr, asid, - pageshift, gpa); + "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x," + " rw %d, ps %d, gpa 0x%lx\n", + atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid, + indexway, write, pageshift, gpa); + STAT(tlb_dropin); return 0; failnoasid: /* No asid (delayed unload). */ STAT(tlb_dropin_fail_no_asid); gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); - if (!cb) + if (!cbk) tfh_user_polling_mode(tfh); else gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); return -EAGAIN; failupm: /* Atomic failure switch CBR to UPM */ tfh_user_polling_mode(tfh); + gru_flush_cache_cbe(cbe); STAT(tlb_dropin_fail_upm); gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); return 1; @@ -408,6 +471,7 @@ failupm: failfmm: /* FMM state on UPM call */ gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); STAT(tlb_dropin_fail_fmm); gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state); return 0; @@ -415,17 +479,20 @@ failfmm: failnoexception: /* TFH status did not show exception pending */ gru_flush_cache(tfh); - if (cb) - gru_flush_cache(cb); + gru_flush_cache_cbe(cbe); + if (cbk) + gru_flush_cache(cbk); STAT(tlb_dropin_fail_no_exception); - gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", tfh, tfh->status, tfh->state); + gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", + tfh, tfh->status, tfh->state); return 0; failidle: /* TFH state was idle - no miss pending */ gru_flush_cache(tfh); - if (cb) - gru_flush_cache(cb); + gru_flush_cache_cbe(cbe); + if (cbk) + gru_flush_cache(cbk); STAT(tlb_dropin_fail_idle); gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state); return 0; @@ -433,16 +500,18 @@ failidle: failinval: /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */ tfh_exception(tfh); + gru_flush_cache_cbe(cbe); STAT(tlb_dropin_fail_invalid); gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); return -EFAULT; failactive: /* Range invalidate active. Switch to UPM iff atomic */ - if (!cb) + if (!cbk) tfh_user_polling_mode(tfh); else gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); STAT(tlb_dropin_fail_range_active); gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n", tfh, vaddr); @@ -455,31 +524,41 @@ failactive: * Note that this is the interrupt handler that is registered with linux * interrupt handlers. */ -irqreturn_t gru_intr(int irq, void *dev_id) +static irqreturn_t gru_intr(int chiplet, int blade) { struct gru_state *gru; struct gru_tlb_fault_map imap, dmap; struct gru_thread_state *gts; struct gru_tlb_fault_handle *tfh = NULL; + struct completion *cmp; int cbrnum, ctxnum; STAT(intr); - gru = irq_to_gru(irq); + gru = &gru_base[blade]->bs_grus[chiplet]; if (!gru) { - dev_err(grudev, "GRU: invalid interrupt: cpu %d, irq %d\n", - raw_smp_processor_id(), irq); + dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n", + raw_smp_processor_id(), chiplet); return IRQ_NONE; } get_clear_fault_map(gru, &imap, &dmap); + gru_dbg(grudev, + "cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n", + smp_processor_id(), chiplet, gru->gs_gid, + imap.fault_bits[0], imap.fault_bits[1], + dmap.fault_bits[0], dmap.fault_bits[1]); for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) { - complete(gru->gs_blade->bs_async_wq); + STAT(intr_cbr); + cmp = gru->gs_blade->bs_async_wq; + if (cmp) + complete(cmp); gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n", - gru->gs_gid, cbrnum, gru->gs_blade->bs_async_wq->done); + gru->gs_gid, cbrnum, cmp ? cmp->done : -1); } for_each_cbr_in_tfm(cbrnum, imap.fault_bits) { + STAT(intr_tfh); tfh = get_tfh_by_index(gru, cbrnum); prefetchw(tfh); /* Helps on hdw, required for emulator */ @@ -492,14 +571,20 @@ irqreturn_t gru_intr(int irq, void *dev_id) ctxnum = tfh->ctxnum; gts = gru->gs_gts[ctxnum]; + /* Spurious interrupts can cause this. Ignore. */ + if (!gts) { + STAT(intr_spurious); + continue; + } + /* * This is running in interrupt context. Trylock the mmap_sem. * If it fails, retry the fault in user context. */ + gts->ustats.fmm_tlbmiss++; if (!gts->ts_force_cch_reload && down_read_trylock(>s->ts_mm->mmap_sem)) { - gts->ustats.fmm_tlbdropin++; - gru_try_dropin(gts, tfh, NULL); + gru_try_dropin(gru, gts, tfh, NULL); up_read(>s->ts_mm->mmap_sem); } else { tfh_user_polling_mode(tfh); @@ -509,20 +594,43 @@ irqreturn_t gru_intr(int irq, void *dev_id) return IRQ_HANDLED; } +irqreturn_t gru0_intr(int irq, void *dev_id) +{ + return gru_intr(0, uv_numa_blade_id()); +} + +irqreturn_t gru1_intr(int irq, void *dev_id) +{ + return gru_intr(1, uv_numa_blade_id()); +} + +irqreturn_t gru_intr_mblade(int irq, void *dev_id) +{ + int blade; + + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + gru_intr(0, blade); + gru_intr(1, blade); + } + return IRQ_HANDLED; +} + static int gru_user_dropin(struct gru_thread_state *gts, struct gru_tlb_fault_handle *tfh, - unsigned long __user *cb) + void *cb) { struct gru_mm_struct *gms = gts->ts_gms; int ret; - gts->ustats.upm_tlbdropin++; + gts->ustats.upm_tlbmiss++; while (1) { wait_event(gms->ms_wait_queue, atomic_read(&gms->ms_range_active) == 0); prefetchw(tfh); /* Helps on hdw, required for emulator */ - ret = gru_try_dropin(gts, tfh, cb); + ret = gru_try_dropin(gts->ts_gru, gts, tfh, cb); if (ret <= 0) return ret; STAT(call_os_wait_queue); @@ -538,52 +646,41 @@ int gru_handle_user_call_os(unsigned long cb) { struct gru_tlb_fault_handle *tfh; struct gru_thread_state *gts; - unsigned long __user *cbp; + void *cbk; int ucbnum, cbrnum, ret = -EINVAL; STAT(call_os); - gru_dbg(grudev, "address 0x%lx\n", cb); /* sanity check the cb pointer */ ucbnum = get_cb_number((void *)cb); if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) return -EINVAL; - cbp = (unsigned long *)cb; gts = gru_find_lock_gts(cb); if (!gts) return -EINVAL; + gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts); if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) goto exit; - /* - * If force_unload is set, the UPM TLB fault is phony. The task - * has migrated to another node and the GSEG must be moved. Just - * unload the context. The task will page fault and assign a new - * context. - */ - if (gts->ts_tgid_owner == current->tgid && gts->ts_blade >= 0 && - gts->ts_blade != uv_numa_blade_id()) { - STAT(call_os_offnode_reference); - gts->ts_force_unload = 1; - } + gru_check_context_placement(gts); /* * CCH may contain stale data if ts_force_cch_reload is set. */ if (gts->ts_gru && gts->ts_force_cch_reload) { gts->ts_force_cch_reload = 0; - gru_update_cch(gts, 0); + gru_update_cch(gts); } ret = -EAGAIN; cbrnum = thread_cbr_number(gts, ucbnum); - if (gts->ts_force_unload) { - gru_unload_context(gts, 1); - } else if (gts->ts_gru) { + if (gts->ts_gru) { tfh = get_tfh_by_index(gts->ts_gru, cbrnum); - ret = gru_user_dropin(gts, tfh, cbp); + cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr, + gts->ts_ctxnum, ucbnum); + ret = gru_user_dropin(gts, tfh, cbk); } exit: gru_unlock_gts(gts); @@ -605,11 +702,11 @@ int gru_get_exception_detail(unsigned long arg) if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet))) return -EFAULT; - gru_dbg(grudev, "address 0x%lx\n", excdet.cb); gts = gru_find_lock_gts(excdet.cb); if (!gts) return -EINVAL; + gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts); ucbnum = get_cb_number((void *)excdet.cb); if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) { ret = -EINVAL; @@ -617,6 +714,7 @@ int gru_get_exception_detail(unsigned long arg) cbrnum = thread_cbr_number(gts, ucbnum); cbe = get_cbe_by_index(gts->ts_gru, cbrnum); gru_flush_cache(cbe); /* CBE not coherent */ + sync_core(); /* make sure we are have current data */ excdet.opc = cbe->opccpy; excdet.exopc = cbe->exopccpy; excdet.ecause = cbe->ecause; @@ -624,7 +722,7 @@ int gru_get_exception_detail(unsigned long arg) excdet.exceptdet1 = cbe->idef3upd; excdet.cbrstate = cbe->cbrstate; excdet.cbrexecstatus = cbe->cbrexecstatus; - gru_flush_cache(cbe); + gru_flush_cache_cbe(cbe); ret = 0; } else { ret = -EAGAIN; @@ -733,6 +831,11 @@ long gru_get_gseg_statistics(unsigned long arg) if (copy_from_user(&req, (void __user *)arg, sizeof(req))) return -EFAULT; + /* + * The library creates arrays of contexts for threaded programs. + * If no gts exists in the array, the context has never been used & all + * statistics are implicitly 0. + */ gts = gru_find_lock_gts(req.gseg); if (gts) { memcpy(&req.stats, >s->ustats, sizeof(gts->ustats)); @@ -762,11 +865,25 @@ int gru_set_context_option(unsigned long arg) return -EFAULT; gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1); - gts = gru_alloc_locked_gts(req.gseg); - if (!gts) - return -EINVAL; + gts = gru_find_lock_gts(req.gseg); + if (!gts) { + gts = gru_alloc_locked_gts(req.gseg); + if (IS_ERR(gts)) + return PTR_ERR(gts); + } switch (req.op) { + case sco_blade_chiplet: + /* Select blade/chiplet for GRU context */ + if (req.val1 < -1 || req.val1 >= GRU_MAX_BLADES || !gru_base[req.val1] || + req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB) { + ret = -EINVAL; + } else { + gts->ts_user_blade_id = req.val1; + gts->ts_user_chiplet_id = req.val0; + gru_check_context_placement(gts); + } + break; case sco_gseg_owner: /* Register the current task as the GSEG owner */ gts->ts_tgid_owner = current->tgid; diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index ce5eda9..cb3b4d2 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -35,6 +35,9 @@ #include <linux/interrupt.h> #include <linux/proc_fs.h> #include <linux/uaccess.h> +#ifdef CONFIG_X86_64 +#include <asm/uv/uv_irq.h> +#endif #include <asm/uv/uv.h> #include "gru.h" #include "grulib.h" @@ -130,7 +133,6 @@ static int gru_create_new_context(unsigned long arg) struct gru_vma_data *vdata; int ret = -EINVAL; - if (copy_from_user(&req, (void __user *)arg, sizeof(req))) return -EFAULT; @@ -150,6 +152,7 @@ static int gru_create_new_context(unsigned long arg) vdata->vd_dsr_au_count = GRU_DS_BYTES_TO_AU(req.data_segment_bytes); vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks); + vdata->vd_tlb_preload_count = req.tlb_preload_count; ret = 0; } up_write(¤t->mm->mmap_sem); @@ -190,7 +193,7 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req, { int err = -EBADRQC; - gru_dbg(grudev, "file %p\n", file); + gru_dbg(grudev, "file %p, req 0x%x, 0x%lx\n", file, req, arg); switch (req) { case GRU_CREATE_CONTEXT: @@ -232,23 +235,24 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req, * system. */ static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr, - void *vaddr, int nid, int bid, int grunum) + void *vaddr, int blade_id, int chiplet_id) { spin_lock_init(&gru->gs_lock); spin_lock_init(&gru->gs_asid_lock); gru->gs_gru_base_paddr = paddr; gru->gs_gru_base_vaddr = vaddr; - gru->gs_gid = bid * GRU_CHIPLETS_PER_BLADE + grunum; - gru->gs_blade = gru_base[bid]; - gru->gs_blade_id = bid; + gru->gs_gid = blade_id * GRU_CHIPLETS_PER_BLADE + chiplet_id; + gru->gs_blade = gru_base[blade_id]; + gru->gs_blade_id = blade_id; + gru->gs_chiplet_id = chiplet_id; gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1; gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1; gru->gs_asid_limit = MAX_ASID; gru_tgh_flush_init(gru); if (gru->gs_gid >= gru_max_gids) gru_max_gids = gru->gs_gid + 1; - gru_dbg(grudev, "bid %d, nid %d, gid %d, vaddr %p (0x%lx)\n", - bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr, + gru_dbg(grudev, "bid %d, gid %d, vaddr %p (0x%lx)\n", + blade_id, gru->gs_gid, gru->gs_gru_base_vaddr, gru->gs_gru_base_paddr); } @@ -264,12 +268,10 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) max_user_cbrs = GRU_NUM_CB; max_user_dsr_bytes = GRU_NUM_DSR_BYTES; - for_each_online_node(nid) { - bid = uv_node_to_blade_id(nid); - pnode = uv_node_to_pnode(nid); - if (bid < 0 || gru_base[bid]) - continue; - page = alloc_pages_exact_node(nid, GFP_KERNEL, order); + for_each_possible_blade(bid) { + pnode = uv_blade_to_pnode(bid); + nid = uv_blade_to_memory_nid(bid);/* -1 if no memory on blade */ + page = alloc_pages_node(nid, GFP_KERNEL, order); if (!page) goto fail; gru_base[bid] = page_address(page); @@ -285,7 +287,7 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) chip++, gru++) { paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip); vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip); - gru_init_chiplet(gru, paddr, vaddr, nid, bid, chip); + gru_init_chiplet(gru, paddr, vaddr, bid, chip); n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; cbrs = max(cbrs, n); n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; @@ -298,39 +300,215 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) return 0; fail: - for (nid--; nid >= 0; nid--) - free_pages((unsigned long)gru_base[nid], order); + for (bid--; bid >= 0; bid--) + free_pages((unsigned long)gru_base[bid], order); return -ENOMEM; } -#ifdef CONFIG_IA64 +static void gru_free_tables(void) +{ + int bid; + int order = get_order(sizeof(struct gru_state) * + GRU_CHIPLETS_PER_BLADE); -static int get_base_irq(void) + for (bid = 0; bid < GRU_MAX_BLADES; bid++) + free_pages((unsigned long)gru_base[bid], order); +} + +static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep) { - return IRQ_GRU; + unsigned long mmr = 0; + int core; + + /* + * We target the cores of a blade and not the hyperthreads themselves. + * There is a max of 8 cores per socket and 2 sockets per blade, + * making for a max total of 16 cores (i.e., 16 CPUs without + * hyperthreading and 32 CPUs with hyperthreading). + */ + core = uv_cpu_core_number(cpu) + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); + if (core >= GRU_NUM_TFM || uv_cpu_ht_number(cpu)) + return 0; + + if (chiplet == 0) { + mmr = UVH_GR0_TLB_INT0_CONFIG + + core * (UVH_GR0_TLB_INT1_CONFIG - UVH_GR0_TLB_INT0_CONFIG); + } else if (chiplet == 1) { + mmr = UVH_GR1_TLB_INT0_CONFIG + + core * (UVH_GR1_TLB_INT1_CONFIG - UVH_GR1_TLB_INT0_CONFIG); + } else { + BUG(); + } + + *corep = core; + return mmr; } -#elif defined CONFIG_X86_64 +#ifdef CONFIG_IA64 -static void noop(unsigned int irq) +static int gru_irq_count[GRU_CHIPLETS_PER_BLADE]; + +static void gru_noop(unsigned int irq) { } -static struct irq_chip gru_chip = { - .name = "gru", - .mask = noop, - .unmask = noop, - .ack = noop, +static struct irq_chip gru_chip[GRU_CHIPLETS_PER_BLADE] = { + [0 ... GRU_CHIPLETS_PER_BLADE - 1] { + .mask = gru_noop, + .unmask = gru_noop, + .ack = gru_noop + } }; -static int get_base_irq(void) +static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, + irq_handler_t irq_handler, int cpu, int blade) +{ + unsigned long mmr; + int irq = IRQ_GRU + chiplet; + int ret, core; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return 0; + + if (gru_irq_count[chiplet] == 0) { + gru_chip[chiplet].name = irq_name; + ret = set_irq_chip(irq, &gru_chip[chiplet]); + if (ret) { + printk(KERN_ERR "%s: set_irq_chip failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + + ret = request_irq(irq, irq_handler, 0, irq_name, NULL); + if (ret) { + printk(KERN_ERR "%s: request_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + } + gru_irq_count[chiplet]++; + + return 0; +} + +static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) +{ + unsigned long mmr; + int core, irq = IRQ_GRU + chiplet; + + if (gru_irq_count[chiplet] == 0) + return; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return; + + if (--gru_irq_count[chiplet] == 0) + free_irq(irq, NULL); +} + +#elif defined CONFIG_X86_64 + +static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, + irq_handler_t irq_handler, int cpu, int blade) +{ + unsigned long mmr; + int irq, core; + int ret; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return 0; + + irq = uv_setup_irq(irq_name, cpu, blade, mmr, UV_AFFINITY_CPU); + if (irq < 0) { + printk(KERN_ERR "%s: uv_setup_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -irq); + return irq; + } + + ret = request_irq(irq, irq_handler, 0, irq_name, NULL); + if (ret) { + uv_teardown_irq(irq); + printk(KERN_ERR "%s: request_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + gru_base[blade]->bs_grus[chiplet].gs_irq[core] = irq; + return 0; +} + +static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) { - set_irq_chip(IRQ_GRU, &gru_chip); - set_irq_chip(IRQ_GRU + 1, &gru_chip); - return IRQ_GRU; + int irq, core; + unsigned long mmr; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr) { + irq = gru_base[blade]->bs_grus[chiplet].gs_irq[core]; + if (irq) { + free_irq(irq, NULL); + uv_teardown_irq(irq); + } + } } + #endif +static void gru_teardown_tlb_irqs(void) +{ + int blade; + int cpu; + + for_each_online_cpu(cpu) { + blade = uv_cpu_to_blade_id(cpu); + gru_chiplet_teardown_tlb_irq(0, cpu, blade); + gru_chiplet_teardown_tlb_irq(1, cpu, blade); + } + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + gru_chiplet_teardown_tlb_irq(0, 0, blade); + gru_chiplet_teardown_tlb_irq(1, 0, blade); + } +} + +static int gru_setup_tlb_irqs(void) +{ + int blade; + int cpu; + int ret; + + for_each_online_cpu(cpu) { + blade = uv_cpu_to_blade_id(cpu); + ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru0_intr, cpu, blade); + if (ret != 0) + goto exit1; + + ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru1_intr, cpu, blade); + if (ret != 0) + goto exit1; + } + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru_intr_mblade, 0, blade); + if (ret != 0) + goto exit1; + + ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru_intr_mblade, 0, blade); + if (ret != 0) + goto exit1; + } + + return 0; + +exit1: + gru_teardown_tlb_irqs(); + return ret; +} + /* * gru_init * @@ -338,8 +516,7 @@ static int get_base_irq(void) */ static int __init gru_init(void) { - int ret, irq, chip; - char id[10]; + int ret; if (!is_uv_system()) return 0; @@ -354,41 +531,29 @@ static int __init gru_init(void) gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE; printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n", gru_start_paddr, gru_end_paddr); - irq = get_base_irq(); - for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) { - ret = request_irq(irq + chip, gru_intr, 0, id, NULL); - /* TODO: fix irq handling on x86. For now ignore failure because - * interrupts are not required & not yet fully supported */ - if (ret) { - printk(KERN_WARNING - "!!!WARNING: GRU ignoring request failure!!!\n"); - ret = 0; - } - if (ret) { - printk(KERN_ERR "%s: request_irq failed\n", - GRU_DRIVER_ID_STR); - goto exit1; - } - } - ret = misc_register(&gru_miscdev); if (ret) { printk(KERN_ERR "%s: misc_register failed\n", GRU_DRIVER_ID_STR); - goto exit1; + goto exit0; } ret = gru_proc_init(); if (ret) { printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR); - goto exit2; + goto exit1; } ret = gru_init_tables(gru_start_paddr, gru_start_vaddr); if (ret) { printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR); - goto exit3; + goto exit2; } + + ret = gru_setup_tlb_irqs(); + if (ret != 0) + goto exit3; + gru_kservices_init(); printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR, @@ -396,31 +561,24 @@ static int __init gru_init(void) return 0; exit3: - gru_proc_exit(); + gru_free_tables(); exit2: - misc_deregister(&gru_miscdev); + gru_proc_exit(); exit1: - for (--chip; chip >= 0; chip--) - free_irq(irq + chip, NULL); + misc_deregister(&gru_miscdev); +exit0: return ret; } static void __exit gru_exit(void) { - int i, bid; - int order = get_order(sizeof(struct gru_state) * - GRU_CHIPLETS_PER_BLADE); - if (!is_uv_system()) return; - for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++) - free_irq(IRQ_GRU + i, NULL); + gru_teardown_tlb_irqs(); gru_kservices_exit(); - for (bid = 0; bid < GRU_MAX_BLADES; bid++) - free_pages((unsigned long)gru_base[bid], order); - + gru_free_tables(); misc_deregister(&gru_miscdev); gru_proc_exit(); } diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c index 37e7cfc..2f30bad 100644 --- a/drivers/misc/sgi-gru/gruhandles.c +++ b/drivers/misc/sgi-gru/gruhandles.c @@ -27,9 +27,11 @@ #ifdef CONFIG_IA64 #include <asm/processor.h> #define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) +#define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq) #else #include <asm/tsc.h> #define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) +#define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz) #endif /* Extract the status field from a kernel handle */ @@ -39,21 +41,39 @@ struct mcs_op_statistic mcs_op_statistics[mcsop_last]; static void update_mcs_stats(enum mcs_op op, unsigned long clks) { + unsigned long nsec; + + nsec = CLKS2NSEC(clks); atomic_long_inc(&mcs_op_statistics[op].count); - atomic_long_add(clks, &mcs_op_statistics[op].total); - if (mcs_op_statistics[op].max < clks) - mcs_op_statistics[op].max = clks; + atomic_long_add(nsec, &mcs_op_statistics[op].total); + if (mcs_op_statistics[op].max < nsec) + mcs_op_statistics[op].max = nsec; } static void start_instruction(void *h) { unsigned long *w0 = h; - wmb(); /* setting CMD bit must be last */ - *w0 = *w0 | 1; + wmb(); /* setting CMD/STATUS bits must be last */ + *w0 = *w0 | 0x20001; gru_flush_cache(h); } +static void report_instruction_timeout(void *h) +{ + unsigned long goff = GSEGPOFF((unsigned long)h); + char *id = "???"; + + if (TYPE_IS(CCH, goff)) + id = "CCH"; + else if (TYPE_IS(TGH, goff)) + id = "TGH"; + else if (TYPE_IS(TFH, goff)) + id = "TFH"; + + panic(KERN_ALERT "GRU %p (%s) is malfunctioning\n", h, id); +} + static int wait_instruction_complete(void *h, enum mcs_op opc) { int status; @@ -64,9 +84,10 @@ static int wait_instruction_complete(void *h, enum mcs_op opc) status = GET_MSEG_HANDLE_STATUS(h); if (status != CCHSTATUS_ACTIVE) break; - if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) - panic("GRU %p is malfunctioning: start %ld, end %ld\n", - h, start_time, (unsigned long)get_cycles()); + if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) { + report_instruction_timeout(h); + start_time = get_cycles(); + } } if (gru_options & OPT_STATS) update_mcs_stats(opc, get_cycles() - start_time); @@ -75,9 +96,18 @@ static int wait_instruction_complete(void *h, enum mcs_op opc) int cch_allocate(struct gru_context_configuration_handle *cch) { + int ret; + cch->opc = CCHOP_ALLOCATE; start_instruction(cch); - return wait_instruction_complete(cch, cchop_allocate); + ret = wait_instruction_complete(cch, cchop_allocate); + + /* + * Stop speculation into the GSEG being mapped by the previous ALLOCATE. + * The GSEG memory does not exist until the ALLOCATE completes. + */ + sync_core(); + return ret; } int cch_start(struct gru_context_configuration_handle *cch) @@ -96,9 +126,18 @@ int cch_interrupt(struct gru_context_configuration_handle *cch) int cch_deallocate(struct gru_context_configuration_handle *cch) { + int ret; + cch->opc = CCHOP_DEALLOCATE; start_instruction(cch); - return wait_instruction_complete(cch, cchop_deallocate); + ret = wait_instruction_complete(cch, cchop_deallocate); + + /* + * Stop speculation into the GSEG being unmapped by the previous + * DEALLOCATE. + */ + sync_core(); + return ret; } int cch_interrupt_sync(struct gru_context_configuration_handle @@ -126,17 +165,20 @@ int tgh_invalidate(struct gru_tlb_global_handle *tgh, return wait_instruction_complete(tgh, tghop_invalidate); } -void tfh_write_only(struct gru_tlb_fault_handle *tfh, - unsigned long pfn, unsigned long vaddr, - int asid, int dirty, int pagesize) +int tfh_write_only(struct gru_tlb_fault_handle *tfh, + unsigned long paddr, int gaa, + unsigned long vaddr, int asid, int dirty, + int pagesize) { tfh->fillasid = asid; tfh->fillvaddr = vaddr; - tfh->pfn = pfn; + tfh->pfn = paddr >> GRU_PADDR_SHIFT; + tfh->gaa = gaa; tfh->dirty = dirty; tfh->pagesize = pagesize; tfh->opc = TFHOP_WRITE_ONLY; start_instruction(tfh); + return wait_instruction_complete(tfh, tfhop_write_only); } void tfh_write_restart(struct gru_tlb_fault_handle *tfh, diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h index f441122..3f998b9 100644 --- a/drivers/misc/sgi-gru/gruhandles.h +++ b/drivers/misc/sgi-gru/gruhandles.h @@ -91,6 +91,12 @@ /* Convert an arbitrary handle address to the beginning of the GRU segment */ #define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1))) +/* Test a valid handle address to determine the type */ +#define TYPE_IS(hn, h) ((h) >= GRU_##hn##_BASE && (h) < \ + GRU_##hn##_BASE + GRU_NUM_##hn * GRU_HANDLE_STRIDE && \ + (((h) & (GRU_HANDLE_STRIDE - 1)) == 0)) + + /* General addressing macros. */ static inline void *get_gseg_base_address(void *base, int ctxnum) { @@ -158,6 +164,16 @@ static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet) return vaddr + GRU_SIZE * (2 * pnode + chiplet); } +static inline struct gru_control_block_extended *gru_tfh_to_cbe( + struct gru_tlb_fault_handle *tfh) +{ + unsigned long cbe; + + cbe = (unsigned long)tfh - GRU_TFH_BASE + GRU_CBE_BASE; + return (struct gru_control_block_extended*)cbe; +} + + /* @@ -236,6 +252,17 @@ enum gru_tgh_state { TGHSTATE_RESTART_CTX, }; +enum gru_tgh_cause { + TGHCAUSE_RR_ECC, + TGHCAUSE_TLB_ECC, + TGHCAUSE_LRU_ECC, + TGHCAUSE_PS_ECC, + TGHCAUSE_MUL_ERR, + TGHCAUSE_DATA_ERR, + TGHCAUSE_SW_FORCE +}; + + /* * TFH - TLB Global Handle * Used for TLB dropins into the GRU TLB. @@ -440,6 +467,12 @@ struct gru_control_block_extended { unsigned int cbrexecstatus:8; }; +/* CBE fields for active BCOPY instructions */ +#define cbe_baddr0 idef1upd +#define cbe_baddr1 idef3upd +#define cbe_src_cl idef6cpy +#define cbe_nelemcur idef5upd + enum gru_cbr_state { CBRSTATE_INACTIVE, CBRSTATE_IDLE, @@ -487,8 +520,8 @@ int cch_interrupt_sync(struct gru_context_configuration_handle *cch); int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr, unsigned long vaddrmask, int asid, int pagesize, int global, int n, unsigned short ctxbitmap); -void tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long pfn, - unsigned long vaddr, int asid, int dirty, int pagesize); +int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr, + int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr, int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); void tfh_restart(struct gru_tlb_fault_handle *tfh); diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c index 55eabfa..9b2062d 100644 --- a/drivers/misc/sgi-gru/grukdump.c +++ b/drivers/misc/sgi-gru/grukdump.c @@ -44,7 +44,8 @@ static int gru_user_copy_handle(void __user **dp, void *s) static int gru_dump_context_data(void *grubase, struct gru_context_configuration_handle *cch, - void __user *ubuf, int ctxnum, int dsrcnt) + void __user *ubuf, int ctxnum, int dsrcnt, + int flush_cbrs) { void *cb, *cbe, *tfh, *gseg; int i, scr; @@ -55,6 +56,8 @@ static int gru_dump_context_data(void *grubase, tfh = grubase + GRU_TFH_BASE; for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) { + if (flush_cbrs) + gru_flush_cache(cb); if (gru_user_copy_handle(&ubuf, cb)) goto fail; if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE)) @@ -115,7 +118,7 @@ fail: static int gru_dump_context(struct gru_state *gru, int ctxnum, void __user *ubuf, void __user *ubufend, char data_opt, - char lock_cch) + char lock_cch, char flush_cbrs) { struct gru_dump_context_header hdr; struct gru_dump_context_header __user *uhdr = ubuf; @@ -159,8 +162,7 @@ static int gru_dump_context(struct gru_state *gru, int ctxnum, ret = -EFBIG; else ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum, - dsrcnt); - + dsrcnt, flush_cbrs); } if (cch_locked) unlock_cch_handle(cch); @@ -215,7 +217,8 @@ int gru_dump_chiplet_request(unsigned long arg) for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) { if (req.ctxnum == ctxnum || req.ctxnum < 0) { ret = gru_dump_context(gru, ctxnum, ubuf, ubufend, - req.data_opt, req.lock_cch); + req.data_opt, req.lock_cch, + req.flush_cbrs); if (ret < 0) goto fail; ubuf += ret; diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index 766e21e..34749ee 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c @@ -31,6 +31,7 @@ #include <linux/interrupt.h> #include <linux/uaccess.h> #include <linux/delay.h> +#include <asm/io_apic.h> #include "gru.h" #include "grulib.h" #include "grutables.h" @@ -97,9 +98,6 @@ #define ASYNC_HAN_TO_BID(h) ((h) - 1) #define ASYNC_BID_TO_HAN(b) ((b) + 1) #define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)] -#define KCB_TO_GID(cb) ((cb - gru_start_vaddr) / \ - (GRU_SIZE * GRU_CHIPLETS_PER_BLADE)) -#define KCB_TO_BS(cb) gru_base[KCB_TO_GID(cb)] #define GRU_NUM_KERNEL_CBR 1 #define GRU_NUM_KERNEL_DSR_BYTES 256 @@ -160,8 +158,10 @@ static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) up_read(&bs->bs_kgts_sema); down_write(&bs->bs_kgts_sema); - if (!bs->bs_kgts) - bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0); + if (!bs->bs_kgts) { + bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); + bs->bs_kgts->ts_user_blade_id = blade_id; + } kgts = bs->bs_kgts; if (!kgts->ts_gru) { @@ -172,9 +172,9 @@ static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU( GRU_NUM_KERNEL_DSR_BYTES * ncpus + bs->bs_async_dsr_bytes); - while (!gru_assign_gru_context(kgts, blade_id)) { + while (!gru_assign_gru_context(kgts)) { msleep(1); - gru_steal_context(kgts, blade_id); + gru_steal_context(kgts); } gru_load_context(kgts); gru = bs->bs_kgts->ts_gru; @@ -200,13 +200,15 @@ static int gru_free_kernel_contexts(void) bs = gru_base[bid]; if (!bs) continue; + + /* Ignore busy contexts. Don't want to block here. */ if (down_write_trylock(&bs->bs_kgts_sema)) { kgts = bs->bs_kgts; if (kgts && kgts->ts_gru) gru_unload_context(kgts, 0); - kfree(kgts); bs->bs_kgts = NULL; up_write(&bs->bs_kgts_sema); + kfree(kgts); } else { ret++; } @@ -220,13 +222,21 @@ static int gru_free_kernel_contexts(void) static struct gru_blade_state *gru_lock_kernel_context(int blade_id) { struct gru_blade_state *bs; + int bid; STAT(lock_kernel_context); - bs = gru_base[blade_id]; +again: + bid = blade_id < 0 ? uv_numa_blade_id() : blade_id; + bs = gru_base[bid]; + /* Handle the case where migration occured while waiting for the sema */ down_read(&bs->bs_kgts_sema); + if (blade_id < 0 && bid != uv_numa_blade_id()) { + up_read(&bs->bs_kgts_sema); + goto again; + } if (!bs->bs_kgts || !bs->bs_kgts->ts_gru) - gru_load_kernel_context(bs, blade_id); + gru_load_kernel_context(bs, bid); return bs; } @@ -255,7 +265,7 @@ static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); preempt_disable(); - bs = gru_lock_kernel_context(uv_numa_blade_id()); + bs = gru_lock_kernel_context(-1); lcpu = uv_blade_processor_id(); *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; @@ -384,13 +394,31 @@ int gru_get_cb_exception_detail(void *cb, struct control_block_extended_exc_detail *excdet) { struct gru_control_block_extended *cbe; - struct gru_blade_state *bs; - int cbrnum; - - bs = KCB_TO_BS(cb); - cbrnum = thread_cbr_number(bs->bs_kgts, get_cb_number(cb)); + struct gru_thread_state *kgts = NULL; + unsigned long off; + int cbrnum, bid; + + /* + * Locate kgts for cb. This algorithm is SLOW but + * this function is rarely called (ie., almost never). + * Performance does not matter. + */ + for_each_possible_blade(bid) { + if (!gru_base[bid]) + break; + kgts = gru_base[bid]->bs_kgts; + if (!kgts || !kgts->ts_gru) + continue; + off = cb - kgts->ts_gru->gs_gru_base_vaddr; + if (off < GRU_SIZE) + break; + kgts = NULL; + } + BUG_ON(!kgts); + cbrnum = thread_cbr_number(kgts, get_cb_number(cb)); cbe = get_cbe(GRUBASE(cb), cbrnum); gru_flush_cache(cbe); /* CBE not coherent */ + sync_core(); excdet->opc = cbe->opccpy; excdet->exopc = cbe->exopccpy; excdet->ecause = cbe->ecause; @@ -409,8 +437,8 @@ char *gru_get_cb_exception_detail_str(int ret, void *cb, if (ret > 0 && gen->istatus == CBS_EXCEPTION) { gru_get_cb_exception_detail(cb, &excdet); snprintf(buf, size, - "GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x," - "excdet0 0x%lx, excdet1 0x%x", + "GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x," + "excdet0 0x%lx, excdet1 0x%x", smp_processor_id(), gen, excdet.opc, excdet.exopc, excdet.ecause, excdet.exceptdet0, excdet.exceptdet1); } else { @@ -457,9 +485,10 @@ int gru_check_status_proc(void *cb) int ret; ret = gen->istatus; - if (ret != CBS_EXCEPTION) - return ret; - return gru_retry_exception(cb); + if (ret == CBS_EXCEPTION) + ret = gru_retry_exception(cb); + rmb(); + return ret; } @@ -471,7 +500,7 @@ int gru_wait_proc(void *cb) ret = gru_wait_idle_or_exception(gen); if (ret == CBS_EXCEPTION) ret = gru_retry_exception(cb); - + rmb(); return ret; } @@ -538,7 +567,7 @@ int gru_create_message_queue(struct gru_message_queue_desc *mqd, mqd->mq = mq; mqd->mq_gpa = uv_gpa(mq); mqd->qlines = qlines; - mqd->interrupt_pnode = UV_NASID_TO_PNODE(nasid); + mqd->interrupt_pnode = nasid >> 1; mqd->interrupt_vector = vector; mqd->interrupt_apicid = apicid; return 0; @@ -598,6 +627,8 @@ static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd, ret = MQE_UNEXPECTED_CB_ERR; break; case CBSS_PAGE_OVERFLOW: + STAT(mesq_noop_page_overflow); + /* fallthru */ default: BUG(); } @@ -673,18 +704,6 @@ cberr: } /* - * Send a cross-partition interrupt to the SSI that contains the target - * message queue. Normally, the interrupt is automatically delivered by hardware - * but some error conditions require explicit delivery. - */ -static void send_message_queue_interrupt(struct gru_message_queue_desc *mqd) -{ - if (mqd->interrupt_vector) - uv_hub_send_ipi(mqd->interrupt_pnode, mqd->interrupt_apicid, - mqd->interrupt_vector); -} - -/* * Handle a PUT failure. Note: if message was a 2-line message, one of the * lines might have successfully have been written. Before sending the * message, "present" must be cleared in BOTH lines to prevent the receiver @@ -693,7 +712,8 @@ static void send_message_queue_interrupt(struct gru_message_queue_desc *mqd) static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd, void *mesg, int lines) { - unsigned long m; + unsigned long m, *val = mesg, gpa, save; + int ret; m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); if (lines == 2) { @@ -704,7 +724,26 @@ static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd, gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); if (gru_wait(cb) != CBS_IDLE) return MQE_UNEXPECTED_CB_ERR; - send_message_queue_interrupt(mqd); + + if (!mqd->interrupt_vector) + return MQE_OK; + + /* + * Send a cross-partition interrupt to the SSI that contains the target + * message queue. Normally, the interrupt is automatically delivered by + * hardware but some error conditions require explicit delivery. + * Use the GRU to deliver the interrupt. Otherwise partition failures + * could cause unrecovered errors. + */ + gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT); + save = *val; + *val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector, + dest_Fixed); + gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA); + ret = gru_wait(cb); + *val = save; + if (ret != CBS_IDLE) + return MQE_UNEXPECTED_CB_ERR; return MQE_OK; } @@ -739,6 +778,9 @@ static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd, STAT(mesq_send_put_nacked); ret = send_message_put_nacked(cb, mqd, mesg, lines); break; + case CBSS_PAGE_OVERFLOW: + STAT(mesq_page_overflow); + /* fallthru */ default: BUG(); } @@ -831,7 +873,6 @@ void *gru_get_next_message(struct gru_message_queue_desc *mqd) int present = mhdr->present; /* skip NOOP messages */ - STAT(mesq_receive); while (present == MQS_NOOP) { gru_free_message(mqd, mhdr); mhdr = mq->next; @@ -851,6 +892,7 @@ void *gru_get_next_message(struct gru_message_queue_desc *mqd) if (mhdr->lines == 2) restore_present2(mhdr, mhdr->present2); + STAT(mesq_receive); return mhdr; } EXPORT_SYMBOL_GPL(gru_get_next_message); @@ -858,6 +900,29 @@ EXPORT_SYMBOL_GPL(gru_get_next_message); /* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/ /* + * Load a DW from a global GPA. The GPA can be a memory or MMR address. + */ +int gru_read_gpa(unsigned long *value, unsigned long gpa) +{ + void *cb; + void *dsr; + int ret, iaa; + + STAT(read_gpa); + if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + iaa = gpa >> 62; + gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA); + ret = gru_wait(cb); + if (ret == CBS_IDLE) + *value = *(unsigned long *)dsr; + gru_free_cpu_resources(cb, dsr); + return ret; +} +EXPORT_SYMBOL_GPL(gru_read_gpa); + + +/* * Copy a block of data using the GRU resources */ int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, @@ -898,24 +963,24 @@ static int quicktest0(unsigned long arg) gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); if (gru_wait(cb) != CBS_IDLE) { - printk(KERN_DEBUG "GRU quicktest0: CBR failure 1\n"); + printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id()); goto done; } if (*p != MAGIC) { - printk(KERN_DEBUG "GRU: quicktest0 bad magic 0x%lx\n", *p); + printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p); goto done; } gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); if (gru_wait(cb) != CBS_IDLE) { - printk(KERN_DEBUG "GRU quicktest0: CBR failure 2\n"); + printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id()); goto done; } if (word0 != word1 || word1 != MAGIC) { printk(KERN_DEBUG - "GRU quicktest0 err: found 0x%lx, expected 0x%lx\n", - word1, MAGIC); + "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n", + smp_processor_id(), word1, MAGIC); goto done; } ret = 0; @@ -952,8 +1017,11 @@ static int quicktest1(unsigned long arg) if (ret) break; } - if (ret != MQE_QUEUE_FULL || i != 4) + if (ret != MQE_QUEUE_FULL || i != 4) { + printk(KERN_DEBUG "GRU:%d quicktest1: unexpect status %d, i %d\n", + smp_processor_id(), ret, i); goto done; + } for (i = 0; i < 6; i++) { m = gru_get_next_message(&mqd); @@ -961,7 +1029,12 @@ static int quicktest1(unsigned long arg) break; gru_free_message(&mqd, m); } - ret = (i == 4) ? 0 : -EIO; + if (i != 4) { + printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n", + smp_processor_id(), i, m, m ? m[8] : -1); + goto done; + } + ret = 0; done: kfree(p); @@ -977,6 +1050,7 @@ static int quicktest2(unsigned long arg) int ret = 0; unsigned long *buf; void *cb0, *cb; + struct gru_control_block_status *gen; int i, k, istatus, bytes; bytes = numcb * 4 * 8; @@ -996,20 +1070,30 @@ static int quicktest2(unsigned long arg) XTYPE_DW, 4, 1, IMA_INTERRUPT); ret = 0; - for (k = 0; k < numcb; k++) { + k = numcb; + do { gru_wait_async_cbr(han); for (i = 0; i < numcb; i++) { cb = cb0 + i * GRU_HANDLE_STRIDE; istatus = gru_check_status(cb); - if (istatus == CBS_ACTIVE) - continue; - if (istatus == CBS_EXCEPTION) - ret = -EFAULT; - else if (buf[i] || buf[i + 1] || buf[i + 2] || - buf[i + 3]) - ret = -EIO; + if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS) + break; } - } + if (i == numcb) + continue; + if (istatus != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i); + ret = -EFAULT; + } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] || + buf[4 * i + 3]) { + printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n", + smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]); + ret = -EIO; + } + k--; + gen = cb; + gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */ + } while (k); BUG_ON(cmp.done); gru_unlock_async_resource(han); @@ -1019,6 +1103,22 @@ done: return ret; } +#define BUFSIZE 200 +static int quicktest3(unsigned long arg) +{ + char buf1[BUFSIZE], buf2[BUFSIZE]; + int ret = 0; + + memset(buf2, 0, sizeof(buf2)); + memset(buf1, get_cycles() & 255, sizeof(buf1)); + gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE); + if (memcmp(buf1, buf2, BUFSIZE)) { + printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id()); + ret = -EIO; + } + return ret; +} + /* * Debugging only. User hook for various kernel tests * of driver & gru. @@ -1037,6 +1137,9 @@ int gru_ktest(unsigned long arg) case 2: ret = quicktest2(arg); break; + case 3: + ret = quicktest3(arg); + break; case 99: ret = gru_free_kernel_contexts(); break; diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h index d60d34b..02aa94d 100644 --- a/drivers/misc/sgi-gru/grukservices.h +++ b/drivers/misc/sgi-gru/grukservices.h @@ -131,6 +131,20 @@ extern void *gru_get_next_message(struct gru_message_queue_desc *mqd); /* + * Read a GRU global GPA. Source can be located in a remote partition. + * + * Input: + * value memory address where MMR value is returned + * gpa source numalink physical address of GPA + * + * Output: + * 0 OK + * >0 error + */ +int gru_read_gpa(unsigned long *value, unsigned long gpa); + + +/* * Copy data using the GRU. Source or destination can be located in a remote * partition. * diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h index 889bc44..e77d1b1 100644 --- a/drivers/misc/sgi-gru/grulib.h +++ b/drivers/misc/sgi-gru/grulib.h @@ -63,18 +63,9 @@ #define THREAD_POINTER(p, th) (p + GRU_GSEG_PAGESIZE * (th)) #define GSEG_START(cb) ((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1))) -/* - * Statictics kept on a per-GTS basis. - */ -struct gts_statistics { - unsigned long fmm_tlbdropin; - unsigned long upm_tlbdropin; - unsigned long context_stolen; -}; - struct gru_get_gseg_statistics_req { - unsigned long gseg; - struct gts_statistics stats; + unsigned long gseg; + struct gru_gseg_statistics stats; }; /* @@ -86,6 +77,7 @@ struct gru_create_context_req { unsigned int control_blocks; unsigned int maximum_thread_count; unsigned int options; + unsigned char tlb_preload_count; }; /* @@ -98,11 +90,12 @@ struct gru_unload_context_req { /* * Structure used to set context options */ -enum {sco_gseg_owner, sco_cch_req_slice}; +enum {sco_gseg_owner, sco_cch_req_slice, sco_blade_chiplet}; struct gru_set_context_option_req { unsigned long gseg; int op; - unsigned long val1; + int val0; + long val1; }; /* @@ -124,6 +117,8 @@ struct gru_dump_chiplet_state_req { int ctxnum; char data_opt; char lock_cch; + char flush_cbrs; + char fill[10]; pid_t pid; void *buf; size_t buflen; diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c index 3bc643d..f8538bb 100644 --- a/drivers/misc/sgi-gru/grumain.c +++ b/drivers/misc/sgi-gru/grumain.c @@ -27,6 +27,7 @@ #include <linux/sched.h> #include <linux/device.h> #include <linux/list.h> +#include <linux/err.h> #include <asm/uv/uv_hub.h> #include "gru.h" #include "grutables.h" @@ -48,12 +49,20 @@ struct device *grudev = &gru_device; /* * Select a gru fault map to be used by the current cpu. Note that * multiple cpus may be using the same map. - * ZZZ should "shift" be used?? Depends on HT cpu numbering * ZZZ should be inline but did not work on emulator */ int gru_cpu_fault_map_id(void) { +#ifdef CONFIG_IA64 return uv_blade_processor_id() % GRU_NUM_TFM; +#else + int cpu = smp_processor_id(); + int id, core; + + core = uv_cpu_core_number(cpu); + id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); + return id; +#endif } /*--------- ASID Management ------------------------------------------- @@ -286,7 +295,8 @@ static void gru_unload_mm_tracker(struct gru_state *gru, void gts_drop(struct gru_thread_state *gts) { if (gts && atomic_dec_return(>s->ts_refcnt) == 0) { - gru_drop_mmu_notifier(gts->ts_gms); + if (gts->ts_gms) + gru_drop_mmu_notifier(gts->ts_gms); kfree(gts); STAT(gts_free); } @@ -310,16 +320,18 @@ static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data * Allocate a thread state structure. */ struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, - int cbr_au_count, int dsr_au_count, int options, int tsid) + int cbr_au_count, int dsr_au_count, + unsigned char tlb_preload_count, int options, int tsid) { struct gru_thread_state *gts; + struct gru_mm_struct *gms; int bytes; bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count); bytes += sizeof(struct gru_thread_state); gts = kmalloc(bytes, GFP_KERNEL); if (!gts) - return NULL; + return ERR_PTR(-ENOMEM); STAT(gts_alloc); memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */ @@ -327,7 +339,10 @@ struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, mutex_init(>s->ts_ctxlock); gts->ts_cbr_au_count = cbr_au_count; gts->ts_dsr_au_count = dsr_au_count; + gts->ts_tlb_preload_count = tlb_preload_count; gts->ts_user_options = options; + gts->ts_user_blade_id = -1; + gts->ts_user_chiplet_id = -1; gts->ts_tsid = tsid; gts->ts_ctxnum = NULLCTX; gts->ts_tlb_int_select = -1; @@ -336,9 +351,10 @@ struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, if (vma) { gts->ts_mm = current->mm; gts->ts_vma = vma; - gts->ts_gms = gru_register_mmu_notifier(); - if (!gts->ts_gms) + gms = gru_register_mmu_notifier(); + if (IS_ERR(gms)) goto err; + gts->ts_gms = gms; } gru_dbg(grudev, "alloc gts %p\n", gts); @@ -346,7 +362,7 @@ struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, err: gts_drop(gts); - return NULL; + return ERR_CAST(gms); } /* @@ -360,6 +376,7 @@ struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid) if (!vdata) return NULL; + STAT(vdata_alloc); INIT_LIST_HEAD(&vdata->vd_head); spin_lock_init(&vdata->vd_lock); gru_dbg(grudev, "alloc vdata %p\n", vdata); @@ -392,10 +409,12 @@ struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma, struct gru_vma_data *vdata = vma->vm_private_data; struct gru_thread_state *gts, *ngts; - gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count, vdata->vd_dsr_au_count, + gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count, + vdata->vd_dsr_au_count, + vdata->vd_tlb_preload_count, vdata->vd_user_options, tsid); - if (!gts) - return NULL; + if (IS_ERR(gts)) + return gts; spin_lock(&vdata->vd_lock); ngts = gru_find_current_gts_nolock(vdata, tsid); @@ -493,6 +512,9 @@ static void gru_load_context_data(void *save, void *grubase, int ctxnum, memset(cbe + i * GRU_HANDLE_STRIDE, 0, GRU_CACHE_LINE_BYTES); } + /* Flush CBE to hide race in context restart */ + mb(); + gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE); cb += GRU_HANDLE_STRIDE; } @@ -513,6 +535,12 @@ static void gru_unload_context_data(void *save, void *grubase, int ctxnum, cb = gseg + GRU_CB_BASE; cbe = grubase + GRU_CBE_BASE; length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; + + /* CBEs may not be coherent. Flush them from cache */ + for_each_cbr_in_allocation_map(i, &cbrmap, scr) + gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE); + mb(); /* Let the CL flush complete */ + gru_prefetch_context(gseg, cb, cbe, cbrmap, length); for_each_cbr_in_allocation_map(i, &cbrmap, scr) { @@ -533,7 +561,8 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate) zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE); cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); - gru_dbg(grudev, "gts %p\n", gts); + gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n", + gts, gts->ts_cbr_map, gts->ts_dsr_map); lock_cch_handle(cch); if (cch_interrupt_sync(cch)) BUG(); @@ -549,7 +578,6 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate) if (cch_deallocate(cch)) BUG(); - gts->ts_force_unload = 0; /* ts_force_unload locked by CCH lock */ unlock_cch_handle(cch); gru_free_gru_context(gts); @@ -565,9 +593,7 @@ void gru_load_context(struct gru_thread_state *gts) struct gru_context_configuration_handle *cch; int i, err, asid, ctxnum = gts->ts_ctxnum; - gru_dbg(grudev, "gts %p\n", gts); cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); - lock_cch_handle(cch); cch->tfm_fault_bit_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL @@ -591,6 +617,7 @@ void gru_load_context(struct gru_thread_state *gts) cch->unmap_enable = 1; cch->tfm_done_bit_enable = 1; cch->cb_int_enable = 1; + cch->tlb_int_select = 0; /* For now, ints go to cpu 0 */ } else { cch->unmap_enable = 0; cch->tfm_done_bit_enable = 0; @@ -616,17 +643,18 @@ void gru_load_context(struct gru_thread_state *gts) if (cch_start(cch)) BUG(); unlock_cch_handle(cch); + + gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n", + gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map, + (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select); } /* * Update fields in an active CCH: * - retarget interrupts on local blade * - update sizeavail mask - * - force a delayed context unload by clearing the CCH asids. This - * forces TLB misses for new GRU instructions. The context is unloaded - * when the next TLB miss occurs. */ -int gru_update_cch(struct gru_thread_state *gts, int force_unload) +int gru_update_cch(struct gru_thread_state *gts) { struct gru_context_configuration_handle *cch; struct gru_state *gru = gts->ts_gru; @@ -640,21 +668,13 @@ int gru_update_cch(struct gru_thread_state *gts, int force_unload) goto exit; if (cch_interrupt(cch)) BUG(); - if (!force_unload) { - for (i = 0; i < 8; i++) - cch->sizeavail[i] = gts->ts_sizeavail; - gts->ts_tlb_int_select = gru_cpu_fault_map_id(); - cch->tlb_int_select = gru_cpu_fault_map_id(); - cch->tfm_fault_bit_enable = - (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL - || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); - } else { - for (i = 0; i < 8; i++) - cch->asid[i] = 0; - cch->tfm_fault_bit_enable = 0; - cch->tlb_int_enable = 0; - gts->ts_force_unload = 1; - } + for (i = 0; i < 8; i++) + cch->sizeavail[i] = gts->ts_sizeavail; + gts->ts_tlb_int_select = gru_cpu_fault_map_id(); + cch->tlb_int_select = gru_cpu_fault_map_id(); + cch->tfm_fault_bit_enable = + (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL + || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); if (cch_start(cch)) BUG(); ret = 1; @@ -679,7 +699,54 @@ static int gru_retarget_intr(struct gru_thread_state *gts) gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select, gru_cpu_fault_map_id()); - return gru_update_cch(gts, 0); + return gru_update_cch(gts); +} + +/* + * Check if a GRU context is allowed to use a specific chiplet. By default + * a context is assigned to any blade-local chiplet. However, users can + * override this. + * Returns 1 if assignment allowed, 0 otherwise + */ +static int gru_check_chiplet_assignment(struct gru_state *gru, + struct gru_thread_state *gts) +{ + int blade_id; + int chiplet_id; + + blade_id = gts->ts_user_blade_id; + if (blade_id < 0) + blade_id = uv_numa_blade_id(); + + chiplet_id = gts->ts_user_chiplet_id; + return gru->gs_blade_id == blade_id && + (chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id); +} + +/* + * Unload the gru context if it is not assigned to the correct blade or + * chiplet. Misassignment can occur if the process migrates to a different + * blade or if the user changes the selected blade/chiplet. + */ +void gru_check_context_placement(struct gru_thread_state *gts) +{ + struct gru_state *gru; + + /* + * If the current task is the context owner, verify that the + * context is correctly placed. This test is skipped for non-owner + * references. Pthread apps use non-owner references to the CBRs. + */ + gru = gts->ts_gru; + if (!gru || gts->ts_tgid_owner != current->tgid) + return; + + if (!gru_check_chiplet_assignment(gru, gts)) { + STAT(check_context_unload); + gru_unload_context(gts, 1); + } else if (gru_retarget_intr(gts)) { + STAT(check_context_retarget_intr); + } } @@ -712,13 +779,17 @@ static void gts_stolen(struct gru_thread_state *gts, } } -void gru_steal_context(struct gru_thread_state *gts, int blade_id) +void gru_steal_context(struct gru_thread_state *gts) { struct gru_blade_state *blade; struct gru_state *gru, *gru0; struct gru_thread_state *ngts = NULL; int ctxnum, ctxnum0, flag = 0, cbr, dsr; + int blade_id; + blade_id = gts->ts_user_blade_id; + if (blade_id < 0) + blade_id = uv_numa_blade_id(); cbr = gts->ts_cbr_au_count; dsr = gts->ts_dsr_au_count; @@ -729,35 +800,39 @@ void gru_steal_context(struct gru_thread_state *gts, int blade_id) gru = blade->bs_lru_gru; if (ctxnum == 0) gru = next_gru(blade, gru); + blade->bs_lru_gru = gru; + blade->bs_lru_ctxnum = ctxnum; ctxnum0 = ctxnum; gru0 = gru; while (1) { - if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH)) - break; - spin_lock(&gru->gs_lock); - for (; ctxnum < GRU_NUM_CCH; ctxnum++) { - if (flag && gru == gru0 && ctxnum == ctxnum0) + if (gru_check_chiplet_assignment(gru, gts)) { + if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH)) break; - ngts = gru->gs_gts[ctxnum]; - /* - * We are grabbing locks out of order, so trylock is - * needed. GTSs are usually not locked, so the odds of - * success are high. If trylock fails, try to steal a - * different GSEG. - */ - if (ngts && is_gts_stealable(ngts, blade)) + spin_lock(&gru->gs_lock); + for (; ctxnum < GRU_NUM_CCH; ctxnum++) { + if (flag && gru == gru0 && ctxnum == ctxnum0) + break; + ngts = gru->gs_gts[ctxnum]; + /* + * We are grabbing locks out of order, so trylock is + * needed. GTSs are usually not locked, so the odds of + * success are high. If trylock fails, try to steal a + * different GSEG. + */ + if (ngts && is_gts_stealable(ngts, blade)) + break; + ngts = NULL; + } + spin_unlock(&gru->gs_lock); + if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0)) break; - ngts = NULL; - flag = 1; } - spin_unlock(&gru->gs_lock); - if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0)) + if (flag && gru == gru0) break; + flag = 1; ctxnum = 0; gru = next_gru(blade, gru); } - blade->bs_lru_gru = gru; - blade->bs_lru_ctxnum = ctxnum; spin_unlock(&blade->bs_lock); if (ngts) { @@ -776,19 +851,34 @@ void gru_steal_context(struct gru_thread_state *gts, int blade_id) } /* + * Assign a gru context. + */ +static int gru_assign_context_number(struct gru_state *gru) +{ + int ctxnum; + + ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH); + __set_bit(ctxnum, &gru->gs_context_map); + return ctxnum; +} + +/* * Scan the GRUs on the local blade & assign a GRU context. */ -struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts, - int blade) +struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts) { struct gru_state *gru, *grux; int i, max_active_contexts; + int blade_id = gts->ts_user_blade_id; - + if (blade_id < 0) + blade_id = uv_numa_blade_id(); again: gru = NULL; max_active_contexts = GRU_NUM_CCH; - for_each_gru_on_blade(grux, blade, i) { + for_each_gru_on_blade(grux, blade_id, i) { + if (!gru_check_chiplet_assignment(grux, gts)) + continue; if (check_gru_resources(grux, gts->ts_cbr_au_count, gts->ts_dsr_au_count, max_active_contexts)) { @@ -809,12 +899,9 @@ again: reserve_gru_resources(gru, gts); gts->ts_gru = gru; gts->ts_blade = gru->gs_blade_id; - gts->ts_ctxnum = - find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH); - BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH); + gts->ts_ctxnum = gru_assign_context_number(gru); atomic_inc(>s->ts_refcnt); gru->gs_gts[gts->ts_ctxnum] = gts; - __set_bit(gts->ts_ctxnum, &gru->gs_context_map); spin_unlock(&gru->gs_lock); STAT(assign_context); @@ -842,7 +929,6 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct gru_thread_state *gts; unsigned long paddr, vaddr; - int blade_id; vaddr = (unsigned long)vmf->virtual_address; gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n", @@ -857,28 +943,18 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf) again: mutex_lock(>s->ts_ctxlock); preempt_disable(); - blade_id = uv_numa_blade_id(); - if (gts->ts_gru) { - if (gts->ts_gru->gs_blade_id != blade_id) { - STAT(migrated_nopfn_unload); - gru_unload_context(gts, 1); - } else { - if (gru_retarget_intr(gts)) - STAT(migrated_nopfn_retarget); - } - } + gru_check_context_placement(gts); if (!gts->ts_gru) { STAT(load_user_context); - if (!gru_assign_gru_context(gts, blade_id)) { + if (!gru_assign_gru_context(gts)) { preempt_enable(); mutex_unlock(>s->ts_ctxlock); set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */ - blade_id = uv_numa_blade_id(); if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies) - gru_steal_context(gts, blade_id); + gru_steal_context(gts); goto again; } gru_load_context(gts); diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c index 3f2375c..7768b87 100644 --- a/drivers/misc/sgi-gru/gruprocfs.c +++ b/drivers/misc/sgi-gru/gruprocfs.c @@ -36,8 +36,7 @@ static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id) { unsigned long val = atomic_long_read(v); - if (val) - seq_printf(s, "%16lu %s\n", val, id); + seq_printf(s, "%16lu %s\n", val, id); } static int statistics_show(struct seq_file *s, void *p) @@ -46,7 +45,8 @@ static int statistics_show(struct seq_file *s, void *p) printstat(s, vdata_free); printstat(s, gts_alloc); printstat(s, gts_free); - printstat(s, vdata_double_alloc); + printstat(s, gms_alloc); + printstat(s, gms_free); printstat(s, gts_double_allocate); printstat(s, assign_context); printstat(s, assign_context_failed); @@ -59,28 +59,25 @@ static int statistics_show(struct seq_file *s, void *p) printstat(s, steal_kernel_context); printstat(s, steal_context_failed); printstat(s, nopfn); - printstat(s, break_cow); printstat(s, asid_new); printstat(s, asid_next); printstat(s, asid_wrap); printstat(s, asid_reuse); printstat(s, intr); + printstat(s, intr_cbr); + printstat(s, intr_tfh); + printstat(s, intr_spurious); printstat(s, intr_mm_lock_failed); printstat(s, call_os); - printstat(s, call_os_offnode_reference); - printstat(s, call_os_check_for_bug); printstat(s, call_os_wait_queue); printstat(s, user_flush_tlb); printstat(s, user_unload_context); printstat(s, user_exception); printstat(s, set_context_option); - printstat(s, migrate_check); - printstat(s, migrated_retarget); - printstat(s, migrated_unload); - printstat(s, migrated_unload_delay); - printstat(s, migrated_nopfn_retarget); - printstat(s, migrated_nopfn_unload); + printstat(s, check_context_retarget_intr); + printstat(s, check_context_unload); printstat(s, tlb_dropin); + printstat(s, tlb_preload_page); printstat(s, tlb_dropin_fail_no_asid); printstat(s, tlb_dropin_fail_upm); printstat(s, tlb_dropin_fail_invalid); @@ -88,16 +85,15 @@ static int statistics_show(struct seq_file *s, void *p) printstat(s, tlb_dropin_fail_idle); printstat(s, tlb_dropin_fail_fmm); printstat(s, tlb_dropin_fail_no_exception); - printstat(s, tlb_dropin_fail_no_exception_war); printstat(s, tfh_stale_on_fault); printstat(s, mmu_invalidate_range); printstat(s, mmu_invalidate_page); - printstat(s, mmu_clear_flush_young); printstat(s, flush_tlb); printstat(s, flush_tlb_gru); printstat(s, flush_tlb_gru_tgh); printstat(s, flush_tlb_gru_zero_asid); printstat(s, copy_gpa); + printstat(s, read_gpa); printstat(s, mesq_receive); printstat(s, mesq_receive_none); printstat(s, mesq_send); @@ -108,7 +104,6 @@ static int statistics_show(struct seq_file *s, void *p) printstat(s, mesq_send_qlimit_reached); printstat(s, mesq_send_amo_nacked); printstat(s, mesq_send_put_nacked); - printstat(s, mesq_qf_not_full); printstat(s, mesq_qf_locked); printstat(s, mesq_qf_noop_not_full); printstat(s, mesq_qf_switch_head_failed); @@ -118,6 +113,7 @@ static int statistics_show(struct seq_file *s, void *p) printstat(s, mesq_noop_qlimit_reached); printstat(s, mesq_noop_amo_nacked); printstat(s, mesq_noop_put_nacked); + printstat(s, mesq_noop_page_overflow); return 0; } @@ -133,8 +129,10 @@ static int mcs_statistics_show(struct seq_file *s, void *p) int op; unsigned long total, count, max; static char *id[] = {"cch_allocate", "cch_start", "cch_interrupt", - "cch_interrupt_sync", "cch_deallocate", "tgh_invalidate"}; + "cch_interrupt_sync", "cch_deallocate", "tfh_write_only", + "tfh_write_restart", "tgh_invalidate"}; + seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks"); for (op = 0; op < mcsop_last; op++) { count = atomic_long_read(&mcs_op_statistics[op].count); total = atomic_long_read(&mcs_op_statistics[op].total); @@ -154,6 +152,7 @@ static ssize_t mcs_statistics_write(struct file *file, static int options_show(struct seq_file *s, void *p) { + seq_printf(s, "#bitmask: 1=trace, 2=statistics\n"); seq_printf(s, "0x%lx\n", gru_options); return 0; } @@ -183,16 +182,17 @@ static int cch_seq_show(struct seq_file *file, void *data) const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" }; if (gid == 0) - seq_printf(file, "#%5s%5s%6s%9s%6s%8s%8s\n", "gid", "bid", - "ctx#", "pid", "cbrs", "dsbytes", "mode"); + seq_printf(file, "#%5s%5s%6s%7s%9s%6s%8s%8s\n", "gid", "bid", + "ctx#", "asid", "pid", "cbrs", "dsbytes", "mode"); if (gru) for (i = 0; i < GRU_NUM_CCH; i++) { ts = gru->gs_gts[i]; if (!ts) continue; - seq_printf(file, " %5d%5d%6d%9d%6d%8d%8s\n", + seq_printf(file, " %5d%5d%6d%7d%9d%6d%8d%8s\n", gru->gs_gid, gru->gs_blade_id, i, - ts->ts_tgid_owner, + is_kernel_context(ts) ? 0 : ts->ts_gms->ms_asids[gid].mt_asid, + is_kernel_context(ts) ? 0 : ts->ts_tgid_owner, ts->ts_cbr_au_count * GRU_CBR_AU_SIZE, ts->ts_cbr_au_count * GRU_DSR_AU_BYTES, mode[ts->ts_user_options & @@ -355,7 +355,7 @@ static void delete_proc_files(void) for (p = proc_files; p->name; p++) if (p->entry) remove_proc_entry(p->name, proc_gru); - remove_proc_entry("gru", NULL); + remove_proc_entry("gru", proc_gru->parent); } } diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index 46990bc..02a77b8 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ b/drivers/misc/sgi-gru/grutables.h @@ -161,7 +161,7 @@ extern unsigned int gru_max_gids; #define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE) #define GRU_DRIVER_ID_STR "SGI GRU Device Driver" -#define GRU_DRIVER_VERSION_STR "0.80" +#define GRU_DRIVER_VERSION_STR "0.85" /* * GRU statistics. @@ -171,7 +171,8 @@ struct gru_stats_s { atomic_long_t vdata_free; atomic_long_t gts_alloc; atomic_long_t gts_free; - atomic_long_t vdata_double_alloc; + atomic_long_t gms_alloc; + atomic_long_t gms_free; atomic_long_t gts_double_allocate; atomic_long_t assign_context; atomic_long_t assign_context_failed; @@ -184,28 +185,25 @@ struct gru_stats_s { atomic_long_t steal_kernel_context; atomic_long_t steal_context_failed; atomic_long_t nopfn; - atomic_long_t break_cow; atomic_long_t asid_new; atomic_long_t asid_next; atomic_long_t asid_wrap; atomic_long_t asid_reuse; atomic_long_t intr; + atomic_long_t intr_cbr; + atomic_long_t intr_tfh; + atomic_long_t intr_spurious; atomic_long_t intr_mm_lock_failed; atomic_long_t call_os; - atomic_long_t call_os_offnode_reference; - atomic_long_t call_os_check_for_bug; atomic_long_t call_os_wait_queue; atomic_long_t user_flush_tlb; atomic_long_t user_unload_context; atomic_long_t user_exception; atomic_long_t set_context_option; - atomic_long_t migrate_check; - atomic_long_t migrated_retarget; - atomic_long_t migrated_unload; - atomic_long_t migrated_unload_delay; - atomic_long_t migrated_nopfn_retarget; - atomic_long_t migrated_nopfn_unload; + atomic_long_t check_context_retarget_intr; + atomic_long_t check_context_unload; atomic_long_t tlb_dropin; + atomic_long_t tlb_preload_page; atomic_long_t tlb_dropin_fail_no_asid; atomic_long_t tlb_dropin_fail_upm; atomic_long_t tlb_dropin_fail_invalid; @@ -213,17 +211,16 @@ struct gru_stats_s { atomic_long_t tlb_dropin_fail_idle; atomic_long_t tlb_dropin_fail_fmm; atomic_long_t tlb_dropin_fail_no_exception; - atomic_long_t tlb_dropin_fail_no_exception_war; atomic_long_t tfh_stale_on_fault; atomic_long_t mmu_invalidate_range; atomic_long_t mmu_invalidate_page; - atomic_long_t mmu_clear_flush_young; atomic_long_t flush_tlb; atomic_long_t flush_tlb_gru; atomic_long_t flush_tlb_gru_tgh; atomic_long_t flush_tlb_gru_zero_asid; atomic_long_t copy_gpa; + atomic_long_t read_gpa; atomic_long_t mesq_receive; atomic_long_t mesq_receive_none; @@ -235,7 +232,7 @@ struct gru_stats_s { atomic_long_t mesq_send_qlimit_reached; atomic_long_t mesq_send_amo_nacked; atomic_long_t mesq_send_put_nacked; - atomic_long_t mesq_qf_not_full; + atomic_long_t mesq_page_overflow; atomic_long_t mesq_qf_locked; atomic_long_t mesq_qf_noop_not_full; atomic_long_t mesq_qf_switch_head_failed; @@ -245,11 +242,13 @@ struct gru_stats_s { atomic_long_t mesq_noop_qlimit_reached; atomic_long_t mesq_noop_amo_nacked; atomic_long_t mesq_noop_put_nacked; + atomic_long_t mesq_noop_page_overflow; }; enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync, - cchop_deallocate, tghop_invalidate, mcsop_last}; + cchop_deallocate, tfhop_write_only, tfhop_write_restart, + tghop_invalidate, mcsop_last}; struct mcs_op_statistic { atomic_long_t count; @@ -259,8 +258,8 @@ struct mcs_op_statistic { extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; -#define OPT_DPRINT 1 -#define OPT_STATS 2 +#define OPT_DPRINT 1 +#define OPT_STATS 2 #define IRQ_GRU 110 /* Starting IRQ number for interrupts */ @@ -283,7 +282,7 @@ extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; #define gru_dbg(dev, fmt, x...) \ do { \ if (gru_options & OPT_DPRINT) \ - dev_dbg(dev, "%s: " fmt, __func__, x); \ + printk(KERN_DEBUG "GRU:%d %s: " fmt, smp_processor_id(), __func__, x);\ } while (0) #else #define gru_dbg(x...) @@ -297,13 +296,7 @@ extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; #define ASID_INC 8 /* number of regions */ /* Generate a GRU asid value from a GRU base asid & a virtual address. */ -#if defined CONFIG_IA64 #define VADDR_HI_BIT 64 -#elif defined CONFIG_X86_64 -#define VADDR_HI_BIT 48 -#else -#error "Unsupported architecture" -#endif #define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3) #define GRUASID(asid, addr) ((asid) + GRUREGION(addr)) @@ -345,6 +338,7 @@ struct gru_vma_data { long vd_user_options;/* misc user option flags */ int vd_cbr_au_count; int vd_dsr_au_count; + unsigned char vd_tlb_preload_count; }; /* @@ -360,6 +354,7 @@ struct gru_thread_state { struct gru_state *ts_gru; /* GRU where the context is loaded */ struct gru_mm_struct *ts_gms; /* asid & ioproc struct */ + unsigned char ts_tlb_preload_count; /* TLB preload pages */ unsigned long ts_cbr_map; /* map of allocated CBRs */ unsigned long ts_dsr_map; /* map of allocated DATA resources */ @@ -368,6 +363,8 @@ struct gru_thread_state { long ts_user_options;/* misc user option flags */ pid_t ts_tgid_owner; /* task that is using the context - for migration */ + short ts_user_blade_id;/* user selected blade */ + char ts_user_chiplet_id;/* user selected chiplet */ unsigned short ts_sizeavail; /* Pagesizes in use */ int ts_tsid; /* thread that owns the structure */ @@ -384,13 +381,11 @@ struct gru_thread_state { char ts_blade; /* If >= 0, migrate context if ref from diferent blade */ char ts_force_cch_reload; - char ts_force_unload;/* force context to be unloaded - after migration */ char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each allocated CB */ int ts_data_valid; /* Indicates if ts_gdata has valid data */ - struct gts_statistics ustats; /* User statistics */ + struct gru_gseg_statistics ustats; /* User statistics */ unsigned long ts_gdata[0]; /* save area for GRU data (CB, DS, CBE) */ }; @@ -422,6 +417,7 @@ struct gru_state { gru segments (64) */ unsigned short gs_gid; /* unique GRU number */ unsigned short gs_blade_id; /* blade of GRU */ + unsigned char gs_chiplet_id; /* blade chiplet of GRU */ unsigned char gs_tgh_local_shift; /* used to pick TGH for local flush */ unsigned char gs_tgh_first_remote; /* starting TGH# for @@ -453,6 +449,7 @@ struct gru_state { in use */ struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using the context */ + int gs_irq[GRU_NUM_TFM]; /* Interrupt irqs */ }; /* @@ -619,6 +616,15 @@ static inline int is_kernel_context(struct gru_thread_state *gts) return !gts->ts_mm; } +/* + * The following are for Nehelem-EX. A more general scheme is needed for + * future processors. + */ +#define UV_MAX_INT_CORES 8 +#define uv_cpu_socket_number(p) ((cpu_physical_id(p) >> 5) & 1) +#define uv_cpu_ht_number(p) (cpu_physical_id(p) & 1) +#define uv_cpu_core_number(p) (((cpu_physical_id(p) >> 2) & 4) | \ + ((cpu_physical_id(p) >> 1) & 3)) /*----------------------------------------------------------------------------- * Function prototypes & externs */ @@ -633,24 +639,26 @@ extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma, int tsid); extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma, int tsid); -extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts, - int blade); +extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts); extern void gru_load_context(struct gru_thread_state *gts); -extern void gru_steal_context(struct gru_thread_state *gts, int blade_id); +extern void gru_steal_context(struct gru_thread_state *gts); extern void gru_unload_context(struct gru_thread_state *gts, int savestate); -extern int gru_update_cch(struct gru_thread_state *gts, int force_unload); +extern int gru_update_cch(struct gru_thread_state *gts); extern void gts_drop(struct gru_thread_state *gts); extern void gru_tgh_flush_init(struct gru_state *gru); extern int gru_kservices_init(void); extern void gru_kservices_exit(void); +extern irqreturn_t gru0_intr(int irq, void *dev_id); +extern irqreturn_t gru1_intr(int irq, void *dev_id); +extern irqreturn_t gru_intr_mblade(int irq, void *dev_id); extern int gru_dump_chiplet_request(unsigned long arg); extern long gru_get_gseg_statistics(unsigned long arg); -extern irqreturn_t gru_intr(int irq, void *dev_id); extern int gru_handle_user_call_os(unsigned long address); extern int gru_user_flush_tlb(unsigned long arg); extern int gru_user_unload_context(unsigned long arg); extern int gru_get_exception_detail(unsigned long arg); extern int gru_set_context_option(unsigned long address); +extern void gru_check_context_placement(struct gru_thread_state *gts); extern int gru_cpu_fault_map_id(void); extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); extern void gru_flush_all_tlb(struct gru_state *gru); @@ -658,7 +666,8 @@ extern int gru_proc_init(void); extern void gru_proc_exit(void); extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, - int cbr_au_count, int dsr_au_count, int options, int tsid); + int cbr_au_count, int dsr_au_count, + unsigned char tlb_preload_count, int options, int tsid); extern unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count, char *cbmap); extern unsigned long gru_reserve_ds_resources(struct gru_state *gru, diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c index 1d12509..240a6d3 100644 --- a/drivers/misc/sgi-gru/grutlbpurge.c +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -184,8 +184,8 @@ void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, STAT(flush_tlb_gru_tgh); asid = GRUASID(asid, start); gru_dbg(grudev, - " FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n", - gid, asid, num, asids->mt_ctxbitmap); + " FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n", + gid, asid, start, grupagesize, num, asids->mt_ctxbitmap); tgh = get_lock_tgh_handle(gru); tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0, num - 1, asids->mt_ctxbitmap); @@ -299,6 +299,7 @@ struct gru_mm_struct *gru_register_mmu_notifier(void) { struct gru_mm_struct *gms; struct mmu_notifier *mn; + int err; mn = mmu_find_ops(current->mm, &gru_mmuops); if (mn) { @@ -307,16 +308,22 @@ struct gru_mm_struct *gru_register_mmu_notifier(void) } else { gms = kzalloc(sizeof(*gms), GFP_KERNEL); if (gms) { + STAT(gms_alloc); spin_lock_init(&gms->ms_asid_lock); gms->ms_notifier.ops = &gru_mmuops; atomic_set(&gms->ms_refcnt, 1); init_waitqueue_head(&gms->ms_wait_queue); - __mmu_notifier_register(&gms->ms_notifier, current->mm); + err = __mmu_notifier_register(&gms->ms_notifier, current->mm); + if (err) + goto error; } } gru_dbg(grudev, "gms %p, refcnt %d\n", gms, atomic_read(&gms->ms_refcnt)); return gms; +error: + kfree(gms); + return ERR_PTR(err); } void gru_drop_mmu_notifier(struct gru_mm_struct *gms) @@ -327,6 +334,7 @@ void gru_drop_mmu_notifier(struct gru_mm_struct *gms) if (!gms->ms_released) mmu_notifier_unregister(&gms->ms_notifier, current->mm); kfree(gms); + STAT(gms_free); } } |