diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-25 15:59:38 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-25 15:59:38 -0700 |
commit | fa2af6e4fe0c4d2f8875d42625b25675e8584010 (patch) | |
tree | ef9a92949858ab763aa1bfda7cb11a5f7b84d123 /arch/tile/kernel | |
parent | 109b9b0408e5f1dd327a44f446841a9fbe0bcd83 (diff) | |
parent | 1fcb78e9da714d96f65edd37b29dae3b1f7df508 (diff) | |
download | op-kernel-dev-fa2af6e4fe0c4d2f8875d42625b25675e8584010.zip op-kernel-dev-fa2af6e4fe0c4d2f8875d42625b25675e8584010.tar.gz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile
Pull tile updates from Chris Metcalf:
"These changes cover a range of new arch/tile features and
optimizations. They've been through LKML review and on linux-next for
a month or so. There's also one bug-fix that just missed 3.4, which
I've marked for stable."
Fixed up trivial conflict in arch/tile/Kconfig (new added tile Kconfig
entries clashing with the generic timer/clockevents changes).
* git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile:
tile: default to tilegx_defconfig for ARCH=tile
tile: fix bug where fls(0) was not returning 0
arch/tile: mark TILEGX as not EXPERIMENTAL
tile/mm/fault.c: Port OOM changes to handle_page_fault
arch/tile: add descriptive text if the kernel reports a bad trap
arch/tile: allow querying cpu module information from the hypervisor
arch/tile: fix hardwall for tilegx and generalize for idn and ipi
arch/tile: support multiple huge page sizes dynamically
mm: add new arch_make_huge_pte() method for tile support
arch/tile: support kexec() for tilegx
arch/tile: support <asm/cachectl.h> header for cacheflush() syscall
arch/tile: Allow tilegx to build with either 16K or 64K page size
arch/tile: optimize get_user/put_user and friends
arch/tile: support building big-endian kernel
arch/tile: allow building Linux with transparent huge pages enabled
arch/tile: use interrupt critical sections less
Diffstat (limited to 'arch/tile/kernel')
-rw-r--r-- | arch/tile/kernel/Makefile | 3 | ||||
-rw-r--r-- | arch/tile/kernel/entry.S | 3 | ||||
-rw-r--r-- | arch/tile/kernel/hardwall.c | 754 | ||||
-rw-r--r-- | arch/tile/kernel/head_32.S | 8 | ||||
-rw-r--r-- | arch/tile/kernel/head_64.S | 22 | ||||
-rw-r--r-- | arch/tile/kernel/hvglue.lds | 3 | ||||
-rw-r--r-- | arch/tile/kernel/intvec_64.S | 80 | ||||
-rw-r--r-- | arch/tile/kernel/machine_kexec.c | 42 | ||||
-rw-r--r-- | arch/tile/kernel/module.c | 12 | ||||
-rw-r--r-- | arch/tile/kernel/proc.c | 1 | ||||
-rw-r--r-- | arch/tile/kernel/process.c | 16 | ||||
-rw-r--r-- | arch/tile/kernel/relocate_kernel_32.S (renamed from arch/tile/kernel/relocate_kernel.S) | 0 | ||||
-rw-r--r-- | arch/tile/kernel/relocate_kernel_64.S | 260 | ||||
-rw-r--r-- | arch/tile/kernel/setup.c | 169 | ||||
-rw-r--r-- | arch/tile/kernel/single_step.c | 16 | ||||
-rw-r--r-- | arch/tile/kernel/smp.c | 2 | ||||
-rw-r--r-- | arch/tile/kernel/sys.c | 10 | ||||
-rw-r--r-- | arch/tile/kernel/sysfs.c | 8 | ||||
-rw-r--r-- | arch/tile/kernel/tlb.c | 11 | ||||
-rw-r--r-- | arch/tile/kernel/traps.c | 30 |
20 files changed, 1057 insertions, 393 deletions
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index 0d826fa..5de9924 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -9,10 +9,9 @@ obj-y := backtrace.o entry.o irq.o messaging.o \ intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o obj-$(CONFIG_HARDWALL) += hardwall.o -obj-$(CONFIG_TILEGX) += futex_64.o obj-$(CONFIG_COMPAT) += compat.o compat_signal.o obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel_$(BITS).o obj-$(CONFIG_PCI) += pci.o diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index ec91568..133c4b5 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -100,8 +100,9 @@ STD_ENTRY(smp_nap) */ STD_ENTRY(_cpu_idle) movei r1, 1 + IRQ_ENABLE_LOAD(r2, r3) mtspr INTERRUPT_CRITICAL_SECTION, r1 - IRQ_ENABLE(r2, r3) /* unmask, but still with ICS set */ + IRQ_ENABLE_APPLY(r2, r3) /* unmask, but still with ICS set */ mtspr INTERRUPT_CRITICAL_SECTION, zero .global _cpu_idle_nap _cpu_idle_nap: diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index 8c41891..20273ee 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -33,59 +33,157 @@ /* - * This data structure tracks the rectangle data, etc., associated - * one-to-one with a "struct file *" from opening HARDWALL_FILE. + * Implement a per-cpu "hardwall" resource class such as UDN or IPI. + * We use "hardwall" nomenclature throughout for historical reasons. + * The lock here controls access to the list data structure as well as + * to the items on the list. + */ +struct hardwall_type { + int index; + int is_xdn; + int is_idn; + int disabled; + const char *name; + struct list_head list; + spinlock_t lock; + struct proc_dir_entry *proc_dir; +}; + +enum hardwall_index { + HARDWALL_UDN = 0, +#ifndef __tilepro__ + HARDWALL_IDN = 1, + HARDWALL_IPI = 2, +#endif + _HARDWALL_TYPES +}; + +static struct hardwall_type hardwall_types[] = { + { /* user-space access to UDN */ + 0, + 1, + 0, + 0, + "udn", + LIST_HEAD_INIT(hardwall_types[HARDWALL_UDN].list), + __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_UDN].lock), + NULL + }, +#ifndef __tilepro__ + { /* user-space access to IDN */ + 1, + 1, + 1, + 1, /* disabled pending hypervisor support */ + "idn", + LIST_HEAD_INIT(hardwall_types[HARDWALL_IDN].list), + __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_IDN].lock), + NULL + }, + { /* access to user-space IPI */ + 2, + 0, + 0, + 0, + "ipi", + LIST_HEAD_INIT(hardwall_types[HARDWALL_IPI].list), + __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_IPI].lock), + NULL + }, +#endif +}; + +/* + * This data structure tracks the cpu data, etc., associated + * one-to-one with a "struct file *" from opening a hardwall device file. * Note that the file's private data points back to this structure. */ struct hardwall_info { - struct list_head list; /* "rectangles" list */ + struct list_head list; /* for hardwall_types.list */ struct list_head task_head; /* head of tasks in this hardwall */ - struct cpumask cpumask; /* cpus in the rectangle */ + struct hardwall_type *type; /* type of this resource */ + struct cpumask cpumask; /* cpus reserved */ + int id; /* integer id for this hardwall */ + int teardown_in_progress; /* are we tearing this one down? */ + + /* Remaining fields only valid for user-network resources. */ int ulhc_x; /* upper left hand corner x coord */ int ulhc_y; /* upper left hand corner y coord */ int width; /* rectangle width */ int height; /* rectangle height */ - int id; /* integer id for this hardwall */ - int teardown_in_progress; /* are we tearing this one down? */ +#if CHIP_HAS_REV1_XDN() + atomic_t xdn_pending_count; /* cores in phase 1 of drain */ +#endif }; -/* Currently allocated hardwall rectangles */ -static LIST_HEAD(rectangles); /* /proc/tile/hardwall */ static struct proc_dir_entry *hardwall_proc_dir; /* Functions to manage files in /proc/tile/hardwall. */ -static void hardwall_add_proc(struct hardwall_info *rect); -static void hardwall_remove_proc(struct hardwall_info *rect); - -/* - * Guard changes to the hardwall data structures. - * This could be finer grained (e.g. one lock for the list of hardwall - * rectangles, then separate embedded locks for each one's list of tasks), - * but there are subtle correctness issues when trying to start with - * a task's "hardwall" pointer and lock the correct rectangle's embedded - * lock in the presence of a simultaneous deactivation, so it seems - * easier to have a single lock, given that none of these data - * structures are touched very frequently during normal operation. - */ -static DEFINE_SPINLOCK(hardwall_lock); +static void hardwall_add_proc(struct hardwall_info *); +static void hardwall_remove_proc(struct hardwall_info *); /* Allow disabling UDN access. */ -static int udn_disabled; static int __init noudn(char *str) { pr_info("User-space UDN access is disabled\n"); - udn_disabled = 1; + hardwall_types[HARDWALL_UDN].disabled = 1; return 0; } early_param("noudn", noudn); +#ifndef __tilepro__ +/* Allow disabling IDN access. */ +static int __init noidn(char *str) +{ + pr_info("User-space IDN access is disabled\n"); + hardwall_types[HARDWALL_IDN].disabled = 1; + return 0; +} +early_param("noidn", noidn); + +/* Allow disabling IPI access. */ +static int __init noipi(char *str) +{ + pr_info("User-space IPI access is disabled\n"); + hardwall_types[HARDWALL_IPI].disabled = 1; + return 0; +} +early_param("noipi", noipi); +#endif + /* - * Low-level primitives + * Low-level primitives for UDN/IDN */ +#ifdef __tilepro__ +#define mtspr_XDN(hwt, name, val) \ + do { (void)(hwt); __insn_mtspr(SPR_UDN_##name, (val)); } while (0) +#define mtspr_MPL_XDN(hwt, name, val) \ + do { (void)(hwt); __insn_mtspr(SPR_MPL_UDN_##name, (val)); } while (0) +#define mfspr_XDN(hwt, name) \ + ((void)(hwt), __insn_mfspr(SPR_UDN_##name)) +#else +#define mtspr_XDN(hwt, name, val) \ + do { \ + if ((hwt)->is_idn) \ + __insn_mtspr(SPR_IDN_##name, (val)); \ + else \ + __insn_mtspr(SPR_UDN_##name, (val)); \ + } while (0) +#define mtspr_MPL_XDN(hwt, name, val) \ + do { \ + if ((hwt)->is_idn) \ + __insn_mtspr(SPR_MPL_IDN_##name, (val)); \ + else \ + __insn_mtspr(SPR_MPL_UDN_##name, (val)); \ + } while (0) +#define mfspr_XDN(hwt, name) \ + ((hwt)->is_idn ? __insn_mfspr(SPR_IDN_##name) : __insn_mfspr(SPR_UDN_##name)) +#endif + /* Set a CPU bit if the CPU is online. */ #define cpu_online_set(cpu, dst) do { \ if (cpu_online(cpu)) \ @@ -101,7 +199,7 @@ static int contains(struct hardwall_info *r, int x, int y) } /* Compute the rectangle parameters and validate the cpumask. */ -static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) +static int check_rectangle(struct hardwall_info *r, struct cpumask *mask) { int x, y, cpu, ulhc, lrhc; @@ -114,8 +212,6 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) r->ulhc_y = cpu_y(ulhc); r->width = cpu_x(lrhc) - r->ulhc_x + 1; r->height = cpu_y(lrhc) - r->ulhc_y + 1; - cpumask_copy(&r->cpumask, mask); - r->id = ulhc; /* The ulhc cpu id can be the hardwall id. */ /* Width and height must be positive */ if (r->width <= 0 || r->height <= 0) @@ -128,7 +224,7 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) return -EINVAL; /* - * Note that offline cpus can't be drained when this UDN + * Note that offline cpus can't be drained when this user network * rectangle eventually closes. We used to detect this * situation and print a warning, but it annoyed users and * they ignored it anyway, so now we just return without a @@ -137,16 +233,6 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) return 0; } -/* Do the two given rectangles overlap on any cpu? */ -static int overlaps(struct hardwall_info *a, struct hardwall_info *b) -{ - return a->ulhc_x + a->width > b->ulhc_x && /* A not to the left */ - b->ulhc_x + b->width > a->ulhc_x && /* B not to the left */ - a->ulhc_y + a->height > b->ulhc_y && /* A not above */ - b->ulhc_y + b->height > a->ulhc_y; /* B not above */ -} - - /* * Hardware management of hardwall setup, teardown, trapping, * and enabling/disabling PL0 access to the networks. @@ -157,23 +243,35 @@ enum direction_protect { N_PROTECT = (1 << 0), E_PROTECT = (1 << 1), S_PROTECT = (1 << 2), - W_PROTECT = (1 << 3) + W_PROTECT = (1 << 3), + C_PROTECT = (1 << 4), }; -static void enable_firewall_interrupts(void) +static inline int xdn_which_interrupt(struct hardwall_type *hwt) +{ +#ifndef __tilepro__ + if (hwt->is_idn) + return INT_IDN_FIREWALL; +#endif + return INT_UDN_FIREWALL; +} + +static void enable_firewall_interrupts(struct hardwall_type *hwt) { - arch_local_irq_unmask_now(INT_UDN_FIREWALL); + arch_local_irq_unmask_now(xdn_which_interrupt(hwt)); } -static void disable_firewall_interrupts(void) +static void disable_firewall_interrupts(struct hardwall_type *hwt) { - arch_local_irq_mask_now(INT_UDN_FIREWALL); + arch_local_irq_mask_now(xdn_which_interrupt(hwt)); } /* Set up hardwall on this cpu based on the passed hardwall_info. */ -static void hardwall_setup_ipi_func(void *info) +static void hardwall_setup_func(void *info) { struct hardwall_info *r = info; + struct hardwall_type *hwt = r->type; + int cpu = smp_processor_id(); int x = cpu % smp_width; int y = cpu / smp_width; @@ -187,13 +285,12 @@ static void hardwall_setup_ipi_func(void *info) if (y == r->ulhc_y + r->height - 1) bits |= S_PROTECT; BUG_ON(bits == 0); - __insn_mtspr(SPR_UDN_DIRECTION_PROTECT, bits); - enable_firewall_interrupts(); - + mtspr_XDN(hwt, DIRECTION_PROTECT, bits); + enable_firewall_interrupts(hwt); } /* Set up all cpus on edge of rectangle to enable/disable hardwall SPRs. */ -static void hardwall_setup(struct hardwall_info *r) +static void hardwall_protect_rectangle(struct hardwall_info *r) { int x, y, cpu, delta; struct cpumask rect_cpus; @@ -217,37 +314,50 @@ static void hardwall_setup(struct hardwall_info *r) } /* Then tell all the cpus to set up their protection SPR */ - on_each_cpu_mask(&rect_cpus, hardwall_setup_ipi_func, r, 1); + on_each_cpu_mask(&rect_cpus, hardwall_setup_func, r, 1); } void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) { struct hardwall_info *rect; + struct hardwall_type *hwt; struct task_struct *p; struct siginfo info; - int x, y; int cpu = smp_processor_id(); int found_processes; unsigned long flags; - struct pt_regs *old_regs = set_irq_regs(regs); + irq_enter(); + /* Figure out which network trapped. */ + switch (fault_num) { +#ifndef __tilepro__ + case INT_IDN_FIREWALL: + hwt = &hardwall_types[HARDWALL_IDN]; + break; +#endif + case INT_UDN_FIREWALL: + hwt = &hardwall_types[HARDWALL_UDN]; + break; + default: + BUG(); + } + BUG_ON(hwt->disabled); + /* This tile trapped a network access; find the rectangle. */ - x = cpu % smp_width; - y = cpu / smp_width; - spin_lock_irqsave(&hardwall_lock, flags); - list_for_each_entry(rect, &rectangles, list) { - if (contains(rect, x, y)) + spin_lock_irqsave(&hwt->lock, flags); + list_for_each_entry(rect, &hwt->list, list) { + if (cpumask_test_cpu(cpu, &rect->cpumask)) break; } /* * It shouldn't be possible not to find this cpu on the * rectangle list, since only cpus in rectangles get hardwalled. - * The hardwall is only removed after the UDN is drained. + * The hardwall is only removed after the user network is drained. */ - BUG_ON(&rect->list == &rectangles); + BUG_ON(&rect->list == &hwt->list); /* * If we already started teardown on this hardwall, don't worry; @@ -255,30 +365,32 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) * to quiesce. */ if (rect->teardown_in_progress) { - pr_notice("cpu %d: detected hardwall violation %#lx" + pr_notice("cpu %d: detected %s hardwall violation %#lx" " while teardown already in progress\n", - cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT)); + cpu, hwt->name, + (long)mfspr_XDN(hwt, DIRECTION_PROTECT)); goto done; } /* * Kill off any process that is activated in this rectangle. * We bypass security to deliver the signal, since it must be - * one of the activated processes that generated the UDN + * one of the activated processes that generated the user network * message that caused this trap, and all the activated * processes shared a single open file so are pretty tightly * bound together from a security point of view to begin with. */ rect->teardown_in_progress = 1; wmb(); /* Ensure visibility of rectangle before notifying processes. */ - pr_notice("cpu %d: detected hardwall violation %#lx...\n", - cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT)); + pr_notice("cpu %d: detected %s hardwall violation %#lx...\n", + cpu, hwt->name, (long)mfspr_XDN(hwt, DIRECTION_PROTECT)); info.si_signo = SIGILL; info.si_errno = 0; info.si_code = ILL_HARDWALL; found_processes = 0; - list_for_each_entry(p, &rect->task_head, thread.hardwall_list) { - BUG_ON(p->thread.hardwall != rect); + list_for_each_entry(p, &rect->task_head, + thread.hardwall[hwt->index].list) { + BUG_ON(p->thread.hardwall[hwt->index].info != rect); if (!(p->flags & PF_EXITING)) { found_processes = 1; pr_notice("hardwall: killing %d\n", p->pid); @@ -289,7 +401,7 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) pr_notice("hardwall: no associated processes!\n"); done: - spin_unlock_irqrestore(&hardwall_lock, flags); + spin_unlock_irqrestore(&hwt->lock, flags); /* * We have to disable firewall interrupts now, or else when we @@ -298,48 +410,87 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) * haven't yet drained the network, and that would allow packets * to cross out of the hardwall region. */ - disable_firewall_interrupts(); + disable_firewall_interrupts(hwt); irq_exit(); set_irq_regs(old_regs); } -/* Allow access from user space to the UDN. */ -void grant_network_mpls(void) +/* Allow access from user space to the user network. */ +void grant_hardwall_mpls(struct hardwall_type *hwt) { - __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_0, 1); - __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_0, 1); - __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_0, 1); - __insn_mtspr(SPR_MPL_UDN_TIMER_SET_0, 1); +#ifndef __tilepro__ + if (!hwt->is_xdn) { + __insn_mtspr(SPR_MPL_IPI_0_SET_0, 1); + return; + } +#endif + mtspr_MPL_XDN(hwt, ACCESS_SET_0, 1); + mtspr_MPL_XDN(hwt, AVAIL_SET_0, 1); + mtspr_MPL_XDN(hwt, COMPLETE_SET_0, 1); + mtspr_MPL_XDN(hwt, TIMER_SET_0, 1); #if !CHIP_HAS_REV1_XDN() - __insn_mtspr(SPR_MPL_UDN_REFILL_SET_0, 1); - __insn_mtspr(SPR_MPL_UDN_CA_SET_0, 1); + mtspr_MPL_XDN(hwt, REFILL_SET_0, 1); + mtspr_MPL_XDN(hwt, CA_SET_0, 1); #endif } -/* Deny access from user space to the UDN. */ -void restrict_network_mpls(void) +/* Deny access from user space to the user network. */ +void restrict_hardwall_mpls(struct hardwall_type *hwt) { - __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_1, 1); - __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_1, 1); - __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_1, 1); - __insn_mtspr(SPR_MPL_UDN_TIMER_SET_1, 1); +#ifndef __tilepro__ + if (!hwt->is_xdn) { + __insn_mtspr(SPR_MPL_IPI_0_SET_1, 1); + return; + } +#endif + mtspr_MPL_XDN(hwt, ACCESS_SET_1, 1); + mtspr_MPL_XDN(hwt, AVAIL_SET_1, 1); + mtspr_MPL_XDN(hwt, COMPLETE_SET_1, 1); + mtspr_MPL_XDN(hwt, TIMER_SET_1, 1); #if !CHIP_HAS_REV1_XDN() - __insn_mtspr(SPR_MPL_UDN_REFILL_SET_1, 1); - __insn_mtspr(SPR_MPL_UDN_CA_SET_1, 1); + mtspr_MPL_XDN(hwt, REFILL_SET_1, 1); + mtspr_MPL_XDN(hwt, CA_SET_1, 1); #endif } +/* Restrict or deny as necessary for the task we're switching to. */ +void hardwall_switch_tasks(struct task_struct *prev, + struct task_struct *next) +{ + int i; + for (i = 0; i < HARDWALL_TYPES; ++i) { + if (prev->thread.hardwall[i].info != NULL) { + if (next->thread.hardwall[i].info == NULL) + restrict_hardwall_mpls(&hardwall_types[i]); + } else if (next->thread.hardwall[i].info != NULL) { + grant_hardwall_mpls(&hardwall_types[i]); + } + } +} + +/* Does this task have the right to IPI the given cpu? */ +int hardwall_ipi_valid(int cpu) +{ +#ifdef __tilegx__ + struct hardwall_info *info = + current->thread.hardwall[HARDWALL_IPI].info; + return info && cpumask_test_cpu(cpu, &info->cpumask); +#else + return 0; +#endif +} /* - * Code to create, activate, deactivate, and destroy hardwall rectangles. + * Code to create, activate, deactivate, and destroy hardwall resources. */ -/* Create a hardwall for the given rectangle */ -static struct hardwall_info *hardwall_create( - size_t size, const unsigned char __user *bits) +/* Create a hardwall for the given resource */ +static struct hardwall_info *hardwall_create(struct hardwall_type *hwt, + size_t size, + const unsigned char __user *bits) { - struct hardwall_info *iter, *rect; + struct hardwall_info *iter, *info; struct cpumask mask; unsigned long flags; int rc; @@ -370,55 +521,62 @@ static struct hardwall_info *hardwall_create( } } - /* Allocate a new rectangle optimistically. */ - rect = kmalloc(sizeof(struct hardwall_info), + /* Allocate a new hardwall_info optimistically. */ + info = kmalloc(sizeof(struct hardwall_info), GFP_KERNEL | __GFP_ZERO); - if (rect == NULL) + if (info == NULL) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&rect->task_head); + INIT_LIST_HEAD(&info->task_head); + info->type = hwt; /* Compute the rectangle size and validate that it's plausible. */ - rc = setup_rectangle(rect, &mask); - if (rc != 0) { - kfree(rect); - return ERR_PTR(rc); + cpumask_copy(&info->cpumask, &mask); + info->id = find_first_bit(cpumask_bits(&mask), nr_cpumask_bits); + if (hwt->is_xdn) { + rc = check_rectangle(info, &mask); + if (rc != 0) { + kfree(info); + return ERR_PTR(rc); + } } /* Confirm it doesn't overlap and add it to the list. */ - spin_lock_irqsave(&hardwall_lock, flags); - list_for_each_entry(iter, &rectangles, list) { - if (overlaps(iter, rect)) { - spin_unlock_irqrestore(&hardwall_lock, flags); - kfree(rect); + spin_lock_irqsave(&hwt->lock, flags); + list_for_each_entry(iter, &hwt->list, list) { + if (cpumask_intersects(&iter->cpumask, &info->cpumask)) { + spin_unlock_irqrestore(&hwt->lock, flags); + kfree(info); return ERR_PTR(-EBUSY); } } - list_add_tail(&rect->list, &rectangles); - spin_unlock_irqrestore(&hardwall_lock, flags); + list_add_tail(&info->list, &hwt->list); + spin_unlock_irqrestore(&hwt->lock, flags); /* Set up appropriate hardwalling on all affected cpus. */ - hardwall_setup(rect); + if (hwt->is_xdn) + hardwall_protect_rectangle(info); /* Create a /proc/tile/hardwall entry. */ - hardwall_add_proc(rect); + hardwall_add_proc(info); - return rect; + return info; } /* Activate a given hardwall on this cpu for this process. */ -static int hardwall_activate(struct hardwall_info *rect) +static int hardwall_activate(struct hardwall_info *info) { - int cpu, x, y; + int cpu; unsigned long flags; struct task_struct *p = current; struct thread_struct *ts = &p->thread; + struct hardwall_type *hwt; - /* Require a rectangle. */ - if (rect == NULL) + /* Require a hardwall. */ + if (info == NULL) return -ENODATA; - /* Not allowed to activate a rectangle that is being torn down. */ - if (rect->teardown_in_progress) + /* Not allowed to activate a hardwall that is being torn down. */ + if (info->teardown_in_progress) return -EINVAL; /* @@ -428,78 +586,87 @@ static int hardwall_activate(struct hardwall_info *rect) if (cpumask_weight(&p->cpus_allowed) != 1) return -EPERM; - /* Make sure we are bound to a cpu in this rectangle. */ + /* Make sure we are bound to a cpu assigned to this resource. */ cpu = smp_processor_id(); BUG_ON(cpumask_first(&p->cpus_allowed) != cpu); - x = cpu_x(cpu); - y = cpu_y(cpu); - if (!contains(rect, x, y)) + if (!cpumask_test_cpu(cpu, &info->cpumask)) return -EINVAL; /* If we are already bound to this hardwall, it's a no-op. */ - if (ts->hardwall) { - BUG_ON(ts->hardwall != rect); + hwt = info->type; + if (ts->hardwall[hwt->index].info) { + BUG_ON(ts->hardwall[hwt->index].info != info); return 0; } - /* Success! This process gets to use the user networks on this cpu. */ - ts->hardwall = rect; - spin_lock_irqsave(&hardwall_lock, flags); - list_add(&ts->hardwall_list, &rect->task_head); - spin_unlock_irqrestore(&hardwall_lock, flags); - grant_network_mpls(); - printk(KERN_DEBUG "Pid %d (%s) activated for hardwall: cpu %d\n", - p->pid, p->comm, cpu); + /* Success! This process gets to use the resource on this cpu. */ + ts->hardwall[hwt->index].info = info; + spin_lock_irqsave(&hwt->lock, flags); + list_add(&ts->hardwall[hwt->index].list, &info->task_head); + spin_unlock_irqrestore(&hwt->lock, flags); + grant_hardwall_mpls(hwt); + printk(KERN_DEBUG "Pid %d (%s) activated for %s hardwall: cpu %d\n", + p->pid, p->comm, hwt->name, cpu); return 0; } /* - * Deactivate a task's hardwall. Must hold hardwall_lock. + * Deactivate a task's hardwall. Must hold lock for hardwall_type. * This method may be called from free_task(), so we don't want to * rely on too many fields of struct task_struct still being valid. * We assume the cpus_allowed, pid, and comm fields are still valid. */ -static void _hardwall_deactivate(struct task_struct *task) +static void _hardwall_deactivate(struct hardwall_type *hwt, + struct task_struct *task) { struct thread_struct *ts = &task->thread; if (cpumask_weight(&task->cpus_allowed) != 1) { - pr_err("pid %d (%s) releasing networks with" + pr_err("pid %d (%s) releasing %s hardwall with" " an affinity mask containing %d cpus!\n", - task->pid, task->comm, + task->pid, task->comm, hwt->name, cpumask_weight(&task->cpus_allowed)); BUG(); } - BUG_ON(ts->hardwall == NULL); - ts->hardwall = NULL; - list_del(&ts->hardwall_list); + BUG_ON(ts->hardwall[hwt->index].info == NULL); + ts->hardwall[hwt->index].info = NULL; + list_del(&ts->hardwall[hwt->index].list); if (task == current) - restrict_network_mpls(); + restrict_hardwall_mpls(hwt); } /* Deactivate a task's hardwall. */ -int hardwall_deactivate(struct task_struct *task) +static int hardwall_deactivate(struct hardwall_type *hwt, + struct task_struct *task) { unsigned long flags; int activated; - spin_lock_irqsave(&hardwall_lock, flags); - activated = (task->thread.hardwall != NULL); + spin_lock_irqsave(&hwt->lock, flags); + activated = (task->thread.hardwall[hwt->index].info != NULL); if (activated) - _hardwall_deactivate(task); - spin_unlock_irqrestore(&hardwall_lock, flags); + _hardwall_deactivate(hwt, task); + spin_unlock_irqrestore(&hwt->lock, flags); if (!activated) return -EINVAL; - printk(KERN_DEBUG "Pid %d (%s) deactivated for hardwall: cpu %d\n", - task->pid, task->comm, smp_processor_id()); + printk(KERN_DEBUG "Pid %d (%s) deactivated for %s hardwall: cpu %d\n", + task->pid, task->comm, hwt->name, smp_processor_id()); return 0; } -/* Stop a UDN switch before draining the network. */ -static void stop_udn_switch(void *ignored) +void hardwall_deactivate_all(struct task_struct *task) +{ + int i; + for (i = 0; i < HARDWALL_TYPES; ++i) + if (task->thread.hardwall[i].info) + hardwall_deactivate(&hardwall_types[i], task); +} + +/* Stop the switch before draining the network. */ +static void stop_xdn_switch(void *arg) { #if !CHIP_HAS_REV1_XDN() /* Freeze the switch and the demux. */ @@ -507,13 +674,71 @@ static void stop_udn_switch(void *ignored) SPR_UDN_SP_FREEZE__SP_FRZ_MASK | SPR_UDN_SP_FREEZE__DEMUX_FRZ_MASK | SPR_UDN_SP_FREEZE__NON_DEST_EXT_MASK); +#else + /* + * Drop all packets bound for the core or off the edge. + * We rely on the normal hardwall protection setup code + * to have set the low four bits to trigger firewall interrupts, + * and shift those bits up to trigger "drop on send" semantics, + * plus adding "drop on send to core" for all switches. + * In practice it seems the switches latch the DIRECTION_PROTECT + * SPR so they won't start dropping if they're already + * delivering the last message to the core, but it doesn't + * hurt to enable it here. + */ + struct hardwall_type *hwt = arg; + unsigned long protect = mfspr_XDN(hwt, DIRECTION_PROTECT); + mtspr_XDN(hwt, DIRECTION_PROTECT, (protect | C_PROTECT) << 5); #endif } +static void empty_xdn_demuxes(struct hardwall_type *hwt) +{ +#ifndef __tilepro__ + if (hwt->is_idn) { + while (__insn_mfspr(SPR_IDN_DATA_AVAIL) & (1 << 0)) + (void) __tile_idn0_receive(); + while (__insn_mfspr(SPR_IDN_DATA_AVAIL) & (1 << 1)) + (void) __tile_idn1_receive(); + return; + } +#endif + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 0)) + (void) __tile_udn0_receive(); + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 1)) + (void) __tile_udn1_receive(); + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 2)) + (void) __tile_udn2_receive(); + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 3)) + (void) __tile_udn3_receive(); +} + /* Drain all the state from a stopped switch. */ -static void drain_udn_switch(void *ignored) +static void drain_xdn_switch(void *arg) { -#if !CHIP_HAS_REV1_XDN() + struct hardwall_info *info = arg; + struct hardwall_type *hwt = info->type; + +#if CHIP_HAS_REV1_XDN() + /* + * The switches have been configured to drop any messages + * destined for cores (or off the edge of the rectangle). + * But the current message may continue to be delivered, + * so we wait until all the cores have finished any pending + * messages before we stop draining. + */ + int pending = mfspr_XDN(hwt, PENDING); + while (pending--) { + empty_xdn_demuxes(hwt); + if (hwt->is_idn) + __tile_idn_send(0); + else + __tile_udn_send(0); + } + atomic_dec(&info->xdn_pending_count); + while (atomic_read(&info->xdn_pending_count)) + empty_xdn_demuxes(hwt); +#else int i; int from_tile_words, ca_count; @@ -533,15 +758,7 @@ static void drain_udn_switch(void *ignored) (void) __insn_mfspr(SPR_UDN_DEMUX_WRITE_FIFO); /* Empty out demuxes. */ - while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 0)) - (void) __tile_udn0_receive(); - while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 1)) - (void) __tile_udn1_receive(); - while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 2)) - (void) __tile_udn2_receive(); - while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 3)) - (void) __tile_udn3_receive(); - BUG_ON((__insn_mfspr(SPR_UDN_DATA_AVAIL) & 0xF) != 0); + empty_xdn_demuxes(hwt); /* Empty out catch all. */ ca_count = __insn_mfspr(SPR_UDN_DEMUX_CA_COUNT); @@ -563,21 +780,25 @@ static void drain_udn_switch(void *ignored) #endif } -/* Reset random UDN state registers at boot up and during hardwall teardown. */ -void reset_network_state(void) +/* Reset random XDN state registers at boot up and during hardwall teardown. */ +static void reset_xdn_network_state(struct hardwall_type *hwt) { -#if !CHIP_HAS_REV1_XDN() - /* Reset UDN coordinates to their standard value */ - unsigned int cpu = smp_processor_id(); - unsigned int x = cpu % smp_width; - unsigned int y = cpu / smp_width; -#endif - - if (udn_disabled) + if (hwt->disabled) return; + /* Clear out other random registers so we have a clean slate. */ + mtspr_XDN(hwt, DIRECTION_PROTECT, 0); + mtspr_XDN(hwt, AVAIL_EN, 0); + mtspr_XDN(hwt, DEADLOCK_TIMEOUT, 0); + #if !CHIP_HAS_REV1_XDN() - __insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7)); + /* Reset UDN coordinates to their standard value */ + { + unsigned int cpu = smp_processor_id(); + unsigned int x = cpu % smp_width; + unsigned int y = cpu / smp_width; + __insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7)); + } /* Set demux tags to predefined values and enable them. */ __insn_mtspr(SPR_UDN_TAG_VALID, 0xf); @@ -585,56 +806,50 @@ void reset_network_state(void) __insn_mtspr(SPR_UDN_TAG_1, (1 << 1)); __insn_mtspr(SPR_UDN_TAG_2, (1 << 2)); __insn_mtspr(SPR_UDN_TAG_3, (1 << 3)); -#endif - /* Clear out other random registers so we have a clean slate. */ - __insn_mtspr(SPR_UDN_AVAIL_EN, 0); - __insn_mtspr(SPR_UDN_DEADLOCK_TIMEOUT, 0); -#if !CHIP_HAS_REV1_XDN() + /* Set other rev0 random registers to a clean state. */ __insn_mtspr(SPR_UDN_REFILL_EN, 0); __insn_mtspr(SPR_UDN_DEMUX_QUEUE_SEL, 0); __insn_mtspr(SPR_UDN_SP_FIFO_SEL, 0); -#endif /* Start the switch and demux. */ -#if !CHIP_HAS_REV1_XDN() __insn_mtspr(SPR_UDN_SP_FREEZE, 0); #endif } -/* Restart a UDN switch after draining. */ -static void restart_udn_switch(void *ignored) +void reset_network_state(void) { - reset_network_state(); - - /* Disable firewall interrupts. */ - __insn_mtspr(SPR_UDN_DIRECTION_PROTECT, 0); - disable_firewall_interrupts(); + reset_xdn_network_state(&hardwall_types[HARDWALL_UDN]); +#ifndef __tilepro__ + reset_xdn_network_state(&hardwall_types[HARDWALL_IDN]); +#endif } -/* Build a struct cpumask containing all valid tiles in bounding rectangle. */ -static void fill_mask(struct hardwall_info *r, struct cpumask *result) +/* Restart an XDN switch after draining. */ +static void restart_xdn_switch(void *arg) { - int x, y, cpu; + struct hardwall_type *hwt = arg; - cpumask_clear(result); +#if CHIP_HAS_REV1_XDN() + /* One last drain step to avoid races with injection and draining. */ + empty_xdn_demuxes(hwt); +#endif - cpu = r->ulhc_y * smp_width + r->ulhc_x; - for (y = 0; y < r->height; ++y, cpu += smp_width - r->width) { - for (x = 0; x < r->width; ++x, ++cpu) - cpu_online_set(cpu, result); - } + reset_xdn_network_state(hwt); + + /* Disable firewall interrupts. */ + disable_firewall_interrupts(hwt); } /* Last reference to a hardwall is gone, so clear the network. */ -static void hardwall_destroy(struct hardwall_info *rect) +static void hardwall_destroy(struct hardwall_info *info) { struct task_struct *task; + struct hardwall_type *hwt; unsigned long flags; - struct cpumask mask; - /* Make sure this file actually represents a rectangle. */ - if (rect == NULL) + /* Make sure this file actually represents a hardwall. */ + if (info == NULL) return; /* @@ -644,39 +859,53 @@ static void hardwall_destroy(struct hardwall_info *rect) * deactivate any remaining tasks before freeing the * hardwall_info object itself. */ - spin_lock_irqsave(&hardwall_lock, flags); - list_for_each_entry(task, &rect->task_head, thread.hardwall_list) - _hardwall_deactivate(task); - spin_unlock_irqrestore(&hardwall_lock, flags); - - /* Drain the UDN. */ - printk(KERN_DEBUG "Clearing hardwall rectangle %dx%d %d,%d\n", - rect->width, rect->height, rect->ulhc_x, rect->ulhc_y); - fill_mask(rect, &mask); - on_each_cpu_mask(&mask, stop_udn_switch, NULL, 1); - on_each_cpu_mask(&mask, drain_udn_switch, NULL, 1); + hwt = info->type; + info->teardown_in_progress = 1; + spin_lock_irqsave(&hwt->lock, flags); + list_for_each_entry(task, &info->task_head, + thread.hardwall[hwt->index].list) + _hardwall_deactivate(hwt, task); + spin_unlock_irqrestore(&hwt->lock, flags); + + if (hwt->is_xdn) { + /* Configure the switches for draining the user network. */ + printk(KERN_DEBUG + "Clearing %s hardwall rectangle %dx%d %d,%d\n", + hwt->name, info->width, info->height, + info->ulhc_x, info->ulhc_y); + on_each_cpu_mask(&info->cpumask, stop_xdn_switch, hwt, 1); + + /* Drain the network. */ +#if CHIP_HAS_REV1_XDN() + atomic_set(&info->xdn_pending_count, + cpumask_weight(&info->cpumask)); + on_each_cpu_mask(&info->cpumask, drain_xdn_switch, info, 0); +#else + on_each_cpu_mask(&info->cpumask, drain_xdn_switch, info, 1); +#endif - /* Restart switch and disable firewall. */ - on_each_cpu_mask(&mask, restart_udn_switch, NULL, 1); + /* Restart switch and disable firewall. */ + on_each_cpu_mask(&info->cpumask, restart_xdn_switch, hwt, 1); + } /* Remove the /proc/tile/hardwall entry. */ - hardwall_remove_proc(rect); - - /* Now free the rectangle from the list. */ - spin_lock_irqsave(&hardwall_lock, flags); - BUG_ON(!list_empty(&rect->task_head)); - list_del(&rect->list); - spin_unlock_irqrestore(&hardwall_lock, flags); - kfree(rect); + hardwall_remove_proc(info); + + /* Now free the hardwall from the list. */ + spin_lock_irqsave(&hwt->lock, flags); + BUG_ON(!list_empty(&info->task_head)); + list_del(&info->list); + spin_unlock_irqrestore(&hwt->lock, flags); + kfree(info); } static int hardwall_proc_show(struct seq_file *sf, void *v) { - struct hardwall_info *rect = sf->private; + struct hardwall_info *info = sf->private; char buf[256]; - int rc = cpulist_scnprintf(buf, sizeof(buf), &rect->cpumask); + int rc = cpulist_scnprintf(buf, sizeof(buf), &info->cpumask); buf[rc++] = '\n'; seq_write(sf, buf, rc); return 0; @@ -695,31 +924,45 @@ static const struct file_operations hardwall_proc_fops = { .release = single_release, }; -static void hardwall_add_proc(struct hardwall_info *rect) +static void hardwall_add_proc(struct hardwall_info *info) { char buf[64]; - snprintf(buf, sizeof(buf), "%d", rect->id); - proc_create_data(buf, 0444, hardwall_proc_dir, - &hardwall_proc_fops, rect); + snprintf(buf, sizeof(buf), "%d", info->id); + proc_create_data(buf, 0444, info->type->proc_dir, + &hardwall_proc_fops, info); } -static void hardwall_remove_proc(struct hardwall_info *rect) +static void hardwall_remove_proc(struct hardwall_info *info) { char buf[64]; - snprintf(buf, sizeof(buf), "%d", rect->id); - remove_proc_entry(buf, hardwall_proc_dir); + snprintf(buf, sizeof(buf), "%d", info->id); + remove_proc_entry(buf, info->type->proc_dir); } int proc_pid_hardwall(struct task_struct *task, char *buffer) { - struct hardwall_info *rect = task->thread.hardwall; - return rect ? sprintf(buffer, "%d\n", rect->id) : 0; + int i; + int n = 0; + for (i = 0; i < HARDWALL_TYPES; ++i) { + struct hardwall_info *info = task->thread.hardwall[i].info; + if (info) + n += sprintf(&buffer[n], "%s: %d\n", + info->type->name, info->id); + } + return n; } void proc_tile_hardwall_init(struct proc_dir_entry *root) { - if (!udn_disabled) - hardwall_proc_dir = proc_mkdir("hardwall", root); + int i; + for (i = 0; i < HARDWALL_TYPES; ++i) { + struct hardwall_type *hwt = &hardwall_types[i]; + if (hwt->disabled) + continue; + if (hardwall_proc_dir == NULL) + hardwall_proc_dir = proc_mkdir("hardwall", root); + hwt->proc_dir = proc_mkdir(hwt->name, hardwall_proc_dir); + } } @@ -729,34 +972,45 @@ void proc_tile_hardwall_init(struct proc_dir_entry *root) static long hardwall_ioctl(struct file *file, unsigned int a, unsigned long b) { - struct hardwall_info *rect = file->private_data; + struct hardwall_info *info = file->private_data; + int minor = iminor(file->f_mapping->host); + struct hardwall_type* hwt; if (_IOC_TYPE(a) != HARDWALL_IOCTL_BASE) return -EINVAL; + BUILD_BUG_ON(HARDWALL_TYPES != _HARDWALL_TYPES); + BUILD_BUG_ON(HARDWALL_TYPES != + sizeof(hardwall_types)/sizeof(hardwall_types[0])); + + if (minor < 0 || minor >= HARDWALL_TYPES) + return -EINVAL; + hwt = &hardwall_types[minor]; + WARN_ON(info && hwt != info->type); + switch (_IOC_NR(a)) { case _HARDWALL_CREATE: - if (udn_disabled) + if (hwt->disabled) return -ENOSYS; - if (rect != NULL) + if (info != NULL) return -EALREADY; - rect = hardwall_create(_IOC_SIZE(a), - (const unsigned char __user *)b); - if (IS_ERR(rect)) - return PTR_ERR(rect); - file->private_data = rect; + info = hardwall_create(hwt, _IOC_SIZE(a), + (const unsigned char __user *)b); + if (IS_ERR(info)) + return PTR_ERR(info); + file->private_data = info; return 0; case _HARDWALL_ACTIVATE: - return hardwall_activate(rect); + return hardwall_activate(info); case _HARDWALL_DEACTIVATE: - if (current->thread.hardwall != rect) + if (current->thread.hardwall[hwt->index].info != info) return -EINVAL; - return hardwall_deactivate(current); + return hardwall_deactivate(hwt, current); case _HARDWALL_GET_ID: - return rect ? rect->id : -EINVAL; + return info ? info->id : -EINVAL; default: return -EINVAL; @@ -775,26 +1029,28 @@ static long hardwall_compat_ioctl(struct file *file, /* The user process closed the file; revoke access to user networks. */ static int hardwall_flush(struct file *file, fl_owner_t owner) { - struct hardwall_info *rect = file->private_data; + struct hardwall_info *info = file->private_data; struct task_struct *task, *tmp; unsigned long flags; - if (rect) { + if (info) { /* * NOTE: if multiple threads are activated on this hardwall * file, the other threads will continue having access to the - * UDN until they are context-switched out and back in again. + * user network until they are context-switched out and back + * in again. * * NOTE: A NULL files pointer means the task is being torn * down, so in that case we also deactivate it. */ - spin_lock_irqsave(&hardwall_lock, flags); - list_for_each_entry_safe(task, tmp, &rect->task_head, - thread.hardwall_list) { + struct hardwall_type *hwt = info->type; + spin_lock_irqsave(&hwt->lock, flags); + list_for_each_entry_safe(task, tmp, &info->task_head, + thread.hardwall[hwt->index].list) { if (task->files == owner || task->files == NULL) - _hardwall_deactivate(task); + _hardwall_deactivate(hwt, task); } - spin_unlock_irqrestore(&hardwall_lock, flags); + spin_unlock_irqrestore(&hwt->lock, flags); } return 0; @@ -824,11 +1080,11 @@ static int __init dev_hardwall_init(void) int rc; dev_t dev; - rc = alloc_chrdev_region(&dev, 0, 1, "hardwall"); + rc = alloc_chrdev_region(&dev, 0, HARDWALL_TYPES, "hardwall"); if (rc < 0) return rc; cdev_init(&hardwall_dev, &dev_hardwall_fops); - rc = cdev_add(&hardwall_dev, dev, 1); + rc = cdev_add(&hardwall_dev, dev, HARDWALL_TYPES); if (rc < 0) return rc; diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index 1a39b7c..f71bfee 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S @@ -69,7 +69,7 @@ ENTRY(_start) } { moveli lr, lo16(1f) - move r5, zero + moveli r5, CTX_PAGE_FLAG } { auli lr, lr, ha16(1f) @@ -141,11 +141,11 @@ ENTRY(empty_zero_page) .macro PTE va, cpa, bits1, no_org=0 .ifeq \no_org - .org swapper_pg_dir + HV_L1_INDEX(\va) * HV_PTE_SIZE + .org swapper_pg_dir + PGD_INDEX(\va) * HV_PTE_SIZE .endif .word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \ (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) - .word (\bits1) | (HV_CPA_TO_PFN(\cpa) << (HV_PTE_INDEX_PFN - 32)) + .word (\bits1) | (HV_CPA_TO_PTFN(\cpa) << (HV_PTE_INDEX_PTFN - 32)) .endm __PAGE_ALIGNED_DATA @@ -166,7 +166,7 @@ ENTRY(swapper_pg_dir) /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */ PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ (1 << (HV_PTE_INDEX_EXECUTABLE - 32)) - .org swapper_pg_dir + HV_L1_SIZE + .org swapper_pg_dir + PGDIR_SIZE END(swapper_pg_dir) /* diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S index 6bc3a93..f9a2734 100644 --- a/arch/tile/kernel/head_64.S +++ b/arch/tile/kernel/head_64.S @@ -114,7 +114,7 @@ ENTRY(_start) shl16insli r0, r0, hw0(swapper_pg_dir - PAGE_OFFSET) } { - move r3, zero + moveli r3, CTX_PAGE_FLAG j hv_install_context } 1: @@ -210,19 +210,19 @@ ENTRY(empty_zero_page) .macro PTE cpa, bits1 .quad HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED |\ HV_PTE_GLOBAL | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) |\ - (\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN) + (\bits1) | (HV_CPA_TO_PTFN(\cpa) << HV_PTE_INDEX_PTFN) .endm __PAGE_ALIGNED_DATA .align PAGE_SIZE ENTRY(swapper_pg_dir) - .org swapper_pg_dir + HV_L0_INDEX(PAGE_OFFSET) * HV_PTE_SIZE + .org swapper_pg_dir + PGD_INDEX(PAGE_OFFSET) * HV_PTE_SIZE .Lsv_data_pmd: .quad 0 /* PTE temp_data_pmd - PAGE_OFFSET, 0 */ - .org swapper_pg_dir + HV_L0_INDEX(MEM_SV_START) * HV_PTE_SIZE + .org swapper_pg_dir + PGD_INDEX(MEM_SV_START) * HV_PTE_SIZE .Lsv_code_pmd: .quad 0 /* PTE temp_code_pmd - PAGE_OFFSET, 0 */ - .org swapper_pg_dir + HV_L0_SIZE + .org swapper_pg_dir + SIZEOF_PGD END(swapper_pg_dir) .align HV_PAGE_TABLE_ALIGN @@ -233,11 +233,11 @@ ENTRY(temp_data_pmd) * permissions later. */ .set addr, 0 - .rept HV_L1_ENTRIES + .rept PTRS_PER_PMD PTE addr, HV_PTE_READABLE | HV_PTE_WRITABLE - .set addr, addr + HV_PAGE_SIZE_LARGE + .set addr, addr + HPAGE_SIZE .endr - .org temp_data_pmd + HV_L1_SIZE + .org temp_data_pmd + SIZEOF_PMD END(temp_data_pmd) .align HV_PAGE_TABLE_ALIGN @@ -248,11 +248,11 @@ ENTRY(temp_code_pmd) * permissions later. */ .set addr, 0 - .rept HV_L1_ENTRIES + .rept PTRS_PER_PMD PTE addr, HV_PTE_READABLE | HV_PTE_EXECUTABLE - .set addr, addr + HV_PAGE_SIZE_LARGE + .set addr, addr + HPAGE_SIZE .endr - .org temp_code_pmd + HV_L1_SIZE + .org temp_code_pmd + SIZEOF_PMD END(temp_code_pmd) /* diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds index 2b7cd0a..d44c5a6 100644 --- a/arch/tile/kernel/hvglue.lds +++ b/arch/tile/kernel/hvglue.lds @@ -55,4 +55,5 @@ hv_store_mapping = TEXT_OFFSET + 0x106a0; hv_inquire_realpa = TEXT_OFFSET + 0x106c0; hv_flush_all = TEXT_OFFSET + 0x106e0; hv_get_ipi_pte = TEXT_OFFSET + 0x10700; -hv_glue_internals = TEXT_OFFSET + 0x10720; +hv_set_pte_super_shift = TEXT_OFFSET + 0x10720; +hv_glue_internals = TEXT_OFFSET + 0x10740; diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 30ae76e..7c06d59 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -220,7 +220,9 @@ intvec_\vecname: * This routine saves just the first four registers, plus the * stack context so we can do proper backtracing right away, * and defers to handle_interrupt to save the rest. - * The backtracer needs pc, ex1, lr, sp, r52, and faultnum. + * The backtracer needs pc, ex1, lr, sp, r52, and faultnum, + * and needs sp set to its final location at the bottom of + * the stack frame. */ addli r0, r0, PTREGS_OFFSET_LR - (PTREGS_SIZE + KSTK_PTREGS_GAP) wh64 r0 /* cache line 7 */ @@ -450,23 +452,6 @@ intvec_\vecname: push_reg r5, r52 st r52, r4 - /* Load tp with our per-cpu offset. */ -#ifdef CONFIG_SMP - { - mfspr r20, SPR_SYSTEM_SAVE_K_0 - moveli r21, hw2_last(__per_cpu_offset) - } - { - shl16insli r21, r21, hw1(__per_cpu_offset) - bfextu r20, r20, 0, LOG2_THREAD_SIZE-1 - } - shl16insli r21, r21, hw0(__per_cpu_offset) - shl3add r20, r20, r21 - ld tp, r20 -#else - move tp, zero -#endif - /* * If we will be returning to the kernel, we will need to * reset the interrupt masks to the state they had before. @@ -489,6 +474,44 @@ intvec_\vecname: .endif st r21, r32 + /* + * we've captured enough state to the stack (including in + * particular our EX_CONTEXT state) that we can now release + * the interrupt critical section and replace it with our + * standard "interrupts disabled" mask value. This allows + * synchronous interrupts (and profile interrupts) to punch + * through from this point onwards. + * + * It's important that no code before this point touch memory + * other than our own stack (to keep the invariant that this + * is all that gets touched under ICS), and that no code after + * this point reference any interrupt-specific SPR, in particular + * the EX_CONTEXT_K_ values. + */ + .ifc \function,handle_nmi + IRQ_DISABLE_ALL(r20) + .else + IRQ_DISABLE(r20, r21) + .endif + mtspr INTERRUPT_CRITICAL_SECTION, zero + + /* Load tp with our per-cpu offset. */ +#ifdef CONFIG_SMP + { + mfspr r20, SPR_SYSTEM_SAVE_K_0 + moveli r21, hw2_last(__per_cpu_offset) + } + { + shl16insli r21, r21, hw1(__per_cpu_offset) + bfextu r20, r20, 0, LOG2_THREAD_SIZE-1 + } + shl16insli r21, r21, hw0(__per_cpu_offset) + shl3add r20, r20, r21 + ld tp, r20 +#else + move tp, zero +#endif + #ifdef __COLLECT_LINKER_FEEDBACK__ /* * Notify the feedback routines that we were in the @@ -513,21 +536,6 @@ intvec_\vecname: #endif /* - * we've captured enough state to the stack (including in - * particular our EX_CONTEXT state) that we can now release - * the interrupt critical section and replace it with our - * standard "interrupts disabled" mask value. This allows - * synchronous interrupts (and profile interrupts) to punch - * through from this point onwards. - */ - .ifc \function,handle_nmi - IRQ_DISABLE_ALL(r20) - .else - IRQ_DISABLE(r20, r21) - .endif - mtspr INTERRUPT_CRITICAL_SECTION, zero - - /* * Prepare the first 256 stack bytes to be rapidly accessible * without having to fetch the background data. */ @@ -736,9 +744,10 @@ STD_ENTRY(interrupt_return) beqzt r30, .Lrestore_regs j 3f 2: TRACE_IRQS_ON + IRQ_ENABLE_LOAD(r20, r21) movei r0, 1 mtspr INTERRUPT_CRITICAL_SECTION, r0 - IRQ_ENABLE(r20, r21) + IRQ_ENABLE_APPLY(r20, r21) beqzt r30, .Lrestore_regs 3: @@ -755,7 +764,6 @@ STD_ENTRY(interrupt_return) * that will save some cycles if this turns out to be a syscall. */ .Lrestore_regs: - FEEDBACK_REENTER(interrupt_return) /* called from elsewhere */ /* * Rotate so we have one high bit and one low bit to test. @@ -1249,7 +1257,7 @@ STD_ENTRY(fill_ra_stack) int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault - int_hand INT_IDN_FIREWALL, IDN_FIREWALL, bad_intr + int_hand INT_IDN_FIREWALL, IDN_FIREWALL, do_hardwall_trap int_hand INT_UDN_FIREWALL, UDN_FIREWALL, do_hardwall_trap int_hand INT_TILE_TIMER, TILE_TIMER, do_timer_interrupt int_hand INT_IDN_TIMER, IDN_TIMER, bad_intr diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c index 6255f2e..f0b54a9 100644 --- a/arch/tile/kernel/machine_kexec.c +++ b/arch/tile/kernel/machine_kexec.c @@ -31,6 +31,8 @@ #include <asm/pgalloc.h> #include <asm/cacheflush.h> #include <asm/checksum.h> +#include <asm/tlbflush.h> +#include <asm/homecache.h> #include <hv/hypervisor.h> @@ -222,11 +224,22 @@ struct page *kimage_alloc_pages_arch(gfp_t gfp_mask, unsigned int order) return alloc_pages_node(0, gfp_mask, order); } +/* + * Address range in which pa=va mapping is set in setup_quasi_va_is_pa(). + * For tilepro, PAGE_OFFSET is used since this is the largest possbile value + * for tilepro, while for tilegx, we limit it to entire middle level page + * table which we assume has been allocated and is undoubtedly large enough. + */ +#ifndef __tilegx__ +#define QUASI_VA_IS_PA_ADDR_RANGE PAGE_OFFSET +#else +#define QUASI_VA_IS_PA_ADDR_RANGE PGDIR_SIZE +#endif + static void setup_quasi_va_is_pa(void) { - HV_PTE *pgtable; HV_PTE pte; - int i; + unsigned long i; /* * Flush our TLB to prevent conflicts between the previous contents @@ -234,16 +247,22 @@ static void setup_quasi_va_is_pa(void) */ local_flush_tlb_all(); - /* setup VA is PA, at least up to PAGE_OFFSET */ - - pgtable = (HV_PTE *)current->mm->pgd; + /* + * setup VA is PA, at least up to QUASI_VA_IS_PA_ADDR_RANGE. + * Note here we assume that level-1 page table is defined by + * HPAGE_SIZE. + */ pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE); pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); - - for (i = 0; i < pgd_index(PAGE_OFFSET); i++) { + for (i = 0; i < (QUASI_VA_IS_PA_ADDR_RANGE >> HPAGE_SHIFT); i++) { + unsigned long vaddr = i << HPAGE_SHIFT; + pgd_t *pgd = pgd_offset(current->mm, vaddr); + pud_t *pud = pud_offset(pgd, vaddr); + pte_t *ptep = (pte_t *) pmd_offset(pud, vaddr); unsigned long pfn = i << (HPAGE_SHIFT - PAGE_SHIFT); + if (pfn_valid(pfn)) - __set_pte(&pgtable[i], pfn_pte(pfn, pte)); + __set_pte(ptep, pfn_pte(pfn, pte)); } } @@ -251,6 +270,7 @@ static void setup_quasi_va_is_pa(void) void machine_kexec(struct kimage *image) { void *reboot_code_buffer; + pte_t *ptep; void (*rnk)(unsigned long, void *, unsigned long) __noreturn; @@ -266,8 +286,10 @@ void machine_kexec(struct kimage *image) */ homecache_change_page_home(image->control_code_page, 0, smp_processor_id()); - reboot_code_buffer = vmap(&image->control_code_page, 1, 0, - __pgprot(_PAGE_KERNEL | _PAGE_EXECUTABLE)); + reboot_code_buffer = page_address(image->control_code_page); + BUG_ON(reboot_code_buffer == NULL); + ptep = virt_to_pte(NULL, (unsigned long)reboot_code_buffer); + __set_pte(ptep, pte_mkexec(*ptep)); memcpy(reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size); __flush_icache_range( diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c index 98d4769..001cbfa 100644 --- a/arch/tile/kernel/module.c +++ b/arch/tile/kernel/module.c @@ -159,7 +159,17 @@ int apply_relocate_add(Elf_Shdr *sechdrs, switch (ELF_R_TYPE(rel[i].r_info)) { -#define MUNGE(func) (*location = ((*location & ~func(-1)) | func(value))) +#ifdef __LITTLE_ENDIAN +# define MUNGE(func) \ + (*location = ((*location & ~func(-1)) | func(value))) +#else +/* + * Instructions are always little-endian, so when we read them as data, + * we have to swap them around before and after modifying them. + */ +# define MUNGE(func) \ + (*location = swab64((swab64(*location) & ~func(-1)) | func(value))) +#endif #ifndef __tilegx__ case R_TILE_32: diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c index 446a7f5..dafc447 100644 --- a/arch/tile/kernel/proc.c +++ b/arch/tile/kernel/proc.c @@ -22,6 +22,7 @@ #include <linux/proc_fs.h> #include <linux/sysctl.h> #include <linux/hardirq.h> +#include <linux/hugetlb.h> #include <linux/mman.h> #include <asm/unaligned.h> #include <asm/pgtable.h> diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index f572c19..ba1023d 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -128,10 +128,10 @@ void arch_release_thread_info(struct thread_info *info) * Calling deactivate here just frees up the data structures. * If the task we're freeing held the last reference to a * hardwall fd, it would have been released prior to this point - * anyway via exit_files(), and "hardwall" would be NULL by now. + * anyway via exit_files(), and the hardwall_task.info pointers + * would be NULL by now. */ - if (info->task->thread.hardwall) - hardwall_deactivate(info->task); + hardwall_deactivate_all(info->task); #endif if (step_state) { @@ -245,7 +245,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, #ifdef CONFIG_HARDWALL /* New thread does not own any networks. */ - p->thread.hardwall = NULL; + memset(&p->thread.hardwall[0], 0, + sizeof(struct hardwall_task) * HARDWALL_TYPES); #endif @@ -515,12 +516,7 @@ struct task_struct *__sched _switch_to(struct task_struct *prev, #ifdef CONFIG_HARDWALL /* Enable or disable access to the network registers appropriately. */ - if (prev->thread.hardwall != NULL) { - if (next->thread.hardwall == NULL) - restrict_network_mpls(); - } else if (next->thread.hardwall != NULL) { - grant_network_mpls(); - } + hardwall_switch_tasks(prev, next); #endif /* diff --git a/arch/tile/kernel/relocate_kernel.S b/arch/tile/kernel/relocate_kernel_32.S index 010b418..010b418 100644 --- a/arch/tile/kernel/relocate_kernel.S +++ b/arch/tile/kernel/relocate_kernel_32.S diff --git a/arch/tile/kernel/relocate_kernel_64.S b/arch/tile/kernel/relocate_kernel_64.S new file mode 100644 index 0000000..1c09a4f --- /dev/null +++ b/arch/tile/kernel/relocate_kernel_64.S @@ -0,0 +1,260 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * copy new kernel into place and then call hv_reexec + * + */ + +#include <linux/linkage.h> +#include <arch/chip.h> +#include <asm/page.h> +#include <hv/hypervisor.h> + +#undef RELOCATE_NEW_KERNEL_VERBOSE + +STD_ENTRY(relocate_new_kernel) + + move r30, r0 /* page list */ + move r31, r1 /* address of page we are on */ + move r32, r2 /* start address of new kernel */ + + shrui r1, r1, PAGE_SHIFT + addi r1, r1, 1 + shli sp, r1, PAGE_SHIFT + addi sp, sp, -8 + /* we now have a stack (whether we need one or not) */ + + moveli r40, hw2_last(hv_console_putc) + shl16insli r40, r40, hw1(hv_console_putc) + shl16insli r40, r40, hw0(hv_console_putc) + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'r' + jalr r40 + + moveli r0, '_' + jalr r40 + + moveli r0, 'n' + jalr r40 + + moveli r0, '_' + jalr r40 + + moveli r0, 'k' + jalr r40 + + moveli r0, '\n' + jalr r40 +#endif + + /* + * Throughout this code r30 is pointer to the element of page + * list we are working on. + * + * Normally we get to the next element of the page list by + * incrementing r30 by eight. The exception is if the element + * on the page list is an IND_INDIRECTION in which case we use + * the element with the low bits masked off as the new value + * of r30. + * + * To get this started, we need the value passed to us (which + * will always be an IND_INDIRECTION) in memory somewhere with + * r30 pointing at it. To do that, we push the value passed + * to us on the stack and make r30 point to it. + */ + + st sp, r30 + move r30, sp + addi sp, sp, -16 + +#if CHIP_HAS_CBOX_HOME_MAP() + /* + * On TILE-GX, we need to flush all tiles' caches, since we may + * have been doing hash-for-home caching there. Note that we + * must do this _after_ we're completely done modifying any memory + * other than our output buffer (which we know is locally cached). + * We want the caches to be fully clean when we do the reexec, + * because the hypervisor is going to do this flush again at that + * point, and we don't want that second flush to overwrite any memory. + */ + { + move r0, zero /* cache_pa */ + moveli r1, hw2_last(HV_FLUSH_EVICT_L2) + } + { + shl16insli r1, r1, hw1(HV_FLUSH_EVICT_L2) + movei r2, -1 /* cache_cpumask; -1 means all client tiles */ + } + { + shl16insli r1, r1, hw0(HV_FLUSH_EVICT_L2) /* cache_control */ + move r3, zero /* tlb_va */ + } + { + move r4, zero /* tlb_length */ + move r5, zero /* tlb_pgsize */ + } + { + move r6, zero /* tlb_cpumask */ + move r7, zero /* asids */ + } + { + moveli r20, hw2_last(hv_flush_remote) + move r8, zero /* asidcount */ + } + shl16insli r20, r20, hw1(hv_flush_remote) + shl16insli r20, r20, hw0(hv_flush_remote) + + jalr r20 +#endif + + /* r33 is destination pointer, default to zero */ + + moveli r33, 0 + +.Lloop: ld r10, r30 + + andi r9, r10, 0xf /* low 4 bits tell us what type it is */ + xor r10, r10, r9 /* r10 is now value with low 4 bits stripped */ + + cmpeqi r0, r9, 0x1 /* IND_DESTINATION */ + beqzt r0, .Ltry2 + + move r33, r10 + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'd' + jalr r40 +#endif + + addi r30, r30, 8 + j .Lloop + +.Ltry2: + cmpeqi r0, r9, 0x2 /* IND_INDIRECTION */ + beqzt r0, .Ltry4 + + move r30, r10 + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'i' + jalr r40 +#endif + + j .Lloop + +.Ltry4: + cmpeqi r0, r9, 0x4 /* IND_DONE */ + beqzt r0, .Ltry8 + + mf + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'D' + jalr r40 + moveli r0, '\n' + jalr r40 +#endif + + move r0, r32 + + moveli r41, hw2_last(hv_reexec) + shl16insli r41, r41, hw1(hv_reexec) + shl16insli r41, r41, hw0(hv_reexec) + + jalr r41 + + /* we should not get here */ + + moveli r0, '?' + jalr r40 + moveli r0, '\n' + jalr r40 + + j .Lhalt + +.Ltry8: cmpeqi r0, r9, 0x8 /* IND_SOURCE */ + beqz r0, .Lerr /* unknown type */ + + /* copy page at r10 to page at r33 */ + + move r11, r33 + + moveli r0, hw2_last(PAGE_SIZE) + shl16insli r0, r0, hw1(PAGE_SIZE) + shl16insli r0, r0, hw0(PAGE_SIZE) + add r33, r33, r0 + + /* copy word at r10 to word at r11 until r11 equals r33 */ + + /* We know page size must be multiple of 8, so we can unroll + * 8 times safely without any edge case checking. + * + * Issue a flush of the destination every 8 words to avoid + * incoherence when starting the new kernel. (Now this is + * just good paranoia because the hv_reexec call will also + * take care of this.) + */ + +1: + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0 } + { flush r11 ; addi r11, r11, 8 } + + cmpeq r0, r33, r11 + beqzt r0, 1b + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 's' + jalr r40 +#endif + + addi r30, r30, 8 + j .Lloop + + +.Lerr: moveli r0, 'e' + jalr r40 + moveli r0, 'r' + jalr r40 + moveli r0, 'r' + jalr r40 + moveli r0, '\n' + jalr r40 +.Lhalt: + moveli r41, hw2_last(hv_halt) + shl16insli r41, r41, hw1(hv_halt) + shl16insli r41, r41, hw0(hv_halt) + + jalr r41 + STD_ENDPROC(relocate_new_kernel) + + .section .rodata,"a" + + .globl relocate_new_kernel_size +relocate_new_kernel_size: + .long .Lend_relocate_new_kernel - relocate_new_kernel diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 98d80eb..6098ccc 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -28,6 +28,7 @@ #include <linux/highmem.h> #include <linux/smp.h> #include <linux/timex.h> +#include <linux/hugetlb.h> #include <asm/setup.h> #include <asm/sections.h> #include <asm/cacheflush.h> @@ -49,9 +50,6 @@ char chip_model[64] __write_once; struct pglist_data node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); -/* We only create bootmem data on node 0. */ -static bootmem_data_t __initdata node0_bdata; - /* Information on the NUMA nodes that we compute early */ unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES]; unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES]; @@ -534,37 +532,96 @@ static void __init setup_memory(void) #endif } -static void __init setup_bootmem_allocator(void) +/* + * On 32-bit machines, we only put bootmem on the low controller, + * since PAs > 4GB can't be used in bootmem. In principle one could + * imagine, e.g., multiple 1 GB controllers all of which could support + * bootmem, but in practice using controllers this small isn't a + * particularly interesting scenario, so we just keep it simple and + * use only the first controller for bootmem on 32-bit machines. + */ +static inline int node_has_bootmem(int nid) { - unsigned long bootmap_size, first_alloc_pfn, last_alloc_pfn; +#ifdef CONFIG_64BIT + return 1; +#else + return nid == 0; +#endif +} - /* Provide a node 0 bdata. */ - NODE_DATA(0)->bdata = &node0_bdata; +static inline unsigned long alloc_bootmem_pfn(int nid, + unsigned long size, + unsigned long goal) +{ + void *kva = __alloc_bootmem_node(NODE_DATA(nid), size, + PAGE_SIZE, goal); + unsigned long pfn = kaddr_to_pfn(kva); + BUG_ON(goal && PFN_PHYS(pfn) != goal); + return pfn; +} -#ifdef CONFIG_PCI - /* Don't let boot memory alias the PCI region. */ - last_alloc_pfn = min(max_low_pfn, pci_reserve_start_pfn); +static void __init setup_bootmem_allocator_node(int i) +{ + unsigned long start, end, mapsize, mapstart; + + if (node_has_bootmem(i)) { + NODE_DATA(i)->bdata = &bootmem_node_data[i]; + } else { + /* Share controller zero's bdata for now. */ + NODE_DATA(i)->bdata = &bootmem_node_data[0]; + return; + } + + /* Skip up to after the bss in node 0. */ + start = (i == 0) ? min_low_pfn : node_start_pfn[i]; + + /* Only lowmem, if we're a HIGHMEM build. */ +#ifdef CONFIG_HIGHMEM + end = node_lowmem_end_pfn[i]; #else - last_alloc_pfn = max_low_pfn; + end = node_end_pfn[i]; #endif - /* - * Initialize the boot-time allocator (with low memory only): - * The first argument says where to put the bitmap, and the - * second says where the end of allocatable memory is. - */ - bootmap_size = init_bootmem(min_low_pfn, last_alloc_pfn); + /* No memory here. */ + if (end == start) + return; + + /* Figure out where the bootmem bitmap is located. */ + mapsize = bootmem_bootmap_pages(end - start); + if (i == 0) { + /* Use some space right before the heap on node 0. */ + mapstart = start; + start += mapsize; + } else { + /* Allocate bitmap on node 0 to avoid page table issues. */ + mapstart = alloc_bootmem_pfn(0, PFN_PHYS(mapsize), 0); + } + /* Initialize a node. */ + init_bootmem_node(NODE_DATA(i), mapstart, start, end); + + /* Free all the space back into the allocator. */ + free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start)); + +#if defined(CONFIG_PCI) /* - * Let the bootmem allocator use all the space we've given it - * except for its own bitmap. + * Throw away any memory aliased by the PCI region. FIXME: this + * is a temporary hack to work around bug 10502, and needs to be + * fixed properly. */ - first_alloc_pfn = min_low_pfn + PFN_UP(bootmap_size); - if (first_alloc_pfn >= last_alloc_pfn) - early_panic("Not enough memory on controller 0 for bootmem\n"); + if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start) + reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn), + PFN_PHYS(pci_reserve_end_pfn - + pci_reserve_start_pfn), + BOOTMEM_EXCLUSIVE); +#endif +} - free_bootmem(PFN_PHYS(first_alloc_pfn), - PFN_PHYS(last_alloc_pfn - first_alloc_pfn)); +static void __init setup_bootmem_allocator(void) +{ + int i; + for (i = 0; i < MAX_NUMNODES; ++i) + setup_bootmem_allocator_node(i); #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) @@ -595,14 +652,6 @@ static int __init percpu_size(void) return size; } -static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal) -{ - void *kva = __alloc_bootmem(size, PAGE_SIZE, goal); - unsigned long pfn = kaddr_to_pfn(kva); - BUG_ON(goal && PFN_PHYS(pfn) != goal); - return pfn; -} - static void __init zone_sizes_init(void) { unsigned long zones_size[MAX_NR_ZONES] = { 0 }; @@ -640,21 +689,22 @@ static void __init zone_sizes_init(void) * though, there'll be no lowmem, so we just alloc_bootmem * the memmap. There will be no percpu memory either. */ - if (__pfn_to_highbits(start) == 0) { - /* In low PAs, allocate via bootmem. */ + if (i != 0 && cpu_isset(i, isolnodes)) { + node_memmap_pfn[i] = + alloc_bootmem_pfn(0, memmap_size, 0); + BUG_ON(node_percpu[i] != 0); + } else if (node_has_bootmem(start)) { unsigned long goal = 0; node_memmap_pfn[i] = - alloc_bootmem_pfn(memmap_size, goal); + alloc_bootmem_pfn(i, memmap_size, 0); if (kdata_huge) goal = PFN_PHYS(lowmem_end) - node_percpu[i]; if (node_percpu[i]) node_percpu_pfn[i] = - alloc_bootmem_pfn(node_percpu[i], goal); - } else if (cpu_isset(i, isolnodes)) { - node_memmap_pfn[i] = alloc_bootmem_pfn(memmap_size, 0); - BUG_ON(node_percpu[i] != 0); + alloc_bootmem_pfn(i, node_percpu[i], + goal); } else { - /* In high PAs, just reserve some pages. */ + /* In non-bootmem zones, just reserve some pages. */ node_memmap_pfn[i] = node_free_pfn[i]; node_free_pfn[i] += PFN_UP(memmap_size); if (!kdata_huge) { @@ -678,16 +728,9 @@ static void __init zone_sizes_init(void) zones_size[ZONE_NORMAL] = end - start; #endif - /* - * Everyone shares node 0's bootmem allocator, but - * we use alloc_remap(), above, to put the actual - * struct page array on the individual controllers, - * which is most of the data that we actually care about. - * We can't place bootmem allocators on the other - * controllers since the bootmem allocator can only - * operate on 32-bit physical addresses. - */ - NODE_DATA(i)->bdata = NODE_DATA(0)->bdata; + /* Take zone metadata from controller 0 if we're isolnode. */ + if (node_isset(i, isolnodes)) + NODE_DATA(i)->bdata = &bootmem_node_data[0]; free_area_init_node(i, zones_size, start, NULL); printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n", @@ -870,6 +913,22 @@ subsys_initcall(topology_init); #endif /* CONFIG_NUMA */ +/* + * Initialize hugepage support on this cpu. We do this on all cores + * early in boot: before argument parsing for the boot cpu, and after + * argument parsing but before the init functions run on the secondaries. + * So the values we set up here in the hypervisor may be overridden on + * the boot cpu as arguments are parsed. + */ +static __cpuinit void init_super_pages(void) +{ +#ifdef CONFIG_HUGETLB_SUPER_PAGES + int i; + for (i = 0; i < HUGE_SHIFT_ENTRIES; ++i) + hv_set_pte_super_shift(i, huge_shift[i]); +#endif +} + /** * setup_cpu() - Do all necessary per-cpu, tile-specific initialization. * @boot: Is this the boot cpu? @@ -924,6 +983,8 @@ void __cpuinit setup_cpu(int boot) /* Reset the network state on this cpu. */ reset_network_state(); #endif + + init_super_pages(); } #ifdef CONFIG_BLK_DEV_INITRD @@ -1412,13 +1473,13 @@ void __init setup_per_cpu_areas(void) for (i = 0; i < size; i += PAGE_SIZE, ++pfn, ++pg) { /* Update the vmalloc mapping and page home. */ - pte_t *ptep = - virt_to_pte(NULL, (unsigned long)ptr + i); + unsigned long addr = (unsigned long)ptr + i; + pte_t *ptep = virt_to_pte(NULL, addr); pte_t pte = *ptep; BUG_ON(pfn != pte_pfn(pte)); pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3); pte = set_remote_cache_cpu(pte, cpu); - set_pte(ptep, pte); + set_pte_at(&init_mm, addr, ptep, pte); /* Update the lowmem mapping for consistency. */ lowmem_va = (unsigned long)pfn_to_kaddr(pfn); @@ -1431,7 +1492,7 @@ void __init setup_per_cpu_areas(void) BUG_ON(pte_huge(*ptep)); } BUG_ON(pfn != pte_pfn(*ptep)); - set_pte(ptep, pte); + set_pte_at(&init_mm, lowmem_va, ptep, pte); } } diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index 89529c9..27742e8 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -172,9 +172,6 @@ static tile_bundle_bits rewrite_load_store_unaligned( return (tilepro_bundle_bits) 0; } -#ifndef __LITTLE_ENDIAN -# error We assume little-endian representation with copy_xx_user size 2 here -#endif /* Handle unaligned load/store */ if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) { unsigned short val_16; @@ -195,8 +192,19 @@ static tile_bundle_bits rewrite_load_store_unaligned( state->update = 1; } } else { + unsigned short val_16; val = (val_reg == TREG_ZERO) ? 0 : regs->regs[val_reg]; - err = copy_to_user(addr, &val, size); + switch (size) { + case 2: + val_16 = val; + err = copy_to_user(addr, &val_16, sizeof(val_16)); + break; + case 4: + err = copy_to_user(addr, &val, sizeof(val)); + break; + default: + BUG(); + } } if (err) { diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index 91da0f7..cbc73a8 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -203,7 +203,7 @@ void __init ipi_init(void) if (hv_get_ipi_pte(tile, KERNEL_PL, &pte) != 0) panic("Failed to initialize IPI for cpu %d\n", cpu); - offset = hv_pte_get_pfn(pte) << PAGE_SHIFT; + offset = PFN_PHYS(pte_pfn(pte)); ipi_mappings[cpu] = ioremap_prot(offset, PAGE_SIZE, pte); } #endif diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c index cb44ba7..b08095b 100644 --- a/arch/tile/kernel/sys.c +++ b/arch/tile/kernel/sys.c @@ -32,11 +32,17 @@ #include <asm/syscalls.h> #include <asm/pgtable.h> #include <asm/homecache.h> +#include <asm/cachectl.h> #include <arch/chip.h> -SYSCALL_DEFINE0(flush_cache) +SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, len, + unsigned long, flags) { - homecache_evict(cpumask_of(smp_processor_id())); + if (flags & DCACHE) + homecache_evict(cpumask_of(smp_processor_id())); + if (flags & ICACHE) + flush_remote(0, HV_FLUSH_EVICT_L1I, mm_cpumask(current->mm), + 0, 0, 0, NULL, NULL, 0); return 0; } diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c index 71ae728..e25b0a8 100644 --- a/arch/tile/kernel/sysfs.c +++ b/arch/tile/kernel/sysfs.c @@ -93,6 +93,10 @@ HV_CONF_ATTR(mezz_part, HV_CONFSTR_MEZZ_PART_NUM) HV_CONF_ATTR(mezz_serial, HV_CONFSTR_MEZZ_SERIAL_NUM) HV_CONF_ATTR(mezz_revision, HV_CONFSTR_MEZZ_REV) HV_CONF_ATTR(mezz_description, HV_CONFSTR_MEZZ_DESC) +HV_CONF_ATTR(cpumod_part, HV_CONFSTR_CPUMOD_PART_NUM) +HV_CONF_ATTR(cpumod_serial, HV_CONFSTR_CPUMOD_SERIAL_NUM) +HV_CONF_ATTR(cpumod_revision, HV_CONFSTR_CPUMOD_REV) +HV_CONF_ATTR(cpumod_description,HV_CONFSTR_CPUMOD_DESC) HV_CONF_ATTR(switch_control, HV_CONFSTR_SWITCH_CONTROL) static struct attribute *board_attrs[] = { @@ -104,6 +108,10 @@ static struct attribute *board_attrs[] = { &dev_attr_mezz_serial.attr, &dev_attr_mezz_revision.attr, &dev_attr_mezz_description.attr, + &dev_attr_cpumod_part.attr, + &dev_attr_cpumod_serial.attr, + &dev_attr_cpumod_revision.attr, + &dev_attr_cpumod_description.attr, &dev_attr_switch_control.attr, NULL }; diff --git a/arch/tile/kernel/tlb.c b/arch/tile/kernel/tlb.c index a5f241c..3fd54d5 100644 --- a/arch/tile/kernel/tlb.c +++ b/arch/tile/kernel/tlb.c @@ -15,6 +15,7 @@ #include <linux/cpumask.h> #include <linux/module.h> +#include <linux/hugetlb.h> #include <asm/tlbflush.h> #include <asm/homecache.h> #include <hv/hypervisor.h> @@ -49,25 +50,25 @@ void flush_tlb_current_task(void) flush_tlb_mm(current->mm); } -void flush_tlb_page_mm(const struct vm_area_struct *vma, struct mm_struct *mm, +void flush_tlb_page_mm(struct vm_area_struct *vma, struct mm_struct *mm, unsigned long va) { - unsigned long size = hv_page_size(vma); + unsigned long size = vma_kernel_pagesize(vma); int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; flush_remote(0, cache, mm_cpumask(mm), va, size, size, mm_cpumask(mm), NULL, 0); } -void flush_tlb_page(const struct vm_area_struct *vma, unsigned long va) +void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { flush_tlb_page_mm(vma, vma->vm_mm, va); } EXPORT_SYMBOL(flush_tlb_page); -void flush_tlb_range(const struct vm_area_struct *vma, +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - unsigned long size = hv_page_size(vma); + unsigned long size = vma_kernel_pagesize(vma); struct mm_struct *mm = vma->vm_mm; int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; flush_remote(0, cache, mm_cpumask(mm), start, end - start, size, diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 73cff81..5b19a23 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -195,6 +195,25 @@ static int special_ill(bundle_bits bundle, int *sigp, int *codep) return 1; } +static const char *const int_name[] = { + [INT_MEM_ERROR] = "Memory error", + [INT_ILL] = "Illegal instruction", + [INT_GPV] = "General protection violation", + [INT_UDN_ACCESS] = "UDN access", + [INT_IDN_ACCESS] = "IDN access", +#if CHIP_HAS_SN() + [INT_SN_ACCESS] = "SN access", +#endif + [INT_SWINT_3] = "Software interrupt 3", + [INT_SWINT_2] = "Software interrupt 2", + [INT_SWINT_0] = "Software interrupt 0", + [INT_UNALIGN_DATA] = "Unaligned data", + [INT_DOUBLE_FAULT] = "Double fault", +#ifdef __tilegx__ + [INT_ILL_TRANS] = "Illegal virtual address", +#endif +}; + void __kprobes do_trap(struct pt_regs *regs, int fault_num, unsigned long reason) { @@ -211,10 +230,17 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, * current process and hope for the best. */ if (!user_mode(regs)) { + const char *name; if (fixup_exception(regs)) /* only UNALIGN_DATA in practice */ return; - pr_alert("Kernel took bad trap %d at PC %#lx\n", - fault_num, regs->pc); + if (fault_num >= 0 && + fault_num < sizeof(int_name)/sizeof(int_name[0]) && + int_name[fault_num] != NULL) + name = int_name[fault_num]; + else + name = "Unknown interrupt"; + pr_alert("Kernel took bad trap %d (%s) at PC %#lx\n", + fault_num, name, regs->pc); if (fault_num == INT_GPV) pr_alert("GPV_REASON is %#lx\n", reason); show_regs(regs); |