From bae918ac280f01a4fa89b570643def7bb276f597 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 11 Aug 2010 11:11:06 +0900
Subject: tile: remove unused ISA_DMA_THRESHOLD define

No need to define ISA_DMA_THRESHOLD

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
[cmetcalf@tilera.com: converted to a single-line #include file]
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/scatterlist.h | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/arch/tile/include/asm/scatterlist.h b/arch/tile/include/asm/scatterlist.h
index c560424..35d786f 100644
--- a/arch/tile/include/asm/scatterlist.h
+++ b/arch/tile/include/asm/scatterlist.h
@@ -1,22 +1 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- *   This program is free software; you can redistribute it and/or
- *   modify it under the terms of the GNU General Public License
- *   as published by the Free Software Foundation, version 2.
- *
- *   This program is distributed in the hope that it will be useful, but
- *   WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- *   NON INFRINGEMENT.  See the GNU General Public License for
- *   more details.
- */
-
-#ifndef _ASM_TILE_SCATTERLIST_H
-#define _ASM_TILE_SCATTERLIST_H
-
-#define ISA_DMA_THRESHOLD	(~0UL)
-
 #include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_TILE_SCATTERLIST_H */
-- 
cgit v1.1


From b77c49ab6d9bfe4d8207e1df72a1978fdd0a96b8 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Wed, 11 Aug 2010 10:54:13 -0400
Subject: arch/tile: support new kunmap_atomic() naming convention.

See commit 597781f3e51f48ef8e67be772196d9e9673752c4.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/highmem.h | 2 +-
 arch/tile/mm/highmem.c          | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h
index efdd12e..d155db6 100644
--- a/arch/tile/include/asm/highmem.h
+++ b/arch/tile/include/asm/highmem.h
@@ -60,7 +60,7 @@ void *kmap_fix_kpte(struct page *page, int finished);
 /* This macro is used only in map_new_virtual() to map "page". */
 #define kmap_prot page_to_kpgprot(page)
 
-void kunmap_atomic(void *kvaddr, enum km_type type);
+void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type);
 void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
 void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
 struct page *kmap_atomic_to_page(void *ptr);
diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c
index ff1cdff..12ab137 100644
--- a/arch/tile/mm/highmem.c
+++ b/arch/tile/mm/highmem.c
@@ -276,7 +276,7 @@ void *kmap_atomic(struct page *page, enum km_type type)
 }
 EXPORT_SYMBOL(kmap_atomic);
 
-void kunmap_atomic(void *kvaddr, enum km_type type)
+void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type)
 {
 	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
 	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
@@ -300,7 +300,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
 	arch_flush_lazy_mmu_mode();
 	pagefault_enable();
 }
-EXPORT_SYMBOL(kunmap_atomic);
+EXPORT_SYMBOL(kunmap_atomic_notypecheck);
 
 /*
  * This API is supposed to allow us to map memory without a "struct page".
-- 
cgit v1.1


From fad9e93e5490e1f2c54f5e7c8418952bc55200a1 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Wed, 11 Aug 2010 11:07:24 -0400
Subject: Add fanotify syscalls to <asm-generic/unistd.h>.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Eric Paris <eparis@redhat.com>
---
 include/asm-generic/unistd.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index e189809..b969770 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -642,9 +642,13 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg)
 __SYSCALL(__NR_wait4, sys_wait4)
 #define __NR_prlimit64 261
 __SYSCALL(__NR_prlimit64, sys_prlimit64)
+#define __NR_fanotify_init 262
+__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
+#define __NR_fanotify_mark 263
+__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
 
 #undef __NR_syscalls
-#define __NR_syscalls 262
+#define __NR_syscalls 264
 
 /*
  * All syscalls below here should go away really,
-- 
cgit v1.1


From a6fb72f1e9e9f1905746646fd1e40932336291da Mon Sep 17 00:00:00 2001
From: Andrea Gelmini <andrea.gelmini@gelma.net>
Date: Wed, 11 Aug 2010 21:07:34 +0200
Subject: arch: tile: kernel/proc.c Removed duplicated #include

Signed-off-by: Andrea Gelmini <andrea.gelmini@gelma.net>
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/kernel/proc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c
index 92ef925..2e02c41 100644
--- a/arch/tile/kernel/proc.c
+++ b/arch/tile/kernel/proc.c
@@ -23,7 +23,6 @@
 #include <linux/sysctl.h>
 #include <linux/hardirq.h>
 #include <linux/mman.h>
-#include <linux/smp.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/sections.h>
-- 
cgit v1.1


From c45c1cd7bf4e997c74407dd29268d41d7adbb257 Mon Sep 17 00:00:00 2001
From: Andrea Gelmini <andrea.gelmini@gelma.net>
Date: Wed, 11 Aug 2010 21:07:35 +0200
Subject: arch: tile: mm: pgtable.c: Removed duplicated #include

Signed-off-by: Andrea Gelmini <andrea.gelmini@gelma.net>
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/mm/pgtable.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 28c2314..335c246 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -17,7 +17,6 @@
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
-#include <linux/smp.h>
 #include <linux/highmem.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
-- 
cgit v1.1


From bc63de7c5bcc44b0098d09931f69a19e93d8a7ba Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 13 Aug 2010 08:23:07 -0400
Subject: arch/tile: correct a bug in freeing bootmem by VA for the optional
 second initrd.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/kernel/setup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 4dd21c1..e7d54c7 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -953,7 +953,7 @@ static void __init load_hv_initrd(void)
 	if (rc != stat.size) {
 		pr_err("Error reading %d bytes from hvfs file '%s': %d\n",
 		       stat.size, initramfs_file, rc);
-		free_bootmem((unsigned long) initrd, stat.size);
+		free_initrd_mem((unsigned long) initrd, stat.size);
 		return;
 	}
 	initrd_start = (unsigned long) initrd;
@@ -962,7 +962,7 @@ static void __init load_hv_initrd(void)
 
 void __init free_initrd_mem(unsigned long begin, unsigned long end)
 {
-	free_bootmem(begin, end - begin);
+	free_bootmem(__pa(begin), end - begin);
 }
 
 static void __init validate_hv(void)
-- 
cgit v1.1


From 749dc6f252b57d5cb9c1f4c1c4aafe4c92a28207 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 13 Aug 2010 08:24:22 -0400
Subject: arch/tile: Use separate, better minsec values for clocksource and
 sched_clock.

We were using the same 5-sec minsec for the clocksource and sched_clock
that we were using for the clock_event_device.  For the clock_event_device
that's exactly right since it has a short maximum countdown time.
But for sched_clock we want to avoid wraparound when converting from
ticks to nsec over a much longer window, so we force a shift of 10.
And for clocksource it seems dodgy to use a 5-sec minsec as well, so we
copy some other platforms and force a shift of 22.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/kernel/time.c | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index b9ab25a..6bed820 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -36,16 +36,6 @@
 /* How many cycles per second we are running at. */
 static cycles_t cycles_per_sec __write_once;
 
-/*
- * We set up shift and multiply values with a minsec of five seconds,
- * since our timer counter counts down 31 bits at a frequency of
- * no less than 500 MHz.  See @minsec for clocks_calc_mult_shift().
- * We could use a different value for the 64-bit free-running
- * cycle counter, but we use the same one for consistency, and since
- * we will be reasonably precise with this value anyway.
- */
-#define TILE_MINSEC 5
-
 cycles_t get_clock_rate(void)
 {
 	return cycles_per_sec;
@@ -68,6 +58,14 @@ cycles_t get_cycles(void)
 }
 #endif
 
+/*
+ * We use a relatively small shift value so that sched_clock()
+ * won't wrap around very often.
+ */
+#define SCHED_CLOCK_SHIFT 10
+
+static unsigned long sched_clock_mult __write_once;
+
 static cycles_t clocksource_get_cycles(struct clocksource *cs)
 {
 	return get_cycles();
@@ -78,6 +76,7 @@ static struct clocksource cycle_counter_cs = {
 	.rating = 300,
 	.read = clocksource_get_cycles,
 	.mask = CLOCKSOURCE_MASK(64),
+	.shift = 22,   /* typical value, e.g. x86 tsc uses this */
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -88,8 +87,10 @@ static struct clocksource cycle_counter_cs = {
 void __init setup_clock(void)
 {
 	cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED);
-	clocksource_calc_mult_shift(&cycle_counter_cs, cycles_per_sec,
-				    TILE_MINSEC);
+	sched_clock_mult =
+		clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT);
+	cycle_counter_cs.mult =
+		clocksource_hz2mult(cycles_per_sec, cycle_counter_cs.shift);
 }
 
 void __init calibrate_delay(void)
@@ -117,9 +118,14 @@ void __init time_init(void)
  * counter, plus bit 31, which signifies that the counter has wrapped
  * from zero to (2**31) - 1.  The INT_TILE_TIMER interrupt will be
  * raised as long as bit 31 is set.
+ *
+ * The TILE_MINSEC value represents the largest range of real-time
+ * we can possibly cover with the timer, based on MAX_TICK combined
+ * with the slowest reasonable clock rate we might run at.
  */
 
 #define MAX_TICK 0x7fffffff   /* we have 31 bits of countdown timer */
+#define TILE_MINSEC 5         /* timer covers no more than 5 seconds */
 
 static int tile_timer_set_next_event(unsigned long ticks,
 				     struct clock_event_device *evt)
@@ -211,8 +217,7 @@ void do_timer_interrupt(struct pt_regs *regs, int fault_num)
 unsigned long long sched_clock(void)
 {
 	return clocksource_cyc2ns(get_cycles(),
-				  cycle_counter_cs.mult,
-				  cycle_counter_cs.shift);
+				  sched_clock_mult, SCHED_CLOCK_SHIFT);
 }
 
 int setup_profiling_timer(unsigned int multiplier)
-- 
cgit v1.1


From 32020effaf713c0c669864301bcd5dac6b9bb9e0 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 13 Aug 2010 08:32:21 -0400
Subject: arch/tile: Fix a couple of issues with the COMPAT code for TILE-Gx.

First, the siginfo preamble wasn't quite right; we need to indicate
that we are padding up to 4 ints of preamble for 64-bit code, and
then for compat mode we need to pad differently, using only 3 ints.

Second, the C ABI requires a save area of two registers, not two
pointers, since in compat mode we have 64-bit registers all of which
we need to save, even though we only have 32-bit VAs.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/arch/abi.h     | 4 +---
 arch/tile/include/asm/siginfo.h  | 4 ++++
 arch/tile/kernel/compat_signal.c | 4 +++-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/tile/include/arch/abi.h b/arch/tile/include/arch/abi.h
index da8df5b..8affc76 100644
--- a/arch/tile/include/arch/abi.h
+++ b/arch/tile/include/arch/abi.h
@@ -59,9 +59,7 @@
  * The ABI requires callers to allocate a caller state save area of
  * this many bytes at the bottom of each stack frame.
  */
-#ifdef __tile__
-#define C_ABI_SAVE_AREA_SIZE (2 * __SIZEOF_POINTER__)
-#endif
+#define C_ABI_SAVE_AREA_SIZE (2 * (CHIP_WORD_SIZE() / 8))
 
 /**
  * The operand to an 'info' opcode directing the backtracer to not
diff --git a/arch/tile/include/asm/siginfo.h b/arch/tile/include/asm/siginfo.h
index 0c12d1b..56d661b 100644
--- a/arch/tile/include/asm/siginfo.h
+++ b/arch/tile/include/asm/siginfo.h
@@ -17,6 +17,10 @@
 
 #define __ARCH_SI_TRAPNO
 
+#ifdef __LP64__
+# define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
+#endif
+
 #include <asm-generic/siginfo.h>
 
 /*
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index d5efb21..9c710db 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -56,13 +56,15 @@ struct compat_ucontext {
 	sigset_t	  uc_sigmask;	/* mask last for extensibility */
 };
 
+#define COMPAT_SI_PAD_SIZE	((SI_MAX_SIZE - 3 * sizeof(int)) / sizeof(int))
+
 struct compat_siginfo {
 	int si_signo;
 	int si_errno;
 	int si_code;
 
 	union {
-		int _pad[SI_PAD_SIZE];
+		int _pad[COMPAT_SI_PAD_SIZE];
 
 		/* kill() */
 		struct {
-- 
cgit v1.1


From 1fcbe027b5d29ec9cd0eeb753c14fb366ae852ac Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 13 Aug 2010 08:40:57 -0400
Subject: arch/tile: support backtracing on TILE-Gx

This functionality was stubbed out until recently.  Now we support our
normal backtracing API on TILE-Gx as well as on TILE64/TILEPro.
This change includes a tweak to the instruction encoding caused by
adding addxli for compat mode.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/backtrace.h |   4 +-
 arch/tile/kernel/backtrace.c      | 137 +++++++++++++++++++++++++++-----------
 arch/tile/kernel/stack.c          |   8 ---
 arch/tile/kernel/traps.c          |   4 +-
 4 files changed, 105 insertions(+), 48 deletions(-)

diff --git a/arch/tile/include/asm/backtrace.h b/arch/tile/include/asm/backtrace.h
index 6970bfc..758ca46 100644
--- a/arch/tile/include/asm/backtrace.h
+++ b/arch/tile/include/asm/backtrace.h
@@ -21,7 +21,9 @@
 
 #include <arch/chip.h>
 
-#if CHIP_VA_WIDTH() > 32
+#if defined(__tile__)
+typedef unsigned long VirtualAddress;
+#elif CHIP_VA_WIDTH() > 32
 typedef unsigned long long VirtualAddress;
 #else
 typedef unsigned int VirtualAddress;
diff --git a/arch/tile/kernel/backtrace.c b/arch/tile/kernel/backtrace.c
index 77265f3..d3c41c1 100644
--- a/arch/tile/kernel/backtrace.c
+++ b/arch/tile/kernel/backtrace.c
@@ -19,9 +19,6 @@
 
 #include <arch/chip.h>
 
-#if TILE_CHIP < 10
-
-
 #include <asm/opcode-tile.h>
 
 
@@ -29,6 +26,27 @@
 #define TREG_LR 55
 
 
+#if TILE_CHIP >= 10
+#define tile_bundle_bits tilegx_bundle_bits
+#define TILE_MAX_INSTRUCTIONS_PER_BUNDLE TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE
+#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_BUNDLE_ALIGNMENT_IN_BYTES
+#define tile_decoded_instruction tilegx_decoded_instruction
+#define tile_mnemonic tilegx_mnemonic
+#define parse_insn_tile parse_insn_tilegx
+#define TILE_OPC_IRET TILEGX_OPC_IRET
+#define TILE_OPC_ADDI TILEGX_OPC_ADDI
+#define TILE_OPC_ADDLI TILEGX_OPC_ADDLI
+#define TILE_OPC_INFO TILEGX_OPC_INFO
+#define TILE_OPC_INFOL TILEGX_OPC_INFOL
+#define TILE_OPC_JRP TILEGX_OPC_JRP
+#define TILE_OPC_MOVE TILEGX_OPC_MOVE
+#define OPCODE_STORE TILEGX_OPC_ST
+typedef long long bt_int_reg_t;
+#else
+#define OPCODE_STORE TILE_OPC_SW
+typedef int bt_int_reg_t;
+#endif
+
 /** A decoded bundle used for backtracer analysis. */
 struct BacktraceBundle {
 	tile_bundle_bits bits;
@@ -41,7 +59,7 @@ struct BacktraceBundle {
 /* This implementation only makes sense for native tools. */
 /** Default function to read memory. */
 static bool bt_read_memory(void *result, VirtualAddress addr,
-			   size_t size, void *extra)
+			   unsigned int size, void *extra)
 {
 	/* FIXME: this should do some horrible signal stuff to catch
 	 * SEGV cleanly and fail.
@@ -106,6 +124,12 @@ static bool bt_has_addi_sp(const struct BacktraceBundle *bundle, int *adjust)
 		find_matching_insn(bundle, TILE_OPC_ADDI, vals, 2);
 	if (insn == NULL)
 		insn = find_matching_insn(bundle, TILE_OPC_ADDLI, vals, 2);
+#if TILE_CHIP >= 10
+	if (insn == NULL)
+		insn = find_matching_insn(bundle, TILEGX_OPC_ADDXLI, vals, 2);
+	if (insn == NULL)
+		insn = find_matching_insn(bundle, TILEGX_OPC_ADDXI, vals, 2);
+#endif
 	if (insn == NULL)
 		return false;
 
@@ -190,13 +214,52 @@ static inline bool bt_has_move_r52_sp(const struct BacktraceBundle *bundle)
 	return find_matching_insn(bundle, TILE_OPC_MOVE, vals, 2) != NULL;
 }
 
-/** Does this bundle contain the instruction 'sw sp, lr'? */
+/** Does this bundle contain a store of lr to sp? */
 static inline bool bt_has_sw_sp_lr(const struct BacktraceBundle *bundle)
 {
 	static const int vals[2] = { TREG_SP, TREG_LR };
-	return find_matching_insn(bundle, TILE_OPC_SW, vals, 2) != NULL;
+	return find_matching_insn(bundle, OPCODE_STORE, vals, 2) != NULL;
+}
+
+#if TILE_CHIP >= 10
+/** Track moveli values placed into registers. */
+static inline void bt_update_moveli(const struct BacktraceBundle *bundle,
+				    int moveli_args[])
+{
+	int i;
+	for (i = 0; i < bundle->num_insns; i++) {
+		const struct tile_decoded_instruction *insn =
+			&bundle->insns[i];
+
+		if (insn->opcode->mnemonic == TILEGX_OPC_MOVELI) {
+			int reg = insn->operand_values[0];
+			moveli_args[reg] = insn->operand_values[1];
+		}
+	}
 }
 
+/** Does this bundle contain an 'add sp, sp, reg' instruction
+ * from a register that we saw a moveli into, and if so, what
+ * is the value in the register?
+ */
+static bool bt_has_add_sp(const struct BacktraceBundle *bundle, int *adjust,
+			  int moveli_args[])
+{
+	static const int vals[2] = { TREG_SP, TREG_SP };
+
+	const struct tile_decoded_instruction *insn =
+		find_matching_insn(bundle, TILEGX_OPC_ADDX, vals, 2);
+	if (insn) {
+		int reg = insn->operand_values[2];
+		if (moveli_args[reg]) {
+			*adjust = moveli_args[reg];
+			return true;
+		}
+	}
+	return false;
+}
+#endif
+
 /** Locates the caller's PC and SP for a program starting at the
  * given address.
  */
@@ -227,6 +290,11 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location,
 	int next_bundle = 0;
 	VirtualAddress pc;
 
+#if TILE_CHIP >= 10
+	/* Naively try to track moveli values to support addx for -m32. */
+	int moveli_args[TILEGX_NUM_REGISTERS] = { 0 };
+#endif
+
 	/* Default to assuming that the caller's sp is the current sp.
 	 * This is necessary to handle the case where we start backtracing
 	 * right at the end of the epilog.
@@ -380,7 +448,11 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location,
 
 		if (!sp_determined) {
 			int adjust;
-			if (bt_has_addi_sp(&bundle, &adjust)) {
+			if (bt_has_addi_sp(&bundle, &adjust)
+#if TILE_CHIP >= 10
+			    || bt_has_add_sp(&bundle, &adjust, moveli_args)
+#endif
+				) {
 				location->sp_location = SP_LOC_OFFSET;
 
 				if (adjust <= 0) {
@@ -427,6 +499,11 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location,
 					sp_determined = true;
 				}
 			}
+
+#if TILE_CHIP >= 10
+			/* Track moveli arguments for -m32 mode. */
+			bt_update_moveli(&bundle, moveli_args);
+#endif
 		}
 
 		if (bt_has_iret(&bundle)) {
@@ -502,11 +579,10 @@ void backtrace_init(BacktraceIterator *state,
 		break;
 	}
 
-	/* The frame pointer should theoretically be aligned mod 8. If
-	 * it's not even aligned mod 4 then something terrible happened
-	 * and we should mark it as invalid.
+	/* If the frame pointer is not aligned to the basic word size
+	 * something terrible happened and we should mark it as invalid.
 	 */
-	if (fp % 4 != 0)
+	if (fp % sizeof(bt_int_reg_t) != 0)
 		fp = -1;
 
 	/* -1 means "don't know initial_frame_caller_pc". */
@@ -547,9 +623,16 @@ void backtrace_init(BacktraceIterator *state,
 	state->read_memory_func_extra = read_memory_func_extra;
 }
 
+/* Handle the case where the register holds more bits than the VA. */
+static bool valid_addr_reg(bt_int_reg_t reg)
+{
+	return ((VirtualAddress)reg == reg);
+}
+
 bool backtrace_next(BacktraceIterator *state)
 {
-	VirtualAddress next_fp, next_pc, next_frame[2];
+	VirtualAddress next_fp, next_pc;
+	bt_int_reg_t next_frame[2];
 
 	if (state->fp == -1) {
 		/* No parent frame. */
@@ -563,11 +646,9 @@ bool backtrace_next(BacktraceIterator *state)
 	}
 
 	next_fp = next_frame[1];
-	if (next_fp % 4 != 0) {
-		/* Caller's frame pointer is suspect, so give up.
-		 * Technically it should be aligned mod 8, but we will
-		 * be forgiving here.
-		 */
+	if (!valid_addr_reg(next_frame[1]) ||
+	    next_fp % sizeof(bt_int_reg_t) != 0) {
+		/* Caller's frame pointer is suspect, so give up. */
 		return false;
 	}
 
@@ -585,7 +666,7 @@ bool backtrace_next(BacktraceIterator *state)
 	} else {
 		/* Get the caller PC from the frame linkage area. */
 		next_pc = next_frame[0];
-		if (next_pc == 0 ||
+		if (!valid_addr_reg(next_frame[0]) || next_pc == 0 ||
 		    next_pc % TILE_BUNDLE_ALIGNMENT_IN_BYTES != 0) {
 			/* The PC is suspect, so give up. */
 			return false;
@@ -599,23 +680,3 @@ bool backtrace_next(BacktraceIterator *state)
 
 	return true;
 }
-
-#else /* TILE_CHIP < 10 */
-
-void backtrace_init(BacktraceIterator *state,
-		    BacktraceMemoryReader read_memory_func,
-		    void *read_memory_func_extra,
-		    VirtualAddress pc, VirtualAddress lr,
-		    VirtualAddress sp, VirtualAddress r52)
-{
-	state->pc = pc;
-	state->sp = sp;
-	state->fp = -1;
-	state->initial_frame_caller_pc = -1;
-	state->read_memory_func = read_memory_func;
-	state->read_memory_func_extra = read_memory_func_extra;
-}
-
-bool backtrace_next(BacktraceIterator *state) { return false; }
-
-#endif /* TILE_CHIP < 10 */
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index b6268d3..38a68b0b4 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -108,7 +108,6 @@ static bool read_memory_func(void *result, VirtualAddress address,
 /* Return a pt_regs pointer for a valid fault handler frame */
 static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
 {
-#ifndef __tilegx__
 	const char *fault = NULL;  /* happy compiler */
 	char fault_buf[64];
 	VirtualAddress sp = kbt->it.sp;
@@ -146,7 +145,6 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
 	}
 	if (!kbt->profile || (INT_MASK(p->faultnum) & QUEUED_INTERRUPTS) == 0)
 		return p;
-#endif
 	return NULL;
 }
 
@@ -351,12 +349,6 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
 		       kbt->task->pid, kbt->task->tgid, kbt->task->comm,
 		       smp_processor_id(), get_cycles());
 	}
-#ifdef __tilegx__
-	if (kbt->is_current) {
-		__insn_mtspr(SPR_SIM_CONTROL,
-			     SIM_DUMP_SPR_ARG(SIM_DUMP_BACKTRACE));
-	}
-#endif
 	kbt->verbose = 1;
 	i = 0;
 	for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) {
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index 3870abbe..0f362dc 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -128,7 +128,9 @@ static int special_ill(bundle_bits bundle, int *sigp, int *codep)
 #ifdef __tilegx__
 	if ((bundle & TILEGX_BUNDLE_MODE_MASK) != 0)
 		return 0;
-	if (get_Opcode_X1(bundle) != UNARY_OPCODE_X1)
+	if (get_Opcode_X1(bundle) != RRR_0_OPCODE_X1)
+		return 0;
+	if (get_RRROpcodeExtension_X1(bundle) != UNARY_RRR_0_OPCODE_X1)
 		return 0;
 	if (get_UnaryOpcodeExtension_X1(bundle) != ILL_UNARY_OPCODE_X1)
 		return 0;
-- 
cgit v1.1


From c745a8a11fa1df6078bfc61fc29492ed43f71c2b Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 13 Aug 2010 08:52:19 -0400
Subject: arch/tile: Various cleanups.

This change rolls up random cleanups not representing any actual bugs.

- Remove a stale CONFIG_ value from the default tile_defconfig
- Remove unused tns_atomic_xxx() family of methods from <asm/atomic.h>
- Optimize get_order() using Tile's "clz" instruction
- Fix a bad hypervisor upcall name (not currently used in Linux anyway)
- Use __copy_in_user_inatomic() name for consistency, and export it
- Export some additional hypervisor driver I/O upcalls and some homecache calls
- Remove the obfuscating MEMCPY_TEST_WH64 support code
- Other stray comment cleanups, #if 0 removal, etc.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/configs/tile_defconfig  |  1 -
 arch/tile/include/asm/atomic_32.h | 37 -------------------------------------
 arch/tile/include/asm/page.h      |  6 +++++-
 arch/tile/include/asm/uaccess.h   |  4 ++--
 arch/tile/include/hv/hypervisor.h |  8 ++++----
 arch/tile/lib/Makefile            |  4 +++-
 arch/tile/lib/exports.c           | 16 +++++++++++-----
 arch/tile/lib/memcpy_32.S         | 20 ++++----------------
 arch/tile/lib/memset_32.c         | 25 -------------------------
 arch/tile/mm/fault.c              |  8 ++++++++
 arch/tile/mm/homecache.c          |  3 +++
 11 files changed, 40 insertions(+), 92 deletions(-)

diff --git a/arch/tile/configs/tile_defconfig b/arch/tile/configs/tile_defconfig
index f34c70b..5ad1a71 100644
--- a/arch/tile/configs/tile_defconfig
+++ b/arch/tile/configs/tile_defconfig
@@ -231,7 +231,6 @@ CONFIG_HARDWALL=y
 CONFIG_MEMPROF=y
 CONFIG_XGBE=y
 CONFIG_NET_TILE=y
-CONFIG_PSEUDO_NAPI=y
 CONFIG_TILEPCI_ENDP=y
 CONFIG_TILEPCI_HOST_SUBSET=m
 CONFIG_TILE_IDE_GPIO=y
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index 40a5a3a..ed359ae 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -255,43 +255,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
 #define smp_mb__after_atomic_dec()	do { } while (0)
 #define smp_mb__after_atomic_inc()	do { } while (0)
 
-
-/*
- * Support "tns" atomic integers.  These are atomic integers that can
- * hold any value but "1".  They are more efficient than regular atomic
- * operations because the "lock" (aka acquire) step is a single "tns"
- * in the uncontended case, and the "unlock" (aka release) step is a
- * single "store" without an mf.  (However, note that on tilepro the
- * "tns" will evict the local cache line, so it's not all upside.)
- *
- * Note that you can ONLY observe the value stored in the pointer
- * using these operations; a direct read of the value may confusingly
- * return the special value "1".
- */
-
-int __tns_atomic_acquire(atomic_t *);
-void __tns_atomic_release(atomic_t *p, int v);
-
-static inline void tns_atomic_set(atomic_t *v, int i)
-{
-	__tns_atomic_acquire(v);
-	__tns_atomic_release(v, i);
-}
-
-static inline int tns_atomic_cmpxchg(atomic_t *v, int o, int n)
-{
-	int ret = __tns_atomic_acquire(v);
-	__tns_atomic_release(v, (ret == o) ? n : ret);
-	return ret;
-}
-
-static inline int tns_atomic_xchg(atomic_t *v, int n)
-{
-	int ret = __tns_atomic_acquire(v);
-	__tns_atomic_release(v, n);
-	return ret;
-}
-
 #endif /* !__ASSEMBLY__ */
 
 /*
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index f894a90..7d90641 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -129,6 +129,11 @@ static inline u64 pmd_val(pmd_t pmd)
 
 #endif
 
+static inline __attribute_const__ int get_order(unsigned long size)
+{
+	return BITS_PER_LONG - __builtin_clzl((size - 1) >> PAGE_SHIFT);
+}
+
 #endif /* !__ASSEMBLY__ */
 
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
@@ -332,7 +337,6 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
 	(VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/getorder.h>
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index ed17a80..ef34d2caa 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -389,14 +389,14 @@ static inline unsigned long __must_check copy_from_user(void *to,
  * Returns number of bytes that could not be copied.
  * On success, this will be zero.
  */
-extern unsigned long __copy_in_user_asm(
+extern unsigned long __copy_in_user_inatomic(
 	void __user *to, const void __user *from, unsigned long n);
 
 static inline unsigned long __must_check
 __copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
 	might_sleep();
-	return __copy_in_user_asm(to, from, n);
+	return __copy_in_user_inatomic(to, from, n);
 }
 
 static inline unsigned long __must_check
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index 59b46dc..9bd303a 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -532,11 +532,11 @@ void hv_disable_intr(HV_IntrMask disab_mask);
  */
 void hv_clear_intr(HV_IntrMask clear_mask);
 
-/** Assert a set of device interrupts.
+/** Raise a set of device interrupts.
  *
- * @param assert_mask Bitmap of interrupts to clear.
+ * @param raise_mask Bitmap of interrupts to raise.
  */
-void hv_assert_intr(HV_IntrMask assert_mask);
+void hv_raise_intr(HV_IntrMask raise_mask);
 
 /** Trigger a one-shot interrupt on some tile
  *
@@ -1712,7 +1712,7 @@ typedef struct
  * @param cache_control This argument allows you to specify a length of
  *        physical address space to flush (maximum HV_FLUSH_MAX_CACHE_LEN).
  *        You can "or" in HV_FLUSH_EVICT_L2 to flush the whole L2 cache.
- *        You can "or" in HV_FLUSH_EVICT_LI1 to flush the whole LII cache.
+ *        You can "or" in HV_FLUSH_EVICT_L1I to flush the whole L1I cache.
  *        HV_FLUSH_ALL flushes all caches.
  * @param cache_cpumask Bitmask (in row-major order, supervisor-relative) of
  *        tile indices to perform cache flush on.  The low bit of the first
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 438af38..746dc81 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -7,7 +7,9 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o \
 	memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \
 	strchr_$(BITS).o strlen_$(BITS).o
 
-ifneq ($(CONFIG_TILEGX),y)
+ifeq ($(CONFIG_TILEGX),y)
+lib-y += memcpy_user_64.o
+else
 lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o
 endif
 
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
index 6bc7b52..ce5dbf5 100644
--- a/arch/tile/lib/exports.c
+++ b/arch/tile/lib/exports.c
@@ -36,21 +36,29 @@ EXPORT_SYMBOL(clear_user_asm);
 EXPORT_SYMBOL(current_text_addr);
 EXPORT_SYMBOL(dump_stack);
 
-/* arch/tile/lib/__memcpy.S */
-/* NOTE: on TILE64, these symbols appear in arch/tile/lib/memcpy_tile64.c */
+/* arch/tile/lib/, various memcpy files */
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(__copy_to_user_inatomic);
 EXPORT_SYMBOL(__copy_from_user_inatomic);
 EXPORT_SYMBOL(__copy_from_user_zeroing);
+#ifdef __tilegx__
+EXPORT_SYMBOL(__copy_in_user_inatomic);
+#endif
 
 /* hypervisor glue */
 #include <hv/hypervisor.h>
 EXPORT_SYMBOL(hv_dev_open);
 EXPORT_SYMBOL(hv_dev_pread);
 EXPORT_SYMBOL(hv_dev_pwrite);
+EXPORT_SYMBOL(hv_dev_preada);
+EXPORT_SYMBOL(hv_dev_pwritea);
+EXPORT_SYMBOL(hv_dev_poll);
+EXPORT_SYMBOL(hv_dev_poll_cancel);
 EXPORT_SYMBOL(hv_dev_close);
+EXPORT_SYMBOL(hv_sysconf);
+EXPORT_SYMBOL(hv_confstr);
 
-/* -ltile-cc */
+/* libgcc.a */
 uint32_t __udivsi3(uint32_t dividend, uint32_t divisor);
 EXPORT_SYMBOL(__udivsi3);
 int32_t __divsi3(int32_t dividend, int32_t divisor);
@@ -70,8 +78,6 @@ EXPORT_SYMBOL(__moddi3);
 #ifndef __tilegx__
 uint64_t __ll_mul(uint64_t n0, uint64_t n1);
 EXPORT_SYMBOL(__ll_mul);
-#endif
-#ifndef __tilegx__
 int64_t __muldi3(int64_t, int64_t);
 EXPORT_SYMBOL(__muldi3);
 uint64_t __lshrdi3(uint64_t, unsigned int);
diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S
index f92984b..30c3b7e 100644
--- a/arch/tile/lib/memcpy_32.S
+++ b/arch/tile/lib/memcpy_32.S
@@ -17,10 +17,6 @@
 
 #include <arch/chip.h>
 
-#if CHIP_HAS_WH64() || defined(MEMCPY_TEST_WH64)
-#define MEMCPY_USE_WH64
-#endif
-
 
 #include <linux/linkage.h>
 
@@ -160,7 +156,7 @@ EX:	{ sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
 
 	{ addi r3, r1, 60; andi r9, r9, -64 }
 
-#ifdef MEMCPY_USE_WH64
+#if CHIP_HAS_WH64()
         /* No need to prefetch dst, we'll just do the wh64
          * right before we copy a line.
 	 */
@@ -173,7 +169,7 @@ EX:	{ lw r6, r3; addi r3, r3, 64 }
         /* Intentionally stall for a few cycles to leave L2 cache alone. */
         { bnzt zero, . }
 EX:	{ lw r7, r3; addi r3, r3, 64 }
-#ifndef MEMCPY_USE_WH64
+#if !CHIP_HAS_WH64()
         /* Prefetch the dest */
         /* Intentionally stall for a few cycles to leave L2 cache alone. */
         { bnzt zero, . }
@@ -288,15 +284,7 @@ EX:	{ lw r7, r3; addi r3, r3, 64 }
         /* Fill second L1D line. */
 EX:	{ lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
 
-#ifdef MEMCPY_TEST_WH64
-        /* Issue a fake wh64 that clobbers the destination words
-         * with random garbage, for testing.
-         */
-	{ movei r19, 64; crc32_32 r10, r2, r9 }
-.Lwh64_test_loop:
-EX:	{ sw r9, r10; addi r9, r9, 4; addi r19, r19, -4 }
-        { bnzt r19, .Lwh64_test_loop; crc32_32 r10, r10, r19 }
-#elif CHIP_HAS_WH64()
+#if CHIP_HAS_WH64()
         /* Prepare destination line for writing. */
 EX:	{ wh64 r9; addi r9, r9, 64 }
 #else
@@ -340,7 +328,7 @@ EX:	{ lw r18, r1; addi r1, r1, 4 }                  /* r18 = WORD_8 */
 EX:	{ sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
 EX:	{ sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
 EX:	{ sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
-#ifdef MEMCPY_USE_WH64
+#if CHIP_HAS_WH64()
 EX:	{ sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
 #else
         /* Back up the r9 to a cache line we are already storing to
diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c
index bfde5d8..d014c1f 100644
--- a/arch/tile/lib/memset_32.c
+++ b/arch/tile/lib/memset_32.c
@@ -141,7 +141,6 @@ void *memset(void *s, int c, size_t n)
 			 */
 			__insn_prefetch(&out32[ahead32]);
 
-#if 1
 #if CACHE_LINE_SIZE_IN_WORDS % 4 != 0
 #error "Unhandled CACHE_LINE_SIZE_IN_WORDS"
 #endif
@@ -157,30 +156,6 @@ void *memset(void *s, int c, size_t n)
 				*out32++ = v32;
 				*out32++ = v32;
 			}
-#else
-			/* Unfortunately, due to a code generator flaw this
-			 * allocates a separate register for each of these
-			 * stores, which requires a large number of spills,
-			 * which makes this procedure enormously bigger
-			 * (something like 70%)
-			 */
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			*out32++ = v32;
-			n32 -= 16;
-#endif
 
 			/* To save compiled code size, reuse this loop even
 			 * when we run out of prefetching to do by dropping
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 0011f06..704f3e8 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -567,6 +567,14 @@ do_sigbus:
  * since that might indicate we have not yet squirreled the SPR
  * contents away and can thus safely take a recursive interrupt.
  * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_1_2.
+ *
+ * Note that this routine is called before homecache_tlb_defer_enter(),
+ * which means that we can properly unlock any atomics that might
+ * be used there (good), but also means we must be very sensitive
+ * to not touch any data structures that might be located in memory
+ * that could migrate, as we could be entering the kernel on a dataplane
+ * cpu that has been deferring kernel TLB updates.  This means, for
+ * example, that we can't migrate init_mm or its pgd.
  */
 struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
 				      unsigned long address,
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index 97c478e..fb3b4a5 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -29,6 +29,7 @@
 #include <linux/timex.h>
 #include <linux/cache.h>
 #include <linux/smp.h>
+#include <linux/module.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
@@ -348,6 +349,7 @@ pte_t pte_set_home(pte_t pte, int home)
 
 	return pte;
 }
+EXPORT_SYMBOL(pte_set_home);
 
 /*
  * The routines in this section are the "static" versions of the normal
@@ -403,6 +405,7 @@ struct page *homecache_alloc_pages(gfp_t gfp_mask,
 		homecache_change_page_home(page, order, home);
 	return page;
 }
+EXPORT_SYMBOL(homecache_alloc_pages);
 
 struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
 					unsigned int order, int home)
-- 
cgit v1.1


From ba00376b0b13f234d839541a7b36a5bf5c2a4036 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 13 Aug 2010 16:37:00 -0400
Subject: arch/tile: extend syscall ABI to set r1 on return as well.

Until now, the tile architecture ABI for syscall return has just been
that r0 holds the return value, and an error is only signalled like it is
for kernel code, with a negative small number.

However, this means that in multiple places in userspace we end up writing
the same three-cycle idiom that tests for a small negative number for
error.  It seems cleaner to instead move that code into the kernel, and
set r1 to hold zero on success or errno on failure; previously, r1 was
just zeroed on return from the kernel (to avoid leaking kernel state).
This way a single conditional branch after the syscall is sufficient
to test for the failure case.  The number of cycles taken is the same,
but the error-checking code is in just one place, so total code size is
smaller, and random userspace syscall code is easier to understand.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/kernel/intvec_32.S | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index 3404c75..84f296c 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -952,7 +952,7 @@ STD_ENTRY(interrupt_return)
 	 * able to safely read all the remaining words on those cache
 	 * lines without waiting for the memory subsystem.
 	 */
-	pop_reg_zero r0, r1, sp, PTREGS_OFFSET_REG(30) - PTREGS_OFFSET_REG(0)
+	pop_reg_zero r0, r28, sp, PTREGS_OFFSET_REG(30) - PTREGS_OFFSET_REG(0)
 	pop_reg_zero r30, r2, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(30)
 	pop_reg_zero r21, r3, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC
 	pop_reg_zero lr, r4, sp, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_EX1
@@ -1017,7 +1017,17 @@ STD_ENTRY(interrupt_return)
 	{ move r22, zero; move r23, zero }
 	{ move r24, zero; move r25, zero }
 	{ move r26, zero; move r27, zero }
-	{ move r28, zero; move r29, zero }
+
+	/* Set r1 to errno if we are returning an error, otherwise zero. */
+	{
+	 moveli r29, 1024
+	 sub    r1, zero, r0
+	}
+	slt_u   r29, r1, r29
+	{
+	 mnz    r1, r29, r1
+	 move   r29, zero
+	}
 	iret
 
 	/*
-- 
cgit v1.1


From 947e7dc1aed0532478e10988328bfd7426e0c2bd Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 13 Aug 2010 20:32:41 -0400
Subject: arch/tile: Rename the hweight() implementations to __arch_hweight()

See commit 1527bc8b928dd1399c3d3467dd47d9ede210978a.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/bitops.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h
index 84600f3..6832b4b 100644
--- a/arch/tile/include/asm/bitops.h
+++ b/arch/tile/include/asm/bitops.h
@@ -98,26 +98,27 @@ static inline int fls64(__u64 w)
 	return (sizeof(__u64) * 8) - __builtin_clzll(w);
 }
 
-static inline unsigned int hweight32(unsigned int w)
+static inline unsigned int __arch_hweight32(unsigned int w)
 {
 	return __builtin_popcount(w);
 }
 
-static inline unsigned int hweight16(unsigned int w)
+static inline unsigned int __arch_hweight16(unsigned int w)
 {
 	return __builtin_popcount(w & 0xffff);
 }
 
-static inline unsigned int hweight8(unsigned int w)
+static inline unsigned int __arch_hweight8(unsigned int w)
 {
 	return __builtin_popcount(w & 0xff);
 }
 
-static inline unsigned long hweight64(__u64 w)
+static inline unsigned long __arch_hweight64(__u64 w)
 {
 	return __builtin_popcountll(w);
 }
 
+#include <asm-generic/bitops/const_hweight.h>
 #include <asm-generic/bitops/lock.h>
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/ext2-non-atomic.h>
-- 
cgit v1.1


From b3ae98ab8217a8621859e1d9cbf3ee6c4c19533b Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 13 Aug 2010 20:43:39 -0400
Subject: arch/tile: rename ARCH_KMALLOC_MINALIGN to ARCH_DMA_MINALIGN

See commit a6eb9fe105d5de0053b261148cee56c94b4720ca.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
 arch/tile/include/asm/cache.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h
index f610184..08a2815 100644
--- a/arch/tile/include/asm/cache.h
+++ b/arch/tile/include/asm/cache.h
@@ -27,11 +27,10 @@
 #define L2_CACHE_ALIGN(x)	(((x)+(L2_CACHE_BYTES-1)) & -L2_CACHE_BYTES)
 
 /*
- * TILE-Gx is fully coherents so we don't need to define
- * ARCH_KMALLOC_MINALIGN.
+ * TILE-Gx is fully coherent so we don't need to define ARCH_DMA_MINALIGN.
  */
 #ifndef __tilegx__
-#define ARCH_KMALLOC_MINALIGN	L2_CACHE_BYTES
+#define ARCH_DMA_MINALIGN	L2_CACHE_BYTES
 #endif
 
 /* use the cache line size for the L2, which is where it counts */
-- 
cgit v1.1


From 3b3c1b9d04db2ac925818c3cff677f5353c0b559 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Sun, 15 Aug 2010 12:14:41 -0400
Subject: arch/tile: export only COMMAND_LINE_SIZE to userspace.

This fixes a failure in "make headers_check" for tile.
I hadn't realized this file was exported to userspace by default.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/setup.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h
index 823ddd4..7caf0f3 100644
--- a/arch/tile/include/asm/setup.h
+++ b/arch/tile/include/asm/setup.h
@@ -15,6 +15,10 @@
 #ifndef _ASM_TILE_SETUP_H
 #define _ASM_TILE_SETUP_H
 
+#define COMMAND_LINE_SIZE	2048
+
+#ifdef __KERNEL__
+
 #include <linux/pfn.h>
 #include <linux/init.h>
 
@@ -23,10 +27,10 @@
  */
 #define MAXMEM_PFN	PFN_DOWN(MAXMEM)
 
-#define COMMAND_LINE_SIZE	2048
-
 void early_panic(const char *fmt, ...);
 void warn_early_printk(void);
 void __init disable_early_printk(void);
 
+#endif /* __KERNEL__ */
+
 #endif /* _ASM_TILE_SETUP_H */
-- 
cgit v1.1


From a5854dd7f30c3849edf9b9711362e2dd51d3f855 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Sun, 15 Aug 2010 15:01:45 -0400
Subject: arch/tile: don't validate CROSS_COMPILE needlessly

With this change, the arch/tile Makefile will only check for a valid
combination of CROSS_COMPILE vs "uname -m" for a few common targets
that are typically the ones we get wrong (vmlinux, all, and modules).
The change handles the case of an empty "make" goal like "make all".

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
---
 arch/tile/Makefile | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/arch/tile/Makefile b/arch/tile/Makefile
index 07c4318..fd8f6bb 100644
--- a/arch/tile/Makefile
+++ b/arch/tile/Makefile
@@ -8,20 +8,22 @@
 # for "archclean" and "archdep" for cleaning up and making dependencies for
 # this architecture
 
-ifeq ($(CROSS_COMPILE),)
 # If building with TILERA_ROOT set (i.e. using the Tilera Multicore
 # Development Environment) we can set CROSS_COMPILE based on that.
-ifdef TILERA_ROOT
-CROSS_COMPILE	= $(TILERA_ROOT)/bin/tile-
-endif
-endif
-
 # If we're not cross-compiling, make sure we're on the right architecture.
+# Only bother to test for a few common targets, to avoid useless errors.
 ifeq ($(CROSS_COMPILE),)
-HOST_ARCH = $(shell uname -m)
-ifneq ($(HOST_ARCH),$(ARCH))
+  ifdef TILERA_ROOT
+    CROSS_COMPILE := $(TILERA_ROOT)/bin/tile-
+  else
+    goals := $(if $(MAKECMDGOALS), $(MAKECMDGOALS), all)
+    ifneq ($(strip $(filter vmlinux modules all,$(goals))),)
+      HOST_ARCH := $(shell uname -m)
+      ifneq ($(HOST_ARCH),$(ARCH))
 $(error Set TILERA_ROOT or CROSS_COMPILE when building $(ARCH) on $(HOST_ARCH))
-endif
+      endif
+    endif
+  endif
 endif
 
 
-- 
cgit v1.1