200 files changed, 26934 insertions, 8 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 0f3e8bb..45b3df9 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -299,6 +299,8 @@ memory-hotplug.txt
 	- Hotpluggable memory support, how to use and current status.
 memory.txt
 	- info on typical Linux memory problems.
+metag/
+	- directory with info about Linux on Meta architecture.
 mips/
 	- directory with info about Linux on MIPS architecture.
 misc-devices/
diff --git a/Documentation/devicetree/bindings/metag/meta-intc.txt b/Documentation/devicetree/bindings/metag/meta-intc.txt
new file mode 100644
index 0000000..8c47dcb
--- /dev/null
+++ b/Documentation/devicetree/bindings/metag/meta-intc.txt
@@ -0,0 +1,82 @@
+* Meta External Trigger Controller Binding
+
+This binding specifies what properties must be available in the device tree
+representation of a Meta external trigger controller.
+
+Required properties:
+
+    - compatible: Specifies the compatibility list for the interrupt controller.
+      The type shall be <string> and the value shall include "img,meta-intc".
+
+    - num-banks: Specifies the number of interrupt banks (each of which can
+      handle 32 interrupt sources).
+
+    - interrupt-controller: The presence of this property identifies the node
+      as an interupt controller. No property value shall be defined.
+
+    - #interrupt-cells: Specifies the number of cells needed to encode an
+      interrupt source. The type shall be a <u32> and the value shall be 2.
+
+    - #address-cells: Specifies the number of cells needed to encode an
+      address. The type shall be <u32> and the value shall be 0. As such,
+      'interrupt-map' nodes do not have to specify a parent unit address.
+
+Optional properties:
+
+    - no-mask: The controller doesn't have any mask registers.
+
+* Interrupt Specifier Definition
+
+  Interrupt specifiers consists of 2 cells encoded as follows:
+
+    - <1st-cell>: The interrupt-number that identifies the interrupt source.
+
+    - <2nd-cell>: The Linux interrupt flags containing level-sense information,
+                  encoded as follows:
+                    1 = edge triggered
+                    4 = level-sensitive
+
+* Examples
+
+Example 1:
+
+	/*
+	 * Meta external trigger block
+	 */
+	intc: intc {
+		// This is an interrupt controller node.
+		interrupt-controller;
+
+		// No address cells so that 'interrupt-map' nodes which
+		// reference this interrupt controller node do not need a parent
+		// address specifier.
+		#address-cells = <0>;
+
+		// Two cells to encode interrupt sources.
+		#interrupt-cells = <2>;
+
+		// Number of interrupt banks
+		num-banks = <2>;
+
+		// No HWMASKEXT is available (specify on Chorus2 and Comet ES1)
+		no-mask;
+
+		// Compatible with Meta hardware trigger block.
+		compatible = "img,meta-intc";
+	};
+
+Example 2:
+
+	/*
+	 * An interrupt generating device that is wired to a Meta external
+	 * trigger block.
+	 */
+	uart1: uart@0x02004c00 {
+		// Interrupt source '5' that is level-sensitive.
+		// Note that there are only two cells as specified in the
+		// interrupt parent's '#interrupt-cells' property.
+		interrupts = <5 4 /* level */>;
+
+		// The interrupt controller that this device is wired to.
+		interrupt-parent = <&intc>;
+	};
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 3a54fca..4609e81 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -978,6 +978,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			       If specified, z/VM IUCV HVC accepts connections
 			       from listed z/VM user IDs only.
 
+	hwthread_map=	[METAG] Comma-separated list of Linux cpu id to
+			        hardware thread id mappings.
+				Format: <cpu>:<hwthread>
+
 	keep_bootcon	[KNL]
 			Do not unregister boot console at start. This is only
 			useful for debugging when something happens in the window
diff --git a/Documentation/metag/00-INDEX b/Documentation/metag/00-INDEX
new file mode 100644
index 0000000..db11c51
--- /dev/null
+++ b/Documentation/metag/00-INDEX
@@ -0,0 +1,4 @@
+00-INDEX
+	- this file
+kernel-ABI.txt
+	- Documents metag ABI details
diff --git a/Documentation/metag/kernel-ABI.txt b/Documentation/metag/kernel-ABI.txt
new file mode 100644
index 0000000..7b8dee8
--- /dev/null
+++ b/Documentation/metag/kernel-ABI.txt
@@ -0,0 +1,256 @@
+			==========================
+			KERNEL ABIS FOR METAG ARCH
+			==========================
+
+This document describes the Linux ABIs for the metag architecture, and has the
+following sections:
+
+ (*) Outline of registers
+ (*) Userland registers
+ (*) Kernel registers
+ (*) System call ABI
+ (*) Calling conventions
+
+
+====================
+OUTLINE OF REGISTERS
+====================
+
+The main Meta core registers are arranged in units:
+
+	UNIT	Type	DESCRIPTION	GP	EXT	PRIV	GLOBAL
+	=======	=======	===============	=======	=======	=======	=======
+	CT	Special	Control unit
+	D0	General	Data unit 0	0-7	8-15	16-31	16-31
+	D1	General	Data unit 1	0-7	8-15	16-31	16-31
+	A0	General	Address unit 0	0-3	4-7	 8-15	 8-15
+	A1	General	Address unit 1	0-3	4-7	 8-15	 8-15
+	PC	Special	PC unit		0		 1
+	PORT	Special	Ports
+	TR	Special	Trigger unit			 0-7
+	TT	Special	Trace unit			 0-5
+	FX	General	FP unit			0-15
+
+GP registers form part of the main context.
+
+Extended context registers (EXT) may not be present on all hardware threads and
+can be context switched if support is enabled and the appropriate bits are set
+in e.g. the D0.8 register to indicate what extended state to preserve.
+
+Global registers are shared between threads and are privilege protected.
+
+See arch/metag/include/asm/metag_regs.h for definitions relating to core
+registers and the fields and bits they contain. See the TRMs for further details
+about special registers.
+
+Several special registers are preserved in the main context, these are the
+interesting ones:
+
+	REG	(ALIAS)		PURPOSE
+	=======================	===============================================
+	CT.1	(TXMODE)	Processor mode bits (particularly for DSP)
+	CT.2	(TXSTATUS)	Condition flags and LSM_STEP (MGET/MSET step)
+	CT.3	(TXRPT)		Branch repeat counter
+	PC.0	(PC)		Program counter
+
+Some of the general registers have special purposes in the ABI and therefore
+have aliases:
+
+	D0 REG	(ALIAS)	PURPOSE		D1 REG	(ALIAS)	PURPOSE
+	===============	===============	===============	=======================
+	D0.0	(D0Re0)	32bit result	D1.0	(D1Re0)	Top half of 64bit result
+	D0.1	(D0Ar6)	Argument 6	D1.1	(D1Ar5)	Argument 5
+	D0.2	(D0Ar4)	Argument 4	D1.2	(D1Ar3)	Argument 3
+	D0.3	(D0Ar2)	Argument 2	D1.3	(D1Ar1)	Argument 1
+	D0.4	(D0FrT)	Frame temp	D1.4	(D1RtP)	Return pointer
+	D0.5		Call preserved	D1.5		Call preserved
+	D0.6		Call preserved	D1.6		Call preserved
+	D0.7		Call preserved	D1.7		Call preserved
+
+	A0 REG	(ALIAS)	PURPOSE		A1 REG	(ALIAS)	PURPOSE
+	===============	===============	===============	=======================
+	A0.0	(A0StP)	Stack pointer	A1.0	(A1GbP)	Global base pointer
+	A0.1	(A0FrP)	Frame pointer	A1.1	(A1LbP)	Local base pointer
+	A0.2				A1.2
+	A0.3				A1.3
+
+
+==================
+USERLAND REGISTERS
+==================
+
+All the general purpose D0, D1, A0, A1 registers are preserved when entering the
+kernel (including asynchronous events such as interrupts and timer ticks) except
+the following which have special purposes in the ABI:
+
+	REGISTERS	WHEN	STATUS		PURPOSE
+	===============	=======	===============	===============================
+	D0.8		DSP	Preserved	ECH, determines what extended
+						DSP state to preserve.
+	A0.0	(A0StP)	ALWAYS	Preserved	Stack >= A0StP may be clobbered
+						at any time by the creation of a
+						signal frame.
+	A1.0	(A1GbP)	SMP	Clobbered	Used as temporary for loading
+						kernel stack pointer and saving
+						core context.
+	A0.15		!SMP	Protected	Stores kernel stack pointer.
+	A1.15		ALWAYS	Protected	Stores kernel base pointer.
+
+On UP A0.15 is used to store the kernel stack pointer for storing the userland
+context. A0.15 is global between hardware threads though which means it cannot
+be used on SMP for this purpose. Since no protected local registers are
+available A1GbP is reserved for use as a temporary to allow a percpu stack
+pointer to be loaded for storing the rest of the context.
+
+
+================
+KERNEL REGISTERS
+================
+
+When in the kernel the following registers have special purposes in the ABI:
+
+	REGISTERS	WHEN	STATUS		PURPOSE
+	===============	=======	===============	===============================
+	A0.0	(A0StP)	ALWAYS	Preserved	Stack >= A0StP may be clobbered
+						at any time by the creation of
+						an irq signal frame.
+	A1.0	(A1GbP)	ALWAYS	Preserved	Reserved (kernel base pointer).
+
+
+===============
+SYSTEM CALL ABI
+===============
+
+When a system call is made, the following registers are effective:
+
+	REGISTERS	CALL			RETURN
+	===============	=======================	===============================
+	D0.0	(D0Re0)				Return value (or -errno)
+	D1.0	(D1Re0)	System call number	Clobbered
+	D0.1	(D0Ar6)	Syscall arg #6		Preserved
+	D1.1	(D1Ar5)	Syscall arg #5		Preserved
+	D0.2	(D0Ar4)	Syscall arg #4		Preserved
+	D1.2	(D1Ar3)	Syscall arg #3		Preserved
+	D0.3	(D0Ar2)	Syscall arg #2		Preserved
+	D1.3	(D1Ar1)	Syscall arg #1		Preserved
+
+Due to the limited number of argument registers and some system calls with badly
+aligned 64-bit arguments, 64-bit values are always packed in consecutive
+arguments, even if this is contrary to the normal calling conventions (where the
+two halves would go in a matching pair of data registers).
+
+For example fadvise64_64 usually has the signature:
+
+	long sys_fadvise64_64(i32 fd, i64 offs, i64 len, i32 advice);
+
+But for metag fadvise64_64 is wrapped so that the 64-bit arguments are packed:
+
+	long sys_fadvise64_64_metag(i32 fd,      i32 offs_lo,
+				    i32 offs_hi, i32 len_lo,
+				    i32 len_hi,  i32 advice)
+
+So the arguments are packed in the registers like this:
+
+	D0 REG	(ALIAS)	VALUE		D1 REG	(ALIAS)	VALUE
+	===============	===============	===============	=======================
+	D0.1	(D0Ar6)	advice		D1.1	(D1Ar5)	hi(len)
+	D0.2	(D0Ar4)	lo(len)		D1.2	(D1Ar3)	hi(offs)
+	D0.3	(D0Ar2)	lo(offs)	D1.3	(D1Ar1)	fd
+
+
+===================
+CALLING CONVENTIONS
+===================
+
+These calling conventions apply to both user and kernel code. The stack grows
+from low addresses to high addresses in the metag ABI. The stack pointer (A0StP)
+should always point to the next free address on the stack and should at all
+times be 64-bit aligned. The following registers are effective at the point of a
+call:
+
+	REGISTERS	CALL			RETURN
+	===============	=======================	===============================
+	D0.0	(D0Re0)				32bit return value
+	D1.0	(D1Re0)				Upper half of 64bit return value
+	D0.1	(D0Ar6)	32bit argument #6	Clobbered
+	D1.1	(D1Ar5)	32bit argument #5	Clobbered
+	D0.2	(D0Ar4)	32bit argument #4	Clobbered
+	D1.2	(D1Ar3)	32bit argument #3	Clobbered
+	D0.3	(D0Ar2)	32bit argument #2	Clobbered
+	D1.3	(D1Ar1)	32bit argument #1	Clobbered
+	D0.4	(D0FrT)				Clobbered
+	D1.4	(D1RtP)	Return pointer		Clobbered
+	D{0-1}.{5-7}				Preserved
+	A0.0	(A0StP)	Stack pointer		Preserved
+	A1.0	(A0GbP)				Preserved
+	A0.1	(A0FrP)	Frame pointer		Preserved
+	A1.1	(A0LbP)				Preserved
+	A{0-1},{2-3}				Clobbered
+
+64-bit arguments are placed in matching pairs of registers (i.e. the same
+register number in both D0 and D1 units), with the least significant half in D0
+and the most significant half in D1, leaving a gap where necessary. Futher
+arguments are stored on the stack in reverse order (earlier arguments at higher
+addresses):
+
+	ADDRESS		0     1     2     3	4     5     6     7
+	===============	===== ===== ===== =====	===== ===== ===== =====
+	A0StP       -->
+	A0StP-0x08	32bit argument #8	32bit argument #7
+	A0StP-0x10	32bit argument #10	32bit argument #9
+
+Function prologues tend to look a bit like this:
+
+	/* If frame pointer in use, move it to frame temp register so it can be
+	   easily pushed onto stack */
+	MOV	D0FrT,A0FrP
+
+	/* If frame pointer in use, set it to stack pointer */
+	ADD	A0FrP,A0StP,#0
+
+	/* Preserve D0FrT, D1RtP, D{0-1}.{5-7} on stack, incrementing A0StP */
+	MSETL	[A0StP++],D0FrT,D0.5,D0.6,D0.7
+
+	/* Allocate some stack space for local variables */
+	ADD	A0StP,A0StP,#0x10
+
+At this point the stack would look like this:
+
+	ADDRESS		0     1     2     3	4     5     6     7
+	===============	===== ===== ===== =====	===== ===== ===== =====
+	A0StP       -->
+	A0StP-0x08
+	A0StP-0x10
+	A0StP-0x18	Old D0.7		Old D1.7
+	A0StP-0x20	Old D0.6		Old D1.6
+	A0StP-0x28	Old D0.5		Old D1.5
+	A0FrP       -->	Old A0FrP (frame ptr)	Old D1RtP (return ptr)
+	A0FrP-0x08	32bit argument #8	32bit argument #7
+	A0FrP-0x10	32bit argument #10	32bit argument #9
+
+Function epilogues tend to differ depending on the use of a frame pointer. An
+example of a frame pointer epilogue:
+
+	/* Restore D0FrT, D1RtP, D{0-1}.{5-7} from stack, incrementing A0FrP */
+	MGETL	D0FrT,D0.5,D0.6,D0.7,[A0FrP++]
+	/* Restore stack pointer to where frame pointer was before increment */
+	SUB	A0StP,A0FrP,#0x20
+	/* Restore frame pointer from frame temp */
+	MOV	A0FrP,D0FrT
+	/* Return to caller via restored return pointer */
+	MOV	PC,D1RtP
+
+If the function hasn't touched the frame pointer, MGETL cannot be safely used
+with A0StP as it always increments and that would expose the stack to clobbering
+by interrupts (kernel) or signals (user). Therefore it's common to see the MGETL
+split into separate GETL instructions:
+
+	/* Restore D0FrT, D1RtP, D{0-1}.{5-7} from stack */
+	GETL	D0FrT,D1RtP,[A0StP+#-0x30]
+	GETL	D0.5,D1.5,[A0StP+#-0x28]
+	GETL	D0.6,D1.6,[A0StP+#-0x20]
+	GETL	D0.7,D1.7,[A0StP+#-0x18]
+	/* Restore stack pointer */
+	SUB	A0StP,A0StP,#0x30
+	/* Return to caller via restored return pointer */
+	MOV	PC,D1RtP
diff --git a/MAINTAINERS b/MAINTAINERS
index aea0adf..e95b1e9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5204,6 +5204,18 @@ F:	drivers/mtd/
 F:	include/linux/mtd/
 F:	include/uapi/mtd/
 
+METAG ARCHITECTURE
+M:	James Hogan <james.hogan@imgtec.com>
+S:	Supported
+F:	arch/metag/
+F:	Documentation/metag/
+F:	Documentation/devicetree/bindings/metag/
+F:	drivers/clocksource/metag_generic.c
+F:	drivers/irqchip/irq-metag.c
+F:	drivers/irqchip/irq-metag-ext.c
+F:	drivers/tty/metag_da.c
+F:	fs/imgdafs/
+
 MICROBLAZE ARCHITECTURE
 M:	Michal Simek <monstr@monstr.eu>
 L:	microblaze-uclinux@itee.uq.edu.au (moderated for non-subscribers)
diff --git a/arch/Kconfig b/arch/Kconfig
index dcd91a8..5a1779c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -103,6 +103,22 @@ config UPROBES
 
 	  If in doubt, say "N".
 
+config HAVE_64BIT_ALIGNED_ACCESS
+	def_bool 64BIT && !HAVE_EFFICIENT_UNALIGNED_ACCESS
+	help
+	  Some architectures require 64 bit accesses to be 64 bit
+	  aligned, which also requires structs containing 64 bit values
+	  to be 64 bit aligned too. This includes some 32 bit
+	  architectures which can do 64 bit accesses, as well as 64 bit
+	  architectures without unaligned access.
+
+	  This symbol should be selected by an architecture if 64 bit
+	  accesses are required to be 64 bit aligned in this way even
+	  though it is not a 64 bit architecture.
+
+	  See Documentation/unaligned-memory-access.txt for more
+	  information on the topic of unaligned memory accesses.
+
 config HAVE_EFFICIENT_UNALIGNED_ACCESS
 	bool
 	help
diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig
new file mode 100644
index 0000000..afc8973
--- /dev/null
+++ b/arch/metag/Kconfig
@@ -0,0 +1,290 @@
+config SYMBOL_PREFIX
+	string
+	default "_"
+
+config METAG
+	def_bool y
+	select EMBEDDED
+	select GENERIC_ATOMIC64
+	select GENERIC_CLOCKEVENTS
+	select GENERIC_IRQ_SHOW
+	select GENERIC_SMP_IDLE_THREAD
+	select HAVE_64BIT_ALIGNED_ACCESS
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_C_RECORDMCOUNT
+	select HAVE_DEBUG_KMEMLEAK
+	select HAVE_DYNAMIC_FTRACE
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_FUNCTION_TRACER
+	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+	select HAVE_GENERIC_HARDIRQS
+	select HAVE_KERNEL_BZIP2
+	select HAVE_KERNEL_GZIP
+	select HAVE_KERNEL_LZO
+	select HAVE_KERNEL_XZ
+	select HAVE_MEMBLOCK
+	select HAVE_MEMBLOCK_NODE_MAP
+	select HAVE_MOD_ARCH_SPECIFIC
+	select HAVE_PERF_EVENTS
+	select HAVE_SYSCALL_TRACEPOINTS
+	select IRQ_DOMAIN
+	select MODULES_USE_ELF_RELA
+	select OF
+	select OF_EARLY_FLATTREE
+	select SPARSE_IRQ
+
+config STACKTRACE_SUPPORT
+	def_bool y
+
+config LOCKDEP_SUPPORT
+	def_bool y
+
+config HAVE_LATENCYTOP_SUPPORT
+	def_bool y
+
+config RWSEM_GENERIC_SPINLOCK
+	def_bool y
+
+config RWSEM_XCHGADD_ALGORITHM
+	bool
+
+config GENERIC_HWEIGHT
+	def_bool y
+
+config GENERIC_CALIBRATE_DELAY
+	def_bool y
+
+config GENERIC_GPIO
+	def_bool n
+
+config NO_IOPORT
+	def_bool y
+
+source "init/Kconfig"
+
+source "kernel/Kconfig.freezer"
+
+menu "Processor type and features"
+
+config MMU
+	def_bool y
+
+config STACK_GROWSUP
+	def_bool y
+
+config HOTPLUG_CPU
+	bool "Enable CPU hotplug support"
+	depends on SMP
+	help
+	  Say Y here to allow turning CPUs off and on. CPUs can be
+	  controlled through /sys/devices/system/cpu.
+
+	  Say N if you want to disable CPU hotplug.
+
+config HIGHMEM
+	bool "High Memory Support"
+	help
+	  The address space of Meta processors is only 4 Gigabytes large
+	  and it has to accommodate user address space, kernel address
+	  space as well as some memory mapped IO. That means that, if you
+	  have a large amount of physical memory and/or IO, not all of the
+	  memory can be "permanently mapped" by the kernel. The physical
+	  memory that is not permanently mapped is called "high memory".
+
+	  Depending on the selected kernel/user memory split, minimum
+	  vmalloc space and actual amount of RAM, you may not need this
+	  option which should result in a slightly faster kernel.
+
+	  If unsure, say n.
+
+source "arch/metag/mm/Kconfig"
+
+source "arch/metag/Kconfig.soc"
+
+config METAG_META12
+	bool
+	help
+	  Select this from the SoC config symbol to indicate that it contains a
+	  Meta 1.2 core.
+
+config METAG_META21
+	bool
+	help
+	  Select this from the SoC config symbol to indicate that it contains a
+	  Meta 2.1 core.
+
+config SMP
+	bool "Symmetric multi-processing support"
+	depends on METAG_META21 && METAG_META21_MMU
+	select USE_GENERIC_SMP_HELPERS
+	help
+	  This enables support for systems with more than one thread running
+	  Linux. If you have a system with only one thread running Linux,
+	  say N. Otherwise, say Y.
+
+config NR_CPUS
+	int "Maximum number of CPUs (2-4)" if SMP
+	range 2 4 if SMP
+	default "1" if !SMP
+	default "4" if SMP
+
+config METAG_SMP_WRITE_REORDERING
+	bool
+	help
+	  This attempts to prevent cache-memory incoherence due to external
+	  reordering of writes from different hardware threads when SMP is
+	  enabled. It adds fences (system event 0) to smp_mb and smp_rmb in an
+	  attempt to catch some of the cases, and also before writes to shared
+	  memory in LOCK1 protected atomics and spinlocks.
+	  This will not completely prevent cache incoherency on affected cores.
+
+config METAG_LNKGET_AROUND_CACHE
+	bool
+	depends on METAG_META21
+	help
+	  This indicates that the LNKGET/LNKSET instructions go around the
+	  cache, which requires some extra cache flushes when the memory needs
+	  to be accessed by normal GET/SET instructions too.
+
+choice
+	prompt "Atomicity primitive"
+	default METAG_ATOMICITY_LNKGET
+	help
+	  This option selects the mechanism for performing atomic operations.
+
+config METAG_ATOMICITY_IRQSOFF
+	depends on !SMP
+	bool "irqsoff"
+	help
+	  This option disables interrupts to achieve atomicity. This mechanism
+	  is not SMP-safe.
+
+config METAG_ATOMICITY_LNKGET
+	depends on METAG_META21
+	bool "lnkget/lnkset"
+	help
+	  This option uses the LNKGET and LNKSET instructions to achieve
+	  atomicity. LNKGET/LNKSET are load-link/store-conditional instructions.
+	  Choose this option if your system requires low latency.
+
+config METAG_ATOMICITY_LOCK1
+	depends on SMP
+	bool "lock1"
+	help
+	  This option uses the LOCK1 instruction for atomicity. This is mainly
+	  provided as a debugging aid if the lnkget/lnkset atomicity primitive
+	  isn't working properly.
+
+endchoice
+
+config METAG_FPU
+	bool "FPU Support"
+	depends on METAG_META21
+	default y
+	help
+	  This option allows processes to use FPU hardware available with this
+	  CPU. If this option is not enabled FPU registers will not be saved
+	  and restored on context-switch.
+
+	  If you plan on running programs which are compiled to use hard floats
+	  say Y here.
+
+config METAG_DSP
+	bool "DSP Support"
+	help
+	  This option allows processes to use DSP hardware available
+	  with this CPU. If this option is not enabled DSP registers
+	  will not be saved and restored on context-switch.
+
+	  If you plan on running DSP programs say Y here.
+
+config METAG_PERFCOUNTER_IRQS
+	bool "PerfCounters interrupt support"
+	depends on METAG_META21
+	help
+	  This option enables using interrupts to collect information from
+	  Performance Counters. This option is supported in new META21
+	  (starting from HTP265).
+
+	  When disabled, Performance Counters information will be collected
+	  based on Timer Interrupt.
+
+config METAG_DA
+	bool "DA support"
+	help
+	  Say Y if you plan to use a DA debug adapter with Linux. The presence
+	  of the DA will be detected automatically at boot, so it is safe to say
+	  Y to this option even when booting without a DA.
+
+	  This enables support for services provided by DA JTAG debug adapters,
+	  such as:
+	  - communication over DA channels (such as the console driver).
+	  - use of the DA filesystem.
+
+menu "Boot options"
+
+config METAG_BUILTIN_DTB
+	bool "Embed DTB in kernel image"
+	default y
+	help
+	  Embeds a device tree binary in the kernel image.
+
+config METAG_BUILTIN_DTB_NAME
+	string "Built in DTB"
+	depends on METAG_BUILTIN_DTB
+	help
+	  Set the name of the DTB to embed (leave blank to pick one
+	  automatically based on kernel configuration).
+
+config CMDLINE_BOOL
+	bool "Default bootloader kernel arguments"
+
+config CMDLINE
+	string "Kernel command line"
+	depends on CMDLINE_BOOL
+	help
+	  On some architectures there is currently no way for the boot loader
+	  to pass arguments to the kernel. For these architectures, you should
+	  supply some command-line options at build time by entering them
+	  here.
+
+config CMDLINE_FORCE
+	bool "Force default kernel command string"
+	depends on CMDLINE_BOOL
+	help
+	  Set this to have arguments from the default kernel command string
+	  override those passed by the boot loader.
+
+endmenu
+
+source "kernel/Kconfig.preempt"
+
+source kernel/Kconfig.hz
+
+endmenu
+
+menu "Power management options"
+
+source kernel/power/Kconfig
+
+endmenu
+
+menu "Executable file formats"
+
+source "fs/Kconfig.binfmt"
+
+endmenu
+
+source "net/Kconfig"
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/metag/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
diff --git a/arch/metag/Kconfig.debug b/arch/metag/Kconfig.debug
new file mode 100644
index 0000000..e45bbf6
--- /dev/null
+++ b/arch/metag/Kconfig.debug
@@ -0,0 +1,40 @@
+menu "Kernel hacking"
+
+config TRACE_IRQFLAGS_SUPPORT
+	bool
+	default y
+
+source "lib/Kconfig.debug"
+
+config DEBUG_STACKOVERFLOW
+	bool "Check for stack overflows"
+	depends on DEBUG_KERNEL
+	help
+	  This option will cause messages to be printed if free stack space
+	  drops below a certain limit.
+
+config 4KSTACKS
+	bool "Use 4Kb for kernel stacks instead of 8Kb"
+	depends on DEBUG_KERNEL
+	help
+	  If you say Y here the kernel will use a 4Kb stacksize for the
+	  kernel stack attached to each process/thread. This facilitates
+	  running more threads on a system and also reduces the pressure
+	  on the VM subsystem for higher order allocations. This option
+	  will also use IRQ stacks to compensate for the reduced stackspace.
+
+config METAG_FUNCTION_TRACE
+	bool "Output Meta real-time trace data for function entry/exit"
+	help
+	  If you say Y here the kernel will use the Meta hardware trace
+	  unit to output information about function entry and exit that
+	  can be used by a debugger for profiling and call-graphs.
+
+config METAG_POISON_CATCH_BUFFERS
+	bool "Poison catch buffer contents on kernel entry"
+	help
+	  If you say Y here the kernel will write poison data to the
+	  catch buffer registers on kernel entry. This will make any
+	  problem with catch buffer handling much more apparent.
+
+endmenu
diff --git a/arch/metag/Kconfig.soc b/arch/metag/Kconfig.soc
new file mode 100644
index 0000000..ec079cf
--- /dev/null
+++ b/arch/metag/Kconfig.soc
@@ -0,0 +1,55 @@
+choice
+	prompt "SoC Type"
+	default META21_FPGA
+
+config META12_FPGA
+	bool "Meta 1.2 FPGA"
+	select METAG_META12
+	help
+	  This is a Meta 1.2 FPGA bitstream, just a bare CPU.
+
+config META21_FPGA
+	bool "Meta 2.1 FPGA"
+	select METAG_META21
+	help
+	  This is a Meta 2.1 FPGA bitstream, just a bare CPU.
+
+endchoice
+
+menu "SoC configuration"
+
+if METAG_META21
+
+# Meta 2.x specific options
+
+config METAG_META21_MMU
+	bool "Meta 2.x MMU mode"
+	default y
+	help
+	  Use the Meta 2.x MMU in extended mode.
+
+config METAG_UNALIGNED
+	bool "Meta 2.x unaligned access checking"
+	default y
+	help
+	  All memory accesses will be checked for alignment and an exception
+	  raised on unaligned accesses. This feature does cost performance
+	  but without it there will be no notification of this type of error.
+
+config METAG_USER_TCM
+	bool "Meta on-chip memory support for userland"
+	select GENERIC_ALLOCATOR
+	default y
+	help
+	  Allow the on-chip memories of Meta SoCs to be used by user
+	  applications.
+
+endif
+
+config METAG_HALT_ON_PANIC
+	bool "Halt the core on panic"
+	help
+	  Halt the core when a panic occurs. This is useful when running
+	  pre-production silicon or in an FPGA environment.
+
+endmenu
diff --git a/arch/metag/Makefile b/arch/metag/Makefile
new file mode 100644
index 0000000..81bd6a1
--- /dev/null
+++ b/arch/metag/Makefile
@@ -0,0 +1,87 @@
+#
+# metag/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to do have actions
+# for "archclean" cleaning up for this architecture.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+#               2007,2008,2012 by Imagination Technologies Ltd.
+#
+
+LDFLAGS					:=
+OBJCOPYFLAGS				:= -O binary -R .note -R .comment -S
+
+checkflags-$(CONFIG_METAG_META12)	+= -DMETAC_1_2
+checkflags-$(CONFIG_METAG_META21)	+= -DMETAC_2_1
+CHECKFLAGS				+= -D__metag__ $(checkflags-y)
+
+KBUILD_DEFCONFIG			:= meta2_defconfig
+
+sflags-$(CONFIG_METAG_META12)		+= -mmetac=1.2
+ifeq ($(CONFIG_METAG_META12),y)
+# Only use TBI API 1.4 if DSP is enabled for META12 cores
+sflags-$(CONFIG_METAG_DSP)		+= -DTBI_1_4
+endif
+sflags-$(CONFIG_METAG_META21)		+= -mmetac=2.1 -DTBI_1_4
+
+cflags-$(CONFIG_METAG_FUNCTION_TRACE)	+= -mhwtrace-leaf -mhwtrace-retpc
+cflags-$(CONFIG_METAG_META21)		+= -mextensions=bex
+
+KBUILD_CFLAGS				+= -pipe
+KBUILD_CFLAGS				+= -ffunction-sections
+
+KBUILD_CFLAGS				+= $(sflags-y) $(cflags-y)
+KBUILD_AFLAGS				+= $(sflags-y)
+
+LDFLAGS_vmlinux				:= $(ldflags-y)
+
+head-y					:= arch/metag/kernel/head.o
+
+core-y					+= arch/metag/boot/dts/
+core-y					+= arch/metag/kernel/
+core-y					+= arch/metag/mm/
+
+libs-y					+= arch/metag/lib/
+libs-y					+= arch/metag/tbx/
+
+boot					:= arch/metag/boot
+
+boot_targets				+= uImage
+boot_targets				+= uImage.gz
+boot_targets				+= uImage.bz2
+boot_targets				+= uImage.xz
+boot_targets				+= uImage.lzo
+boot_targets				+= uImage.bin
+boot_targets				+= vmlinux.bin
+
+PHONY					+= $(boot_targets)
+
+all: vmlinux.bin
+
+$(boot_targets): vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+%.dtb %.dtb.S %.dtb.o: scripts
+	$(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
+
+dtbs: scripts
+	$(Q)$(MAKE) $(build)=$(boot)/dts dtbs
+
+archclean:
+	$(Q)$(MAKE) $(clean)=$(boot)
+
+define archhelp
+  echo  '* vmlinux.bin	- Binary kernel image (arch/$(ARCH)/boot/vmlinux.bin)'
+  @echo '  uImage  	- Alias to bootable U-Boot image'
+  @echo '  uImage.bin	- Kernel-only image for U-Boot (bin)'
+  @echo '  uImage.gz	- Kernel-only image for U-Boot (gzip)'
+  @echo '  uImage.bz2	- Kernel-only image for U-Boot (bzip2)'
+  @echo '  uImage.xz	- Kernel-only image for U-Boot (xz)'
+  @echo '  uImage.lzo	- Kernel-only image for U-Boot (lzo)'
+  @echo '  dtbs		- Build device tree blobs for enabled boards'
+endef
diff --git a/arch/metag/boot/.gitignore b/arch/metag/boot/.gitignore
new file mode 100644
index 0000000..a021da2
--- /dev/null
+++ b/arch/metag/boot/.gitignore
@@ -0,0 +1,4 @@
+vmlinux*
+uImage*
+ramdisk.*
+*.dtb
diff --git a/arch/metag/boot/Makefile b/arch/metag/boot/Makefile
new file mode 100644
index 0000000..5a1f88c
--- /dev/null
+++ b/arch/metag/boot/Makefile
@@ -0,0 +1,68 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2007,2012  Imagination Technologies Ltd.
+#
+
+suffix-y := bin
+suffix-$(CONFIG_KERNEL_GZIP)	:= gz
+suffix-$(CONFIG_KERNEL_BZIP2)	:= bz2
+suffix-$(CONFIG_KERNEL_XZ)	:= xz
+suffix-$(CONFIG_KERNEL_LZO)	:= lzo
+
+targets += vmlinux.bin
+targets += uImage
+targets += uImage.gz
+targets += uImage.bz2
+targets += uImage.xz
+targets += uImage.lzo
+targets += uImage.bin
+
+extra-y += vmlinux.bin
+extra-y += vmlinux.bin.gz
+extra-y += vmlinux.bin.bz2
+extra-y += vmlinux.bin.xz
+extra-y += vmlinux.bin.lzo
+
+UIMAGE_LOADADDR = $(CONFIG_PAGE_OFFSET)
+
+ifeq ($(CONFIG_FUNCTION_TRACER),y)
+orig_cflags := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -pg, , $(orig_cflags))
+endif
+
+$(obj)/vmlinux.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,gzip)
+
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,bzip2)
+
+$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,xzkern)
+
+$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,lzo)
+
+$(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE
+	$(call if_changed,uimage,gzip)
+
+$(obj)/uImage.bz2: $(obj)/vmlinux.bin.bz2 FORCE
+	$(call if_changed,uimage,bzip2)
+
+$(obj)/uImage.xz: $(obj)/vmlinux.bin.xz FORCE
+	$(call if_changed,uimage,xz)
+
+$(obj)/uImage.lzo: $(obj)/vmlinux.bin.lzo FORCE
+	$(call if_changed,uimage,lzo)
+
+$(obj)/uImage.bin: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,uimage,none)
+
+$(obj)/uImage: $(obj)/uImage.$(suffix-y)
+	@ln -sf $(notdir $<) $@
+	@echo '  Image $@ is ready'
diff --git a/arch/metag/boot/dts/Makefile b/arch/metag/boot/dts/Makefile
new file mode 100644
index 0000000..e0b5afd
--- /dev/null
+++ b/arch/metag/boot/dts/Makefile
@@ -0,0 +1,16 @@
+dtb-y	+= skeleton.dtb
+
+# Built-in dtb
+builtindtb-y				:= skeleton
+
+ifneq ($(CONFIG_METAG_BUILTIN_DTB_NAME),"")
+	builtindtb-y			:= $(CONFIG_METAG_BUILTIN_DTB_NAME)
+endif
+obj-$(CONFIG_METAG_BUILTIN_DTB)	+= $(patsubst "%",%,$(builtindtb-y)).dtb.o
+
+targets	+= dtbs
+targets	+= $(dtb-y)
+
+dtbs: $(addprefix $(obj)/, $(dtb-y))
+
+clean-files += *.dtb
diff --git a/arch/metag/boot/dts/skeleton.dts b/arch/metag/boot/dts/skeleton.dts
new file mode 100644
index 0000000..7244d1f
--- /dev/null
+++ b/arch/metag/boot/dts/skeleton.dts
@@ -0,0 +1,10 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
diff --git a/arch/metag/boot/dts/skeleton.dtsi b/arch/metag/boot/dts/skeleton.dtsi
new file mode 100644
index 0000000..78229ea
--- /dev/null
+++ b/arch/metag/boot/dts/skeleton.dtsi
@@ -0,0 +1,14 @@
+/*
+ * Skeleton device tree; the bare minimum needed to boot; just include and
+ * add a compatible value.  The bootloader will typically populate the memory
+ * node.
+ */
+
+/ {
+	compatible = "img,meta";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	chosen { };
+	aliases { };
+	memory { device_type = "memory"; reg = <0 0>; };
+};
diff --git a/arch/metag/configs/meta1_defconfig b/arch/metag/configs/meta1_defconfig
new file mode 100644
index 0000000..c35a75e
--- /dev/null
+++ b/arch/metag/configs/meta1_defconfig
@@ -0,0 +1,40 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_LOG_BUF_SHIFT=13
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_ELF_CORE is not set
+CONFIG_SLAB=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_FLATMEM_MANUAL=y
+CONFIG_META12_FPGA=y
+CONFIG_METAG_DA=y
+CONFIG_HZ_100=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=1
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_DA_TTY=y
+CONFIG_DA_CONSOLE=y
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_INFO=y
diff --git a/arch/metag/configs/meta2_defconfig b/arch/metag/configs/meta2_defconfig
new file mode 100644
index 0000000..fb31484
--- /dev/null
+++ b/arch/metag/configs/meta2_defconfig
@@ -0,0 +1,41 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=13
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_ELF_CORE is not set
+CONFIG_SLAB=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_METAG_L2C=y
+CONFIG_FLATMEM_MANUAL=y
+CONFIG_METAG_HALT_ON_PANIC=y
+CONFIG_METAG_DA=y
+CONFIG_HZ_100=y
+CONFIG_DEVTMPFS=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=1
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_DA_TTY=y
+CONFIG_DA_CONSOLE=y
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_INFO=y
diff --git a/arch/metag/configs/meta2_smp_defconfig b/arch/metag/configs/meta2_smp_defconfig
new file mode 100644
index 0000000..6c7b777
--- /dev/null
+++ b/arch/metag/configs/meta2_smp_defconfig
@@ -0,0 +1,42 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=13
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_ELF_CORE is not set
+CONFIG_SLAB=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_METAG_L2C=y
+CONFIG_FLATMEM_MANUAL=y
+CONFIG_METAG_HALT_ON_PANIC=y
+CONFIG_SMP=y
+CONFIG_METAG_DA=y
+CONFIG_HZ_100=y
+CONFIG_DEVTMPFS=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=1
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_DA_TTY=y
+CONFIG_DA_CONSOLE=y
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_INFO=y
diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
new file mode 100644
index 0000000..6ae0ccb
--- /dev/null
+++ b/arch/metag/include/asm/Kbuild
@@ -0,0 +1,54 @@
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += bugs.h
+generic-y += clkdev.h
+generic-y += cputime.h
+generic-y += current.h
+generic-y += device.h
+generic-y += dma.h
+generic-y += emergency-restart.h
+generic-y += errno.h
+generic-y += exec.h
+generic-y += fb.h
+generic-y += fcntl.h
+generic-y += futex.h
+generic-y += hardirq.h
+generic-y += hw_irq.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += irq_regs.h
+generic-y += kdebug.h
+generic-y += kmap_types.h
+generic-y += kvm_para.h
+generic-y += local.h
+generic-y += local64.h
+generic-y += msgbuf.h
+generic-y += mutex.h
+generic-y += param.h
+generic-y += pci.h
+generic-y += percpu.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += scatterlist.h
+generic-y += sections.h
+generic-y += sembuf.h
+generic-y += serial.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += switch_to.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += timex.h
+generic-y += trace_clock.h
+generic-y += types.h
+generic-y += ucontext.h
+generic-y += unaligned.h
+generic-y += user.h
+generic-y += vga.h
+generic-y += xor.h
diff --git a/arch/metag/include/asm/atomic.h b/arch/metag/include/asm/atomic.h
new file mode 100644
index 0000000..307ecd2
--- /dev/null
+++ b/arch/metag/include/asm/atomic.h
@@ -0,0 +1,53 @@
+#ifndef __ASM_METAG_ATOMIC_H
+#define __ASM_METAG_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/cmpxchg.h>
+
+#if defined(CONFIG_METAG_ATOMICITY_IRQSOFF)
+/* The simple UP case. */
+#include <asm-generic/atomic.h>
+#else
+
+#if defined(CONFIG_METAG_ATOMICITY_LOCK1)
+#include <asm/atomic_lock1.h>
+#else
+#include <asm/atomic_lnkget.h>
+#endif
+
+#define atomic_add_negative(a, v)       (atomic_add_return((a), (v)) < 0)
+
+#define atomic_dec_return(v) atomic_sub_return(1, (v))
+#define atomic_inc_return(v) atomic_add_return(1, (v))
+
+/*
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
+
+#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
+#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
+
+#define atomic_inc(v) atomic_add(1, (v))
+#define atomic_dec(v) atomic_sub(1, (v))
+
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+#define smp_mb__before_atomic_dec()	barrier()
+#define smp_mb__after_atomic_dec()	barrier()
+#define smp_mb__before_atomic_inc()	barrier()
+#define smp_mb__after_atomic_inc()	barrier()
+
+#endif
+
+#define atomic_dec_if_positive(v)       atomic_sub_if_positive(1, v)
+
+#include <asm-generic/atomic64.h>
+
+#endif /* __ASM_METAG_ATOMIC_H */
diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h
new file mode 100644
index 0000000..d2e60a1
--- /dev/null
+++ b/arch/metag/include/asm/atomic_lnkget.h
@@ -0,0 +1,234 @@
+#ifndef __ASM_METAG_ATOMIC_LNKGET_H
+#define __ASM_METAG_ATOMIC_LNKGET_H
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+#define atomic_set(v, i)		((v)->counter = (i))
+
+#include <linux/compiler.h>
+
+#include <asm/barrier.h>
+
+/*
+ * None of these asm statements clobber memory as LNKSET writes around
+ * the cache so the memory it modifies cannot safely be read by any means
+ * other than these accessors.
+ */
+
+static inline int atomic_read(const atomic_t *v)
+{
+	int temp;
+
+	asm volatile (
+		"LNKGETD %0, [%1]\n"
+		: "=da" (temp)
+		: "da" (&v->counter));
+
+	return temp;
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+	int temp;
+
+	asm volatile (
+		"1:	LNKGETD %0, [%1]\n"
+		"	ADD	%0, %0, %2\n"
+		"	LNKSETD [%1], %0\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		: "=&d" (temp)
+		: "da" (&v->counter), "bd" (i)
+		: "cc");
+}
+
+static inline void atomic_sub(int i, atomic_t *v)
+{
+	int temp;
+
+	asm volatile (
+		"1:	LNKGETD %0, [%1]\n"
+		"	SUB	%0, %0, %2\n"
+		"	LNKSETD [%1], %0\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ 1b\n"
+		: "=&d" (temp)
+		: "da" (&v->counter), "bd" (i)
+		: "cc");
+}
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	int result, temp;
+
+	smp_mb();
+
+	asm volatile (
+		"1:	LNKGETD %1, [%2]\n"
+		"	ADD	%1, %1, %3\n"
+		"	LNKSETD [%2], %1\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ 1b\n"
+		: "=&d" (temp), "=&da" (result)
+		: "da" (&v->counter), "bd" (i)
+		: "cc");
+
+	smp_mb();
+
+	return result;
+}
+
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+	int result, temp;
+
+	smp_mb();
+
+	asm volatile (
+		"1:	LNKGETD %1, [%2]\n"
+		"	SUB	%1, %1, %3\n"
+		"	LNKSETD [%2], %1\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		: "=&d" (temp), "=&da" (result)
+		: "da" (&v->counter), "bd" (i)
+		: "cc");
+
+	smp_mb();
+
+	return result;
+}
+
+static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	int temp;
+
+	asm volatile (
+		"1:	LNKGETD %0, [%1]\n"
+		"	AND	%0, %0, %2\n"
+		"	LNKSETD	[%1] %0\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		: "=&d" (temp)
+		: "da" (&v->counter), "bd" (~mask)
+		: "cc");
+}
+
+static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	int temp;
+
+	asm volatile (
+		"1:	LNKGETD %0, [%1]\n"
+		"	OR	%0, %0, %2\n"
+		"	LNKSETD	[%1], %0\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		: "=&d" (temp)
+		: "da" (&v->counter), "bd" (mask)
+		: "cc");
+}
+
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	int result, temp;
+
+	smp_mb();
+
+	asm volatile (
+		"1:	LNKGETD	%1, [%2]\n"
+		"	CMP	%1, %3\n"
+		"	LNKSETDEQ [%2], %4\n"
+		"	BNE	2f\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		"2:\n"
+		: "=&d" (temp), "=&d" (result)
+		: "da" (&v->counter), "bd" (old), "da" (new)
+		: "cc");
+
+	smp_mb();
+
+	return result;
+}
+
+static inline int atomic_xchg(atomic_t *v, int new)
+{
+	int temp, old;
+
+	asm volatile (
+		"1:	LNKGETD %1, [%2]\n"
+		"	LNKSETD	[%2], %3\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		: "=&d" (temp), "=&d" (old)
+		: "da" (&v->counter), "da" (new)
+		: "cc");
+
+	return old;
+}
+
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int result, temp;
+
+	smp_mb();
+
+	asm volatile (
+		"1:	LNKGETD %1, [%2]\n"
+		"	CMP	%1, %3\n"
+		"	ADD	%0, %1, %4\n"
+		"	LNKSETDNE [%2], %0\n"
+		"	BEQ	2f\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		"2:\n"
+		: "=&d" (temp), "=&d" (result)
+		: "da" (&v->counter), "bd" (u), "bd" (a)
+		: "cc");
+
+	smp_mb();
+
+	return result;
+}
+
+static inline int atomic_sub_if_positive(int i, atomic_t *v)
+{
+	int result, temp;
+
+	asm volatile (
+		"1:	LNKGETD %1, [%2]\n"
+		"	SUBS	%1, %1, %3\n"
+		"	LNKSETDGE [%2], %1\n"
+		"	BLT	2f\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		"2:\n"
+		: "=&d" (temp), "=&da" (result)
+		: "da" (&v->counter), "bd" (i)
+		: "cc");
+
+	return result;
+}
+
+#endif /* __ASM_METAG_ATOMIC_LNKGET_H */
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h
new file mode 100644
index 0000000..e578955
--- /dev/null
+++ b/arch/metag/include/asm/atomic_lock1.h
@@ -0,0 +1,160 @@
+#ifndef __ASM_METAG_ATOMIC_LOCK1_H
+#define __ASM_METAG_ATOMIC_LOCK1_H
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+#include <linux/compiler.h>
+
+#include <asm/barrier.h>
+#include <asm/global_lock.h>
+
+static inline int atomic_read(const atomic_t *v)
+{
+	return (v)->counter;
+}
+
+/*
+ * atomic_set needs to be take the lock to protect atomic_add_unless from a
+ * possible race, as it reads the counter twice:
+ *
+ *  CPU0                               CPU1
+ *  atomic_add_unless(1, 0)
+ *    ret = v->counter (non-zero)
+ *    if (ret != u)                    v->counter = 0
+ *      v->counter += 1 (counter set to 1)
+ *
+ * Making atomic_set take the lock ensures that ordering and logical
+ * consistency is preserved.
+ */
+static inline int atomic_set(atomic_t *v, int i)
+{
+	unsigned long flags;
+
+	__global_lock1(flags);
+	fence();
+	v->counter = i;
+	__global_unlock1(flags);
+	return i;
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+	unsigned long flags;
+
+	__global_lock1(flags);
+	fence();
+	v->counter += i;
+	__global_unlock1(flags);
+}
+
+static inline void atomic_sub(int i, atomic_t *v)
+{
+	unsigned long flags;
+
+	__global_lock1(flags);
+	fence();
+	v->counter -= i;
+	__global_unlock1(flags);
+}
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	unsigned long result;
+	unsigned long flags;
+
+	__global_lock1(flags);
+	result = v->counter;
+	result += i;
+	fence();
+	v->counter = result;
+	__global_unlock1(flags);
+
+	return result;
+}
+
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+	unsigned long result;
+	unsigned long flags;
+
+	__global_lock1(flags);
+	result = v->counter;
+	result -= i;
+	fence();
+	v->counter = result;
+	__global_unlock1(flags);
+
+	return result;
+}
+
+static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	unsigned long flags;
+
+	__global_lock1(flags);
+	fence();
+	v->counter &= ~mask;
+	__global_unlock1(flags);
+}
+
+static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	unsigned long flags;
+
+	__global_lock1(flags);
+	fence();
+	v->counter |= mask;
+	__global_unlock1(flags);
+}
+
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	int ret;
+	unsigned long flags;
+
+	__global_lock1(flags);
+	ret = v->counter;
+	if (ret == old) {
+		fence();
+		v->counter = new;
+	}
+	__global_unlock1(flags);
+
+	return ret;
+}
+
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int ret;
+	unsigned long flags;
+
+	__global_lock1(flags);
+	ret = v->counter;
+	if (ret != u) {
+		fence();
+		v->counter += a;
+	}
+	__global_unlock1(flags);
+
+	return ret;
+}
+
+static inline int atomic_sub_if_positive(int i, atomic_t *v)
+{
+	int ret;
+	unsigned long flags;
+
+	__global_lock1(flags);
+	ret = v->counter - 1;
+	if (ret >= 0) {
+		fence();
+		v->counter = ret;
+	}
+	__global_unlock1(flags);
+
+	return ret;
+}
+
+#endif /* __ASM_METAG_ATOMIC_LOCK1_H */
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
new file mode 100644
index 0000000..c90bfc6
--- /dev/null
+++ b/arch/metag/include/asm/barrier.h
@@ -0,0 +1,85 @@
+#ifndef _ASM_METAG_BARRIER_H
+#define _ASM_METAG_BARRIER_H
+
+#include <asm/metag_mem.h>
+
+#define nop()		asm volatile ("NOP")
+#define mb()		wmb()
+#define rmb()		barrier()
+
+#ifdef CONFIG_METAG_META21
+
+/* HTP and above have a system event to fence writes */
+static inline void wr_fence(void)
+{
+	volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_FENCE;
+	barrier();
+	*flushptr = 0;
+}
+
+#else /* CONFIG_METAG_META21 */
+
+/*
+ * ATP doesn't have system event to fence writes, so it is necessary to flush
+ * the processor write queues as well as possibly the write combiner (depending
+ * on the page being written).
+ * To ensure the write queues are flushed we do 4 writes to a system event
+ * register (in this case write combiner flush) which will also flush the write
+ * combiner.
+ */
+static inline void wr_fence(void)
+{
+	volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_COMBINE_FLUSH;
+	barrier();
+	*flushptr = 0;
+	*flushptr = 0;
+	*flushptr = 0;
+	*flushptr = 0;
+}
+
+#endif /* !CONFIG_METAG_META21 */
+
+static inline void wmb(void)
+{
+	/* flush writes through the write combiner */
+	wr_fence();
+}
+
+#define read_barrier_depends()  do { } while (0)
+
+#ifndef CONFIG_SMP
+#define fence()		do { } while (0)
+#define smp_mb()        barrier()
+#define smp_rmb()       barrier()
+#define smp_wmb()       barrier()
+#else
+
+#ifdef CONFIG_METAG_SMP_WRITE_REORDERING
+/*
+ * Write to the atomic memory unlock system event register (command 0). This is
+ * needed before a write to shared memory in a critical section, to prevent
+ * external reordering of writes before the fence on other threads with writes
+ * after the fence on this thread (and to prevent the ensuing cache-memory
+ * incoherence). It is therefore ineffective if used after and on the same
+ * thread as a write.
+ */
+static inline void fence(void)
+{
+	volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_ATOMIC_UNLOCK;
+	barrier();
+	*flushptr = 0;
+}
+#define smp_mb()        fence()
+#define smp_rmb()       fence()
+#define smp_wmb()       barrier()
+#else
+#define fence()		do { } while (0)
+#define smp_mb()        barrier()
+#define smp_rmb()       barrier()
+#define smp_wmb()       barrier()
+#endif
+#endif
+#define smp_read_barrier_depends()     do { } while (0)
+#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+
+#endif /* _ASM_METAG_BARRIER_H */
diff --git a/arch/metag/include/asm/bitops.h b/arch/metag/include/asm/bitops.h
new file mode 100644
index 0000000..c0d0df0
--- /dev/null
+++ b/arch/metag/include/asm/bitops.h
@@ -0,0 +1,132 @@
+#ifndef __ASM_METAG_BITOPS_H
+#define __ASM_METAG_BITOPS_H
+
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+#include <asm/global_lock.h>
+
+/*
+ * clear_bit() doesn't provide any barrier for the compiler.
+ */
+#define smp_mb__before_clear_bit()	barrier()
+#define smp_mb__after_clear_bit()	barrier()
+
+#ifdef CONFIG_SMP
+/*
+ * These functions are the basis of our bit ops.
+ */
+static inline void set_bit(unsigned int bit, volatile unsigned long *p)
+{
+	unsigned long flags;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__global_lock1(flags);
+	fence();
+	*p |= mask;
+	__global_unlock1(flags);
+}
+
+static inline void clear_bit(unsigned int bit, volatile unsigned long *p)
+{
+	unsigned long flags;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__global_lock1(flags);
+	fence();
+	*p &= ~mask;
+	__global_unlock1(flags);
+}
+
+static inline void change_bit(unsigned int bit, volatile unsigned long *p)
+{
+	unsigned long flags;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__global_lock1(flags);
+	fence();
+	*p ^= mask;
+	__global_unlock1(flags);
+}
+
+static inline int test_and_set_bit(unsigned int bit, volatile unsigned long *p)
+{
+	unsigned long flags;
+	unsigned long old;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__global_lock1(flags);
+	old = *p;
+	if (!(old & mask)) {
+		fence();
+		*p = old | mask;
+	}
+	__global_unlock1(flags);
+
+	return (old & mask) != 0;
+}
+
+static inline int test_and_clear_bit(unsigned int bit,
+				     volatile unsigned long *p)
+{
+	unsigned long flags;
+	unsigned long old;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__global_lock1(flags);
+	old = *p;
+	if (old & mask) {
+		fence();
+		*p = old & ~mask;
+	}
+	__global_unlock1(flags);
+
+	return (old & mask) != 0;
+}
+
+static inline int test_and_change_bit(unsigned int bit,
+				      volatile unsigned long *p)
+{
+	unsigned long flags;
+	unsigned long old;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__global_lock1(flags);
+	fence();
+	old = *p;
+	*p = old ^ mask;
+	__global_unlock1(flags);
+
+	return (old & mask) != 0;
+}
+
+#else
+#include <asm-generic/bitops/atomic.h>
+#endif /* CONFIG_SMP */
+
+#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/find.h>
+#include <asm-generic/bitops/ffs.h>
+#include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/ffz.h>
+#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/le.h>
+#include <asm-generic/bitops/ext2-atomic.h>
+
+#endif /* __ASM_METAG_BITOPS_H */
diff --git a/arch/metag/include/asm/bug.h b/arch/metag/include/asm/bug.h
new file mode 100644
index 0000000..d04b48c
--- /dev/null
+++ b/arch/metag/include/asm/bug.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_METAG_BUG_H
+#define _ASM_METAG_BUG_H
+
+#include <asm-generic/bug.h>
+
+struct pt_regs;
+
+extern const char *trap_name(int trapno);
+extern void die(const char *str, struct pt_regs *regs, long err,
+		unsigned long addr) __attribute__ ((noreturn));
+
+#endif
diff --git a/arch/metag/include/asm/cache.h b/arch/metag/include/asm/cache.h
new file mode 100644
index 0000000..a43b650
--- /dev/null
+++ b/arch/metag/include/asm/cache.h
@@ -0,0 +1,23 @@
+#ifndef __ASM_METAG_CACHE_H
+#define __ASM_METAG_CACHE_H
+
+/* L1 cache line size (64 bytes) */
+#define L1_CACHE_SHIFT		6
+#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
+
+/* Meta requires large data items to be 8 byte aligned. */
+#define ARCH_SLAB_MINALIGN	8
+
+/*
+ * With an L2 cache, we may invalidate dirty lines, so we need to ensure DMA
+ * buffers have cache line alignment.
+ */
+#ifdef CONFIG_METAG_L2C
+#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
+#else
+#define ARCH_DMA_MINALIGN	8
+#endif
+
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
+#endif
diff --git a/arch/metag/include/asm/cacheflush.h b/arch/metag/include/asm/cacheflush.h
new file mode 100644
index 0000000..7787ec5
--- /dev/null
+++ b/arch/metag/include/asm/cacheflush.h
@@ -0,0 +1,250 @@
+#ifndef _METAG_CACHEFLUSH_H
+#define _METAG_CACHEFLUSH_H
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/io.h>
+
+#include <asm/l2cache.h>
+#include <asm/metag_isa.h>
+#include <asm/metag_mem.h>
+
+void metag_cache_probe(void);
+
+void metag_data_cache_flush_all(const void *start);
+void metag_code_cache_flush_all(const void *start);
+
+/*
+ * Routines to flush physical cache lines that may be used to cache data or code
+ * normally accessed via the linear address range supplied. The region flushed
+ * must either lie in local or global address space determined by the top bit of
+ * the pStart address. If Bytes is >= 4K then the whole of the related cache
+ * state will be flushed rather than a limited range.
+ */
+void metag_data_cache_flush(const void *start, int bytes);
+void metag_code_cache_flush(const void *start, int bytes);
+
+#ifdef CONFIG_METAG_META12
+
+/* Write through, virtually tagged, split I/D cache. */
+
+static inline void __flush_cache_all(void)
+{
+	metag_code_cache_flush_all((void *) PAGE_OFFSET);
+	metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+#define flush_cache_all() __flush_cache_all()
+
+/* flush the entire user address space referenced in this mm structure */
+static inline void flush_cache_mm(struct mm_struct *mm)
+{
+	if (mm == current->mm)
+		__flush_cache_all();
+}
+
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+
+/* flush a range of addresses from this mm */
+static inline void flush_cache_range(struct vm_area_struct *vma,
+				     unsigned long start, unsigned long end)
+{
+	flush_cache_mm(vma->vm_mm);
+}
+
+static inline void flush_cache_page(struct vm_area_struct *vma,
+				    unsigned long vmaddr, unsigned long pfn)
+{
+	flush_cache_mm(vma->vm_mm);
+}
+
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE	1
+static inline void flush_dcache_page(struct page *page)
+{
+	metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+
+static inline void flush_icache_page(struct vm_area_struct *vma,
+				     struct page *page)
+{
+	metag_code_cache_flush(page_to_virt(page), PAGE_SIZE);
+}
+
+static inline void flush_cache_vmap(unsigned long start, unsigned long end)
+{
+	metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
+{
+	metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+#else
+
+/* Write through, physically tagged, split I/D cache. */
+
+#define flush_cache_all()			do { } while (0)
+#define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
+#define flush_cache_range(vma, start, end)	do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+#define flush_icache_page(vma, pg)		do { } while (0)
+#define flush_cache_vmap(start, end)		do { } while (0)
+#define flush_cache_vunmap(start, end)		do { } while (0)
+
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE	1
+static inline void flush_dcache_page(struct page *page)
+{
+	/* FIXME: We can do better than this. All we are trying to do is
+	 * make the i-cache coherent, we should use the PG_arch_1 bit like
+	 * e.g. powerpc.
+	 */
+#ifdef CONFIG_SMP
+	metag_out32(1, SYSC_ICACHE_FLUSH);
+#else
+	metag_code_cache_flush_all((void *) PAGE_OFFSET);
+#endif
+}
+
+#endif
+
+/* Push n pages at kernel virtual address and clear the icache */
+static inline void flush_icache_range(unsigned long address,
+				      unsigned long endaddr)
+{
+#ifdef CONFIG_SMP
+	metag_out32(1, SYSC_ICACHE_FLUSH);
+#else
+	metag_code_cache_flush((void *) address, endaddr - address);
+#endif
+}
+
+static inline void flush_cache_sigtramp(unsigned long addr, int size)
+{
+	/*
+	 * Flush the icache in case there was previously some code
+	 * fetched from this address, perhaps a previous sigtramp.
+	 *
+	 * We don't need to flush the dcache, it's write through and
+	 * we just wrote the sigtramp code through it.
+	 */
+#ifdef CONFIG_SMP
+	metag_out32(1, SYSC_ICACHE_FLUSH);
+#else
+	metag_code_cache_flush((void *) addr, size);
+#endif
+}
+
+#ifdef CONFIG_METAG_L2C
+
+/*
+ * Perform a single specific CACHEWD operation on an address, masking lower bits
+ * of address first.
+ */
+static inline void cachewd_line(void *addr, unsigned int data)
+{
+	unsigned long masked = (unsigned long)addr & -0x40;
+	__builtin_meta2_cachewd((void *)masked, data);
+}
+
+/* Perform a certain CACHEW op on each cache line in a range */
+static inline void cachew_region_op(void *start, unsigned long size,
+				    unsigned int op)
+{
+	unsigned long offset = (unsigned long)start & 0x3f;
+	int i;
+	if (offset) {
+		size += offset;
+		start -= offset;
+	}
+	i = (size - 1) >> 6;
+	do {
+		__builtin_meta2_cachewd(start, op);
+		start += 0x40;
+	} while (i--);
+}
+
+/* prevent write fence and flushbacks being reordered in L2 */
+static inline void l2c_fence_flush(void *addr)
+{
+	/*
+	 * Synchronise by reading back and re-flushing.
+	 * It is assumed this access will miss, as the caller should have just
+	 * flushed the cache line.
+	 */
+	(void)(volatile u8 *)addr;
+	cachewd_line(addr, CACHEW_FLUSH_L1D_L2);
+}
+
+/* prevent write fence and writebacks being reordered in L2 */
+static inline void l2c_fence(void *addr)
+{
+	/*
+	 * A write back has occurred, but not necessarily an invalidate, so the
+	 * readback in l2c_fence_flush() would hit in the cache and have no
+	 * effect. Therefore fully flush the line first.
+	 */
+	cachewd_line(addr, CACHEW_FLUSH_L1D_L2);
+	l2c_fence_flush(addr);
+}
+
+/* Used to keep memory consistent when doing DMA. */
+static inline void flush_dcache_region(void *start, unsigned long size)
+{
+	/* metag_data_cache_flush won't flush L2 cache lines if size >= 4096 */
+	if (meta_l2c_is_enabled()) {
+		cachew_region_op(start, size, CACHEW_FLUSH_L1D_L2);
+		if (meta_l2c_is_writeback())
+			l2c_fence_flush(start + size - 1);
+	} else {
+		metag_data_cache_flush(start, size);
+	}
+}
+
+/* Write back dirty lines to memory (or do nothing if no writeback caches) */
+static inline void writeback_dcache_region(void *start, unsigned long size)
+{
+	if (meta_l2c_is_enabled() && meta_l2c_is_writeback()) {
+		cachew_region_op(start, size, CACHEW_WRITEBACK_L1D_L2);
+		l2c_fence(start + size - 1);
+	}
+}
+
+/* Invalidate (may also write back if necessary) */
+static inline void invalidate_dcache_region(void *start, unsigned long size)
+{
+	if (meta_l2c_is_enabled())
+		cachew_region_op(start, size, CACHEW_INVALIDATE_L1D_L2);
+	else
+		metag_data_cache_flush(start, size);
+}
+#else
+#define flush_dcache_region(s, l)	metag_data_cache_flush((s), (l))
+#define writeback_dcache_region(s, l)	do {} while (0)
+#define invalidate_dcache_region(s, l)	flush_dcache_region((s), (l))
+#endif
+
+static inline void copy_to_user_page(struct vm_area_struct *vma,
+				     struct page *page, unsigned long vaddr,
+				     void *dst, const void *src,
+				     unsigned long len)
+{
+	memcpy(dst, src, len);
+	flush_icache_range((unsigned long)dst, (unsigned long)dst + len);
+}
+
+static inline void copy_from_user_page(struct vm_area_struct *vma,
+				       struct page *page, unsigned long vaddr,
+				       void *dst, const void *src,
+				       unsigned long len)
+{
+	memcpy(dst, src, len);
+}
+
+#endif /* _METAG_CACHEFLUSH_H */
diff --git a/arch/metag/include/asm/cachepart.h b/arch/metag/include/asm/cachepart.h
new file mode 100644
index 0000000..cf6b44e
--- /dev/null
+++ b/arch/metag/include/asm/cachepart.h
@@ -0,0 +1,42 @@
+/*
+ * Meta cache partition manipulation.
+ *
+ * Copyright 2010 Imagination Technologies Ltd.
+ */
+
+#ifndef _METAG_CACHEPART_H_
+#define _METAG_CACHEPART_H_
+
+/**
+ * get_dcache_size() - Get size of data cache.
+ */
+unsigned int get_dcache_size(void);
+
+/**
+ * get_icache_size() - Get size of code cache.
+ */
+unsigned int get_icache_size(void);
+
+/**
+ * get_global_dcache_size() - Get the thread's global dcache.
+ *
+ * Returns the size of the current thread's global dcache partition.
+ */
+unsigned int get_global_dcache_size(void);
+
+/**
+ * get_global_icache_size() - Get the thread's global icache.
+ *
+ * Returns the size of the current thread's global icache partition.
+ */
+unsigned int get_global_icache_size(void);
+
+/**
+ * check_for_dache_aliasing() - Ensure that the bootloader has configured the
+ * dache and icache properly to avoid aliasing
+ * @thread_id: Hardware thread ID
+ *
+ */
+void check_for_cache_aliasing(int thread_id);
+
+#endif
diff --git a/arch/metag/include/asm/checksum.h b/arch/metag/include/asm/checksum.h
new file mode 100644
index 0000000..999bf76
--- /dev/null
+++ b/arch/metag/include/asm/checksum.h
@@ -0,0 +1,92 @@
+#ifndef _METAG_CHECKSUM_H
+#define _METAG_CHECKSUM_H
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+
+/*
+ * the same as csum_partial, but copies from src while it
+ * checksums
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+extern __wsum csum_partial_copy(const void *src, void *dst, int len,
+				__wsum sum);
+
+/*
+ * the same as csum_partial_copy, but copies from user space.
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+					int len, __wsum sum, int *csum_err);
+
+#define csum_partial_copy_nocheck(src, dst, len, sum)	\
+	csum_partial_copy((src), (dst), (len), (sum))
+
+/*
+ * Fold a partial checksum
+ */
+static inline __sum16 csum_fold(__wsum csum)
+{
+	u32 sum = (__force u32)csum;
+	sum = (sum & 0xffff) + (sum >> 16);
+	sum = (sum & 0xffff) + (sum >> 16);
+	return (__force __sum16)~sum;
+}
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ */
+extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+					unsigned short len,
+					unsigned short proto,
+					__wsum sum)
+{
+	unsigned long len_proto = (proto + len) << 8;
+	asm ("ADD    %0, %0, %1\n"
+	     "ADDS   %0, %0, %2\n"
+	     "ADDCS  %0, %0, #1\n"
+	     "ADDS   %0, %0, %3\n"
+	     "ADDCS  %0, %0, #1\n"
+	     : "=d" (sum)
+	     : "d" (daddr), "d" (saddr), "d" (len_proto),
+	       "0" (sum)
+	     : "cc");
+	return sum;
+}
+
+static inline __sum16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len,
+		  unsigned short proto, __wsum sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+extern __sum16 ip_compute_csum(const void *buff, int len);
+
+#endif /* _METAG_CHECKSUM_H */
diff --git a/arch/metag/include/asm/clock.h b/arch/metag/include/asm/clock.h
new file mode 100644
index 0000000..3e2915a
--- /dev/null
+++ b/arch/metag/include/asm/clock.h
@@ -0,0 +1,51 @@
+/*
+ * arch/metag/include/asm/clock.h
+ *
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _METAG_CLOCK_H_
+#define _METAG_CLOCK_H_
+
+#include <asm/mach/arch.h>
+
+/**
+ * struct meta_clock_desc - Meta Core clock callbacks.
+ * @get_core_freq:	Get the frequency of the Meta core. If this is NULL, the
+ *			core frequency will be determined like this:
+ *			Meta 1: based on loops_per_jiffy.
+ *			Meta 2: (EXPAND_TIMER_DIV + 1) MHz.
+ */
+struct meta_clock_desc {
+	unsigned long		(*get_core_freq)(void);
+};
+
+extern struct meta_clock_desc _meta_clock;
+
+/*
+ * Set up the default clock, ensuring all callbacks are valid - only accessible
+ * during boot.
+ */
+void setup_meta_clocks(struct meta_clock_desc *desc);
+
+/**
+ * get_coreclock() - Get the frequency of the Meta core clock.
+ *
+ * Returns:	The Meta core clock frequency in Hz.
+ */
+static inline unsigned long get_coreclock(void)
+{
+	/*
+	 * Use the current clock callback. If set correctly this will provide
+	 * the most accurate frequency as it can be calculated directly from the
+	 * PLL configuration. otherwise a default callback will have been set
+	 * instead.
+	 */
+	return _meta_clock.get_core_freq();
+}
+
+#endif /* _METAG_CLOCK_H_ */
diff --git a/arch/metag/include/asm/cmpxchg.h b/arch/metag/include/asm/cmpxchg.h
new file mode 100644
index 0000000..b1bc1be
--- /dev/null
+++ b/arch/metag/include/asm/cmpxchg.h
@@ -0,0 +1,65 @@
+#ifndef __ASM_METAG_CMPXCHG_H
+#define __ASM_METAG_CMPXCHG_H
+
+#include <asm/barrier.h>
+
+#if defined(CONFIG_METAG_ATOMICITY_IRQSOFF)
+#include <asm/cmpxchg_irq.h>
+#elif defined(CONFIG_METAG_ATOMICITY_LOCK1)
+#include <asm/cmpxchg_lock1.h>
+#elif defined(CONFIG_METAG_ATOMICITY_LNKGET)
+#include <asm/cmpxchg_lnkget.h>
+#endif
+
+extern void __xchg_called_with_bad_pointer(void);
+
+#define __xchg(ptr, x, size)				\
+({							\
+	unsigned long __xchg__res;			\
+	volatile void *__xchg_ptr = (ptr);		\
+	switch (size) {					\
+	case 4:						\
+		__xchg__res = xchg_u32(__xchg_ptr, x);	\
+		break;					\
+	case 1:						\
+		__xchg__res = xchg_u8(__xchg_ptr, x);	\
+		break;					\
+	default:					\
+		__xchg_called_with_bad_pointer();	\
+		__xchg__res = x;			\
+		break;					\
+	}						\
+							\
+	__xchg__res;					\
+})
+
+#define xchg(ptr, x)	\
+	((__typeof__(*(ptr)))__xchg((ptr), (unsigned long)(x), sizeof(*(ptr))))
+
+/* This function doesn't exist, so you'll get a linker error
+ * if something tries to do an invalid cmpxchg(). */
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+				      unsigned long new, int size)
+{
+	switch (size) {
+	case 4:
+		return __cmpxchg_u32(ptr, old, new);
+	}
+	__cmpxchg_called_with_bad_pointer();
+	return old;
+}
+
+#define __HAVE_ARCH_CMPXCHG 1
+
+#define cmpxchg(ptr, o, n)						\
+	({								\
+		__typeof__(*(ptr)) _o_ = (o);				\
+		__typeof__(*(ptr)) _n_ = (n);				\
+		(__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \
+					       (unsigned long)_n_,	\
+					       sizeof(*(ptr)));		\
+	})
+
+#endif /* __ASM_METAG_CMPXCHG_H */
diff --git a/arch/metag/include/asm/cmpxchg_irq.h b/arch/metag/include/asm/cmpxchg_irq.h
new file mode 100644
index 0000000..6495731
--- /dev/null
+++ b/arch/metag/include/asm/cmpxchg_irq.h
@@ -0,0 +1,42 @@
+#ifndef __ASM_METAG_CMPXCHG_IRQ_H
+#define __ASM_METAG_CMPXCHG_IRQ_H
+
+#include <linux/irqflags.h>
+
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+	unsigned long flags, retval;
+
+	local_irq_save(flags);
+	retval = *m;
+	*m = val;
+	local_irq_restore(flags);
+	return retval;
+}
+
+static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val)
+{
+	unsigned long flags, retval;
+
+	local_irq_save(flags);
+	retval = *m;
+	*m = val & 0xff;
+	local_irq_restore(flags);
+	return retval;
+}
+
+static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
+					  unsigned long new)
+{
+	__u32 retval;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	retval = *m;
+	if (retval == old)
+		*m = new;
+	local_irq_restore(flags);       /* implies memory barrier  */
+	return retval;
+}
+
+#endif /* __ASM_METAG_CMPXCHG_IRQ_H */
diff --git a/arch/metag/include/asm/cmpxchg_lnkget.h b/arch/metag/include/asm/cmpxchg_lnkget.h
new file mode 100644
index 0000000..0154e28
--- /dev/null
+++ b/arch/metag/include/asm/cmpxchg_lnkget.h
@@ -0,0 +1,86 @@
+#ifndef __ASM_METAG_CMPXCHG_LNKGET_H
+#define __ASM_METAG_CMPXCHG_LNKGET_H
+
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+	int temp, old;
+
+	smp_mb();
+
+	asm volatile (
+		      "1:	LNKGETD %1, [%2]\n"
+		      "	LNKSETD	[%2], %3\n"
+		      "	DEFR	%0, TXSTAT\n"
+		      "	ANDT	%0, %0, #HI(0x3f000000)\n"
+		      "	CMPT	%0, #HI(0x02000000)\n"
+		      "	BNZ	1b\n"
+#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE
+		      "	DCACHE	[%2], %0\n"
+#endif
+		      : "=&d" (temp), "=&d" (old)
+		      : "da" (m), "da" (val)
+		      : "cc"
+		      );
+
+	smp_mb();
+
+	return old;
+}
+
+static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val)
+{
+	int temp, old;
+
+	smp_mb();
+
+	asm volatile (
+		      "1:	LNKGETD %1, [%2]\n"
+		      "	LNKSETD	[%2], %3\n"
+		      "	DEFR	%0, TXSTAT\n"
+		      "	ANDT	%0, %0, #HI(0x3f000000)\n"
+		      "	CMPT	%0, #HI(0x02000000)\n"
+		      "	BNZ	1b\n"
+#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE
+		      "	DCACHE	[%2], %0\n"
+#endif
+		      : "=&d" (temp), "=&d" (old)
+		      : "da" (m), "da" (val & 0xff)
+		      : "cc"
+		      );
+
+	smp_mb();
+
+	return old;
+}
+
+static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
+					  unsigned long new)
+{
+	__u32 retval, temp;
+
+	smp_mb();
+
+	asm volatile (
+		      "1:	LNKGETD	%1, [%2]\n"
+		      "	CMP	%1, %3\n"
+		      "	LNKSETDEQ [%2], %4\n"
+		      "	BNE	2f\n"
+		      "	DEFR	%0, TXSTAT\n"
+		      "	ANDT	%0, %0, #HI(0x3f000000)\n"
+		      "	CMPT	%0, #HI(0x02000000)\n"
+		      "	BNZ	1b\n"
+#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE
+		      "	DCACHE	[%2], %0\n"
+#endif
+		      "2:\n"
+		      : "=&d" (temp), "=&da" (retval)
+		      : "da" (m), "bd" (old), "da" (new)
+		      : "cc"
+		      );
+
+	smp_mb();
+
+	return retval;
+}
+
+#endif /* __ASM_METAG_CMPXCHG_LNKGET_H */
diff --git a/arch/metag/include/asm/cmpxchg_lock1.h b/arch/metag/include/asm/cmpxchg_lock1.h
new file mode 100644
index 0000000..fd68504
--- /dev/null
+++ b/arch/metag/include/asm/cmpxchg_lock1.h
@@ -0,0 +1,48 @@
+#ifndef __ASM_METAG_CMPXCHG_LOCK1_H
+#define __ASM_METAG_CMPXCHG_LOCK1_H
+
+#include <asm/global_lock.h>
+
+/* Use LOCK2 as these have to be atomic w.r.t. ordinary accesses. */
+
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+	unsigned long flags, retval;
+
+	__global_lock2(flags);
+	fence();
+	retval = *m;
+	*m = val;
+	__global_unlock2(flags);
+	return retval;
+}
+
+static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val)
+{
+	unsigned long flags, retval;
+
+	__global_lock2(flags);
+	fence();
+	retval = *m;
+	*m = val & 0xff;
+	__global_unlock2(flags);
+	return retval;
+}
+
+static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
+					  unsigned long new)
+{
+	__u32 retval;
+	unsigned long flags;
+
+	__global_lock2(flags);
+	retval = *m;
+	if (retval == old) {
+		fence();
+		*m = new;
+	}
+	__global_unlock2(flags);
+	return retval;
+}
+
+#endif /* __ASM_METAG_CMPXCHG_LOCK1_H */
diff --git a/arch/metag/include/asm/core_reg.h b/arch/metag/include/asm/core_reg.h
new file mode 100644
index 0000000..bdbc3a5
--- /dev/null
+++ b/arch/metag/include/asm/core_reg.h
@@ -0,0 +1,35 @@
+#ifndef __ASM_METAG_CORE_REG_H_
+#define __ASM_METAG_CORE_REG_H_
+
+#include <asm/metag_regs.h>
+
+extern void core_reg_write(int unit, int reg, int thread, unsigned int val);
+extern unsigned int core_reg_read(int unit, int reg, int thread);
+
+/*
+ * These macros allow direct access from C to any register known to the
+ * assembler. Example candidates are TXTACTCYC, TXIDLECYC, and TXPRIVEXT.
+ */
+
+#define __core_reg_get(reg) ({						\
+	unsigned int __grvalue;						\
+	asm volatile("MOV	%0," #reg				\
+		     : "=r" (__grvalue));				\
+	__grvalue;							\
+})
+
+#define __core_reg_set(reg, value) do {					\
+	unsigned int __srvalue = (value);				\
+	asm volatile("MOV	" #reg ",%0"				\
+		     :							\
+		     : "r" (__srvalue));				\
+} while (0)
+
+#define __core_reg_swap(reg, value) do {				\
+	unsigned int __srvalue = (value);				\
+	asm volatile("SWAP	" #reg ",%0"				\
+		     : "+r" (__srvalue));				\
+	(value) = __srvalue;						\
+} while (0)
+
+#endif
diff --git a/arch/metag/include/asm/cpu.h b/arch/metag/include/asm/cpu.h
new file mode 100644
index 0000000..decf129
--- /dev/null
+++ b/arch/metag/include/asm/cpu.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_METAG_CPU_H
+#define _ASM_METAG_CPU_H
+
+#include <linux/percpu.h>
+
+struct cpuinfo_metag {
+	struct cpu cpu;
+#ifdef CONFIG_SMP
+	unsigned long loops_per_jiffy;
+#endif
+};
+
+DECLARE_PER_CPU(struct cpuinfo_metag, cpu_data);
+#endif /* _ASM_METAG_CPU_H */
diff --git a/arch/metag/include/asm/da.h b/arch/metag/include/asm/da.h
new file mode 100644
index 0000000..81bd521
--- /dev/null
+++ b/arch/metag/include/asm/da.h
@@ -0,0 +1,43 @@
+/*
+ * Meta DA JTAG debugger control.
+ *
+ * Copyright 2012 Imagination Technologies Ltd.
+ */
+
+#ifndef _METAG_DA_H_
+#define _METAG_DA_H_
+
+#ifdef CONFIG_METAG_DA
+
+#include <linux/init.h>
+#include <linux/types.h>
+
+extern bool _metag_da_present;
+
+/**
+ * metag_da_enabled() - Find whether a DA is currently enabled.
+ *
+ * Returns:	true if a DA was detected, false if not.
+ */
+static inline bool metag_da_enabled(void)
+{
+	return _metag_da_present;
+}
+
+/**
+ * metag_da_probe() - Try and detect a connected DA.
+ *
+ * This is used at start up to detect whether a DA is active.
+ *
+ * Returns:	0 on detection, -err otherwise.
+ */
+int __init metag_da_probe(void);
+
+#else /* !CONFIG_METAG_DA */
+
+#define metag_da_enabled() false
+#define metag_da_probe() do {} while (0)
+
+#endif
+
+#endif /* _METAG_DA_H_ */
diff --git a/arch/metag/include/asm/delay.h b/arch/metag/include/asm/delay.h
new file mode 100644
index 0000000..9c92f99
--- /dev/null
+++ b/arch/metag/include/asm/delay.h
@@ -0,0 +1,29 @@
+#ifndef _METAG_DELAY_H
+#define _METAG_DELAY_H
+
+/*
+ * Copyright (C) 1993 Linus Torvalds
+ *
+ * Delay routines calling functions in arch/metag/lib/delay.c
+ */
+
+/* Undefined functions to get compile-time errors */
+extern void __bad_udelay(void);
+extern void __bad_ndelay(void);
+
+extern void __udelay(unsigned long usecs);
+extern void __ndelay(unsigned long nsecs);
+extern void __const_udelay(unsigned long xloops);
+extern void __delay(unsigned long loops);
+
+/* 0x10c7 is 2**32 / 1000000 (rounded up) */
+#define udelay(n) (__builtin_constant_p(n) ? \
+	((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \
+	__udelay(n))
+
+/* 0x5 is 2**32 / 1000000000 (rounded up) */
+#define ndelay(n) (__builtin_constant_p(n) ? \
+	((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
+	__ndelay(n))
+
+#endif /* _METAG_DELAY_H */
diff --git a/arch/metag/include/asm/div64.h b/arch/metag/include/asm/div64.h
new file mode 100644
index 0000000..0fdd116
--- /dev/null
+++ b/arch/metag/include/asm/div64.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_DIV64_H__
+#define __ASM_DIV64_H__
+
+#include <asm-generic/div64.h>
+
+extern u64 div_u64(u64 dividend, u64 divisor);
+extern s64 div_s64(s64 dividend, s64 divisor);
+
+#define div_u64 div_u64
+#define div_s64 div_s64
+
+#endif
diff --git a/arch/metag/include/asm/dma-mapping.h b/arch/metag/include/asm/dma-mapping.h
new file mode 100644
index 0000000..14b23ef
--- /dev/null
+++ b/arch/metag/include/asm/dma-mapping.h
@@ -0,0 +1,190 @@
+#ifndef _ASM_METAG_DMA_MAPPING_H
+#define _ASM_METAG_DMA_MAPPING_H
+
+#include <linux/mm.h>
+
+#include <asm/cache.h>
+#include <asm/io.h>
+#include <linux/scatterlist.h>
+#include <asm/bug.h>
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			 dma_addr_t *dma_handle, gfp_t flag);
+
+void dma_free_coherent(struct device *dev, size_t size,
+		       void *vaddr, dma_addr_t dma_handle);
+
+void dma_sync_for_device(void *vaddr, size_t size, int dma_direction);
+void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction);
+
+int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+		      void *cpu_addr, dma_addr_t dma_addr, size_t size);
+
+int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
+			  void *cpu_addr, dma_addr_t dma_addr, size_t size);
+
+static inline dma_addr_t
+dma_map_single(struct device *dev, void *ptr, size_t size,
+	       enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+	WARN_ON(size == 0);
+	dma_sync_for_device(ptr, size, direction);
+	return virt_to_phys(ptr);
+}
+
+static inline void
+dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+		 enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+	dma_sync_for_cpu(phys_to_virt(dma_addr), size, direction);
+}
+
+static inline int
+dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
+	   enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(!valid_dma_direction(direction));
+	WARN_ON(nents == 0 || sglist[0].length == 0);
+
+	for_each_sg(sglist, sg, nents, i) {
+		BUG_ON(!sg_page(sg));
+
+		sg->dma_address = sg_phys(sg);
+		dma_sync_for_device(sg_virt(sg), sg->length, direction);
+	}
+
+	return nents;
+}
+
+static inline dma_addr_t
+dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+	     size_t size, enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+	dma_sync_for_device((void *)(page_to_phys(page) + offset), size,
+			    direction);
+	return page_to_phys(page) + offset;
+}
+
+static inline void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+	       enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+	dma_sync_for_cpu(phys_to_virt(dma_address), size, direction);
+}
+
+
+static inline void
+dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nhwentries,
+	     enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(!valid_dma_direction(direction));
+	WARN_ON(nhwentries == 0 || sglist[0].length == 0);
+
+	for_each_sg(sglist, sg, nhwentries, i) {
+		BUG_ON(!sg_page(sg));
+
+		sg->dma_address = sg_phys(sg);
+		dma_sync_for_cpu(sg_virt(sg), sg->length, direction);
+	}
+}
+
+static inline void
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+			enum dma_data_direction direction)
+{
+	dma_sync_for_cpu(phys_to_virt(dma_handle), size, direction);
+}
+
+static inline void
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+			   size_t size, enum dma_data_direction direction)
+{
+	dma_sync_for_device(phys_to_virt(dma_handle), size, direction);
+}
+
+static inline void
+dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+			      unsigned long offset, size_t size,
+			      enum dma_data_direction direction)
+{
+	dma_sync_for_cpu(phys_to_virt(dma_handle)+offset, size,
+			 direction);
+}
+
+static inline void
+dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+				 unsigned long offset, size_t size,
+				 enum dma_data_direction direction)
+{
+	dma_sync_for_device(phys_to_virt(dma_handle)+offset, size,
+			    direction);
+}
+
+static inline void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+		    enum dma_data_direction direction)
+{
+	int i;
+	for (i = 0; i < nelems; i++, sg++)
+		dma_sync_for_cpu(sg_virt(sg), sg->length, direction);
+}
+
+static inline void
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+		       enum dma_data_direction direction)
+{
+	int i;
+	for (i = 0; i < nelems; i++, sg++)
+		dma_sync_for_device(sg_virt(sg), sg->length, direction);
+}
+
+static inline int
+dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+#define dma_supported(dev, mask)        (1)
+
+static inline int
+dma_set_mask(struct device *dev, u64 mask)
+{
+	if (!dev->dma_mask || !dma_supported(dev, mask))
+		return -EIO;
+
+	*dev->dma_mask = mask;
+
+	return 0;
+}
+
+/*
+ * dma_alloc_noncoherent() returns non-cacheable memory, so there's no need to
+ * do any flushing here.
+ */
+static inline void
+dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+	       enum dma_data_direction direction)
+{
+}
+
+/* drivers/base/dma-mapping.c */
+extern int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
+				  void *cpu_addr, dma_addr_t dma_addr,
+				  size_t size);
+
+#define dma_get_sgtable(d, t, v, h, s) dma_common_get_sgtable(d, t, v, h, s)
+
+#endif
diff --git a/arch/metag/include/asm/elf.h b/arch/metag/include/asm/elf.h
new file mode 100644
index 0000000..d63b9d0
--- /dev/null
+++ b/arch/metag/include/asm/elf.h
@@ -0,0 +1,128 @@
+#ifndef __ASM_METAG_ELF_H
+#define __ASM_METAG_ELF_H
+
+#define EM_METAG      174
+
+/* Meta relocations */
+#define R_METAG_HIADDR16                 0
+#define R_METAG_LOADDR16                 1
+#define R_METAG_ADDR32                   2
+#define R_METAG_NONE                     3
+#define R_METAG_RELBRANCH                4
+#define R_METAG_GETSETOFF                5
+
+/* Backward compatability */
+#define R_METAG_REG32OP1                 6
+#define R_METAG_REG32OP2                 7
+#define R_METAG_REG32OP3                 8
+#define R_METAG_REG16OP1                 9
+#define R_METAG_REG16OP2                10
+#define R_METAG_REG16OP3                11
+#define R_METAG_REG32OP4                12
+
+#define R_METAG_HIOG                    13
+#define R_METAG_LOOG                    14
+
+/* GNU */
+#define R_METAG_GNU_VTINHERIT           30
+#define R_METAG_GNU_VTENTRY             31
+
+/* PIC relocations */
+#define R_METAG_HI16_GOTOFF             32
+#define R_METAG_LO16_GOTOFF             33
+#define R_METAG_GETSET_GOTOFF           34
+#define R_METAG_GETSET_GOT              35
+#define R_METAG_HI16_GOTPC              36
+#define R_METAG_LO16_GOTPC              37
+#define R_METAG_HI16_PLT                38
+#define R_METAG_LO16_PLT                39
+#define R_METAG_RELBRANCH_PLT           40
+#define R_METAG_GOTOFF                  41
+#define R_METAG_PLT                     42
+#define R_METAG_COPY                    43
+#define R_METAG_JMP_SLOT                44
+#define R_METAG_RELATIVE                45
+#define R_METAG_GLOB_DAT                46
+
+/*
+ * ELF register definitions.
+ */
+
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/user.h>
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof(struct user_gp_regs) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef unsigned long elf_fpregset_t;
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) ((x)->e_machine == EM_METAG)
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS	ELFCLASS32
+#define ELF_DATA	ELFDATA2LSB
+#define ELF_ARCH	EM_METAG
+
+#define ELF_PLAT_INIT(_r, load_addr)	\
+	do { _r->ctx.AX[0].U0 = 0; } while (0)
+
+#define USE_ELF_CORE_DUMP
+#define CORE_DUMP_USE_REGSET
+#define ELF_EXEC_PAGESIZE	PAGE_SIZE
+
+/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+   use of this is to invoke "./ld.so someprog" to test out a new version of
+   the loader.  We need to make sure that it is out of the way of the program
+   that it will "exec", and that there is sufficient room for the brk.  */
+
+#define ELF_ET_DYN_BASE         0x08000000UL
+
+#define ELF_CORE_COPY_REGS(_dest, _regs)			\
+	memcpy((char *)&_dest, (char *)_regs, sizeof(struct pt_regs));
+
+/* This yields a mask that user programs can use to figure out what
+   instruction set this cpu supports.  */
+
+#define ELF_HWCAP	(0)
+
+/* This yields a string that ld.so will use to load implementation
+   specific libraries for optimization.  This is more specific in
+   intent than poking at uname or /proc/cpuinfo.  */
+
+#define ELF_PLATFORM  (NULL)
+
+#define SET_PERSONALITY(ex) \
+	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
+
+#define STACK_RND_MASK (0)
+
+#ifdef CONFIG_METAG_USER_TCM
+
+struct elf32_phdr;
+struct file;
+
+unsigned long __metag_elf_map(struct file *filep, unsigned long addr,
+			      struct elf32_phdr *eppnt, int prot, int type,
+			      unsigned long total_size);
+
+static inline unsigned long metag_elf_map(struct file *filep,
+					  unsigned long addr,
+					  struct elf32_phdr *eppnt, int prot,
+					  int type, unsigned long total_size)
+{
+	return __metag_elf_map(filep, addr, eppnt, prot, type, total_size);
+}
+#define elf_map metag_elf_map
+
+#endif
+
+#endif
diff --git a/arch/metag/include/asm/fixmap.h b/arch/metag/include/asm/fixmap.h
new file mode 100644
index 0000000..3331275
--- /dev/null
+++ b/arch/metag/include/asm/fixmap.h
@@ -0,0 +1,99 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+#include <asm/pgtable.h>
+#ifdef CONFIG_HIGHMEM
+#include <linux/threads.h>
+#include <asm/kmap_types.h>
+#endif
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special  addresses
+ * from the end of the consistent memory region backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * these 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages. (or larger if used with an increment
+ * higher than 1) use fixmap_set(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+enum fixed_addresses {
+#define FIX_N_COLOURS 8
+#ifdef CONFIG_HIGHMEM
+	/* reserved pte's for temporary kernel mappings */
+	FIX_KMAP_BEGIN,
+	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+#endif
+	__end_of_fixed_addresses
+};
+
+#define FIXADDR_TOP     (CONSISTENT_START - PAGE_SIZE)
+#define FIXADDR_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START	((FIXADDR_TOP - FIXADDR_SIZE) & PMD_MASK)
+
+#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x)	((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
+extern void __this_fixmap_does_not_exist(void);
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without tranlation, we catch the bug with a NULL-deference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static inline unsigned long fix_to_virt(const unsigned int idx)
+{
+	/*
+	 * this branch gets completely eliminated after inlining,
+	 * except when someone tries to use fixaddr indices in an
+	 * illegal way. (such as mixing up address types or using
+	 * out-of-range indices).
+	 *
+	 * If it doesn't get removed, the linker will complain
+	 * loudly with a reasonably clear error message..
+	 */
+	if (idx >= __end_of_fixed_addresses)
+		__this_fixmap_does_not_exist();
+
+	return __fix_to_virt(idx);
+}
+
+static inline unsigned long virt_to_fix(const unsigned long vaddr)
+{
+	BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+	return __virt_to_fix(vaddr);
+}
+
+#define kmap_get_fixmap_pte(vaddr) \
+	pte_offset_kernel( \
+		pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), \
+		(vaddr) \
+	)
+
+/*
+ * Called from pgtable_init()
+ */
+extern void fixrange_init(unsigned long start, unsigned long end,
+	pgd_t *pgd_base);
+
+
+#endif
diff --git a/arch/metag/include/asm/ftrace.h b/arch/metag/include/asm/ftrace.h
new file mode 100644
index 0000000..2901f0f
--- /dev/null
+++ b/arch/metag/include/asm/ftrace.h
@@ -0,0 +1,23 @@
+#ifndef _ASM_METAG_FTRACE
+#define _ASM_METAG_FTRACE
+
+#ifdef CONFIG_FUNCTION_TRACER
+#define MCOUNT_INSN_SIZE	8 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void mcount_wrapper(void);
+#define MCOUNT_ADDR		((long)(mcount_wrapper))
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr;
+}
+
+struct dyn_arch_ftrace {
+	/* No extra data needed on metag */
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#endif /* _ASM_METAG_FTRACE */
diff --git a/arch/metag/include/asm/global_lock.h b/arch/metag/include/asm/global_lock.h
new file mode 100644
index 0000000..fc831c8
--- /dev/null
+++ b/arch/metag/include/asm/global_lock.h
@@ -0,0 +1,100 @@
+#ifndef __ASM_METAG_GLOBAL_LOCK_H
+#define __ASM_METAG_GLOBAL_LOCK_H
+
+#include <asm/metag_mem.h>
+
+/**
+ * __global_lock1() - Acquire global voluntary lock (LOCK1).
+ * @flags:	Variable to store flags into.
+ *
+ * Acquires the Meta global voluntary lock (LOCK1), also taking care to disable
+ * all triggers so we cannot be interrupted, and to enforce a compiler barrier
+ * so that the compiler cannot reorder memory accesses across the lock.
+ *
+ * No other hardware thread will be able to acquire the voluntary or exclusive
+ * locks until the voluntary lock is released with @__global_unlock1, but they
+ * may continue to execute as long as they aren't trying to acquire either of
+ * the locks.
+ */
+#define __global_lock1(flags) do {					\
+	unsigned int __trval;						\
+	asm volatile("MOV	%0,#0\n\t"				\
+		     "SWAP	%0,TXMASKI\n\t"				\
+		     "LOCK1"						\
+		     : "=r" (__trval)					\
+		     :							\
+		     : "memory");					\
+	(flags) = __trval;						\
+} while (0)
+
+/**
+ * __global_unlock1() - Release global voluntary lock (LOCK1).
+ * @flags:	Variable to restore flags from.
+ *
+ * Releases the Meta global voluntary lock (LOCK1) acquired with
+ * @__global_lock1, also taking care to re-enable triggers, and to enforce a
+ * compiler barrier so that the compiler cannot reorder memory accesses across
+ * the unlock.
+ *
+ * This immediately allows another hardware thread to acquire the voluntary or
+ * exclusive locks.
+ */
+#define __global_unlock1(flags) do {					\
+	unsigned int __trval = (flags);					\
+	asm volatile("LOCK0\n\t"					\
+		     "MOV	TXMASKI,%0"				\
+		     :							\
+		     : "r" (__trval)					\
+		     : "memory");					\
+} while (0)
+
+/**
+ * __global_lock2() - Acquire global exclusive lock (LOCK2).
+ * @flags:	Variable to store flags into.
+ *
+ * Acquires the Meta global voluntary lock and global exclusive lock (LOCK2),
+ * also taking care to disable all triggers so we cannot be interrupted, to take
+ * the atomic lock (system event) and to enforce a compiler barrier so that the
+ * compiler cannot reorder memory accesses across the lock.
+ *
+ * No other hardware thread will be able to execute code until the locks are
+ * released with @__global_unlock2.
+ */
+#define __global_lock2(flags) do {					\
+	unsigned int __trval;						\
+	unsigned int __aloc_hi = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000; \
+	asm volatile("MOV	%0,#0\n\t"				\
+		     "SWAP	%0,TXMASKI\n\t"				\
+		     "LOCK2\n\t"					\
+		     "SETD	[%1+#0x40],D1RtP"			\
+		     : "=r&" (__trval)					\
+		     : "u" (__aloc_hi)					\
+		     : "memory");					\
+	(flags) = __trval;						\
+} while (0)
+
+/**
+ * __global_unlock2() - Release global exclusive lock (LOCK2).
+ * @flags:	Variable to restore flags from.
+ *
+ * Releases the Meta global exclusive lock (LOCK2) and global voluntary lock
+ * acquired with @__global_lock2, also taking care to release the atomic lock
+ * (system event), re-enable triggers, and to enforce a compiler barrier so that
+ * the compiler cannot reorder memory accesses across the unlock.
+ *
+ * This immediately allows other hardware threads to continue executing and one
+ * of them to acquire locks.
+ */
+#define __global_unlock2(flags) do {					\
+	unsigned int __trval = (flags);					\
+	unsigned int __alock_hi = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000; \
+	asm volatile("SETD	[%1+#0x00],D1RtP\n\t"			\
+		     "LOCK0\n\t"					\
+		     "MOV	TXMASKI,%0"				\
+		     :							\
+		     : "r" (__trval),					\
+		       "u" (__alock_hi)					\
+		     : "memory");					\
+} while (0)
+
+#endif /* __ASM_METAG_GLOBAL_LOCK_H */
diff --git a/arch/metag/include/asm/gpio.h b/arch/metag/include/asm/gpio.h
new file mode 100644
index 0000000..b3799d8
--- /dev/null
+++ b/arch/metag/include/asm/gpio.h
@@ -0,0 +1,4 @@
+#ifndef __LINUX_GPIO_H
+#warning Include linux/gpio.h instead of asm/gpio.h
+#include <linux/gpio.h>
+#endif
diff --git a/arch/metag/include/asm/highmem.h b/arch/metag/include/asm/highmem.h
new file mode 100644
index 0000000..6646a15
--- /dev/null
+++ b/arch/metag/include/asm/highmem.h
@@ -0,0 +1,62 @@
+#ifndef _ASM_HIGHMEM_H
+#define _ASM_HIGHMEM_H
+
+#include <asm/cacheflush.h>
+#include <asm/kmap_types.h>
+#include <asm/fixmap.h>
+
+/*
+ * Right now we initialize only a single pte table. It can be extended
+ * easily, subsequent pte tables have to be allocated in one physical
+ * chunk of RAM.
+ */
+/*
+ * Ordering is (from lower to higher memory addresses):
+ *
+ * high_memory
+ *			Persistent kmap area
+ * PKMAP_BASE
+ *			fixed_addresses
+ * FIXADDR_START
+ * FIXADDR_TOP
+ *			Vmalloc area
+ * VMALLOC_START
+ * VMALLOC_END
+ */
+#define PKMAP_BASE		(FIXADDR_START - PMD_SIZE)
+#define LAST_PKMAP		PTRS_PER_PTE
+#define LAST_PKMAP_MASK		(LAST_PKMAP - 1)
+#define PKMAP_NR(virt)		(((virt) - PKMAP_BASE) >> PAGE_SHIFT)
+#define PKMAP_ADDR(nr)		(PKMAP_BASE + ((nr) << PAGE_SHIFT))
+
+#define kmap_prot		PAGE_KERNEL
+
+static inline void flush_cache_kmaps(void)
+{
+	flush_cache_all();
+}
+
+/* declarations for highmem.c */
+extern unsigned long highstart_pfn, highend_pfn;
+
+extern pte_t *pkmap_page_table;
+
+extern void *kmap_high(struct page *page);
+extern void kunmap_high(struct page *page);
+
+extern void kmap_init(void);
+
+/*
+ * The following functions are already defined by <linux/highmem.h>
+ * when CONFIG_HIGHMEM is not set.
+ */
+#ifdef CONFIG_HIGHMEM
+extern void *kmap(struct page *page);
+extern void kunmap(struct page *page);
+extern void *kmap_atomic(struct page *page);
+extern void __kunmap_atomic(void *kvaddr);
+extern void *kmap_atomic_pfn(unsigned long pfn);
+extern struct page *kmap_atomic_to_page(void *ptr);
+#endif
+
+#endif
diff --git a/arch/metag/include/asm/hugetlb.h b/arch/metag/include/asm/hugetlb.h
new file mode 100644
index 0000000..f545477
--- /dev/null
+++ b/arch/metag/include/asm/hugetlb.h
@@ -0,0 +1,86 @@
+#ifndef _ASM_METAG_HUGETLB_H
+#define _ASM_METAG_HUGETLB_H
+
+#include <asm/page.h>
+
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr,
+					 unsigned long len) {
+	return 0;
+}
+
+int prepare_hugepage_range(struct file *file, unsigned long addr,
+						unsigned long len);
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+					  unsigned long addr, unsigned long end,
+					  unsigned long floor,
+					  unsigned long ceiling)
+{
+	free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	return *ptep;
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+	return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
+#endif /* _ASM_METAG_HUGETLB_H */
diff --git a/arch/metag/include/asm/hwthread.h b/arch/metag/include/asm/hwthread.h
new file mode 100644
index 0000000..8f97866
--- /dev/null
+++ b/arch/metag/include/asm/hwthread.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2008 Imagination Technologies
+ */
+#ifndef __METAG_HWTHREAD_H
+#define __METAG_HWTHREAD_H
+
+#include <linux/bug.h>
+#include <linux/io.h>
+
+#include <asm/metag_mem.h>
+
+#define BAD_HWTHREAD_ID		(0xFFU)
+#define BAD_CPU_ID		(0xFFU)
+
+extern u8 cpu_2_hwthread_id[];
+extern u8 hwthread_id_2_cpu[];
+
+/*
+ * Each hardware thread's Control Unit registers are memory-mapped
+ * and can therefore be accessed by any other hardware thread.
+ *
+ * This helper function returns the memory address where "thread"'s
+ * register "regnum" is mapped.
+ */
+static inline
+void __iomem *__CU_addr(unsigned int thread, unsigned int regnum)
+{
+	unsigned int base, thread_offset, thread_regnum;
+
+	WARN_ON(thread == BAD_HWTHREAD_ID);
+
+	base = T0UCTREG0;	/* Control unit base */
+
+	thread_offset = TnUCTRX_STRIDE * thread;
+	thread_regnum = TXUCTREGn_STRIDE * regnum;
+
+	return (void __iomem *)(base + thread_offset + thread_regnum);
+}
+
+#endif /* __METAG_HWTHREAD_H */
diff --git a/arch/metag/include/asm/io.h b/arch/metag/include/asm/io.h
new file mode 100644
index 0000000..9359e50
--- /dev/null
+++ b/arch/metag/include/asm/io.h
@@ -0,0 +1,165 @@
+#ifndef _ASM_METAG_IO_H
+#define _ASM_METAG_IO_H
+
+#include <linux/types.h>
+
+#define IO_SPACE_LIMIT  0
+
+#define page_to_bus page_to_phys
+#define bus_to_page phys_to_page
+
+/*
+ * Generic I/O
+ */
+
+#define __raw_readb __raw_readb
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+	u8 ret;
+	asm volatile("GETB %0,[%1]"
+		     : "=da" (ret)
+		     : "da" (addr)
+		     : "memory");
+	return ret;
+}
+
+#define __raw_readw __raw_readw
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+	u16 ret;
+	asm volatile("GETW %0,[%1]"
+		     : "=da" (ret)
+		     : "da" (addr)
+		     : "memory");
+	return ret;
+}
+
+#define __raw_readl __raw_readl
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+	u32 ret;
+	asm volatile("GETD %0,[%1]"
+		     : "=da" (ret)
+		     : "da" (addr)
+		     : "memory");
+	return ret;
+}
+
+#define __raw_readq __raw_readq
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+	u64 ret;
+	asm volatile("GETL %0,%t0,[%1]"
+		     : "=da" (ret)
+		     : "da" (addr)
+		     : "memory");
+	return ret;
+}
+
+#define __raw_writeb __raw_writeb
+static inline void __raw_writeb(u8 b, volatile void __iomem *addr)
+{
+	asm volatile("SETB [%0],%1"
+		     :
+		     : "da" (addr),
+		       "da" (b)
+		     : "memory");
+}
+
+#define __raw_writew __raw_writew
+static inline void __raw_writew(u16 b, volatile void __iomem *addr)
+{
+	asm volatile("SETW [%0],%1"
+		     :
+		     : "da" (addr),
+		       "da" (b)
+		     : "memory");
+}
+
+#define __raw_writel __raw_writel
+static inline void __raw_writel(u32 b, volatile void __iomem *addr)
+{
+	asm volatile("SETD [%0],%1"
+		     :
+		     : "da" (addr),
+		       "da" (b)
+		     : "memory");
+}
+
+#define __raw_writeq __raw_writeq
+static inline void __raw_writeq(u64 b, volatile void __iomem *addr)
+{
+	asm volatile("SETL [%0],%1,%t1"
+		     :
+		     : "da" (addr),
+		       "da" (b)
+		     : "memory");
+}
+
+/*
+ * The generic io.h can define all the other generic accessors
+ */
+
+#include <asm-generic/io.h>
+
+/*
+ * Despite being a 32bit architecture, Meta can do 64bit memory accesses
+ * (assuming the bus supports it).
+ */
+
+#define readq	__raw_readq
+#define writeq	__raw_writeq
+
+/*
+ * Meta specific I/O for accessing non-MMU areas.
+ *
+ * These can be provided with a physical address rather than an __iomem pointer
+ * and should only be used by core architecture code for accessing fixed core
+ * registers. Generic drivers should use ioremap and the generic I/O accessors.
+ */
+
+#define metag_in8(addr)		__raw_readb((volatile void __iomem *)(addr))
+#define metag_in16(addr)	__raw_readw((volatile void __iomem *)(addr))
+#define metag_in32(addr)	__raw_readl((volatile void __iomem *)(addr))
+#define metag_in64(addr)	__raw_readq((volatile void __iomem *)(addr))
+
+#define metag_out8(b, addr)	__raw_writeb(b, (volatile void __iomem *)(addr))
+#define metag_out16(b, addr)	__raw_writew(b, (volatile void __iomem *)(addr))
+#define metag_out32(b, addr)	__raw_writel(b, (volatile void __iomem *)(addr))
+#define metag_out64(b, addr)	__raw_writeq(b, (volatile void __iomem *)(addr))
+
+/*
+ * io remapping functions
+ */
+
+extern void __iomem *__ioremap(unsigned long offset,
+			       size_t size, unsigned long flags);
+extern void __iounmap(void __iomem *addr);
+
+/**
+ *	ioremap		-	map bus memory into CPU space
+ *	@offset:	bus address of the memory
+ *	@size:		size of the resource to map
+ *
+ *	ioremap performs a platform specific sequence of operations to
+ *	make bus memory CPU accessible via the readb/readw/readl/writeb/
+ *	writew/writel functions and the other mmio helpers. The returned
+ *	address is not guaranteed to be usable directly as a virtual
+ *	address.
+ */
+#define ioremap(offset, size)                   \
+	__ioremap((offset), (size), 0)
+
+#define ioremap_nocache(offset, size)           \
+	__ioremap((offset), (size), 0)
+
+#define ioremap_cached(offset, size)            \
+	__ioremap((offset), (size), _PAGE_CACHEABLE)
+
+#define ioremap_wc(offset, size)                \
+	__ioremap((offset), (size), _PAGE_WR_COMBINE)
+
+#define iounmap(addr)                           \
+	__iounmap(addr)
+
+#endif  /* _ASM_METAG_IO_H */
diff --git a/arch/metag/include/asm/irq.h b/arch/metag/include/asm/irq.h
new file mode 100644
index 0000000..be0c8f3
--- /dev/null
+++ b/arch/metag/include/asm/irq.h
@@ -0,0 +1,32 @@
+#ifndef __ASM_METAG_IRQ_H
+#define __ASM_METAG_IRQ_H
+
+#ifdef CONFIG_4KSTACKS
+extern void irq_ctx_init(int cpu);
+extern void irq_ctx_exit(int cpu);
+# define __ARCH_HAS_DO_SOFTIRQ
+#else
+# define irq_ctx_init(cpu) do { } while (0)
+# define irq_ctx_exit(cpu) do { } while (0)
+#endif
+
+void tbi_startup_interrupt(int);
+void tbi_shutdown_interrupt(int);
+
+struct pt_regs;
+
+int tbisig_map(unsigned int hw);
+extern void do_IRQ(int irq, struct pt_regs *regs);
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+int traps_save_context(void);
+int traps_restore_context(void);
+#endif
+
+#include <asm-generic/irq.h>
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern void migrate_irqs(void);
+#endif
+
+#endif /* __ASM_METAG_IRQ_H */
diff --git a/arch/metag/include/asm/irqflags.h b/arch/metag/include/asm/irqflags.h
new file mode 100644
index 0000000..339b16f
--- /dev/null
+++ b/arch/metag/include/asm/irqflags.h
@@ -0,0 +1,93 @@
+/*
+ * IRQ flags handling
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * raw_local_irq_*() functions from the lowlevel headers.
+ */
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/core_reg.h>
+#include <asm/metag_regs.h>
+
+#define INTS_OFF_MASK TXSTATI_BGNDHALT_BIT
+
+#ifdef CONFIG_SMP
+extern unsigned int get_trigger_mask(void);
+#else
+
+extern unsigned int global_trigger_mask;
+
+static inline unsigned int get_trigger_mask(void)
+{
+	return global_trigger_mask;
+}
+#endif
+
+static inline unsigned long arch_local_save_flags(void)
+{
+	return __core_reg_get(TXMASKI);
+}
+
+static inline int arch_irqs_disabled_flags(unsigned long flags)
+{
+	return (flags & ~INTS_OFF_MASK) == 0;
+}
+
+static inline int arch_irqs_disabled(void)
+{
+	unsigned long flags = arch_local_save_flags();
+
+	return arch_irqs_disabled_flags(flags);
+}
+
+static inline unsigned long __irqs_disabled(void)
+{
+	/*
+	 * We shouldn't enable exceptions if they are not already
+	 * enabled. This is required for chancalls to work correctly.
+	 */
+	return arch_local_save_flags() & INTS_OFF_MASK;
+}
+
+/*
+ * For spinlocks, etc:
+ */
+static inline unsigned long arch_local_irq_save(void)
+{
+	unsigned long flags = __irqs_disabled();
+
+	asm volatile("SWAP %0,TXMASKI\n" : "=r" (flags) : "0" (flags)
+		     : "memory");
+
+	return flags;
+}
+
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+	asm volatile("MOV TXMASKI,%0\n" : : "r" (flags) : "memory");
+}
+
+static inline void arch_local_irq_disable(void)
+{
+	unsigned long flags = __irqs_disabled();
+
+	asm volatile("MOV TXMASKI,%0\n" : : "r" (flags) : "memory");
+}
+
+#ifdef CONFIG_SMP
+/* Avoid circular include dependencies through <linux/preempt.h> */
+void arch_local_irq_enable(void);
+#else
+static inline void arch_local_irq_enable(void)
+{
+	arch_local_irq_restore(get_trigger_mask());
+}
+#endif
+
+#endif /* (__ASSEMBLY__) */
+
+#endif /* !(_ASM_IRQFLAGS_H) */
diff --git a/arch/metag/include/asm/l2cache.h b/arch/metag/include/asm/l2cache.h
new file mode 100644
index 0000000..bffbeaa
--- /dev/null
+++ b/arch/metag/include/asm/l2cache.h
@@ -0,0 +1,258 @@
+#ifndef _METAG_L2CACHE_H
+#define _METAG_L2CACHE_H
+
+#ifdef CONFIG_METAG_L2C
+
+#include <asm/global_lock.h>
+#include <asm/io.h>
+
+/*
+ * Store the last known value of pfenable (we don't want prefetch enabled while
+ * L2 is off).
+ */
+extern int l2c_pfenable;
+
+/* defined in arch/metag/drivers/core-sysfs.c */
+extern struct sysdev_class cache_sysclass;
+
+static inline void wr_fence(void);
+
+/*
+ * Functions for reading of L2 cache configuration.
+ */
+
+/* Get raw L2 config register (CORE_CONFIG3) */
+static inline unsigned int meta_l2c_config(void)
+{
+	const unsigned int *corecfg3 = (const unsigned int *)METAC_CORE_CONFIG3;
+	return *corecfg3;
+}
+
+/* Get whether the L2 is present */
+static inline int meta_l2c_is_present(void)
+{
+	return meta_l2c_config() & METAC_CORECFG3_L2C_HAVE_L2C_BIT;
+}
+
+/* Get whether the L2 is configured for write-back instead of write-through */
+static inline int meta_l2c_is_writeback(void)
+{
+	return meta_l2c_config() & METAC_CORECFG3_L2C_MODE_BIT;
+}
+
+/* Get whether the L2 is unified instead of separated code/data */
+static inline int meta_l2c_is_unified(void)
+{
+	return meta_l2c_config() & METAC_CORECFG3_L2C_UNIFIED_BIT;
+}
+
+/* Get the L2 cache size in bytes */
+static inline unsigned int meta_l2c_size(void)
+{
+	unsigned int size_s;
+	if (!meta_l2c_is_present())
+		return 0;
+	size_s = (meta_l2c_config() & METAC_CORECFG3_L2C_SIZE_BITS)
+			>> METAC_CORECFG3_L2C_SIZE_S;
+	/* L2CSIZE is in KiB */
+	return 1024 << size_s;
+}
+
+/* Get the number of ways in the L2 cache */
+static inline unsigned int meta_l2c_ways(void)
+{
+	unsigned int ways_s;
+	if (!meta_l2c_is_present())
+		return 0;
+	ways_s = (meta_l2c_config() & METAC_CORECFG3_L2C_NUM_WAYS_BITS)
+			>> METAC_CORECFG3_L2C_NUM_WAYS_S;
+	return 0x1 << ways_s;
+}
+
+/* Get the line size of the L2 cache */
+static inline unsigned int meta_l2c_linesize(void)
+{
+	unsigned int line_size;
+	if (!meta_l2c_is_present())
+		return 0;
+	line_size = (meta_l2c_config() & METAC_CORECFG3_L2C_LINE_SIZE_BITS)
+			>> METAC_CORECFG3_L2C_LINE_SIZE_S;
+	switch (line_size) {
+	case METAC_CORECFG3_L2C_LINE_SIZE_64B:
+		return 64;
+	default:
+		return 0;
+	}
+}
+
+/* Get the revision ID of the L2 cache */
+static inline unsigned int meta_l2c_revision(void)
+{
+	return (meta_l2c_config() & METAC_CORECFG3_L2C_REV_ID_BITS)
+			>> METAC_CORECFG3_L2C_REV_ID_S;
+}
+
+
+/*
+ * Start an initialisation of the L2 cachelines and wait for completion.
+ * This should only be done in a LOCK1 or LOCK2 critical section while the L2
+ * is disabled.
+ */
+static inline void _meta_l2c_init(void)
+{
+	metag_out32(SYSC_L2C_INIT_INIT, SYSC_L2C_INIT);
+	while (metag_in32(SYSC_L2C_INIT) == SYSC_L2C_INIT_IN_PROGRESS)
+		/* do nothing */;
+}
+
+/*
+ * Start a writeback of dirty L2 cachelines and wait for completion.
+ * This should only be done in a LOCK1 or LOCK2 critical section.
+ */
+static inline void _meta_l2c_purge(void)
+{
+	metag_out32(SYSC_L2C_PURGE_PURGE, SYSC_L2C_PURGE);
+	while (metag_in32(SYSC_L2C_PURGE) == SYSC_L2C_PURGE_IN_PROGRESS)
+		/* do nothing */;
+}
+
+/* Set whether the L2 cache is enabled. */
+static inline void _meta_l2c_enable(int enabled)
+{
+	unsigned int enable;
+
+	enable = metag_in32(SYSC_L2C_ENABLE);
+	if (enabled)
+		enable |= SYSC_L2C_ENABLE_ENABLE_BIT;
+	else
+		enable &= ~SYSC_L2C_ENABLE_ENABLE_BIT;
+	metag_out32(enable, SYSC_L2C_ENABLE);
+}
+
+/* Set whether the L2 cache prefetch is enabled. */
+static inline void _meta_l2c_pf_enable(int pfenabled)
+{
+	unsigned int enable;
+
+	enable = metag_in32(SYSC_L2C_ENABLE);
+	if (pfenabled)
+		enable |= SYSC_L2C_ENABLE_PFENABLE_BIT;
+	else
+		enable &= ~SYSC_L2C_ENABLE_PFENABLE_BIT;
+	metag_out32(enable, SYSC_L2C_ENABLE);
+}
+
+/* Return whether the L2 cache is enabled */
+static inline int _meta_l2c_is_enabled(void)
+{
+	return metag_in32(SYSC_L2C_ENABLE) & SYSC_L2C_ENABLE_ENABLE_BIT;
+}
+
+/* Return whether the L2 cache prefetch is enabled */
+static inline int _meta_l2c_pf_is_enabled(void)
+{
+	return metag_in32(SYSC_L2C_ENABLE) & SYSC_L2C_ENABLE_PFENABLE_BIT;
+}
+
+
+/* Return whether the L2 cache is enabled */
+static inline int meta_l2c_is_enabled(void)
+{
+	int en;
+
+	/*
+	 * There is no need to lock at the moment, as the enable bit is never
+	 * intermediately changed, so we will never see an intermediate result.
+	 */
+	en = _meta_l2c_is_enabled();
+
+	return en;
+}
+
+/*
+ * Ensure the L2 cache is disabled.
+ * Return whether the L2 was previously disabled.
+ */
+int meta_l2c_disable(void);
+
+/*
+ * Ensure the L2 cache is enabled.
+ * Return whether the L2 was previously enabled.
+ */
+int meta_l2c_enable(void);
+
+/* Return whether the L2 cache prefetch is enabled */
+static inline int meta_l2c_pf_is_enabled(void)
+{
+	return l2c_pfenable;
+}
+
+/*
+ * Set whether the L2 cache prefetch is enabled.
+ * Return whether the L2 prefetch was previously enabled.
+ */
+int meta_l2c_pf_enable(int pfenable);
+
+/*
+ * Flush the L2 cache.
+ * Return 1 if the L2 is disabled.
+ */
+int meta_l2c_flush(void);
+
+/*
+ * Write back all dirty cache lines in the L2 cache.
+ * Return 1 if the L2 is disabled or there isn't any writeback.
+ */
+static inline int meta_l2c_writeback(void)
+{
+	unsigned long flags;
+	int en;
+
+	/* no need to purge if it's not a writeback cache */
+	if (!meta_l2c_is_writeback())
+		return 1;
+
+	/*
+	 * Purge only works if the L2 is enabled, and involves reading back to
+	 * detect completion, so keep this operation atomic with other threads.
+	 */
+	__global_lock1(flags);
+	en = meta_l2c_is_enabled();
+	if (likely(en)) {
+		wr_fence();
+		_meta_l2c_purge();
+	}
+	__global_unlock1(flags);
+
+	return !en;
+}
+
+#else /* CONFIG_METAG_L2C */
+
+#define meta_l2c_config()		0
+#define meta_l2c_is_present()		0
+#define meta_l2c_is_writeback()		0
+#define meta_l2c_is_unified()		0
+#define meta_l2c_size()			0
+#define meta_l2c_ways()			0
+#define meta_l2c_linesize()		0
+#define meta_l2c_revision()		0
+
+#define meta_l2c_is_enabled()		0
+#define _meta_l2c_pf_is_enabled()	0
+#define meta_l2c_pf_is_enabled()	0
+#define meta_l2c_disable()		1
+#define meta_l2c_enable()		0
+#define meta_l2c_pf_enable(X)		0
+static inline int meta_l2c_flush(void)
+{
+	return 1;
+}
+static inline int meta_l2c_writeback(void)
+{
+	return 1;
+}
+
+#endif /* CONFIG_METAG_L2C */
+
+#endif /* _METAG_L2CACHE_H */
diff --git a/arch/metag/include/asm/linkage.h b/arch/metag/include/asm/linkage.h
new file mode 100644
index 0000000..73bf25b
--- /dev/null
+++ b/arch/metag/include/asm/linkage.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#define __ALIGN .p2align 2
+#define __ALIGN_STR ".p2align 2"
+
+#endif
diff --git a/arch/metag/include/asm/mach/arch.h b/arch/metag/include/asm/mach/arch.h
new file mode 100644
index 0000000..12c5664
--- /dev/null
+++ b/arch/metag/include/asm/mach/arch.h
@@ -0,0 +1,86 @@
+/*
+ * arch/metag/include/asm/mach/arch.h
+ *
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * based on the ARM version:
+ *  Copyright (C) 2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _METAG_MACH_ARCH_H_
+#define _METAG_MACH_ARCH_H_
+
+#include <linux/stddef.h>
+
+#include <asm/clock.h>
+
+/**
+ * struct machine_desc - Describes a board controlled by a Meta.
+ * @name:		Board/SoC name.
+ * @dt_compat:		Array of device tree 'compatible' strings.
+ * @clocks:		Clock callbacks.
+ *
+ * @nr_irqs:		Maximum number of IRQs.
+ *			If 0, defaults to NR_IRQS in asm-generic/irq.h.
+ *
+ * @init_early:		Early init callback.
+ * @init_irq:		IRQ init callback for setting up IRQ controllers.
+ * @init_machine:	Arch init callback for setting up devices.
+ * @init_late:		Late init callback.
+ *
+ * This structure is provided by each board which can be controlled by a Meta.
+ * It is chosen by matching the compatible strings in the device tree provided
+ * by the bootloader with the strings in @dt_compat, and sets up any aspects of
+ * the machine that aren't configured with device tree (yet).
+ */
+struct machine_desc {
+	const char		*name;
+	const char		**dt_compat;
+	struct meta_clock_desc	*clocks;
+
+	unsigned int		nr_irqs;
+
+	void			(*init_early)(void);
+	void			(*init_irq)(void);
+	void			(*init_machine)(void);
+	void			(*init_late)(void);
+};
+
+/*
+ * Current machine - only accessible during boot.
+ */
+extern struct machine_desc *machine_desc;
+
+/*
+ * Machine type table - also only accessible during boot
+ */
+extern struct machine_desc __arch_info_begin[], __arch_info_end[];
+#define for_each_machine_desc(p)			\
+	for (p = __arch_info_begin; p < __arch_info_end; p++)
+
+static inline struct machine_desc *default_machine_desc(void)
+{
+	/* the default machine is the last one linked in */
+	if (__arch_info_end - 1 < __arch_info_begin)
+		return NULL;
+	return __arch_info_end - 1;
+}
+
+/*
+ * Set of macros to define architecture features.  This is built into
+ * a table by the linker.
+ */
+#define MACHINE_START(_type, _name)			\
+static const struct machine_desc __mach_desc_##_type	\
+__used							\
+__attribute__((__section__(".arch.info.init"))) = {	\
+	.name		= _name,
+
+#define MACHINE_END				\
+};
+
+#endif /* _METAG_MACH_ARCH_H_ */
diff --git a/arch/metag/include/asm/metag_isa.h b/arch/metag/include/asm/metag_isa.h
new file mode 100644
index 0000000..c8aa2ae
--- /dev/null
+++ b/arch/metag/include/asm/metag_isa.h
@@ -0,0 +1,81 @@
+/*
+ * asm/metag_isa.h
+ *
+ * Copyright (C) 2000-2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Various defines for Meta instruction set.
+ */
+
+#ifndef _ASM_METAG_ISA_H_
+#define _ASM_METAG_ISA_H_
+
+
+/* L1 cache layout */
+
+/* Data cache line size as bytes and shift */
+#define DCACHE_LINE_BYTES 64
+#define DCACHE_LINE_S     6
+
+/* Number of ways in the data cache */
+#define DCACHE_WAYS       4
+
+/* Instruction cache line size as bytes and shift */
+#define ICACHE_LINE_BYTES 64
+#define ICACHE_LINE_S     6
+
+/* Number of ways in the instruction cache */
+#define ICACHE_WAYS       4
+
+
+/*
+ * CACHEWD/CACHEWL instructions use the bottom 8 bits of the data presented to
+ * control the operation actually achieved.
+ */
+/* Use of these two bits should be discouraged since the bits dont have
+ * consistent meanings
+ */
+#define CACHEW_ICACHE_BIT           0x01
+#define CACHEW_TLBFLUSH_BIT         0x02
+
+#define CACHEW_FLUSH_L1D_L2         0x0
+#define CACHEW_INVALIDATE_L1I       0x1
+#define CACHEW_INVALIDATE_L1DTLB    0x2
+#define CACHEW_INVALIDATE_L1ITLB    0x3
+#define CACHEW_WRITEBACK_L1D_L2     0x4
+#define CACHEW_INVALIDATE_L1D       0x8
+#define CACHEW_INVALIDATE_L1D_L2    0xC
+
+/*
+ * CACHERD/CACHERL instructions use bits 3:5 of the address presented to
+ * control the operation achieved and hence the specific result.
+ */
+#define CACHER_ADDR_BITS            0xFFFFFFC0
+#define CACHER_OPER_BITS            0x00000030
+#define CACHER_OPER_S               4
+#define     CACHER_OPER_LINPHY          0
+#define CACHER_ICACHE_BIT           0x00000008
+#define CACHER_ICACHE_S             3
+
+/*
+ * CACHERD/CACHERL LINPHY Oper result is one/two 32-bit words
+ *
+ *  If CRLINPHY0_VAL_BIT (Bit 0) set then,
+ *      Lower 32-bits corresponds to MMCU_ENTRY_* above.
+ *      Upper 32-bits corresponds to CRLINPHY1_* values below (if requested).
+ *  else
+ *      Lower 32-bits corresponds to CRLINPHY0_* values below.
+ *      Upper 32-bits undefined.
+ */
+#define CRLINPHY0_VAL_BIT      0x00000001
+#define CRLINPHY0_FIRST_BIT    0x00000004 /* Set if VAL=0 due to first level */
+
+#define CRLINPHY1_READ_BIT     0x00000001 /* Set if reads permitted          */
+#define CRLINPHY1_SINGLE_BIT   0x00000004 /* Set if TLB does not cache entry */
+#define CRLINPHY1_PAGEMSK_BITS 0x0000FFF0 /* Set to ((2^n-1)>>12) value      */
+#define CRLINPHY1_PAGEMSK_S    4
+
+#endif /* _ASM_METAG_ISA_H_ */
diff --git a/arch/metag/include/asm/metag_mem.h b/arch/metag/include/asm/metag_mem.h
new file mode 100644
index 0000000..3f7b54d
--- /dev/null
+++ b/arch/metag/include/asm/metag_mem.h
@@ -0,0 +1,1106 @@
+/*
+ * asm/metag_mem.h
+ *
+ * Copyright (C) 2000-2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Various defines for Meta (memory-mapped) registers.
+ */
+
+#ifndef _ASM_METAG_MEM_H_
+#define _ASM_METAG_MEM_H_
+
+/*****************************************************************************
+ *                   META MEMORY MAP LINEAR ADDRESS VALUES
+ ****************************************************************************/
+/*
+ * COMMON MEMORY MAP
+ * -----------------
+ */
+
+#define LINSYSTEM_BASE  0x00200000
+#define LINSYSTEM_LIMIT 0x07FFFFFF
+
+/* Linear cache flush now implemented via DCACHE instruction. These defines
+   related to a special region that used to exist for achieving cache flushes.
+ */
+#define         LINSYSLFLUSH_S 0
+
+#define     LINSYSRES0_BASE     0x00200000
+#define     LINSYSRES0_LIMIT    0x01FFFFFF
+
+#define     LINSYSCUSTOM_BASE 0x02000000
+#define     LINSYSCUSTOM_LIMIT   0x02FFFFFF
+
+#define     LINSYSEXPAND_BASE 0x03000000
+#define     LINSYSEXPAND_LIMIT   0x03FFFFFF
+
+#define     LINSYSEVENT_BASE  0x04000000
+#define         LINSYSEVENT_WR_ATOMIC_UNLOCK    0x04000000
+#define         LINSYSEVENT_WR_ATOMIC_LOCK      0x04000040
+#define         LINSYSEVENT_WR_CACHE_DISABLE    0x04000080
+#define         LINSYSEVENT_WR_CACHE_ENABLE     0x040000C0
+#define         LINSYSEVENT_WR_COMBINE_FLUSH    0x04000100
+#define         LINSYSEVENT_WR_FENCE            0x04000140
+#define     LINSYSEVENT_LIMIT   0x04000FFF
+
+#define     LINSYSCFLUSH_BASE   0x04400000
+#define         LINSYSCFLUSH_DCACHE_LINE    0x04400000
+#define         LINSYSCFLUSH_ICACHE_LINE    0x04500000
+#define         LINSYSCFLUSH_MMCU           0x04700000
+#ifndef METAC_1_2
+#define         LINSYSCFLUSH_TxMMCU_BASE    0x04700020
+#define         LINSYSCFLUSH_TxMMCU_STRIDE  0x00000008
+#endif
+#define         LINSYSCFLUSH_ADDR_BITS      0x000FFFFF
+#define         LINSYSCFLUSH_ADDR_S         0
+#define     LINSYSCFLUSH_LIMIT  0x047FFFFF
+
+#define     LINSYSCTRL_BASE     0x04800000
+#define     LINSYSCTRL_LIMIT    0x04FFFFFF
+
+#define     LINSYSMTABLE_BASE   0x05000000
+#define     LINSYSMTABLE_LIMIT  0x05FFFFFF
+
+#define     LINSYSDIRECT_BASE   0x06000000
+#define     LINSYSDIRECT_LIMIT  0x07FFFFFF
+
+#define LINLOCAL_BASE   0x08000000
+#define LINLOCAL_LIMIT  0x7FFFFFFF
+
+#define LINCORE_BASE    0x80000000
+#define LINCORE_LIMIT   0x87FFFFFF
+
+#define LINCORE_CODE_BASE  0x80000000
+#define LINCORE_CODE_LIMIT 0x81FFFFFF
+
+#define LINCORE_DATA_BASE  0x82000000
+#define LINCORE_DATA_LIMIT 0x83FFFFFF
+
+
+/* The core can support locked icache lines in this region */
+#define LINCORE_ICACHE_BASE  0x84000000
+#define LINCORE_ICACHE_LIMIT 0x85FFFFFF
+
+/* The core can support locked dcache lines in this region */
+#define LINCORE_DCACHE_BASE  0x86000000
+#define LINCORE_DCACHE_LIMIT 0x87FFFFFF
+
+#define LINGLOBAL_BASE  0x88000000
+#define LINGLOBAL_LIMIT 0xFFFDFFFF
+
+/*
+ * CHIP Core Register Map
+ * ----------------------
+ */
+#define CORE_HWBASE     0x04800000
+#define PRIV_HWBASE     0x04810000
+#define TRIG_HWBASE     0x04820000
+#define SYSC_HWBASE     0x04830000
+
+/*****************************************************************************
+ *         INTER-THREAD KICK REGISTERS FOR SOFTWARE EVENT GENERATION
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that can be used to supply
+ * kicks to threads that service arbitrary software events.
+ */
+
+#define T0KICK     0x04800800   /* Background kick 0     */
+#define     TXXKICK_MAX 0xFFFF  /* Maximum kicks */
+#define     TnXKICK_STRIDE      0x00001000  /* Thread scale value    */
+#define     TnXKICK_STRIDE_S    12
+#define T0KICKI    0x04800808   /* Interrupt kick 0      */
+#define     TXIKICK_OFFSET  0x00000008  /* Int level offset value */
+#define T1KICK     0x04801800   /* Background kick 1     */
+#define T1KICKI    0x04801808   /* Interrupt kick 1      */
+#define T2KICK     0x04802800   /* Background kick 2     */
+#define T2KICKI    0x04802808   /* Interrupt kick 2      */
+#define T3KICK     0x04803800   /* Background kick 3     */
+#define T3KICKI    0x04803808   /* Interrupt kick 3      */
+
+/*****************************************************************************
+ *                GLOBAL REGISTER ACCESS RESOURCES
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that allow access to the
+ * internal state of all threads in order to allow global set-up of thread
+ * state and external handling of thread events, errors, or debugging.
+ *
+ * The actual unit and register index values needed to access individul
+ * registers are chip specific see - METAC_TXUXX_VALUES in metac_x_y.h.
+ * However two C array initialisers TXUXX_MASKS and TGUXX_MASKS will always be
+ * defined to allow arbitrary loading, display, and saving of all valid
+ * register states without detailed knowledge of their purpose - TXUXX sets
+ * bits for all valid registers and TGUXX sets bits for the sub-set which are
+ * global.
+ */
+
+#define T0UCTREG0   0x04800000  /* Access to all CT regs */
+#define TnUCTRX_STRIDE      0x00001000  /* Thread scale value    */
+#define TXUCTREGn_STRIDE    0x00000008  /* Register scale value  */
+
+#define TXUXXRXDT  0x0480FFF0   /* Data to/from any threads reg */
+#define TXUXXRXRQ  0x0480FFF8
+#define     TXUXXRXRQ_DREADY_BIT 0x80000000  /* Poll for done */
+#define     TXUXXRXRQ_DSPEXT_BIT 0x00020000  /* Addr DSP Regs */
+#define     TXUXXRXRQ_RDnWR_BIT  0x00010000  /* Set for read  */
+#define     TXUXXRXRQ_TX_BITS    0x00003000  /* Thread number */
+#define     TXUXXRXRQ_TX_S       12
+#define     TXUXXRXRQ_RX_BITS    0x000001F0  /* Register num  */
+#define     TXUXXRXRQ_RX_S       4
+#define         TXUXXRXRQ_DSPRARD0    0      /* DSP RAM A Read Pointer 0 */
+#define         TXUXXRXRQ_DSPRARD1    1      /* DSP RAM A Read Pointer 1 */
+#define         TXUXXRXRQ_DSPRAWR0    2      /* DSP RAM A Write Pointer 0 */
+#define         TXUXXRXRQ_DSPRAWR2    3      /* DSP RAM A Write Pointer 1 */
+#define         TXUXXRXRQ_DSPRBRD0    4      /* DSP RAM B Read Pointer 0 */
+#define         TXUXXRXRQ_DSPRBRD1    5      /* DSP RAM B Read Pointer 1 */
+#define         TXUXXRXRQ_DSPRBWR0    6      /* DSP RAM B Write Pointer 0 */
+#define         TXUXXRXRQ_DSPRBWR1    7      /* DSP RAM B Write Pointer 1 */
+#define         TXUXXRXRQ_DSPRARINC0  8      /* DSP RAM A Read Increment 0 */
+#define         TXUXXRXRQ_DSPRARINC1  9      /* DSP RAM A Read Increment 1 */
+#define         TXUXXRXRQ_DSPRAWINC0 10      /* DSP RAM A Write Increment 0 */
+#define         TXUXXRXRQ_DSPRAWINC1 11      /* DSP RAM A Write Increment 1 */
+#define         TXUXXRXRQ_DSPRBRINC0 12      /* DSP RAM B Read Increment 0 */
+#define         TXUXXRXRQ_DSPRBRINC1 13      /* DSP RAM B Read Increment 1 */
+#define         TXUXXRXRQ_DSPRBWINC0 14      /* DSP RAM B Write Increment 0 */
+#define         TXUXXRXRQ_DSPRBWINC1 15      /* DSP RAM B Write Increment 1 */
+
+#define         TXUXXRXRQ_ACC0L0     16      /* Accumulator 0 bottom 32-bits */
+#define         TXUXXRXRQ_ACC1L0     17      /* Accumulator 1 bottom 32-bits */
+#define         TXUXXRXRQ_ACC2L0     18      /* Accumulator 2 bottom 32-bits */
+#define         TXUXXRXRQ_ACC3L0     19      /* Accumulator 3 bottom 32-bits */
+#define         TXUXXRXRQ_ACC0HI     20      /* Accumulator 0 top 8-bits */
+#define         TXUXXRXRQ_ACC1HI     21      /* Accumulator 1 top 8-bits */
+#define         TXUXXRXRQ_ACC2HI     22      /* Accumulator 2 top 8-bits */
+#define         TXUXXRXRQ_ACC3HI     23      /* Accumulator 3 top 8-bits */
+#define     TXUXXRXRQ_UXX_BITS   0x0000000F  /* Unit number   */
+#define     TXUXXRXRQ_UXX_S      0
+
+/*****************************************************************************
+ *          PRIVILEGE CONTROL VALUES FOR MEMORY MAPPED RESOURCES
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that give control over and
+ * the privilege required to access other memory mapped resources. These
+ * registers themselves always require privilege to update them.
+ */
+
+#define TXPRIVREG_STRIDE    0x8 /* Delta between per-thread regs */
+#define TXPRIVREG_STRIDE_S  3
+
+/*
+ * Each bit 0 to 15 defines privilege required to access internal register
+ * regions 0x04800000 to 0x048FFFFF in 64k chunks
+ */
+#define T0PIOREG    0x04810100
+#define T1PIOREG    0x04810108
+#define T2PIOREG    0x04810110
+#define T3PIOREG    0x04810118
+
+/*
+ * Each bit 0 to 31 defines privilege required to use the pair of
+ * system events implemented as writee in the regions 0x04000000 to
+ * 0x04000FFF in 2*64 byte chunks.
+ */
+#define T0PSYREG    0x04810180
+#define T1PSYREG    0x04810188
+#define T2PSYREG    0x04810190
+#define T3PSYREG    0x04810198
+
+/*
+ * CHIP PRIV CONTROLS
+ * ------------------
+ */
+
+/* The TXPIOREG register holds a bit mask directly mappable to
+   corresponding addresses in the range 0x04800000 to 049FFFFF */
+#define     TXPIOREG_ADDR_BITS  0x1F0000 /* Up to 32x64K bytes */
+#define     TXPIOREG_ADDR_S     16
+
+/* Hence based on the _HWBASE values ... */
+#define     TXPIOREG_CORE_BIT       (1<<((0x04800000>>16)&0x1F))
+#define     TXPIOREG_PRIV_BIT       (1<<((0x04810000>>16)&0x1F))
+#define     TXPIOREG_TRIG_BIT       (1<<((0x04820000>>16)&0x1F))
+#define     TXPIOREG_SYSC_BIT       (1<<((0x04830000>>16)&0x1F))
+
+#define     TXPIOREG_WRC_BIT          0x00080000  /* Wr combiner reg priv */
+#define     TXPIOREG_LOCALBUS_RW_BIT  0x00040000  /* Local bus rd/wr priv */
+#define     TXPIOREG_SYSREGBUS_RD_BIT 0x00020000  /* Sys reg bus write priv */
+#define     TXPIOREG_SYSREGBUS_WR_BIT 0x00010000  /* Sys reg bus read priv */
+
+/* CORE region privilege controls */
+#define T0PRIVCORE 0x04800828
+#define         TXPRIVCORE_TXBKICK_BIT   0x001  /* Background kick priv */
+#define         TXPRIVCORE_TXIKICK_BIT   0x002  /* Interrupt kick priv  */
+#define         TXPRIVCORE_TXAMAREGX_BIT 0x004  /* TXAMAREG4|5|6 priv   */
+#define TnPRIVCORE_STRIDE 0x00001000
+
+#define T0PRIVSYSR 0x04810000
+#define     TnPRIVSYSR_STRIDE   0x00000008
+#define     TnPRIVSYSR_STRIDE_S 3
+#define     TXPRIVSYSR_CFLUSH_BIT     0x01
+#define     TXPRIVSYSR_MTABLE_BIT     0x02
+#define     TXPRIVSYSR_DIRECT_BIT     0x04
+#ifdef METAC_1_2
+#define     TXPRIVSYSR_ALL_BITS       0x07
+#else
+#define     TXPRIVSYSR_CORE_BIT       0x08
+#define     TXPRIVSYSR_CORECODE_BIT   0x10
+#define     TXPRIVSYSR_ALL_BITS       0x1F
+#endif
+#define T1PRIVSYSR 0x04810008
+#define T2PRIVSYSR 0x04810010
+#define T3PRIVSYSR 0x04810018
+
+/*****************************************************************************
+ *          H/W TRIGGER STATE/LEVEL REGISTERS AND H/W TRIGGER VECTORS
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that give control over and
+ * the state of hardware trigger sources both external to the META processor
+ * and internal to it.
+ */
+
+#define HWSTATMETA  0x04820000  /* Hardware status/clear META trig */
+#define         HWSTATMETA_T0HALT_BITS 0xF
+#define         HWSTATMETA_T0HALT_S    0
+#define     HWSTATMETA_T0BHALT_BIT 0x1  /* Background HALT */
+#define     HWSTATMETA_T0IHALT_BIT 0x2  /* Interrupt HALT  */
+#define     HWSTATMETA_T0PHALT_BIT 0x4  /* PF/RO Memory HALT */
+#define     HWSTATMETA_T0AMATR_BIT 0x8  /* AMA trigger */
+#define     HWSTATMETA_TnINT_S     4    /* Shift by (thread*4) */
+#define HWSTATEXT   0x04820010  /* H/W status/clear external trigs  0-31 */
+#define HWSTATEXT2  0x04820018  /* H/W status/clear external trigs 32-63 */
+#define HWSTATEXT4  0x04820020  /* H/W status/clear external trigs 64-95 */
+#define HWSTATEXT6  0x04820028  /* H/W status/clear external trigs 96-128 */
+#define HWLEVELEXT  0x04820030  /* Edge/Level type of external trigs  0-31 */
+#define HWLEVELEXT2 0x04820038  /* Edge/Level type of external trigs 32-63 */
+#define HWLEVELEXT4 0x04820040  /* Edge/Level type of external trigs 64-95 */
+#define HWLEVELEXT6 0x04820048  /* Edge/Level type of external trigs 96-128 */
+#define     HWLEVELEXT_XXX_LEVEL 1  /* Level sense logic in HWSTATEXTn */
+#define     HWLEVELEXT_XXX_EDGE  0
+#define HWMASKEXT   0x04820050  /* Enable/disable of external trigs  0-31 */
+#define HWMASKEXT2  0x04820058  /* Enable/disable of external trigs 32-63 */
+#define HWMASKEXT4  0x04820060  /* Enable/disable of external trigs 64-95 */
+#define HWMASKEXT6  0x04820068  /* Enable/disable of external trigs 96-128 */
+#define T0VECINT_BHALT  0x04820500  /* Background HALT trigger vector */
+#define     TXVECXXX_BITS   0xF       /* Per-trigger vector vals 0,1,4-15 */
+#define     TXVECXXX_S  0
+#define T0VECINT_IHALT  0x04820508  /* Interrupt HALT */
+#define T0VECINT_PHALT  0x04820510  /* PF/RO memory fault */
+#define T0VECINT_AMATR  0x04820518  /* AMA trigger */
+#define     TnVECINT_STRIDE 0x00000020  /* Per thread stride */
+#define HWVEC0EXT   0x04820700  /* Vectors for external triggers  0-31 */
+#define HWVEC20EXT  0x04821700  /* Vectors for external triggers 32-63 */
+#define HWVEC40EXT  0x04822700  /* Vectors for external triggers 64-95 */
+#define HWVEC60EXT  0x04823700  /* Vectors for external triggers 96-127 */
+#define     HWVECnEXT_STRIDE 0x00000008 /* Per trigger stride */
+#define HWVECnEXT_DEBUG 0x1         /* Redirect trigger to debug i/f */
+
+/*
+ * CORE HWCODE-BREAKPOINT REGISTERS/VALUES
+ * ---------------------------------------
+ */
+#define CODEB0ADDR         0x0480FF00  /* Address specifier */
+#define     CODEBXADDR_MATCHX_BITS 0xFFFFFFFC
+#define     CODEBXADDR_MATCHX_S    2
+#define CODEB0CTRL         0x0480FF08  /* Control */
+#define     CODEBXCTRL_MATEN_BIT   0x80000000   /* Match 'Enable'  */
+#define     CODEBXCTRL_MATTXEN_BIT 0x10000000   /* Match threadn enable */
+#define     CODEBXCTRL_HITC_BITS   0x00FF0000   /* Hit counter   */
+#define     CODEBXCTRL_HITC_S      16
+#define           CODEBXHITC_NEXT  0xFF     /* Next 'hit' will trigger */
+#define           CODEBXHITC_HIT1  0x00     /* No 'hits' after trigger */
+#define     CODEBXCTRL_MMASK_BITS  0x0000FFFC   /* Mask ADDR_MATCH bits */
+#define     CODEBXCTRL_MMASK_S     2
+#define     CODEBXCTRL_MATLTX_BITS 0x00000003   /* Match threadn LOCAL addr */
+#define     CODEBXCTRL_MATLTX_S    0            /* Match threadn LOCAL addr */
+#define CODEBnXXXX_STRIDE      0x00000010  /* Stride between CODEB reg sets */
+#define CODEBnXXXX_STRIDE_S    4
+#define CODEBnXXXX_LIMIT       3           /* Sets 0-3 */
+
+/*
+ * CORE DATA-WATCHPOINT REGISTERS/VALUES
+ * -------------------------------------
+ */
+#define DATAW0ADDR         0x0480FF40  /* Address specifier */
+#define     DATAWXADDR_MATCHR_BITS 0xFFFFFFF8
+#define     DATAWXADDR_MATCHR_S    3
+#define     DATAWXADDR_MATCHW_BITS 0xFFFFFFFF
+#define     DATAWXADDR_MATCHW_S    0
+#define DATAW0CTRL         0x0480FF48  /* Control */
+#define     DATAWXCTRL_MATRD_BIT   0x80000000   /* Match 'Read'  */
+#ifndef METAC_1_2
+#define     DATAWXCTRL_MATNOTTX_BIT 0x20000000  /* Invert threadn enable */
+#endif
+#define     DATAWXCTRL_MATWR_BIT   0x40000000   /* Match 'Write' */
+#define     DATAWXCTRL_MATTXEN_BIT 0x10000000   /* Match threadn enable */
+#define     DATAWXCTRL_WRSIZE_BITS 0x0F000000   /* Write Match Size */
+#define     DATAWXCTRL_WRSIZE_S    24
+#define         DATAWWRSIZE_ANY   0         /* Any size transaction matches */
+#define         DATAWWRSIZE_8BIT  1     /* Specific sizes ... */
+#define         DATAWWRSIZE_16BIT 2
+#define         DATAWWRSIZE_32BIT 3
+#define         DATAWWRSIZE_64BIT 4
+#define     DATAWXCTRL_HITC_BITS   0x00FF0000   /* Hit counter   */
+#define     DATAWXCTRL_HITC_S      16
+#define           DATAWXHITC_NEXT  0xFF     /* Next 'hit' will trigger */
+#define           DATAWXHITC_HIT1  0x00     /* No 'hits' after trigger */
+#define     DATAWXCTRL_MMASK_BITS 0x0000FFF8    /* Mask ADDR_MATCH bits */
+#define     DATAWXCTRL_MMASK_S    3
+#define     DATAWXCTRL_MATLTX_BITS 0x00000003   /* Match threadn LOCAL addr */
+#define     DATAWXCTRL_MATLTX_S    0            /* Match threadn LOCAL addr */
+#define DATAW0DMATCH0       0x0480FF50 /* Write match data */
+#define DATAW0DMATCH1       0x0480FF58
+#define DATAW0DMASK0        0x0480FF60 /* Write match data mask */
+#define DATAW0DMASK1        0x0480FF68
+#define DATAWnXXXX_STRIDE      0x00000040  /* Stride between DATAW reg sets */
+#define DATAWnXXXX_STRIDE_S    6
+#define DATAWnXXXX_LIMIT       1           /* Sets 0,1 */
+
+/*
+ * CHIP Automatic Mips Allocation control registers
+ * ------------------------------------------------
+ */
+
+/* CORE memory mapped AMA registers */
+#define T0AMAREG4   0x04800810
+#define     TXAMAREG4_POOLSIZE_BITS 0x3FFFFF00
+#define     TXAMAREG4_POOLSIZE_S    8
+#define     TXAMAREG4_AVALUE_BITS   0x000000FF
+#define     TXAMAREG4_AVALUE_S  0
+#define T0AMAREG5   0x04800818
+#define     TXAMAREG5_POOLC_BITS    0x07FFFFFF
+#define         TXAMAREG5_POOLC_S       0
+#define T0AMAREG6   0x04800820
+#define     TXAMAREG6_DLINEDEF_BITS 0x00FFFFF0
+#define         TXAMAREG6_DLINEDEF_S    0
+#define TnAMAREGX_STRIDE    0x00001000
+
+/*
+ * Memory Management Control Unit Table Entries
+ * --------------------------------------------
+ */
+#define MMCU_ENTRY_S         4            /* -> Entry size                */
+#define MMCU_ENTRY_ADDR_BITS 0xFFFFF000   /* Physical address             */
+#define MMCU_ENTRY_ADDR_S    12           /* -> Page size                 */
+#define MMCU_ENTRY_CWIN_BITS 0x000000C0   /* Caching 'window' selection   */
+#define MMCU_ENTRY_CWIN_S    6
+#define     MMCU_CWIN_UNCACHED  0 /* May not be memory etc.  */
+#define     MMCU_CWIN_BURST     1 /* Cached but LRU unset */
+#define     MMCU_CWIN_C1SET     2 /* Cached in 1 set only */
+#define     MMCU_CWIN_CACHED    3 /* Fully cached            */
+#define MMCU_ENTRY_CACHE_BIT 0x00000080   /* Set for cached region         */
+#define     MMCU_ECACHE1_FULL_BIT  0x00000040 /* Use all the sets */
+#define     MMCU_ECACHE0_BURST_BIT 0x00000040 /* Match bursts     */
+#define MMCU_ENTRY_SYS_BIT   0x00000010   /* Sys-coherent access required  */
+#define MMCU_ENTRY_WRC_BIT   0x00000008   /* Write combining allowed       */
+#define MMCU_ENTRY_PRIV_BIT  0x00000004   /* Privilege required            */
+#define MMCU_ENTRY_WR_BIT    0x00000002   /* Writes allowed                */
+#define MMCU_ENTRY_VAL_BIT   0x00000001   /* Entry is valid                */
+
+#ifdef METAC_2_1
+/*
+ * Extended first-level/top table entries have extra/larger fields in later
+ * cores as bits 11:0 previously had no effect in such table entries.
+ */
+#define MMCU_E1ENT_ADDR_BITS 0xFFFFFFC0   /* Physical address             */
+#define MMCU_E1ENT_ADDR_S    6            /*   -> resolution < page size  */
+#define MMCU_E1ENT_PGSZ_BITS 0x0000001E   /* Page size for 2nd level      */
+#define MMCU_E1ENT_PGSZ_S    1
+#define     MMCU_E1ENT_PGSZ0_POWER   12   /* PgSz  0 -> 4K */
+#define     MMCU_E1ENT_PGSZ_MAX      10   /* PgSz 10 -> 4M maximum */
+#define MMCU_E1ENT_MINIM_BIT 0x00000020
+#endif /* METAC_2_1 */
+
+/* MMCU control register in SYSC region */
+#define MMCU_TABLE_PHYS_ADDR        0x04830010
+#define     MMCU_TABLE_PHYS_ADDR_BITS   0xFFFFFFFC
+#ifdef METAC_2_1
+#define     MMCU_TABLE_PHYS_EXTEND      0x00000001     /* See below */
+#endif
+#define MMCU_DCACHE_CTRL_ADDR       0x04830018
+#define     MMCU_xCACHE_CTRL_ENABLE_BIT     0x00000001
+#define     MMCU_xCACHE_CTRL_PARTITION_BIT  0x00000000 /* See xCPART below */
+#define MMCU_ICACHE_CTRL_ADDR       0x04830020
+
+#ifdef METAC_2_1
+
+/*
+ * Allow direct access to physical memory used to implement MMU table.
+ *
+ * Each is based on a corresponding MMCU_TnLOCAL_TABLE_PHYSn or similar
+ *    MMCU_TnGLOBAL_TABLE_PHYSn register pair (see next).
+ */
+#define LINSYSMEMT0L_BASE   0x05000000
+#define LINSYSMEMT0L_LIMIT  0x051FFFFF
+#define     LINSYSMEMTnX_STRIDE     0x00200000  /*  2MB Local per thread */
+#define     LINSYSMEMTnX_STRIDE_S   21
+#define     LINSYSMEMTXG_OFFSET     0x00800000  /* +2MB Global per thread */
+#define     LINSYSMEMTXG_OFFSET_S   23
+#define LINSYSMEMT1L_BASE   0x05200000
+#define LINSYSMEMT1L_LIMIT  0x053FFFFF
+#define LINSYSMEMT2L_BASE   0x05400000
+#define LINSYSMEMT2L_LIMIT  0x055FFFFF
+#define LINSYSMEMT3L_BASE   0x05600000
+#define LINSYSMEMT3L_LIMIT  0x057FFFFF
+#define LINSYSMEMT0G_BASE   0x05800000
+#define LINSYSMEMT0G_LIMIT  0x059FFFFF
+#define LINSYSMEMT1G_BASE   0x05A00000
+#define LINSYSMEMT1G_LIMIT  0x05BFFFFF
+#define LINSYSMEMT2G_BASE   0x05C00000
+#define LINSYSMEMT2G_LIMIT  0x05DFFFFF
+#define LINSYSMEMT3G_BASE   0x05E00000
+#define LINSYSMEMT3G_LIMIT  0x05FFFFFF
+
+/*
+ * Extended MMU table functionality allows a sparse or flat table to be
+ * described much more efficiently than before.
+ */
+#define MMCU_T0LOCAL_TABLE_PHYS0    0x04830700
+#define   MMCU_TnX_TABLE_PHYSX_STRIDE    0x20   /* Offset per thread */
+#define   MMCU_TnX_TABLE_PHYSX_STRIDE_S  5
+#define   MMCU_TXG_TABLE_PHYSX_OFFSET    0x10   /* Global versus local */
+#define   MMCU_TXG_TABLE_PHYSX_OFFSET_S  4
+#define     MMCU_TBLPHYS0_DCCTRL_BITS       0x000000DF  /* DC controls  */
+#define     MMCU_TBLPHYS0_ENTLB_BIT         0x00000020  /* Cache in TLB */
+#define     MMCU_TBLPHYS0_TBLSZ_BITS        0x00000F00  /* Area supported */
+#define     MMCU_TBLPHYS0_TBLSZ_S           8
+#define         MMCU_TBLPHYS0_TBLSZ0_POWER      22  /* 0 -> 4M */
+#define         MMCU_TBLPHYS0_TBLSZ_MAX         9   /* 9 -> 2G */
+#define     MMCU_TBLPHYS0_LINBASE_BITS      0xFFC00000  /* Linear base */
+#define     MMCU_TBLPHYS0_LINBASE_S         22
+
+#define MMCU_T0LOCAL_TABLE_PHYS1    0x04830708
+#define     MMCU_TBLPHYS1_ADDR_BITS         0xFFFFFFFC  /* Physical base */
+#define     MMCU_TBLPHYS1_ADDR_S            2
+
+#define MMCU_T0GLOBAL_TABLE_PHYS0   0x04830710
+#define MMCU_T0GLOBAL_TABLE_PHYS1   0x04830718
+#define MMCU_T1LOCAL_TABLE_PHYS0    0x04830720
+#define MMCU_T1LOCAL_TABLE_PHYS1    0x04830728
+#define MMCU_T1GLOBAL_TABLE_PHYS0   0x04830730
+#define MMCU_T1GLOBAL_TABLE_PHYS1   0x04830738
+#define MMCU_T2LOCAL_TABLE_PHYS0    0x04830740
+#define MMCU_T2LOCAL_TABLE_PHYS1    0x04830748
+#define MMCU_T2GLOBAL_TABLE_PHYS0   0x04830750
+#define MMCU_T2GLOBAL_TABLE_PHYS1   0x04830758
+#define MMCU_T3LOCAL_TABLE_PHYS0    0x04830760
+#define MMCU_T3LOCAL_TABLE_PHYS1    0x04830768
+#define MMCU_T3GLOBAL_TABLE_PHYS0   0x04830770
+#define MMCU_T3GLOBAL_TABLE_PHYS1   0x04830778
+
+#define MMCU_T0EBWCCTRL             0x04830640
+#define     MMCU_TnEBWCCTRL_BITS    0x00000007
+#define     MMCU_TnEBWCCTRL_S       0
+#define         MMCU_TnEBWCCCTRL_DISABLE_ALL 0
+#define         MMCU_TnEBWCCCTRL_ABIT25      1
+#define         MMCU_TnEBWCCCTRL_ABIT26      2
+#define         MMCU_TnEBWCCCTRL_ABIT27      3
+#define         MMCU_TnEBWCCCTRL_ABIT28      4
+#define         MMCU_TnEBWCCCTRL_ABIT29      5
+#define         MMCU_TnEBWCCCTRL_ABIT30      6
+#define         MMCU_TnEBWCCCTRL_ENABLE_ALL  7
+#define MMCU_TnEBWCCTRL_STRIDE      8
+
+#endif /* METAC_2_1 */
+
+
+/* Registers within the SYSC register region */
+#define METAC_ID                0x04830000
+#define     METAC_ID_MAJOR_BITS     0xFF000000
+#define     METAC_ID_MAJOR_S        24
+#define     METAC_ID_MINOR_BITS     0x00FF0000
+#define     METAC_ID_MINOR_S        16
+#define     METAC_ID_REV_BITS       0x0000FF00
+#define     METAC_ID_REV_S          8
+#define     METAC_ID_MAINT_BITS     0x000000FF
+#define     METAC_ID_MAINT_S        0
+
+#ifdef METAC_2_1
+/* Use of this section is strongly deprecated */
+#define METAC_ID2               0x04830008
+#define     METAC_ID2_DESIGNER_BITS 0xFFFF0000  /* Modified by customer */
+#define     METAC_ID2_DESIGNER_S    16
+#define     METAC_ID2_MINOR2_BITS   0x00000F00  /* 3rd digit of prod rev */
+#define     METAC_ID2_MINOR2_S      8
+#define     METAC_ID2_CONFIG_BITS   0x000000FF  /* Wrapper configuration */
+#define     METAC_ID2_CONFIG_S      0
+
+/* Primary core identification and configuration information */
+#define METAC_CORE_ID           0x04831000
+#define     METAC_COREID_GROUP_BITS   0xFF000000
+#define     METAC_COREID_GROUP_S      24
+#define         METAC_COREID_GROUP_METAG  0x14
+#define     METAC_COREID_ID_BITS      0x00FF0000
+#define     METAC_COREID_ID_S         16
+#define         METAC_COREID_ID_W32       0x10   /* >= for 32-bit pipeline */
+#define     METAC_COREID_CONFIG_BITS  0x0000FFFF
+#define     METAC_COREID_CONFIG_S     0
+#define       METAC_COREID_CFGCACHE_BITS    0x0007
+#define       METAC_COREID_CFGCACHE_S       0
+#define           METAC_COREID_CFGCACHE_NOM       0
+#define           METAC_COREID_CFGCACHE_TYPE0     1
+#define           METAC_COREID_CFGCACHE_NOMMU     1 /* Alias for TYPE0 */
+#define           METAC_COREID_CFGCACHE_NOCACHE   2
+#define           METAC_COREID_CFGCACHE_PRIVNOMMU 3
+#define       METAC_COREID_CFGDSP_BITS      0x0038
+#define       METAC_COREID_CFGDSP_S         3
+#define           METAC_COREID_CFGDSP_NOM       0
+#define           METAC_COREID_CFGDSP_MIN       1
+#define       METAC_COREID_NOFPACC_BIT      0x0040 /* Set if no FPU accum */
+#define       METAC_COREID_CFGFPU_BITS      0x0180
+#define       METAC_COREID_CFGFPU_S         7
+#define           METAC_COREID_CFGFPU_NOM       0
+#define           METAC_COREID_CFGFPU_SNGL      1
+#define           METAC_COREID_CFGFPU_DBL       2
+#define       METAC_COREID_NOAMA_BIT        0x0200 /* Set if no AMA present */
+#define       METAC_COREID_NOCOH_BIT        0x0400 /* Set if no Gbl coherency */
+
+/* Core revision information */
+#define METAC_CORE_REV          0x04831008
+#define     METAC_COREREV_DESIGN_BITS   0xFF000000
+#define     METAC_COREREV_DESIGN_S      24
+#define     METAC_COREREV_MAJOR_BITS    0x00FF0000
+#define     METAC_COREREV_MAJOR_S       16
+#define     METAC_COREREV_MINOR_BITS    0x0000FF00
+#define     METAC_COREREV_MINOR_S       8
+#define     METAC_COREREV_MAINT_BITS    0x000000FF
+#define     METAC_COREREV_MAINT_S       0
+
+/* Configuration information control outside the core */
+#define METAC_CORE_DESIGNER1    0x04831010      /* Arbitrary value */
+#define METAC_CORE_DESIGNER2    0x04831018      /* Arbitrary value */
+
+/* Configuration information covering presence/number of various features */
+#define METAC_CORE_CONFIG2      0x04831020
+#define     METAC_CORECFG2_COREDBGTYPE_BITS 0x60000000   /* Core debug type */
+#define     METAC_CORECFG2_COREDBGTYPE_S    29
+#define     METAC_CORECFG2_DCSMALL_BIT      0x04000000   /* Data cache small */
+#define     METAC_CORECFG2_ICSMALL_BIT      0x02000000   /* Inst cache small */
+#define     METAC_CORECFG2_DCSZNP_BITS      0x01C00000   /* Data cache size np */
+#define     METAC_CORECFG2_DCSZNP_S         22
+#define     METAC_CORECFG2_ICSZNP_BITS      0x00380000  /* Inst cache size np */
+#define     METAC_CORECFG2_ICSZNP_S         19
+#define     METAC_CORECFG2_DCSZ_BITS        0x00070000   /* Data cache size */
+#define     METAC_CORECFG2_DCSZ_S           16
+#define         METAC_CORECFG2_xCSZ_4K          0        /* Allocated values */
+#define         METAC_CORECFG2_xCSZ_8K          1
+#define         METAC_CORECFG2_xCSZ_16K         2
+#define         METAC_CORECFG2_xCSZ_32K         3
+#define         METAC_CORECFG2_xCSZ_64K         4
+#define     METAC_CORE_C2ICSZ_BITS          0x0000E000   /* Inst cache size */
+#define     METAC_CORE_C2ICSZ_S             13
+#define     METAC_CORE_GBLACC_BITS          0x00001800   /* Number of Global Acc */
+#define     METAC_CORE_GBLACC_S             11
+#define     METAC_CORE_GBLDXR_BITS          0x00000700   /* 0 -> 0, R -> 2^(R-1) */
+#define     METAC_CORE_GBLDXR_S             8
+#define     METAC_CORE_GBLAXR_BITS          0x000000E0   /* 0 -> 0, R -> 2^(R-1) */
+#define     METAC_CORE_GBLAXR_S             5
+#define     METAC_CORE_RTTRACE_BIT          0x00000010
+#define     METAC_CORE_WATCHN_BITS          0x0000000C   /* 0 -> 0, N -> 2^N */
+#define     METAC_CORE_WATCHN_S             2
+#define     METAC_CORE_BREAKN_BITS          0x00000003   /* 0 -> 0, N -> 2^N */
+#define     METAC_CORE_BREAKN_S             0
+
+/* Configuration information covering presence/number of various features */
+#define METAC_CORE_CONFIG3      0x04831028
+#define     METAC_CORECFG3_L2C_REV_ID_BITS          0x000F0000   /* Revision of L2 cache */
+#define     METAC_CORECFG3_L2C_REV_ID_S             16
+#define     METAC_CORECFG3_L2C_LINE_SIZE_BITS       0x00003000   /* L2 line size */
+#define     METAC_CORECFG3_L2C_LINE_SIZE_S          12
+#define         METAC_CORECFG3_L2C_LINE_SIZE_64B    0x0          /* 64 bytes */
+#define     METAC_CORECFG3_L2C_NUM_WAYS_BITS        0x00000F00   /* L2 number of ways (2^n) */
+#define     METAC_CORECFG3_L2C_NUM_WAYS_S           8
+#define     METAC_CORECFG3_L2C_SIZE_BITS            0x000000F0   /* L2 size (2^n) */
+#define     METAC_CORECFG3_L2C_SIZE_S               4
+#define     METAC_CORECFG3_L2C_UNIFIED_BIT          0x00000004   /* Unified cache: */
+#define     METAC_CORECFG3_L2C_UNIFIED_S            2
+#define       METAC_CORECFG3_L2C_UNIFIED_UNIFIED    1            /* - Unified D/I cache */
+#define       METAC_CORECFG3_L2C_UNIFIED_SEPARATE   0            /* - Separate D/I cache */
+#define     METAC_CORECFG3_L2C_MODE_BIT             0x00000002   /* Cache Mode: */
+#define     METAC_CORECFG3_L2C_MODE_S               1
+#define       METAC_CORECFG3_L2C_MODE_WRITE_BACK    1            /* - Write back */
+#define       METAC_CORECFG3_L2C_MODE_WRITE_THROUGH 0            /* - Write through */
+#define     METAC_CORECFG3_L2C_HAVE_L2C_BIT         0x00000001   /* Have L2C */
+#define     METAC_CORECFG3_L2C_HAVE_L2C_S           0
+
+#endif /* METAC_2_1 */
+
+#define SYSC_CACHE_MMU_CONFIG       0x04830028
+#ifdef METAC_2_1
+#define     SYSC_CMMUCFG_DCSKEWABLE_BIT 0x00000040
+#define     SYSC_CMMUCFG_ICSKEWABLE_BIT 0x00000020
+#define     SYSC_CMMUCFG_DCSKEWOFF_BIT  0x00000010  /* Skew association override  */
+#define     SYSC_CMMUCFG_ICSKEWOFF_BIT  0x00000008  /* -> default 0 on if present */
+#define     SYSC_CMMUCFG_MODE_BITS      0x00000007  /* Access to old state */
+#define     SYSC_CMMUCFG_MODE_S         0
+#define         SYSC_CMMUCFG_ON             0x7
+#define         SYSC_CMMUCFG_EBYPASS        0x6   /* Enhanced by-pass mode */
+#define         SYSC_CMMUCFG_EBYPASSIC      0x4   /* EB just inst cache */
+#define         SYSC_CMMUCFG_EBYPASSDC      0x2   /* EB just data cache */
+#endif /* METAC_2_1 */
+/* Old definitions, Keep them for now */
+#define         SYSC_CMMUCFG_MMU_ON_BIT     0x1
+#define         SYSC_CMMUCFG_DC_ON_BIT      0x2
+#define         SYSC_CMMUCFG_IC_ON_BIT      0x4
+
+#define SYSC_JTAG_THREAD            0x04830030
+#define     SYSC_JTAG_TX_BITS           0x00000003 /* Read only bits! */
+#define     SYSC_JTAG_TX_S              0
+#define     SYSC_JTAG_PRIV_BIT          0x00000004
+#ifdef METAC_2_1
+#define     SYSC_JTAG_SLAVETX_BITS      0x00000018
+#define     SYSC_JTAG_SLAVETX_S         3
+#endif /* METAC_2_1 */
+
+#define SYSC_DCACHE_FLUSH           0x04830038
+#define SYSC_ICACHE_FLUSH           0x04830040
+#define  SYSC_xCACHE_FLUSH_INIT     0x1
+#define MMCU_DIRECTMAP0_ADDR        0x04830080 /* LINSYSDIRECT_BASE -> */
+#define     MMCU_DIRECTMAPn_STRIDE      0x00000010 /* 4 Region settings */
+#define     MMCU_DIRECTMAPn_S           4
+#define         MMCU_DIRECTMAPn_ADDR_BITS       0xFF800000
+#define         MMCU_DIRECTMAPn_ADDR_S          23
+#define         MMCU_DIRECTMAPn_ADDR_SCALE      0x00800000 /* 8M Regions */
+#ifdef METAC_2_1
+/*
+ * These fields in the above registers provide MMCU_ENTRY_* values
+ *   for each direct mapped region to enable optimisation of these areas.
+ *       (LSB similar to VALID must be set for enhancments to be active)
+ */
+#define         MMCU_DIRECTMAPn_ENHANCE_BIT     0x00000001 /* 0 = no optim */
+#define         MMCU_DIRECTMAPn_DCCTRL_BITS     0x000000DF /* Get DC Ctrl */
+#define         MMCU_DIRECTMAPn_DCCTRL_S        0
+#define         MMCU_DIRECTMAPn_ICCTRL_BITS     0x0000C000 /* Get IC Ctrl */
+#define         MMCU_DIRECTMAPn_ICCTRL_S        8
+#define         MMCU_DIRECTMAPn_ENTLB_BIT       0x00000020 /* Cache in TLB */
+#define         MMCU_DIRECTMAPn_ICCWIN_BITS     0x0000C000 /* Get IC Win Bits */
+#define         MMCU_DIRECTMAPn_ICCWIN_S        14
+#endif /* METAC_2_1 */
+
+#define MMCU_DIRECTMAP1_ADDR        0x04830090
+#define MMCU_DIRECTMAP2_ADDR        0x048300a0
+#define MMCU_DIRECTMAP3_ADDR        0x048300b0
+
+/*
+ * These bits partion each threads use of data cache or instruction cache
+ * resource by modifying the top 4 bits of the address within the cache
+ * storage area.
+ */
+#define SYSC_DCPART0 0x04830200
+#define     SYSC_xCPARTn_STRIDE   0x00000008
+#define     SYSC_xCPARTL_AND_BITS 0x0000000F /* Masks top 4 bits */
+#define     SYSC_xCPARTL_AND_S    0
+#define     SYSC_xCPARTG_AND_BITS 0x00000F00 /* Masks top 4 bits */
+#define     SYSC_xCPARTG_AND_S    8
+#define     SYSC_xCPARTL_OR_BITS  0x000F0000 /* Ors into top 4 bits */
+#define     SYSC_xCPARTL_OR_S     16
+#define     SYSC_xCPARTG_OR_BITS  0x0F000000 /* Ors into top 4 bits */
+#define     SYSC_xCPARTG_OR_S     24
+#define     SYSC_CWRMODE_BIT      0x80000000 /* Write cache mode bit */
+
+#define SYSC_DCPART1 0x04830208
+#define SYSC_DCPART2 0x04830210
+#define SYSC_DCPART3 0x04830218
+#define SYSC_ICPART0 0x04830220
+#define SYSC_ICPART1 0x04830228
+#define SYSC_ICPART2 0x04830230
+#define SYSC_ICPART3 0x04830238
+
+/*
+ * META Core Memory and Cache Update registers
+ */
+#define SYSC_MCMDATAX  0x04830300   /* 32-bit read/write data register */
+#define SYSC_MCMDATAT  0x04830308   /* Read or write data triggers oper */
+#define SYSC_MCMGCTRL  0x04830310   /* Control register */
+#define     SYSC_MCMGCTRL_READ_BIT  0x00000001 /* Set to issue 1st read */
+#define     SYSC_MCMGCTRL_AINC_BIT  0x00000002 /* Set for auto-increment */
+#define     SYSC_MCMGCTRL_ADDR_BITS 0x000FFFFC /* Address or index */
+#define     SYSC_MCMGCTRL_ADDR_S    2
+#define     SYSC_MCMGCTRL_ID_BITS   0x0FF00000 /* Internal memory block Id */
+#define     SYSC_MCMGCTRL_ID_S      20
+#define         SYSC_MCMGID_NODEV       0xFF /* No Device Selected */
+#define         SYSC_MCMGID_DSPRAM0A    0x04 /* DSP RAM D0 block A access */
+#define         SYSC_MCMGID_DSPRAM0B    0x05 /* DSP RAM D0 block B access */
+#define         SYSC_MCMGID_DSPRAM1A    0x06 /* DSP RAM D1 block A access */
+#define         SYSC_MCMGID_DSPRAM1B    0x07 /* DSP RAM D1 block B access */
+#define         SYSC_MCMGID_DCACHEL     0x08 /* DCACHE lines (64-bytes/line) */
+#ifdef METAC_2_1
+#define         SYSC_MCMGID_DCACHETLB   0x09 /* DCACHE TLB ( Read Only )     */
+#endif /* METAC_2_1 */
+#define         SYSC_MCMGID_DCACHET     0x0A /* DCACHE tags (32-bits/line)   */
+#define         SYSC_MCMGID_DCACHELRU   0x0B /* DCACHE LRU (8-bits/line)     */
+#define         SYSC_MCMGID_ICACHEL     0x0C /* ICACHE lines (64-bytes/line  */
+#ifdef METAC_2_1
+#define         SYSC_MCMGID_ICACHETLB   0x0D /* ICACHE TLB (Read Only )     */
+#endif /* METAC_2_1 */
+#define         SYSC_MCMGID_ICACHET     0x0E /* ICACHE Tags (32-bits/line)   */
+#define         SYSC_MCMGID_ICACHELRU   0x0F /* ICACHE LRU (8-bits/line )    */
+#define         SYSC_MCMGID_COREIRAM0   0x10 /* Core code mem id 0 */
+#define         SYSC_MCMGID_COREIRAMn   0x17
+#define         SYSC_MCMGID_COREDRAM0   0x18 /* Core data mem id 0 */
+#define         SYSC_MCMGID_COREDRAMn   0x1F
+#ifdef METAC_2_1
+#define         SYSC_MCMGID_DCACHEST    0x20 /* DCACHE ST ( Read Only )      */
+#define         SYSC_MCMGID_ICACHEST    0x21 /* ICACHE ST ( Read Only )      */
+#define         SYSC_MCMGID_DCACHETLBLRU 0x22 /* DCACHE TLB LRU ( Read Only )*/
+#define         SYSC_MCMGID_ICACHETLBLRU 0x23 /* ICACHE TLB LRU( Read Only ) */
+#define         SYSC_MCMGID_DCACHESTLRU 0x24 /* DCACHE ST LRU ( Read Only )  */
+#define         SYSC_MCMGID_ICACHESTLRU 0x25 /* ICACHE ST LRU ( Read Only )  */
+#define         SYSC_MCMGID_DEBUGTLB    0x26 /* DEBUG TLB ( Read Only )      */
+#define         SYSC_MCMGID_DEBUGST     0x27 /* DEBUG ST ( Read Only )       */
+#define         SYSC_MCMGID_L2CACHEL    0x30 /* L2 Cache Lines (64-bytes/line) */
+#define         SYSC_MCMGID_L2CACHET    0x31 /* L2 Cache Tags (32-bits/line) */
+#define         SYSC_MCMGID_COPROX0     0x70 /* Coprocessor port id 0 */
+#define         SYSC_MCMGID_COPROXn     0x77
+#endif /* METAC_2_1 */
+#define     SYSC_MCMGCTRL_TR31_BIT  0x80000000 /* Trigger 31 on completion */
+#define SYSC_MCMSTATUS 0x04830318   /* Status read only */
+#define     SYSC_MCMSTATUS_IDLE_BIT 0x00000001
+
+/* META System Events */
+#define SYSC_SYS_EVENT            0x04830400
+#define     SYSC_SYSEVT_ATOMIC_BIT      0x00000001
+#define     SYSC_SYSEVT_CACHEX_BIT      0x00000002
+#define SYSC_ATOMIC_LOCK          0x04830408
+#define     SYSC_ATOMIC_STATE_TX_BITS 0x0000000F
+#define     SYSC_ATOMIC_STATE_TX_S    0
+#ifdef METAC_1_2
+#define     SYSC_ATOMIC_STATE_DX_BITS 0x000000F0
+#define     SYSC_ATOMIC_STATE_DX_S    4
+#else /* METAC_1_2 */
+#define     SYSC_ATOMIC_SOURCE_BIT    0x00000010
+#endif /* !METAC_1_2 */
+
+
+#ifdef METAC_2_1
+
+/* These definitions replace the EXPAND_TIMER_DIV register defines which are to
+ * be deprecated.
+ */
+#define SYSC_TIMER_DIV            0x04830140
+#define     SYSC_TIMDIV_BITS      0x000000FF
+#define     SYSC_TIMDIV_S         0
+
+/* META Enhanced by-pass control for local and global region */
+#define MMCU_LOCAL_EBCTRL   0x04830600
+#define MMCU_GLOBAL_EBCTRL  0x04830608
+#define     MMCU_EBCTRL_SINGLE_BIT      0x00000020 /* TLB Uncached */
+/*
+ * These fields in the above registers provide MMCU_ENTRY_* values
+ *   for each direct mapped region to enable optimisation of these areas.
+ */
+#define     MMCU_EBCTRL_DCCTRL_BITS     0x000000C0 /* Get DC Ctrl */
+#define     MMCU_EBCTRL_DCCTRL_S        0
+#define     MMCU_EBCTRL_ICCTRL_BITS     0x0000C000 /* Get DC Ctrl */
+#define     MMCU_EBCTRL_ICCTRL_S        8
+
+/* META Cached Core Mode Registers */
+#define MMCU_T0CCM_ICCTRL   0x04830680     /* Core cached code control */
+#define     MMCU_TnCCM_xxCTRL_STRIDE    8
+#define     MMCU_TnCCM_xxCTRL_STRIDE_S  3
+#define MMCU_T1CCM_ICCTRL   0x04830688
+#define MMCU_T2CCM_ICCTRL   0x04830690
+#define MMCU_T3CCM_ICCTRL   0x04830698
+#define MMCU_T0CCM_DCCTRL   0x048306C0     /* Core cached data control */
+#define MMCU_T1CCM_DCCTRL   0x048306C8
+#define MMCU_T2CCM_DCCTRL   0x048306D0
+#define MMCU_T3CCM_DCCTRL   0x048306D8
+#define     MMCU_TnCCM_ENABLE_BIT       0x00000001
+#define     MMCU_TnCCM_WIN3_BIT         0x00000002
+#define     MMCU_TnCCM_DCWRITE_BIT      0x00000004  /* In DCCTRL only */
+#define     MMCU_TnCCM_REGSZ_BITS       0x00000F00
+#define     MMCU_TnCCM_REGSZ_S          8
+#define         MMCU_TnCCM_REGSZ0_POWER      12     /* RegSz 0 -> 4K */
+#define         MMCU_TnCCM_REGSZ_MAXBYTES    0x00080000  /* 512K max */
+#define     MMCU_TnCCM_ADDR_BITS        0xFFFFF000
+#define     MMCU_TnCCM_ADDR_S           12
+
+#endif /* METAC_2_1 */
+
+/*
+ * Hardware performance counter registers
+ * --------------------------------------
+ */
+#ifdef METAC_2_1
+/* Two Performance Counter Internal Core Events Control registers */
+#define PERF_ICORE0   0x0480FFD0
+#define PERF_ICORE1   0x0480FFD8
+#define     PERFI_CTRL_BITS    0x0000000F
+#define     PERFI_CTRL_S       0
+#define         PERFI_CAH_DMISS    0x0  /* Dcache Misses in cache (TLB Hit) */
+#define         PERFI_CAH_IMISS    0x1  /* Icache Misses in cache (TLB Hit) */
+#define         PERFI_TLB_DMISS    0x2  /* Dcache Misses in per-thread TLB */
+#define         PERFI_TLB_IMISS    0x3  /* Icache Misses in per-thread TLB */
+#define         PERFI_TLB_DWRHITS  0x4  /* DC Write-Hits in per-thread TLB */
+#define         PERFI_TLB_DWRMISS  0x5  /* DC Write-Miss in per-thread TLB */
+#define         PERFI_CAH_DLFETCH  0x8  /* DC Read cache line fetch */
+#define         PERFI_CAH_ILFETCH  0x9  /* DC Read cache line fetch */
+#define         PERFI_CAH_DWFETCH  0xA  /* DC Read cache word fetch */
+#define         PERFI_CAH_IWFETCH  0xB  /* DC Read cache word fetch */
+#endif /* METAC_2_1 */
+
+/* Two memory-mapped hardware performance counter registers */
+#define PERF_COUNT0 0x0480FFE0
+#define PERF_COUNT1 0x0480FFE8
+
+/* Fields in PERF_COUNTn registers */
+#define PERF_COUNT_BITS  0x00ffffff /* Event count value */
+
+#define PERF_THREAD_BITS 0x0f000000 /* Thread mask selects threads */
+#define PERF_THREAD_S    24
+
+#define PERF_CTRL_BITS   0xf0000000 /* Event filter control */
+#define PERF_CTRL_S      28
+
+#define    PERFCTRL_SUPER   0  /* Superthread cycles */
+#define    PERFCTRL_REWIND  1  /* Rewinds due to Dcache Misses */
+#ifdef METAC_2_1
+#define    PERFCTRL_SUPREW  2  /* Rewinds of superthreaded cycles (no mask) */
+
+#define    PERFCTRL_CYCLES  3  /* Counts all cycles (no mask) */
+
+#define    PERFCTRL_PREDBC  4  /* Conditional branch predictions */
+#define    PERFCTRL_MISPBC  5  /* Conditional branch mispredictions */
+#define    PERFCTRL_PREDRT  6  /* Return predictions */
+#define    PERFCTRL_MISPRT  7  /* Return mispredictions */
+#endif /* METAC_2_1 */
+
+#define    PERFCTRL_DHITS   8  /* Dcache Hits */
+#define    PERFCTRL_IHITS   9  /* Icache Hits */
+#define    PERFCTRL_IMISS   10 /* Icache Misses in cache or TLB */
+#ifdef METAC_2_1
+#define    PERFCTRL_DCSTALL 11 /* Dcache+TLB o/p delayed (per-thread) */
+#define    PERFCTRL_ICSTALL 12 /* Icache+TLB o/p delayed (per-thread) */
+
+#define    PERFCTRL_INT     13 /* Internal core delailed events (see next) */
+#define    PERFCTRL_EXT     15 /* External source in core periphery */
+#endif /* METAC_2_1 */
+
+#ifdef METAC_2_1
+/* These definitions replace the EXPAND_PERFCHANx register defines which are to
+ * be deprecated.
+ */
+#define PERF_CHAN0 0x04830150
+#define PERF_CHAN1 0x04830158
+#define     PERF_CHAN_BITS    0x0000000F
+#define     PERF_CHAN_S       0
+#define         PERFCHAN_WRC_WRBURST   0x0   /* Write combiner write burst */
+#define         PERFCHAN_WRC_WRITE     0x1   /* Write combiner write       */
+#define         PERFCHAN_WRC_RDBURST   0x2   /* Write combiner read burst  */
+#define         PERFCHAN_WRC_READ      0x3   /* Write combiner read        */
+#define         PERFCHAN_PREARB_DELAY  0x4   /* Pre-arbiter delay cycle    */
+					     /* Cross-bar hold-off cycle:  */
+#define         PERFCHAN_XBAR_HOLDWRAP 0x5   /*    wrapper register        */
+#define         PERFCHAN_XBAR_HOLDSBUS 0x6   /*    system bus (ATP only)   */
+#define         PERFCHAN_XBAR_HOLDCREG 0x9   /*    core registers          */
+#define         PERFCHAN_L2C_MISS      0x6   /* L2 Cache miss              */
+#define         PERFCHAN_L2C_HIT       0x7   /* L2 Cache hit               */
+#define         PERFCHAN_L2C_WRITEBACK 0x8   /* L2 Cache writeback         */
+					     /* Admission delay cycle:     */
+#define         PERFCHAN_INPUT_CREG    0xB   /*    core registers          */
+#define         PERFCHAN_INPUT_INTR    0xC   /*    internal ram            */
+#define         PERFCHAN_INPUT_WRC     0xD   /*    write combiners(memory) */
+
+/* Should following be removed as not in TRM anywhere? */
+#define         PERFCHAN_XBAR_HOLDINTR 0x8   /*    internal ram            */
+#define         PERFCHAN_INPUT_SBUS    0xA   /*    register port           */
+/* End of remove section. */
+
+#define         PERFCHAN_MAINARB_DELAY 0xF   /* Main arbiter delay cycle   */
+
+#endif /* METAC_2_1 */
+
+#ifdef METAC_2_1
+/*
+ * Write combiner registers
+ * ------------------------
+ *
+ * These replace the EXPAND_T0WRCOMBINE register defines, which will be
+ * deprecated.
+ */
+#define WRCOMB_CONFIG0             0x04830100
+#define     WRCOMB_LFFEn_BIT           0x00004000  /* Enable auto line full flush */
+#define     WRCOMB_ENABLE_BIT          0x00002000  /* Enable write combiner */
+#define     WRCOMB_TIMEOUT_ENABLE_BIT  0x00001000  /* Timeout flush enable */
+#define     WRCOMB_TIMEOUT_COUNT_BITS  0x000003FF
+#define     WRCOMB_TIMEOUT_COUNT_S     0
+#define WRCOMB_CONFIG4             0x04830180
+#define     WRCOMB_PARTALLOC_BITS      0x000000C0
+#define     WRCOMB_PARTALLOC_S         64
+#define     WRCOMB_PARTSIZE_BITS       0x00000030
+#define     WRCOMB_PARTSIZE_S          4
+#define     WRCOMB_PARTOFFSET_BITS     0x0000000F
+#define     WRCOMB_PARTOFFSET_S        0
+#define WRCOMB_CONFIG_STRIDE       8
+#endif /* METAC_2_1 */
+
+#ifdef METAC_2_1
+/*
+ * Thread arbiter registers
+ * ------------------------
+ *
+ * These replace the EXPAND_T0ARBITER register defines, which will be
+ * deprecated.
+ */
+#define ARBITER_ARBCONFIG0       0x04830120
+#define     ARBCFG_BPRIORITY_BIT     0x02000000
+#define     ARBCFG_IPRIORITY_BIT     0x01000000
+#define     ARBCFG_PAGE_BITS         0x00FF0000
+#define     ARBCFG_PAGE_S            16
+#define     ARBCFG_BBASE_BITS        0x0000FF00
+#define     ARGCFG_BBASE_S           8
+#define     ARBCFG_IBASE_BITS        0x000000FF
+#define     ARBCFG_IBASE_S           0
+#define ARBITER_TTECONFIG0       0x04820160
+#define     ARBTTE_IUPPER_BITS       0xFF000000
+#define     ARBTTE_IUPPER_S          24
+#define     ARBTTE_ILOWER_BITS       0x00FF0000
+#define     ARBTTE_ILOWER_S          16
+#define     ARBTTE_BUPPER_BITS       0x0000FF00
+#define     ARBTTE_BUPPER_S          8
+#define     ARBTTE_BLOWER_BITS       0x000000FF
+#define     ARBTTE_BLOWER_S          0
+#define ARBITER_STRIDE           8
+#endif /* METAC_2_1 */
+
+/*
+ * Expansion area registers
+ * --------------------------------------
+ */
+
+/* These defines are to be deprecated. See above instead. */
+#define EXPAND_T0WRCOMBINE         0x03000000
+#ifdef METAC_2_1
+#define     EXPWRC_LFFEn_BIT           0x00004000  /* Enable auto line full flush */
+#endif /* METAC_2_1 */
+#define     EXPWRC_ENABLE_BIT          0x00002000  /* Enable write combiner */
+#define     EXPWRC_TIMEOUT_ENABLE_BIT  0x00001000  /* Timeout flush enable */
+#define     EXPWRC_TIMEOUT_COUNT_BITS  0x000003FF
+#define     EXPWRC_TIMEOUT_COUNT_S     0
+#define EXPAND_TnWRCOMBINE_STRIDE  0x00000008
+
+/* These defines are to be deprecated. See above instead. */
+#define EXPAND_T0ARBITER         0x03000020
+#define     EXPARB_BPRIORITY_BIT 0x02000000
+#define     EXPARB_IPRIORITY_BIT 0x01000000
+#define     EXPARB_PAGE_BITS     0x00FF0000
+#define     EXPARB_PAGE_S        16
+#define     EXPARB_BBASE_BITS    0x0000FF00
+#define     EXPARB_BBASE_S       8
+#define     EXPARB_IBASE_BITS    0x000000FF
+#define     EXPARB_IBASE_S       0
+#define EXPAND_TnARBITER_STRIDE  0x00000008
+
+/* These definitions are to be deprecated. See above instead. */
+#define EXPAND_TIMER_DIV   0x03000040
+#define     EXPTIM_DIV_BITS      0x000000FF
+#define     EXPTIM_DIV_S         0
+
+/* These definitions are to be deprecated. See above instead. */
+#define EXPAND_PERFCHAN0   0x03000050
+#define EXPAND_PERFCHAN1   0x03000058
+#define     EXPPERF_CTRL_BITS    0x0000000F
+#define     EXPPERF_CTRL_S       0
+#define         EXPPERF_WRC_WRBURST   0x0   /* Write combiner write burst */
+#define         EXPPERF_WRC_WRITE     0x1   /* Write combiner write       */
+#define         EXPPERF_WRC_RDBURST   0x2   /* Write combiner read burst  */
+#define         EXPPERF_WRC_READ      0x3   /* Write combiner read        */
+#define         EXPPERF_PREARB_DELAY  0x4   /* Pre-arbiter delay cycle    */
+					    /* Cross-bar hold-off cycle:  */
+#define         EXPPERF_XBAR_HOLDWRAP 0x5   /*    wrapper register        */
+#define         EXPPERF_XBAR_HOLDSBUS 0x6   /*    system bus              */
+#ifdef METAC_1_2
+#define         EXPPERF_XBAR_HOLDLBUS 0x7   /*    local bus               */
+#else /* METAC_1_2 */
+#define         EXPPERF_XBAR_HOLDINTR 0x8   /*    internal ram            */
+#define         EXPPERF_XBAR_HOLDCREG 0x9   /*    core registers          */
+					    /* Admission delay cycle:     */
+#define         EXPPERF_INPUT_SBUS    0xA   /*    register port           */
+#define         EXPPERF_INPUT_CREG    0xB   /*    core registers          */
+#define         EXPPERF_INPUT_INTR    0xC   /*    internal ram            */
+#define         EXPPERF_INPUT_WRC     0xD   /*    write combiners(memory) */
+#endif /* !METAC_1_2 */
+#define         EXPPERF_MAINARB_DELAY 0xF   /* Main arbiter delay cycle   */
+
+/*
+ * Debug port registers
+ * --------------------------------------
+ */
+
+/* Data Exchange Register */
+#define DBGPORT_MDBGDATAX                    0x0
+
+/* Data Transfer register */
+#define DBGPORT_MDBGDATAT                    0x4
+
+/* Control Register 0 */
+#define DBGPORT_MDBGCTRL0                    0x8
+#define     DBGPORT_MDBGCTRL0_ADDR_BITS      0xFFFFFFFC
+#define     DBGPORT_MDBGCTRL0_ADDR_S         2
+#define     DBGPORT_MDBGCTRL0_AUTOINCR_BIT   0x00000002
+#define     DBGPORT_MDBGCTRL0_RD_BIT         0x00000001
+
+/* Control Register 1 */
+#define DBGPORT_MDBGCTRL1                    0xC
+#ifdef METAC_2_1
+#define    DBGPORT_MDBGCTRL1_DEFERRTHREAD_BITS      0xC0000000
+#define    DBGPORT_MDBGCTRL1_DEFERRTHREAD_S         30
+#endif /* METAC_2_1 */
+#define     DBGPORT_MDBGCTRL1_LOCK2_INTERLOCK_BIT   0x20000000
+#define     DBGPORT_MDBGCTRL1_ATOMIC_INTERLOCK_BIT  0x10000000
+#define     DBGPORT_MDBGCTRL1_TRIGSTATUS_BIT        0x08000000
+#define     DBGPORT_MDBGCTRL1_GBLPORT_IDLE_BIT      0x04000000
+#define     DBGPORT_MDBGCTRL1_COREMEM_IDLE_BIT      0x02000000
+#define     DBGPORT_MDBGCTRL1_READY_BIT             0x01000000
+#ifdef METAC_2_1
+#define     DBGPORT_MDBGCTRL1_DEFERRID_BITS         0x00E00000
+#define     DBGPORT_MDBGCTRL1_DEFERRID_S            21
+#define     DBGPORT_MDBGCTRL1_DEFERR_BIT            0x00100000
+#endif /* METAC_2_1 */
+#define     DBGPORT_MDBGCTRL1_WR_ACTIVE_BIT         0x00040000
+#define     DBGPORT_MDBGCTRL1_COND_LOCK2_BIT        0x00020000
+#define     DBGPORT_MDBGCTRL1_LOCK2_BIT             0x00010000
+#define     DBGPORT_MDBGCTRL1_DIAGNOSE_BIT          0x00008000
+#define     DBGPORT_MDBGCTRL1_FORCEDIAG_BIT         0x00004000
+#define     DBGPORT_MDBGCTRL1_MEMFAULT_BITS         0x00003000
+#define     DBGPORT_MDBGCTRL1_MEMFAULT_S            12
+#define     DBGPORT_MDBGCTRL1_TRIGGER_BIT           0x00000100
+#ifdef METAC_2_1
+#define     DBGPORT_MDBGCTRL1_INTSPECIAL_BIT        0x00000080
+#define     DBGPORT_MDBGCTRL1_INTRUSIVE_BIT         0x00000040
+#endif /* METAC_2_1 */
+#define     DBGPORT_MDBGCTRL1_THREAD_BITS           0x00000030 /* Thread mask selects threads */
+#define     DBGPORT_MDBGCTRL1_THREAD_S              4
+#define     DBGPORT_MDBGCTRL1_TRANS_SIZE_BITS       0x0000000C
+#define     DBGPORT_MDBGCTRL1_TRANS_SIZE_S          2
+#define         DBGPORT_MDBGCTRL1_TRANS_SIZE_32_BIT 0x00000000
+#define         DBGPORT_MDBGCTRL1_TRANS_SIZE_16_BIT 0x00000004
+#define         DBGPORT_MDBGCTRL1_TRANS_SIZE_8_BIT  0x00000008
+#define     DBGPORT_MDBGCTRL1_BYTE_ROUND_BITS       0x00000003
+#define     DBGPORT_MDBGCTRL1_BYTE_ROUND_S          0
+#define         DBGPORT_MDBGCTRL1_BYTE_ROUND_8_BIT  0x00000001
+#define         DBGPORT_MDBGCTRL1_BYTE_ROUND_16_BIT 0x00000002
+
+
+/* L2 Cache registers */
+#define SYSC_L2C_INIT              0x048300C0
+#define SYSC_L2C_INIT_INIT                  1
+#define SYSC_L2C_INIT_IN_PROGRESS           0
+#define SYSC_L2C_INIT_COMPLETE              1
+
+#define SYSC_L2C_ENABLE            0x048300D0
+#define SYSC_L2C_ENABLE_ENABLE_BIT     0x00000001
+#define SYSC_L2C_ENABLE_PFENABLE_BIT   0x00000002
+
+#define SYSC_L2C_PURGE             0x048300C8
+#define SYSC_L2C_PURGE_PURGE                1
+#define SYSC_L2C_PURGE_IN_PROGRESS          0
+#define SYSC_L2C_PURGE_COMPLETE             1
+
+#endif /* _ASM_METAG_MEM_H_ */
diff --git a/arch/metag/include/asm/metag_regs.h b/arch/metag/include/asm/metag_regs.h
new file mode 100644
index 0000000..acf4b8e
--- /dev/null
+++ b/arch/metag/include/asm/metag_regs.h
@@ -0,0 +1,1184 @@
+/*
+ * asm/metag_regs.h
+ *
+ * Copyright (C) 2000-2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Various defines for Meta core (non memory-mapped) registers.
+ */
+
+#ifndef _ASM_METAG_REGS_H_
+#define _ASM_METAG_REGS_H_
+
+/*
+ * CHIP Unit Identifiers and Valid/Global register number masks
+ * ------------------------------------------------------------
+ */
+#define TXUCT_ID    0x0     /* Control unit regs */
+#ifdef METAC_1_2
+#define     TXUCT_MASK  0xFF0FFFFF  /* Valid regs 0..31  */
+#else
+#define     TXUCT_MASK  0xFF1FFFFF  /* Valid regs 0..31  */
+#endif
+#define     TGUCT_MASK  0x00000000  /* No global regs    */
+#define TXUD0_ID    0x1     /* Data unit regs */
+#define TXUD1_ID    0x2
+#define     TXUDX_MASK  0xFFFFFFFF  /* Valid regs 0..31 */
+#define     TGUDX_MASK  0xFFFF0000  /* Global regs for base inst */
+#define     TXUDXDSP_MASK   0x0F0FFFFF  /* Valid DSP regs */
+#define     TGUDXDSP_MASK   0x0E0E0000  /* Global DSP ACC regs */
+#define TXUA0_ID    0x3     /* Address unit regs */
+#define TXUA1_ID    0x4
+#define     TXUAX_MASK  0x0000FFFF  /* Valid regs   0-15 */
+#define     TGUAX_MASK  0x0000FF00  /* Global regs  8-15 */
+#define TXUPC_ID    0x5     /* PC registers */
+#define     TXUPC_MASK  0x00000003  /* Valid regs   0- 1 */
+#define     TGUPC_MASK  0x00000000  /* No global regs    */
+#define TXUPORT_ID  0x6     /* Ports are not registers */
+#define TXUTR_ID    0x7
+#define     TXUTR_MASK  0x0000005F  /* Valid regs   0-3,4,6 */
+#define     TGUTR_MASK  0x00000000  /* No global regs    */
+#ifdef METAC_2_1
+#define TXUTT_ID    0x8
+#define     TXUTT_MASK  0x0000000F  /* Valid regs   0-3 */
+#define     TGUTT_MASK  0x00000010  /* Global reg   4   */
+#define TXUFP_ID    0x9     /* FPU regs */
+#define     TXUFP_MASK  0x0000FFFF  /* Valid regs   0-15 */
+#define     TGUFP_MASK  0x00000000  /* No global regs    */
+#endif /* METAC_2_1 */
+
+#ifdef METAC_1_2
+#define TXUXX_MASKS { TXUCT_MASK, TXUDX_MASK, TXUDX_MASK, TXUAX_MASK, \
+		      TXUAX_MASK, TXUPC_MASK,          0, TXUTR_MASK, \
+		      0, 0, 0, 0, 0, 0, 0, 0                          }
+#define TGUXX_MASKS { TGUCT_MASK, TGUDX_MASK, TGUDX_MASK, TGUAX_MASK, \
+		      TGUAX_MASK, TGUPC_MASK,          0, TGUTR_MASK, \
+		      0, 0, 0, 0, 0, 0, 0, 0                          }
+#else /* METAC_1_2 */
+#define TXUXX_MASKS { TXUCT_MASK, TXUDX_MASK, TXUDX_MASK, TXUAX_MASK, \
+		      TXUAX_MASK, TXUPC_MASK,          0, TXUTR_MASK, \
+		      TXUTT_MASK, TXUFP_MASK,          0,          0, \
+			       0,          0,          0,          0  }
+#define TGUXX_MASKS { TGUCT_MASK, TGUDX_MASK, TGUDX_MASK, TGUAX_MASK, \
+		      TGUAX_MASK, TGUPC_MASK,          0, TGUTR_MASK, \
+		      TGUTT_MASK, TGUFP_MASK,          0,          0, \
+			       0,          0,          0,          0  }
+#endif /* !METAC_1_2 */
+
+#define TXUXXDSP_MASKS { 0, TXUDXDSP_MASK, TXUDXDSP_MASK, 0, 0, 0, 0, 0, \
+			 0, 0, 0, 0, 0, 0, 0, 0                          }
+#define TGUXXDSP_MASKS { 0, TGUDXDSP_MASK, TGUDXDSP_MASK, 0, 0, 0, 0, 0, \
+			 0, 0, 0, 0, 0, 0, 0, 0                          }
+
+/* -------------------------------------------------------------------------
+;                          DATA AND ADDRESS UNIT REGISTERS
+;  -----------------------------------------------------------------------*/
+/*
+  Thread local D0 registers
+ */
+/*   D0.0    ; Holds 32-bit result, can be used as scratch */
+#define D0Re0 D0.0
+/*   D0.1    ; Used to pass Arg6_32 */
+#define D0Ar6 D0.1
+/*   D0.2    ; Used to pass Arg4_32 */
+#define D0Ar4 D0.2
+/*   D0.3    ; Used to pass Arg2_32 to a called routine (see D1.3 below) */
+#define D0Ar2 D0.3
+/*   D0.4    ; Can be used as scratch; used to save A0FrP in entry sequences */
+#define D0FrT D0.4
+/*   D0.5    ; C compiler assumes preservation, save with D1.5 if used */
+/*   D0.6    ; C compiler assumes preservation, save with D1.6 if used */
+/*   D0.7    ; C compiler assumes preservation, save with D1.7 if used */
+/*   D0.8    ; Use of D0.8 and above is not encouraged */
+/*   D0.9  */
+/*   D0.10 */
+/*   D0.11 */
+/*   D0.12 */
+/*   D0.13 */
+/*   D0.14 */
+/*   D0.15 */
+/*
+   Thread local D1 registers
+ */
+/*   D1.0    ; Holds top 32-bits of 64-bit result, can be used as scratch */
+#define D1Re0 D1.0
+/*   D1.1    ; Used to pass Arg5_32 */
+#define D1Ar5 D1.1
+/*   D1.2    ; Used to pass Arg3_32 */
+#define D1Ar3 D1.2
+/*   D1.3    ; Used to pass Arg1_32 (first 32-bit argument) to a called routine */
+#define D1Ar1 D1.3
+/*   D1.4    ; Used for Return Pointer, save during entry with A0FrP (via D0.4) */
+#define D1RtP D1.4
+/*   D1.5    ; C compiler assumes preservation, save if used */
+/*   D1.6    ; C compiler assumes preservation, save if used */
+/*   D1.7    ; C compiler assumes preservation, save if used */
+/*   D1.8    ; Use of D1.8 and above is not encouraged */
+/*   D1.9  */
+/*   D1.10 */
+/*   D1.11 */
+/*   D1.12 */
+/*   D1.13 */
+/*   D1.14 */
+/*   D1.15 */
+/*
+   Thread local A0 registers
+ */
+/*   A0.0    ; Primary stack pointer */
+#define A0StP A0.0
+/*   A0.1    ; Used as local frame pointer in C, save if used (via D0.4) */
+#define A0FrP A0.1
+/*   A0.2  */
+/*   A0.3  */
+/*   A0.4    ; Use of A0.4 and above is not encouraged */
+/*   A0.5  */
+/*   A0.6  */
+/*   A0.7  */
+/*
+   Thread local A1 registers
+ */
+/*   A1.0    ; Global static chain pointer - do not modify */
+#define A1GbP A1.0
+/*   A1.1    ; Local static chain pointer in C, can be used as scratch */
+#define A1LbP A1.1
+/*   A1.2  */
+/*   A1.3  */
+/*   A1.4    ; Use of A1.4 and above is not encouraged */
+/*   A1.5  */
+/*   A1.6  */
+/*   A1.7  */
+#ifdef METAC_2_1
+/* Renameable registers for use with Fast Interrupts */
+/* The interrupt stack pointer (usually a global register) */
+#define A0IStP A0IReg
+/* The interrupt global pointer (usually a global register) */
+#define A1IGbP A1IReg
+#endif
+/*
+   Further registers may be globally allocated via linkage/loading tools,
+   normally they are not used.
+ */
+/*-------------------------------------------------------------------------
+;                    STACK STRUCTURE and CALLING CONVENTION
+; -----------------------------------------------------------------------*/
+/*
+; Calling convention indicates that the following is the state of the
+; stack frame at the start of a routine-
+;
+;       Arg9_32 [A0StP+#-12]
+;       Arg8_32 [A0StP+#- 8]
+;       Arg7_32 [A0StP+#- 4]
+;   A0StP->
+;
+; Registers D1.3, D0.3, ..., to D0.1 are used to pass Arg1_32 to Arg6_32
+;   respectively. If a routine needs to store them on the stack in order
+;   to make sub-calls or because of the general complexity of the routine it
+;   is best to dump these registers immediately at the start of a routine
+;   using a MSETL or SETL instruction-
+;
+;   MSETL   [A0StP],D0Ar6,D0Ar4,D0Ar2; Only dump argments expected
+;or SETL    [A0StP+#8++],D0Ar2       ; Up to two 32-bit args expected
+;
+; For non-leaf routines it is always necessary to save and restore at least
+; the return address value D1RtP on the stack. Also by convention if the
+; frame is saved then a new A0FrP value must be set-up. So for non-leaf
+; routines at this point both these registers must be saved onto the stack
+; using a SETL instruction and the new A0FrP value is then set-up-
+;
+;   MOV     D0FrT,A0FrP
+;   ADD     A0FrP,A0StP,#0
+;   SETL    [A0StP+#8++],D0FrT,D1RtP
+;
+; Registers D0.5, D1.5, to D1.7 are assumed to be preserved across calls so
+;   a SETL or MSETL instruction can be used to save the current state
+;   of these registers if they are modified by the current routine-
+;
+;   MSETL   [A0StP],D0.5,D0.6,D0.7   ; Only save registers modified
+;or SETL    [A0StP+#8++],D0.5        ; Only D0.5 and/or D1.5 modified
+;
+; All of the above sequences can be combined into one maximal case-
+;
+;   MOV     D0FrT,A0FrP              ; Save and calculate new frame pointer
+;   ADD     A0FrP,A0StP,#(ARS)
+;   MSETL   [A0StP],D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+;
+; Having completed the above sequence the only remaining task on routine
+; entry is to reserve any local and outgoing argment storage space on the
+; stack. This instruction may be omitted if the size of this region is zero-
+;
+;   ADD     A0StP,A0StP,#(LCS)
+;
+; LCS is the first example use of one of a number of standard local defined
+; values that can be created to make assembler code more readable and
+; potentially more robust-
+;
+; #define ARS   0x18                 ; Register arg bytes saved on stack
+; #define FRS   0x20                 ; Frame save area size in bytes
+; #define LCS   0x00                 ; Locals and Outgoing arg size
+; #define ARO   (LCS+FRS)            ; Stack offset to access args
+;
+; All of the above defines should be undefined (#undef) at the end of each
+; routine to avoid accidental use in the next routine.
+;
+; Given all of the above the following stack structure is expected during
+; the body of a routine if all args passed in registers are saved during
+; entry-
+;
+;                                    ; 'Incoming args area'
+;         Arg10_32 [A0StP+#-((10*4)+ARO)]       Arg9_32  [A0StP+#-(( 9*4)+ARO)]
+;         Arg8_32  [A0StP+#-(( 8*4)+ARO)]       Arg7_32  [A0StP+#-(( 7*4)+ARO)]
+;--- Call point
+; D0Ar6=  Arg6_32  [A0StP+#-(( 6*4)+ARO)] D1Ar5=Arg5_32  [A0StP+#-(( 5*4)+ARO)]
+; D0Ar4=  Arg4_32  [A0StP+#-(( 4*4)+ARO)] D1Ar3=Arg3_32  [A0StP+#-(( 3*4)+ARO)]
+; D0Ar2=  Arg2_32  [A0StP+#-(( 2*4)+ARO)] D1Ar2=Arg1_32  [A0StP+#-(( 1*4)+ARO)]
+;                                    ; 'Frame area'
+; A0FrP-> D0FrT, D1RtP,
+;         D0.5, D1.5,
+;         D0.6, D1.6,
+;         D0.7, D1.7,
+;                                    ; 'Locals area'
+;         Loc0_32  [A0StP+# (( 0*4)-LCS)],      Loc1_32 [A0StP+# (( 1*4)-LCS)]
+;               .... other locals
+;         Locn_32  [A0StP+# (( n*4)-LCS)]
+;                                    ; 'Outgoing args area'
+;         Outm_32  [A0StP+#- ( m*4)]            .... other outgoing args
+;         Out8_32  [A0StP+#- ( 1*4)]            Out7_32  [A0StP+#- ( 1*4)]
+; A0StP-> (Out1_32-Out6_32 in regs D1Ar1-D0Ar6)
+;
+; The exit sequence for a non-leaf routine can use the frame pointer created
+; in the entry sequence to optimise the recovery of the full state-
+;
+;   MGETL   D0FrT,D0.5,D0.6,D0.7,[A0FrP]
+;   SUB     A0StP,A0FrP,#(ARS+FRS)
+;   MOV     A0FrP,D0FrT
+;   MOV     PC,D1RtP
+;
+; Having described the most complex non-leaf case above, it is worth noting
+; that if a routine is a leaf and does not use any of the caller-preserved
+; state. The routine can be implemented as-
+;
+;   ADD     A0StP,A0StP,#LCS
+;   .... body of routine
+;   SUB     A0StP,A0StP,#LCS
+;   MOV     PC,D1RtP
+;
+; The stack adjustments can also be omitted if no local storage is required.
+;
+; Another exit sequence structure is more applicable if for a leaf routine
+; with no local frame pointer saved/generated in which the call saved
+; registers need to be saved and restored-
+;
+;   MSETL   [A0StP],D0.5,D0.6,D0.7   ; Hence FRS is 0x18, ARS is 0x00
+;   ADD     A0StP,A0StP,#LCS
+;   .... body of routine
+;   GETL    D0.5,D1.5,[A0StP+#((0*8)-(FRS+LCS))]
+;   GETL    D0.6,D1.6,[A0StP+#((1*8)-(FRS+LCS))]
+;   GETL    D0.7,D1.7,[A0StP+#((2*8)-(FRS+LCS))]
+;   SUB     A0StP,A0StP,#(ARS+FRS+LCS)
+;   MOV     PC,D1RtP
+;
+; Lastly, to support profiling assembler code should use a fixed entry/exit
+; sequence if the trigger define _GMON_ASM is defined-
+;
+;   #ifndef _GMON_ASM
+;   ... optimised entry code
+;   #else
+;   ; Profiling entry case
+;   MOV     D0FrT,A0FrP              ; Save and calculate new frame pointer
+;   ADD     A0FrP,A0StP,#(ARS)
+;   MSETL   [A0StP],...,D0FrT,... or SETL    [A0FrP],D0FrT,D1RtP
+;   CALLR   D0FrT,_mcount_wrapper
+;   #endif
+;   ... body of routine
+;   #ifndef _GMON_ASM
+;   ... optimised exit code
+;   #else
+;   ; Profiling exit case
+;   MGETL   D0FrT,...,[A0FrP]     or GETL    D0FrT,D1RtP,[A0FrP++]
+;   SUB     A0StP,A0FrP,#(ARS+FRS)
+;   MOV     A0FrP,D0FrT
+;   MOV     PC,D1RtP
+;   #endif
+
+
+; -------------------------------------------------------------------------
+;                         CONTROL UNIT REGISTERS
+; -------------------------------------------------------------------------
+;
+; See the assembler guide, hardware documentation, or the field values
+; defined below for some details of the use of these registers.
+*/
+#define TXENABLE    CT.0    /* Need to define bit-field values in these */
+#define TXMODE      CT.1
+#define TXSTATUS    CT.2    /* DEFAULT 0x00020000 */
+#define TXRPT       CT.3
+#define TXTIMER     CT.4
+#define TXL1START   CT.5
+#define TXL1END     CT.6
+#define TXL1COUNT   CT.7
+#define TXL2START   CT.8
+#define TXL2END     CT.9
+#define TXL2COUNT   CT.10
+#define TXBPOBITS   CT.11
+#define TXMRSIZE    CT.12
+#define TXTIMERI    CT.13
+#define TXDRCTRL    CT.14  /* DEFAULT 0x0XXXF0F0 */
+#define TXDRSIZE    CT.15
+#define TXCATCH0    CT.16
+#define TXCATCH1    CT.17
+#define TXCATCH2    CT.18
+#define TXCATCH3    CT.19
+
+#ifdef METAC_2_1
+#define TXDEFR      CT.20
+#define TXCPRS      CT.21
+#endif
+
+#define TXINTERN0   CT.23
+#define TXAMAREG0   CT.24
+#define TXAMAREG1   CT.25
+#define TXAMAREG2   CT.26
+#define TXAMAREG3   CT.27
+#define TXDIVTIME   CT.28   /* DEFAULT 0x00000001 */
+#define TXPRIVEXT   CT.29   /* DEFAULT 0x003B0000 */
+#define TXTACTCYC   CT.30
+#define TXIDLECYC   CT.31
+
+/*****************************************************************************
+ *                        CONTROL UNIT REGISTER BITS
+ ****************************************************************************/
+/*
+ * The following registers and where appropriate the sub-fields of those
+ * registers are defined for pervasive use in controlling program flow.
+ */
+
+/*
+ * TXENABLE register fields - only the thread id is routinely useful
+ */
+#define TXENABLE_REGNUM 0
+#define TXENABLE_THREAD_BITS       0x00000700
+#define TXENABLE_THREAD_S          8
+#define TXENABLE_REV_STEP_BITS     0x000000F0
+#define TXENABLE_REV_STEP_S        4
+
+/*
+ * TXMODE register - controls extensions of the instruction set
+ */
+#define TXMODE_REGNUM 1
+#define     TXMODE_DEFAULT  0   /* All fields default to zero */
+
+/*
+ * TXSTATUS register - contains a couple of stable bits that can be used
+ *      to determine the privilege processing level and interrupt
+ *      processing level of the current thread.
+ */
+#define TXSTATUS_REGNUM 2
+#define TXSTATUS_PSTAT_BIT         0x00020000   /* -> Privilege active      */
+#define TXSTATUS_PSTAT_S           17
+#define TXSTATUS_ISTAT_BIT         0x00010000   /* -> In interrupt state    */
+#define TXSTATUS_ISTAT_S           16
+
+/*
+ * These are all relatively boring registers, mostly full 32-bit
+ */
+#define TXRPT_REGNUM     3  /* Repeat counter for XFR... instructions   */
+#define TXTIMER_REGNUM   4  /* Timer-- causes timer trigger on overflow */
+#define TXL1START_REGNUM 5  /* Hardware Loop 1 Start-PC/End-PC/Count    */
+#define TXL1END_REGNUM   6
+#define TXL1COUNT_REGNUM 7
+#define TXL2START_REGNUM 8  /* Hardware Loop 2 Start-PC/End-PC/Count    */
+#define TXL2END_REGNUM   9
+#define TXL2COUNT_REGNUM 10
+#define TXBPOBITS_REGNUM 11 /* Branch predict override bits - tune perf */
+#define TXTIMERI_REGNUM  13 /* Timer-- time based interrupt trigger     */
+
+/*
+ * TXDIVTIME register is routinely read to calculate the time-base for
+ * the TXTIMER register.
+ */
+#define TXDIVTIME_REGNUM 28
+#define     TXDIVTIME_DIV_BITS 0x000000FF
+#define     TXDIVTIME_DIV_S    0
+#define     TXDIVTIME_DIV_MIN  0x00000001   /* Maximum resolution       */
+#define     TXDIVTIME_DIV_MAX  0x00000100   /* 1/1 -> 1/256 resolution  */
+#define     TXDIVTIME_BASE_HZ  1000000      /* Timers run at 1Mhz @1/1  */
+
+/*
+ * TXPRIVEXT register can be consulted to decide if write access to a
+ *    part of the threads register set is not permitted when in
+ *    unprivileged mode (PSTAT == 0).
+ */
+#define TXPRIVEXT_REGNUM 29
+#define     TXPRIVEXT_COPRO_BITS    0xFF000000 /* Co-processor 0-7 */
+#define     TXPRIVEXT_COPRO_S       24
+#ifndef METAC_1_2
+#define     TXPRIVEXT_TXTIMER_BIT   0x00080000 /* TXTIMER   priv */
+#define     TXPRIVEXT_TRACE_BIT     0x00040000 /* TTEXEC|TTCTRL|GTEXEC */
+#endif
+#define     TXPRIVEXT_TXTRIGGER_BIT 0x00020000 /* TXSTAT|TXMASK|TXPOLL */
+#define     TXPRIVEXT_TXGBLCREG_BIT 0x00010000 /* Global common regs */
+#define     TXPRIVEXT_CBPRIV_BIT    0x00008000 /* Mem i/f dump priv */
+#define     TXPRIVEXT_ILOCK_BIT     0x00004000 /* LOCK inst priv */
+#define     TXPRIVEXT_TXITACCYC_BIT 0x00002000 /* TXIDLECYC|TXTACTCYC */
+#define     TXPRIVEXT_TXDIVTIME_BIT 0x00001000 /* TXDIVTIME priv */
+#define     TXPRIVEXT_TXAMAREGX_BIT 0x00000800 /* TXAMAREGX priv */
+#define     TXPRIVEXT_TXTIMERI_BIT  0x00000400 /* TXTIMERI  priv */
+#define     TXPRIVEXT_TXSTATUS_BIT  0x00000200 /* TXSTATUS  priv */
+#define     TXPRIVEXT_TXDISABLE_BIT 0x00000100 /* TXENABLE  priv */
+#ifndef METAC_1_2
+#define     TXPRIVEXT_MINIMON_BIT   0x00000080 /* Enable Minim features */
+#define     TXPRIVEXT_OLDBCCON_BIT  0x00000020 /* Restore Static predictions */
+#define     TXPRIVEXT_ALIGNREW_BIT  0x00000010 /* Align & precise checks */
+#endif
+#define     TXPRIVEXT_KEEPPRI_BIT   0x00000008 /* Use AMA_Priority if ISTAT=1*/
+#define     TXPRIVEXT_TXTOGGLEI_BIT 0x00000001 /* TX.....I  priv */
+
+/*
+ * TXTACTCYC register - counts instructions issued for this thread
+ */
+#define TXTACTCYC_REGNUM  30
+#define     TXTACTCYC_COUNT_MASK    0x00FFFFFF
+
+/*
+ * TXIDLECYC register - counts idle cycles
+ */
+#define TXIDLECYC_REGNUM  31
+#define     TXIDLECYC_COUNT_MASK    0x00FFFFFF
+
+/*****************************************************************************
+ *                             DSP EXTENSIONS
+ ****************************************************************************/
+/*
+ * The following values relate to fields and controls that only a program
+ * using the DSP extensions of the META instruction set need to know.
+ */
+
+
+#ifndef METAC_1_2
+/*
+ * Allow co-processor hardware to replace the read pipeline data source in
+ * appropriate cases.
+ */
+#define TXMODE_RDCPEN_BIT       0x00800000
+#endif
+
+/*
+ * Address unit addressing modes
+ */
+#define TXMODE_A1ADDR_BITS  0x00007000
+#define TXMODE_A1ADDR_S     12
+#define TXMODE_A0ADDR_BITS  0x00000700
+#define TXMODE_A0ADDR_S     8
+#define     TXMODE_AXADDR_MODULO 3
+#define     TXMODE_AXADDR_REVB   4
+#define     TXMODE_AXADDR_REVW   5
+#define     TXMODE_AXADDR_REVD   6
+#define     TXMODE_AXADDR_REVL   7
+
+/*
+ * Data unit OverScale select (default 0 -> normal, 1 -> top 16 bits)
+ */
+#define TXMODE_DXOVERSCALE_BIT  0x00000080
+
+/*
+ * Data unit MX mode select (default 0 -> MX16, 1 -> MX8)
+ */
+#define TXMODE_M8_BIT         0x00000040
+
+/*
+ * Data unit accumulator saturation point (default -> 40 bit accumulator)
+ */
+#define TXMODE_DXACCSAT_BIT 0x00000020 /* Set for 32-bit accumulator */
+
+/*
+ * Data unit accumulator saturation enable (default 0 -> no saturation)
+ */
+#define TXMODE_DXSAT_BIT    0x00000010
+
+/*
+ * Data unit master rounding control (default 0 -> normal, 1 -> convergent)
+ */
+#define TXMODE_DXROUNDING_BIT   0x00000008
+
+/*
+ * Data unit product shift for fractional arithmetic (default off)
+ */
+#define TXMODE_DXPRODSHIFT_BIT  0x00000004
+
+/*
+ * Select the arithmetic mode (multiply mostly) for both data units
+ */
+#define TXMODE_DXARITH_BITS 0x00000003
+#define     TXMODE_DXARITH_32  3
+#define     TXMODE_DXARITH_32H 2
+#define     TXMODE_DXARITH_S16 1
+#define     TXMODE_DXARITH_16  0
+
+/*
+ * TXMRSIZE register value only relevant when DSP modulo addressing active
+ */
+#define TXMRSIZE_REGNUM 12
+#define     TXMRSIZE_MIN    0x0002  /* 0, 1 -> normal addressing logic */
+#define     TXMRSIZE_MAX    0xFFFF
+
+/*
+ * TXDRCTRL register can be used to detect the actaul size of the DSP RAM
+ * partitions allocated to this thread.
+ */
+#define TXDRCTRL_REGNUM 14
+#define     TXDRCTRL_SINESIZE_BITS  0x0F000000
+#define     TXDRCTRL_SINESIZE_S     24
+#define     TXDRCTRL_RAMSZPOW_BITS  0x001F0000  /* Limit = (1<<RAMSZPOW)-1 */
+#define     TXDRCTRL_RAMSZPOW_S     16
+#define     TXDRCTRL_D1RSZAND_BITS  0x0000F000  /* Mask top 4 bits - D1 */
+#define     TXDRCTRL_D1RSZAND_S     12
+#define     TXDRCTRL_D0RSZAND_BITS  0x000000F0  /* Mask top 4 bits - D0 */
+#define     TXDRCTRL_D0RSZAND_S     4
+/* Given extracted RAMSZPOW and DnRSZAND fields this returns the size */
+#define     TXDRCTRL_DXSIZE(Pow, AndBits) \
+				((((~(AndBits)) & 0x0f) + 1) << ((Pow)-4))
+
+/*
+ * TXDRSIZE register provides modulo addressing options for each DSP RAM
+ */
+#define TXDRSIZE_REGNUM 15
+#define     TXDRSIZE_R1MOD_BITS       0xFFFF0000
+#define     TXDRSIZE_R1MOD_S          16
+#define     TXDRSIZE_R0MOD_BITS       0x0000FFFF
+#define     TXDRSIZE_R0MOD_S          0
+
+#define     TXDRSIZE_RBRAD_SCALE_BITS 0x70000000
+#define     TXDRSIZE_RBRAD_SCALE_S    28
+#define     TXDRSIZE_RBMODSIZE_BITS   0x0FFF0000
+#define     TXDRSIZE_RBMODSIZE_S      16
+#define     TXDRSIZE_RARAD_SCALE_BITS 0x00007000
+#define     TXDRSIZE_RARAD_SCALE_S    12
+#define     TXDRSIZE_RAMODSIZE_BITS   0x00000FFF
+#define     TXDRSIZE_RAMODSIZE_S      0
+
+/*****************************************************************************
+ *                       DEFERRED and BUS ERROR EXTENSION
+ ****************************************************************************/
+
+/*
+ * TXDEFR register - Deferred exception control
+ */
+#define TXDEFR_REGNUM 20
+#define     TXDEFR_DEFAULT  0   /* All fields default to zero */
+
+/*
+ * Bus error state is a multi-bit positive/negative event notification from
+ * the bus infrastructure.
+ */
+#define     TXDEFR_BUS_ERR_BIT    0x80000000  /* Set if error (LSB STATE) */
+#define     TXDEFR_BUS_ERRI_BIT   0x40000000  /* Fetch returned error */
+#define     TXDEFR_BUS_STATE_BITS 0x3F000000  /* Bus event/state data */
+#define     TXDEFR_BUS_STATE_S    24
+#define     TXDEFR_BUS_TRIG_BIT   0x00800000  /* Set when bus error seen */
+
+/*
+ * Bus events are collected by background code in a deferred manner unless
+ * selected to trigger an extended interrupt HALT trigger when they occur.
+ */
+#define     TXDEFR_BUS_ICTRL_BIT  0x00000080  /* Enable interrupt trigger */
+
+/*
+ * CHIP Automatic Mips Allocation control registers
+ * ------------------------------------------------
+ */
+
+/* CT Bank AMA Registers */
+#define TXAMAREG0_REGNUM 24
+#ifdef METAC_1_2
+#define     TXAMAREG0_CTRL_BITS       0x07000000
+#else /* METAC_1_2 */
+#define     TXAMAREG0_RCOFF_BIT       0x08000000
+#define     TXAMAREG0_DLINEHLT_BIT    0x04000000
+#define     TXAMAREG0_DLINEDIS_BIT    0x02000000
+#define     TXAMAREG0_CYCSTRICT_BIT   0x01000000
+#define     TXAMAREG0_CTRL_BITS       (TXAMAREG0_RCOFF_BIT |    \
+				       TXAMAREG0_DLINEHLT_BIT | \
+				       TXAMAREG0_DLINEDIS_BIT | \
+				       TXAMAREG0_CYCSTRICT_BIT)
+#endif /* !METAC_1_2 */
+#define     TXAMAREG0_CTRL_S           24
+#define     TXAMAREG0_MDM_BIT         0x00400000
+#define     TXAMAREG0_MPF_BIT         0x00200000
+#define     TXAMAREG0_MPE_BIT         0x00100000
+#define     TXAMAREG0_MASK_BITS       (TXAMAREG0_MDM_BIT | \
+				       TXAMAREG0_MPF_BIT | \
+				       TXAMAREG0_MPE_BIT)
+#define     TXAMAREG0_MASK_S          20
+#define     TXAMAREG0_SDM_BIT         0x00040000
+#define     TXAMAREG0_SPF_BIT         0x00020000
+#define     TXAMAREG0_SPE_BIT         0x00010000
+#define     TXAMAREG0_STATUS_BITS     (TXAMAREG0_SDM_BIT | \
+				       TXAMAREG0_SPF_BIT | \
+				       TXAMAREG0_SPE_BIT)
+#define     TXAMAREG0_STATUS_S        16
+#define     TXAMAREG0_PRIORITY_BITS   0x0000FF00
+#define     TXAMAREG0_PRIORITY_S      8
+#define     TXAMAREG0_BVALUE_BITS     0x000000FF
+#define     TXAMAREG0_BVALUE_S  0
+
+#define TXAMAREG1_REGNUM 25
+#define     TXAMAREG1_DELAYC_BITS     0x07FFFFFF
+#define     TXAMAREG1_DELAYC_S  0
+
+#define TXAMAREG2_REGNUM 26
+#ifdef METAC_1_2
+#define     TXAMAREG2_DLINEC_BITS     0x00FFFFFF
+#define     TXAMAREG2_DLINEC_S        0
+#else /* METAC_1_2 */
+#define     TXAMAREG2_IRQPRIORITY_BIT 0xFF000000
+#define     TXAMAREG2_IRQPRIORITY_S   24
+#define     TXAMAREG2_DLINEC_BITS     0x00FFFFF0
+#define     TXAMAREG2_DLINEC_S        4
+#endif /* !METAC_1_2 */
+
+#define TXAMAREG3_REGNUM 27
+#define     TXAMAREG2_AMABLOCK_BIT    0x00080000
+#define     TXAMAREG2_AMAC_BITS       0x0000FFFF
+#define     TXAMAREG2_AMAC_S          0
+
+/*****************************************************************************
+ *                                FPU EXTENSIONS
+ ****************************************************************************/
+/*
+ * The following registers only exist in FPU enabled cores.
+ */
+
+/*
+ * TXMODE register - FPU rounding mode control/status fields
+ */
+#define     TXMODE_FPURMODE_BITS     0x00030000
+#define     TXMODE_FPURMODE_S        16
+#define     TXMODE_FPURMODEWRITE_BIT 0x00040000  /* Set to change FPURMODE */
+
+/*
+ * TXDEFR register - FPU exception handling/state is a significant source
+ *   of deferrable errors. Run-time S/W can move handling to interrupt level
+ *   using DEFR instruction to collect state.
+ */
+#define     TXDEFR_FPE_FE_BITS       0x003F0000  /* Set by FPU_FE events */
+#define     TXDEFR_FPE_FE_S          16
+
+#define     TXDEFR_FPE_INEXACT_FE_BIT   0x010000
+#define     TXDEFR_FPE_UNDERFLOW_FE_BIT 0x020000
+#define     TXDEFR_FPE_OVERFLOW_FE_BIT  0x040000
+#define     TXDEFR_FPE_DIVBYZERO_FE_BIT 0x080000
+#define     TXDEFR_FPE_INVALID_FE_BIT   0x100000
+#define     TXDEFR_FPE_DENORMAL_FE_BIT  0x200000
+
+#define     TXDEFR_FPE_ICTRL_BITS    0x000003F   /* Route to interrupts */
+#define     TXDEFR_FPE_ICTRL_S       0
+
+#define     TXDEFR_FPE_INEXACT_ICTRL_BIT   0x01
+#define     TXDEFR_FPE_UNDERFLOW_ICTRL_BIT 0x02
+#define     TXDEFR_FPE_OVERFLOW_ICTRL_BIT  0x04
+#define     TXDEFR_FPE_DIVBYZERO_ICTRL_BIT 0x08
+#define     TXDEFR_FPE_INVALID_ICTRL_BIT   0x10
+#define     TXDEFR_FPE_DENORMAL_ICTRL_BIT  0x20
+
+/*
+ * DETAILED FPU RELATED VALUES
+ * ---------------------------
+ */
+
+/*
+ * Rounding mode field in TXMODE can hold a number of logical values
+ */
+#define METAG_FPURMODE_TONEAREST  0x0      /* Default */
+#define METAG_FPURMODE_TOWARDZERO 0x1
+#define METAG_FPURMODE_UPWARD     0x2
+#define METAG_FPURMODE_DOWNWARD   0x3
+
+/*
+ * In order to set the TXMODE register field that controls the rounding mode
+ * an extra bit must be set in the value written versus that read in order
+ * to gate writes to the rounding mode field. This allows other non-FPU code
+ * to modify TXMODE without knowledge of the FPU units presence and not
+ * influence the FPU rounding mode. This macro adds the required bit so new
+ * rounding modes are accepted.
+ */
+#define TXMODE_FPURMODE_SET(FPURMode) \
+	(TXMODE_FPURMODEWRITE_BIT + ((FPURMode)<<TXMODE_FPURMODE_S))
+
+/*
+ * To successfully restore TXMODE to zero at the end of the function the
+ * following value (rather than zero) must be used.
+ */
+#define TXMODE_FPURMODE_RESET (TXMODE_FPURMODEWRITE_BIT)
+
+/*
+ * In TXSTATUS a special bit exists to indicate if FPU H/W has been accessed
+ * since it was last reset.
+ */
+#define TXSTATUS_FPACTIVE_BIT  0x01000000
+
+/*
+ * Exception state (see TXDEFR_FPU_FE_*) and enabling (for interrupt
+ * level processing (see TXDEFR_FPU_ICTRL_*) are controlled by similar
+ * bit mask locations within each field.
+ */
+#define METAG_FPU_FE_INEXACT   0x01
+#define METAG_FPU_FE_UNDERFLOW 0x02
+#define METAG_FPU_FE_OVERFLOW  0x04
+#define METAG_FPU_FE_DIVBYZERO 0x08
+#define METAG_FPU_FE_INVALID   0x10
+#define METAG_FPU_FE_DENORMAL  0x20
+#define METAG_FPU_FE_ALL_EXCEPT (METAG_FPU_FE_INEXACT   | \
+				 METAG_FPU_FE_UNDERFLOW | \
+				 METAG_FPU_FE_OVERFLOW  | \
+				 METAG_FPU_FE_DIVBYZERO | \
+				 METAG_FPU_FE_INVALID   | \
+				 METAG_FPU_FE_DENORMAL)
+
+/*****************************************************************************
+ *             THREAD CONTROL, ERROR, OR INTERRUPT STATE EXTENSIONS
+ ****************************************************************************/
+/*
+ * The following values are only relevant to code that externally controls
+ * threads, handles errors/interrupts, and/or set-up interrupt/error handlers
+ * for subsequent use.
+ */
+
+/*
+ * TXENABLE register fields - only ENABLE_BIT is potentially read/write
+ */
+#define TXENABLE_MAJOR_REV_BITS    0xFF000000
+#define TXENABLE_MAJOR_REV_S       24
+#define TXENABLE_MINOR_REV_BITS    0x00FF0000
+#define TXENABLE_MINOR_REV_S       16
+#define TXENABLE_CLASS_BITS        0x0000F000
+#define TXENABLE_CLASS_S           12
+#define TXENABLE_CLASS_DSP             0x0 /* -> DSP Thread */
+#define TXENABLE_CLASS_LDSP            0x8 /* -> DSP LITE Thread */
+#define TXENABLE_CLASS_GP              0xC /* -> General Purpose Thread */
+#define     TXENABLE_CLASSALT_LFPU       0x2 /*  Set to indicate LITE FPU */
+#define     TXENABLE_CLASSALT_FPUR8      0x1 /*  Set to indicate 8xFPU regs */
+#define TXENABLE_MTXARCH_BIT       0x00000800
+#define TXENABLE_STEP_REV_BITS     0x000000F0
+#define TXENABLE_STEP_REV_S        4
+#define TXENABLE_STOPPED_BIT       0x00000004   /* TXOFF due to ENABLE->0 */
+#define TXENABLE_OFF_BIT           0x00000002   /* Thread is in off state */
+#define TXENABLE_ENABLE_BIT        0x00000001   /* Set if running */
+
+/*
+ * TXSTATUS register - used by external/internal interrupt/error handler
+ */
+#define TXSTATUS_CB1MARKER_BIT     0x00800000   /* -> int level mem state */
+#define TXSTATUS_CBMARKER_BIT      0x00400000   /* -> mem i/f state dumped */
+#define TXSTATUS_MEM_FAULT_BITS    0x00300000
+#define TXSTATUS_MEM_FAULT_S       20
+#define     TXSTATUS_MEMFAULT_NONE  0x0 /* -> No memory fault       */
+#define     TXSTATUS_MEMFAULT_GEN   0x1 /* -> General fault         */
+#define     TXSTATUS_MEMFAULT_PF    0x2 /* -> Page fault            */
+#define     TXSTATUS_MEMFAULT_RO    0x3 /* -> Read only fault       */
+#define TXSTATUS_MAJOR_HALT_BITS   0x000C0000
+#define TXSTATUS_MAJOR_HALT_S      18
+#define     TXSTATUS_MAJHALT_TRAP 0x0   /* -> SWITCH inst used      */
+#define     TXSTATUS_MAJHALT_INST 0x1   /* -> Unknown inst or fetch */
+#define     TXSTATUS_MAJHALT_PRIV 0x2   /* -> Internal privilege    */
+#define     TXSTATUS_MAJHALT_MEM  0x3   /* -> Memory i/f fault      */
+#define TXSTATUS_L_STEP_BITS       0x00000800   /* -> Progress of L oper    */
+#define TXSTATUS_LSM_STEP_BITS     0x00000700   /* -> Progress of L/S mult  */
+#define TXSTATUS_LSM_STEP_S        8
+#define TXSTATUS_FLAG_BITS         0x0000001F   /* -> All the flags         */
+#define TXSTATUS_SCC_BIT           0x00000010   /* -> Split-16 flags ...    */
+#define TXSTATUS_SCF_LZ_BIT        0x00000008   /* -> Split-16 Low  Z flag  */
+#define TXSTATUS_SCF_HZ_BIT        0x00000004   /* -> Split-16 High Z flag  */
+#define TXSTATUS_SCF_HC_BIT        0x00000002   /* -> Split-16 High C flag  */
+#define TXSTATUS_SCF_LC_BIT        0x00000001   /* -> Split-16 Low  C flag  */
+#define TXSTATUS_CF_Z_BIT          0x00000008   /* -> Condition Z flag      */
+#define TXSTATUS_CF_N_BIT          0x00000004   /* -> Condition N flag      */
+#define TXSTATUS_CF_O_BIT          0x00000002   /* -> Condition O flag      */
+#define TXSTATUS_CF_C_BIT          0x00000001   /* -> Condition C flag      */
+
+/*
+ * TXCATCH0-3 register contents may store information on a memory operation
+ * that has failed if the bit TXSTATUS_CBMARKER_BIT is set.
+ */
+#define TXCATCH0_REGNUM 16
+#define TXCATCH1_REGNUM 17
+#define     TXCATCH1_ADDR_BITS   0xFFFFFFFF   /* TXCATCH1 is Addr 0-31 */
+#define     TXCATCH1_ADDR_S      0
+#define TXCATCH2_REGNUM 18
+#define     TXCATCH2_DATA0_BITS  0xFFFFFFFF   /* TXCATCH2 is Data 0-31 */
+#define     TXCATCH2_DATA0_S     0
+#define TXCATCH3_REGNUM 19
+#define     TXCATCH3_DATA1_BITS  0xFFFFFFFF   /* TXCATCH3 is Data 32-63 */
+#define     TXCATCH3_DATA1_S     0
+
+/*
+ * Detailed catch state information
+ * --------------------------------
+ */
+
+/* Contents of TXCATCH0 register */
+#define     TXCATCH0_LDRXX_BITS  0xF8000000  /* Load destination reg 0-31 */
+#define     TXCATCH0_LDRXX_S     27
+#define     TXCATCH0_LDDST_BITS  0x07FF0000  /* Load destination bits */
+#define     TXCATCH0_LDDST_S     16
+#define         TXCATCH0_LDDST_D1DSP 0x400   /* One bit set if it's a LOAD */
+#define         TXCATCH0_LDDST_D0DSP 0x200
+#define         TXCATCH0_LDDST_TMPLT 0x100
+#define         TXCATCH0_LDDST_TR    0x080
+#ifdef METAC_2_1
+#define         TXCATCH0_LDDST_FPU   0x040
+#endif
+#define         TXCATCH0_LDDST_PC    0x020
+#define         TXCATCH0_LDDST_A1    0x010
+#define         TXCATCH0_LDDST_A0    0x008
+#define         TXCATCH0_LDDST_D1    0x004
+#define         TXCATCH0_LDDST_D0    0x002
+#define         TXCATCH0_LDDST_CT    0x001
+#ifdef METAC_2_1
+#define     TXCATCH0_WATCHSTOP_BIT 0x00004000  /* Set if Data Watch set fault */
+#endif
+#define     TXCATCH0_WATCHS_BIT  0x00004000  /* Set if Data Watch set fault */
+#define     TXCATCH0_WATCH1_BIT  0x00002000  /* Set if Data Watch 1 matches */
+#define     TXCATCH0_WATCH0_BIT  0x00001000  /* Set if Data Watch 0 matches */
+#define     TXCATCH0_FAULT_BITS  0x00000C00  /* See TXSTATUS_MEMFAULT_*     */
+#define     TXCATCH0_FAULT_S     10
+#define     TXCATCH0_PRIV_BIT    0x00000200  /* Privilege of transaction    */
+#define     TXCATCH0_READ_BIT    0x00000100  /* Set for Read or Load cases  */
+
+#ifdef METAC_2_1
+/* LNKGET Marker bit in TXCATCH0 */
+#define   TXCATCH0_LNKGET_MARKER_BIT 0x00000008
+#define       TXCATCH0_PREPROC_BIT  0x00000004
+#endif
+
+/* Loads are indicated by one of the LDDST bits being set */
+#define     TXCATCH0_LDM16_BIT   0x00000004  /* Load M16 flag */
+#define     TXCATCH0_LDL2L1_BITS 0x00000003  /* Load data size L2,L1 */
+#define     TXCATCH0_LDL2L1_S    0
+
+/* Reads are indicated by the READ bit being set without LDDST bits */
+#define     TXCATCH0_RAXX_BITS   0x0000001F  /* RAXX issue port for read */
+#define     TXCATCH0_RAXX_S      0
+
+/* Write operations are all that remain if READ bit is not set */
+#define     TXCATCH0_WMASK_BITS  0x000000FF  /* Write byte lane mask */
+#define     TXCATCH0_WMASK_S     0
+
+#ifdef METAC_2_1
+
+/* When a FPU exception is signalled then FPUSPEC == FPUSPEC_TAG */
+#define     TXCATCH0_FPURDREG_BITS    0xF8000000
+#define     TXCATCH0_FPURDREG_S       27
+#define     TXCATCH0_FPUR1REG_BITS    0x07C00000
+#define     TXCATCH0_FPUR1REG_S       22
+#define     TXCATCH0_FPUSPEC_BITS     0x000F0000
+#define     TXCATCH0_FPUSPEC_S        16
+#define         TXCATCH0_FPUSPEC_TAG      0xF
+#define     TXCATCH0_FPUINSTA_BIT     0x00001000
+#define     TXCATCH0_FPUINSTQ_BIT     0x00000800
+#define     TXCATCH0_FPUINSTZ_BIT     0x00000400
+#define     TXCATCH0_FPUINSTN_BIT     0x00000200
+#define     TXCATCH0_FPUINSTO3O_BIT   0x00000100
+#define     TXCATCH0_FPUWIDTH_BITS    0x000000C0
+#define     TXCATCH0_FPUWIDTH_S       6
+#define         TXCATCH0_FPUWIDTH_FLOAT   0
+#define         TXCATCH0_FPUWIDTH_DOUBLE  1
+#define         TXCATCH0_FPUWIDTH_PAIRED  2
+#define     TXCATCH0_FPUOPENC_BITS    0x0000003F
+#define     TXCATCH0_FPUOPENC_S       0
+#define         TXCATCH0_FPUOPENC_ADD     0  /* rop1=Rs1, rop3=Rs2 */
+#define         TXCATCH0_FPUOPENC_SUB     1  /* rop1=Rs1, rop3=Rs2 */
+#define         TXCATCH0_FPUOPENC_MUL     2  /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_ATOI    3  /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_ATOX    4  /* rop3=Rs, uses #Imm */
+#define         TXCATCH0_FPUOPENC_ITOA    5  /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_XTOA    6  /* rop3=Rs, uses #Imm */
+#define         TXCATCH0_FPUOPENC_ATOH    7  /* rop2=Rs */
+#define         TXCATCH0_FPUOPENC_HTOA    8  /* rop2=Rs */
+#define         TXCATCH0_FPUOPENC_DTOF    9  /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_FTOD    10 /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_DTOL    11 /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_LTOD    12 /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_DTOXL   13 /* rop3=Rs, uses #imm */
+#define         TXCATCH0_FPUOPENC_XLTOD   14 /* rop3=Rs, uses #imm */
+#define         TXCATCH0_FPUOPENC_CMP     15 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_MIN     16 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_MAX     17 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_ADDRE   18 /* rop1=Rs1, rop3=Rs2 */
+#define         TXCATCH0_FPUOPENC_SUBRE   19 /* rop1=Rs1, rop3=Rs2 */
+#define         TXCATCH0_FPUOPENC_MULRE   20 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_MXA     21 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define         TXCATCH0_FPUOPENC_MXAS    22 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define         TXCATCH0_FPUOPENC_MAR     23 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_MARS    24 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_MUZ     25 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define         TXCATCH0_FPUOPENC_MUZS    26 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define         TXCATCH0_FPUOPENC_RCP     27 /* rop2=Rs */
+#define         TXCATCH0_FPUOPENC_RSQ     28 /* rop2=Rs */
+
+/* For floating point exceptions TXCATCH1 is used to carry extra data */
+#define     TXCATCH1_FPUR2REG_BITS    0xF8000000
+#define     TXCATCH1_FPUR2REG_S       27
+#define     TXCATCH1_FPUR3REG_BITS    0x07C00000  /* Undefined if O3O set */
+#define     TXCATCH1_FPUR3REG_S       22
+#define     TXCATCH1_FPUIMM16_BITS    0x0000FFFF
+#define     TXCATCH1_FPUIMM16_S       0
+
+#endif /* METAC_2_1 */
+
+/*
+ * TXDIVTIME register used to hold the partial base address of memory i/f
+ * state dump area. Now deprecated.
+ */
+#define     TXDIVTIME_CBBASE_MASK    0x03FFFE00
+#define     TXDIVTIME_CBBASE_LINBASE 0x80000000
+#define     TXDIVTIME_CBBASE_LINBOFF 0x00000000 /* BGnd state */
+#define     TXDIVTIME_CBBASE_LINIOFF 0x00000100 /* Int  state */
+
+/*
+ * TXDIVTIME register used to indicate if the read pipeline was dirty when a
+ * thread was interrupted, halted, or generated an exception. It is invalid
+ * to attempt to issue a further pipeline read address while the read
+ * pipeline is in the dirty state.
+ */
+#define     TXDIVTIME_RPDIRTY_BIT   0x80000000
+
+/*
+ * Further bits in the TXDIVTIME register allow interrupt handling code to
+ * short-cut the discovery the most significant bit last read from TXSTATI.
+ *
+ * This is the bit number of the trigger line that a low level interrupt
+ * handler should acknowledge and then perhaps the index of a corresponding
+ * handler function.
+ */
+#define     TXDIVTIME_IRQENC_BITS   0x0F000000
+#define     TXDIVTIME_IRQENC_S      24
+
+/*
+ * If TXDIVTIME_RPVALID_BIT is set the read pipeline contained significant
+ * information when the thread was interrupted|halted|exceptioned. Each slot
+ * containing data is indicated by a one bit in the corresponding
+ * TXDIVTIME_RPMASK_BITS bit (least significance bit relates to first
+ * location in read pipeline - most likely to have the 1 state). Empty slots
+ * contain zeroes with no interlock applied on reads if RPDIRTY is currently
+ * set with RPMASK itself being read-only state.
+ */
+#define     TXDIVTIME_RPMASK_BITS 0x003F0000   /* -> Full (1) Empty (0) */
+#define     TXDIVTIME_RPMASK_S    16
+
+/*
+ * TXPRIVEXT register can be used to single step thread execution and
+ * enforce synchronous memory i/f address checking for debugging purposes.
+ */
+#define     TXPRIVEXT_TXSTEP_BIT    0x00000004
+#define     TXPRIVEXT_MEMCHECK_BIT  0x00000002
+
+/*
+ * TXINTERNx registers holds internal state information for H/W debugging only
+ */
+#define TXINTERN0_REGNUM 23
+#define     TXINTERN0_LOCK2_BITS  0xF0000000
+#define     TXINTERN0_LOCK2_S     28
+#define     TXINTERN0_LOCK1_BITS  0x0F000000
+#define     TXINTERN0_LOCK1_S     24
+#define     TXINTERN0_TIFDF_BITS  0x0000F000
+#define     TXINTERN0_TIFDF_S     12
+#define     TXINTERN0_TIFIB_BITS  0x00000F00
+#define     TXINTERN0_TIFIB_S     8
+#define     TXINTERN0_TIFAF_BITS  0x000000F0
+#define     TXINTERN0_TIFAF_S     4
+#define     TXINTERN0_MSTATE_BITS 0x0000000F
+#define     TXINTERN0_MSTATE_S    0
+
+/*
+ * TXSTAT, TXMASK, TXPOLL, TXSTATI, TXMASKI, TXPOLLI registers from trigger
+ * bank all have similar contents (upper kick count bits not in MASK regs)
+ */
+#define TXSTAT_REGNUM  0
+#define     TXSTAT_TIMER_BIT    0x00000001
+#define     TXSTAT_TIMER_S      0
+#define     TXSTAT_KICK_BIT     0x00000002
+#define     TXSTAT_KICK_S       1
+#define     TXSTAT_DEFER_BIT    0x00000008
+#define     TXSTAT_DEFER_S      3
+#define     TXSTAT_EXTTRIG_BITS 0x0000FFF0
+#define     TXSTAT_EXTTRIG_S    4
+#define     TXSTAT_FPE_BITS     0x003F0000
+#define     TXSTAT_FPE_S        16
+#define     TXSTAT_FPE_DENORMAL_BIT    0x00200000
+#define     TXSTAT_FPE_DENORMAL_S      21
+#define     TXSTAT_FPE_INVALID_BIT     0x00100000
+#define     TXSTAT_FPE_INVALID_S       20
+#define     TXSTAT_FPE_DIVBYZERO_BIT   0x00080000
+#define     TXSTAT_FPE_DIVBYZERO_S     19
+#define     TXSTAT_FPE_OVERFLOW_BIT    0x00040000
+#define     TXSTAT_FPE_OVERFLOW_S      18
+#define     TXSTAT_FPE_UNDERFLOW_BIT   0x00020000
+#define     TXSTAT_FPE_UNDERFLOW_S     17
+#define     TXSTAT_FPE_INEXACT_BIT     0x00010000
+#define     TXSTAT_FPE_INEXACT_S       16
+#define     TXSTAT_BUSERR_BIT          0x00800000   /* Set if bus error/ack state */
+#define     TXSTAT_BUSERR_S            23
+#define         TXSTAT_BUSSTATE_BITS     0xFF000000 /* Read only */
+#define         TXSTAT_BUSSTATE_S        24
+#define     TXSTAT_KICKCNT_BITS 0xFFFF0000
+#define     TXSTAT_KICKCNT_S    16
+#define TXMASK_REGNUM  1
+#define TXSTATI_REGNUM 2
+#define     TXSTATI_BGNDHALT_BIT    0x00000004
+#define TXMASKI_REGNUM 3
+#define TXPOLL_REGNUM  4
+#define TXPOLLI_REGNUM 6
+
+/*
+ * TXDRCTRL register can be used to partition the DSP RAM space available to
+ * this thread at startup. This is achieved by offsetting the region allocated
+ * to each thread.
+ */
+#define     TXDRCTRL_D1PARTOR_BITS  0x00000F00  /* OR's into top 4 bits */
+#define     TXDRCTRL_D1PARTOR_S     8
+#define     TXDRCTRL_D0PARTOR_BITS  0x0000000F  /* OR's into top 4 bits */
+#define     TXDRCTRL_D0PARTOR_S     0
+/* Given extracted Pow and Or fields this is threads base within DSP RAM */
+#define     TXDRCTRL_DXBASE(Pow, Or)  ((Or)<<((Pow)-4))
+
+/*****************************************************************************
+ *                      RUN TIME TRACE CONTROL REGISTERS
+ ****************************************************************************/
+/*
+ * The following values are only relevant to code that implements run-time
+ *  trace features within the META Core
+ */
+#define TTEXEC      TT.0
+#define TTCTRL      TT.1
+#define TTMARK      TT.2
+#define TTREC       TT.3
+#define GTEXEC      TT.4
+
+#define TTEXEC_REGNUM               0
+#define     TTEXEC_EXTTRIGAND_BITS      0x7F000000
+#define     TTEXEC_EXTTRIGAND_S         24
+#define     TTEXEC_EXTTRIGEN_BIT        0x00008000
+#define     TTEXEC_EXTTRIGMATCH_BITS    0x00007F00
+#define     TTEXEC_EXTTRIGMATCH_S       8
+#define     TTEXEC_TCMODE_BITS          0x00000003
+#define     TTEXEC_TCMODE_S             0
+
+#define TTCTRL_REGNUM               1
+#define     TTCTRL_TRACETT_BITS         0x00008000
+#define     TTCTRL_TRACETT_S            15
+#define     TTCTRL_TRACEALL_BITS        0x00002000
+#define     TTCTRL_TRACEALL_S           13
+#ifdef METAC_2_1
+#define     TTCTRL_TRACEALLTAG_BITS     0x00000400
+#define     TTCTRL_TRACEALLTAG_S        10
+#endif /* METAC_2_1 */
+#define     TTCTRL_TRACETAG_BITS        0x00000200
+#define     TTCTRL_TRACETAG_S           9
+#define     TTCTRL_TRACETTPC_BITS       0x00000080
+#define     TTCTRL_TRACETTPC_S          7
+#define     TTCTRL_TRACEMPC_BITS        0x00000020
+#define     TTCTRL_TRACEMPC_S           5
+#define     TTCTRL_TRACEEN_BITS         0x00000008
+#define     TTCTRL_TRACEEN_S            3
+#define     TTCTRL_TRACEEN1_BITS        0x00000004
+#define     TTCTRL_TRACEEN1_S           2
+#define     TTCTRL_TRACEPC_BITS         0x00000002
+#define     TTCTRL_TRACEPC_S            1
+
+#ifdef METAC_2_1
+#define TTMARK_REGNUM   2
+#define TTMARK_BITS                 0xFFFFFFFF
+#define TTMARK_S                    0x0
+
+#define TTREC_REGNUM    3
+#define TTREC_BITS                  0xFFFFFFFFFFFFFFFF
+#define TTREC_S                     0x0
+#endif /* METAC_2_1 */
+
+#define GTEXEC_REGNUM               4
+#define     GTEXEC_DCRUN_BITS           0x80000000
+#define     GTEXEC_DCRUN_S              31
+#define     GTEXEC_ICMODE_BITS          0x0C000000
+#define     GTEXEC_ICMODE_S             26
+#define     GTEXEC_TCMODE_BITS          0x03000000
+#define     GTEXEC_TCMODE_S             24
+#define     GTEXEC_PERF1CMODE_BITS      0x00040000
+#define     GTEXEC_PERF1CMODE_S         18
+#define     GTEXEC_PERF0CMODE_BITS      0x00010000
+#define     GTEXEC_PERF0CMODE_S         16
+#define     GTEXEC_REFMSEL_BITS         0x0000F000
+#define     GTEXEC_REFMSEL_S            12
+#define     GTEXEC_METRICTH_BITS        0x000003FF
+#define     GTEXEC_METRICTH_S           0
+
+#ifdef METAC_2_1
+/*
+ * Clock Control registers
+ * -----------------------
+ */
+#define TXCLKCTRL_REGNUM        22
+
+/*
+ * Default setting is with clocks always on (DEFON), turning all clocks off
+ * can only be done from external devices (OFF), enabling automatic clock
+ * gating will allow clocks to stop as units fall idle.
+ */
+#define TXCLKCTRL_ALL_OFF       0x02222222
+#define TXCLKCTRL_ALL_DEFON     0x01111111
+#define TXCLKCTRL_ALL_AUTO      0x02222222
+
+/*
+ * Individual fields control caches, floating point and main data/addr units
+ */
+#define TXCLKCTRL_CLOCKIC_BITS  0x03000000
+#define TXCLKCTRL_CLOCKIC_S     24
+#define TXCLKCTRL_CLOCKDC_BITS  0x00300000
+#define TXCLKCTRL_CLOCKDC_S     20
+#define TXCLKCTRL_CLOCKFP_BITS  0x00030000
+#define TXCLKCTRL_CLOCKFP_S     16
+#define TXCLKCTRL_CLOCKD1_BITS  0x00003000
+#define TXCLKCTRL_CLOCKD1_S     12
+#define TXCLKCTRL_CLOCKD0_BITS  0x00000300
+#define TXCLKCTRL_CLOCKD0_S     8
+#define TXCLKCTRL_CLOCKA1_BITS  0x00000030
+#define TXCLKCTRL_CLOCKA1_S     4
+#define TXCLKCTRL_CLOCKA0_BITS  0x00000003
+#define TXCLKCTRL_CLOCKA0_S     0
+
+/*
+ * Individual settings for each field are common
+ */
+#define TXCLKCTRL_CLOCKxx_OFF   0
+#define TXCLKCTRL_CLOCKxx_DEFON 1
+#define TXCLKCTRL_CLOCKxx_AUTO  2
+
+#endif /* METAC_2_1 */
+
+#ifdef METAC_2_1
+/*
+ * Fast interrupt new bits
+ * ------------------------------------
+ */
+#define TXSTATUS_IPTOGGLE_BIT           0x80000000 /* Prev PToggle of TXPRIVEXT */
+#define TXSTATUS_ISTATE_BIT             0x40000000 /* IState bit */
+#define TXSTATUS_IWAIT_BIT              0x20000000 /* wait indefinitely in decision step*/
+#define TXSTATUS_IEXCEPT_BIT            0x10000000 /* Indicate an exception occured */
+#define TXSTATUS_IRPCOUNT_BITS          0x0E000000 /* Number of 'dirty' date entries*/
+#define TXSTATUS_IRPCOUNT_S             25
+#define TXSTATUS_IRQSTAT_BITS           0x0000F000 /* IRQEnc bits, trigger or interrupts */
+#define TXSTATUS_IRQSTAT_S              12
+#define TXSTATUS_LNKSETOK_BIT           0x00000020 /* LNKSetOK bit, successful LNKSET */
+
+/* New fields in TXDE for fast interrupt system */
+#define TXDIVTIME_IACTIVE_BIT           0x00008000 /* Enable new interrupt system */
+#define TXDIVTIME_INONEST_BIT           0x00004000 /* Gate nested interrupt */
+#define TXDIVTIME_IREGIDXGATE_BIT       0x00002000 /* gate of the IRegIdex field */
+#define TXDIVTIME_IREGIDX_BITS          0x00001E00 /* Index of A0.0/1 replaces */
+#define TXDIVTIME_IREGIDX_S             9
+#define TXDIVTIME_NOST_BIT              0x00000100 /* disable superthreading bit */
+#endif
+
+#endif /* _ASM_METAG_REGS_H_ */
diff --git a/arch/metag/include/asm/mman.h b/arch/metag/include/asm/mman.h
new file mode 100644
index 0000000..17999db
--- /dev/null
+++ b/arch/metag/include/asm/mman.h
@@ -0,0 +1,11 @@
+#ifndef __METAG_MMAN_H__
+#define __METAG_MMAN_H__
+
+#include <uapi/asm/mman.h>
+
+#ifndef __ASSEMBLY__
+#define arch_mmap_check metag_mmap_check
+int metag_mmap_check(unsigned long addr, unsigned long len,
+		     unsigned long flags);
+#endif
+#endif /* __METAG_MMAN_H__ */
diff --git a/arch/metag/include/asm/mmu.h b/arch/metag/include/asm/mmu.h
new file mode 100644
index 0000000..9c32114
--- /dev/null
+++ b/arch/metag/include/asm/mmu.h
@@ -0,0 +1,77 @@
+#ifndef __MMU_H
+#define __MMU_H
+
+#ifdef CONFIG_METAG_USER_TCM
+#include <linux/list.h>
+#endif
+
+#ifdef CONFIG_HUGETLB_PAGE
+#include <asm/page.h>
+#endif
+
+typedef struct {
+	/* Software pgd base pointer used for Meta 1.x MMU. */
+	unsigned long pgd_base;
+#ifdef CONFIG_METAG_USER_TCM
+	struct list_head tcm;
+#endif
+#ifdef CONFIG_HUGETLB_PAGE
+#if HPAGE_SHIFT < HUGEPT_SHIFT
+	/* last partially filled huge page table address */
+	unsigned long part_huge;
+#endif
+#endif
+} mm_context_t;
+
+/* Given a virtual address, return the pte for the top level 4meg entry
+ * that maps that address.
+ * Returns 0 (an empty pte) if that range is not mapped.
+ */
+unsigned long mmu_read_first_level_page(unsigned long vaddr);
+
+/* Given a linear (virtual) address, return the second level 4k pte
+ * that maps that address.  Returns 0 if the address is not mapped.
+ */
+unsigned long mmu_read_second_level_page(unsigned long vaddr);
+
+/* Get the virtual base address of the MMU */
+unsigned long mmu_get_base(void);
+
+/* Initialize the MMU. */
+void mmu_init(unsigned long mem_end);
+
+#ifdef CONFIG_METAG_META21_MMU
+/*
+ * For cpu "cpu" calculate and return the address of the
+ * MMCU_TnLOCAL_TABLE_PHYS0 if running in local-space or
+ * MMCU_TnGLOBAL_TABLE_PHYS0 if running in global-space.
+ */
+static inline unsigned long mmu_phys0_addr(unsigned int cpu)
+{
+	unsigned long phys0;
+
+	phys0 = (MMCU_T0LOCAL_TABLE_PHYS0 +
+		(MMCU_TnX_TABLE_PHYSX_STRIDE * cpu)) +
+		(MMCU_TXG_TABLE_PHYSX_OFFSET * is_global_space(PAGE_OFFSET));
+
+	return phys0;
+}
+
+/*
+ * For cpu "cpu" calculate and return the address of the
+ * MMCU_TnLOCAL_TABLE_PHYS1 if running in local-space or
+ * MMCU_TnGLOBAL_TABLE_PHYS1 if running in global-space.
+ */
+static inline unsigned long mmu_phys1_addr(unsigned int cpu)
+{
+	unsigned long phys1;
+
+	phys1 = (MMCU_T0LOCAL_TABLE_PHYS1 +
+		(MMCU_TnX_TABLE_PHYSX_STRIDE * cpu)) +
+		(MMCU_TXG_TABLE_PHYSX_OFFSET * is_global_space(PAGE_OFFSET));
+
+	return phys1;
+}
+#endif /* CONFIG_METAG_META21_MMU */
+
+#endif
diff --git a/arch/metag/include/asm/mmu_context.h b/arch/metag/include/asm/mmu_context.h
new file mode 100644
index 0000000..ae2a71b
--- /dev/null
+++ b/arch/metag/include/asm/mmu_context.h
@@ -0,0 +1,113 @@
+#ifndef __METAG_MMU_CONTEXT_H
+#define __METAG_MMU_CONTEXT_H
+
+#include <asm-generic/mm_hooks.h>
+
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+
+#include <linux/io.h>
+
+static inline void enter_lazy_tlb(struct mm_struct *mm,
+				  struct task_struct *tsk)
+{
+}
+
+static inline int init_new_context(struct task_struct *tsk,
+				   struct mm_struct *mm)
+{
+#ifndef CONFIG_METAG_META21_MMU
+	/* We use context to store a pointer to the page holding the
+	 * pgd of a process while it is running. While a process is not
+	 * running the pgd and context fields should be equal.
+	 */
+	mm->context.pgd_base = (unsigned long) mm->pgd;
+#endif
+#ifdef CONFIG_METAG_USER_TCM
+	INIT_LIST_HEAD(&mm->context.tcm);
+#endif
+	return 0;
+}
+
+#ifdef CONFIG_METAG_USER_TCM
+
+#include <linux/slab.h>
+#include <asm/tcm.h>
+
+static inline void destroy_context(struct mm_struct *mm)
+{
+	struct tcm_allocation *pos, *n;
+
+	list_for_each_entry_safe(pos, n,  &mm->context.tcm, list) {
+		tcm_free(pos->tag, pos->addr, pos->size);
+		list_del(&pos->list);
+		kfree(pos);
+	}
+}
+#else
+#define destroy_context(mm)		do { } while (0)
+#endif
+
+#ifdef CONFIG_METAG_META21_MMU
+static inline void load_pgd(pgd_t *pgd, int thread)
+{
+	unsigned long phys0 = mmu_phys0_addr(thread);
+	unsigned long phys1 = mmu_phys1_addr(thread);
+
+	/*
+	 *  0x900 2Gb address space
+	 *  The permission bits apply to MMU table region which gives a 2MB
+	 *  window into physical memory. We especially don't want userland to be
+	 *  able to access this.
+	 */
+	metag_out32(0x900 | _PAGE_CACHEABLE | _PAGE_PRIV | _PAGE_WRITE |
+		    _PAGE_PRESENT, phys0);
+	/* Set new MMU base address */
+	metag_out32(__pa(pgd) & MMCU_TBLPHYS1_ADDR_BITS, phys1);
+}
+#endif
+
+static inline void switch_mmu(struct mm_struct *prev, struct mm_struct *next)
+{
+#ifdef CONFIG_METAG_META21_MMU
+	load_pgd(next->pgd, hard_processor_id());
+#else
+	unsigned int i;
+
+	/* prev->context == prev->pgd in the case where we are initially
+	   switching from the init task to the first process. */
+	if (prev->context.pgd_base != (unsigned long) prev->pgd) {
+		for (i = FIRST_USER_PGD_NR; i < USER_PTRS_PER_PGD; i++)
+			((pgd_t *) prev->context.pgd_base)[i] = prev->pgd[i];
+	} else
+		prev->pgd = (pgd_t *)mmu_get_base();
+
+	next->pgd = prev->pgd;
+	prev->pgd = (pgd_t *) prev->context.pgd_base;
+
+	for (i = FIRST_USER_PGD_NR; i < USER_PTRS_PER_PGD; i++)
+		next->pgd[i] = ((pgd_t *) next->context.pgd_base)[i];
+
+	flush_cache_all();
+#endif
+	flush_tlb_all();
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	if (prev != next)
+		switch_mmu(prev, next);
+}
+
+static inline void activate_mm(struct mm_struct *prev_mm,
+			       struct mm_struct *next_mm)
+{
+	switch_mmu(prev_mm, next_mm);
+}
+
+#define deactivate_mm(tsk, mm)   do { } while (0)
+
+#endif
diff --git a/arch/metag/include/asm/mmzone.h b/arch/metag/include/asm/mmzone.h
new file mode 100644
index 0000000..9c88a9c
--- /dev/null
+++ b/arch/metag/include/asm/mmzone.h
@@ -0,0 +1,42 @@
+#ifndef __ASM_METAG_MMZONE_H
+#define __ASM_METAG_MMZONE_H
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+#include <linux/numa.h>
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid)		(node_data[nid])
+
+static inline int pfn_to_nid(unsigned long pfn)
+{
+	int nid;
+
+	for (nid = 0; nid < MAX_NUMNODES; nid++)
+		if (pfn >= node_start_pfn(nid) && pfn <= node_end_pfn(nid))
+			break;
+
+	return nid;
+}
+
+static inline struct pglist_data *pfn_to_pgdat(unsigned long pfn)
+{
+	return NODE_DATA(pfn_to_nid(pfn));
+}
+
+/* arch/metag/mm/numa.c */
+void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end);
+#else
+static inline void
+setup_bootmem_node(int nid, unsigned long start, unsigned long end)
+{
+}
+#endif /* CONFIG_NEED_MULTIPLE_NODES */
+
+#ifdef CONFIG_NUMA
+/* SoC specific mem init */
+void __init soc_mem_setup(void);
+#else
+static inline void __init soc_mem_setup(void) {};
+#endif
+
+#endif /* __ASM_METAG_MMZONE_H */
diff --git a/arch/metag/include/asm/module.h b/arch/metag/include/asm/module.h
new file mode 100644
index 0000000..e47e609
--- /dev/null
+++ b/arch/metag/include/asm/module.h
@@ -0,0 +1,37 @@
+#ifndef _ASM_METAG_MODULE_H
+#define _ASM_METAG_MODULE_H
+
+#include <asm-generic/module.h>
+
+struct metag_plt_entry {
+	/* Indirect jump instruction sequence. */
+	unsigned long tramp[2];
+};
+
+struct mod_arch_specific {
+	/* Indices of PLT sections within module. */
+	unsigned int core_plt_section, init_plt_section;
+};
+
+#if defined CONFIG_METAG_META12
+#define MODULE_PROC_FAMILY "META 1.2 "
+#elif defined CONFIG_METAG_META21
+#define MODULE_PROC_FAMILY "META 2.1 "
+#else
+#define MODULE_PROC_FAMILY ""
+#endif
+
+#ifdef CONFIG_4KSTACKS
+#define MODULE_STACKSIZE "4KSTACKS "
+#else
+#define MODULE_STACKSIZE ""
+#endif
+
+#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE
+
+#ifdef MODULE
+asm(".section .plt,\"ax\",@progbits; .balign 8; .previous");
+asm(".section .init.plt,\"ax\",@progbits; .balign 8; .previous");
+#endif
+
+#endif /* _ASM_METAG_MODULE_H */
diff --git a/arch/metag/include/asm/page.h b/arch/metag/include/asm/page.h
new file mode 100644
index 0000000..1e8e281
--- /dev/null
+++ b/arch/metag/include/asm/page.h
@@ -0,0 +1,128 @@
+#ifndef _METAG_PAGE_H
+#define _METAG_PAGE_H
+
+#include <linux/const.h>
+
+#include <asm/metag_mem.h>
+
+/* PAGE_SHIFT determines the page size */
+#if defined(CONFIG_PAGE_SIZE_4K)
+#define PAGE_SHIFT	12
+#elif defined(CONFIG_PAGE_SIZE_8K)
+#define PAGE_SHIFT	13
+#elif defined(CONFIG_PAGE_SIZE_16K)
+#define PAGE_SHIFT	14
+#endif
+
+#define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
+#define PAGE_MASK	(~(PAGE_SIZE-1))
+
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
+# define HPAGE_SHIFT	13
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
+# define HPAGE_SHIFT	14
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
+# define HPAGE_SHIFT	15
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+# define HPAGE_SHIFT	16
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
+# define HPAGE_SHIFT	17
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
+# define HPAGE_SHIFT	18
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+# define HPAGE_SHIFT	19
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
+# define HPAGE_SHIFT	20
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
+# define HPAGE_SHIFT	21
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
+# define HPAGE_SHIFT	22
+#endif
+
+#ifdef CONFIG_HUGETLB_PAGE
+# define HPAGE_SIZE		(1UL << HPAGE_SHIFT)
+# define HPAGE_MASK		(~(HPAGE_SIZE-1))
+# define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT-PAGE_SHIFT)
+/*
+ * We define our own hugetlb_get_unmapped_area so we don't corrupt 2nd level
+ * page tables with normal pages in them.
+ */
+# define HUGEPT_SHIFT		(22)
+# define HUGEPT_ALIGN		(1 << HUGEPT_SHIFT)
+# define HUGEPT_MASK		(HUGEPT_ALIGN - 1)
+# define ALIGN_HUGEPT(x)	ALIGN(x, HUGEPT_ALIGN)
+# define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+
+#ifndef __ASSEMBLY__
+
+/* On the Meta, we would like to know if the address (heap) we have is
+ * in local or global space.
+ */
+#define is_global_space(addr)	((addr) > 0x7fffffff)
+#define is_local_space(addr)	(!is_global_space(addr))
+
+extern void clear_page(void *to);
+extern void copy_page(void *to, void *from);
+
+#define clear_user_page(page, vaddr, pg)        clear_page(page)
+#define copy_user_page(to, from, vaddr, pg)     copy_page(to, from)
+
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;
+
+#define pte_val(x)	((x).pte)
+#define pgd_val(x)	((x).pgd)
+#define pgprot_val(x)	((x).pgprot)
+
+#define __pte(x)	((pte_t) { (x) })
+#define __pgd(x)	((pgd_t) { (x) })
+#define __pgprot(x)	((pgprot_t) { (x) })
+
+/* The kernel must now ALWAYS live at either 0xC0000000 or 0x40000000 - that
+ * being either global or local space.
+ */
+#define PAGE_OFFSET		(CONFIG_PAGE_OFFSET)
+
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+#define META_MEMORY_BASE  LINGLOBAL_BASE
+#define META_MEMORY_LIMIT LINGLOBAL_LIMIT
+#else
+#define META_MEMORY_BASE  LINLOCAL_BASE
+#define META_MEMORY_LIMIT LINLOCAL_LIMIT
+#endif
+
+/* Offset between physical and virtual mapping of kernel memory. */
+extern unsigned int meta_memoffset;
+
+#define __pa(x) ((unsigned long)(((unsigned long)(x)) - meta_memoffset))
+#define __va(x) ((void *)((unsigned long)(((unsigned long)(x)) + meta_memoffset)))
+
+extern unsigned long pfn_base;
+#define ARCH_PFN_OFFSET         (pfn_base)
+#define virt_to_page(kaddr)     pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define page_to_virt(page)      __va(page_to_pfn(page) << PAGE_SHIFT)
+#define virt_addr_valid(kaddr)  pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define page_to_phys(page)      (page_to_pfn(page) << PAGE_SHIFT)
+#ifdef CONFIG_FLATMEM
+extern unsigned long max_pfn;
+extern unsigned long min_low_pfn;
+#define pfn_valid(pfn)		((pfn) >= min_low_pfn && (pfn) < max_pfn)
+#endif
+
+#define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
+
+#define VM_DATA_DEFAULT_FLAGS   (VM_READ | VM_WRITE | VM_EXEC | \
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#include <asm-generic/memory_model.h>
+#include <asm-generic/getorder.h>
+
+#endif /* __ASSMEBLY__ */
+
+#endif /* _METAG_PAGE_H */
diff --git a/arch/metag/include/asm/perf_event.h b/arch/metag/include/asm/perf_event.h
new file mode 100644
index 0000000..105bbff
--- /dev/null
+++ b/arch/metag/include/asm/perf_event.h
@@ -0,0 +1,4 @@
+#ifndef __ASM_METAG_PERF_EVENT_H
+#define __ASM_METAG_PERF_EVENT_H
+
+#endif /* __ASM_METAG_PERF_EVENT_H */
diff --git a/arch/metag/include/asm/pgalloc.h b/arch/metag/include/asm/pgalloc.h
new file mode 100644
index 0000000..275d928
--- /dev/null
+++ b/arch/metag/include/asm/pgalloc.h
@@ -0,0 +1,79 @@
+#ifndef _METAG_PGALLOC_H
+#define _METAG_PGALLOC_H
+
+#include <linux/threads.h>
+#include <linux/mm.h>
+
+#define pmd_populate_kernel(mm, pmd, pte) \
+	set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
+
+#define pmd_populate(mm, pmd, pte) \
+	set_pmd(pmd, __pmd(_PAGE_TABLE | page_to_phys(pte)))
+
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+/*
+ * Allocate and free page tables.
+ */
+#ifdef CONFIG_METAG_META21_MMU
+static inline void pgd_ctor(pgd_t *pgd)
+{
+	memcpy(pgd + USER_PTRS_PER_PGD,
+	       swapper_pg_dir + USER_PTRS_PER_PGD,
+	       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+}
+#else
+#define pgd_ctor(x)	do { } while (0)
+#endif
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
+	if (pgd)
+		pgd_ctor(pgd);
+	return pgd;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	free_page((unsigned long)pgd);
+}
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+					  unsigned long address)
+{
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT |
+					      __GFP_ZERO);
+	return pte;
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+				      unsigned long address)
+{
+	struct page *pte;
+	pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0);
+	if (pte)
+		pgtable_page_ctor(pte);
+	return pte;
+}
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
+{
+	pgtable_page_dtor(pte);
+	__free_page(pte);
+}
+
+#define __pte_free_tlb(tlb, pte, addr)				\
+	do {							\
+		pgtable_page_dtor(pte);				\
+		tlb_remove_page((tlb), (pte));			\
+	} while (0)
+
+#define check_pgt_cache()	do { } while (0)
+
+#endif
diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h
new file mode 100644
index 0000000..1cd13d5
--- /dev/null
+++ b/arch/metag/include/asm/pgtable.h
@@ -0,0 +1,370 @@
+/*
+ * Macros and functions to manipulate Meta page tables.
+ */
+
+#ifndef _METAG_PGTABLE_H
+#define _METAG_PGTABLE_H
+
+#include <asm-generic/pgtable-nopmd.h>
+
+/* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+#define CONSISTENT_START	0xF7000000
+#define CONSISTENT_END		0xF73FFFFF
+#define VMALLOC_START		0xF8000000
+#define VMALLOC_END		0xFFFEFFFF
+#else
+#define CONSISTENT_START	0x77000000
+#define CONSISTENT_END		0x773FFFFF
+#define VMALLOC_START		0x78000000
+#define VMALLOC_END		0x7FFFFFFF
+#endif
+
+/*
+ * Definitions for MMU descriptors
+ *
+ * These are the hardware bits in the MMCU pte entries.
+ * Derived from the Meta toolkit headers.
+ */
+#define _PAGE_PRESENT		MMCU_ENTRY_VAL_BIT
+#define _PAGE_WRITE		MMCU_ENTRY_WR_BIT
+#define _PAGE_PRIV		MMCU_ENTRY_PRIV_BIT
+/* Write combine bit - this can cause writes to occur out of order */
+#define _PAGE_WR_COMBINE	MMCU_ENTRY_WRC_BIT
+/* Sys coherent bit - this bit is never used by Linux */
+#define _PAGE_SYS_COHERENT	MMCU_ENTRY_SYS_BIT
+#define _PAGE_ALWAYS_ZERO_1	0x020
+#define _PAGE_CACHE_CTRL0	0x040
+#define _PAGE_CACHE_CTRL1	0x080
+#define _PAGE_ALWAYS_ZERO_2	0x100
+#define _PAGE_ALWAYS_ZERO_3	0x200
+#define _PAGE_ALWAYS_ZERO_4	0x400
+#define _PAGE_ALWAYS_ZERO_5	0x800
+
+/* These are software bits that we stuff into the gaps in the hardware
+ * pte entries that are not used.  Note, these DO get stored in the actual
+ * hardware, but the hardware just does not use them.
+ */
+#define _PAGE_ACCESSED		_PAGE_ALWAYS_ZERO_1
+#define _PAGE_DIRTY		_PAGE_ALWAYS_ZERO_2
+#define _PAGE_FILE		_PAGE_ALWAYS_ZERO_3
+
+/* Pages owned, and protected by, the kernel. */
+#define _PAGE_KERNEL		_PAGE_PRIV
+
+/* No cacheing of this page */
+#define _PAGE_CACHE_WIN0	(MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)
+/* burst cacheing - good for data streaming */
+#define _PAGE_CACHE_WIN1	(MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)
+/* One cache way per thread */
+#define _PAGE_CACHE_WIN2	(MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)
+/* Full on cacheing */
+#define _PAGE_CACHE_WIN3	(MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)
+
+#define _PAGE_CACHEABLE		(_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)
+
+/* which bits are used for cache control ... */
+#define _PAGE_CACHE_MASK	(_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \
+				 _PAGE_WR_COMBINE)
+
+/* This is a mask of the bits that pte_modify is allowed to change. */
+#define _PAGE_CHG_MASK		(PAGE_MASK)
+
+#define _PAGE_SZ_SHIFT		1
+#define _PAGE_SZ_4K		(0x0)
+#define _PAGE_SZ_8K		(0x1 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_16K		(0x2 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_32K		(0x3 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_64K		(0x4 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_128K		(0x5 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_256K		(0x6 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_512K		(0x7 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_1M		(0x8 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_2M		(0x9 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_4M		(0xa << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_MASK		(0xf << _PAGE_SZ_SHIFT)
+
+#if defined(CONFIG_PAGE_SIZE_4K)
+#define _PAGE_SZ		(_PAGE_SZ_4K)
+#elif defined(CONFIG_PAGE_SIZE_8K)
+#define _PAGE_SZ		(_PAGE_SZ_8K)
+#elif defined(CONFIG_PAGE_SIZE_16K)
+#define _PAGE_SZ		(_PAGE_SZ_16K)
+#endif
+#define _PAGE_TABLE		(_PAGE_SZ | _PAGE_PRESENT)
+
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_8K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_16K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_32K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_64K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_128K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_256K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_512K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
+# define _PAGE_SZHUGE		(_PAGE_SZ_1M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
+# define _PAGE_SZHUGE		(_PAGE_SZ_2M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
+# define _PAGE_SZHUGE		(_PAGE_SZ_4M)
+#endif
+
+/*
+ * The Linux memory management assumes a three-level page table setup. On
+ * Meta, we use that, but "fold" the mid level into the top-level page
+ * table.
+ */
+
+/* PGDIR_SHIFT determines the size of the area a second-level page table can
+ * map. This is always 4MB.
+ */
+
+#define PGDIR_SHIFT	22
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/*
+ * Entries per page directory level: we use a two-level, so
+ * we don't really have any PMD directory physically. First level tables
+ * always map 2Gb (local or global) at a granularity of 4MB, second-level
+ * tables map 4MB with a granularity between 4MB and 4kB (between 1 and
+ * 1024 entries).
+ */
+#define PTRS_PER_PTE	(PGDIR_SIZE/PAGE_SIZE)
+#define HPTRS_PER_PTE	(PGDIR_SIZE/HPAGE_SIZE)
+#define PTRS_PER_PGD	512
+
+#define USER_PTRS_PER_PGD	256
+#define FIRST_USER_ADDRESS	META_MEMORY_BASE
+#define FIRST_USER_PGD_NR	pgd_index(FIRST_USER_ADDRESS)
+
+#define PAGE_NONE	__pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
+				 _PAGE_CACHEABLE)
+
+#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_WRITE | \
+				 _PAGE_ACCESSED | _PAGE_CACHEABLE)
+#define PAGE_SHARED_C	PAGE_SHARED
+#define PAGE_COPY	__pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
+				 _PAGE_CACHEABLE)
+#define PAGE_COPY_C	PAGE_COPY
+
+#define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
+				 _PAGE_CACHEABLE)
+#define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | _PAGE_DIRTY | \
+				 _PAGE_ACCESSED | _PAGE_WRITE | \
+				 _PAGE_CACHEABLE | _PAGE_KERNEL)
+
+#define __P000	PAGE_NONE
+#define __P001	PAGE_READONLY
+#define __P010	PAGE_COPY
+#define __P011	PAGE_COPY
+#define __P100	PAGE_READONLY
+#define __P101	PAGE_READONLY
+#define __P110	PAGE_COPY_C
+#define __P111	PAGE_COPY_C
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READONLY
+#define __S010	PAGE_SHARED
+#define __S011	PAGE_SHARED
+#define __S100	PAGE_READONLY
+#define __S101	PAGE_READONLY
+#define __S110	PAGE_SHARED_C
+#define __S111	PAGE_SHARED_C
+
+#ifndef __ASSEMBLY__
+
+#include <asm/page.h>
+
+/* zero page used for uninitialized stuff */
+extern unsigned long empty_zero_page;
+#define ZERO_PAGE(vaddr)	(virt_to_page(empty_zero_page))
+
+/* Certain architectures need to do special things when pte's
+ * within a page table are directly modified.  Thus, the following
+ * hook is made available.
+ */
+#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
+#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
+
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
+
+#define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
+
+#define pfn_pte(pfn, prot)	__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+
+#define pte_none(x)		(!pte_val(x))
+#define pte_present(x)		(pte_val(x) & _PAGE_PRESENT)
+#define pte_clear(mm, addr, xp)	do { pte_val(*(xp)) = 0; } while (0)
+
+#define pmd_none(x)		(!pmd_val(x))
+#define pmd_bad(x)		((pmd_val(x) & ~(PAGE_MASK | _PAGE_SZ_MASK)) \
+					!= (_PAGE_TABLE & ~_PAGE_SZ_MASK))
+#define pmd_present(x)		(pmd_val(x) & _PAGE_PRESENT)
+#define pmd_clear(xp)		do { pmd_val(*(xp)) = 0; } while (0)
+
+#define pte_page(x)		pfn_to_page(pte_pfn(x))
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+
+static inline int pte_write(pte_t pte)   { return pte_val(pte) & _PAGE_WRITE; }
+static inline int pte_dirty(pte_t pte)   { return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte)   { return pte_val(pte) & _PAGE_ACCESSED; }
+static inline int pte_file(pte_t pte)    { return pte_val(pte) & _PAGE_FILE; }
+static inline int pte_special(pte_t pte) { return 0; }
+
+static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= (~_PAGE_WRITE); return pte; }
+static inline pte_t pte_mkclean(pte_t pte)   { pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkold(pte_t pte)     { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkwrite(pte_t pte)   { pte_val(pte) |= _PAGE_WRITE; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte)   { pte_val(pte) |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte)   { pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
+static inline pte_t pte_mkhuge(pte_t pte)    { return pte; }
+
+/*
+ * Macro and implementation to make a page protection as uncacheable.
+ */
+#define pgprot_writecombine(prot)					\
+	__pgprot(pgprot_val(prot) & ~(_PAGE_CACHE_CTRL1 | _PAGE_CACHE_CTRL0))
+
+#define pgprot_noncached(prot)						\
+	__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE)
+
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+#define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+	return pte;
+}
+
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+{
+	unsigned long paddr = pmd_val(pmd) & PAGE_MASK;
+	if (!paddr)
+		return 0;
+	return (unsigned long)__va(paddr);
+}
+
+#define pmd_page(pmd)		(pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+#define pmd_page_shift(pmd)	(12 + ((pmd_val(pmd) & _PAGE_SZ_MASK) \
+					>> _PAGE_SZ_SHIFT))
+#define pmd_num_ptrs(pmd)	(PGDIR_SIZE >> pmd_page_shift(pmd))
+
+/*
+ * Each pgd is only 2k, mapping 2Gb (local or global). If we're in global
+ * space drop the top bit before indexing the pgd.
+ */
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+#define pgd_index(address)	((((address) & ~0x80000000) >> PGDIR_SHIFT) \
+							& (PTRS_PER_PGD-1))
+#else
+#define pgd_index(address)	(((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#endif
+
+#define pgd_offset(mm, address)	((mm)->pgd + pgd_index(address))
+
+#define pgd_offset_k(address)	pgd_offset(&init_mm, address)
+
+#define pmd_index(address)	(((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+
+/* Find an entry in the second-level page table.. */
+#if !defined(CONFIG_HUGETLB_PAGE)
+  /* all pages are of size (1 << PAGE_SHIFT), so no need to read 1st level pt */
+# define pte_index(pmd, address) \
+	(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#else
+  /* some pages are huge, so read 1st level pt to find out */
+# define pte_index(pmd, address) \
+	(((address) >> pmd_page_shift(pmd)) & (pmd_num_ptrs(pmd) - 1))
+#endif
+#define pte_offset_kernel(dir, address) \
+	((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(*(dir), address))
+#define pte_offset_map(dir, address)		pte_offset_kernel(dir, address)
+#define pte_offset_map_nested(dir, address)	pte_offset_kernel(dir, address)
+
+#define pte_unmap(pte)		do { } while (0)
+#define pte_unmap_nested(pte)	do { } while (0)
+
+#define pte_ERROR(e) \
+	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pgd_ERROR(e) \
+	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * Meta doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+static inline void update_mmu_cache(struct vm_area_struct *vma,
+				    unsigned long address, pte_t *pte)
+{
+}
+
+/*
+ * Encode and decode a swap entry (must be !pte_none(e) && !pte_present(e))
+ * Since PAGE_PRESENT is bit 1, we can use the bits above that.
+ */
+#define __swp_type(x)			(((x).val >> 1) & 0xff)
+#define __swp_offset(x)			((x).val >> 10)
+#define __swp_entry(type, offset)	((swp_entry_t) { ((type) << 1) | \
+					 ((offset) << 10) })
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)		((pte_t) { (x).val })
+
+#define PTE_FILE_MAX_BITS	22
+#define pte_to_pgoff(x)		(pte_val(x) >> 10)
+#define pgoff_to_pte(x)		__pte(((x) << 10) | _PAGE_FILE)
+
+#define kern_addr_valid(addr)	(1)
+
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
+	remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init()	do { } while (0)
+
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+void paging_init(unsigned long mem_end);
+
+#ifdef CONFIG_METAG_META12
+/* This is a workaround for an issue in Meta 1 cores. These cores cache
+ * invalid entries in the TLB so we always need to flush whenever we add
+ * a new pte. Unfortunately we can only flush the whole TLB not shoot down
+ * single entries so this is sub-optimal. This implementation ensures that
+ * we will get a flush at the second attempt, so we may still get repeated
+ * faults, we just don't overflow the kernel stack handling them.
+ */
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
+({									  \
+	int __changed = !pte_same(*(__ptep), __entry);			  \
+	if (__changed) {						  \
+		set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \
+	}								  \
+	flush_tlb_page(__vma, __address);				  \
+	__changed;							  \
+})
+#endif
+
+#include <asm-generic/pgtable.h>
+
+#endif /* __ASSEMBLY__ */
+#endif /* _METAG_PGTABLE_H */
diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h
new file mode 100644
index 0000000..9b029a7
--- /dev/null
+++ b/arch/metag/include/asm/processor.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2005,2006,2007,2008 Imagination Technologies
+ */
+
+#ifndef __ASM_METAG_PROCESSOR_H
+#define __ASM_METAG_PROCESSOR_H
+
+#include <linux/atomic.h>
+
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/metag_regs.h>
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ __label__ _l; _l: &&_l; })
+
+/* The task stops where the kernel starts */
+#define TASK_SIZE	PAGE_OFFSET
+/* Add an extra page of padding at the top of the stack for the guard page. */
+#define STACK_TOP	(TASK_SIZE - PAGE_SIZE)
+#define STACK_TOP_MAX	STACK_TOP
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE	META_MEMORY_BASE
+
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+#ifdef CONFIG_METAG_FPU
+struct meta_fpu_context {
+	TBICTXEXTFPU fpstate;
+	union {
+		struct {
+			TBICTXEXTBB4 fx8_15;
+			TBICTXEXTFPACC fpacc;
+		} fx8_15;
+		struct {
+			TBICTXEXTFPACC fpacc;
+			TBICTXEXTBB4 unused;
+		} nofx8_15;
+	} extfpstate;
+	bool needs_restore;
+};
+#else
+struct meta_fpu_context {};
+#endif
+
+#ifdef CONFIG_METAG_DSP
+struct meta_ext_context {
+	struct {
+		TBIEXTCTX ctx;
+		TBICTXEXTBB8 bb8;
+		TBIDUAL ax[TBICTXEXTAXX_BYTES / sizeof(TBIDUAL)];
+		TBICTXEXTHL2 hl2;
+		TBICTXEXTTDPR ext;
+		TBICTXEXTRP6 rp;
+	} regs;
+
+	/* DSPRAM A and B save areas. */
+	void *ram[2];
+
+	/* ECH encoded size of DSPRAM save areas. */
+	unsigned int ram_sz[2];
+};
+#else
+struct meta_ext_context {};
+#endif
+
+struct thread_struct {
+	PTBICTX kernel_context;
+	/* A copy of the user process Sig.SaveMask. */
+	unsigned int user_flags;
+	struct meta_fpu_context *fpu_context;
+	void __user *tls_ptr;
+	unsigned short int_depth;
+	unsigned short txdefr_failure;
+	struct meta_ext_context *dsp_context;
+};
+
+#define INIT_THREAD  { \
+	NULL,			/* kernel_context */	\
+	0,			/* user_flags */	\
+	NULL,			/* fpu_context */	\
+	NULL,			/* tls_ptr */		\
+	1,			/* int_depth - we start in kernel */	\
+	0,			/* txdefr_failure */	\
+	NULL,			/* dsp_context */	\
+}
+
+/* Needed to make #define as we are referencing 'current', that is not visible
+ * yet.
+ *
+ * Stack layout is as below.
+
+      argc            argument counter (integer)
+      argv[0]         program name (pointer)
+      argv[1...N]     program args (pointers)
+      argv[argc-1]    end of args (integer)
+      NULL
+      env[0...N]      environment variables (pointers)
+      NULL
+
+ */
+#define start_thread(regs, pc, usp) do {				   \
+	unsigned int *argc = (unsigned int *) bprm->exec;		   \
+	set_fs(USER_DS);						   \
+	current->thread.int_depth = 1;					   \
+	/* Force this process down to user land */			   \
+	regs->ctx.SaveMask = TBICTX_PRIV_BIT;				   \
+	regs->ctx.CurrPC = pc;						   \
+	regs->ctx.AX[0].U0 = usp;					   \
+	regs->ctx.DX[3].U1 = *((int *)argc);			/* argc */ \
+	regs->ctx.DX[3].U0 = (int)((int *)argc + 1);		/* argv */ \
+	regs->ctx.DX[2].U1 = (int)((int *)argc +			   \
+				   regs->ctx.DX[3].U1 + 2);	/* envp */ \
+	regs->ctx.DX[2].U0 = 0;				   /* rtld_fini */ \
+} while (0)
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+
+/* Free all resources held by a thread. */
+static inline void release_thread(struct task_struct *dead_task)
+{
+}
+
+#define copy_segments(tsk, mm)		do { } while (0)
+#define release_segments(mm)		do { } while (0)
+
+extern void exit_thread(void);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+#define	thread_saved_pc(tsk)	\
+	((unsigned long)(tsk)->thread.kernel_context->CurrPC)
+#define thread_saved_sp(tsk)	\
+	((unsigned long)(tsk)->thread.kernel_context->AX[0].U0)
+#define thread_saved_fp(tsk)	\
+	((unsigned long)(tsk)->thread.kernel_context->AX[1].U0)
+
+unsigned long get_wchan(struct task_struct *p);
+
+#define	KSTK_EIP(tsk)	((tsk)->thread.kernel_context->CurrPC)
+#define	KSTK_ESP(tsk)	((tsk)->thread.kernel_context->AX[0].U0)
+
+#define user_stack_pointer(regs)        ((regs)->ctx.AX[0].U0)
+
+#define cpu_relax()     barrier()
+
+extern void setup_priv(void);
+
+static inline unsigned int hard_processor_id(void)
+{
+	unsigned int id;
+
+	asm volatile ("MOV	%0, TXENABLE\n"
+		      "AND	%0, %0, %1\n"
+		      "LSR	%0, %0, %2\n"
+		      : "=&d" (id)
+		      : "I" (TXENABLE_THREAD_BITS),
+			"K" (TXENABLE_THREAD_S)
+		      );
+
+	return id;
+}
+
+#define OP3_EXIT	0
+
+#define HALT_OK		0
+#define HALT_PANIC	-1
+
+/*
+ * Halt (stop) the hardware thread. This instruction sequence is the
+ * standard way to cause a Meta hardware thread to exit. The exit code
+ * is pushed onto the stack which is interpreted by the debug adapter.
+ */
+static inline void hard_processor_halt(int exit_code)
+{
+	asm volatile ("MOV	D1Ar1, %0\n"
+		      "MOV	D0Ar6, %1\n"
+		      "MSETL	[A0StP],D0Ar6,D0Ar4,D0Ar2\n"
+		      "1:\n"
+		      "SWITCH	#0xC30006\n"
+		      "B		1b\n"
+		      : : "r" (exit_code), "K" (OP3_EXIT));
+}
+
+/* Set these hooks to call SoC specific code to restart/halt/power off. */
+extern void (*soc_restart)(char *cmd);
+extern void (*soc_halt)(void);
+
+extern void show_trace(struct task_struct *tsk, unsigned long *sp,
+		       struct pt_regs *regs);
+
+#endif
diff --git a/arch/metag/include/asm/prom.h b/arch/metag/include/asm/prom.h
new file mode 100644
index 0000000..d2aa35d
--- /dev/null
+++ b/arch/metag/include/asm/prom.h
@@ -0,0 +1,23 @@
+/*
+ *  arch/metag/include/asm/prom.h
+ *
+ *  Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ *  Based on ARM version:
+ *  Copyright (C) 2009 Canonical Ltd. <jeremy.kerr@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#ifndef __ASM_METAG_PROM_H
+#define __ASM_METAG_PROM_H
+
+#include <asm/setup.h>
+#define HAVE_ARCH_DEVTREE_FIXUPS
+
+extern struct machine_desc *setup_machine_fdt(void *dt);
+extern void copy_fdt(void);
+
+#endif /* __ASM_METAG_PROM_H */
diff --git a/arch/metag/include/asm/ptrace.h b/arch/metag/include/asm/ptrace.h
new file mode 100644
index 0000000..fcabc18
--- /dev/null
+++ b/arch/metag/include/asm/ptrace.h
@@ -0,0 +1,60 @@
+#ifndef _METAG_PTRACE_H
+#define _METAG_PTRACE_H
+
+#include <linux/compiler.h>
+#include <uapi/asm/ptrace.h>
+#include <asm/tbx.h>
+
+#ifndef __ASSEMBLY__
+
+/* this struct defines the way the registers are stored on the
+   stack during a system call. */
+
+struct pt_regs {
+	TBICTX ctx;
+	TBICTXEXTCB0 extcb0[5];
+};
+
+#define user_mode(regs) (((regs)->ctx.SaveMask & TBICTX_PRIV_BIT) > 0)
+
+#define instruction_pointer(regs) ((unsigned long)(regs)->ctx.CurrPC)
+#define profile_pc(regs) instruction_pointer(regs)
+
+#define task_pt_regs(task) \
+	((struct pt_regs *)(task_stack_page(task) + \
+			    sizeof(struct thread_info)))
+
+#define current_pt_regs() \
+	((struct pt_regs *)((char *)current_thread_info() + \
+			    sizeof(struct thread_info)))
+
+int syscall_trace_enter(struct pt_regs *regs);
+void syscall_trace_leave(struct pt_regs *regs);
+
+/* copy a struct user_gp_regs out to user */
+int metag_gp_regs_copyout(const struct pt_regs *regs,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf);
+/* copy a struct user_gp_regs in from user */
+int metag_gp_regs_copyin(struct pt_regs *regs,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf);
+/* copy a struct user_cb_regs out to user */
+int metag_cb_regs_copyout(const struct pt_regs *regs,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf);
+/* copy a struct user_cb_regs in from user */
+int metag_cb_regs_copyin(struct pt_regs *regs,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf);
+/* copy a struct user_rp_state out to user */
+int metag_rp_state_copyout(const struct pt_regs *regs,
+			   unsigned int pos, unsigned int count,
+			   void *kbuf, void __user *ubuf);
+/* copy a struct user_rp_state in from user */
+int metag_rp_state_copyin(struct pt_regs *regs,
+			  unsigned int pos, unsigned int count,
+			  const void *kbuf, const void __user *ubuf);
+
+#endif /* __ASSEMBLY__ */
+#endif /* _METAG_PTRACE_H */
diff --git a/arch/metag/include/asm/setup.h b/arch/metag/include/asm/setup.h
new file mode 100644
index 0000000..e13083b
--- /dev/null
+++ b/arch/metag/include/asm/setup.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_METAG_SETUP_H
+#define _ASM_METAG_SETUP_H
+
+#include <uapi/asm/setup.h>
+
+void per_cpu_trap_init(unsigned long);
+extern void __init dump_machine_table(void);
+#endif /* _ASM_METAG_SETUP_H */
diff --git a/arch/metag/include/asm/smp.h b/arch/metag/include/asm/smp.h
new file mode 100644
index 0000000..e0373f8
--- /dev/null
+++ b/arch/metag/include/asm/smp.h
@@ -0,0 +1,29 @@
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+#include <linux/cpumask.h>
+
+#define raw_smp_processor_id() (current_thread_info()->cpu)
+
+enum ipi_msg_type {
+	IPI_CALL_FUNC,
+	IPI_CALL_FUNC_SINGLE,
+	IPI_RESCHEDULE,
+};
+
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
+
+asmlinkage void secondary_start_kernel(void);
+
+extern void secondary_startup(void);
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern void __cpu_die(unsigned int cpu);
+extern int __cpu_disable(void);
+extern void cpu_die(void);
+#endif
+
+extern void smp_init_cpus(void);
+#endif /* __ASM_SMP_H */
diff --git a/arch/metag/include/asm/sparsemem.h b/arch/metag/include/asm/sparsemem.h
new file mode 100644
index 0000000..03fe255
--- /dev/null
+++ b/arch/metag/include/asm/sparsemem.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_METAG_SPARSEMEM_H
+#define __ASM_METAG_SPARSEMEM_H
+
+/*
+ * SECTION_SIZE_BITS		2^N: how big each section will be
+ * MAX_PHYSADDR_BITS		2^N: how much physical address space we have
+ * MAX_PHYSMEM_BITS		2^N: how much memory we can have in that space
+ */
+#define SECTION_SIZE_BITS	26
+#define MAX_PHYSADDR_BITS	32
+#define MAX_PHYSMEM_BITS	32
+
+#endif /* __ASM_METAG_SPARSEMEM_H */
diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h
new file mode 100644
index 0000000..86a7cf3
--- /dev/null
+++ b/arch/metag/include/asm/spinlock.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#ifdef CONFIG_METAG_ATOMICITY_LOCK1
+#include <asm/spinlock_lock1.h>
+#else
+#include <asm/spinlock_lnkget.h>
+#endif
+
+#define arch_spin_unlock_wait(lock) \
+	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+#define	arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define	arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
+
+#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/metag/include/asm/spinlock_lnkget.h b/arch/metag/include/asm/spinlock_lnkget.h
new file mode 100644
index 0000000..ad8436f
--- /dev/null
+++ b/arch/metag/include/asm/spinlock_lnkget.h
@@ -0,0 +1,249 @@
+#ifndef __ASM_SPINLOCK_LNKGET_H
+#define __ASM_SPINLOCK_LNKGET_H
+
+/*
+ * None of these asm statements clobber memory as LNKSET writes around
+ * the cache so the memory it modifies cannot safely be read by any means
+ * other than these accessors.
+ */
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	int ret;
+
+	asm volatile ("LNKGETD	%0, [%1]\n"
+		      "TST	%0, #1\n"
+		      "MOV	%0, #1\n"
+		      "XORZ      %0, %0, %0\n"
+		      : "=&d" (ret)
+		      : "da" (&lock->lock)
+		      : "cc");
+	return ret;
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	int tmp;
+
+	asm volatile ("1:     LNKGETD %0,[%1]\n"
+		      "       TST     %0, #1\n"
+		      "       ADD     %0, %0, #1\n"
+		      "       LNKSETDZ [%1], %0\n"
+		      "       BNZ     1b\n"
+		      "       DEFR    %0, TXSTAT\n"
+		      "       ANDT    %0, %0, #HI(0x3f000000)\n"
+		      "       CMPT    %0, #HI(0x02000000)\n"
+		      "       BNZ     1b\n"
+		      : "=&d" (tmp)
+		      : "da" (&lock->lock)
+		      : "cc");
+
+	smp_mb();
+}
+
+/* Returns 0 if failed to acquire lock */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	int tmp;
+
+	asm volatile ("       LNKGETD %0,[%1]\n"
+		      "       TST     %0, #1\n"
+		      "       ADD     %0, %0, #1\n"
+		      "       LNKSETDZ [%1], %0\n"
+		      "       BNZ     1f\n"
+		      "       DEFR    %0, TXSTAT\n"
+		      "       ANDT    %0, %0, #HI(0x3f000000)\n"
+		      "       CMPT    %0, #HI(0x02000000)\n"
+		      "       MOV     %0, #1\n"
+		      "1:     XORNZ   %0, %0, %0\n"
+		      : "=&d" (tmp)
+		      : "da" (&lock->lock)
+		      : "cc");
+
+	smp_mb();
+
+	return tmp;
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	smp_mb();
+
+	asm volatile ("       SETD    [%0], %1\n"
+		      :
+		      : "da" (&lock->lock), "da" (0)
+		      : "memory");
+}
+
+/*
+ * RWLOCKS
+ *
+ *
+ * Write locks are easy - we just set bit 31.  When unlocking, we can
+ * just write zero since the lock is exclusively held.
+ */
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	int tmp;
+
+	asm volatile ("1:     LNKGETD %0,[%1]\n"
+		      "       CMP     %0, #0\n"
+		      "       ADD     %0, %0, %2\n"
+		      "       LNKSETDZ [%1], %0\n"
+		      "       BNZ     1b\n"
+		      "       DEFR    %0, TXSTAT\n"
+		      "       ANDT    %0, %0, #HI(0x3f000000)\n"
+		      "       CMPT    %0, #HI(0x02000000)\n"
+		      "       BNZ     1b\n"
+		      : "=&d" (tmp)
+		      : "da" (&rw->lock), "bd" (0x80000000)
+		      : "cc");
+
+	smp_mb();
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	int tmp;
+
+	asm volatile ("       LNKGETD %0,[%1]\n"
+		      "       CMP     %0, #0\n"
+		      "       ADD     %0, %0, %2\n"
+		      "       LNKSETDZ [%1], %0\n"
+		      "       BNZ     1f\n"
+		      "       DEFR    %0, TXSTAT\n"
+		      "       ANDT    %0, %0, #HI(0x3f000000)\n"
+		      "       CMPT    %0, #HI(0x02000000)\n"
+		      "       MOV     %0,#1\n"
+		      "1:     XORNZ   %0, %0, %0\n"
+		      : "=&d" (tmp)
+		      : "da" (&rw->lock), "bd" (0x80000000)
+		      : "cc");
+
+	smp_mb();
+
+	return tmp;
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	smp_mb();
+
+	asm volatile ("       SETD    [%0], %1\n"
+		      :
+		      : "da" (&rw->lock), "da" (0)
+		      : "memory");
+}
+
+/* write_can_lock - would write_trylock() succeed? */
+static inline int arch_write_can_lock(arch_rwlock_t *rw)
+{
+	int ret;
+
+	asm volatile ("LNKGETD	%0, [%1]\n"
+		      "CMP	%0, #0\n"
+		      "MOV	%0, #1\n"
+		      "XORNZ     %0, %0, %0\n"
+		      : "=&d" (ret)
+		      : "da" (&rw->lock)
+		      : "cc");
+	return ret;
+}
+
+/*
+ * Read locks are a bit more hairy:
+ *  - Exclusively load the lock value.
+ *  - Increment it.
+ *  - Store new lock value if positive, and we still own this location.
+ *    If the value is negative, we've already failed.
+ *  - If we failed to store the value, we want a negative result.
+ *  - If we failed, try again.
+ * Unlocking is similarly hairy.  We may have multiple read locks
+ * currently active.  However, we know we won't have any write
+ * locks.
+ */
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	int tmp;
+
+	asm volatile ("1:     LNKGETD %0,[%1]\n"
+		      "       ADDS    %0, %0, #1\n"
+		      "       LNKSETDPL [%1], %0\n"
+		      "       BMI     1b\n"
+		      "       DEFR    %0, TXSTAT\n"
+		      "       ANDT    %0, %0, #HI(0x3f000000)\n"
+		      "       CMPT    %0, #HI(0x02000000)\n"
+		      "       BNZ     1b\n"
+		      : "=&d" (tmp)
+		      : "da" (&rw->lock)
+		      : "cc");
+
+	smp_mb();
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	int tmp;
+
+	smp_mb();
+
+	asm volatile ("1:     LNKGETD %0,[%1]\n"
+		      "       SUB     %0, %0, #1\n"
+		      "       LNKSETD [%1], %0\n"
+		      "       DEFR    %0, TXSTAT\n"
+		      "       ANDT    %0, %0, #HI(0x3f000000)\n"
+		      "       CMPT    %0, #HI(0x02000000)\n"
+		      "       BNZ     1b\n"
+		      : "=&d" (tmp)
+		      : "da" (&rw->lock)
+		      : "cc", "memory");
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	int tmp;
+
+	asm volatile ("       LNKGETD %0,[%1]\n"
+		      "       ADDS    %0, %0, #1\n"
+		      "       LNKSETDPL [%1], %0\n"
+		      "       BMI     1f\n"
+		      "       DEFR    %0, TXSTAT\n"
+		      "       ANDT    %0, %0, #HI(0x3f000000)\n"
+		      "       CMPT    %0, #HI(0x02000000)\n"
+		      "       MOV     %0,#1\n"
+		      "       BZ      2f\n"
+		      "1:     MOV     %0,#0\n"
+		      "2:\n"
+		      : "=&d" (tmp)
+		      : "da" (&rw->lock)
+		      : "cc");
+
+	smp_mb();
+
+	return tmp;
+}
+
+/* read_can_lock - would read_trylock() succeed? */
+static inline int arch_read_can_lock(arch_rwlock_t *rw)
+{
+	int tmp;
+
+	asm volatile ("LNKGETD	%0, [%1]\n"
+		      "CMP	%0, %2\n"
+		      "MOV	%0, #1\n"
+		      "XORZ	%0, %0, %0\n"
+		      : "=&d" (tmp)
+		      : "da" (&rw->lock), "bd" (0x80000000)
+		      : "cc");
+	return tmp;
+}
+
+#define	arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define	arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
+
+#endif /* __ASM_SPINLOCK_LNKGET_H */
diff --git a/arch/metag/include/asm/spinlock_lock1.h b/arch/metag/include/asm/spinlock_lock1.h
new file mode 100644
index 0000000..c630444
--- /dev/null
+++ b/arch/metag/include/asm/spinlock_lock1.h
@@ -0,0 +1,184 @@
+#ifndef __ASM_SPINLOCK_LOCK1_H
+#define __ASM_SPINLOCK_LOCK1_H
+
+#include <asm/bug.h>
+#include <asm/global_lock.h>
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	int ret;
+
+	barrier();
+	ret = lock->lock;
+	WARN_ON(ret != 0 && ret != 1);
+	return ret;
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	unsigned int we_won = 0;
+	unsigned long flags;
+
+again:
+	__global_lock1(flags);
+	if (lock->lock == 0) {
+		fence();
+		lock->lock = 1;
+		we_won = 1;
+	}
+	__global_unlock1(flags);
+	if (we_won == 0)
+		goto again;
+	WARN_ON(lock->lock != 1);
+}
+
+/* Returns 0 if failed to acquire lock */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned long flags;
+	unsigned int ret;
+
+	__global_lock1(flags);
+	ret = lock->lock;
+	if (ret == 0) {
+		fence();
+		lock->lock = 1;
+	}
+	__global_unlock1(flags);
+	return (ret == 0);
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	barrier();
+	WARN_ON(!lock->lock);
+	lock->lock = 0;
+}
+
+/*
+ * RWLOCKS
+ *
+ *
+ * Write locks are easy - we just set bit 31.  When unlocking, we can
+ * just write zero since the lock is exclusively held.
+ */
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	unsigned long flags;
+	unsigned int we_won = 0;
+
+again:
+	__global_lock1(flags);
+	if (rw->lock == 0) {
+		fence();
+		rw->lock = 0x80000000;
+		we_won = 1;
+	}
+	__global_unlock1(flags);
+	if (we_won == 0)
+		goto again;
+	WARN_ON(rw->lock != 0x80000000);
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	unsigned long flags;
+	unsigned int ret;
+
+	__global_lock1(flags);
+	ret = rw->lock;
+	if (ret == 0) {
+		fence();
+		rw->lock = 0x80000000;
+	}
+	__global_unlock1(flags);
+
+	return (ret == 0);
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	barrier();
+	WARN_ON(rw->lock != 0x80000000);
+	rw->lock = 0;
+}
+
+/* write_can_lock - would write_trylock() succeed? */
+static inline int arch_write_can_lock(arch_rwlock_t *rw)
+{
+	unsigned int ret;
+
+	barrier();
+	ret = rw->lock;
+	return (ret == 0);
+}
+
+/*
+ * Read locks are a bit more hairy:
+ *  - Exclusively load the lock value.
+ *  - Increment it.
+ *  - Store new lock value if positive, and we still own this location.
+ *    If the value is negative, we've already failed.
+ *  - If we failed to store the value, we want a negative result.
+ *  - If we failed, try again.
+ * Unlocking is similarly hairy.  We may have multiple read locks
+ * currently active.  However, we know we won't have any write
+ * locks.
+ */
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	unsigned long flags;
+	unsigned int we_won = 0, ret;
+
+again:
+	__global_lock1(flags);
+	ret = rw->lock;
+	if (ret < 0x80000000) {
+		fence();
+		rw->lock = ret + 1;
+		we_won = 1;
+	}
+	__global_unlock1(flags);
+	if (!we_won)
+		goto again;
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	unsigned long flags;
+	unsigned int ret;
+
+	__global_lock1(flags);
+	fence();
+	ret = rw->lock--;
+	__global_unlock1(flags);
+	WARN_ON(ret == 0);
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	unsigned long flags;
+	unsigned int ret;
+
+	__global_lock1(flags);
+	ret = rw->lock;
+	if (ret < 0x80000000) {
+		fence();
+		rw->lock = ret + 1;
+	}
+	__global_unlock1(flags);
+	return (ret < 0x80000000);
+}
+
+/* read_can_lock - would read_trylock() succeed? */
+static inline int arch_read_can_lock(arch_rwlock_t *rw)
+{
+	unsigned int ret;
+
+	barrier();
+	ret = rw->lock;
+	return (ret < 0x80000000);
+}
+
+#endif /* __ASM_SPINLOCK_LOCK1_H */
diff --git a/arch/metag/include/asm/spinlock_types.h b/arch/metag/include/asm/spinlock_types.h
new file mode 100644
index 0000000..b763914
--- /dev/null
+++ b/arch/metag/include/asm/spinlock_types.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_METAG_SPINLOCK_TYPES_H
+#define _ASM_METAG_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+typedef struct {
+	volatile unsigned int lock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+
+typedef struct {
+	volatile unsigned int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
+
+#endif /* _ASM_METAG_SPINLOCK_TYPES_H */
diff --git a/arch/metag/include/asm/stacktrace.h b/arch/metag/include/asm/stacktrace.h
new file mode 100644
index 0000000..2830a0f
--- /dev/null
+++ b/arch/metag/include/asm/stacktrace.h
@@ -0,0 +1,20 @@
+#ifndef __ASM_STACKTRACE_H
+#define __ASM_STACKTRACE_H
+
+struct stackframe {
+	unsigned long fp;
+	unsigned long sp;
+	unsigned long lr;
+	unsigned long pc;
+};
+
+struct metag_frame {
+	unsigned long fp;
+	unsigned long lr;
+};
+
+extern int unwind_frame(struct stackframe *frame);
+extern void walk_stackframe(struct stackframe *frame,
+			    int (*fn)(struct stackframe *, void *), void *data);
+
+#endif	/* __ASM_STACKTRACE_H */
diff --git a/arch/metag/include/asm/string.h b/arch/metag/include/asm/string.h
new file mode 100644
index 0000000..53e3806
--- /dev/null
+++ b/arch/metag/include/asm/string.h
@@ -0,0 +1,13 @@
+#ifndef _METAG_STRING_H_
+#define _METAG_STRING_H_
+
+#define __HAVE_ARCH_MEMSET
+extern void *memset(void *__s, int __c, size_t __count);
+
+#define __HAVE_ARCH_MEMCPY
+void *memcpy(void *__to, __const__ void *__from, size_t __n);
+
+#define __HAVE_ARCH_MEMMOVE
+extern void *memmove(void *__dest, __const__ void *__src, size_t __n);
+
+#endif /* _METAG_STRING_H_ */
diff --git a/arch/metag/include/asm/switch.h b/arch/metag/include/asm/switch.h
new file mode 100644
index 0000000..1fd6a58
--- /dev/null
+++ b/arch/metag/include/asm/switch.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _ASM_METAG_SWITCH_H
+#define _ASM_METAG_SWITCH_H
+
+/* metag SWITCH codes */
+#define __METAG_SW_PERM_BREAK	0x400002	/* compiled in breakpoint */
+#define __METAG_SW_SYS_LEGACY	0x440000	/* legacy system calls */
+#define __METAG_SW_SYS		0x440001	/* system calls */
+
+/* metag SWITCH instruction encoding */
+#define __METAG_SW_ENCODING(TYPE)	(0xaf000000 | (__METAG_SW_##TYPE))
+
+#endif /* _ASM_METAG_SWITCH_H */
diff --git a/arch/metag/include/asm/syscall.h b/arch/metag/include/asm/syscall.h
new file mode 100644
index 0000000..24fc979
--- /dev/null
+++ b/arch/metag/include/asm/syscall.h
@@ -0,0 +1,104 @@
+/*
+ * Access to user system call parameters and results
+ *
+ * Copyright (C) 2008 Imagination Technologies Ltd.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * See asm-generic/syscall.h for descriptions of what we must do here.
+ */
+
+#ifndef _ASM_METAG_SYSCALL_H
+#define _ASM_METAG_SYSCALL_H
+
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/uaccess.h>
+
+#include <asm/switch.h>
+
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	unsigned long insn;
+
+	/*
+	 * FIXME there's no way to find out how we got here other than to
+	 * examine the memory at the PC to see if it is a syscall
+	 * SWITCH instruction.
+	 */
+	if (get_user(insn, (unsigned long *)(regs->ctx.CurrPC - 4)))
+		return -1;
+
+	if (insn == __METAG_SW_ENCODING(SYS))
+		return regs->ctx.DX[0].U1;
+	else
+		return -1L;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	/* do nothing */
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	unsigned long error = regs->ctx.DX[0].U0;
+	return IS_ERR_VALUE(error) ? error : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->ctx.DX[0].U0;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	regs->ctx.DX[0].U0 = (long) error ?: val;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	unsigned int reg, j;
+	BUG_ON(i + n > 6);
+
+	for (j = i, reg = 6 - i; j < (i + n); j++, reg--) {
+		if (reg % 2)
+			args[j] = regs->ctx.DX[(reg + 1) / 2].U0;
+		else
+			args[j] = regs->ctx.DX[reg / 2].U1;
+	}
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	unsigned int reg;
+	BUG_ON(i + n > 6);
+
+	for (reg = 6 - i; i < (i + n); i++, reg--) {
+		if (reg % 2)
+			regs->ctx.DX[(reg + 1) / 2].U0 = args[i];
+		else
+			regs->ctx.DX[reg / 2].U1 = args[i];
+	}
+}
+
+#define NR_syscalls __NR_syscalls
+
+/* generic syscall table */
+extern const void *sys_call_table[];
+
+#endif	/* _ASM_METAG_SYSCALL_H */
diff --git a/arch/metag/include/asm/syscalls.h b/arch/metag/include/asm/syscalls.h
new file mode 100644
index 0000000..a02b955
--- /dev/null
+++ b/arch/metag/include/asm/syscalls.h
@@ -0,0 +1,39 @@
+#ifndef _ASM_METAG_SYSCALLS_H
+#define _ASM_METAG_SYSCALLS_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/signal.h>
+
+/* kernel/signal.c */
+#define sys_rt_sigreturn sys_rt_sigreturn
+asmlinkage long sys_rt_sigreturn(void);
+
+#include <asm-generic/syscalls.h>
+
+/* kernel/sys_metag.c */
+asmlinkage int sys_metag_setglobalbit(char __user *, int);
+asmlinkage void sys_metag_set_fpu_flags(unsigned int);
+asmlinkage int sys_metag_set_tls(void __user *);
+asmlinkage void *sys_metag_get_tls(void);
+
+asmlinkage long sys_truncate64_metag(const char __user *, unsigned long,
+				     unsigned long);
+asmlinkage long sys_ftruncate64_metag(unsigned int, unsigned long,
+				      unsigned long);
+asmlinkage long sys_fadvise64_64_metag(int, unsigned long, unsigned long,
+				       unsigned long, unsigned long, int);
+asmlinkage long sys_readahead_metag(int, unsigned long, unsigned long, size_t);
+asmlinkage ssize_t sys_pread64_metag(unsigned long, char __user *, size_t,
+				     unsigned long, unsigned long);
+asmlinkage ssize_t sys_pwrite64_metag(unsigned long, char __user *, size_t,
+				      unsigned long, unsigned long);
+asmlinkage long sys_sync_file_range_metag(int, unsigned long, unsigned long,
+					  unsigned long, unsigned long,
+					  unsigned int);
+
+int do_work_pending(struct pt_regs *regs, unsigned int thread_flags,
+		    int syscall);
+
+#endif /* _ASM_METAG_SYSCALLS_H */
diff --git a/arch/metag/include/asm/tbx.h b/arch/metag/include/asm/tbx.h
new file mode 100644
index 0000000..287b36f
--- /dev/null
+++ b/arch/metag/include/asm/tbx.h
@@ -0,0 +1,1425 @@
+/*
+ * asm/tbx.h
+ *
+ * Copyright (C) 2000-2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Thread binary interface header
+ */
+
+#ifndef _ASM_METAG_TBX_H_
+#define _ASM_METAG_TBX_H_
+
+/* for CACHEW_* values */
+#include <asm/metag_isa.h>
+/* for LINSYSEVENT_* addresses */
+#include <asm/metag_mem.h>
+
+#ifdef  TBI_1_4
+#ifndef TBI_MUTEXES_1_4
+#define TBI_MUTEXES_1_4
+#endif
+#ifndef TBI_SEMAPHORES_1_4
+#define TBI_SEMAPHORES_1_4
+#endif
+#ifndef TBI_ASYNC_SWITCH_1_4
+#define TBI_ASYNC_SWITCH_1_4
+#endif
+#ifndef TBI_FASTINT_1_4
+#define TBI_FASTINT_1_4
+#endif
+#endif
+
+
+/* Id values in the TBI system describe a segment using an arbitrary
+   integer value and flags in the bottom 8 bits, the SIGPOLL value is
+   used in cases where control over blocking or polling behaviour is
+   needed. */
+#define TBID_SIGPOLL_BIT    0x02 /* Set bit in an Id value to poll vs block */
+/* Extended segment identifiers use strings in the string table */
+#define TBID_IS_SEGSTR( Id ) (((Id) & (TBID_SEGTYPE_BITS>>1)) == 0)
+
+/* Segment identifiers contain the following related bit-fields */
+#define TBID_SEGTYPE_BITS   0x0F /* One of the predefined segment types */
+#define TBID_SEGTYPE_S      0
+#define TBID_SEGSCOPE_BITS  0x30 /* Indicates the scope of the segment */
+#define TBID_SEGSCOPE_S     4
+#define TBID_SEGGADDR_BITS  0xC0 /* Indicates access possible via pGAddr */
+#define TBID_SEGGADDR_S     6
+
+/* Segments of memory can only really contain a few types of data */
+#define TBID_SEGTYPE_TEXT   0x02 /* Code segment */
+#define TBID_SEGTYPE_DATA   0x04 /* Data segment */
+#define TBID_SEGTYPE_STACK  0x06 /* Stack segment */
+#define TBID_SEGTYPE_HEAP   0x0A /* Heap segment */
+#define TBID_SEGTYPE_ROOT   0x0C /* Root block segments */
+#define TBID_SEGTYPE_STRING 0x0E /* String table segment */
+
+/* Segments have one of three possible scopes */
+#define TBID_SEGSCOPE_INIT     0 /* Temporary area for initialisation phase */
+#define TBID_SEGSCOPE_LOCAL    1 /* Private to this thread */
+#define TBID_SEGSCOPE_GLOBAL   2 /* Shared globally throughout the system */
+#define TBID_SEGSCOPE_SHARED   3 /* Limited sharing between local/global */
+
+/* For segment specifier a further field in two of the remaining bits
+   indicates the usefulness of the pGAddr field in the segment descriptor
+   descriptor. */
+#define TBID_SEGGADDR_NULL     0 /* pGAddr is NULL -> SEGSCOPE_(LOCAL|INIT) */
+#define TBID_SEGGADDR_READ     1 /* Only read    via pGAddr */
+#define TBID_SEGGADDR_WRITE    2 /* Full access  via pGAddr */
+#define TBID_SEGGADDR_EXEC     3 /* Only execute via pGAddr */
+
+/* The following values are common to both segment and signal Id value and
+   live in the top 8 bits of the Id values. */
+
+/* The ISTAT bit indicates if segments are related to interrupt vs
+   background level interfaces a thread can still handle all triggers at
+   either level, but can also split these up if it wants to. */
+#define TBID_ISTAT_BIT    0x01000000
+#define TBID_ISTAT_S      24
+
+/* Privilege needed to access a segment is indicated by the next bit.
+   
+   This bit is set to mirror the current privilege level when starting a
+   search for a segment - setting it yourself toggles the automatically
+   generated state which is only useful to emulate unprivileged behaviour
+   or access unprivileged areas of memory while at privileged level. */
+#define TBID_PSTAT_BIT    0x02000000
+#define TBID_PSTAT_S      25
+
+/* The top six bits of a signal/segment specifier identifies a thread within
+   the system. This represents a segments owner. */
+#define TBID_THREAD_BITS  0xFC000000
+#define TBID_THREAD_S     26
+
+/* Special thread id values */
+#define TBID_THREAD_NULL   (-32) /* Never matches any thread/segment id used */
+#define TBID_THREAD_GLOBAL (-31) /* Things global to all threads */
+#define TBID_THREAD_HOST   ( -1) /* Host interface */
+#define TBID_THREAD_EXTIO  (TBID_THREAD_HOST)   /* Host based ExtIO i/f */
+
+/* Virtual Id's are used for external thread interface structures or the
+   above special Id's */
+#define TBID_IS_VIRTTHREAD( Id ) ((Id) < 0)
+
+/* Real Id's are used for actual hardware threads that are local */
+#define TBID_IS_REALTHREAD( Id ) ((Id) >= 0)
+
+/* Generate a segment Id given Thread, Scope, and Type */
+#define TBID_SEG( Thread, Scope, Type )                           (\
+    ((Thread)<<TBID_THREAD_S) + ((Scope)<<TBID_SEGSCOPE_S) + (Type))
+
+/* Generate a signal Id given Thread and SigNum */
+#define TBID_SIG( Thread, SigNum )                                        (\
+    ((Thread)<<TBID_THREAD_S) + ((SigNum)<<TBID_SIGNUM_S) + TBID_SIGNAL_BIT)
+
+/* Generate an Id that solely represents a thread - useful for cache ops */
+#define TBID_THD( Thread ) ((Thread)<<TBID_THREAD_S)
+#define TBID_THD_NULL      ((TBID_THREAD_NULL)  <<TBID_THREAD_S)
+#define TBID_THD_GLOBAL    ((TBID_THREAD_GLOBAL)<<TBID_THREAD_S)
+
+/* Common exception handler (see TBID_SIGNUM_XXF below) receives hardware
+   generated fault codes TBIXXF_SIGNUM_xxF in it's SigNum parameter */
+#define TBIXXF_SIGNUM_IIF   0x01 /* General instruction fault */
+#define TBIXXF_SIGNUM_PGF   0x02 /* Privilege general fault */
+#define TBIXXF_SIGNUM_DHF   0x03 /* Data access watchpoint HIT */
+#define TBIXXF_SIGNUM_IGF   0x05 /* Code fetch general read failure */
+#define TBIXXF_SIGNUM_DGF   0x07 /* Data access general read/write fault */
+#define TBIXXF_SIGNUM_IPF   0x09 /* Code fetch page fault */
+#define TBIXXF_SIGNUM_DPF   0x0B /* Data access page fault */
+#define TBIXXF_SIGNUM_IHF   0x0D /* Instruction breakpoint HIT */
+#define TBIXXF_SIGNUM_DWF   0x0F /* Data access read-only fault */
+
+/* Hardware signals communicate events between processing levels within a
+   single thread all the _xxF cases are exceptions and are routed via a
+   common exception handler, _SWx are software trap events and kicks including
+   __TBISignal generated kicks, and finally _TRx are hardware triggers */
+#define TBID_SIGNUM_SW0     0x00 /* SWITCH GROUP 0 - Per thread user */
+#define TBID_SIGNUM_SW1     0x01 /* SWITCH GROUP 1 - Per thread system */
+#define TBID_SIGNUM_SW2     0x02 /* SWITCH GROUP 2 - Internal global request */
+#define TBID_SIGNUM_SW3     0x03 /* SWITCH GROUP 3 - External global request */
+#ifdef TBI_1_4
+#define TBID_SIGNUM_FPE     0x04 /* Deferred exception - Any IEEE 754 exception */
+#define TBID_SIGNUM_FPD     0x05 /* Deferred exception - Denormal exception */
+/* Reserved 0x6 for a reserved deferred exception */
+#define TBID_SIGNUM_BUS     0x07 /* Deferred exception - Bus Error */
+/* Reserved 0x08-0x09 */
+#else
+/* Reserved 0x04-0x09 */
+#endif
+#define TBID_SIGNUM_SWS     0x0A /* KICK received with SigMask != 0 */
+#define TBID_SIGNUM_SWK     0x0B /* KICK received with SigMask == 0 */
+/* Reserved 0x0C-0x0F */
+#define TBID_SIGNUM_TRT     0x10 /* Timer trigger */
+#define TBID_SIGNUM_LWK     0x11 /* Low level kick (handler provided by TBI) */
+#define TBID_SIGNUM_XXF     0x12 /* Fault handler - receives ALL _xxF sigs */
+#ifdef TBI_1_4
+#define TBID_SIGNUM_DFR     0x13 /* Deferred Exception handler */
+#else
+#define TBID_SIGNUM_FPE     0x13 /* FPE Exception handler */
+#endif
+/* External trigger one group 0x14 to 0x17 - per thread */
+#define TBID_SIGNUM_TR1(Thread) (0x14+(Thread))
+#define TBID_SIGNUM_T10     0x14
+#define TBID_SIGNUM_T11     0x15
+#define TBID_SIGNUM_T12     0x16
+#define TBID_SIGNUM_T13     0x17
+/* External trigger two group 0x18 to 0x1b - per thread */
+#define TBID_SIGNUM_TR2(Thread) (0x18+(Thread))
+#define TBID_SIGNUM_T20     0x18
+#define TBID_SIGNUM_T21     0x19
+#define TBID_SIGNUM_T22     0x1A
+#define TBID_SIGNUM_T23     0x1B
+#define TBID_SIGNUM_TR3     0x1C /* External trigger N-4 (global) */
+#define TBID_SIGNUM_TR4     0x1D /* External trigger N-3 (global) */
+#define TBID_SIGNUM_TR5     0x1E /* External trigger N-2 (global) */
+#define TBID_SIGNUM_TR6     0x1F /* External trigger N-1 (global) */
+#define TBID_SIGNUM_MAX     0x1F
+
+/* Return the trigger register(TXMASK[I]/TXSTAT[I]) bits related to
+   each hardware signal, sometimes this is a many-to-one relationship. */
+#define TBI_TRIG_BIT(SigNum)                                      (\
+    ((SigNum) >= TBID_SIGNUM_TRT) ? 1<<((SigNum)-TBID_SIGNUM_TRT) :\
+    ( ((SigNum) == TBID_SIGNUM_SWS) ||                             \
+      ((SigNum) == TBID_SIGNUM_SWK)    ) ?                         \
+                         TXSTAT_KICK_BIT : TXSTATI_BGNDHALT_BIT    )
+
+/* Return the hardware trigger vector number for entries in the
+   HWVEC0EXT table that will generate the required internal trigger. */
+#define TBI_TRIG_VEC(SigNum)                                      (\
+    ((SigNum) >= TBID_SIGNUM_T10) ? ((SigNum)-TBID_SIGNUM_TRT) : -1)
+
+/* Default trigger masks for each thread at background/interrupt level */
+#define TBI_TRIGS_INIT( Thread )                           (\
+    TXSTAT_KICK_BIT + TBI_TRIG_BIT(TBID_SIGNUM_TR1(Thread)) )
+#define TBI_INTS_INIT( Thread )                            (\
+    TXSTAT_KICK_BIT + TXSTATI_BGNDHALT_BIT                  \
+                    + TBI_TRIG_BIT(TBID_SIGNUM_TR2(Thread)) )
+
+#ifndef __ASSEMBLY__
+/* A spin-lock location is a zero-initialised location in memory */
+typedef volatile int TBISPIN, *PTBISPIN;
+
+/* A kick location is a hardware location you can write to
+ * in order to cause a kick
+ */
+typedef volatile int *PTBIKICK;
+
+#if defined(METAC_1_0) || defined(METAC_1_1)
+/* Macro to perform a kick */
+#define TBI_KICK( pKick ) do { pKick[0] = 1; } while (0)
+#else
+/* #define METAG_LIN_VALUES before including machine.h if required */
+#ifdef LINSYSEVENT_WR_COMBINE_FLUSH
+/* Macro to perform a kick - write combiners must be flushed */
+#define TBI_KICK( pKick )                                                do {\
+    volatile int *pFlush = (volatile int *) LINSYSEVENT_WR_COMBINE_FLUSH;    \
+    pFlush[0] = 0;                                                           \
+    pKick[0]  = 1;                                                } while (0)
+#endif
+#endif /* if defined(METAC_1_0) || defined(METAC_1_1) */
+#endif /* ifndef __ASSEMBLY__ */
+
+#ifndef __ASSEMBLY__
+/* 64-bit dual unit state value */
+typedef struct _tbidual_tag_ {
+    /* 32-bit value from a pair of registers in data or address units */
+    int U0, U1;
+} TBIDUAL, *PTBIDUAL;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBIDUAL */
+#define TBIDUAL_U0      (0)
+#define TBIDUAL_U1      (4)
+
+#define TBIDUAL_BYTES   (8)
+
+#define TBICTX_CRIT_BIT 0x0001  /* ASync state saved in TBICTX */
+#define TBICTX_SOFT_BIT 0x0002  /* Sync state saved in TBICTX (other bits 0) */
+#ifdef TBI_FASTINT_1_4
+#define TBICTX_FINT_BIT 0x0004  /* Using Fast Interrupts */
+#endif
+#define TBICTX_FPAC_BIT 0x0010  /* FPU state in TBICTX, FPU active on entry */
+#define TBICTX_XMCC_BIT 0x0020  /* Bit to identify a MECC task */
+#define TBICTX_CBUF_BIT 0x0040  /* Hardware catch buffer flag from TXSTATUS */
+#define TBICTX_CBRP_BIT 0x0080  /* Read pipeline dirty from TXDIVTIME */
+#define TBICTX_XDX8_BIT 0x0100  /* Saved DX.8 to DX.15 too */
+#define TBICTX_XAXX_BIT 0x0200  /* Save remaining AX registers to AX.7 */
+#define TBICTX_XHL2_BIT 0x0400  /* Saved hardware loop registers too */
+#define TBICTX_XTDP_BIT 0x0800  /* Saved DSP registers too */
+#define TBICTX_XEXT_BIT 0x1000  /* Set if TBICTX.Ext.Ctx contains extended
+                                   state save area, otherwise TBICTX.Ext.AX2
+                                   just holds normal A0.2 and A1.2 states */
+#define TBICTX_WAIT_BIT 0x2000  /* Causes wait for trigger - sticky toggle */
+#define TBICTX_XCBF_BIT 0x4000  /* Catch buffer or RD extracted into TBICTX */
+#define TBICTX_PRIV_BIT 0x8000  /* Set if system uses 'privileged' model */
+
+#ifdef METAC_1_0
+#define TBICTX_XAX3_BIT 0x0200  /* Saved AX.5 to AX.7 for XAXX */
+#define TBICTX_AX_REGS  5       /* Ax.0 to Ax.4 are core GP regs on CHORUS */
+#else
+#define TBICTX_XAX4_BIT 0x0200  /* Saved AX.4 to AX.7 for XAXX */
+#define TBICTX_AX_REGS  4       /* Default is Ax.0 to Ax.3 */
+#endif
+
+#ifdef TBI_1_4
+#define TBICTX_CFGFPU_FX16_BIT  0x00010000               /* Save FX.8 to FX.15 too */
+
+/* The METAC_CORE_ID_CONFIG field indicates omitted DSP resources */
+#define METAC_COREID_CFGXCTX_MASK( Value )                                 (\
+	( (((Value & METAC_COREID_CFGDSP_BITS)>>                                \
+	             METAC_COREID_CFGDSP_S      ) == METAC_COREID_CFGDSP_MIN) ? \
+	         ~(TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+                             \
+	           TBICTX_XAXX_BIT+TBICTX_XDX8_BIT ) : ~0U )                    )
+#endif
+
+/* Extended context state provides a standardised method for registering the
+   arguments required by __TBICtxSave to save the additional register states
+   currently in use by non general purpose code. The state of the __TBIExtCtx
+   variable in the static space of the thread forms an extension of the base
+   context of the thread.
+   
+   If ( __TBIExtCtx.Ctx.SaveMask == 0 ) then pExt is assumed to be NULL and
+   the empty state of  __TBIExtCtx is represented by the fact that
+   TBICTX.SaveMask does not have the bit TBICTX_XEXT_BIT set.
+   
+   If ( __TBIExtCtx.Ctx.SaveMask != 0 ) then pExt should point at a suitably
+   sized extended context save area (usually at the end of the stack space
+   allocated by the current routine). This space should allow for the
+   displaced state of A0.2 and A1.2 to be saved along with the other extended
+   states indicated via __TBIExtCtx.Ctx.SaveMask. */
+#ifndef __ASSEMBLY__
+typedef union _tbiextctx_tag_ {
+    long long Val;
+    TBIDUAL AX2;
+    struct _tbiextctxext_tag {
+#ifdef TBI_1_4
+        short DspramSizes;      /* DSPRAM sizes. Encoding varies between
+                                   TBICtxAlloc and the ECH scheme. */
+#else
+        short Reserved0;
+#endif
+        short SaveMask;         /* Flag bits for state saved */
+        PTBIDUAL pExt;          /* AX[2] state saved first plus Xxxx state */
+    
+    } Ctx;
+    
+} TBIEXTCTX, *PTBIEXTCTX;
+
+/* Automatic registration of extended context save for __TBINestInts */
+extern TBIEXTCTX __TBIExtCtx;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBIEXTCTX */
+#define TBIEXTCTX_AX2           (0)
+#define TBIEXTCTX_Ctx           (0)
+#define TBIEXTCTX_Ctx_SaveMask  (TBIEXTCTX_Ctx + 2)
+#define TBIEXTCTX_Ctx_pExt      (TBIEXTCTX_Ctx + 2 + 2)
+
+/* Extended context data size calculation constants */
+#define TBICTXEXT_BYTES          (8)
+#define TBICTXEXTBB8_BYTES     (8*8)
+#define TBICTXEXTAX3_BYTES     (3*8)
+#define TBICTXEXTAX4_BYTES     (4*8)
+#ifdef METAC_1_0
+#define TBICTXEXTAXX_BYTES     TBICTXEXTAX3_BYTES
+#else
+#define TBICTXEXTAXX_BYTES     TBICTXEXTAX4_BYTES
+#endif
+#define TBICTXEXTHL2_BYTES     (3*8)
+#define TBICTXEXTTDR_BYTES    (27*8)
+#define TBICTXEXTTDP_BYTES TBICTXEXTTDR_BYTES
+
+#ifdef TBI_1_4
+#define TBICTXEXTFX8_BYTES	(4*8)
+#define TBICTXEXTFPAC_BYTES	(1*4 + 2*2 + 4*8)
+#define TBICTXEXTFACF_BYTES	(3*8)
+#endif
+
+/* Maximum flag bits to be set via the TBICTX_EXTSET macro */
+#define TBICTXEXT_MAXBITS  (TBICTX_XEXT_BIT|                \
+                            TBICTX_XDX8_BIT|TBICTX_XAXX_BIT|\
+                            TBICTX_XHL2_BIT|TBICTX_XTDP_BIT )
+
+/* Maximum size of the extended context save area for current variant */
+#define TBICTXEXT_MAXBYTES (TBICTXEXT_BYTES+TBICTXEXTBB8_BYTES+\
+                         TBICTXEXTAXX_BYTES+TBICTXEXTHL2_BYTES+\
+                                            TBICTXEXTTDP_BYTES )
+
+#ifdef TBI_FASTINT_1_4
+/* Maximum flag bits to be set via the TBICTX_EXTSET macro */
+#define TBICTX2EXT_MAXBITS (TBICTX_XDX8_BIT|TBICTX_XAXX_BIT|\
+                            TBICTX_XHL2_BIT|TBICTX_XTDP_BIT )
+
+/* Maximum size of the extended context save area for current variant */
+#define TBICTX2EXT_MAXBYTES (TBICTXEXTBB8_BYTES+TBICTXEXTAXX_BYTES\
+                             +TBICTXEXTHL2_BYTES+TBICTXEXTTDP_BYTES )
+#endif
+
+/* Specify extended resources being used by current routine, code must be
+   assembler generated to utilise extended resources-
+
+        MOV     D0xxx,A0StP             ; Perform alloca - routine should
+        ADD     A0StP,A0StP,#SaveSize   ; setup/use A0FrP to access locals
+        MOVT    D1xxx,#SaveMask         ; TBICTX_XEXT_BIT MUST be set
+        SETL    [A1GbP+#OG(___TBIExtCtx)],D0xxx,D1xxx
+        
+    NB: OG(___TBIExtCtx) is a special case supported for SETL/GETL operations
+        on 64-bit sizes structures only, other accesses must be based on use
+        of OGA(___TBIExtCtx). 
+
+   At exit of routine-
+   
+        MOV     D0xxx,#0                ; Clear extended context save state
+        MOV     D1xxx,#0
+        SETL    [A1GbP+#OG(___TBIExtCtx)],D0xxx,D1xxx
+        SUB     A0StP,A0StP,#SaveSize   ; If original A0StP required
+        
+    NB: Both the setting and clearing of the whole __TBIExtCtx MUST be done
+        atomically in one 64-bit write operation.
+
+   For simple interrupt handling only via __TBINestInts there should be no
+   impact of the __TBIExtCtx system. If pre-emptive scheduling is being
+   performed however (assuming __TBINestInts has already been called earlier
+   on) then the following logic will correctly call __TBICtxSave if required
+   and clear out the currently selected background task-
+   
+        if ( __TBIExtCtx.Ctx.SaveMask & TBICTX_XEXT_BIT )
+        {
+            / * Store extended states in pCtx * /
+            State.Sig.SaveMask |= __TBIExtCtx.Ctx.SaveMask;
+        
+            (void) __TBICtxSave( State, (void *) __TBIExtCtx.Ctx.pExt );
+            __TBIExtCtx.Val   = 0;
+        }
+        
+    and when restoring task states call __TBICtxRestore-
+    
+        / * Restore state from pCtx * /
+        State.Sig.pCtx     = pCtx;
+        State.Sig.SaveMask = pCtx->SaveMask;
+
+        if ( State.Sig.SaveMask & TBICTX_XEXT_BIT )
+        {
+            / * Restore extended states from pCtx * /
+            __TBIExtCtx.Val = pCtx->Ext.Val;
+            
+            (void) __TBICtxRestore( State, (void *) __TBIExtCtx.Ctx.pExt );
+        }   
+   
+ */
+
+/* Critical thread state save area */
+#ifndef __ASSEMBLY__
+typedef struct _tbictx_tag_ {
+    /* TXSTATUS_FLAG_BITS and TXSTATUS_LSM_STEP_BITS from TXSTATUS */
+    short Flags;
+    /* Mask indicates any extended context state saved; 0 -> Never run */
+    short SaveMask;
+    /* Saved PC value */
+    int CurrPC;
+    /* Saved critical register states */
+    TBIDUAL DX[8];
+    /* Background control register states - for cores without catch buffer
+       base in DIVTIME the TXSTATUS bits RPVALID and RPMASK are stored with
+       the real state TXDIVTIME in CurrDIVTIME */
+    int CurrRPT, CurrBPOBITS, CurrMODE, CurrDIVTIME;
+    /* Saved AX register states */
+    TBIDUAL AX[2];
+    TBIEXTCTX Ext;
+    TBIDUAL AX3[TBICTX_AX_REGS-3];
+    
+    /* Any CBUF state to be restored by a handler return must be stored here.
+       Other extended state can be stored anywhere - see __TBICtxSave and
+       __TBICtxRestore. */
+    
+} TBICTX, *PTBICTX;
+
+#ifdef TBI_FASTINT_1_4
+typedef struct _tbictx2_tag_ {
+    TBIDUAL AX[2];    /* AU.0, AU.1 */
+    TBIDUAL DX[2];    /* DU.0, DU.4 */
+    int     CurrMODE;
+    int     CurrRPT;
+    int     CurrSTATUS;
+    void   *CurrPC;   /* PC in PC address space */
+} TBICTX2, *PTBICTX2;
+/* TBICTX2 is followed by:
+ *   TBICTXEXTCB0                if TXSTATUS.CBMarker
+ *   TBIDUAL * TXSTATUS.IRPCount if TXSTATUS.IRPCount > 0
+ *   TBICTXGP                    if using __TBIStdRootIntHandler or __TBIStdCtxSwitchRootIntHandler
+ */
+
+typedef struct _tbictxgp_tag_ {
+    short    DspramSizes;
+    short    SaveMask;
+    void    *pExt;
+    TBIDUAL  DX[6]; /* DU.1-DU.3, DU.5-DU.7 */
+    TBIDUAL  AX[2]; /* AU.2-AU.3 */
+} TBICTXGP, *PTBICTXGP;
+
+#define TBICTXGP_DspramSizes (0)
+#define TBICTXGP_SaveMask    (TBICTXGP_DspramSizes + 2)
+#define TBICTXGP_MAX_BYTES   (2 + 2 + 4 + 8*(6+2))
+
+#endif
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBICTX */
+#define TBICTX_Flags            (0)
+#define TBICTX_SaveMask         (2)
+#define TBICTX_CurrPC           (4)
+#define TBICTX_DX               (2 + 2 + 4)
+#define TBICTX_CurrRPT          (2 + 2 + 4 + 8 * 8)
+#define TBICTX_CurrMODE         (2 + 2 + 4 + 8 * 8 + 4 + 4)
+#define TBICTX_AX               (2 + 2 + 4 + 8 * 8 + 4 + 4 + 4 + 4)
+#define TBICTX_Ext              (2 + 2 + 4 + 8 * 8 + 4 + 4 + 4 + 4 + 2 * 8)
+#define TBICTX_Ext_AX2          (TBICTX_Ext + TBIEXTCTX_AX2)
+#define TBICTX_Ext_AX2_U0       (TBICTX_Ext + TBIEXTCTX_AX2 + TBIDUAL_U0)
+#define TBICTX_Ext_AX2_U1       (TBICTX_Ext + TBIEXTCTX_AX2 + TBIDUAL_U1)
+#define TBICTX_Ext_Ctx_pExt     (TBICTX_Ext + TBIEXTCTX_Ctx_pExt)
+#define TBICTX_Ext_Ctx_SaveMask (TBICTX_Ext + TBIEXTCTX_Ctx_SaveMask)
+
+#ifdef TBI_FASTINT_1_4
+#define TBICTX2_BYTES (8 * 2 + 8 * 2 + 4 + 4 + 4 + 4)
+#define TBICTXEXTCB0_BYTES (4 + 4 + 8)
+
+#define TBICTX2_CRIT_MAX_BYTES (TBICTX2_BYTES + TBICTXEXTCB0_BYTES + 6 * TBIDUAL_BYTES)
+#define TBI_SWITCH_NEXT_PC(PC, EXTRA) ((PC) + (EXTRA & 1) ? 8 : 4)
+#endif
+
+#ifndef __ASSEMBLY__
+/* Extended thread state save areas - catch buffer state element */
+typedef struct _tbictxextcb0_tag_ {
+    /* Flags data and address value - see METAC_CATCH_VALUES in machine.h */
+    unsigned long CBFlags, CBAddr;
+    /* 64-bit data */
+    TBIDUAL CBData;
+    
+} TBICTXEXTCB0, *PTBICTXEXTCB0;
+
+/* Read pipeline state saved on later cores after single catch buffer slot */
+typedef struct _tbictxextrp6_tag_ {
+    /* RPMask is TXSTATUS_RPMASK_BITS only, reserved is undefined */
+    unsigned long RPMask, Reserved0;
+    TBIDUAL CBData[6];
+    
+} TBICTXEXTRP6, *PTBICTXEXTRP6;
+
+/* Extended thread state save areas - 8 DU register pairs */
+typedef struct _tbictxextbb8_tag_ {
+    /* Remaining Data unit registers in 64-bit pairs */
+    TBIDUAL UX[8];
+    
+} TBICTXEXTBB8, *PTBICTXEXTBB8;
+
+/* Extended thread state save areas - 3 AU register pairs */
+typedef struct _tbictxextbb3_tag_ {
+    /* Remaining Address unit registers in 64-bit pairs */
+    TBIDUAL UX[3];
+    
+} TBICTXEXTBB3, *PTBICTXEXTBB3;
+
+/* Extended thread state save areas - 4 AU register pairs or 4 FX pairs */
+typedef struct _tbictxextbb4_tag_ {
+    /* Remaining Address unit or FPU registers in 64-bit pairs */
+    TBIDUAL UX[4];
+    
+} TBICTXEXTBB4, *PTBICTXEXTBB4;
+
+/* Extended thread state save areas - Hardware loop states (max 2) */
+typedef struct _tbictxexthl2_tag_ {
+    /* Hardware looping register states */
+    TBIDUAL Start, End, Count;
+    
+} TBICTXEXTHL2, *PTBICTXEXTHL2;
+
+/* Extended thread state save areas - DSP register states */
+typedef struct _tbictxexttdp_tag_ {
+    /* DSP 32-bit accumulator register state (Bits 31:0 of ACX.0) */
+    TBIDUAL Acc32[1];
+    /* DSP > 32-bit accumulator bits 63:32 of ACX.0 (zero-extended) */
+    TBIDUAL Acc64[1];
+    /* Twiddle register state, and three phase increment states */
+    TBIDUAL PReg[4];
+    /* Modulo region size, padded to 64-bits */
+    int CurrMRSIZE, Reserved0;
+    
+} TBICTXEXTTDP, *PTBICTXEXTTDP;
+
+/* Extended thread state save areas - DSP register states including DSP RAM */
+typedef struct _tbictxexttdpr_tag_ {
+    /* DSP 32-bit accumulator register state (Bits 31:0 of ACX.0) */
+    TBIDUAL Acc32[1];
+    /* DSP 40-bit accumulator register state (Bits 39:8 of ACX.0) */
+    TBIDUAL Acc40[1];
+    /* DSP RAM Pointers */
+    TBIDUAL RP0[2],  WP0[2],  RP1[2],  WP1[2];
+    /* DSP RAM Increments */
+    TBIDUAL RPI0[2], WPI0[2], RPI1[2], WPI1[2];
+    /* Template registers */
+    unsigned long Tmplt[16];
+    /* Modulo address region size and DSP RAM module region sizes */
+    int CurrMRSIZE, CurrDRSIZE;
+    
+} TBICTXEXTTDPR, *PTBICTXEXTTDPR;
+
+#ifdef TBI_1_4
+/* The METAC_ID_CORE register state is a marker for the FPU
+   state that is then stored after this core header structure.  */
+#define TBICTXEXTFPU_CONFIG_MASK  ( (METAC_COREID_NOFPACC_BIT+     \
+                                     METAC_COREID_CFGFPU_BITS ) << \
+                                     METAC_COREID_CONFIG_BITS       )
+
+/* Recorded FPU exception state from TXDEFR in DefrFpu */
+#define TBICTXEXTFPU_DEFRFPU_MASK (TXDEFR_FPU_FE_BITS)
+
+/* Extended thread state save areas - FPU register states */
+typedef struct _tbictxextfpu_tag_ {
+    /* Stored METAC_CORE_ID CONFIG */
+    int CfgFpu;
+    /* Stored deferred TXDEFR bits related to FPU
+     *
+     * This is encoded as follows in order to fit into 16-bits:
+     * DefrFPU:15 - 14 <= 0
+     *        :13 -  8 <= TXDEFR:21-16
+     *        : 7 -  6 <= 0
+     *        : 5 -  0 <= TXDEFR:5-0
+     */
+    short DefrFpu;
+
+    /* TXMODE bits related to FPU */
+    short ModeFpu;
+    
+    /* FPU Even/Odd register states */
+    TBIDUAL FX[4];
+   
+    /* if CfgFpu & TBICTX_CFGFPU_FX16_BIT  -> 1 then TBICTXEXTBB4 holds FX.8-15 */
+    /* if CfgFpu & TBICTX_CFGFPU_NOACF_BIT -> 0 then TBICTXEXTFPACC holds state */
+} TBICTXEXTFPU, *PTBICTXEXTFPU;
+
+/* Extended thread state save areas - FPU accumulator state */
+typedef struct _tbictxextfpacc_tag_ {
+    /* FPU accumulator register state - three 64-bit parts */
+    TBIDUAL FAcc32[3];
+    
+} TBICTXEXTFPACC, *PTBICTXEXTFPACC;
+#endif
+
+/* Prototype TBI structure */
+struct _tbi_tag_ ;
+
+/* A 64-bit return value used commonly in the TBI APIs */
+typedef union _tbires_tag_ {
+    /* Save and load this value to get/set the whole result quickly */
+    long long Val;
+
+    /* Parameter of a fnSigs or __TBICtx* call */
+    struct _tbires_sig_tag_ { 
+        /* TXMASK[I] bits zeroed upto and including current trigger level */
+        unsigned short TrigMask;
+        /* Control bits for handlers - see PTBIAPIFN documentation below */
+        unsigned short SaveMask;
+        /* Pointer to the base register context save area of the thread */
+        PTBICTX pCtx;
+    } Sig;
+
+    /* Result of TBIThrdPrivId call */
+    struct _tbires_thrdprivid_tag_ {
+        /* Basic thread identifier; just TBID_THREAD_BITS */
+        int Id;
+        /* None thread number bits; TBID_ISTAT_BIT+TBID_PSTAT_BIT */
+        int Priv;
+    } Thrd;
+
+    /* Parameter and Result of a __TBISwitch call */
+    struct _tbires_switch_tag_ { 
+        /* Parameter passed across context switch */
+        void *pPara;
+        /* Thread context of other Thread includng restore flags */
+        PTBICTX pCtx;
+    } Switch;
+    
+    /* For extended S/W events only */
+    struct _tbires_ccb_tag_ {
+        void *pCCB;
+        int COff;
+    } CCB;
+
+    struct _tbires_tlb_tag_ {
+        int Leaf;  /* TLB Leaf data */
+        int Flags; /* TLB Flags */
+    } Tlb;
+
+#ifdef TBI_FASTINT_1_4
+    struct _tbires_intr_tag_ {
+      short    TrigMask;
+      short    SaveMask;
+      PTBICTX2 pCtx;
+    } Intr;
+#endif
+
+} TBIRES, *PTBIRES;
+#endif /* ifndef __ASSEMBLY__ */
+
+#ifndef __ASSEMBLY__
+/* Prototype for all signal handler functions, called via ___TBISyncTrigger or
+   ___TBIASyncTrigger.
+   
+   State.Sig.TrigMask will indicate the bits set within TXMASKI at
+          the time of the handler call that have all been cleared to prevent
+          nested interrupt occuring immediately.
+   
+   State.Sig.SaveMask is a bit-mask which will be set to Zero when a trigger
+          occurs at background level and TBICTX_CRIT_BIT and optionally
+          TBICTX_CBUF_BIT when a trigger occurs at interrupt level.
+          
+          TBICTX_CBUF_BIT reflects the state of TXSTATUS_CBMARKER_BIT for
+          the interrupted background thread.
+   
+   State.Sig.pCtx will point at a TBICTX structure generated to hold the
+          critical state of the interrupted thread at interrupt level and
+          should be set to NULL when called at background level.
+        
+   Triggers will indicate the status of TXSTAT or TXSTATI sampled by the
+          code that called the handler.
+          
+   InstOrSWSId is defined firstly as 'Inst' if the SigNum is TBID_SIGNUM_SWx
+          and hold the actual SWITCH instruction detected, secondly if SigNum
+          is TBID_SIGNUM_SWS the 'SWSId' is defined to hold the Id of the
+          software signal detected, in other cases the value of this
+          parameter is undefined.
+   
+   pTBI   points at the PTBI structure related to the thread and processing
+          level involved.
+
+   TBIRES return value at both processing levels is similar in terms of any
+          changes that the handler makes. By default the State argument value
+          passed in should be returned.
+          
+      Sig.TrigMask value is bits to OR back into TXMASKI when the handler
+          completes to enable currently disabled interrupts.
+          
+      Sig.SaveMask value is ignored.
+   
+      Sig.pCtx is ignored.
+
+ */
+typedef TBIRES (*PTBIAPIFN)( TBIRES State, int SigNum,
+                             int Triggers, int InstOrSWSId,
+                             volatile struct _tbi_tag_ *pTBI );
+#endif /* ifndef __ASSEMBLY__ */
+
+#ifndef __ASSEMBLY__
+/* The global memory map is described by a list of segment descriptors */
+typedef volatile struct _tbiseg_tag_ {
+    volatile struct _tbiseg_tag_ *pLink;
+    int Id;                           /* Id of the segment */
+    TBISPIN Lock;                     /* Spin-lock for struct (normally 0) */
+    unsigned int Bytes;               /* Size of region in bytes */
+    void *pGAddr;                     /* Base addr of region in global space */
+    void *pLAddr;                     /* Base addr of region in local space */
+    int Data[2];                      /* Segment specific data (may be extended) */
+
+} TBISEG, *PTBISEG;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Offsets of fields in TBISEG structure */
+#define TBISEG_pLink    ( 0)
+#define TBISEG_Id       ( 4)
+#define TBISEG_Lock     ( 8)
+#define TBISEG_Bytes    (12)
+#define TBISEG_pGAddr   (16)
+#define TBISEG_pLAddr   (20)
+#define TBISEG_Data     (24)
+
+#ifndef __ASSEMBLY__
+typedef volatile struct _tbi_tag_ {
+    int SigMask;                      /* Bits set to represent S/W events */
+    PTBIKICK pKick;                   /* Kick addr for S/W events */
+    void *pCCB;                       /* Extended S/W events */
+    PTBISEG pSeg;                     /* Related segment structure */
+    PTBIAPIFN fnSigs[TBID_SIGNUM_MAX+1];/* Signal handler API table */
+} *PTBI, TBI;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBI */
+#define TBI_SigMask     (0)
+#define TBI_pKick       (4)
+#define TBI_pCCB        (8)
+#define TBI_pSeg       (12)
+#define TBI_fnSigs     (16)
+
+#ifdef TBI_1_4
+#ifndef __ASSEMBLY__
+/* This handler should be used for TBID_SIGNUM_DFR */
+extern TBIRES __TBIHandleDFR ( TBIRES State, int SigNum,
+                               int Triggers, int InstOrSWSId,
+                               volatile struct _tbi_tag_ *pTBI );
+#endif
+#endif
+
+/* String table entry - special values */
+#define METAG_TBI_STRS (0x5300) /* Tag      : If entry is valid */
+#define METAG_TBI_STRE (0x4500) /* Tag      : If entry is end of table */
+#define METAG_TBI_STRG (0x4700) /* Tag      : If entry is a gap */
+#define METAG_TBI_STRX (0x5A00) /* TransLen : If no translation present */
+
+#ifndef __ASSEMBLY__
+typedef volatile struct _tbistr_tag_ {
+    short Bytes;                      /* Length of entry in Bytes */
+    short Tag;                        /* Normally METAG_TBI_STRS(0x5300) */
+    short Len;                        /* Length of the string entry (incl null) */
+    short TransLen;                   /* Normally METAG_TBI_STRX(0x5A00) */
+    char String[8];                   /* Zero terminated (may-be bigger) */
+
+} TBISTR, *PTBISTR;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Cache size information - available as fields of Data[1] of global heap
+   segment */
+#define METAG_TBI_ICACHE_SIZE_S    0             /* see comments below */
+#define METAG_TBI_ICACHE_SIZE_BITS 0x0000000F
+#define METAG_TBI_ICACHE_FILL_S    4
+#define METAG_TBI_ICACHE_FILL_BITS 0x000000F0
+#define METAG_TBI_DCACHE_SIZE_S    8
+#define METAG_TBI_DCACHE_SIZE_BITS 0x00000F00
+#define METAG_TBI_DCACHE_FILL_S    12
+#define METAG_TBI_DCACHE_FILL_BITS 0x0000F000
+
+/* METAG_TBI_xCACHE_SIZE
+   Describes the physical cache size rounded up to the next power of 2
+   relative to a 16K (2^14) cache. These sizes are encoded as a signed addend
+   to this base power of 2, for example
+      4K -> 2^12 -> -2  (i.e. 12-14)
+      8K -> 2^13 -> -1
+     16K -> 2^14 ->  0
+     32K -> 2^15 -> +1
+     64K -> 2^16 -> +2
+    128K -> 2^17 -> +3
+
+   METAG_TBI_xCACHE_FILL
+   Describes the physical cache size within the power of 2 area given by
+   the value above. For example a 10K cache may be represented as having
+   nearest size 16K with a fill of 10 sixteenths. This is encoded as the
+   number of unused 1/16ths, for example
+     0000 ->  0 -> 16/16
+     0001 ->  1 -> 15/16
+     0010 ->  2 -> 14/16
+     ...
+     1111 -> 15 ->  1/16
+ */
+
+#define METAG_TBI_CACHE_SIZE_BASE_LOG2 14
+
+/* Each declaration made by this macro generates a TBISTR entry */
+#ifndef __ASSEMBLY__
+#define TBISTR_DECL( Name, Str )                                       \
+    __attribute__ ((__section__ (".tbistr") )) const char Name[] = #Str
+#endif
+
+/* META timer values - see below for Timer support routines */
+#define TBI_TIMERWAIT_MIN (-16)         /* Minimum 'recommended' period */
+#define TBI_TIMERWAIT_MAX (-0x7FFFFFFF) /* Maximum 'recommended' period */
+
+#ifndef __ASSEMBLY__
+/* These macros allow direct access from C to any register known to the
+   assembler or defined in machine.h. Example candidates are TXTACTCYC,
+   TXIDLECYC, and TXPRIVEXT. Note that when higher level macros and routines
+   like the timer and trigger handling features below these should be used in
+   preference to this direct low-level access mechanism. */
+#define TBI_GETREG( Reg )                                  __extension__ ({\
+   int __GRValue;                                                          \
+   __asm__ volatile ("MOV\t%0," #Reg "\t/* (*TBI_GETREG OK) */" :          \
+                     "=r" (__GRValue) );                                   \
+    __GRValue;                                                            })
+
+#define TBI_SETREG( Reg, Value )                                       do {\
+   int __SRValue = Value;                                                  \
+   __asm__ volatile ("MOV\t" #Reg ",%0\t/* (*TBI_SETREG OK) */" :          \
+                     : "r" (__SRValue) );                       } while (0)
+
+#define TBI_SWAPREG( Reg, Value )                                      do {\
+   int __XRValue = (Value);                                                \
+   __asm__ volatile ("SWAP\t" #Reg ",%0\t/* (*TBI_SWAPREG OK) */" :        \
+                     "=r" (__XRValue) : "0" (__XRValue) );                 \
+   Value = __XRValue;                                           } while (0)
+
+/* Obtain and/or release global critical section lock given that interrupts
+   are already disabled and/or should remain disabled. */
+#define TBI_NOINTSCRITON                                             do {\
+   __asm__ volatile ("LOCK1\t\t/* (*TBI_NOINTSCRITON OK) */");} while (0)
+#define TBI_NOINTSCRITOFF                                             do {\
+   __asm__ volatile ("LOCK0\t\t/* (*TBI_NOINTSCRITOFF OK) */");} while (0)
+/* Optimised in-lining versions of the above macros */
+
+#define TBI_LOCK( TrigState )                                          do {\
+   int __TRValue;                                                          \
+   int __ALOCKHI = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000;                \
+   __asm__ volatile ("MOV %0,#0\t\t/* (*TBI_LOCK ... */\n\t"               \
+                     "SWAP\t%0,TXMASKI\t/* ... */\n\t"                     \
+                     "LOCK2\t\t/* ... */\n\t"                              \
+                     "SETD\t[%1+#0x40],D1RtP /* ... OK) */" :              \
+                     "=r&" (__TRValue) : "u" (__ALOCKHI) );                \
+   TrigState = __TRValue;                                       } while (0)
+#define TBI_CRITON( TrigState )                                        do {\
+   int __TRValue;                                                          \
+   __asm__ volatile ("MOV %0,#0\t\t/* (*TBI_CRITON ... */\n\t"             \
+                     "SWAP\t%0,TXMASKI\t/* ... */\n\t"                     \
+                     "LOCK1\t\t/* ... OK) */" :                            \
+                     "=r" (__TRValue) );                                   \
+   TrigState = __TRValue;                                       } while (0)
+
+#define TBI_INTSX( TrigState )                                         do {\
+   int __TRValue = TrigState;                                              \
+   __asm__ volatile ("SWAP\t%0,TXMASKI\t/* (*TBI_INTSX OK) */" :           \
+                     "=r" (__TRValue) : "0" (__TRValue) );                 \
+   TrigState = __TRValue;                                       } while (0)
+
+#define TBI_UNLOCK( TrigState )                                        do {\
+   int __TRValue = TrigState;                                              \
+   int __ALOCKHI = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000;                \
+   __asm__ volatile ("SETD\t[%1+#0x00],D1RtP\t/* (*TBI_UNLOCK ... */\n\t"  \
+                     "LOCK0\t\t/* ... */\n\t"                              \
+                     "MOV\tTXMASKI,%0\t/* ... OK) */" :                    \
+                     : "r" (__TRValue), "u" (__ALOCKHI) );      } while (0)
+
+#define TBI_CRITOFF( TrigState )                                       do {\
+   int __TRValue = TrigState;                                              \
+   __asm__ volatile ("LOCK0\t\t/* (*TBI_CRITOFF ... */\n\t"                \
+                     "MOV\tTXMASKI,%0\t/* ... OK) */" :                    \
+                     : "r" (__TRValue) );                       } while (0)
+
+#define TBI_TRIGSX( SrcDst ) do { TBI_SWAPREG( TXMASK, SrcDst );} while (0)
+
+/* Composite macros to perform logic ops on INTS or TRIGS masks */
+#define TBI_INTSOR( Bits )                                              do {\
+    int __TT = 0; TBI_INTSX(__TT);                                          \
+    __TT |= (Bits); TBI_INTSX(__TT);                             } while (0)
+    
+#define TBI_INTSAND( Bits )                                             do {\
+    int __TT = 0; TBI_INTSX(__TT);                                          \
+    __TT &= (Bits); TBI_INTSX(__TT);                             } while (0)
+
+#ifdef TBI_1_4
+#define TBI_DEFRICTRLSOR( Bits )                                        do {\
+    int __TT = TBI_GETREG( CT.20 );                                         \
+    __TT |= (Bits); TBI_SETREG( CT.20, __TT);                    } while (0)
+    
+#define TBI_DEFRICTRLSAND( Bits )                                       do {\
+    int __TT = TBI_GETREG( TXDEFR );                                        \
+    __TT &= (Bits); TBI_SETREG( CT.20, __TT);                    } while (0)
+#endif
+
+#define TBI_TRIGSOR( Bits )                                             do {\
+    int __TT = TBI_GETREG( TXMASK );                                        \
+    __TT |= (Bits); TBI_SETREG( TXMASK, __TT);                   } while (0)
+    
+#define TBI_TRIGSAND( Bits )                                            do {\
+    int __TT = TBI_GETREG( TXMASK );                                        \
+    __TT &= (Bits); TBI_SETREG( TXMASK, __TT);                   } while (0)
+
+/* Macros to disable and re-enable interrupts using TBI_INTSX, deliberate
+   traps and exceptions can still be handled within the critical section. */
+#define TBI_STOPINTS( Value )                                           do {\
+    int __TT = TBI_GETREG( TXMASKI );                                       \
+    __TT &= TXSTATI_BGNDHALT_BIT; TBI_INTSX( __TT );                        \
+    Value = __TT;                                                } while (0)
+#define TBI_RESTINTS( Value )                                           do {\
+    int __TT = Value; TBI_INTSX( __TT );                         } while (0)
+
+/* Return pointer to segment list at current privilege level */
+PTBISEG __TBISegList( void );
+
+/* Search the segment list for a match given Id, pStart can be NULL */
+PTBISEG __TBIFindSeg( PTBISEG pStart, int Id );
+
+/* Prepare a new segment structure using space from within another */
+PTBISEG __TBINewSeg( PTBISEG pFromSeg, int Id, unsigned int Bytes );
+
+/* Prepare a new segment using any global or local heap segments available */
+PTBISEG __TBIMakeNewSeg( int Id, unsigned int Bytes );
+
+/* Insert a new segment into the segment list so __TBIFindSeg can locate it */
+void __TBIAddSeg( PTBISEG pSeg );
+#define __TBIADDSEG_DEF     /* Some versions failed to define this */
+
+/* Return Id of current thread; TBID_ISTAT_BIT+TBID_THREAD_BITS */
+int __TBIThreadId( void );
+
+/* Return TBIRES.Thrd data for current thread */
+TBIRES __TBIThrdPrivId( void );
+
+/* Return pointer to current threads TBI root block.
+   Id implies whether Int or Background root block is required */
+PTBI __TBI( int Id );
+
+/* Try to set Mask bit using the spin-lock protocol, return 0 if fails and 
+   new state if succeeds */
+int __TBIPoll( PTBISPIN pLock, int Mask );
+
+/* Set Mask bits via the spin-lock protocol in *pLock, return new state */
+int __TBISpin( PTBISPIN pLock, int Mask );
+
+/* Default handler set up for all TBI.fnSigs entries during initialisation */
+TBIRES __TBIUnExpXXX( TBIRES State, int SigNum,
+                   int Triggers, int Inst, PTBI pTBI );
+
+/* Call this routine to service triggers at background processing level. The
+   TBID_POLL_BIT of the Id parameter value will be used to indicate that the
+   routine should return if no triggers need to be serviced initially. If this
+   bit is not set the routine will block until one trigger handler is serviced
+   and then behave like the poll case servicing any remaining triggers
+   actually outstanding before returning. Normally the State parameter should
+   be simply initialised to zero and the result should be ignored, other
+   values/options are for internal use only. */
+TBIRES __TBISyncTrigger( TBIRES State, int Id );
+
+/* Call this routine to enable processing of triggers by signal handlers at
+   interrupt level. The State parameter value passed is returned by this
+   routine. The State.Sig.TrigMask field also specifies the initial
+   state of the interrupt mask register TXMASKI to be setup by the call.
+   The other parts of the State parameter are ignored unless the PRIV bit is
+   set in the SaveMask field. In this case the State.Sig.pCtx field specifies
+   the base of the stack to which the interrupt system should switch into
+   as it saves the state of the previously executing code. In the case the
+   thread will be unprivileged as it continues execution at the return
+   point of this routine and it's future state will be effectively never
+   trusted to be valid. */
+TBIRES __TBIASyncTrigger( TBIRES State );
+
+/* Call this to swap soft threads executing at the background processing level.
+   The TBIRES returned to the new thread will be the same as the NextThread
+   value specified to the call. The NextThread.Switch.pCtx value specifies
+   which thread context to restore and the NextThread.Switch.Para value can
+   hold an arbitrary expression to be passed between the threads. The saved
+   state of the previous thread will be stored in a TBICTX descriptor created
+   on it's stack and the address of this will be stored into the *rpSaveCtx
+   location specified. */
+TBIRES __TBISwitch( TBIRES NextThread, PTBICTX *rpSaveCtx );
+
+/* Call this to initialise a stack frame ready for further use, up to four
+   32-bit arguments may be specified after the fixed args to be passed via
+   the new stack pStack to the routine specified via fnMain. If the
+   main-line routine ever returns the thread will operate as if main itself
+   had returned and terminate with the return code given. */
+typedef int (*PTBIMAINFN)( TBIRES Arg /*, <= 4 additional 32-bit args */ );
+PTBICTX __TBISwitchInit( void *pStack, PTBIMAINFN fnMain, ... );
+
+/* Call this to resume a thread from a saved synchronous TBICTX state.
+   The TBIRES returned to the new thread will be the same as the NextThread
+   value specified to the call. The NextThread.Switch.pCtx value specifies
+   which thread context to restore and the NextThread.Switch.Para value can
+   hold an arbitrary expression to be passed between the threads. The context
+   of the calling thread is lost and this routine never returns to the
+   caller. The TrigsMask value supplied is ored into TXMASKI to enable
+   interrupts after the context of the new thread is established. */
+void __TBISyncResume( TBIRES NextThread, int TrigsMask );
+
+/* Call these routines to save and restore the extended states of
+   scheduled tasks. */
+void *__TBICtxSave( TBIRES State, void *pExt );
+void *__TBICtxRestore( TBIRES State, void *pExt );
+
+#ifdef TBI_1_4
+#ifdef TBI_FASTINT_1_4
+/* Call these routines to copy the GP state to a separate buffer
+ * Only necessary for context switching.
+ */
+PTBICTXGP __TBICtx2SaveCrit( PTBICTX2 pCurrentCtx, PTBICTX2 pSaveCtx );
+void *__TBICtx2SaveGP( PTBICTXGP pCurrentCtxGP, PTBICTXGP pSaveCtxGP );
+
+/* Call these routines to save and restore the extended states of
+   scheduled tasks. */
+void *__TBICtx2Save( PTBICTXGP pCtxGP, short SaveMask, void *pExt );
+void *__TBICtx2Restore( PTBICTX2 pCtx, short SaveMask, void *pExt );
+#endif
+
+/* If FPAC flag is set then significant FPU context exists. Call these routine
+   to save and restore it */
+void *__TBICtxFPUSave( TBIRES State, void *pExt );
+void *__TBICtxFPURestore( TBIRES State, void *pExt );
+
+#ifdef TBI_FASTINT_1_4
+extern void *__TBICtx2FPUSave (PTBICTXGP, short, void*);
+extern void *__TBICtx2FPURestore (PTBICTXGP, short, void*);
+#endif
+#endif
+
+#ifdef TBI_1_4
+/* Call these routines to save and restore DSPRAM. */
+void *__TBIDspramSaveA (short DspramSizes, void *pExt);
+void *__TBIDspramSaveB (short DspramSizes, void *pExt);
+void *__TBIDspramRestoreA (short DspramSizes, void *pExt);
+void *__TBIDspramRestoreB (short DspramSizes, void *pExt);
+#endif
+
+/* This routine should be used at the entrypoint of interrupt handlers to
+   re-enable higher priority interrupts and/or save state from the previously
+   executing background code. State is a TBIRES.Sig parameter with NoNestMask
+   indicating the triggers (if any) that should remain disabled and SaveMask
+   CBUF bit indicating the if the hardware catch buffer is dirty. Optionally
+   any number of extended state bits X??? including XCBF can be specified to
+   force a nested state save call to __TBICtxSave before the current routine
+   continues. (In the latter case __TBICtxRestore should be called to restore
+   any extended states before the background thread of execution is resumed) 
+   
+   By default (no X??? bits specified in SaveMask) this routine performs a
+   sub-call to __TBICtxSave with the pExt and State parameters specified IF
+   some triggers could be serviced while the current interrupt handler
+   executes and the hardware catch buffer is actually dirty. In this case
+   this routine provides the XCBF bit in State.Sig.SaveMask to force the
+   __TBICtxSave to extract the current catch state.
+   
+   The NoNestMask parameter should normally indicate that the same or lower
+   triggers than those provoking the current handler call should not be
+   serviced in nested calls, zero may be specified if all possible interrupts
+   are to be allowed.
+   
+   The TBIRES.Sig value returned will be similar to the State parameter
+   specified with the XCBF bit ORed into it's SaveMask if a context save was
+   required and fewer bits set in it's TrigMask corresponding to the same/lower
+   priority interrupt triggers still not enabled. */
+TBIRES __TBINestInts( TBIRES State, void *pExt, int NoNestMask );
+
+/* This routine causes the TBICTX structure specified in State.Sig.pCtx to
+   be restored. This implies that execution will not return to the caller.
+   The State.Sig.TrigMask field will be restored during the context switch
+   such that any immediately occuring interrupts occur in the context of the
+   newly specified task. The State.Sig.SaveMask parameter is ignored. */
+void __TBIASyncResume( TBIRES State );
+
+/* Call this routine to enable fastest possible processing of one or more
+   interrupt triggers via a unified signal handler. The handler concerned
+   must simple return after servicing the related hardware.
+   The State.Sig.TrigMask parameter indicates the interrupt triggers to be
+   enabled and the Thin.Thin.fnHandler specifies the routine to call and
+   the whole Thin parameter value will be passed to this routine unaltered as
+   it's first parameter. */
+void __TBIASyncThin( TBIRES State, TBIRES Thin );
+
+/* Do this before performing your own direct spin-lock access - use TBI_LOCK */
+int __TBILock( void );
+
+/* Do this after performing your own direct spin-lock access - use TBI_UNLOCK */
+void __TBIUnlock( int TrigState );
+
+/* Obtain and release global critical section lock - only stops execution
+   of interrupts on this thread and similar critical section code on other
+   local threads - use TBI_CRITON or TBI_CRITOFF */
+int __TBICritOn( void );
+void __TBICritOff( int TrigState );
+
+/* Change INTS (TXMASKI) - return old state - use TBI_INTSX */
+int __TBIIntsX( int NewMask );
+
+/* Change TRIGS (TXMASK) - return old state - use TBI_TRIGSX */
+int __TBITrigsX( int NewMask );
+
+/* This function initialises a timer for first use, only the TBID_ISTAT_BIT
+   of the Id parameter is used to indicate which timer is to be modified. The
+   Wait value should either be zero to disable the timer concerned or be in
+   the recommended TBI_TIMERWAIT_* range to specify the delay required before
+   the first timer trigger occurs.
+      
+   The TBID_ISTAT_BIT of the Id parameter similar effects all other timer
+   support functions (see below). */
+void __TBITimerCtrl( int Id, int Wait );
+
+/* This routine returns a 64-bit time stamp value that is initialised to zero
+   via a __TBITimerCtrl timer enabling call. */
+long long __TBITimeStamp( int Id );
+
+/* To manage a periodic timer each period elapsed should be subracted from
+   the current timer value to attempt to set up the next timer trigger. The
+   Wait parameter should be a value in the recommended TBI_TIMERWAIT_* range.
+   The return value is the new aggregate value that the timer was updated to,
+   if this is less than zero then a timer trigger is guaranteed to be
+   generated after the number of ticks implied, if a positive result is
+   returned either itterative or step-wise corrective action must be taken to
+   resynchronise the timer and hence provoke a future timer trigger. */
+int __TBITimerAdd( int Id, int Wait );
+
+/* String table search function, pStart is first entry to check or NULL,
+   pStr is string data to search for and MatchLen is either length of string
+   to compare for an exact match or negative length to compare for partial
+   match. */
+const TBISTR *__TBIFindStr( const TBISTR *pStart,
+                            const char *pStr, int MatchLen );
+
+/* String table translate function, pStr is text to translate and Len is
+   it's length. Value returned may not be a string pointer if the
+   translation value is really some other type, 64-bit alignment of the return
+   pointer is guaranteed so almost any type including a structure could be
+   located with this routine. */ 
+const void *__TBITransStr( const char *pStr, int Len );
+
+
+
+/* Arbitrary physical memory access windows, use different Channels to avoid
+   conflict/thrashing within a single piece of code. */
+void *__TBIPhysAccess( int Channel, int PhysAddr, int Bytes );
+void __TBIPhysRelease( int Channel, void *pLinAddr );
+
+#ifdef METAC_1_0
+/* Data cache function nullified because data cache is off */
+#define TBIDCACHE_FLUSH( pAddr )
+#define TBIDCACHE_PRELOAD( Type, pAddr ) ((Type) (pAddr))
+#define TBIDCACHE_REFRESH( Type, pAddr ) ((Type) (pAddr))
+#endif
+#ifdef METAC_1_1
+/* To flush a single cache line from the data cache using a linear address */
+#define TBIDCACHE_FLUSH( pAddr )          ((volatile char *) \
+                 (((unsigned int) (pAddr))>>LINSYSLFLUSH_S))[0] = 0
+
+extern void * __builtin_dcache_preload (void *);
+
+/* Try to ensure that the data at the address concerned is in the cache */
+#define TBIDCACHE_PRELOAD( Type, Addr )                                    \
+  ((Type) __builtin_dcache_preload ((void *)(Addr)))
+
+extern void * __builtin_dcache_refresh (void *);
+
+/* Flush any old version of data from address and re-load a new copy */
+#define TBIDCACHE_REFRESH( Type, Addr )                   __extension__ ({ \
+  Type __addr = (Type)(Addr);                                              \
+  (void)__builtin_dcache_refresh ((void *)(((unsigned int)(__addr))>>6));  \
+  __addr; })
+
+#endif
+#ifndef METAC_1_0
+#ifndef METAC_1_1
+/* Support for DCACHE builtin */
+extern void __builtin_dcache_flush (void *);
+
+/* To flush a single cache line from the data cache using a linear address */
+#define TBIDCACHE_FLUSH( Addr )                                            \
+  __builtin_dcache_flush ((void *)(Addr))
+
+extern void * __builtin_dcache_preload (void *);
+
+/* Try to ensure that the data at the address concerned is in the cache */
+#define TBIDCACHE_PRELOAD( Type, Addr )                                    \
+  ((Type) __builtin_dcache_preload ((void *)(Addr)))
+
+extern void * __builtin_dcache_refresh (void *);
+
+/* Flush any old version of data from address and re-load a new copy */
+#define TBIDCACHE_REFRESH( Type, Addr )                                    \
+  ((Type) __builtin_dcache_refresh ((void *)(Addr)))
+
+#endif
+#endif
+
+/* Flush the MMCU cache */
+#define TBIMCACHE_FLUSH() { ((volatile int *) LINSYSCFLUSH_MMCU)[0] = 0; }
+
+#ifdef METAC_2_1
+/* Obtain the MMU table entry for the specified address */
+#define TBIMTABLE_LEAFDATA(ADDR) TBIXCACHE_RD((int)(ADDR) & (-1<<6))
+
+#ifndef __ASSEMBLY__
+/* Obtain the full MMU table entry for the specified address */
+#define TBIMTABLE_DATA(ADDR) __extension__ ({ TBIRES __p;                     \
+                                              __p.Val = TBIXCACHE_RL((int)(ADDR) & (-1<<6));   \
+                                              __p; })
+#endif
+#endif
+
+/* Combine a physical base address, and a linear address
+ * Internal use only
+ */
+#define _TBIMTABLE_LIN2PHYS(PHYS, LIN, LMASK) (void*)(((int)(PHYS)&0xFFFFF000)\
+                                               +((int)(LIN)&(LMASK)))
+
+/* Convert a linear to a physical address */
+#define TBIMTABLE_LIN2PHYS(LEAFDATA, ADDR)                                    \
+          (((LEAFDATA) & CRLINPHY0_VAL_BIT)                                   \
+              ? _TBIMTABLE_LIN2PHYS(LEAFDATA, ADDR, 0x00000FFF)               \
+              : 0)
+
+/* Debug support - using external debugger or host */
+void __TBIDumpSegListEntries( void );
+void __TBILogF( const char *pFmt, ... );
+void __TBIAssert( const char *pFile, int LineNum, const char *pExp );
+void __TBICont( const char *pMsg, ... ); /* TBIAssert -> 'wait for continue' */
+
+/* Array of signal name data for debug messages */
+extern const char __TBISigNames[];
+#endif /* ifndef __ASSEMBLY__ */
+
+
+
+/* Scale of sub-strings in the __TBISigNames string list */
+#define TBI_SIGNAME_SCALE   4
+#define TBI_SIGNAME_SCALE_S 2
+
+#define TBI_1_3 
+
+#ifdef TBI_1_3
+
+#ifndef __ASSEMBLY__
+#define TBIXCACHE_RD(ADDR)                                 __extension__ ({\
+    void * __Addr = (void *)(ADDR);                                        \
+    int __Data;                                                            \
+    __asm__ volatile ( "CACHERD\t%0,[%1+#0]" :                             \
+                       "=r" (__Data) : "r" (__Addr) );                     \
+    __Data;                                                               })
+
+#define TBIXCACHE_RL(ADDR)                                 __extension__ ({\
+    void * __Addr = (void *)(ADDR);                                        \
+    long long __Data;                                                      \
+    __asm__ volatile ( "CACHERL\t%0,%t0,[%1+#0]" :                         \
+                       "=d" (__Data) : "r" (__Addr) );                     \
+    __Data;                                                               })
+
+#define TBIXCACHE_WD(ADDR, DATA)                                      do {\
+    void * __Addr = (void *)(ADDR);                                       \
+    int __Data = DATA;                                                    \
+    __asm__ volatile ( "CACHEWD\t[%0+#0],%1" :                            \
+                       : "r" (__Addr), "r" (__Data) );          } while(0)
+
+#define TBIXCACHE_WL(ADDR, DATA)                                      do {\
+    void * __Addr = (void *)(ADDR);                                       \
+    long long __Data = DATA;                                              \
+    __asm__ volatile ( "CACHEWL\t[%0+#0],%1,%t1" :                        \
+                       : "r" (__Addr), "r" (__Data) );          } while(0)
+
+#ifdef TBI_4_0
+
+#define TBICACHE_FLUSH_L1D_L2(ADDR)                                       \
+  TBIXCACHE_WD(ADDR, CACHEW_FLUSH_L1D_L2)
+#define TBICACHE_WRITEBACK_L1D_L2(ADDR)                                   \
+  TBIXCACHE_WD(ADDR, CACHEW_WRITEBACK_L1D_L2)
+#define TBICACHE_INVALIDATE_L1D(ADDR)                                     \
+  TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1D)
+#define TBICACHE_INVALIDATE_L1D_L2(ADDR)                                  \
+  TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1D_L2)
+#define TBICACHE_INVALIDATE_L1DTLB(ADDR)                                  \
+  TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1DTLB)
+#define TBICACHE_INVALIDATE_L1I(ADDR)                                     \
+  TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1I)
+#define TBICACHE_INVALIDATE_L1ITLB(ADDR)                                  \
+  TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1ITLB)
+
+#endif /* TBI_4_0 */
+#endif /* ifndef __ASSEMBLY__ */
+
+/* 
+ * Calculate linear PC value from real PC and Minim mode control, the LSB of
+ * the result returned indicates if address compression has occured.
+ */
+#ifndef __ASSEMBLY__
+#define METAG_LINPC( PCVal )                                              (\
+    ( (TBI_GETREG(TXPRIVEXT) & TXPRIVEXT_MINIMON_BIT) != 0 ) ?           ( \
+        ( ((PCVal) & 0x00900000) == 0x00900000 ) ?                         \
+          (((PCVal) & 0xFFE00000) + (((PCVal) & 0x001FFFFC)>>1) + 1) :     \
+        ( ((PCVal) & 0x00800000) == 0x00000000 ) ?                         \
+          (((PCVal) & 0xFF800000) + (((PCVal) & 0x007FFFFC)>>1) + 1) :     \
+                                                             (PCVal)   )   \
+                                                                 : (PCVal) )
+#define METAG_LINPC_X2BIT 0x00000001       /* Make (Size>>1) if compressed */
+
+/* Convert an arbitrary Linear address into a valid Minim PC or return 0 */
+#define METAG_PCMINIM( LinVal )                                           (\
+        (((LinVal) & 0x00980000) == 0x00880000) ?                          \
+            (((LinVal) & 0xFFE00000) + (((LinVal) & 0x000FFFFE)<<1)) :     \
+        (((LinVal) & 0x00C00000) == 0x00000000) ?                          \
+            (((LinVal) & 0xFF800000) + (((LinVal) & 0x003FFFFE)<<1)) : 0   )
+
+/* Reverse a METAG_LINPC conversion step to return the original PCVal */
+#define METAG_PCLIN( LinVal )                              ( 0xFFFFFFFC & (\
+        ( (LinVal & METAG_LINPC_X2BIT) != 0 ) ? METAG_PCMINIM( LinVal ) :  \
+                                                               (LinVal)   ))
+
+/*
+ * Flush the MMCU Table cache privately for each thread. On cores that do not
+ * support per-thread flushing it will flush all threads mapping data.
+ */
+#define TBIMCACHE_TFLUSH(Thread)                                   do {\
+    ((volatile int *)( LINSYSCFLUSH_TxMMCU_BASE            +           \
+                      (LINSYSCFLUSH_TxMMCU_STRIDE*(Thread)) ))[0] = 0; \
+                                                             } while(0)
+
+/*
+ * To flush a single linear-matched cache line from the code cache. In
+ * cases where Minim is possible the METAC_LINPC operation must be used
+ * to pre-process the address being flushed.
+ */
+#define TBIICACHE_FLUSH( pAddr ) TBIXCACHE_WD (pAddr, CACHEW_ICACHE_BIT)
+
+/* To flush a single linear-matched mapping from code/data MMU table cache */
+#define TBIMCACHE_AFLUSH( pAddr, SegType )                                \
+    TBIXCACHE_WD(pAddr, CACHEW_TLBFLUSH_BIT + (                           \
+                 ((SegType) == TBID_SEGTYPE_TEXT) ? CACHEW_ICACHE_BIT : 0 ))
+
+/*
+ * To flush translation data corresponding to a range of addresses without
+ * using TBITCACHE_FLUSH to flush all of this threads translation data. It
+ * is necessary to know what stride (>= 4K) must be used to flush a specific
+ * region.
+ *
+ * For example direct mapped regions use the maximum page size (512K) which may
+ * mean that only one flush is needed to cover the sub-set of the direct
+ * mapped area used since it was setup.
+ *
+ * The function returns the stride on which flushes should be performed.
+ *
+ * If 0 is returned then the region is not subject to MMU caching, if -1 is
+ * returned then this indicates that only TBIMCACHE_TFLUSH can be used to
+ * flush the region concerned rather than TBIMCACHE_AFLUSH which this
+ * function is designed to support.
+ */
+int __TBIMMUCacheStride( const void *pStart, int Bytes );
+
+/*
+ * This function will use the above lower level functions to achieve a MMU
+ * table data flush in an optimal a fashion as possible. On a system that
+ * supports linear address based caching this function will also call the
+ * code or data cache flush functions to maintain address/data coherency.
+ *
+ * SegType should be TBID_SEGTYPE_TEXT if the address range is for code or
+ * any other value such as TBID_SEGTYPE_DATA for data. If an area is
+ * used in both ways then call this function twice; once for each.
+ */
+void __TBIMMUCacheFlush( const void *pStart, int Bytes, int SegType );
+
+/*
+ * Cached Core mode setup and flush functions allow one code and one data
+ * region of the corresponding global or local cache partion size to be
+ * locked into the corresponding cache memory. This prevents normal LRU
+ * logic discarding the code or data and avoids write-thru bandwidth in
+ * data areas. Code mappings are selected by specifying TBID_SEGTYPE_TEXT
+ * for SegType, otherwise data mappings are created.
+ * 
+ * Mode supplied should always contain the VALID bit and WINx selection data.
+ * Data areas will be mapped read-only if the WRITE bit is not added.
+ *
+ * The address returned by the Opt function will either be the same as that
+ * passed in (if optimisation cannot be supported) or the base of the new core
+ * cached region in linear address space. The returned address must be passed
+ * into the End function to remove the mapping when required. If a non-core
+ * cached memory address is passed into it the End function has no effect.
+ * Note that the region accessed MUST be flushed from the appropriate cache
+ * before the End function is called to deliver correct operation.
+ */
+void *__TBICoreCacheOpt( const void *pStart, int Bytes, int SegType, int Mode );
+void __TBICoreCacheEnd( const void *pOpt, int Bytes, int SegType );
+
+/*
+ * Optimise physical access channel and flush side effects before releasing
+ * the channel. If pStart is NULL the whole region must be flushed and this is
+ * done automatically by the channel release function if optimisation is
+ * enabled. Flushing the specific region that may have been accessed before
+ * release should optimises this process. On physically cached systems we do
+ * not flush the code/data caches only the MMU table data needs flushing.
+ */
+void __TBIPhysOptim( int Channel, int IMode, int DMode );
+void __TBIPhysFlush( int Channel, const void *pStart, int Bytes );
+#endif
+#endif /* ifdef TBI_1_3 */
+
+#endif /* _ASM_METAG_TBX_H_ */
diff --git a/arch/metag/include/asm/tcm.h b/arch/metag/include/asm/tcm.h
new file mode 100644
index 0000000..7711c31
--- /dev/null
+++ b/arch/metag/include/asm/tcm.h
@@ -0,0 +1,30 @@
+#ifndef __ASM_TCM_H__
+#define __ASM_TCM_H__
+
+#include <linux/ioport.h>
+#include <linux/list.h>
+
+struct tcm_allocation {
+	struct list_head list;
+	unsigned int tag;
+	unsigned long addr;
+	unsigned long size;
+};
+
+/*
+ * TCM memory region descriptor.
+ */
+struct tcm_region {
+	unsigned int tag;
+	struct resource res;
+};
+
+#define TCM_INVALID_TAG	0xffffffff
+
+unsigned long tcm_alloc(unsigned int tag, size_t len);
+void tcm_free(unsigned int tag, unsigned long addr, size_t len);
+unsigned int tcm_lookup_tag(unsigned long p);
+
+int tcm_add_region(struct tcm_region *reg);
+
+#endif
diff --git a/arch/metag/include/asm/thread_info.h b/arch/metag/include/asm/thread_info.h
new file mode 100644
index 0000000..0ecd34d
--- /dev/null
+++ b/arch/metag/include/asm/thread_info.h
@@ -0,0 +1,155 @@
+/* thread_info.h: Meta low-level thread information
+ *
+ * Copyright (C) 2002  David Howells (dhowells@redhat.com)
+ * - Incorporating suggestions made by Linus Torvalds and Dave Miller
+ *
+ * Meta port by Imagination Technologies
+ */
+
+#ifndef _ASM_THREAD_INFO_H
+#define _ASM_THREAD_INFO_H
+
+#include <linux/compiler.h>
+#include <asm/page.h>
+
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#endif
+
+/*
+ * low level task data that entry.S needs immediate access to
+ * - this struct should fit entirely inside of one cache line
+ * - this struct shares the supervisor stack pages
+ * - if the contents of this structure are changed, the assembly constants must
+ *   also be changed
+ */
+#ifndef __ASSEMBLY__
+
+/* This must be 8 byte aligned so we can ensure stack alignment. */
+struct thread_info {
+	struct task_struct *task;	/* main task structure */
+	struct exec_domain *exec_domain;	/* execution domain */
+	unsigned long flags;	/* low level flags */
+	unsigned long status;	/* thread-synchronous flags */
+	u32 cpu;		/* current CPU */
+	int preempt_count;	/* 0 => preemptable, <0 => BUG */
+
+	mm_segment_t addr_limit;	/* thread address space */
+	struct restart_block restart_block;
+
+	u8 supervisor_stack[0];
+};
+
+#else /* !__ASSEMBLY__ */
+
+#include <generated/asm-offsets.h>
+
+#endif
+
+#define PREEMPT_ACTIVE		0x10000000
+
+#ifdef CONFIG_4KSTACKS
+#define THREAD_SHIFT		12
+#else
+#define THREAD_SHIFT		13
+#endif
+
+#if THREAD_SHIFT >= PAGE_SHIFT
+#define THREAD_SIZE_ORDER	(THREAD_SHIFT - PAGE_SHIFT)
+#else
+#define THREAD_SIZE_ORDER	0
+#endif
+
+#define THREAD_SIZE		(PAGE_SIZE << THREAD_SIZE_ORDER)
+
+#define STACK_WARN		(THREAD_SIZE/8)
+/*
+ * macros/functions for gaining access to the thread information structure
+ */
+#ifndef __ASSEMBLY__
+
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.task		= &tsk,			\
+	.exec_domain	= &default_exec_domain,	\
+	.flags		= 0,			\
+	.cpu		= 0,			\
+	.preempt_count	= INIT_PREEMPT_COUNT,	\
+	.addr_limit	= KERNEL_DS,		\
+	.restart_block = {			\
+		.fn = do_no_restart_syscall,	\
+	},					\
+}
+
+#define init_thread_info	(init_thread_union.thread_info)
+#define init_stack		(init_thread_union.stack)
+
+/* how to get the current stack pointer from C */
+register unsigned long current_stack_pointer asm("A0StP") __used;
+
+/* how to get the thread information struct from C */
+static inline struct thread_info *current_thread_info(void)
+{
+	return (struct thread_info *)(current_stack_pointer &
+				      ~(THREAD_SIZE - 1));
+}
+
+#define __HAVE_ARCH_KSTACK_END
+static inline int kstack_end(void *addr)
+{
+	return addr == (void *) (((unsigned long) addr & ~(THREAD_SIZE - 1))
+				 + sizeof(struct thread_info));
+}
+
+#endif
+
+/*
+ * thread information flags
+ * - these are process state flags that various assembly files may need to
+ *   access
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ */
+#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
+#define TIF_SIGPENDING		1	/* signal pending */
+#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
+#define TIF_SINGLESTEP		3	/* restore singlestep on return to user
+					   mode */
+#define TIF_SYSCALL_AUDIT	4	/* syscall auditing active */
+#define TIF_SECCOMP		5	/* secure computing */
+#define TIF_RESTORE_SIGMASK	6	/* restore signal mask in do_signal() */
+#define TIF_NOTIFY_RESUME	7	/* callback before returning to user */
+#define TIF_POLLING_NRFLAG      8	/* true if poll_idle() is polling
+					   TIF_NEED_RESCHED */
+#define TIF_MEMDIE		9	/* is terminating due to OOM killer */
+#define TIF_SYSCALL_TRACEPOINT  10	/* syscall tracepoint instrumentation */
+
+
+#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
+#define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
+#define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
+#define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		(1<<TIF_SECCOMP)
+#define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
+#define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
+#define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
+
+/* work to do in syscall trace */
+#define _TIF_WORK_SYSCALL_MASK	(_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | \
+				 _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+				 _TIF_SYSCALL_TRACEPOINT)
+
+/* work to do on any return to u-space */
+#define _TIF_ALLWORK_MASK	(_TIF_SYSCALL_TRACE | _TIF_SIGPENDING      | \
+				 _TIF_NEED_RESCHED  | _TIF_SYSCALL_AUDIT   | \
+				 _TIF_SINGLESTEP    | _TIF_RESTORE_SIGMASK | \
+				 _TIF_NOTIFY_RESUME)
+
+/* work to do on interrupt/exception return */
+#define _TIF_WORK_MASK		(_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | \
+				 _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP))
+
+#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
+
+#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/metag/include/asm/tlb.h b/arch/metag/include/asm/tlb.h
new file mode 100644
index 0000000..048282f1
--- /dev/null
+++ b/arch/metag/include/asm/tlb.h
@@ -0,0 +1,36 @@
+#ifndef __ASM_METAG_TLB_H
+#define __ASM_METAG_TLB_H
+
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+/* Note, read http://lkml.org/lkml/2004/1/15/6 */
+
+#ifdef CONFIG_METAG_META12
+
+#define tlb_start_vma(tlb, vma)						      \
+	do {								      \
+		if (!tlb->fullmm)					      \
+			flush_cache_range(vma, vma->vm_start, vma->vm_end);   \
+	} while (0)
+
+#define tlb_end_vma(tlb, vma)						      \
+	do {								      \
+		if (!tlb->fullmm)					      \
+			flush_tlb_range(vma, vma->vm_start, vma->vm_end);     \
+	} while (0)
+
+
+#else
+
+#define tlb_start_vma(tlb, vma)			do { } while (0)
+#define tlb_end_vma(tlb, vma)			do { } while (0)
+
+#endif
+
+#define __tlb_remove_tlb_entry(tlb, pte, addr)	do { } while (0)
+#define tlb_flush(tlb)				flush_tlb_mm((tlb)->mm)
+
+#include <asm-generic/tlb.h>
+
+#endif
diff --git a/arch/metag/include/asm/tlbflush.h b/arch/metag/include/asm/tlbflush.h
new file mode 100644
index 0000000..566acf9
--- /dev/null
+++ b/arch/metag/include/asm/tlbflush.h
@@ -0,0 +1,77 @@
+#ifndef __ASM_METAG_TLBFLUSH_H
+#define __ASM_METAG_TLBFLUSH_H
+
+#include <linux/io.h>
+#include <linux/sched.h>
+#include <asm/metag_mem.h>
+#include <asm/pgalloc.h>
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb() flushes the current mm struct TLBs
+ *  - flush_tlb_all() flushes all processes TLBs
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(mm, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ *
+ * FIXME: Meta 2 can flush single TLB entries.
+ *
+ */
+
+#if defined(CONFIG_METAG_META21) && !defined(CONFIG_SMP)
+static inline void __flush_tlb(void)
+{
+	/* flush TLB entries for just the current hardware thread */
+	int thread = hard_processor_id();
+	metag_out32(0, (LINSYSCFLUSH_TxMMCU_BASE +
+			LINSYSCFLUSH_TxMMCU_STRIDE * thread));
+}
+#else
+static inline void __flush_tlb(void)
+{
+	/* flush TLB entries for all hardware threads */
+	metag_out32(0, LINSYSCFLUSH_MMCU);
+}
+#endif /* defined(CONFIG_METAG_META21) && !defined(CONFIG_SMP) */
+
+#define flush_tlb() __flush_tlb()
+
+#define flush_tlb_all() __flush_tlb()
+
+#define local_flush_tlb_all() __flush_tlb()
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	if (mm == current->active_mm)
+		__flush_tlb();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long addr)
+{
+	flush_tlb_mm(vma->vm_mm);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	flush_tlb_mm(vma->vm_mm);
+}
+
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+				      unsigned long start, unsigned long end)
+{
+	flush_tlb_mm(mm);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+					  unsigned long end)
+{
+	flush_tlb_all();
+}
+
+#endif /* __ASM_METAG_TLBFLUSH_H */
+
diff --git a/arch/metag/include/asm/topology.h b/arch/metag/include/asm/topology.h
new file mode 100644
index 0000000..23f5118
--- /dev/null
+++ b/arch/metag/include/asm/topology.h
@@ -0,0 +1,53 @@
+#ifndef _ASM_METAG_TOPOLOGY_H
+#define _ASM_METAG_TOPOLOGY_H
+
+#ifdef CONFIG_NUMA
+
+/* sched_domains SD_NODE_INIT for Meta machines */
+#define SD_NODE_INIT (struct sched_domain) {		\
+	.parent			= NULL,			\
+	.child			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 8,			\
+	.max_interval		= 32,			\
+	.busy_factor		= 32,			\
+	.imbalance_pct		= 125,			\
+	.cache_nice_tries	= 2,			\
+	.busy_idx		= 3,			\
+	.idle_idx		= 2,			\
+	.newidle_idx		= 0,			\
+	.wake_idx		= 0,			\
+	.forkexec_idx		= 0,			\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_BALANCE_FORK	\
+				| SD_BALANCE_EXEC	\
+				| SD_BALANCE_NEWIDLE	\
+				| SD_SERIALIZE,		\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
+	.nr_balance_failed	= 0,			\
+}
+
+#define cpu_to_node(cpu)	((void)(cpu), 0)
+#define parent_node(node)	((void)(node), 0)
+
+#define cpumask_of_node(node)	((void)node, cpu_online_mask)
+
+#define pcibus_to_node(bus)	((void)(bus), -1)
+#define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ? \
+					cpu_all_mask : \
+					cpumask_of_node(pcibus_to_node(bus)))
+
+#endif
+
+#define mc_capable()    (1)
+
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
+
+extern cpumask_t cpu_core_map[NR_CPUS];
+
+#define topology_core_cpumask(cpu)	(&cpu_core_map[cpu])
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_METAG_TOPOLOGY_H */
diff --git a/arch/metag/include/asm/traps.h b/arch/metag/include/asm/traps.h
new file mode 100644
index 0000000..ac80874
--- /dev/null
+++ b/arch/metag/include/asm/traps.h
@@ -0,0 +1,48 @@
+/*
+ *  Copyright (C) 2005,2008 Imagination Technologies
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef _METAG_TBIVECTORS_H
+#define _METAG_TBIVECTORS_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/tbx.h>
+
+typedef TBIRES (*kick_irq_func_t)(TBIRES, int, int, int, PTBI, int *);
+
+extern TBIRES kick_handler(TBIRES, int, int, int, PTBI);
+struct kick_irq_handler {
+	struct list_head list;
+	kick_irq_func_t func;
+};
+
+extern void kick_register_func(struct kick_irq_handler *);
+extern void kick_unregister_func(struct kick_irq_handler *);
+
+extern void head_end(TBIRES, unsigned long);
+extern void restart_critical_section(TBIRES State);
+extern TBIRES tail_end_sys(TBIRES, int, int *);
+static inline TBIRES tail_end(TBIRES state)
+{
+	return tail_end_sys(state, -1, NULL);
+}
+
+DECLARE_PER_CPU(PTBI, pTBI);
+extern PTBI pTBI_get(unsigned int);
+
+extern int ret_from_fork(TBIRES arg);
+
+extern int do_page_fault(struct pt_regs *regs, unsigned long address,
+			 unsigned int write_access, unsigned int trapno);
+
+extern TBIRES __TBIUnExpXXX(TBIRES State, int SigNum, int Triggers, int Inst,
+			    PTBI pTBI);
+
+#endif
+
+#endif /* _METAG_TBIVECTORS_H */
diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h
new file mode 100644
index 0000000..0748b0a
--- /dev/null
+++ b/arch/metag/include/asm/uaccess.h
@@ -0,0 +1,241 @@
+#ifndef __METAG_UACCESS_H
+#define __METAG_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/sched.h>
+
+#define VERIFY_READ	0
+#define VERIFY_WRITE	1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not.  If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(s)  ((mm_segment_t) { (s) })
+
+#define KERNEL_DS       MAKE_MM_SEG(0xFFFFFFFF)
+#define USER_DS		MAKE_MM_SEG(PAGE_OFFSET)
+
+#define get_ds()	(KERNEL_DS)
+#define get_fs()        (current_thread_info()->addr_limit)
+#define set_fs(x)       (current_thread_info()->addr_limit = (x))
+
+#define segment_eq(a, b)	((a).seg == (b).seg)
+
+#define __kernel_ok (segment_eq(get_fs(), KERNEL_DS))
+/*
+ * Explicitly allow NULL pointers here. Parts of the kernel such
+ * as readv/writev use access_ok to validate pointers, but want
+ * to allow NULL pointers for various reasons. NULL pointers are
+ * safe to allow through because the first page is not mappable on
+ * Meta.
+ *
+ * We also wish to avoid letting user code access the system area
+ * and the kernel half of the address space.
+ */
+#define __user_bad(addr, size) (((addr) > 0 && (addr) < META_MEMORY_BASE) || \
+				((addr) > PAGE_OFFSET &&		\
+				 (addr) < LINCORE_BASE))
+
+static inline int __access_ok(unsigned long addr, unsigned long size)
+{
+	return __kernel_ok || !__user_bad(addr, size);
+}
+
+#define access_ok(type, addr, size) __access_ok((unsigned long)(addr),	\
+						(unsigned long)(size))
+
+static inline int verify_area(int type, const void *addr, unsigned long size)
+{
+	return access_ok(type, addr, size) ? 0 : -EFAULT;
+}
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+struct exception_table_entry {
+	unsigned long insn, fixup;
+};
+
+extern int fixup_exception(struct pt_regs *regs);
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ */
+
+#define put_user(x, ptr) \
+	__put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+#define __put_user(x, ptr) \
+	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+
+extern void __put_user_bad(void);
+
+#define __put_user_nocheck(x, ptr, size)		\
+({                                                      \
+	long __pu_err;                                  \
+	__put_user_size((x), (ptr), (size), __pu_err);	\
+	__pu_err;                                       \
+})
+
+#define __put_user_check(x, ptr, size)				\
+({                                                              \
+	long __pu_err = -EFAULT;                                \
+	__typeof__(*(ptr)) __user *__pu_addr = (ptr);           \
+	if (access_ok(VERIFY_WRITE, __pu_addr, size))		\
+		__put_user_size((x), __pu_addr, (size), __pu_err);	\
+	__pu_err;                                               \
+})
+
+extern long __put_user_asm_b(unsigned int x, void __user *addr);
+extern long __put_user_asm_w(unsigned int x, void __user *addr);
+extern long __put_user_asm_d(unsigned int x, void __user *addr);
+extern long __put_user_asm_l(unsigned long long x, void __user *addr);
+
+#define __put_user_size(x, ptr, size, retval)			\
+do {                                                            \
+	retval = 0;                                             \
+	switch (size) {                                         \
+	case 1:								\
+		retval = __put_user_asm_b((unsigned int)x, ptr); break;	\
+	case 2:								\
+		retval = __put_user_asm_w((unsigned int)x, ptr); break;	\
+	case 4:								\
+		retval = __put_user_asm_d((unsigned int)x, ptr); break;	\
+	case 8:								\
+		retval = __put_user_asm_l((unsigned long long)x, ptr); break; \
+	default:							\
+		__put_user_bad();					\
+	}								\
+} while (0)
+
+#define get_user(x, ptr) \
+	__get_user_check((x), (ptr), sizeof(*(ptr)))
+#define __get_user(x, ptr) \
+	__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
+
+extern long __get_user_bad(void);
+
+#define __get_user_nocheck(x, ptr, size)			\
+({                                                              \
+	long __gu_err, __gu_val;                                \
+	__get_user_size(__gu_val, (ptr), (size), __gu_err);	\
+	(x) = (__typeof__(*(ptr)))__gu_val;                     \
+	__gu_err;                                               \
+})
+
+#define __get_user_check(x, ptr, size)					\
+({                                                                      \
+	long __gu_err = -EFAULT, __gu_val = 0;                          \
+	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
+	if (access_ok(VERIFY_READ, __gu_addr, size))			\
+		__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
+	(x) = (__typeof__(*(ptr)))__gu_val;                             \
+	__gu_err;                                                       \
+})
+
+extern unsigned char __get_user_asm_b(const void __user *addr, long *err);
+extern unsigned short __get_user_asm_w(const void __user *addr, long *err);
+extern unsigned int __get_user_asm_d(const void __user *addr, long *err);
+
+#define __get_user_size(x, ptr, size, retval)			\
+do {                                                            \
+	retval = 0;                                             \
+	switch (size) {                                         \
+	case 1:							\
+		x = __get_user_asm_b(ptr, &retval); break;	\
+	case 2:							\
+		x = __get_user_asm_w(ptr, &retval); break;	\
+	case 4:							\
+		x = __get_user_asm_d(ptr, &retval); break;	\
+	default:						\
+		(x) = __get_user_bad();				\
+	}                                                       \
+} while (0)
+
+/*
+ * Copy a null terminated string from userspace.
+ *
+ * Must return:
+ * -EFAULT		for an exception
+ * count		if we hit the buffer limit
+ * bytes copied		if we hit a null byte
+ * (without the null byte)
+ */
+
+extern long __must_check __strncpy_from_user(char *dst, const char __user *src,
+					     long count);
+
+#define strncpy_from_user(dst, src, count) __strncpy_from_user(dst, src, count)
+
+/*
+ * Return the size of a string (including the ending 0)
+ *
+ * Return 0 on exception, a value greater than N if too long
+ */
+extern long __must_check strnlen_user(const char __user *src, long count);
+
+#define strlen_user(str) strnlen_user(str, 32767)
+
+extern unsigned long __must_check __copy_user_zeroing(void *to,
+						      const void __user *from,
+						      unsigned long n);
+
+static inline unsigned long
+copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	if (access_ok(VERIFY_READ, from, n))
+		return __copy_user_zeroing(to, from, n);
+	return n;
+}
+
+#define __copy_from_user(to, from, n) __copy_user_zeroing(to, from, n)
+#define __copy_from_user_inatomic __copy_from_user
+
+extern unsigned long __must_check __copy_user(void __user *to,
+					      const void *from,
+					      unsigned long n);
+
+static inline unsigned long copy_to_user(void __user *to, const void *from,
+					 unsigned long n)
+{
+	if (access_ok(VERIFY_WRITE, to, n))
+		return __copy_user(to, from, n);
+	return n;
+}
+
+#define __copy_to_user(to, from, n) __copy_user(to, from, n)
+#define __copy_to_user_inatomic __copy_to_user
+
+/*
+ * Zero Userspace
+ */
+
+extern unsigned long __must_check __do_clear_user(void __user *to,
+						  unsigned long n);
+
+static inline unsigned long clear_user(void __user *to, unsigned long n)
+{
+	if (access_ok(VERIFY_WRITE, to, n))
+		return __do_clear_user(to, n);
+	return n;
+}
+
+#define __clear_user(to, n)            __do_clear_user(to, n)
+
+#endif /* _METAG_UACCESS_H */
diff --git a/arch/metag/include/asm/unistd.h b/arch/metag/include/asm/unistd.h
new file mode 100644
index 0000000..32955a1
--- /dev/null
+++ b/arch/metag/include/asm/unistd.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <uapi/asm/unistd.h>
+
+#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/metag/include/asm/user_gateway.h b/arch/metag/include/asm/user_gateway.h
new file mode 100644
index 0000000..e404c09
--- /dev/null
+++ b/arch/metag/include/asm/user_gateway.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2010 Imagination Technologies
+ */
+
+#ifndef __ASM_METAG_USER_GATEWAY_H
+#define __ASM_METAG_USER_GATEWAY_H
+
+#include <asm/page.h>
+
+/* Page of kernel code accessible to userspace. */
+#define USER_GATEWAY_PAGE	0x6ffff000
+/* Offset of TLS pointer array in gateway page. */
+#define USER_GATEWAY_TLS	0x100
+
+#ifndef __ASSEMBLY__
+
+extern char __user_gateway_start;
+extern char __user_gateway_end;
+
+/* Kernel mapping of the gateway page. */
+extern void *gateway_page;
+
+static inline void set_gateway_tls(void __user *tls_ptr)
+{
+	void **gateway_tls = (void **)(gateway_page + USER_GATEWAY_TLS +
+				       hard_processor_id() * 4);
+
+	*gateway_tls = (__force void *)tls_ptr;
+#ifdef CONFIG_METAG_META12
+	/* Avoid cache aliases on virtually tagged cache. */
+	__builtin_dcache_flush((void *)USER_GATEWAY_PAGE + USER_GATEWAY_TLS +
+				       hard_processor_id() * sizeof(void *));
+#endif
+}
+
+extern int __kuser_get_tls(void);
+extern char *__kuser_get_tls_end[];
+
+extern int __kuser_cmpxchg(int, int, unsigned long *);
+extern char *__kuser_cmpxchg_end[];
+
+#endif
+
+#endif
diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild
new file mode 100644
index 0000000..876c71f
--- /dev/null
+++ b/arch/metag/include/uapi/asm/Kbuild
@@ -0,0 +1,13 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
+header-y += byteorder.h
+header-y += ptrace.h
+header-y += resource.h
+header-y += sigcontext.h
+header-y += siginfo.h
+header-y += swab.h
+header-y += unistd.h
+
+generic-y += mman.h
+generic-y += setup.h
diff --git a/arch/metag/include/uapi/asm/byteorder.h b/arch/metag/include/uapi/asm/byteorder.h
new file mode 100644
index 0000000..9558416
--- /dev/null
+++ b/arch/metag/include/uapi/asm/byteorder.h
@@ -0,0 +1 @@
+#include <linux/byteorder/little_endian.h>
diff --git a/arch/metag/include/uapi/asm/ptrace.h b/arch/metag/include/uapi/asm/ptrace.h
new file mode 100644
index 0000000..45d9780
--- /dev/null
+++ b/arch/metag/include/uapi/asm/ptrace.h
@@ -0,0 +1,113 @@
+#ifndef _UAPI_METAG_PTRACE_H
+#define _UAPI_METAG_PTRACE_H
+
+#ifndef __ASSEMBLY__
+
+/*
+ * These are the layouts of the regsets returned by the GETREGSET ptrace call
+ */
+
+/* user_gp_regs::status */
+
+/* CBMarker bit (indicates catch state / catch replay) */
+#define USER_GP_REGS_STATUS_CATCH_BIT		(1 << 22)
+#define USER_GP_REGS_STATUS_CATCH_S		22
+/* LSM_STEP field (load/store multiple step) */
+#define USER_GP_REGS_STATUS_LSM_STEP_BITS	(0x7 << 8)
+#define USER_GP_REGS_STATUS_LSM_STEP_S		8
+/* SCC bit (indicates split 16x16 condition flags) */
+#define USER_GP_REGS_STATUS_SCC_BIT		(1 << 4)
+#define USER_GP_REGS_STATUS_SCC_S		4
+
+/* normal condition flags */
+/* CF_Z bit (Zero flag) */
+#define USER_GP_REGS_STATUS_CF_Z_BIT		(1 << 3)
+#define USER_GP_REGS_STATUS_CF_Z_S		3
+/* CF_N bit (Negative flag) */
+#define USER_GP_REGS_STATUS_CF_N_BIT		(1 << 2)
+#define USER_GP_REGS_STATUS_CF_N_S		2
+/* CF_V bit (oVerflow flag) */
+#define USER_GP_REGS_STATUS_CF_V_BIT		(1 << 1)
+#define USER_GP_REGS_STATUS_CF_V_S		1
+/* CF_C bit (Carry flag) */
+#define USER_GP_REGS_STATUS_CF_C_BIT		(1 << 0)
+#define USER_GP_REGS_STATUS_CF_C_S		0
+
+/* split 16x16 condition flags */
+/* SCF_LZ bit (Low Zero flag) */
+#define USER_GP_REGS_STATUS_SCF_LZ_BIT		(1 << 3)
+#define USER_GP_REGS_STATUS_SCF_LZ_S		3
+/* SCF_HZ bit (High Zero flag) */
+#define USER_GP_REGS_STATUS_SCF_HZ_BIT		(1 << 2)
+#define USER_GP_REGS_STATUS_SCF_HZ_S		2
+/* SCF_HC bit (High Carry flag) */
+#define USER_GP_REGS_STATUS_SCF_HC_BIT		(1 << 1)
+#define USER_GP_REGS_STATUS_SCF_HC_S		1
+/* SCF_LC bit (Low Carry flag) */
+#define USER_GP_REGS_STATUS_SCF_LC_BIT		(1 << 0)
+#define USER_GP_REGS_STATUS_SCF_LC_S		0
+
+/**
+ * struct user_gp_regs - User general purpose registers
+ * @dx:		GP data unit regs (dx[reg][unit] = D{unit:0-1}.{reg:0-7})
+ * @ax:		GP address unit regs (ax[reg][unit] = A{unit:0-1}.{reg:0-3})
+ * @pc:		PC register
+ * @status:	TXSTATUS register (condition flags, LSM_STEP etc)
+ * @rpt:	TXRPT registers (branch repeat counter)
+ * @bpobits:	TXBPOBITS register ("branch prediction other" bits)
+ * @mode:	TXMODE register
+ * @_pad1:	Reserved padding to make sizeof obviously 64bit aligned
+ *
+ * This is the user-visible general purpose register state structure.
+ *
+ * It can be accessed through PTRACE_GETREGSET with NT_PRSTATUS.
+ *
+ * It is also used in the signal context.
+ */
+struct user_gp_regs {
+	unsigned long dx[8][2];
+	unsigned long ax[4][2];
+	unsigned long pc;
+	unsigned long status;
+	unsigned long rpt;
+	unsigned long bpobits;
+	unsigned long mode;
+	unsigned long _pad1;
+};
+
+/**
+ * struct user_cb_regs - User catch buffer registers
+ * @flags:	TXCATCH0 register (fault flags)
+ * @addr:	TXCATCH1 register (fault address)
+ * @data:	TXCATCH2 and TXCATCH3 registers (low and high data word)
+ *
+ * This is the user-visible catch buffer register state structure containing
+ * information about a failed memory access, and allowing the access to be
+ * modified and replayed.
+ *
+ * It can be accessed through PTRACE_GETREGSET with NT_METAG_CBUF.
+ */
+struct user_cb_regs {
+	unsigned long flags;
+	unsigned long addr;
+	unsigned long long data;
+};
+
+/**
+ * struct user_rp_state - User read pipeline state
+ * @entries:	Read pipeline entries
+ * @mask:	Mask of valid pipeline entries (RPMask from TXDIVTIME register)
+ *
+ * This is the user-visible read pipeline state structure containing the entries
+ * currently in the read pipeline and the mask of valid entries.
+ *
+ * It can be accessed through PTRACE_GETREGSET with NT_METAG_RPIPE.
+ */
+struct user_rp_state {
+	unsigned long long entries[6];
+	unsigned long mask;
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_METAG_PTRACE_H */
diff --git a/arch/metag/include/uapi/asm/resource.h b/arch/metag/include/uapi/asm/resource.h
new file mode 100644
index 0000000..526d23c
--- /dev/null
+++ b/arch/metag/include/uapi/asm/resource.h
@@ -0,0 +1,7 @@
+#ifndef _UAPI_METAG_RESOURCE_H
+#define _UAPI_METAG_RESOURCE_H
+
+#define _STK_LIM_MAX    (1 << 28)
+#include <asm-generic/resource.h>
+
+#endif /* _UAPI_METAG_RESOURCE_H */
diff --git a/arch/metag/include/uapi/asm/sigcontext.h b/arch/metag/include/uapi/asm/sigcontext.h
new file mode 100644
index 0000000..ef79a91
--- /dev/null
+++ b/arch/metag/include/uapi/asm/sigcontext.h
@@ -0,0 +1,31 @@
+#ifndef _ASM_METAG_SIGCONTEXT_H
+#define _ASM_METAG_SIGCONTEXT_H
+
+#include <asm/ptrace.h>
+
+/*
+ * In a sigcontext structure we need to store the active state of the
+ * user process so that it does not get trashed when we call the signal
+ * handler. That not really the same as a user context that we are
+ * going to store on syscall etc.
+ */
+struct sigcontext {
+	struct user_gp_regs regs;	/* needs to be first */
+
+	/*
+	 * Catch registers describing a memory fault.
+	 * If USER_GP_REGS_STATUS_CATCH_BIT is set in regs.status then catch
+	 * buffers have been saved and will be replayed on sigreturn.
+	 * Clear that bit to discard the catch state instead of replaying it.
+	 */
+	struct user_cb_regs cb;
+
+	/*
+	 * Read pipeline state. This will get restored on sigreturn.
+	 */
+	struct user_rp_state rp;
+
+	unsigned long oldmask;
+};
+
+#endif
diff --git a/arch/metag/include/uapi/asm/siginfo.h b/arch/metag/include/uapi/asm/siginfo.h
new file mode 100644
index 0000000..b2e0c8b
--- /dev/null
+++ b/arch/metag/include/uapi/asm/siginfo.h
@@ -0,0 +1,8 @@
+#ifndef _METAG_SIGINFO_H
+#define _METAG_SIGINFO_H
+
+#define __ARCH_SI_TRAPNO
+
+#include <asm-generic/siginfo.h>
+
+#endif
diff --git a/arch/metag/include/uapi/asm/swab.h b/arch/metag/include/uapi/asm/swab.h
new file mode 100644
index 0000000..1076b3a
--- /dev/null
+++ b/arch/metag/include/uapi/asm/swab.h
@@ -0,0 +1,26 @@
+#ifndef __ASM_METAG_SWAB_H
+#define __ASM_METAG_SWAB_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm-generic/swab.h>
+
+static inline __attribute_const__ __u16 __arch_swab16(__u16 x)
+{
+	return __builtin_metag_bswaps(x);
+}
+#define __arch_swab16 __arch_swab16
+
+static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
+{
+	return __builtin_metag_bswap(x);
+}
+#define __arch_swab32 __arch_swab32
+
+static inline __attribute_const__ __u64 __arch_swab64(__u64 x)
+{
+	return __builtin_metag_bswapll(x);
+}
+#define __arch_swab64 __arch_swab64
+
+#endif /* __ASM_METAG_SWAB_H */
diff --git a/arch/metag/include/uapi/asm/unistd.h b/arch/metag/include/uapi/asm/unistd.h
new file mode 100644
index 0000000..b80b8e8
--- /dev/null
+++ b/arch/metag/include/uapi/asm/unistd.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+/* Use the standard ABI for syscalls. */
+#include <asm-generic/unistd.h>
+
+/* metag-specific syscalls. */
+#define __NR_metag_setglobalbit		(__NR_arch_specific_syscall + 1)
+__SYSCALL(__NR_metag_setglobalbit, sys_metag_setglobalbit)
+#define __NR_metag_set_fpu_flags	(__NR_arch_specific_syscall + 2)
+__SYSCALL(__NR_metag_set_fpu_flags, sys_metag_set_fpu_flags)
+#define __NR_metag_set_tls		(__NR_arch_specific_syscall + 3)
+__SYSCALL(__NR_metag_set_tls, sys_metag_set_tls)
+#define __NR_metag_get_tls		(__NR_arch_specific_syscall + 4)
+__SYSCALL(__NR_metag_get_tls, sys_metag_get_tls)
diff --git a/arch/metag/kernel/.gitignore b/arch/metag/kernel/.gitignore
new file mode 100644
index 0000000..c5f676c
--- /dev/null
+++ b/arch/metag/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/metag/kernel/Makefile b/arch/metag/kernel/Makefile
new file mode 100644
index 0000000..d7675f4
--- /dev/null
+++ b/arch/metag/kernel/Makefile
@@ -0,0 +1,39 @@
+#
+# Makefile for the Linux/Meta kernel.
+#
+
+extra-y	+= head.o
+extra-y	+= vmlinux.lds
+
+obj-y	+= cachepart.o
+obj-y	+= clock.o
+obj-y	+= core_reg.o
+obj-y	+= devtree.o
+obj-y	+= dma.o
+obj-y	+= irq.o
+obj-y	+= kick.o
+obj-y	+= machines.o
+obj-y	+= process.o
+obj-y	+= ptrace.o
+obj-y	+= setup.o
+obj-y	+= signal.o
+obj-y	+= stacktrace.o
+obj-y	+= sys_metag.o
+obj-y	+= tbiunexp.o
+obj-y	+= time.o
+obj-y	+= topology.o
+obj-y	+= traps.o
+obj-y	+= user_gateway.o
+
+obj-$(CONFIG_PERF_EVENTS)		+= perf/
+
+obj-$(CONFIG_METAG_COREMEM)		+= coremem.o
+obj-$(CONFIG_METAG_DA)			+= da.o
+obj-$(CONFIG_DYNAMIC_FTRACE)		+= ftrace.o
+obj-$(CONFIG_FUNCTION_TRACER)		+= ftrace_stub.o
+obj-$(CONFIG_MODULES)			+= metag_ksyms.o
+obj-$(CONFIG_MODULES)			+= module.o
+obj-$(CONFIG_PERF_EVENTS)		+= perf_callchain.o
+obj-$(CONFIG_SMP)			+= smp.o
+obj-$(CONFIG_METAG_SUSPEND_MEM)		+= suspend.o
+obj-$(CONFIG_METAG_USER_TCM)		+= tcm.o
diff --git a/arch/metag/kernel/asm-offsets.c b/arch/metag/kernel/asm-offsets.c
new file mode 100644
index 0000000..bfc9205
--- /dev/null
+++ b/arch/metag/kernel/asm-offsets.c
@@ -0,0 +1,14 @@
+/*
+ * This program is used to generate definitions needed by
+ * assembly language modules.
+ *
+ */
+
+#include <linux/kbuild.h>
+#include <linux/thread_info.h>
+
+int main(void)
+{
+	DEFINE(THREAD_INFO_SIZE, sizeof(struct thread_info));
+	return 0;
+}
diff --git a/arch/metag/kernel/cachepart.c b/arch/metag/kernel/cachepart.c
new file mode 100644
index 0000000..3a589dfb
--- /dev/null
+++ b/arch/metag/kernel/cachepart.c
@@ -0,0 +1,124 @@
+/*
+ * Meta cache partition manipulation.
+ *
+ * Copyright 2010 Imagination Technologies Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/errno.h>
+#include <asm/processor.h>
+#include <asm/cachepart.h>
+#include <asm/metag_isa.h>
+#include <asm/metag_mem.h>
+
+#define SYSC_DCPART(n)	(SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n))
+#define SYSC_ICPART(n)	(SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n))
+
+#define CACHE_ASSOCIATIVITY 4 /* 4 way set-assosiative */
+#define ICACHE 0
+#define DCACHE 1
+
+/* The CORE_CONFIG2 register is not available on Meta 1 */
+#ifdef CONFIG_METAG_META21
+unsigned int get_dcache_size(void)
+{
+	unsigned int config2 = metag_in32(METAC_CORE_CONFIG2);
+	return 0x1000 << ((config2 & METAC_CORECFG2_DCSZ_BITS)
+				>> METAC_CORECFG2_DCSZ_S);
+}
+
+unsigned int get_icache_size(void)
+{
+	unsigned int config2 = metag_in32(METAC_CORE_CONFIG2);
+	return 0x1000 << ((config2 & METAC_CORE_C2ICSZ_BITS)
+				>> METAC_CORE_C2ICSZ_S);
+}
+
+unsigned int get_global_dcache_size(void)
+{
+	unsigned int cpart = metag_in32(SYSC_DCPART(hard_processor_id()));
+	unsigned int temp = cpart & SYSC_xCPARTG_AND_BITS;
+	return (get_dcache_size() * ((temp >> SYSC_xCPARTG_AND_S) + 1)) >> 4;
+}
+
+unsigned int get_global_icache_size(void)
+{
+	unsigned int cpart = metag_in32(SYSC_ICPART(hard_processor_id()));
+	unsigned int temp = cpart & SYSC_xCPARTG_AND_BITS;
+	return (get_icache_size() * ((temp >> SYSC_xCPARTG_AND_S) + 1)) >> 4;
+}
+
+static unsigned int get_thread_cache_size(unsigned int cache, int thread_id)
+{
+	unsigned int cache_size;
+	unsigned int t_cache_part;
+	unsigned int isEnabled;
+	unsigned int offset = 0;
+	isEnabled = (cache == DCACHE ? metag_in32(MMCU_DCACHE_CTRL_ADDR) & 0x1 :
+		metag_in32(MMCU_ICACHE_CTRL_ADDR) & 0x1);
+	if (!isEnabled)
+		return 0;
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+	/* Checking for global cache */
+	cache_size = (cache == DCACHE ? get_global_dache_size() :
+		get_global_icache_size());
+	offset = 8;
+#else
+	cache_size = (cache == DCACHE ? get_dcache_size() :
+		get_icache_size());
+#endif
+	t_cache_part = (cache == DCACHE ?
+		(metag_in32(SYSC_DCPART(thread_id)) >> offset) & 0xF :
+		(metag_in32(SYSC_ICPART(thread_id)) >> offset) & 0xF);
+	switch (t_cache_part) {
+	case 0xF:
+		return cache_size;
+	case 0x7:
+		return cache_size / 2;
+	case 0x3:
+		return cache_size / 4;
+	case 0x1:
+		return cache_size / 8;
+	case 0:
+		return cache_size / 16;
+	}
+	return -1;
+}
+
+void check_for_cache_aliasing(int thread_id)
+{
+	unsigned int thread_cache_size;
+	unsigned int cache_type;
+	for (cache_type = ICACHE; cache_type <= DCACHE; cache_type++) {
+		thread_cache_size =
+				get_thread_cache_size(cache_type, thread_id);
+		if (thread_cache_size < 0)
+			pr_emerg("Can't read %s cache size", \
+				 cache_type ? "DCACHE" : "ICACHE");
+		else if (thread_cache_size == 0)
+			/* Cache is off. No need to check for aliasing */
+			continue;
+		if (thread_cache_size / CACHE_ASSOCIATIVITY > PAGE_SIZE) {
+			pr_emerg("Cache aliasing detected in %s on Thread %d",
+				 cache_type ? "DCACHE" : "ICACHE", thread_id);
+			pr_warn("Total %s size: %u bytes",
+				cache_type ? "DCACHE" : "ICACHE ",
+				cache_type ? get_dcache_size()
+				: get_icache_size());
+			pr_warn("Thread %s size: %d bytes",
+				cache_type ? "CACHE" : "ICACHE",
+				thread_cache_size);
+			pr_warn("Page Size: %lu bytes", PAGE_SIZE);
+		}
+	}
+}
+
+#else
+
+void check_for_cache_aliasing(int thread_id)
+{
+	return;
+}
+
+#endif
diff --git a/arch/metag/kernel/clock.c b/arch/metag/kernel/clock.c
new file mode 100644
index 0000000..defc840
--- /dev/null
+++ b/arch/metag/kernel/clock.c
@@ -0,0 +1,53 @@
+/*
+ * arch/metag/kernel/clock.c
+ *
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+
+#include <asm/param.h>
+#include <asm/clock.h>
+
+struct meta_clock_desc _meta_clock;
+
+/* Default machine get_core_freq callback. */
+static unsigned long get_core_freq_default(void)
+{
+#ifdef CONFIG_METAG_META21
+	/*
+	 * Meta 2 cores divide down the core clock for the Meta timers, so we
+	 * can estimate the core clock from the divider.
+	 */
+	return (metag_in32(EXPAND_TIMER_DIV) + 1) * 1000000;
+#else
+	/*
+	 * On Meta 1 we don't know the core clock, but assuming the Meta timer
+	 * is correct it can be estimated based on loops_per_jiffy.
+	 */
+	return (loops_per_jiffy * HZ * 5) >> 1;
+#endif
+}
+
+/**
+ * setup_meta_clocks() - Set up the Meta clock.
+ * @desc:	Clock descriptor usually provided by machine description
+ *
+ * Ensures all callbacks are valid.
+ */
+void __init setup_meta_clocks(struct meta_clock_desc *desc)
+{
+	/* copy callbacks */
+	if (desc)
+		_meta_clock = *desc;
+
+	/* set fallback functions */
+	if (!_meta_clock.get_core_freq)
+		_meta_clock.get_core_freq = get_core_freq_default;
+}
+
diff --git a/arch/metag/kernel/core_reg.c b/arch/metag/kernel/core_reg.c
new file mode 100644
index 0000000..671cce8
--- /dev/null
+++ b/arch/metag/kernel/core_reg.c
@@ -0,0 +1,117 @@
+/*
+ *  Support for reading and writing Meta core internal registers.
+ *
+ *  Copyright (C) 2011 Imagination Technologies Ltd.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/export.h>
+
+#include <asm/core_reg.h>
+#include <asm/global_lock.h>
+#include <asm/hwthread.h>
+#include <asm/io.h>
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+
+#define UNIT_BIT_MASK		TXUXXRXRQ_UXX_BITS
+#define REG_BIT_MASK		TXUXXRXRQ_RX_BITS
+#define THREAD_BIT_MASK		TXUXXRXRQ_TX_BITS
+
+#define UNIT_SHIFTS		TXUXXRXRQ_UXX_S
+#define REG_SHIFTS		TXUXXRXRQ_RX_S
+#define THREAD_SHIFTS		TXUXXRXRQ_TX_S
+
+#define UNIT_VAL(x)		(((x) << UNIT_SHIFTS) & UNIT_BIT_MASK)
+#define REG_VAL(x)		(((x) << REG_SHIFTS) & REG_BIT_MASK)
+#define THREAD_VAL(x)		(((x) << THREAD_SHIFTS) & THREAD_BIT_MASK)
+
+/*
+ * core_reg_write() - modify the content of a register in a core unit.
+ * @unit:	The unit to be modified.
+ * @reg:	Register number within the unit.
+ * @thread:	The thread we want to access.
+ * @val:	The new value to write.
+ *
+ * Check asm/metag_regs.h for a list/defines of supported units (ie: TXUPC_ID,
+ * TXUTR_ID, etc), and regnums within the units (ie: TXMASKI_REGNUM,
+ * TXPOLLI_REGNUM, etc).
+ */
+void core_reg_write(int unit, int reg, int thread, unsigned int val)
+{
+	unsigned long flags;
+
+	/* TXUCT_ID has its own memory mapped registers */
+	if (unit == TXUCT_ID) {
+		void __iomem *cu_reg = __CU_addr(thread, reg);
+		metag_out32(val, cu_reg);
+		return;
+	}
+
+	__global_lock2(flags);
+
+	/* wait for ready */
+	while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+		udelay(10);
+
+	/* set the value to write */
+	metag_out32(val, TXUXXRXDT);
+
+	/* set the register to write */
+	val = UNIT_VAL(unit) | REG_VAL(reg) | THREAD_VAL(thread);
+	metag_out32(val, TXUXXRXRQ);
+
+	/* wait for finish */
+	while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+		udelay(10);
+
+	__global_unlock2(flags);
+}
+EXPORT_SYMBOL(core_reg_write);
+
+/*
+ * core_reg_read() - read the content of a register in a core unit.
+ * @unit:	The unit to be modified.
+ * @reg:	Register number within the unit.
+ * @thread:	The thread we want to access.
+ *
+ * Check asm/metag_regs.h for a list/defines of supported units (ie: TXUPC_ID,
+ * TXUTR_ID, etc), and regnums within the units (ie: TXMASKI_REGNUM,
+ * TXPOLLI_REGNUM, etc).
+ */
+unsigned int core_reg_read(int unit, int reg, int thread)
+{
+	unsigned long flags;
+	unsigned int val;
+
+	/* TXUCT_ID has its own memory mapped registers */
+	if (unit == TXUCT_ID) {
+		void __iomem *cu_reg = __CU_addr(thread, reg);
+		val = metag_in32(cu_reg);
+		return val;
+	}
+
+	__global_lock2(flags);
+
+	/* wait for ready */
+	while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+		udelay(10);
+
+	/* set the register to read */
+	val = (UNIT_VAL(unit) | REG_VAL(reg) | THREAD_VAL(thread) |
+							TXUXXRXRQ_RDnWR_BIT);
+	metag_out32(val, TXUXXRXRQ);
+
+	/* wait for finish */
+	while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+		udelay(10);
+
+	/* read the register value */
+	val = metag_in32(TXUXXRXDT);
+
+	__global_unlock2(flags);
+
+	return val;
+}
+EXPORT_SYMBOL(core_reg_read);
diff --git a/arch/metag/kernel/da.c b/arch/metag/kernel/da.c
new file mode 100644
index 0000000..52aabb6
--- /dev/null
+++ b/arch/metag/kernel/da.c
@@ -0,0 +1,23 @@
+/*
+ * Meta DA JTAG debugger control.
+ *
+ * Copyright 2012 Imagination Technologies Ltd.
+ */
+
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <asm/da.h>
+#include <asm/metag_mem.h>
+
+bool _metag_da_present;
+
+int __init metag_da_probe(void)
+{
+	_metag_da_present = (metag_in32(T0VECINT_BHALT) == 1);
+	if (_metag_da_present)
+		pr_info("DA present\n");
+	else
+		pr_info("DA not present\n");
+	return 0;
+}
diff --git a/arch/metag/kernel/devtree.c b/arch/metag/kernel/devtree.c
new file mode 100644
index 0000000..7cd0252
--- /dev/null
+++ b/arch/metag/kernel/devtree.c
@@ -0,0 +1,114 @@
+/*
+ *  linux/arch/metag/kernel/devtree.c
+ *
+ *  Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ *  Based on ARM version:
+ *  Copyright (C) 2009 Canonical Ltd. <jeremy.kerr@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/mach/arch.h>
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+	pr_err("%s(%llx, %llx)\n",
+	       __func__, base, size);
+}
+
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+	return alloc_bootmem_align(size, align);
+}
+
+/**
+ * setup_machine_fdt - Machine setup when an dtb was passed to the kernel
+ * @dt:		virtual address pointer to dt blob
+ *
+ * If a dtb was passed to the kernel, then use it to choose the correct
+ * machine_desc and to setup the system.
+ */
+struct machine_desc * __init setup_machine_fdt(void *dt)
+{
+	struct boot_param_header *devtree = dt;
+	struct machine_desc *mdesc, *mdesc_best = NULL;
+	unsigned int score, mdesc_score = ~1;
+	unsigned long dt_root;
+	const char *model;
+
+	/* check device tree validity */
+	if (be32_to_cpu(devtree->magic) != OF_DT_HEADER)
+		return NULL;
+
+	/* Search the mdescs for the 'best' compatible value match */
+	initial_boot_params = devtree;
+	dt_root = of_get_flat_dt_root();
+
+	for_each_machine_desc(mdesc) {
+		score = of_flat_dt_match(dt_root, mdesc->dt_compat);
+		if (score > 0 && score < mdesc_score) {
+			mdesc_best = mdesc;
+			mdesc_score = score;
+		}
+	}
+	if (!mdesc_best) {
+		const char *prop;
+		long size;
+
+		pr_err("\nError: unrecognized/unsupported device tree compatible list:\n[ ");
+
+		prop = of_get_flat_dt_prop(dt_root, "compatible", &size);
+		if (prop) {
+			while (size > 0) {
+				printk("'%s' ", prop);
+				size -= strlen(prop) + 1;
+				prop += strlen(prop) + 1;
+			}
+		}
+		printk("]\n\n");
+
+		dump_machine_table(); /* does not return */
+	}
+
+	model = of_get_flat_dt_prop(dt_root, "model", NULL);
+	if (!model)
+		model = of_get_flat_dt_prop(dt_root, "compatible", NULL);
+	if (!model)
+		model = "<unknown>";
+	pr_info("Machine: %s, model: %s\n", mdesc_best->name, model);
+
+	/* Retrieve various information from the /chosen node */
+	of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line);
+
+	return mdesc_best;
+}
+
+/**
+ * copy_fdt - Copy device tree into non-init memory.
+ *
+ * We must copy the flattened device tree blob into non-init memory because the
+ * unflattened device tree will reference the strings in it directly.
+ */
+void __init copy_fdt(void)
+{
+	void *alloc = early_init_dt_alloc_memory_arch(
+			be32_to_cpu(initial_boot_params->totalsize), 0x40);
+	if (alloc) {
+		memcpy(alloc, initial_boot_params,
+		       be32_to_cpu(initial_boot_params->totalsize));
+		initial_boot_params = alloc;
+	}
+}
diff --git a/arch/metag/kernel/dma.c b/arch/metag/kernel/dma.c
new file mode 100644
index 0000000..8c00ded
--- /dev/null
+++ b/arch/metag/kernel/dma.c
@@ -0,0 +1,507 @@
+/*
+ *  Meta version derived from arch/powerpc/lib/dma-noncoherent.c
+ *    Copyright (C) 2008 Imagination Technologies Ltd.
+ *
+ *  PowerPC version derived from arch/arm/mm/consistent.c
+ *    Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
+ *
+ *  Copyright (C) 2000 Russell King
+ *
+ * Consistent memory allocators.  Used for DMA devices that want to
+ * share uncached memory with the processor core.  The function return
+ * is the virtual address and 'dma_handle' is the physical address.
+ * Mostly stolen from the ARM port, with some changes for PowerPC.
+ *						-- Dan
+ *
+ * Reorganized to get rid of the arch-specific consistent_* functions
+ * and provide non-coherent implementations for the DMA API. -Matt
+ *
+ * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
+ * implementation. This is pulled straight from ARM and barely
+ * modified. -Matt
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/highmem.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+
+#include <asm/tlbflush.h>
+#include <asm/mmu.h>
+
+#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - CONSISTENT_START) \
+					>> PAGE_SHIFT)
+
+static u64 get_coherent_dma_mask(struct device *dev)
+{
+	u64 mask = ~0ULL;
+
+	if (dev) {
+		mask = dev->coherent_dma_mask;
+
+		/*
+		 * Sanity check the DMA mask - it must be non-zero, and
+		 * must be able to be satisfied by a DMA allocation.
+		 */
+		if (mask == 0) {
+			dev_warn(dev, "coherent DMA mask is unset\n");
+			return 0;
+		}
+	}
+
+	return mask;
+}
+/*
+ * This is the page table (2MB) covering uncached, DMA consistent allocations
+ */
+static pte_t *consistent_pte;
+static DEFINE_SPINLOCK(consistent_lock);
+
+/*
+ * VM region handling support.
+ *
+ * This should become something generic, handling VM region allocations for
+ * vmalloc and similar (ioremap, module space, etc).
+ *
+ * I envisage vmalloc()'s supporting vm_struct becoming:
+ *
+ *  struct vm_struct {
+ *    struct metag_vm_region	region;
+ *    unsigned long	flags;
+ *    struct page	**pages;
+ *    unsigned int	nr_pages;
+ *    unsigned long	phys_addr;
+ *  };
+ *
+ * get_vm_area() would then call metag_vm_region_alloc with an appropriate
+ * struct metag_vm_region head (eg):
+ *
+ *  struct metag_vm_region vmalloc_head = {
+ *	.vm_list	= LIST_HEAD_INIT(vmalloc_head.vm_list),
+ *	.vm_start	= VMALLOC_START,
+ *	.vm_end		= VMALLOC_END,
+ *  };
+ *
+ * However, vmalloc_head.vm_start is variable (typically, it is dependent on
+ * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
+ * would have to initialise this each time prior to calling
+ * metag_vm_region_alloc().
+ */
+struct metag_vm_region {
+	struct list_head vm_list;
+	unsigned long vm_start;
+	unsigned long vm_end;
+	struct page		*vm_pages;
+	int			vm_active;
+};
+
+static struct metag_vm_region consistent_head = {
+	.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
+	.vm_start = CONSISTENT_START,
+	.vm_end = CONSISTENT_END,
+};
+
+static struct metag_vm_region *metag_vm_region_alloc(struct metag_vm_region
+						     *head, size_t size,
+						     gfp_t gfp)
+{
+	unsigned long addr = head->vm_start, end = head->vm_end - size;
+	unsigned long flags;
+	struct metag_vm_region *c, *new;
+
+	new = kmalloc(sizeof(struct metag_vm_region), gfp);
+	if (!new)
+		goto out;
+
+	spin_lock_irqsave(&consistent_lock, flags);
+
+	list_for_each_entry(c, &head->vm_list, vm_list) {
+		if ((addr + size) < addr)
+			goto nospc;
+		if ((addr + size) <= c->vm_start)
+			goto found;
+		addr = c->vm_end;
+		if (addr > end)
+			goto nospc;
+	}
+
+found:
+	/*
+	 * Insert this entry _before_ the one we found.
+	 */
+	list_add_tail(&new->vm_list, &c->vm_list);
+	new->vm_start = addr;
+	new->vm_end = addr + size;
+	new->vm_active = 1;
+
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	return new;
+
+nospc:
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	kfree(new);
+out:
+	return NULL;
+}
+
+static struct metag_vm_region *metag_vm_region_find(struct metag_vm_region
+						    *head, unsigned long addr)
+{
+	struct metag_vm_region *c;
+
+	list_for_each_entry(c, &head->vm_list, vm_list) {
+		if (c->vm_active && c->vm_start == addr)
+			goto out;
+	}
+	c = NULL;
+out:
+	return c;
+}
+
+/*
+ * Allocate DMA-coherent memory space and return both the kernel remapped
+ * virtual and bus address for that space.
+ */
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			 dma_addr_t *handle, gfp_t gfp)
+{
+	struct page *page;
+	struct metag_vm_region *c;
+	unsigned long order;
+	u64 mask = get_coherent_dma_mask(dev);
+	u64 limit;
+
+	if (!consistent_pte) {
+		pr_err("%s: not initialised\n", __func__);
+		dump_stack();
+		return NULL;
+	}
+
+	if (!mask)
+		goto no_page;
+	size = PAGE_ALIGN(size);
+	limit = (mask + 1) & ~mask;
+	if ((limit && size >= limit)
+	    || size >= (CONSISTENT_END - CONSISTENT_START)) {
+		pr_warn("coherent allocation too big (requested %#x mask %#Lx)\n",
+			size, mask);
+		return NULL;
+	}
+
+	order = get_order(size);
+
+	if (mask != 0xffffffff)
+		gfp |= GFP_DMA;
+
+	page = alloc_pages(gfp, order);
+	if (!page)
+		goto no_page;
+
+	/*
+	 * Invalidate any data that might be lurking in the
+	 * kernel direct-mapped region for device DMA.
+	 */
+	{
+		void *kaddr = page_address(page);
+		memset(kaddr, 0, size);
+		flush_dcache_region(kaddr, size);
+	}
+
+	/*
+	 * Allocate a virtual address in the consistent mapping region.
+	 */
+	c = metag_vm_region_alloc(&consistent_head, size,
+				  gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
+	if (c) {
+		unsigned long vaddr = c->vm_start;
+		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
+		struct page *end = page + (1 << order);
+
+		c->vm_pages = page;
+		split_page(page, order);
+
+		/*
+		 * Set the "dma handle"
+		 */
+		*handle = page_to_bus(page);
+
+		do {
+			BUG_ON(!pte_none(*pte));
+
+			SetPageReserved(page);
+			set_pte_at(&init_mm, vaddr,
+				   pte, mk_pte(page,
+					       pgprot_writecombine
+					       (PAGE_KERNEL)));
+			page++;
+			pte++;
+			vaddr += PAGE_SIZE;
+		} while (size -= PAGE_SIZE);
+
+		/*
+		 * Free the otherwise unused pages.
+		 */
+		while (page < end) {
+			__free_page(page);
+			page++;
+		}
+
+		return (void *)c->vm_start;
+	}
+
+	if (page)
+		__free_pages(page, order);
+no_page:
+	return NULL;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+/*
+ * free a page as defined by the above mapping.
+ */
+void dma_free_coherent(struct device *dev, size_t size,
+		       void *vaddr, dma_addr_t dma_handle)
+{
+	struct metag_vm_region *c;
+	unsigned long flags, addr;
+	pte_t *ptep;
+
+	size = PAGE_ALIGN(size);
+
+	spin_lock_irqsave(&consistent_lock, flags);
+
+	c = metag_vm_region_find(&consistent_head, (unsigned long)vaddr);
+	if (!c)
+		goto no_area;
+
+	c->vm_active = 0;
+	if ((c->vm_end - c->vm_start) != size) {
+		pr_err("%s: freeing wrong coherent size (%ld != %d)\n",
+		       __func__, c->vm_end - c->vm_start, size);
+		dump_stack();
+		size = c->vm_end - c->vm_start;
+	}
+
+	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
+	addr = c->vm_start;
+	do {
+		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
+		unsigned long pfn;
+
+		ptep++;
+		addr += PAGE_SIZE;
+
+		if (!pte_none(pte) && pte_present(pte)) {
+			pfn = pte_pfn(pte);
+
+			if (pfn_valid(pfn)) {
+				struct page *page = pfn_to_page(pfn);
+				ClearPageReserved(page);
+
+				__free_page(page);
+				continue;
+			}
+		}
+
+		pr_crit("%s: bad page in kernel page table\n",
+			__func__);
+	} while (size -= PAGE_SIZE);
+
+	flush_tlb_kernel_range(c->vm_start, c->vm_end);
+
+	list_del(&c->vm_list);
+
+	spin_unlock_irqrestore(&consistent_lock, flags);
+
+	kfree(c);
+	return;
+
+no_area:
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	pr_err("%s: trying to free invalid coherent area: %p\n",
+	       __func__, vaddr);
+	dump_stack();
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+
+static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		    void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+	int ret = -ENXIO;
+
+	unsigned long flags, user_size, kern_size;
+	struct metag_vm_region *c;
+
+	user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+	spin_lock_irqsave(&consistent_lock, flags);
+	c = metag_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
+	spin_unlock_irqrestore(&consistent_lock, flags);
+
+	if (c) {
+		unsigned long off = vma->vm_pgoff;
+
+		kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
+
+		if (off < kern_size &&
+		    user_size <= (kern_size - off)) {
+			ret = remap_pfn_range(vma, vma->vm_start,
+					      page_to_pfn(c->vm_pages) + off,
+					      user_size << PAGE_SHIFT,
+					      vma->vm_page_prot);
+		}
+	}
+
+
+	return ret;
+}
+
+int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+		      void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+EXPORT_SYMBOL(dma_mmap_coherent);
+
+int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
+			  void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+EXPORT_SYMBOL(dma_mmap_writecombine);
+
+
+
+
+/*
+ * Initialise the consistent memory allocation.
+ */
+static int __init dma_alloc_init(void)
+{
+	pgd_t *pgd, *pgd_k;
+	pud_t *pud, *pud_k;
+	pmd_t *pmd, *pmd_k;
+	pte_t *pte;
+	int ret = 0;
+
+	do {
+		int offset = pgd_index(CONSISTENT_START);
+		pgd = pgd_offset(&init_mm, CONSISTENT_START);
+		pud = pud_alloc(&init_mm, pgd, CONSISTENT_START);
+		pmd = pmd_alloc(&init_mm, pud, CONSISTENT_START);
+		if (!pmd) {
+			pr_err("%s: no pmd tables\n", __func__);
+			ret = -ENOMEM;
+			break;
+		}
+		WARN_ON(!pmd_none(*pmd));
+
+		pte = pte_alloc_kernel(pmd, CONSISTENT_START);
+		if (!pte) {
+			pr_err("%s: no pte tables\n", __func__);
+			ret = -ENOMEM;
+			break;
+		}
+
+		pgd_k = ((pgd_t *) mmu_get_base()) + offset;
+		pud_k = pud_offset(pgd_k, CONSISTENT_START);
+		pmd_k = pmd_offset(pud_k, CONSISTENT_START);
+		set_pmd(pmd_k, *pmd);
+
+		consistent_pte = pte;
+	} while (0);
+
+	return ret;
+}
+early_initcall(dma_alloc_init);
+
+/*
+ * make an area consistent to devices.
+ */
+void dma_sync_for_device(void *vaddr, size_t size, int dma_direction)
+{
+	/*
+	 * Ensure any writes get through the write combiner. This is necessary
+	 * even with DMA_FROM_DEVICE, or the write may dirty the cache after
+	 * we've invalidated it and get written back during the DMA.
+	 */
+
+	barrier();
+
+	switch (dma_direction) {
+	case DMA_BIDIRECTIONAL:
+		/*
+		 * Writeback to ensure the device can see our latest changes and
+		 * so that we have no dirty lines, and invalidate the cache
+		 * lines too in preparation for receiving the buffer back
+		 * (dma_sync_for_cpu) later.
+		 */
+		flush_dcache_region(vaddr, size);
+		break;
+	case DMA_TO_DEVICE:
+		/*
+		 * Writeback to ensure the device can see our latest changes.
+		 * There's no need to invalidate as the device shouldn't write
+		 * to the buffer.
+		 */
+		writeback_dcache_region(vaddr, size);
+		break;
+	case DMA_FROM_DEVICE:
+		/*
+		 * Invalidate to ensure we have no dirty lines that could get
+		 * written back during the DMA. It's also safe to flush
+		 * (writeback) here if necessary.
+		 */
+		invalidate_dcache_region(vaddr, size);
+		break;
+	case DMA_NONE:
+		BUG();
+	}
+
+	wmb();
+}
+EXPORT_SYMBOL(dma_sync_for_device);
+
+/*
+ * make an area consistent to the core.
+ */
+void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction)
+{
+	/*
+	 * Hardware L2 cache prefetch doesn't occur across 4K physical
+	 * boundaries, however according to Documentation/DMA-API-HOWTO.txt
+	 * kmalloc'd memory is DMA'able, so accesses in nearby memory could
+	 * trigger a cache fill in the DMA buffer.
+	 *
+	 * This should never cause dirty lines, so a flush or invalidate should
+	 * be safe to allow us to see data from the device.
+	 */
+	if (_meta_l2c_pf_is_enabled()) {
+		switch (dma_direction) {
+		case DMA_BIDIRECTIONAL:
+		case DMA_FROM_DEVICE:
+			invalidate_dcache_region(vaddr, size);
+			break;
+		case DMA_TO_DEVICE:
+			/* The device shouldn't have written to the buffer */
+			break;
+		case DMA_NONE:
+			BUG();
+		}
+	}
+
+	rmb();
+}
+EXPORT_SYMBOL(dma_sync_for_cpu);
diff --git a/arch/metag/kernel/ftrace.c b/arch/metag/kernel/ftrace.c
new file mode 100644
index 0000000..a774f32
--- /dev/null
+++ b/arch/metag/kernel/ftrace.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2008 Imagination Technologies Ltd.
+ * Licensed under the GPL
+ *
+ * Dynamic ftrace support.
+ */
+
+#include <linux/ftrace.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+
+#define D04_MOVT_TEMPLATE	0x02200005
+#define D04_CALL_TEMPLATE	0xAC200005
+#define D1RTP_MOVT_TEMPLATE	0x03200005
+#define D1RTP_CALL_TEMPLATE	0xAC200006
+
+static const unsigned long NOP[2] = {0xa0fffffe, 0xa0fffffe};
+static unsigned long movt_and_call_insn[2];
+
+static unsigned char *ftrace_nop_replace(void)
+{
+	return (char *)&NOP[0];
+}
+
+static unsigned char *ftrace_call_replace(unsigned long pc, unsigned long addr)
+{
+	unsigned long hi16, low16;
+
+	hi16 = (addr & 0xffff0000) >> 13;
+	low16 = (addr & 0x0000ffff) << 3;
+
+	/*
+	 * The compiler makes the call to mcount_wrapper()
+	 * (Meta's wrapper around mcount()) through the register
+	 * D0.4. So whenever we're patching one of those compiler-generated
+	 * calls we also need to go through D0.4. Otherwise use D1RtP.
+	 */
+	if (pc == (unsigned long)&ftrace_call) {
+		writel(D1RTP_MOVT_TEMPLATE | hi16, &movt_and_call_insn[0]);
+		writel(D1RTP_CALL_TEMPLATE | low16, &movt_and_call_insn[1]);
+	} else {
+		writel(D04_MOVT_TEMPLATE | hi16, &movt_and_call_insn[0]);
+		writel(D04_CALL_TEMPLATE | low16, &movt_and_call_insn[1]);
+	}
+
+	return (unsigned char *)&movt_and_call_insn[0];
+}
+
+static int ftrace_modify_code(unsigned long pc, unsigned char *old_code,
+			      unsigned char *new_code)
+{
+	unsigned char replaced[MCOUNT_INSN_SIZE];
+
+	/*
+	 * Note: Due to modules and __init, code can
+	 *  disappear and change, we need to protect against faulting
+	 *  as well as code changing.
+	 *
+	 * No real locking needed, this code is run through
+	 * kstop_machine.
+	 */
+
+	/* read the text we want to modify */
+	if (probe_kernel_read(replaced, (void *)pc, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* Make sure it is what we expect it to be */
+	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+		return -EINVAL;
+
+	/* replace the text with the new text */
+	if (probe_kernel_write((void *)pc, new_code, MCOUNT_INSN_SIZE))
+		return -EPERM;
+
+	flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
+
+	return 0;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	int ret;
+	unsigned long pc;
+	unsigned char old[MCOUNT_INSN_SIZE], *new;
+
+	pc = (unsigned long)&ftrace_call;
+	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
+	new = ftrace_call_replace(pc, (unsigned long)func);
+	ret = ftrace_modify_code(pc, old, new);
+
+	return ret;
+}
+
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned char *new, *old;
+	unsigned long ip = rec->ip;
+
+	old = ftrace_call_replace(ip, addr);
+	new = ftrace_nop_replace();
+
+	return ftrace_modify_code(ip, old, new);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned char *new, *old;
+	unsigned long ip = rec->ip;
+
+	old = ftrace_nop_replace();
+	new = ftrace_call_replace(ip, addr);
+
+	return ftrace_modify_code(ip, old, new);
+}
+
+/* run from kstop_machine */
+int __init ftrace_dyn_arch_init(void *data)
+{
+	/* The return code is returned via data */
+	writel(0, data);
+
+	return 0;
+}
diff --git a/arch/metag/kernel/ftrace_stub.S b/arch/metag/kernel/ftrace_stub.S
new file mode 100644
index 0000000..e70bff7
--- /dev/null
+++ b/arch/metag/kernel/ftrace_stub.S
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2008 Imagination Technologies Ltd.
+ * Licensed under the GPL
+ *
+ */
+
+#include <asm/ftrace.h>
+
+	.text
+#ifdef CONFIG_DYNAMIC_FTRACE
+	.global	_mcount_wrapper
+	.type	_mcount_wrapper,function
+_mcount_wrapper:
+	MOV	PC,D0.4
+
+	.global _ftrace_caller
+	.type	_ftrace_caller,function
+_ftrace_caller:
+	MOVT    D0Re0,#HI(_function_trace_stop)
+	ADD	D0Re0,D0Re0,#LO(_function_trace_stop)
+	GETD	D0Re0,[D0Re0]
+	CMP	D0Re0,#0
+	BEQ	$Lcall_stub
+	MOV	PC,D0.4
+$Lcall_stub:
+	MSETL   [A0StP], D0Ar6, D0Ar4, D0Ar2, D0.4
+	MOV     D1Ar1, D0.4
+	MOV     D0Ar2, D1RtP
+	SUB	D1Ar1,D1Ar1,#MCOUNT_INSN_SIZE
+
+	.global _ftrace_call
+_ftrace_call:
+	MOVT	D1RtP,#HI(_ftrace_stub)
+	CALL	D1RtP,#LO(_ftrace_stub)
+	GETL    D0.4,  D1RtP, [A0StP++#(-8)]
+	GETL    D0Ar2, D1Ar1, [A0StP++#(-8)]
+	GETL    D0Ar4, D1Ar3, [A0StP++#(-8)]
+	GETL    D0Ar6, D1Ar5, [A0StP++#(-8)]
+	MOV     PC, D0.4
+#else
+
+	.global	_mcount_wrapper
+	.type	_mcount_wrapper,function
+_mcount_wrapper:
+	MOVT    D0Re0,#HI(_function_trace_stop)
+	ADD	D0Re0,D0Re0,#LO(_function_trace_stop)
+	GETD	D0Re0,[D0Re0]
+	CMP	D0Re0,#0
+	BEQ	$Lcall_mcount
+	MOV	PC,D0.4
+$Lcall_mcount:
+	MSETL   [A0StP], D0Ar6, D0Ar4, D0Ar2, D0.4
+	MOV     D1Ar1, D0.4
+	MOV     D0Ar2, D1RtP
+	MOVT    D0Re0,#HI(_ftrace_trace_function)
+	ADD	D0Re0,D0Re0,#LO(_ftrace_trace_function)
+	GET	D1Ar3,[D0Re0]
+	MOVT	D1Re0,#HI(_ftrace_stub)
+	ADD	D1Re0,D1Re0,#LO(_ftrace_stub)
+	CMP	D1Ar3,D1Re0
+	BEQ	$Ltrace_exit
+	MOV	D1RtP,D1Ar3
+	SUB	D1Ar1,D1Ar1,#MCOUNT_INSN_SIZE
+	SWAP	PC,D1RtP
+$Ltrace_exit:
+	GETL    D0.4,  D1RtP, [A0StP++#(-8)]
+	GETL    D0Ar2, D1Ar1, [A0StP++#(-8)]
+	GETL    D0Ar4, D1Ar3, [A0StP++#(-8)]
+	GETL    D0Ar6, D1Ar5, [A0StP++#(-8)]
+	MOV     PC, D0.4
+
+#endif	/* CONFIG_DYNAMIC_FTRACE */
+
+	.global _ftrace_stub
+_ftrace_stub:
+	MOV 	PC,D1RtP
diff --git a/arch/metag/kernel/head.S b/arch/metag/kernel/head.S
new file mode 100644
index 0000000..969dffa
--- /dev/null
+++ b/arch/metag/kernel/head.S
@@ -0,0 +1,57 @@
+	! Copyright 2005,2006,2007,2009 Imagination Technologies
+
+#include <linux/init.h>
+#include <generated/asm-offsets.h>
+#undef __exit
+
+	__HEAD
+	! Setup the stack and get going into _metag_start_kernel
+	.global	__start
+	.type   __start,function
+__start:
+	! D1Ar1 contains pTBI (ISTAT)
+	! D0Ar2 contains pTBI
+	! D1Ar3 contains __pTBISegs
+	! D0Ar4 contains kernel arglist pointer
+
+	MOVT    D0Re0,#HI(___pTBIs)
+	ADD     D0Re0,D0Re0,#LO(___pTBIs)
+	SETL    [D0Re0],D0Ar2,D1Ar1
+	MOVT    D0Re0,#HI(___pTBISegs)
+	ADD     D0Re0,D0Re0,#LO(___pTBISegs)
+	SETD    [D0Re0],D1Ar3
+	MOV	A0FrP,#0
+	MOV	D0Re0,#0
+	MOV	D1Re0,#0
+	MOV	D1Ar3,#0
+	MOV	D1Ar1,D0Ar4			!Store kernel boot params
+	MOV	D1Ar5,#0
+	MOV	D0Ar6,#0
+#ifdef CONFIG_METAG_DSP
+	MOV	D0.8,#0
+#endif
+	MOVT    A0StP,#HI(_init_thread_union)
+	ADD	A0StP,A0StP,#LO(_init_thread_union)
+	ADD     A0StP,A0StP,#THREAD_INFO_SIZE
+	MOVT	D1RtP,#HI(_metag_start_kernel)
+	CALL	D1RtP,#LO(_metag_start_kernel)
+	.size	__start,.-__start
+
+	!! Needed by TBX
+	.global	__exit
+	.type   __exit,function
+__exit:
+	XOR     TXENABLE,D0Re0,D0Re0
+	.size	__exit,.-__exit
+
+#ifdef CONFIG_SMP
+	.global _secondary_startup
+	.type _secondary_startup,function
+_secondary_startup:
+	MOVT	A0StP,#HI(_secondary_data_stack)
+	ADD	A0StP,A0StP,#LO(_secondary_data_stack)
+	GETD	A0StP,[A0StP]
+	ADD	A0StP,A0StP,#THREAD_INFO_SIZE
+	B	_secondary_start_kernel
+	.size	_secondary_startup,.-_secondary_startup
+#endif
diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c
new file mode 100644
index 0000000..87707ef
--- /dev/null
+++ b/arch/metag/kernel/irq.c
@@ -0,0 +1,323 @@
+/*
+ * Linux/Meta general interrupt handling code
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/irqchip/metag-ext.h>
+#include <linux/irqchip/metag.h>
+#include <linux/irqdomain.h>
+#include <linux/ratelimit.h>
+
+#include <asm/core_reg.h>
+#include <asm/mach/arch.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_4KSTACKS
+union irq_ctx {
+	struct thread_info      tinfo;
+	u32                     stack[THREAD_SIZE/sizeof(u32)];
+};
+
+static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
+static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
+#endif
+
+struct irq_domain *root_domain;
+
+static unsigned int startup_meta_irq(struct irq_data *data)
+{
+	tbi_startup_interrupt(data->hwirq);
+	return 0;
+}
+
+static void shutdown_meta_irq(struct irq_data *data)
+{
+	tbi_shutdown_interrupt(data->hwirq);
+}
+
+void do_IRQ(int irq, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+#ifdef CONFIG_4KSTACKS
+	struct irq_desc *desc;
+	union irq_ctx *curctx, *irqctx;
+	u32 *isp;
+#endif
+
+	irq_enter();
+
+	irq = irq_linear_revmap(root_domain, irq);
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	/* Debugging check for stack overflow: is there less than 1KB free? */
+	{
+		unsigned long sp;
+
+		sp = __core_reg_get(A0StP);
+		sp &= THREAD_SIZE - 1;
+
+		if (unlikely(sp > (THREAD_SIZE - 1024)))
+			pr_err("Stack overflow in do_IRQ: %ld\n", sp);
+	}
+#endif
+
+
+#ifdef CONFIG_4KSTACKS
+	curctx = (union irq_ctx *) current_thread_info();
+	irqctx = hardirq_ctx[smp_processor_id()];
+
+	/*
+	 * this is where we switch to the IRQ stack. However, if we are
+	 * already using the IRQ stack (because we interrupted a hardirq
+	 * handler) we can't do that and just have to keep using the
+	 * current stack (which is the irq stack already after all)
+	 */
+	if (curctx != irqctx) {
+		/* build the stack frame on the IRQ stack */
+		isp = (u32 *) ((char *)irqctx + sizeof(struct thread_info));
+		irqctx->tinfo.task = curctx->tinfo.task;
+
+		/*
+		 * Copy the softirq bits in preempt_count so that the
+		 * softirq checks work in the hardirq context.
+		 */
+		irqctx->tinfo.preempt_count =
+			(irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
+			(curctx->tinfo.preempt_count & SOFTIRQ_MASK);
+
+		desc = irq_to_desc(irq);
+
+		asm volatile (
+			"MOV   D0.5,%0\n"
+			"MOV   D1Ar1,%1\n"
+			"MOV   D1RtP,%2\n"
+			"MOV   D0Ar2,%3\n"
+			"SWAP  A0StP,D0.5\n"
+			"SWAP  PC,D1RtP\n"
+			"MOV   A0StP,D0.5\n"
+			:
+			: "r" (isp), "r" (irq), "r" (desc->handle_irq),
+			  "r" (desc)
+			: "memory", "cc", "D1Ar1", "D0Ar2", "D1Ar3", "D0Ar4",
+			  "D1Ar5", "D0Ar6", "D0Re0", "D1Re0", "D0.4", "D1RtP",
+			  "D0.5"
+			);
+	} else
+#endif
+		generic_handle_irq(irq);
+
+	irq_exit();
+
+	set_irq_regs(old_regs);
+}
+
+#ifdef CONFIG_4KSTACKS
+
+static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+
+static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+
+/*
+ * allocate per-cpu stacks for hardirq and for softirq processing
+ */
+void irq_ctx_init(int cpu)
+{
+	union irq_ctx *irqctx;
+
+	if (hardirq_ctx[cpu])
+		return;
+
+	irqctx = (union irq_ctx *) &hardirq_stack[cpu * THREAD_SIZE];
+	irqctx->tinfo.task              = NULL;
+	irqctx->tinfo.exec_domain       = NULL;
+	irqctx->tinfo.cpu               = cpu;
+	irqctx->tinfo.preempt_count     = HARDIRQ_OFFSET;
+	irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+	hardirq_ctx[cpu] = irqctx;
+
+	irqctx = (union irq_ctx *) &softirq_stack[cpu * THREAD_SIZE];
+	irqctx->tinfo.task              = NULL;
+	irqctx->tinfo.exec_domain       = NULL;
+	irqctx->tinfo.cpu               = cpu;
+	irqctx->tinfo.preempt_count     = 0;
+	irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+	softirq_ctx[cpu] = irqctx;
+
+	pr_info("CPU %u irqstacks, hard=%p soft=%p\n",
+		cpu, hardirq_ctx[cpu], softirq_ctx[cpu]);
+}
+
+void irq_ctx_exit(int cpu)
+{
+	hardirq_ctx[smp_processor_id()] = NULL;
+}
+
+extern asmlinkage void __do_softirq(void);
+
+asmlinkage void do_softirq(void)
+{
+	unsigned long flags;
+	struct thread_info *curctx;
+	union irq_ctx *irqctx;
+	u32 *isp;
+
+	if (in_interrupt())
+		return;
+
+	local_irq_save(flags);
+
+	if (local_softirq_pending()) {
+		curctx = current_thread_info();
+		irqctx = softirq_ctx[smp_processor_id()];
+		irqctx->tinfo.task = curctx->task;
+
+		/* build the stack frame on the softirq stack */
+		isp = (u32 *) ((char *)irqctx + sizeof(struct thread_info));
+
+		asm volatile (
+			"MOV   D0.5,%0\n"
+			"SWAP  A0StP,D0.5\n"
+			"CALLR D1RtP,___do_softirq\n"
+			"MOV   A0StP,D0.5\n"
+			:
+			: "r" (isp)
+			: "memory", "cc", "D1Ar1", "D0Ar2", "D1Ar3", "D0Ar4",
+			  "D1Ar5", "D0Ar6", "D0Re0", "D1Re0", "D0.4", "D1RtP",
+			  "D0.5"
+			);
+		/*
+		 * Shouldn't happen, we returned above if in_interrupt():
+		 */
+		WARN_ON_ONCE(softirq_count());
+	}
+
+	local_irq_restore(flags);
+}
+#endif
+
+static struct irq_chip meta_irq_type = {
+	.name = "META-IRQ",
+	.irq_startup = startup_meta_irq,
+	.irq_shutdown = shutdown_meta_irq,
+};
+
+/**
+ * tbisig_map() - Map a TBI signal number to a virtual IRQ number.
+ * @hw:		Number of the TBI signal. Must be in range.
+ *
+ * Returns:	The virtual IRQ number of the TBI signal number IRQ specified by
+ *		@hw.
+ */
+int tbisig_map(unsigned int hw)
+{
+	return irq_create_mapping(root_domain, hw);
+}
+
+/**
+ * metag_tbisig_map() - map a tbi signal to a Linux virtual IRQ number
+ * @d:		root irq domain
+ * @irq:	virtual irq number
+ * @hw:		hardware irq number (TBI signal number)
+ *
+ * This sets up a virtual irq for a specified TBI signal number.
+ */
+static int metag_tbisig_map(struct irq_domain *d, unsigned int irq,
+			    irq_hw_number_t hw)
+{
+#ifdef CONFIG_SMP
+	irq_set_chip_and_handler(irq, &meta_irq_type, handle_percpu_irq);
+#else
+	irq_set_chip_and_handler(irq, &meta_irq_type, handle_simple_irq);
+#endif
+	return 0;
+}
+
+static const struct irq_domain_ops metag_tbisig_domain_ops = {
+	.map = metag_tbisig_map,
+};
+
+/*
+ * void init_IRQ(void)
+ *
+ * Parameters:	None
+ *
+ * Returns:	Nothing
+ *
+ * This function should be called during kernel startup to initialize
+ * the IRQ handling routines.
+ */
+void __init init_IRQ(void)
+{
+	root_domain = irq_domain_add_linear(NULL, 32,
+					    &metag_tbisig_domain_ops, NULL);
+	if (unlikely(!root_domain))
+		panic("init_IRQ: cannot add root IRQ domain");
+
+	irq_ctx_init(smp_processor_id());
+
+	init_internal_IRQ();
+	init_external_IRQ();
+
+	if (machine_desc->init_irq)
+		machine_desc->init_irq();
+}
+
+int __init arch_probe_nr_irqs(void)
+{
+	if (machine_desc->nr_irqs)
+		nr_irqs = machine_desc->nr_irqs;
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void route_irq(struct irq_data *data, unsigned int irq, unsigned int cpu)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_chip *chip = irq_data_get_irq_chip(data);
+
+	raw_spin_lock_irq(&desc->lock);
+	if (chip->irq_set_affinity)
+		chip->irq_set_affinity(data, cpumask_of(cpu), false);
+	raw_spin_unlock_irq(&desc->lock);
+}
+
+/*
+ * The CPU has been marked offline.  Migrate IRQs off this CPU.  If
+ * the affinity settings do not allow other CPUs, force them onto any
+ * available CPU.
+ */
+void migrate_irqs(void)
+{
+	unsigned int i, cpu = smp_processor_id();
+	struct irq_desc *desc;
+
+	for_each_irq_desc(i, desc) {
+		struct irq_data *data = irq_desc_get_irq_data(desc);
+		unsigned int newcpu;
+
+		if (irqd_is_per_cpu(data))
+			continue;
+
+		if (!cpumask_test_cpu(cpu, data->affinity))
+			continue;
+
+		newcpu = cpumask_any_and(data->affinity, cpu_online_mask);
+
+		if (newcpu >= nr_cpu_ids) {
+			pr_info_ratelimited("IRQ%u no longer affine to CPU%u\n",
+					    i, cpu);
+
+			cpumask_setall(data->affinity);
+			newcpu = cpumask_any_and(data->affinity,
+						 cpu_online_mask);
+		}
+
+		route_irq(data, i, newcpu);
+	}
+}
+#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/metag/kernel/kick.c b/arch/metag/kernel/kick.c
new file mode 100644
index 0000000..50fcbec
--- /dev/null
+++ b/arch/metag/kernel/kick.c
@@ -0,0 +1,101 @@
+/*
+ *  Copyright (C) 2009 Imagination Technologies
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ * The Meta KICK interrupt mechanism is generally a useful feature, so
+ * we provide an interface for registering multiple interrupt
+ * handlers. All the registered interrupt handlers are "chained". When
+ * a KICK interrupt is received the first function in the list is
+ * called. If that interrupt handler cannot handle the KICK the next
+ * one is called, then the next until someone handles it (or we run
+ * out of functions). As soon as one function handles the interrupt no
+ * other handlers are called.
+ *
+ * The only downside of chaining interrupt handlers is that each
+ * handler must be able to detect whether the KICK was intended for it
+ * or not.  For example, when the IPI handler runs and it sees that
+ * there are no IPI messages it must not signal that the KICK was
+ * handled, thereby giving the other handlers a chance to run.
+ *
+ * The reason that we provide our own interface for calling KICK
+ * handlers instead of using the generic kernel infrastructure is that
+ * the KICK handlers require access to a CPU's pTBI structure. So we
+ * pass it as an argument.
+ */
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+
+#include <asm/traps.h>
+
+/*
+ * All accesses/manipulations of kick_handlers_list should be
+ * performed while holding kick_handlers_lock.
+ */
+static DEFINE_SPINLOCK(kick_handlers_lock);
+static LIST_HEAD(kick_handlers_list);
+
+void kick_register_func(struct kick_irq_handler *kh)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kick_handlers_lock, flags);
+
+	list_add_tail(&kh->list, &kick_handlers_list);
+
+	spin_unlock_irqrestore(&kick_handlers_lock, flags);
+}
+EXPORT_SYMBOL(kick_register_func);
+
+void kick_unregister_func(struct kick_irq_handler *kh)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kick_handlers_lock, flags);
+
+	list_del(&kh->list);
+
+	spin_unlock_irqrestore(&kick_handlers_lock, flags);
+}
+EXPORT_SYMBOL(kick_unregister_func);
+
+TBIRES
+kick_handler(TBIRES State, int SigNum, int Triggers, int Inst, PTBI pTBI)
+{
+	struct kick_irq_handler *kh;
+	struct list_head *lh;
+	int handled = 0;
+	TBIRES ret;
+
+	head_end(State, ~INTS_OFF_MASK);
+
+	/* If we interrupted user code handle any critical sections. */
+	if (State.Sig.SaveMask & TBICTX_PRIV_BIT)
+		restart_critical_section(State);
+
+	trace_hardirqs_off();
+
+	/*
+	 * There is no need to disable interrupts here because we
+	 * can't nest KICK interrupts in a KICK interrupt handler.
+	 */
+	spin_lock(&kick_handlers_lock);
+
+	list_for_each(lh, &kick_handlers_list) {
+		kh = list_entry(lh, struct kick_irq_handler, list);
+
+		ret = kh->func(State, SigNum, Triggers, Inst, pTBI, &handled);
+		if (handled)
+			break;
+	}
+
+	spin_unlock(&kick_handlers_lock);
+
+	WARN_ON(!handled);
+
+	return tail_end(ret);
+}
diff --git a/arch/metag/kernel/machines.c b/arch/metag/kernel/machines.c
new file mode 100644
index 0000000..1edf6ba
--- /dev/null
+++ b/arch/metag/kernel/machines.c
@@ -0,0 +1,20 @@
+/*
+ *  arch/metag/kernel/machines.c
+ *
+ *  Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ *  Generic Meta Boards.
+ */
+
+#include <linux/init.h>
+#include <asm/irq.h>
+#include <asm/mach/arch.h>
+
+static const char *meta_boards_compat[] __initdata = {
+	"img,meta",
+	NULL,
+};
+
+MACHINE_START(META, "Generic Meta")
+	.dt_compat	= meta_boards_compat,
+MACHINE_END
diff --git a/arch/metag/kernel/metag_ksyms.c b/arch/metag/kernel/metag_ksyms.c
new file mode 100644
index 0000000..ec872ef
--- /dev/null
+++ b/arch/metag/kernel/metag_ksyms.c
@@ -0,0 +1,49 @@
+#include <linux/export.h>
+
+#include <asm/div64.h>
+#include <asm/ftrace.h>
+#include <asm/page.h>
+#include <asm/string.h>
+#include <asm/tbx.h>
+
+EXPORT_SYMBOL(clear_page);
+EXPORT_SYMBOL(copy_page);
+
+#ifdef CONFIG_FLATMEM
+/* needed for the pfn_valid macro */
+EXPORT_SYMBOL(max_pfn);
+EXPORT_SYMBOL(min_low_pfn);
+#endif
+
+/* TBI symbols */
+EXPORT_SYMBOL(__TBI);
+EXPORT_SYMBOL(__TBIFindSeg);
+EXPORT_SYMBOL(__TBIPoll);
+EXPORT_SYMBOL(__TBITimeStamp);
+
+#define DECLARE_EXPORT(name) extern void name(void); EXPORT_SYMBOL(name)
+
+/* libgcc functions */
+DECLARE_EXPORT(__ashldi3);
+DECLARE_EXPORT(__ashrdi3);
+DECLARE_EXPORT(__lshrdi3);
+DECLARE_EXPORT(__udivsi3);
+DECLARE_EXPORT(__divsi3);
+DECLARE_EXPORT(__umodsi3);
+DECLARE_EXPORT(__modsi3);
+DECLARE_EXPORT(__muldi3);
+DECLARE_EXPORT(__cmpdi2);
+DECLARE_EXPORT(__ucmpdi2);
+
+/* Maths functions */
+EXPORT_SYMBOL(div_u64);
+EXPORT_SYMBOL(div_s64);
+
+/* String functions */
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memmove);
+
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(mcount_wrapper);
+#endif
diff --git a/arch/metag/kernel/module.c b/arch/metag/kernel/module.c
new file mode 100644
index 0000000..986331c
--- /dev/null
+++ b/arch/metag/kernel/module.c
@@ -0,0 +1,284 @@
+/*  Kernel module help for Meta.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+*/
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/sort.h>
+
+#include <asm/unaligned.h>
+
+/* Count how many different relocations (different symbol, different
+   addend) */
+static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
+{
+	unsigned int i, r_info, r_addend, _count_relocs;
+
+	_count_relocs = 0;
+	r_info = 0;
+	r_addend = 0;
+	for (i = 0; i < num; i++)
+		/* Only count relbranch relocs, others don't need stubs */
+		if (ELF32_R_TYPE(rela[i].r_info) == R_METAG_RELBRANCH &&
+		    (r_info != ELF32_R_SYM(rela[i].r_info) ||
+		     r_addend != rela[i].r_addend)) {
+			_count_relocs++;
+			r_info = ELF32_R_SYM(rela[i].r_info);
+			r_addend = rela[i].r_addend;
+		}
+
+	return _count_relocs;
+}
+
+static int relacmp(const void *_x, const void *_y)
+{
+	const Elf32_Rela *x, *y;
+
+	y = (Elf32_Rela *)_x;
+	x = (Elf32_Rela *)_y;
+
+	/* Compare the entire r_info (as opposed to ELF32_R_SYM(r_info) only) to
+	 * make the comparison cheaper/faster. It won't affect the sorting or
+	 * the counting algorithms' performance
+	 */
+	if (x->r_info < y->r_info)
+		return -1;
+	else if (x->r_info > y->r_info)
+		return 1;
+	else if (x->r_addend < y->r_addend)
+		return -1;
+	else if (x->r_addend > y->r_addend)
+		return 1;
+	else
+		return 0;
+}
+
+static void relaswap(void *_x, void *_y, int size)
+{
+	uint32_t *x, *y, tmp;
+	int i;
+
+	y = (uint32_t *)_x;
+	x = (uint32_t *)_y;
+
+	for (i = 0; i < sizeof(Elf32_Rela) / sizeof(uint32_t); i++) {
+		tmp = x[i];
+		x[i] = y[i];
+		y[i] = tmp;
+	}
+}
+
+/* Get the potential trampolines size required of the init and
+   non-init sections */
+static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
+				  const Elf32_Shdr *sechdrs,
+				  const char *secstrings,
+				  int is_init)
+{
+	unsigned long ret = 0;
+	unsigned i;
+
+	/* Everything marked ALLOC (this includes the exported
+	   symbols) */
+	for (i = 1; i < hdr->e_shnum; i++) {
+		/* If it's called *.init*, and we're not init, we're
+		   not interested */
+		if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != NULL)
+		    != is_init)
+			continue;
+
+		/* We don't want to look at debug sections. */
+		if (strstr(secstrings + sechdrs[i].sh_name, ".debug") != NULL)
+			continue;
+
+		if (sechdrs[i].sh_type == SHT_RELA) {
+			pr_debug("Found relocations in section %u\n", i);
+			pr_debug("Ptr: %p.  Number: %u\n",
+				 (void *)hdr + sechdrs[i].sh_offset,
+				 sechdrs[i].sh_size / sizeof(Elf32_Rela));
+
+			/* Sort the relocation information based on a symbol and
+			 * addend key. This is a stable O(n*log n) complexity
+			 * alogrithm but it will reduce the complexity of
+			 * count_relocs() to linear complexity O(n)
+			 */
+			sort((void *)hdr + sechdrs[i].sh_offset,
+			     sechdrs[i].sh_size / sizeof(Elf32_Rela),
+			     sizeof(Elf32_Rela), relacmp, relaswap);
+
+			ret += count_relocs((void *)hdr
+					     + sechdrs[i].sh_offset,
+					     sechdrs[i].sh_size
+					     / sizeof(Elf32_Rela))
+				* sizeof(struct metag_plt_entry);
+		}
+	}
+
+	return ret;
+}
+
+int module_frob_arch_sections(Elf32_Ehdr *hdr,
+			      Elf32_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *me)
+{
+	unsigned int i;
+
+	/* Find .plt and .init.plt sections */
+	for (i = 0; i < hdr->e_shnum; i++) {
+		if (strcmp(secstrings + sechdrs[i].sh_name, ".init.plt") == 0)
+			me->arch.init_plt_section = i;
+		else if (strcmp(secstrings + sechdrs[i].sh_name, ".plt") == 0)
+			me->arch.core_plt_section = i;
+	}
+	if (!me->arch.core_plt_section || !me->arch.init_plt_section) {
+		pr_err("Module doesn't contain .plt or .init.plt sections.\n");
+		return -ENOEXEC;
+	}
+
+	/* Override their sizes */
+	sechdrs[me->arch.core_plt_section].sh_size
+		= get_plt_size(hdr, sechdrs, secstrings, 0);
+	sechdrs[me->arch.core_plt_section].sh_type = SHT_NOBITS;
+	sechdrs[me->arch.init_plt_section].sh_size
+		= get_plt_size(hdr, sechdrs, secstrings, 1);
+	sechdrs[me->arch.init_plt_section].sh_type = SHT_NOBITS;
+	return 0;
+}
+
+/* Set up a trampoline in the PLT to bounce us to the distant function */
+static uint32_t do_plt_call(void *location, Elf32_Addr val,
+			    Elf32_Shdr *sechdrs, struct module *mod)
+{
+	struct metag_plt_entry *entry;
+	/* Instructions used to do the indirect jump.  */
+	uint32_t tramp[2];
+
+	/* We have to trash a register, so we assume that any control
+	   transfer more than 21-bits away must be a function call
+	   (so we can use a call-clobbered register).  */
+
+	/* MOVT D0Re0,#HI(v) */
+	tramp[0] = 0x02000005 | (((val & 0xffff0000) >> 16) << 3);
+	/* JUMP D0Re0,#LO(v) */
+	tramp[1] = 0xac000001 | ((val & 0x0000ffff) << 3);
+
+	/* Init, or core PLT? */
+	if (location >= mod->module_core
+	    && location < mod->module_core + mod->core_size)
+		entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr;
+	else
+		entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr;
+
+	/* Find this entry, or if that fails, the next avail. entry */
+	while (entry->tramp[0])
+		if (entry->tramp[0] == tramp[0] && entry->tramp[1] == tramp[1])
+			return (uint32_t)entry;
+		else
+			entry++;
+
+	entry->tramp[0] = tramp[0];
+	entry->tramp[1] = tramp[1];
+
+	return (uint32_t)entry;
+}
+
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *me)
+{
+	unsigned int i;
+	Elf32_Rela *rel = (void *)sechdrs[relsec].sh_addr;
+	Elf32_Sym *sym;
+	Elf32_Addr relocation;
+	uint32_t *location;
+	int32_t value;
+
+	pr_debug("Applying relocate section %u to %u\n", relsec,
+		 sechdrs[relsec].sh_info);
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rel[i].r_offset;
+		/* This is the symbol it is referring to.  Note that all
+		   undefined symbols have been resolved.  */
+		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+			+ ELF32_R_SYM(rel[i].r_info);
+		relocation = sym->st_value + rel[i].r_addend;
+
+		switch (ELF32_R_TYPE(rel[i].r_info)) {
+		case R_METAG_NONE:
+			break;
+		case R_METAG_HIADDR16:
+			relocation >>= 16;
+		case R_METAG_LOADDR16:
+			*location = (*location & 0xfff80007) |
+				((relocation & 0xffff) << 3);
+			break;
+		case R_METAG_ADDR32:
+			/*
+			 * Packed data structures may cause a misaligned
+			 * R_METAG_ADDR32 to be emitted.
+			 */
+			put_unaligned(relocation, location);
+			break;
+		case R_METAG_GETSETOFF:
+			*location += ((relocation & 0xfff) << 7);
+			break;
+		case R_METAG_RELBRANCH:
+			if (*location & (0x7ffff << 5)) {
+				pr_err("bad relbranch relocation\n");
+				break;
+			}
+
+			/* This jump is too big for the offset slot. Build
+			 * a PLT to jump through to get to where we want to go.
+			 * NB: 21bit check - not scaled to 19bit yet
+			 */
+			if (((int32_t)(relocation -
+				       (uint32_t)location) > 0xfffff) ||
+			    ((int32_t)(relocation -
+				       (uint32_t)location) < -0xfffff)) {
+				relocation = do_plt_call(location, relocation,
+							 sechdrs, me);
+			}
+
+			value = relocation - (uint32_t)location;
+
+			/* branch instruction aligned */
+			value /= 4;
+
+			if ((value > 0x7ffff) || (value < -0x7ffff)) {
+				/*
+				 * this should have been caught by the code
+				 * above!
+				 */
+				pr_err("overflow of relbranch reloc\n");
+			}
+
+			*location = (*location & (~(0x7ffff << 5))) |
+				((value & 0x7ffff) << 5);
+			break;
+
+		default:
+			pr_err("module %s: Unknown relocation: %u\n",
+			       me->name, ELF32_R_TYPE(rel[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
diff --git a/arch/metag/kernel/perf/Makefile b/arch/metag/kernel/perf/Makefile
new file mode 100644
index 0000000..b158cb2
--- /dev/null
+++ b/arch/metag/kernel/perf/Makefile
@@ -0,0 +1,3 @@
+# Makefile for performance event core
+
+obj-y += perf_event.o
diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c
new file mode 100644
index 0000000..a876d5f
--- /dev/null
+++ b/arch/metag/kernel/perf/perf_event.c
@@ -0,0 +1,861 @@
+/*
+ * Meta performance counter support.
+ *  Copyright (C) 2012 Imagination Technologies Ltd
+ *
+ * This code is based on the sh pmu code:
+ *  Copyright (C) 2009 Paul Mundt
+ *
+ * and on the arm pmu code:
+ *  Copyright (C) 2009 picoChip Designs, Ltd., James Iles
+ *  Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/atomic.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/irqchip/metag.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+#include <asm/core_reg.h>
+#include <asm/hwthread.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+#include "perf_event.h"
+
+static int _hw_perf_event_init(struct perf_event *);
+static void _hw_perf_event_destroy(struct perf_event *);
+
+/* Determines which core type we are */
+static struct metag_pmu *metag_pmu __read_mostly;
+
+/* Processor specific data */
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+/* PMU admin */
+const char *perf_pmu_name(void)
+{
+	if (metag_pmu)
+		return metag_pmu->pmu.name;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+	if (metag_pmu)
+		return metag_pmu->max_events;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
+static inline int metag_pmu_initialised(void)
+{
+	return !!metag_pmu;
+}
+
+static void release_pmu_hardware(void)
+{
+	int irq;
+	unsigned int version = (metag_pmu->version &
+			(METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
+			METAC_ID_REV_S;
+
+	/* Early cores don't have overflow interrupts */
+	if (version < 0x0104)
+		return;
+
+	irq = internal_irq_map(17);
+	if (irq >= 0)
+		free_irq(irq, (void *)1);
+
+	irq = internal_irq_map(16);
+	if (irq >= 0)
+		free_irq(irq, (void *)0);
+}
+
+static int reserve_pmu_hardware(void)
+{
+	int err = 0, irq[2];
+	unsigned int version = (metag_pmu->version &
+			(METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
+			METAC_ID_REV_S;
+
+	/* Early cores don't have overflow interrupts */
+	if (version < 0x0104)
+		goto out;
+
+	/*
+	 * Bit 16 on HWSTATMETA is the interrupt for performance counter 0;
+	 * similarly, 17 is the interrupt for performance counter 1.
+	 * We can't (yet) interrupt on the cycle counter, because it's a
+	 * register, however it holds a 32-bit value as opposed to 24-bit.
+	 */
+	irq[0] = internal_irq_map(16);
+	if (irq[0] < 0) {
+		pr_err("unable to map internal IRQ %d\n", 16);
+		goto out;
+	}
+	err = request_irq(irq[0], metag_pmu->handle_irq, IRQF_NOBALANCING,
+			"metagpmu0", (void *)0);
+	if (err) {
+		pr_err("unable to request IRQ%d for metag PMU counters\n",
+				irq[0]);
+		goto out;
+	}
+
+	irq[1] = internal_irq_map(17);
+	if (irq[1] < 0) {
+		pr_err("unable to map internal IRQ %d\n", 17);
+		goto out_irq1;
+	}
+	err = request_irq(irq[1], metag_pmu->handle_irq, IRQF_NOBALANCING,
+			"metagpmu1", (void *)1);
+	if (err) {
+		pr_err("unable to request IRQ%d for metag PMU counters\n",
+				irq[1]);
+		goto out_irq1;
+	}
+
+	return 0;
+
+out_irq1:
+	free_irq(irq[0], (void *)0);
+out:
+	return err;
+}
+
+/* PMU operations */
+static void metag_pmu_enable(struct pmu *pmu)
+{
+}
+
+static void metag_pmu_disable(struct pmu *pmu)
+{
+}
+
+static int metag_pmu_event_init(struct perf_event *event)
+{
+	int err = 0;
+	atomic_t *active_events = &metag_pmu->active_events;
+
+	if (!metag_pmu_initialised()) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	event->destroy = _hw_perf_event_destroy;
+
+	if (!atomic_inc_not_zero(active_events)) {
+		mutex_lock(&metag_pmu->reserve_mutex);
+		if (atomic_read(active_events) == 0)
+			err = reserve_pmu_hardware();
+
+		if (!err)
+			atomic_inc(active_events);
+
+		mutex_unlock(&metag_pmu->reserve_mutex);
+	}
+
+	/* Hardware and caches counters */
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
+		err = _hw_perf_event_init(event);
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
+	if (err)
+		event->destroy(event);
+
+out:
+	return err;
+}
+
+void metag_pmu_event_update(struct perf_event *event,
+		struct hw_perf_event *hwc, int idx)
+{
+	u64 prev_raw_count, new_raw_count;
+	s64 delta;
+
+	/*
+	 * If this counter is chained, it may be that the previous counter
+	 * value has been changed beneath us.
+	 *
+	 * To get around this, we read and exchange the new raw count, then
+	 * add the delta (new - prev) to the generic counter atomically.
+	 *
+	 * Without interrupts, this is the simplest approach.
+	 */
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = metag_pmu->read(idx);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			new_raw_count) != prev_raw_count)
+		goto again;
+
+	/*
+	 * Calculate the delta and add it to the counter.
+	 */
+	delta = new_raw_count - prev_raw_count;
+
+	local64_add(delta, &event->count);
+}
+
+int metag_pmu_event_set_period(struct perf_event *event,
+		struct hw_perf_event *hwc, int idx)
+{
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int ret = 0;
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (left > (s64)metag_pmu->max_period)
+		left = metag_pmu->max_period;
+
+	if (metag_pmu->write)
+		metag_pmu->write(idx, (u64)(-left) & MAX_PERIOD);
+
+	perf_event_update_userpage(event);
+
+	return ret;
+}
+
+static void metag_pmu_start(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (WARN_ON_ONCE(idx == -1))
+		return;
+
+	/*
+	 * We always have to reprogram the period, so ignore PERF_EF_RELOAD.
+	 */
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+
+	/*
+	 * Reset the period.
+	 * Some counters can't be stopped (i.e. are core global), so when the
+	 * counter was 'stopped' we merely disabled the IRQ. If we don't reset
+	 * the period, then we'll either: a) get an overflow too soon;
+	 * or b) too late if the overflow happened since disabling.
+	 * Obviously, this has little bearing on cores without the overflow
+	 * interrupt, as the performance counter resets to zero on write
+	 * anyway.
+	 */
+	if (metag_pmu->max_period)
+		metag_pmu_event_set_period(event, hwc, hwc->idx);
+	cpuc->events[idx] = event;
+	metag_pmu->enable(hwc, idx);
+}
+
+static void metag_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * We should always update the counter on stop; see comment above
+	 * why.
+	 */
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		metag_pmu_event_update(event, hwc, hwc->idx);
+		metag_pmu->disable(hwc, hwc->idx);
+		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	}
+}
+
+static int metag_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = 0, ret = 0;
+
+	perf_pmu_disable(event->pmu);
+
+	/* check whether we're counting instructions */
+	if (hwc->config == 0x100) {
+		if (__test_and_set_bit(METAG_INST_COUNTER,
+				cpuc->used_mask)) {
+			ret = -EAGAIN;
+			goto out;
+		}
+		idx = METAG_INST_COUNTER;
+	} else {
+		/* Check whether we have a spare counter */
+		idx = find_first_zero_bit(cpuc->used_mask,
+				atomic_read(&metag_pmu->active_events));
+		if (idx >= METAG_INST_COUNTER) {
+			ret = -EAGAIN;
+			goto out;
+		}
+
+		__set_bit(idx, cpuc->used_mask);
+	}
+	hwc->idx = idx;
+
+	/* Make sure the counter is disabled */
+	metag_pmu->disable(hwc, idx);
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		metag_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+out:
+	perf_pmu_enable(event->pmu);
+	return ret;
+}
+
+static void metag_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	WARN_ON(idx < 0);
+	metag_pmu_stop(event, PERF_EF_UPDATE);
+	cpuc->events[idx] = NULL;
+	__clear_bit(idx, cpuc->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
+static void metag_pmu_read(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* Don't read disabled counters! */
+	if (hwc->idx < 0)
+		return;
+
+	metag_pmu_event_update(event, hwc, hwc->idx);
+}
+
+static struct pmu pmu = {
+	.pmu_enable	= metag_pmu_enable,
+	.pmu_disable	= metag_pmu_disable,
+
+	.event_init	= metag_pmu_event_init,
+
+	.add		= metag_pmu_add,
+	.del		= metag_pmu_del,
+	.start		= metag_pmu_start,
+	.stop		= metag_pmu_stop,
+	.read		= metag_pmu_read,
+};
+
+/* Core counter specific functions */
+static const int metag_general_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = 0x03,
+	[PERF_COUNT_HW_INSTRUCTIONS] = 0x100,
+	[PERF_COUNT_HW_CACHE_REFERENCES] = -1,
+	[PERF_COUNT_HW_CACHE_MISSES] = -1,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+	[PERF_COUNT_HW_BRANCH_MISSES] = -1,
+	[PERF_COUNT_HW_BUS_CYCLES] = -1,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = -1,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = -1,
+	[PERF_COUNT_HW_REF_CPU_CYCLES] = -1,
+};
+
+static const int metag_pmu_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0x08,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0x09,
+			[C(RESULT_MISS)] = 0x0a,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0xd0,
+			[C(RESULT_MISS)] = 0xd2,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = 0xd4,
+			[C(RESULT_MISS)] = 0xd5,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0xd1,
+			[C(RESULT_MISS)] = 0xd3,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+
+static void _hw_perf_event_destroy(struct perf_event *event)
+{
+	atomic_t *active_events = &metag_pmu->active_events;
+	struct mutex *pmu_mutex = &metag_pmu->reserve_mutex;
+
+	if (atomic_dec_and_mutex_lock(active_events, pmu_mutex)) {
+		release_pmu_hardware();
+		mutex_unlock(pmu_mutex);
+	}
+}
+
+static int _hw_perf_cache_event(int config, int *evp)
+{
+	unsigned long type, op, result;
+	int ev;
+
+	if (!metag_pmu->cache_events)
+		return -EINVAL;
+
+	/* Unpack config */
+	type = config & 0xff;
+	op = (config >> 8) & 0xff;
+	result = (config >> 16) & 0xff;
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX ||
+			op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+			result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ev = (*metag_pmu->cache_events)[type][op][result];
+	if (ev == 0)
+		return -EOPNOTSUPP;
+	if (ev == -1)
+		return -EINVAL;
+	*evp = ev;
+	return 0;
+}
+
+static int _hw_perf_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	int mapping = 0, err;
+
+	switch (attr->type) {
+	case PERF_TYPE_HARDWARE:
+		if (attr->config >= PERF_COUNT_HW_MAX)
+			return -EINVAL;
+
+		mapping = metag_pmu->event_map(attr->config);
+		break;
+
+	case PERF_TYPE_HW_CACHE:
+		err = _hw_perf_cache_event(attr->config, &mapping);
+		if (err)
+			return err;
+		break;
+	}
+
+	/* Return early if the event is unsupported */
+	if (mapping == -1)
+		return -EINVAL;
+
+	/*
+	 * Early cores have "limited" counters - they have no overflow
+	 * interrupts - and so are unable to do sampling without extra work
+	 * and timer assistance.
+	 */
+	if (metag_pmu->max_period == 0) {
+		if (hwc->sample_period)
+			return -EINVAL;
+	}
+
+	/*
+	 * Don't assign an index until the event is placed into the hardware.
+	 * -1 signifies that we're still deciding where to put it. On SMP
+	 * systems each core has its own set of counters, so we can't do any
+	 * constraint checking yet.
+	 */
+	hwc->idx = -1;
+
+	/* Store the event encoding */
+	hwc->config |= (unsigned long)mapping;
+
+	/*
+	 * For non-sampling runs, limit the sample_period to half of the
+	 * counter width. This way, the new counter value should be less
+	 * likely to overtake the previous one (unless there are IRQ latency
+	 * issues...)
+	 */
+	if (metag_pmu->max_period) {
+		if (!hwc->sample_period) {
+			hwc->sample_period = metag_pmu->max_period >> 1;
+			hwc->last_period = hwc->sample_period;
+			local64_set(&hwc->period_left, hwc->sample_period);
+		}
+	}
+
+	return 0;
+}
+
+static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx)
+{
+	struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+	unsigned int config = event->config;
+	unsigned int tmp = config & 0xf0;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/*
+	 * Check if we're enabling the instruction counter (index of
+	 * MAX_HWEVENTS - 1)
+	 */
+	if (METAG_INST_COUNTER == idx) {
+		WARN_ONCE((config != 0x100),
+			"invalid configuration (%d) for counter (%d)\n",
+			config, idx);
+
+		/* Reset the cycle count */
+		__core_reg_set(TXTACTCYC, 0);
+		goto unlock;
+	}
+
+	/* Check for a core internal or performance channel event. */
+	if (tmp) {
+		void *perf_addr = (void *)PERF_COUNT(idx);
+
+		/*
+		 * Anything other than a cycle count will write the low-
+		 * nibble to the correct counter register.
+		 */
+		switch (tmp) {
+		case 0xd0:
+			perf_addr = (void *)PERF_ICORE(idx);
+			break;
+
+		case 0xf0:
+			perf_addr = (void *)PERF_CHAN(idx);
+			break;
+		}
+
+		metag_out32((tmp & 0x0f), perf_addr);
+
+		/*
+		 * Now we use the high nibble as the performance event to
+		 * to count.
+		 */
+		config = tmp >> 4;
+	}
+
+	/*
+	 * Enabled counters start from 0. Early cores clear the count on
+	 * write but newer cores don't, so we make sure that the count is
+	 * set to 0.
+	 */
+	tmp = ((config & 0xf) << 28) |
+			((1 << 24) << cpu_2_hwthread_id[get_cpu()]);
+	metag_out32(tmp, PERF_COUNT(idx));
+unlock:
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void metag_pmu_disable_counter(struct hw_perf_event *event, int idx)
+{
+	struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+	unsigned int tmp = 0;
+	unsigned long flags;
+
+	/*
+	 * The cycle counter can't be disabled per se, as it's a hardware
+	 * thread register which is always counting. We merely return if this
+	 * is the counter we're attempting to disable.
+	 */
+	if (METAG_INST_COUNTER == idx)
+		return;
+
+	/*
+	 * The counter value _should_ have been read prior to disabling,
+	 * as if we're running on an early core then the value gets reset to
+	 * 0, and any read after that would be useless. On the newer cores,
+	 * however, it's better to read-modify-update this for purposes of
+	 * the overflow interrupt.
+	 * Here we remove the thread id AND the event nibble (there are at
+	 * least two events that count events that are core global and ignore
+	 * the thread id mask). This only works because we don't mix thread
+	 * performance counts, and event 0x00 requires a thread id mask!
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	tmp = metag_in32(PERF_COUNT(idx));
+	tmp &= 0x00ffffff;
+	metag_out32(tmp, PERF_COUNT(idx));
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static u64 metag_pmu_read_counter(int idx)
+{
+	u32 tmp = 0;
+
+	/* The act of reading the cycle counter also clears it */
+	if (METAG_INST_COUNTER == idx) {
+		__core_reg_swap(TXTACTCYC, tmp);
+		goto out;
+	}
+
+	tmp = metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
+out:
+	return tmp;
+}
+
+static void metag_pmu_write_counter(int idx, u32 val)
+{
+	struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+	u32 tmp = 0;
+	unsigned long flags;
+
+	/*
+	 * This _shouldn't_ happen, but if it does, then we can just
+	 * ignore the write, as the register is read-only and clear-on-write.
+	 */
+	if (METAG_INST_COUNTER == idx)
+		return;
+
+	/*
+	 * We'll keep the thread mask and event id, and just update the
+	 * counter itself. Also , we should bound the value to 24-bits.
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	val &= 0x00ffffff;
+	tmp = metag_in32(PERF_COUNT(idx)) & 0xff000000;
+	val |= tmp;
+	metag_out32(val, PERF_COUNT(idx));
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static int metag_pmu_event_map(int idx)
+{
+	return metag_general_events[idx];
+}
+
+static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev)
+{
+	int idx = (int)dev;
+	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+	struct perf_event *event = cpuhw->events[idx];
+	struct hw_perf_event *hwc = &event->hw;
+	struct pt_regs *regs = get_irq_regs();
+	struct perf_sample_data sampledata;
+	unsigned long flags;
+	u32 counter = 0;
+
+	/*
+	 * We need to stop the core temporarily from generating another
+	 * interrupt while we disable this counter. However, we don't want
+	 * to flag the counter as free
+	 */
+	__global_lock2(flags);
+	counter = metag_in32(PERF_COUNT(idx));
+	metag_out32((counter & 0x00ffffff), PERF_COUNT(idx));
+	__global_unlock2(flags);
+
+	/* Update the counts and reset the sample period */
+	metag_pmu_event_update(event, hwc, idx);
+	perf_sample_data_init(&sampledata, 0, hwc->last_period);
+	metag_pmu_event_set_period(event, hwc, idx);
+
+	/*
+	 * Enable the counter again once core overflow processing has
+	 * completed.
+	 */
+	if (!perf_event_overflow(event, &sampledata, regs))
+		metag_out32(counter, PERF_COUNT(idx));
+
+	return IRQ_HANDLED;
+}
+
+static struct metag_pmu _metag_pmu = {
+	.handle_irq	= metag_pmu_counter_overflow,
+	.enable		= metag_pmu_enable_counter,
+	.disable	= metag_pmu_disable_counter,
+	.read		= metag_pmu_read_counter,
+	.write		= metag_pmu_write_counter,
+	.event_map	= metag_pmu_event_map,
+	.cache_events	= &metag_pmu_cache_events,
+	.max_period	= MAX_PERIOD,
+	.max_events	= MAX_HWEVENTS,
+};
+
+/* PMU CPU hotplug notifier */
+static int __cpuinit metag_pmu_cpu_notify(struct notifier_block *b,
+		unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned int)hcpu;
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
+		return NOTIFY_DONE;
+
+	memset(cpuc, 0, sizeof(struct cpu_hw_events));
+	raw_spin_lock_init(&cpuc->pmu_lock);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata metag_pmu_notifier = {
+	.notifier_call = metag_pmu_cpu_notify,
+};
+
+/* PMU Initialisation */
+static int __init init_hw_perf_events(void)
+{
+	int ret = 0, cpu;
+	u32 version = *(u32 *)METAC_ID;
+	int major = (version & METAC_ID_MAJOR_BITS) >> METAC_ID_MAJOR_S;
+	int min_rev = (version & (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS))
+			>> METAC_ID_REV_S;
+
+	/* Not a Meta 2 core, then not supported */
+	if (0x02 > major) {
+		pr_info("no hardware counter support available\n");
+		goto out;
+	} else if (0x02 == major) {
+		metag_pmu = &_metag_pmu;
+
+		if (min_rev < 0x0104) {
+			/*
+			 * A core without overflow interrupts, and clear-on-
+			 * write counters.
+			 */
+			metag_pmu->handle_irq = NULL;
+			metag_pmu->write = NULL;
+			metag_pmu->max_period = 0;
+		}
+
+		metag_pmu->name = "Meta 2";
+		metag_pmu->version = version;
+		metag_pmu->pmu = pmu;
+	}
+
+	pr_info("enabled with %s PMU driver, %d counters available\n",
+			metag_pmu->name, metag_pmu->max_events);
+
+	/* Initialise the active events and reservation mutex */
+	atomic_set(&metag_pmu->active_events, 0);
+	mutex_init(&metag_pmu->reserve_mutex);
+
+	/* Clear the counters */
+	metag_out32(0, PERF_COUNT(0));
+	metag_out32(0, PERF_COUNT(1));
+
+	for_each_possible_cpu(cpu) {
+		struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+		memset(cpuc, 0, sizeof(struct cpu_hw_events));
+		raw_spin_lock_init(&cpuc->pmu_lock);
+	}
+
+	register_cpu_notifier(&metag_pmu_notifier);
+	ret = perf_pmu_register(&pmu, (char *)metag_pmu->name, PERF_TYPE_RAW);
+out:
+	return ret;
+}
+early_initcall(init_hw_perf_events);
diff --git a/arch/metag/kernel/perf/perf_event.h b/arch/metag/kernel/perf/perf_event.h
new file mode 100644
index 0000000..fd10a13
--- /dev/null
+++ b/arch/metag/kernel/perf/perf_event.h
@@ -0,0 +1,106 @@
+/*
+ * Meta performance counter support.
+ *  Copyright (C) 2012 Imagination Technologies Ltd
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef METAG_PERF_EVENT_H_
+#define METAG_PERF_EVENT_H_
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/perf_event.h>
+
+/* For performance counter definitions */
+#include <asm/metag_mem.h>
+
+/*
+ * The Meta core has two performance counters, with 24-bit resolution. Newer
+ * cores generate an overflow interrupt on transition from 0xffffff to 0.
+ *
+ * Each counter consists of the counter id, hardware thread id, and the count
+ * itself; each counter can be assigned to multiple hardware threads at any
+ * one time, with the returned count being an aggregate of events. A small
+ * number of events are thread global, i.e. they count the aggregate of all
+ * threads' events, regardless of the thread selected.
+ *
+ * Newer cores can store an arbitrary 24-bit number in the counter, whereas
+ * older cores will clear the counter bits on write.
+ *
+ * We also have a pseudo-counter in the form of the thread active cycles
+ * counter (which, incidentally, is also bound to
+ */
+
+#define MAX_HWEVENTS		3
+#define MAX_PERIOD		((1UL << 24) - 1)
+#define METAG_INST_COUNTER	(MAX_HWEVENTS - 1)
+
+/**
+ * struct cpu_hw_events - a processor core's performance events
+ * @events:	an array of perf_events active for a given index.
+ * @used_mask:	a bitmap of in-use counters.
+ * @pmu_lock:	a perf counter lock
+ *
+ * This is a per-cpu/core structure that maintains a record of its
+ * performance counters' state.
+ */
+struct cpu_hw_events {
+	struct perf_event	*events[MAX_HWEVENTS];
+	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
+	raw_spinlock_t		pmu_lock;
+};
+
+/**
+ * struct metag_pmu - the Meta PMU structure
+ * @pmu:		core pmu structure
+ * @name:		pmu name
+ * @version:		core version
+ * @handle_irq:		overflow interrupt handler
+ * @enable:		enable a counter
+ * @disable:		disable a counter
+ * @read:		read the value of a counter
+ * @write:		write a value to a counter
+ * @event_map:		kernel event to counter event id map
+ * @cache_events:	kernel cache counter to core cache counter map
+ * @max_period:		maximum value of the counter before overflow
+ * @max_events:		maximum number of counters available at any one time
+ * @active_events:	number of active counters
+ * @reserve_mutex:	counter reservation mutex
+ *
+ * This describes the main functionality and data used by the performance
+ * event core.
+ */
+struct metag_pmu {
+	struct pmu	pmu;
+	const char	*name;
+	u32		version;
+	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
+	void		(*enable)(struct hw_perf_event *evt, int idx);
+	void		(*disable)(struct hw_perf_event *evt, int idx);
+	u64		(*read)(int idx);
+	void		(*write)(int idx, u32 val);
+	int		(*event_map)(int idx);
+	const int	(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX];
+	u32		max_period;
+	int		max_events;
+	atomic_t	active_events;
+	struct mutex	reserve_mutex;
+};
+
+/* Convenience macros for accessing the perf counters */
+/* Define some convenience accessors */
+#define PERF_COUNT(x)	(PERF_COUNT0 + (sizeof(u64) * (x)))
+#define PERF_ICORE(x)	(PERF_ICORE0 + (sizeof(u64) * (x)))
+#define PERF_CHAN(x)	(PERF_CHAN0 + (sizeof(u64) * (x)))
+
+/* Cache index macros */
+#define C(x) PERF_COUNT_HW_CACHE_##x
+#define CACHE_OP_UNSUPPORTED	0xfffe
+#define CACHE_OP_NONSENSE	0xffff
+
+#endif
diff --git a/arch/metag/kernel/perf_callchain.c b/arch/metag/kernel/perf_callchain.c
new file mode 100644
index 0000000..3156334
--- /dev/null
+++ b/arch/metag/kernel/perf_callchain.c
@@ -0,0 +1,96 @@
+/*
+ * Perf callchain handling code.
+ *
+ *   Based on the ARM perf implementation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+
+static bool is_valid_call(unsigned long calladdr)
+{
+	unsigned int callinsn;
+
+	/* Check the possible return address is aligned. */
+	if (!(calladdr & 0x3)) {
+		if (!get_user(callinsn, (unsigned int *)calladdr)) {
+			/* Check for CALLR or SWAP PC,D1RtP. */
+			if ((callinsn & 0xff000000) == 0xab000000 ||
+			    callinsn == 0xa3200aa0)
+				return true;
+		}
+	}
+	return false;
+}
+
+static struct metag_frame __user *
+user_backtrace(struct metag_frame __user *user_frame,
+	       struct perf_callchain_entry *entry)
+{
+	struct metag_frame frame;
+	unsigned long calladdr;
+
+	/* We cannot rely on having frame pointers in user code. */
+	while (1) {
+		/* Also check accessibility of one struct frame beyond */
+		if (!access_ok(VERIFY_READ, user_frame, sizeof(frame)))
+			return 0;
+		if (__copy_from_user_inatomic(&frame, user_frame,
+					      sizeof(frame)))
+			return 0;
+
+		--user_frame;
+
+		calladdr = frame.lr - 4;
+		if (is_valid_call(calladdr)) {
+			perf_callchain_store(entry, calladdr);
+			return user_frame;
+		}
+	}
+
+	return 0;
+}
+
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+	unsigned long sp = regs->ctx.AX[0].U0;
+	struct metag_frame __user *frame;
+
+	frame = (struct metag_frame __user *)sp;
+
+	--frame;
+
+	while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame)
+		frame = user_backtrace(frame, entry);
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called
+ * whist unwinding the stackframe and is like a subroutine return so we use
+ * the PC.
+ */
+static int
+callchain_trace(struct stackframe *fr,
+		void *data)
+{
+	struct perf_callchain_entry *entry = data;
+	perf_callchain_store(entry, fr->pc);
+	return 0;
+}
+
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+	struct stackframe fr;
+
+	fr.fp = regs->ctx.AX[1].U0;
+	fr.sp = regs->ctx.AX[0].U0;
+	fr.lr = regs->ctx.DX[4].U1;
+	fr.pc = regs->ctx.CurrPC;
+	walk_stackframe(&fr, callchain_trace, entry);
+}
diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c
new file mode 100644
index 0000000..c6efe62
--- /dev/null
+++ b/arch/metag/kernel/process.c
@@ -0,0 +1,461 @@
+/*
+ * Copyright (C) 2005,2006,2007,2008,2009,2010,2011 Imagination Technologies
+ *
+ * This file contains the architecture-dependent parts of process handling.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/reboot.h>
+#include <linux/elfcore.h>
+#include <linux/fs.h>
+#include <linux/tick.h>
+#include <linux/slab.h>
+#include <linux/mman.h>
+#include <linux/pm.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <asm/core_reg.h>
+#include <asm/user_gateway.h>
+#include <asm/tcm.h>
+#include <asm/traps.h>
+#include <asm/switch_to.h>
+
+/*
+ * Wait for the next interrupt and enable local interrupts
+ */
+static inline void arch_idle(void)
+{
+	int tmp;
+
+	/*
+	 * Quickly jump straight into the interrupt entry point without actually
+	 * triggering an interrupt. When TXSTATI gets read the processor will
+	 * block until an interrupt is triggered.
+	 */
+	asm volatile (/* Switch into ISTAT mode */
+		      "RTH\n\t"
+		      /* Enable local interrupts */
+		      "MOV	TXMASKI, %1\n\t"
+		      /*
+		       * We can't directly "SWAP PC, PCX", so we swap via a
+		       * temporary. Essentially we do:
+		       *  PCX_new = 1f (the place to continue execution)
+		       *  PC = PCX_old
+		       */
+		      "ADD	%0, CPC0, #(1f-.)\n\t"
+		      "SWAP	PCX, %0\n\t"
+		      "MOV	PC, %0\n"
+		      /* Continue execution here with interrupts enabled */
+		      "1:"
+		      : "=a" (tmp)
+		      : "r" (get_trigger_mask()));
+}
+
+void cpu_idle(void)
+{
+	set_thread_flag(TIF_POLLING_NRFLAG);
+
+	while (1) {
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
+
+		while (!need_resched()) {
+			/*
+			 * We need to disable interrupts here to ensure we don't
+			 * miss a wakeup call.
+			 */
+			local_irq_disable();
+			if (!need_resched()) {
+#ifdef CONFIG_HOTPLUG_CPU
+				if (cpu_is_offline(smp_processor_id()))
+					cpu_die();
+#endif
+				arch_idle();
+			} else {
+				local_irq_enable();
+			}
+		}
+
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
+		schedule_preempt_disabled();
+	 }
+}
+
+void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
+
+void (*soc_restart)(char *cmd);
+void (*soc_halt)(void);
+
+void machine_restart(char *cmd)
+{
+	if (soc_restart)
+		soc_restart(cmd);
+	hard_processor_halt(HALT_OK);
+}
+
+void machine_halt(void)
+{
+	if (soc_halt)
+		soc_halt();
+	smp_send_stop();
+	hard_processor_halt(HALT_OK);
+}
+
+void machine_power_off(void)
+{
+	if (pm_power_off)
+		pm_power_off();
+	smp_send_stop();
+	hard_processor_halt(HALT_OK);
+}
+
+#define FLAG_Z 0x8
+#define FLAG_N 0x4
+#define FLAG_O 0x2
+#define FLAG_C 0x1
+
+void show_regs(struct pt_regs *regs)
+{
+	int i;
+	const char *AX0_names[] = {"A0StP", "A0FrP"};
+	const char *AX1_names[] = {"A1GbP", "A1LbP"};
+
+	const char *DX0_names[] = {
+		"D0Re0",
+		"D0Ar6",
+		"D0Ar4",
+		"D0Ar2",
+		"D0FrT",
+		"D0.5 ",
+		"D0.6 ",
+		"D0.7 "
+	};
+
+	const char *DX1_names[] = {
+		"D1Re0",
+		"D1Ar5",
+		"D1Ar3",
+		"D1Ar1",
+		"D1RtP",
+		"D1.5 ",
+		"D1.6 ",
+		"D1.7 "
+	};
+
+	pr_info(" pt_regs @ %p\n", regs);
+	pr_info(" SaveMask = 0x%04hx\n", regs->ctx.SaveMask);
+	pr_info(" Flags = 0x%04hx (%c%c%c%c)\n", regs->ctx.Flags,
+		regs->ctx.Flags & FLAG_Z ? 'Z' : 'z',
+		regs->ctx.Flags & FLAG_N ? 'N' : 'n',
+		regs->ctx.Flags & FLAG_O ? 'O' : 'o',
+		regs->ctx.Flags & FLAG_C ? 'C' : 'c');
+	pr_info(" TXRPT = 0x%08x\n", regs->ctx.CurrRPT);
+	pr_info(" PC = 0x%08x\n", regs->ctx.CurrPC);
+
+	/* AX regs */
+	for (i = 0; i < 2; i++) {
+		pr_info(" %s = 0x%08x    ",
+			AX0_names[i],
+			regs->ctx.AX[i].U0);
+		printk(" %s = 0x%08x\n",
+			AX1_names[i],
+			regs->ctx.AX[i].U1);
+	}
+
+	if (regs->ctx.SaveMask & TBICTX_XEXT_BIT)
+		pr_warn(" Extended state present - AX2.[01] will be WRONG\n");
+
+	/* Special place with AXx.2 */
+	pr_info(" A0.2  = 0x%08x    ",
+		regs->ctx.Ext.AX2.U0);
+	printk(" A1.2  = 0x%08x\n",
+		regs->ctx.Ext.AX2.U1);
+
+	/* 'extended' AX regs (nominally, just AXx.3) */
+	for (i = 0; i < (TBICTX_AX_REGS - 3); i++) {
+		pr_info(" A0.%d  = 0x%08x    ", i + 3, regs->ctx.AX3[i].U0);
+		printk(" A1.%d  = 0x%08x\n", i + 3, regs->ctx.AX3[i].U1);
+	}
+
+	for (i = 0; i < 8; i++) {
+		pr_info(" %s = 0x%08x    ", DX0_names[i], regs->ctx.DX[i].U0);
+		printk(" %s = 0x%08x\n", DX1_names[i], regs->ctx.DX[i].U1);
+	}
+
+	show_trace(NULL, (unsigned long *)regs->ctx.AX[0].U0, regs);
+}
+
+int copy_thread(unsigned long clone_flags, unsigned long usp,
+		unsigned long arg, struct task_struct *tsk)
+{
+	struct pt_regs *childregs = task_pt_regs(tsk);
+	void *kernel_context = ((void *) childregs +
+				sizeof(struct pt_regs));
+	unsigned long global_base;
+
+	BUG_ON(((unsigned long)childregs) & 0x7);
+	BUG_ON(((unsigned long)kernel_context) & 0x7);
+
+	memset(&tsk->thread.kernel_context, 0,
+			sizeof(tsk->thread.kernel_context));
+
+	tsk->thread.kernel_context = __TBISwitchInit(kernel_context,
+						     ret_from_fork,
+						     0, 0);
+
+	if (unlikely(tsk->flags & PF_KTHREAD)) {
+		/*
+		 * Make sure we don't leak any kernel data to child's regs
+		 * if kernel thread becomes a userspace thread in the future
+		 */
+		memset(childregs, 0 , sizeof(struct pt_regs));
+
+		global_base = __core_reg_get(A1GbP);
+		childregs->ctx.AX[0].U1 = (unsigned long) global_base;
+		childregs->ctx.AX[0].U0 = (unsigned long) kernel_context;
+		/* Set D1Ar1=arg and D1RtP=usp (fn) */
+		childregs->ctx.DX[4].U1 = usp;
+		childregs->ctx.DX[3].U1 = arg;
+		tsk->thread.int_depth = 2;
+		return 0;
+	}
+	/*
+	 * Get a pointer to where the new child's register block should have
+	 * been pushed.
+	 * The Meta's stack grows upwards, and the context is the the first
+	 * thing to be pushed by TBX (phew)
+	 */
+	*childregs = *current_pt_regs();
+	/* Set the correct stack for the clone mode */
+	if (usp)
+		childregs->ctx.AX[0].U0 = ALIGN(usp, 8);
+	tsk->thread.int_depth = 1;
+
+	/* set return value for child process */
+	childregs->ctx.DX[0].U0 = 0;
+
+	/* The TLS pointer is passed as an argument to sys_clone. */
+	if (clone_flags & CLONE_SETTLS)
+		tsk->thread.tls_ptr =
+				(__force void __user *)childregs->ctx.DX[1].U1;
+
+#ifdef CONFIG_METAG_FPU
+	if (tsk->thread.fpu_context) {
+		struct meta_fpu_context *ctx;
+
+		ctx = kmemdup(tsk->thread.fpu_context,
+			      sizeof(struct meta_fpu_context), GFP_ATOMIC);
+		tsk->thread.fpu_context = ctx;
+	}
+#endif
+
+#ifdef CONFIG_METAG_DSP
+	if (tsk->thread.dsp_context) {
+		struct meta_ext_context *ctx;
+		int i;
+
+		ctx = kmemdup(tsk->thread.dsp_context,
+			      sizeof(struct meta_ext_context), GFP_ATOMIC);
+		for (i = 0; i < 2; i++)
+			ctx->ram[i] = kmemdup(ctx->ram[i], ctx->ram_sz[i],
+					      GFP_ATOMIC);
+		tsk->thread.dsp_context = ctx;
+	}
+#endif
+
+	return 0;
+}
+
+#ifdef CONFIG_METAG_FPU
+static void alloc_fpu_context(struct thread_struct *thread)
+{
+	thread->fpu_context = kzalloc(sizeof(struct meta_fpu_context),
+				      GFP_ATOMIC);
+}
+
+static void clear_fpu(struct thread_struct *thread)
+{
+	thread->user_flags &= ~TBICTX_FPAC_BIT;
+	kfree(thread->fpu_context);
+	thread->fpu_context = NULL;
+}
+#else
+static void clear_fpu(struct thread_struct *thread)
+{
+}
+#endif
+
+#ifdef CONFIG_METAG_DSP
+static void clear_dsp(struct thread_struct *thread)
+{
+	if (thread->dsp_context) {
+		kfree(thread->dsp_context->ram[0]);
+		kfree(thread->dsp_context->ram[1]);
+
+		kfree(thread->dsp_context);
+
+		thread->dsp_context = NULL;
+	}
+
+	__core_reg_set(D0.8, 0);
+}
+#else
+static void clear_dsp(struct thread_struct *thread)
+{
+}
+#endif
+
+struct task_struct *__sched __switch_to(struct task_struct *prev,
+					struct task_struct *next)
+{
+	TBIRES to, from;
+
+	to.Switch.pCtx = next->thread.kernel_context;
+	to.Switch.pPara = prev;
+
+#ifdef CONFIG_METAG_FPU
+	if (prev->thread.user_flags & TBICTX_FPAC_BIT) {
+		struct pt_regs *regs = task_pt_regs(prev);
+		TBIRES state;
+
+		state.Sig.SaveMask = prev->thread.user_flags;
+		state.Sig.pCtx = &regs->ctx;
+
+		if (!prev->thread.fpu_context)
+			alloc_fpu_context(&prev->thread);
+		if (prev->thread.fpu_context)
+			__TBICtxFPUSave(state, prev->thread.fpu_context);
+	}
+	/*
+	 * Force a restore of the FPU context next time this process is
+	 * scheduled.
+	 */
+	if (prev->thread.fpu_context)
+		prev->thread.fpu_context->needs_restore = true;
+#endif
+
+
+	from = __TBISwitch(to, &prev->thread.kernel_context);
+
+	/* Restore TLS pointer for this process. */
+	set_gateway_tls(current->thread.tls_ptr);
+
+	return (struct task_struct *) from.Switch.pPara;
+}
+
+void flush_thread(void)
+{
+	clear_fpu(&current->thread);
+	clear_dsp(&current->thread);
+}
+
+/*
+ * Free current thread data structures etc.
+ */
+void exit_thread(void)
+{
+	clear_fpu(&current->thread);
+	clear_dsp(&current->thread);
+}
+
+/* TODO: figure out how to unwind the kernel stack here to figure out
+ * where we went to sleep. */
+unsigned long get_wchan(struct task_struct *p)
+{
+	return 0;
+}
+
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
+{
+	/* Returning 0 indicates that the FPU state was not stored (as it was
+	 * not in use) */
+	return 0;
+}
+
+#ifdef CONFIG_METAG_USER_TCM
+
+#define ELF_MIN_ALIGN	PAGE_SIZE
+
+#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
+#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
+#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
+
+#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
+
+unsigned long __metag_elf_map(struct file *filep, unsigned long addr,
+			      struct elf_phdr *eppnt, int prot, int type,
+			      unsigned long total_size)
+{
+	unsigned long map_addr, size;
+	unsigned long page_off = ELF_PAGEOFFSET(eppnt->p_vaddr);
+	unsigned long raw_size = eppnt->p_filesz + page_off;
+	unsigned long off = eppnt->p_offset - page_off;
+	unsigned int tcm_tag;
+	addr = ELF_PAGESTART(addr);
+	size = ELF_PAGEALIGN(raw_size);
+
+	/* mmap() will return -EINVAL if given a zero size, but a
+	 * segment with zero filesize is perfectly valid */
+	if (!size)
+		return addr;
+
+	tcm_tag = tcm_lookup_tag(addr);
+
+	if (tcm_tag != TCM_INVALID_TAG)
+		type &= ~MAP_FIXED;
+
+	/*
+	* total_size is the size of the ELF (interpreter) image.
+	* The _first_ mmap needs to know the full size, otherwise
+	* randomization might put this image into an overlapping
+	* position with the ELF binary image. (since size < total_size)
+	* So we first map the 'big' image - and unmap the remainder at
+	* the end. (which unmap is needed for ELF images with holes.)
+	*/
+	if (total_size) {
+		total_size = ELF_PAGEALIGN(total_size);
+		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
+		if (!BAD_ADDR(map_addr))
+			vm_munmap(map_addr+size, total_size-size);
+	} else
+		map_addr = vm_mmap(filep, addr, size, prot, type, off);
+
+	if (!BAD_ADDR(map_addr) && tcm_tag != TCM_INVALID_TAG) {
+		struct tcm_allocation *tcm;
+		unsigned long tcm_addr;
+
+		tcm = kmalloc(sizeof(*tcm), GFP_KERNEL);
+		if (!tcm)
+			return -ENOMEM;
+
+		tcm_addr = tcm_alloc(tcm_tag, raw_size);
+		if (tcm_addr != addr) {
+			kfree(tcm);
+			return -ENOMEM;
+		}
+
+		tcm->tag = tcm_tag;
+		tcm->addr = tcm_addr;
+		tcm->size = raw_size;
+
+		list_add(&tcm->list, &current->mm->context.tcm);
+
+		eppnt->p_vaddr = map_addr;
+		if (copy_from_user((void *) addr, (void __user *) map_addr,
+				   raw_size))
+			return -EFAULT;
+	}
+
+	return map_addr;
+}
+#endif
diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c
new file mode 100644
index 0000000..47a8828
--- /dev/null
+++ b/arch/metag/kernel/ptrace.c
@@ -0,0 +1,380 @@
+/*
+ *  Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file COPYING in the main directory of
+ * this archive for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/regset.h>
+#include <linux/tracehook.h>
+#include <linux/elf.h>
+#include <linux/uaccess.h>
+#include <trace/syscall.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+/*
+ * user_regset definitions.
+ */
+
+int metag_gp_regs_copyout(const struct pt_regs *regs,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf)
+{
+	const void *ptr;
+	unsigned long data;
+	int ret;
+
+	/* D{0-1}.{0-7} */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  regs->ctx.DX, 0, 4*16);
+	if (ret)
+		goto out;
+	/* A{0-1}.{0-1} */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  regs->ctx.AX, 4*16, 4*20);
+	if (ret)
+		goto out;
+	/* A{0-1}.2 */
+	if (regs->ctx.SaveMask & TBICTX_XEXT_BIT)
+		ptr = regs->ctx.Ext.Ctx.pExt;
+	else
+		ptr = &regs->ctx.Ext.AX2;
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  ptr, 4*20, 4*22);
+	if (ret)
+		goto out;
+	/* A{0-1}.3 */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &regs->ctx.AX3, 4*22, 4*24);
+	if (ret)
+		goto out;
+	/* PC */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &regs->ctx.CurrPC, 4*24, 4*25);
+	if (ret)
+		goto out;
+	/* TXSTATUS */
+	data = (unsigned long)regs->ctx.Flags;
+	if (regs->ctx.SaveMask & TBICTX_CBUF_BIT)
+		data |= USER_GP_REGS_STATUS_CATCH_BIT;
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &data, 4*25, 4*26);
+	if (ret)
+		goto out;
+	/* TXRPT, TXBPOBITS, TXMODE */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &regs->ctx.CurrRPT, 4*26, 4*29);
+	if (ret)
+		goto out;
+	/* Padding */
+	ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+				       4*29, 4*30);
+out:
+	return ret;
+}
+
+int metag_gp_regs_copyin(struct pt_regs *regs,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf)
+{
+	void *ptr;
+	unsigned long data;
+	int ret;
+
+	/* D{0-1}.{0-7} */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 regs->ctx.DX, 0, 4*16);
+	if (ret)
+		goto out;
+	/* A{0-1}.{0-1} */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 regs->ctx.AX, 4*16, 4*20);
+	if (ret)
+		goto out;
+	/* A{0-1}.2 */
+	if (regs->ctx.SaveMask & TBICTX_XEXT_BIT)
+		ptr = regs->ctx.Ext.Ctx.pExt;
+	else
+		ptr = &regs->ctx.Ext.AX2;
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 ptr, 4*20, 4*22);
+	if (ret)
+		goto out;
+	/* A{0-1}.3 */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->ctx.AX3, 4*22, 4*24);
+	if (ret)
+		goto out;
+	/* PC */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->ctx.CurrPC, 4*24, 4*25);
+	if (ret)
+		goto out;
+	/* TXSTATUS */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &data, 4*25, 4*26);
+	if (ret)
+		goto out;
+	regs->ctx.Flags = data & 0xffff;
+	if (data & USER_GP_REGS_STATUS_CATCH_BIT)
+		regs->ctx.SaveMask |= TBICTX_XCBF_BIT | TBICTX_CBUF_BIT;
+	else
+		regs->ctx.SaveMask &= ~TBICTX_CBUF_BIT;
+	/* TXRPT, TXBPOBITS, TXMODE */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->ctx.CurrRPT, 4*26, 4*29);
+out:
+	return ret;
+}
+
+static int metag_gp_regs_get(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     void *kbuf, void __user *ubuf)
+{
+	const struct pt_regs *regs = task_pt_regs(target);
+	return metag_gp_regs_copyout(regs, pos, count, kbuf, ubuf);
+}
+
+static int metag_gp_regs_set(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     const void *kbuf, const void __user *ubuf)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+	return metag_gp_regs_copyin(regs, pos, count, kbuf, ubuf);
+}
+
+int metag_cb_regs_copyout(const struct pt_regs *regs,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf)
+{
+	int ret;
+
+	/* TXCATCH{0-3} */
+	if (regs->ctx.SaveMask & TBICTX_XCBF_BIT)
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  regs->extcb0, 0, 4*4);
+	else
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       0, 4*4);
+	return ret;
+}
+
+int metag_cb_regs_copyin(struct pt_regs *regs,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	/* TXCATCH{0-3} */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 regs->extcb0, 0, 4*4);
+	return ret;
+}
+
+static int metag_cb_regs_get(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     void *kbuf, void __user *ubuf)
+{
+	const struct pt_regs *regs = task_pt_regs(target);
+	return metag_cb_regs_copyout(regs, pos, count, kbuf, ubuf);
+}
+
+static int metag_cb_regs_set(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     const void *kbuf, const void __user *ubuf)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+	return metag_cb_regs_copyin(regs, pos, count, kbuf, ubuf);
+}
+
+int metag_rp_state_copyout(const struct pt_regs *regs,
+			   unsigned int pos, unsigned int count,
+			   void *kbuf, void __user *ubuf)
+{
+	unsigned long mask;
+	u64 *ptr;
+	int ret, i;
+
+	/* Empty read pipeline */
+	if (!(regs->ctx.SaveMask & TBICTX_CBRP_BIT)) {
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       0, 4*13);
+		goto out;
+	}
+
+	mask = (regs->ctx.CurrDIVTIME & TXDIVTIME_RPMASK_BITS) >>
+		TXDIVTIME_RPMASK_S;
+
+	/* Read pipeline entries */
+	ptr = (void *)&regs->extcb0[1];
+	for (i = 0; i < 6; ++i, ++ptr) {
+		if (mask & (1 << i))
+			ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+						  ptr, 8*i, 8*(i + 1));
+		else
+			ret = user_regset_copyout_zero(&pos, &count, &kbuf,
+						       &ubuf, 8*i, 8*(i + 1));
+		if (ret)
+			goto out;
+	}
+	/* Mask of entries */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &mask, 4*12, 4*13);
+out:
+	return ret;
+}
+
+int metag_rp_state_copyin(struct pt_regs *regs,
+			  unsigned int pos, unsigned int count,
+			  const void *kbuf, const void __user *ubuf)
+{
+	struct user_rp_state rp;
+	unsigned long long *ptr;
+	int ret, i;
+
+	/* Read the entire pipeline before making any changes */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &rp, 0, 4*13);
+	if (ret)
+		goto out;
+
+	/* Write pipeline entries */
+	ptr = (void *)&regs->extcb0[1];
+	for (i = 0; i < 6; ++i, ++ptr)
+		if (rp.mask & (1 << i))
+			*ptr = rp.entries[i];
+
+	/* Update RPMask in TXDIVTIME */
+	regs->ctx.CurrDIVTIME &= ~TXDIVTIME_RPMASK_BITS;
+	regs->ctx.CurrDIVTIME |= (rp.mask << TXDIVTIME_RPMASK_S)
+				 & TXDIVTIME_RPMASK_BITS;
+
+	/* Set/clear flags to indicate catch/read pipeline state */
+	if (rp.mask)
+		regs->ctx.SaveMask |= TBICTX_XCBF_BIT | TBICTX_CBRP_BIT;
+	else
+		regs->ctx.SaveMask &= ~TBICTX_CBRP_BIT;
+out:
+	return ret;
+}
+
+static int metag_rp_state_get(struct task_struct *target,
+			      const struct user_regset *regset,
+			      unsigned int pos, unsigned int count,
+			      void *kbuf, void __user *ubuf)
+{
+	const struct pt_regs *regs = task_pt_regs(target);
+	return metag_rp_state_copyout(regs, pos, count, kbuf, ubuf);
+}
+
+static int metag_rp_state_set(struct task_struct *target,
+			      const struct user_regset *regset,
+			      unsigned int pos, unsigned int count,
+			      const void *kbuf, const void __user *ubuf)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+	return metag_rp_state_copyin(regs, pos, count, kbuf, ubuf);
+}
+
+enum metag_regset {
+	REGSET_GENERAL,
+	REGSET_CBUF,
+	REGSET_READPIPE,
+};
+
+static const struct user_regset metag_regsets[] = {
+	[REGSET_GENERAL] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = ELF_NGREG,
+		.size = sizeof(long),
+		.align = sizeof(long long),
+		.get = metag_gp_regs_get,
+		.set = metag_gp_regs_set,
+	},
+	[REGSET_CBUF] = {
+		.core_note_type = NT_METAG_CBUF,
+		.n = sizeof(struct user_cb_regs) / sizeof(long),
+		.size = sizeof(long),
+		.align = sizeof(long long),
+		.get = metag_cb_regs_get,
+		.set = metag_cb_regs_set,
+	},
+	[REGSET_READPIPE] = {
+		.core_note_type = NT_METAG_RPIPE,
+		.n = sizeof(struct user_rp_state) / sizeof(long),
+		.size = sizeof(long),
+		.align = sizeof(long long),
+		.get = metag_rp_state_get,
+		.set = metag_rp_state_set,
+	},
+};
+
+static const struct user_regset_view user_metag_view = {
+	.name = "metag",
+	.e_machine = EM_METAG,
+	.regsets = metag_regsets,
+	.n = ARRAY_SIZE(metag_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &user_metag_view;
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+	/* nothing to do.. */
+}
+
+long arch_ptrace(struct task_struct *child, long request, unsigned long addr,
+		 unsigned long data)
+{
+	int ret;
+
+	switch (request) {
+	default:
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	return ret;
+}
+
+int syscall_trace_enter(struct pt_regs *regs)
+{
+	int ret = 0;
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		ret = tracehook_report_syscall_entry(regs);
+
+	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+		trace_sys_enter(regs, regs->ctx.DX[0].U1);
+
+	return ret ? -1 : regs->ctx.DX[0].U1;
+}
+
+void syscall_trace_leave(struct pt_regs *regs)
+{
+	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+		trace_sys_exit(regs, regs->ctx.DX[0].U1);
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall_exit(regs, 0);
+}
diff --git a/arch/metag/kernel/setup.c b/arch/metag/kernel/setup.c
new file mode 100644
index 0000000..8792461
--- /dev/null
+++ b/arch/metag/kernel/setup.c
@@ -0,0 +1,631 @@
+/*
+ * Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ *
+ * This file contains the architecture-dependant parts of system setup.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/bootmem.h>
+#include <linux/console.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/of_fdt.h>
+#include <linux/pfn.h>
+#include <linux/root_dev.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/start_kernel.h>
+#include <linux/string.h>
+
+#include <asm/cachepart.h>
+#include <asm/clock.h>
+#include <asm/core_reg.h>
+#include <asm/cpu.h>
+#include <asm/da.h>
+#include <asm/highmem.h>
+#include <asm/hwthread.h>
+#include <asm/l2cache.h>
+#include <asm/mach/arch.h>
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+#include <asm/mmu.h>
+#include <asm/mmzone.h>
+#include <asm/processor.h>
+#include <asm/prom.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/traps.h>
+
+/* Priv protect as many registers as possible. */
+#define DEFAULT_PRIV	(TXPRIVEXT_COPRO_BITS		| \
+			 TXPRIVEXT_TXTRIGGER_BIT	| \
+			 TXPRIVEXT_TXGBLCREG_BIT	| \
+			 TXPRIVEXT_ILOCK_BIT		| \
+			 TXPRIVEXT_TXITACCYC_BIT	| \
+			 TXPRIVEXT_TXDIVTIME_BIT	| \
+			 TXPRIVEXT_TXAMAREGX_BIT	| \
+			 TXPRIVEXT_TXTIMERI_BIT		| \
+			 TXPRIVEXT_TXSTATUS_BIT		| \
+			 TXPRIVEXT_TXDISABLE_BIT)
+
+/* Meta2 specific bits. */
+#ifdef CONFIG_METAG_META12
+#define META2_PRIV	0
+#else
+#define META2_PRIV	(TXPRIVEXT_TXTIMER_BIT		| \
+			 TXPRIVEXT_TRACE_BIT)
+#endif
+
+/* Unaligned access checking bits. */
+#ifdef CONFIG_METAG_UNALIGNED
+#define UNALIGNED_PRIV	TXPRIVEXT_ALIGNREW_BIT
+#else
+#define UNALIGNED_PRIV	0
+#endif
+
+#define PRIV_BITS 	(DEFAULT_PRIV			| \
+			 META2_PRIV			| \
+			 UNALIGNED_PRIV)
+
+/*
+ * Protect access to:
+ * 0x06000000-0x07ffffff Direct mapped region
+ * 0x05000000-0x05ffffff MMU table region (Meta1)
+ * 0x04400000-0x047fffff Cache flush region
+ * 0x84000000-0x87ffffff Core cache memory region (Meta2)
+ *
+ * Allow access to:
+ * 0x80000000-0x81ffffff Core code memory region (Meta2)
+ */
+#ifdef CONFIG_METAG_META12
+#define PRIVSYSR_BITS	TXPRIVSYSR_ALL_BITS
+#else
+#define PRIVSYSR_BITS	(TXPRIVSYSR_ALL_BITS & ~TXPRIVSYSR_CORECODE_BIT)
+#endif
+
+/* Protect all 0x02xxxxxx and 0x048xxxxx. */
+#define PIOREG_BITS	0xffffffff
+
+/*
+ * Protect all 0x04000xx0 (system events)
+ * except write combiner flush and write fence (system events 4 and 5).
+ */
+#define PSYREG_BITS	0xfffffffb
+
+
+extern char _heap_start[];
+
+#ifdef CONFIG_METAG_BUILTIN_DTB
+extern u32 __dtb_start[];
+#endif
+
+#ifdef CONFIG_DA_CONSOLE
+/* Our early channel based console driver */
+extern struct console dash_console;
+#endif
+
+struct machine_desc *machine_desc __initdata;
+
+/*
+ * Map a Linux CPU number to a hardware thread ID
+ * In SMP this will be setup with the correct mapping at startup; in UP this
+ * will map to the HW thread on which we are running.
+ */
+u8 cpu_2_hwthread_id[NR_CPUS] __read_mostly = {
+	[0 ... NR_CPUS-1] = BAD_HWTHREAD_ID
+};
+
+/*
+ * Map a hardware thread ID to a Linux CPU number
+ * In SMP this will be fleshed out with the correct CPU ID for a particular
+ * hardware thread. In UP this will be initialised with the boot CPU ID.
+ */
+u8 hwthread_id_2_cpu[4] __read_mostly = {
+	[0 ... 3] = BAD_CPU_ID
+};
+
+/* The relative offset of the MMU mapped memory (from ldlk or bootloader)
+ * to the real physical memory.  This is needed as we have to use the
+ * physical addresses in the MMU tables (pte entries), and not the virtual
+ * addresses.
+ * This variable is used in the __pa() and __va() macros, and should
+ * probably only be used via them.
+ */
+unsigned int meta_memoffset;
+EXPORT_SYMBOL(meta_memoffset);
+
+static char __initdata *original_cmd_line;
+
+DEFINE_PER_CPU(PTBI, pTBI);
+
+/*
+ * Mapping are specified as "CPU_ID:HWTHREAD_ID", e.g.
+ *
+ *	"hwthread_map=0:1,1:2,2:3,3:0"
+ *
+ *	Linux CPU ID	HWTHREAD_ID
+ *	---------------------------
+ *	    0		      1
+ *	    1		      2
+ *	    2		      3
+ *	    3		      0
+ */
+static int __init parse_hwthread_map(char *p)
+{
+	int cpu;
+
+	while (*p) {
+		cpu = (*p++) - '0';
+		if (cpu < 0 || cpu > 9)
+			goto err_cpu;
+
+		p++;		/* skip semi-colon */
+		cpu_2_hwthread_id[cpu] = (*p++) - '0';
+		if (cpu_2_hwthread_id[cpu] >= 4)
+			goto err_thread;
+		hwthread_id_2_cpu[cpu_2_hwthread_id[cpu]] = cpu;
+
+		if (*p == ',')
+			p++;		/* skip comma */
+	}
+
+	return 0;
+err_cpu:
+	pr_err("%s: hwthread_map cpu argument out of range\n", __func__);
+	return -EINVAL;
+err_thread:
+	pr_err("%s: hwthread_map thread argument out of range\n", __func__);
+	return -EINVAL;
+}
+early_param("hwthread_map", parse_hwthread_map);
+
+void __init dump_machine_table(void)
+{
+	struct machine_desc *p;
+	const char **compat;
+
+	pr_info("Available machine support:\n\tNAME\t\tCOMPATIBLE LIST\n");
+	for_each_machine_desc(p) {
+		pr_info("\t%s\t[", p->name);
+		for (compat = p->dt_compat; compat && *compat; ++compat)
+			printk(" '%s'", *compat);
+		printk(" ]\n");
+	}
+
+	pr_info("\nPlease check your kernel config and/or bootloader.\n");
+
+	hard_processor_halt(HALT_PANIC);
+}
+
+#ifdef CONFIG_METAG_HALT_ON_PANIC
+static int metag_panic_event(struct notifier_block *this, unsigned long event,
+			     void *ptr)
+{
+	hard_processor_halt(HALT_PANIC);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block metag_panic_block = {
+	metag_panic_event,
+	NULL,
+	0
+};
+#endif
+
+void __init setup_arch(char **cmdline_p)
+{
+	unsigned long start_pfn;
+	unsigned long text_start = (unsigned long)(&_stext);
+	unsigned long cpu = smp_processor_id();
+	unsigned long heap_start, heap_end;
+	unsigned long start_pte;
+	PTBI _pTBI;
+	PTBISEG p_heap;
+	int heap_id, i;
+
+	metag_cache_probe();
+
+	metag_da_probe();
+#ifdef CONFIG_DA_CONSOLE
+	if (metag_da_enabled()) {
+		/* An early channel based console driver */
+		register_console(&dash_console);
+		add_preferred_console("ttyDA", 1, NULL);
+	}
+#endif
+
+	/* try interpreting the argument as a device tree */
+	machine_desc = setup_machine_fdt(original_cmd_line);
+	/* if it doesn't look like a device tree it must be a command line */
+	if (!machine_desc) {
+#ifdef CONFIG_METAG_BUILTIN_DTB
+		/* try the embedded device tree */
+		machine_desc = setup_machine_fdt(__dtb_start);
+		if (!machine_desc)
+			panic("Invalid embedded device tree.");
+#else
+		/* use the default machine description */
+		machine_desc = default_machine_desc();
+#endif
+#ifndef CONFIG_CMDLINE_FORCE
+		/* append the bootloader cmdline to any builtin fdt cmdline */
+		if (boot_command_line[0] && original_cmd_line[0])
+			strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+		strlcat(boot_command_line, original_cmd_line,
+			COMMAND_LINE_SIZE);
+#endif
+	}
+	setup_meta_clocks(machine_desc->clocks);
+
+	*cmdline_p = boot_command_line;
+	parse_early_param();
+
+	/*
+	 * Make sure we don't alias in dcache or icache
+	 */
+	check_for_cache_aliasing(cpu);
+
+
+#ifdef CONFIG_METAG_HALT_ON_PANIC
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &metag_panic_block);
+#endif
+
+#ifdef CONFIG_DUMMY_CONSOLE
+	conswitchp = &dummy_con;
+#endif
+
+	if (!(__core_reg_get(TXSTATUS) & TXSTATUS_PSTAT_BIT))
+		panic("Privilege must be enabled for this thread.");
+
+	_pTBI = __TBI(TBID_ISTAT_BIT);
+
+	per_cpu(pTBI, cpu) = _pTBI;
+
+	if (!per_cpu(pTBI, cpu))
+		panic("No TBI found!");
+
+	/*
+	 * Initialize all interrupt vectors to our copy of __TBIUnExpXXX,
+	 * rather than the version from the bootloader. This makes call
+	 * stacks easier to understand and may allow us to unmap the
+	 * bootloader at some point.
+	 *
+	 * We need to keep the LWK handler that TBI installed in order to
+	 * be able to do inter-thread comms.
+	 */
+	for (i = 0; i <= TBID_SIGNUM_MAX; i++)
+		if (i != TBID_SIGNUM_LWK)
+			_pTBI->fnSigs[i] = __TBIUnExpXXX;
+
+	/* A Meta requirement is that the kernel is loaded (virtually)
+	 * at the PAGE_OFFSET.
+	 */
+	if (PAGE_OFFSET != text_start)
+		panic("Kernel not loaded at PAGE_OFFSET (%#x) but at %#lx.",
+		      PAGE_OFFSET, text_start);
+
+	start_pte = mmu_read_second_level_page(text_start);
+
+	/*
+	 * Kernel pages should have the PRIV bit set by the bootloader.
+	 */
+	if (!(start_pte & _PAGE_KERNEL))
+		panic("kernel pte does not have PRIV set");
+
+	/*
+	 * See __pa and __va in include/asm/page.h.
+	 * This value is negative when running in local space but the
+	 * calculations work anyway.
+	 */
+	meta_memoffset = text_start - (start_pte & PAGE_MASK);
+
+	/* Now lets look at the heap space */
+	heap_id = (__TBIThreadId() & TBID_THREAD_BITS)
+		+ TBID_SEG(0, TBID_SEGSCOPE_LOCAL, TBID_SEGTYPE_HEAP);
+
+	p_heap = __TBIFindSeg(NULL, heap_id);
+
+	if (!p_heap)
+		panic("Could not find heap from TBI!");
+
+	/* The heap begins at the first full page after the kernel data. */
+	heap_start = (unsigned long) &_heap_start;
+
+	/* The heap ends at the end of the heap segment specified with
+	 * ldlk.
+	 */
+	if (is_global_space(text_start)) {
+		pr_debug("WARNING: running in global space!\n");
+		heap_end = (unsigned long)p_heap->pGAddr + p_heap->Bytes;
+	} else {
+		heap_end = (unsigned long)p_heap->pLAddr + p_heap->Bytes;
+	}
+
+	ROOT_DEV = Root_RAM0;
+
+	/* init_mm is the mm struct used for the first task.  It is then
+	 * cloned for all other tasks spawned from that task.
+	 *
+	 * Note - we are using the virtual addresses here.
+	 */
+	init_mm.start_code = (unsigned long)(&_stext);
+	init_mm.end_code = (unsigned long)(&_etext);
+	init_mm.end_data = (unsigned long)(&_edata);
+	init_mm.brk = (unsigned long)heap_start;
+
+	min_low_pfn = PFN_UP(__pa(text_start));
+	max_low_pfn = PFN_DOWN(__pa(heap_end));
+
+	pfn_base = min_low_pfn;
+
+	/* Round max_pfn up to a 4Mb boundary. The free_bootmem_node()
+	 * call later makes sure to keep the rounded up pages marked reserved.
+	 */
+	max_pfn = max_low_pfn + ((1 << MAX_ORDER) - 1);
+	max_pfn &= ~((1 << MAX_ORDER) - 1);
+
+	start_pfn = PFN_UP(__pa(heap_start));
+
+	if (min_low_pfn & ((1 << MAX_ORDER) - 1)) {
+		/* Theoretically, we could expand the space that the
+		 * bootmem allocator covers - much as we do for the
+		 * 'high' address, and then tell the bootmem system
+		 * that the lowest chunk is 'not available'.  Right
+		 * now it is just much easier to constrain the
+		 * user to always MAX_ORDER align their kernel space.
+		 */
+
+		panic("Kernel must be %d byte aligned, currently at %#lx.",
+		      1 << (MAX_ORDER + PAGE_SHIFT),
+		      min_low_pfn << PAGE_SHIFT);
+	}
+
+#ifdef CONFIG_HIGHMEM
+	highstart_pfn = highend_pfn = max_pfn;
+	high_memory = (void *) __va(PFN_PHYS(highstart_pfn));
+#else
+	high_memory = (void *)__va(PFN_PHYS(max_pfn));
+#endif
+
+	paging_init(heap_end);
+
+	setup_priv();
+
+	/* Setup the boot cpu's mapping. The rest will be setup below. */
+	cpu_2_hwthread_id[smp_processor_id()] = hard_processor_id();
+	hwthread_id_2_cpu[hard_processor_id()] = smp_processor_id();
+
+	/* Copy device tree blob into non-init memory before unflattening */
+	copy_fdt();
+	unflatten_device_tree();
+
+#ifdef CONFIG_SMP
+	smp_init_cpus();
+#endif
+
+	if (machine_desc->init_early)
+		machine_desc->init_early();
+}
+
+static int __init customize_machine(void)
+{
+	/* customizes platform devices, or adds new ones */
+	if (machine_desc->init_machine)
+		machine_desc->init_machine();
+	return 0;
+}
+arch_initcall(customize_machine);
+
+static int __init init_machine_late(void)
+{
+	if (machine_desc->init_late)
+		machine_desc->init_late();
+	return 0;
+}
+late_initcall(init_machine_late);
+
+#ifdef CONFIG_PROC_FS
+/*
+ *	Get CPU information for use by the procfs.
+ */
+static const char *get_cpu_capabilities(unsigned int txenable)
+{
+#ifdef CONFIG_METAG_META21
+	/* See CORE_ID in META HTP.GP TRM - Architecture Overview 2.1.238 */
+	int coreid = metag_in32(METAC_CORE_ID);
+	unsigned int dsp_type = (coreid >> 3) & 7;
+	unsigned int fpu_type = (coreid >> 7) & 3;
+
+	switch (dsp_type | fpu_type << 3) {
+	case (0x00): return "EDSP";
+	case (0x01): return "DSP";
+	case (0x08): return "EDSP+LFPU";
+	case (0x09): return "DSP+LFPU";
+	case (0x10): return "EDSP+FPU";
+	case (0x11): return "DSP+FPU";
+	}
+	return "UNKNOWN";
+
+#else
+	if (!(txenable & TXENABLE_CLASS_BITS))
+		return "DSP";
+	else
+		return "";
+#endif
+}
+
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	const char *cpu;
+	unsigned int txenable, thread_id, major, minor;
+	unsigned long clockfreq = get_coreclock();
+#ifdef CONFIG_SMP
+	int i;
+	unsigned long lpj;
+#endif
+
+	cpu = "META";
+
+	txenable = __core_reg_get(TXENABLE);
+	major = (txenable & TXENABLE_MAJOR_REV_BITS) >> TXENABLE_MAJOR_REV_S;
+	minor = (txenable & TXENABLE_MINOR_REV_BITS) >> TXENABLE_MINOR_REV_S;
+	thread_id = (txenable >> 8) & 0x3;
+
+#ifdef CONFIG_SMP
+	for_each_online_cpu(i) {
+		lpj = per_cpu(cpu_data, i).loops_per_jiffy;
+		txenable = core_reg_read(TXUCT_ID, TXENABLE_REGNUM,
+							cpu_2_hwthread_id[i]);
+
+		seq_printf(m, "CPU:\t\t%s %d.%d (thread %d)\n"
+			      "Clocking:\t%lu.%1luMHz\n"
+			      "BogoMips:\t%lu.%02lu\n"
+			      "Calibration:\t%lu loops\n"
+			      "Capabilities:\t%s\n\n",
+			      cpu, major, minor, i,
+			      clockfreq / 1000000, (clockfreq / 100000) % 10,
+			      lpj / (500000 / HZ), (lpj / (5000 / HZ)) % 100,
+			      lpj,
+			      get_cpu_capabilities(txenable));
+	}
+#else
+	seq_printf(m, "CPU:\t\t%s %d.%d (thread %d)\n"
+		   "Clocking:\t%lu.%1luMHz\n"
+		   "BogoMips:\t%lu.%02lu\n"
+		   "Calibration:\t%lu loops\n"
+		   "Capabilities:\t%s\n",
+		   cpu, major, minor, thread_id,
+		   clockfreq / 1000000, (clockfreq / 100000) % 10,
+		   loops_per_jiffy / (500000 / HZ),
+		   (loops_per_jiffy / (5000 / HZ)) % 100,
+		   loops_per_jiffy,
+		   get_cpu_capabilities(txenable));
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_METAG_L2C
+	if (meta_l2c_is_present()) {
+		seq_printf(m, "L2 cache:\t%s\n"
+			      "L2 cache size:\t%d KB\n",
+			      meta_l2c_is_enabled() ? "enabled" : "disabled",
+			      meta_l2c_size() >> 10);
+	}
+#endif
+	return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return (void *)(*pos == 0);
+}
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return NULL;
+}
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+const struct seq_operations cpuinfo_op = {
+	.start = c_start,
+	.next  = c_next,
+	.stop  = c_stop,
+	.show  = show_cpuinfo,
+};
+#endif /* CONFIG_PROC_FS */
+
+void __init metag_start_kernel(char *args)
+{
+	/* Zero the timer register so timestamps are from the point at
+	 * which the kernel started running.
+	 */
+	__core_reg_set(TXTIMER, 0);
+
+	/* Clear the bss. */
+	memset(__bss_start, 0,
+	       (unsigned long)__bss_stop - (unsigned long)__bss_start);
+
+	/* Remember where these are for use in setup_arch */
+	original_cmd_line = args;
+
+	current_thread_info()->cpu = hard_processor_id();
+
+	start_kernel();
+}
+
+/**
+ * setup_priv() - Set up privilege protection registers.
+ *
+ * Set up privilege protection registers such as TXPRIVEXT to prevent userland
+ * from touching our precious registers and sensitive memory areas.
+ */
+void setup_priv(void)
+{
+	unsigned int offset = hard_processor_id() << TXPRIVREG_STRIDE_S;
+
+	__core_reg_set(TXPRIVEXT, PRIV_BITS);
+
+	metag_out32(PRIVSYSR_BITS, T0PRIVSYSR + offset);
+	metag_out32(PIOREG_BITS,   T0PIOREG   + offset);
+	metag_out32(PSYREG_BITS,   T0PSYREG   + offset);
+}
+
+PTBI pTBI_get(unsigned int cpu)
+{
+	return per_cpu(pTBI, cpu);
+}
+EXPORT_SYMBOL(pTBI_get);
+
+#if defined(CONFIG_METAG_DSP) && defined(CONFIG_METAG_FPU)
+char capabilites[] = "dsp fpu";
+#elif defined(CONFIG_METAG_DSP)
+char capabilites[] = "dsp";
+#elif defined(CONFIG_METAG_FPU)
+char capabilites[] = "fpu";
+#else
+char capabilites[] = "";
+#endif
+
+static struct ctl_table caps_kern_table[] = {
+	{
+		.procname	= "capabilities",
+		.data		= capabilites,
+		.maxlen		= sizeof(capabilites),
+		.mode		= 0444,
+		.proc_handler	= proc_dostring,
+	},
+	{}
+};
+
+static struct ctl_table caps_root_table[] = {
+	{
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= caps_kern_table,
+	},
+	{}
+};
+
+static int __init capabilities_register_sysctl(void)
+{
+	struct ctl_table_header *caps_table_header;
+
+	caps_table_header = register_sysctl_table(caps_root_table);
+	if (!caps_table_header) {
+		pr_err("Unable to register CAPABILITIES sysctl\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+core_initcall(capabilities_register_sysctl);
diff --git a/arch/metag/kernel/signal.c b/arch/metag/kernel/signal.c
new file mode 100644
index 0000000..3be61cf
--- /dev/null
+++ b/arch/metag/kernel/signal.c
@@ -0,0 +1,344 @@
+/*
+ *  Copyright (C) 1991,1992  Linus Torvalds
+ *  Copyright (C) 2005-2012  Imagination Technologies Ltd.
+ *
+ *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <linux/uaccess.h>
+#include <linux/tracehook.h>
+
+#include <asm/ucontext.h>
+#include <asm/cacheflush.h>
+#include <asm/switch.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
+
+#define REG_FLAGS	ctx.SaveMask
+#define REG_RETVAL	ctx.DX[0].U0
+#define REG_SYSCALL	ctx.DX[0].U1
+#define REG_SP		ctx.AX[0].U0
+#define REG_ARG1	ctx.DX[3].U1
+#define REG_ARG2	ctx.DX[3].U0
+#define REG_ARG3	ctx.DX[2].U1
+#define REG_PC		ctx.CurrPC
+#define REG_RTP		ctx.DX[4].U1
+
+struct rt_sigframe {
+	struct siginfo info;
+	struct ucontext uc;
+	unsigned long retcode[2];
+};
+
+static int restore_sigcontext(struct pt_regs *regs,
+			      struct sigcontext __user *sc)
+{
+	int err;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+	err = metag_gp_regs_copyin(regs, 0, sizeof(struct user_gp_regs), NULL,
+				   &sc->regs);
+	if (!err)
+		err = metag_cb_regs_copyin(regs, 0,
+					   sizeof(struct user_cb_regs), NULL,
+					   &sc->cb);
+	if (!err)
+		err = metag_rp_state_copyin(regs, 0,
+					    sizeof(struct user_rp_state), NULL,
+					    &sc->rp);
+
+	/* This is a user-mode context. */
+	regs->REG_FLAGS |= TBICTX_PRIV_BIT;
+
+	return err;
+}
+
+long sys_rt_sigreturn(void)
+{
+	/* NOTE - Meta stack goes UPWARDS - so we wind the stack back */
+	struct pt_regs *regs = current_pt_regs();
+	struct rt_sigframe __user *frame;
+	sigset_t set;
+
+	frame = (__force struct rt_sigframe __user *)(regs->REG_SP -
+						      sizeof(*frame));
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	set_current_blocked(&set);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+		goto badframe;
+
+	if (restore_altstack(&frame->uc.uc_stack))
+		goto badframe;
+
+	return regs->REG_RETVAL;
+
+badframe:
+	force_sig(SIGSEGV, current);
+
+	return 0;
+}
+
+static int setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
+			    unsigned long mask)
+{
+	int err;
+
+	err = metag_gp_regs_copyout(regs, 0, sizeof(struct user_gp_regs), NULL,
+				    &sc->regs);
+
+	if (!err)
+		err = metag_cb_regs_copyout(regs, 0,
+					    sizeof(struct user_cb_regs), NULL,
+					    &sc->cb);
+	if (!err)
+		err = metag_rp_state_copyout(regs, 0,
+					     sizeof(struct user_rp_state), NULL,
+					     &sc->rp);
+
+	/* OK, clear that cbuf flag in the old context, or our stored
+	 * catch buffer will be restored when we go to call the signal
+	 * handler. Also clear out the CBRP RA/RD pipe bit incase
+	 * that is pending as well!
+	 * Note that as we have already stored this context, these
+	 * flags will get restored on sigreturn to their original
+	 * state.
+	 */
+	regs->REG_FLAGS &= ~(TBICTX_XCBF_BIT | TBICTX_CBUF_BIT |
+			     TBICTX_CBRP_BIT);
+
+	/* Clear out the LSM_STEP bits in case we are in the middle of
+	 * and MSET/MGET.
+	 */
+	regs->ctx.Flags &= ~TXSTATUS_LSM_STEP_BITS;
+
+	err |= __put_user(mask, &sc->oldmask);
+
+	return err;
+}
+
+/*
+ * Determine which stack to use..
+ */
+static void __user *get_sigframe(struct k_sigaction *ka, unsigned long sp,
+				 size_t frame_size)
+{
+	/* Meta stacks grows upwards */
+	if ((ka->sa.sa_flags & SA_ONSTACK) && (sas_ss_flags(sp) == 0))
+		sp = current->sas_ss_sp;
+
+	sp = (sp + 7) & ~7;			/* 8byte align stack */
+
+	return (void __user *)sp;
+}
+
+static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			  sigset_t *set, struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	int err = -EFAULT;
+	unsigned long code;
+
+	frame = get_sigframe(ka, regs->REG_SP, sizeof(*frame));
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		goto out;
+
+	err = copy_siginfo_to_user(&frame->info, info);
+
+	/* Create the ucontext.  */
+	err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(0, (unsigned long __user *)&frame->uc.uc_link);
+	err |= __save_altstack(&frame->uc.uc_stack, regs->REG_SP);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext,
+				regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+	if (err)
+		goto out;
+
+	/* Set up to return from userspace.  */
+
+	/* MOV D1Re0 (D1.0), #__NR_rt_sigreturn */
+	code = 0x03000004 | (__NR_rt_sigreturn << 3);
+	err |= __put_user(code, (unsigned long __user *)(&frame->retcode[0]));
+
+	/* SWITCH #__METAG_SW_SYS */
+	code = __METAG_SW_ENCODING(SYS);
+	err |= __put_user(code, (unsigned long __user *)(&frame->retcode[1]));
+
+	if (err)
+		goto out;
+
+	/* Set up registers for signal handler */
+	regs->REG_RTP = (unsigned long) frame->retcode;
+	regs->REG_SP = (unsigned long) frame + sizeof(*frame);
+	regs->REG_ARG1 = sig;
+	regs->REG_ARG2 = (unsigned long) &frame->info;
+	regs->REG_ARG3 = (unsigned long) &frame->uc;
+	regs->REG_PC = (unsigned long) ka->sa.sa_handler;
+
+	pr_debug("SIG deliver (%s:%d): sp=%p pc=%08x pr=%08x\n",
+		 current->comm, current->pid, frame, regs->REG_PC,
+		 regs->REG_RTP);
+
+	/* Now pass size of 'new code' into sigtramp so we can do a more
+	 * effective cache flush - directed rather than 'full flush'.
+	 */
+	flush_cache_sigtramp(regs->REG_RTP, sizeof(frame->retcode));
+out:
+	if (err) {
+		force_sigsegv(sig, current);
+		return -EFAULT;
+	}
+	return 0;
+}
+
+static void handle_signal(unsigned long sig, siginfo_t *info,
+			  struct k_sigaction *ka, struct pt_regs *regs)
+{
+	sigset_t *oldset = sigmask_to_save();
+
+	/* Set up the stack frame */
+	if (setup_rt_frame(sig, ka, info, oldset, regs))
+		return;
+
+	signal_delivered(sig, info, ka, regs, test_thread_flag(TIF_SINGLESTEP));
+}
+
+ /*
+  * Notes for Meta.
+  * We have moved from the old 2.4.9 SH way of using syscall_nr (in the stored
+  * context) to passing in the syscall flag on the stack.
+  * This is because having syscall_nr in our context does not fit with TBX, and
+  * corrupted the stack.
+  */
+static int do_signal(struct pt_regs *regs, int syscall)
+{
+	unsigned int retval = 0, continue_addr = 0, restart_addr = 0;
+	struct k_sigaction ka;
+	siginfo_t info;
+	int signr;
+	int restart = 0;
+
+	/*
+	 * By the end of rt_sigreturn the context describes the point that the
+	 * signal was taken (which may happen to be just before a syscall if
+	 * it's already been restarted). This should *never* be mistaken for a
+	 * system call in need of restarting.
+	 */
+	if (syscall == __NR_rt_sigreturn)
+		syscall = -1;
+
+	/* Did we come from a system call? */
+	if (syscall >= 0) {
+		continue_addr = regs->REG_PC;
+		restart_addr = continue_addr - 4;
+		retval = regs->REG_RETVAL;
+
+		/*
+		 * Prepare for system call restart. We do this here so that a
+		 * debugger will see the already changed PC.
+		 */
+		switch (retval) {
+		case -ERESTART_RESTARTBLOCK:
+			restart = -2;
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			++restart;
+			regs->REG_PC = restart_addr;
+			break;
+		}
+	}
+
+	/*
+	 * Get the signal to deliver. When running under ptrace, at this point
+	 * the debugger may change all our registers ...
+	 */
+	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+	/*
+	 * Depending on the signal settings we may need to revert the decision
+	 * to restart the system call. But skip this if a debugger has chosen to
+	 * restart at a different PC.
+	 */
+	if (regs->REG_PC != restart_addr)
+		restart = 0;
+	if (signr > 0) {
+		if (unlikely(restart)) {
+			if (retval == -ERESTARTNOHAND
+			    || retval == -ERESTART_RESTARTBLOCK
+			    || (retval == -ERESTARTSYS
+				&& !(ka.sa.sa_flags & SA_RESTART))) {
+				regs->REG_RETVAL = -EINTR;
+				regs->REG_PC = continue_addr;
+			}
+		}
+
+		/* Whee! Actually deliver the signal.  */
+		handle_signal(signr, &info, &ka, regs);
+		return 0;
+	}
+
+	/* Handlerless -ERESTART_RESTARTBLOCK re-enters via restart_syscall */
+	if (unlikely(restart < 0))
+		regs->REG_SYSCALL = __NR_restart_syscall;
+
+	/*
+	 * If there's no signal to deliver, we just put the saved sigmask back.
+	 */
+	restore_saved_sigmask();
+
+	return restart;
+}
+
+int do_work_pending(struct pt_regs *regs, unsigned int thread_flags,
+		    int syscall)
+{
+	do {
+		if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+			schedule();
+		} else {
+			if (unlikely(!user_mode(regs)))
+				return 0;
+			local_irq_enable();
+			if (thread_flags & _TIF_SIGPENDING) {
+				int restart = do_signal(regs, syscall);
+				if (unlikely(restart)) {
+					/*
+					 * Restart without handlers.
+					 * Deal with it without leaving
+					 * the kernel space.
+					 */
+					return restart;
+				}
+				syscall = -1;
+			} else {
+				clear_thread_flag(TIF_NOTIFY_RESUME);
+				tracehook_notify_resume(regs);
+			}
+		}
+		local_irq_disable();
+		thread_flags = current_thread_info()->flags;
+	} while (thread_flags & _TIF_WORK_MASK);
+	return 0;
+}
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
new file mode 100644
index 0000000..4b6d1f14
--- /dev/null
+++ b/arch/metag/kernel/smp.c
@@ -0,0 +1,575 @@
+/*
+ *  Copyright (C) 2009,2010,2011 Imagination Technologies Ltd.
+ *
+ *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/cache.h>
+#include <linux/profile.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/seq_file.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cachepart.h>
+#include <asm/core_reg.h>
+#include <asm/cpu.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/setup.h>
+#include <asm/tlbflush.h>
+#include <asm/hwthread.h>
+#include <asm/traps.h>
+
+DECLARE_PER_CPU(PTBI, pTBI);
+
+void *secondary_data_stack;
+
+/*
+ * structures for inter-processor calls
+ * - A collection of single bit ipi messages.
+ */
+struct ipi_data {
+	spinlock_t lock;
+	unsigned long ipi_count;
+	unsigned long bits;
+};
+
+static DEFINE_PER_CPU(struct ipi_data, ipi_data) = {
+	.lock	= __SPIN_LOCK_UNLOCKED(ipi_data.lock),
+};
+
+static DEFINE_SPINLOCK(boot_lock);
+
+/*
+ * "thread" is assumed to be a valid Meta hardware thread ID.
+ */
+int __cpuinit boot_secondary(unsigned int thread, struct task_struct *idle)
+{
+	u32 val;
+
+	/*
+	 * set synchronisation state between this boot processor
+	 * and the secondary one
+	 */
+	spin_lock(&boot_lock);
+
+	core_reg_write(TXUPC_ID, 0, thread, (unsigned int)secondary_startup);
+	core_reg_write(TXUPC_ID, 1, thread, 0);
+
+	/*
+	 * Give the thread privilege (PSTAT) and clear potentially problematic
+	 * bits in the process (namely ISTAT, CBMarker, CBMarkerI, LSM_STEP).
+	 */
+	core_reg_write(TXUCT_ID, TXSTATUS_REGNUM, thread, TXSTATUS_PSTAT_BIT);
+
+	/* Clear the minim enable bit. */
+	val = core_reg_read(TXUCT_ID, TXPRIVEXT_REGNUM, thread);
+	core_reg_write(TXUCT_ID, TXPRIVEXT_REGNUM, thread, val & ~0x80);
+
+	/*
+	 * set the ThreadEnable bit (0x1) in the TXENABLE register
+	 * for the specified thread - off it goes!
+	 */
+	val = core_reg_read(TXUCT_ID, TXENABLE_REGNUM, thread);
+	core_reg_write(TXUCT_ID, TXENABLE_REGNUM, thread, val | 0x1);
+
+	/*
+	 * now the secondary core is starting up let it run its
+	 * calibrations, then wait for it to finish
+	 */
+	spin_unlock(&boot_lock);
+
+	return 0;
+}
+
+int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
+{
+	unsigned int thread = cpu_2_hwthread_id[cpu];
+	int ret;
+
+	load_pgd(swapper_pg_dir, thread);
+
+	flush_tlb_all();
+
+	/*
+	 * Tell the secondary CPU where to find its idle thread's stack.
+	 */
+	secondary_data_stack = task_stack_page(idle);
+
+	wmb();
+
+	/*
+	 * Now bring the CPU into our world.
+	 */
+	ret = boot_secondary(thread, idle);
+	if (ret == 0) {
+		unsigned long timeout;
+
+		/*
+		 * CPU was successfully started, wait for it
+		 * to come online or time out.
+		 */
+		timeout = jiffies + HZ;
+		while (time_before(jiffies, timeout)) {
+			if (cpu_online(cpu))
+				break;
+
+			udelay(10);
+			barrier();
+		}
+
+		if (!cpu_online(cpu))
+			ret = -EIO;
+	}
+
+	secondary_data_stack = NULL;
+
+	if (ret) {
+		pr_crit("CPU%u: processor failed to boot\n", cpu);
+
+		/*
+		 * FIXME: We need to clean up the new idle thread. --rmk
+		 */
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static DECLARE_COMPLETION(cpu_killed);
+
+/*
+ * __cpu_disable runs on the processor to be shutdown.
+ */
+int __cpuexit __cpu_disable(void)
+{
+	unsigned int cpu = smp_processor_id();
+	struct task_struct *p;
+
+	/*
+	 * Take this CPU offline.  Once we clear this, we can't return,
+	 * and we must not schedule until we're ready to give up the cpu.
+	 */
+	set_cpu_online(cpu, false);
+
+	/*
+	 * OK - migrate IRQs away from this CPU
+	 */
+	migrate_irqs();
+
+	/*
+	 * Flush user cache and TLB mappings, and then remove this CPU
+	 * from the vm mask set of all processes.
+	 */
+	flush_cache_all();
+	local_flush_tlb_all();
+
+	read_lock(&tasklist_lock);
+	for_each_process(p) {
+		if (p->mm)
+			cpumask_clear_cpu(cpu, mm_cpumask(p->mm));
+	}
+	read_unlock(&tasklist_lock);
+
+	return 0;
+}
+
+/*
+ * called on the thread which is asking for a CPU to be shutdown -
+ * waits until shutdown has completed, or it is timed out.
+ */
+void __cpuexit __cpu_die(unsigned int cpu)
+{
+	if (!wait_for_completion_timeout(&cpu_killed, msecs_to_jiffies(1)))
+		pr_err("CPU%u: unable to kill\n", cpu);
+}
+
+/*
+ * Called from the idle thread for the CPU which has been shutdown.
+ *
+ * Note that we do not return from this function. If this cpu is
+ * brought online again it will need to run secondary_startup().
+ */
+void __cpuexit cpu_die(void)
+{
+	local_irq_disable();
+	idle_task_exit();
+
+	complete(&cpu_killed);
+
+	asm ("XOR	TXENABLE, D0Re0,D0Re0\n");
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/*
+ * Called by both boot and secondaries to move global data into
+ * per-processor storage.
+ */
+void __cpuinit smp_store_cpu_info(unsigned int cpuid)
+{
+	struct cpuinfo_metag *cpu_info = &per_cpu(cpu_data, cpuid);
+
+	cpu_info->loops_per_jiffy = loops_per_jiffy;
+}
+
+/*
+ * This is the secondary CPU boot entry.  We're using this CPUs
+ * idle thread stack and the global page tables.
+ */
+asmlinkage void secondary_start_kernel(void)
+{
+	struct mm_struct *mm = &init_mm;
+	unsigned int cpu = smp_processor_id();
+
+	/*
+	 * All kernel threads share the same mm context; grab a
+	 * reference and switch to it.
+	 */
+	atomic_inc(&mm->mm_users);
+	atomic_inc(&mm->mm_count);
+	current->active_mm = mm;
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
+	enter_lazy_tlb(mm, current);
+	local_flush_tlb_all();
+
+	/*
+	 * TODO: Some day it might be useful for each Linux CPU to
+	 * have its own TBI structure. That would allow each Linux CPU
+	 * to run different interrupt handlers for the same IRQ
+	 * number.
+	 *
+	 * For now, simply copying the pointer to the boot CPU's TBI
+	 * structure is sufficient because we always want to run the
+	 * same interrupt handler whatever CPU takes the interrupt.
+	 */
+	per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);
+
+	if (!per_cpu(pTBI, cpu))
+		panic("No TBI found!");
+
+	per_cpu_trap_init(cpu);
+
+	preempt_disable();
+
+	setup_priv();
+
+	/*
+	 * Enable local interrupts.
+	 */
+	tbi_startup_interrupt(TBID_SIGNUM_TRT);
+	notify_cpu_starting(cpu);
+	local_irq_enable();
+
+	pr_info("CPU%u (thread %u): Booted secondary processor\n",
+		cpu, cpu_2_hwthread_id[cpu]);
+
+	calibrate_delay();
+	smp_store_cpu_info(cpu);
+
+	/*
+	 * OK, now it's safe to let the boot CPU continue
+	 */
+	set_cpu_online(cpu, true);
+
+	/*
+	 * Check for cache aliasing.
+	 * Preemption is disabled
+	 */
+	check_for_cache_aliasing(cpu);
+
+	/*
+	 * OK, it's off to the idle thread for us
+	 */
+	cpu_idle();
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+	int cpu;
+	unsigned long bogosum = 0;
+
+	for_each_online_cpu(cpu)
+		bogosum += per_cpu(cpu_data, cpu).loops_per_jiffy;
+
+	pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+		num_online_cpus(),
+		bogosum / (500000/HZ),
+		(bogosum / (5000/HZ)) % 100);
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	unsigned int cpu = smp_processor_id();
+
+	init_new_context(current, &init_mm);
+	current_thread_info()->cpu = cpu;
+
+	smp_store_cpu_info(cpu);
+	init_cpu_present(cpu_possible_mask);
+}
+
+void __init smp_prepare_boot_cpu(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);
+
+	if (!per_cpu(pTBI, cpu))
+		panic("No TBI found!");
+}
+
+static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg);
+
+static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg)
+{
+	unsigned long flags;
+	unsigned int cpu;
+	cpumask_t map;
+
+	cpumask_clear(&map);
+	local_irq_save(flags);
+
+	for_each_cpu(cpu, mask) {
+		struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
+
+		spin_lock(&ipi->lock);
+
+		/*
+		 * KICK interrupts are queued in hardware so we'll get
+		 * multiple interrupts if we call smp_cross_call()
+		 * multiple times for one msg. The problem is that we
+		 * only have one bit for each message - we can't queue
+		 * them in software.
+		 *
+		 * The first time through ipi_handler() we'll clear
+		 * the msg bit, having done all the work. But when we
+		 * return we'll get _another_ interrupt (and another,
+		 * and another until we've handled all the queued
+		 * KICKs). Running ipi_handler() when there's no work
+		 * to do is bad because that's how kick handler
+		 * chaining detects who the KICK was intended for.
+		 * See arch/metag/kernel/kick.c for more details.
+		 *
+		 * So only add 'cpu' to 'map' if we haven't already
+		 * queued a KICK interrupt for 'msg'.
+		 */
+		if (!(ipi->bits & (1 << msg))) {
+			ipi->bits |= 1 << msg;
+			cpumask_set_cpu(cpu, &map);
+		}
+
+		spin_unlock(&ipi->lock);
+	}
+
+	/*
+	 * Call the platform specific cross-CPU call function.
+	 */
+	smp_cross_call(map, msg);
+
+	local_irq_restore(flags);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	send_ipi_message(mask, IPI_CALL_FUNC);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+	send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+}
+
+void show_ipi_list(struct seq_file *p)
+{
+	unsigned int cpu;
+
+	seq_puts(p, "IPI:");
+
+	for_each_present_cpu(cpu)
+		seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count);
+
+	seq_putc(p, '\n');
+}
+
+static DEFINE_SPINLOCK(stop_lock);
+
+/*
+ * Main handler for inter-processor interrupts
+ *
+ * For Meta, the ipimask now only identifies a single
+ * category of IPI (Bit 1 IPIs have been replaced by a
+ * different mechanism):
+ *
+ *  Bit 0 - Inter-processor function call
+ */
+static int do_IPI(struct pt_regs *regs)
+{
+	unsigned int cpu = smp_processor_id();
+	struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	unsigned long msgs, nextmsg;
+	int handled = 0;
+
+	ipi->ipi_count++;
+
+	spin_lock(&ipi->lock);
+	msgs = ipi->bits;
+	nextmsg = msgs & -msgs;
+	ipi->bits &= ~nextmsg;
+	spin_unlock(&ipi->lock);
+
+	if (nextmsg) {
+		handled = 1;
+
+		nextmsg = ffz(~nextmsg);
+		switch (nextmsg) {
+		case IPI_RESCHEDULE:
+			scheduler_ipi();
+			break;
+
+		case IPI_CALL_FUNC:
+			generic_smp_call_function_interrupt();
+			break;
+
+		case IPI_CALL_FUNC_SINGLE:
+			generic_smp_call_function_single_interrupt();
+			break;
+
+		default:
+			pr_crit("CPU%u: Unknown IPI message 0x%lx\n",
+				cpu, nextmsg);
+			break;
+		}
+	}
+
+	set_irq_regs(old_regs);
+
+	return handled;
+}
+
+void smp_send_reschedule(int cpu)
+{
+	send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
+}
+
+static void stop_this_cpu(void *data)
+{
+	unsigned int cpu = smp_processor_id();
+
+	if (system_state == SYSTEM_BOOTING ||
+	    system_state == SYSTEM_RUNNING) {
+		spin_lock(&stop_lock);
+		pr_crit("CPU%u: stopping\n", cpu);
+		dump_stack();
+		spin_unlock(&stop_lock);
+	}
+
+	set_cpu_online(cpu, false);
+
+	local_irq_disable();
+
+	hard_processor_halt(HALT_OK);
+}
+
+void smp_send_stop(void)
+{
+	smp_call_function(stop_this_cpu, NULL, 0);
+}
+
+/*
+ * not supported here
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
+
+/*
+ * We use KICKs for inter-processor interrupts.
+ *
+ * For every CPU in "callmap" the IPI data must already have been
+ * stored in that CPU's "ipi_data" member prior to calling this
+ * function.
+ */
+static void kick_raise_softirq(cpumask_t callmap, unsigned int irq)
+{
+	int cpu;
+
+	for_each_cpu(cpu, &callmap) {
+		unsigned int thread;
+
+		thread = cpu_2_hwthread_id[cpu];
+
+		BUG_ON(thread == BAD_HWTHREAD_ID);
+
+		metag_out32(1, T0KICKI + (thread * TnXKICK_STRIDE));
+	}
+}
+
+static TBIRES ipi_handler(TBIRES State, int SigNum, int Triggers,
+		   int Inst, PTBI pTBI, int *handled)
+{
+	*handled = do_IPI((struct pt_regs *)State.Sig.pCtx);
+
+	return State;
+}
+
+static struct kick_irq_handler ipi_irq = {
+	.func = ipi_handler,
+};
+
+static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg)
+{
+	kick_raise_softirq(callmap, 1);
+}
+
+static inline unsigned int get_core_count(void)
+{
+	int i;
+	unsigned int ret = 0;
+
+	for (i = 0; i < CONFIG_NR_CPUS; i++) {
+		if (core_reg_read(TXUCT_ID, TXENABLE_REGNUM, i))
+			ret++;
+	}
+
+	return ret;
+}
+
+/*
+ * Initialise the CPU possible map early - this describes the CPUs
+ * which may be present or become present in the system.
+ */
+void __init smp_init_cpus(void)
+{
+	unsigned int i, ncores = get_core_count();
+
+	/* If no hwthread_map early param was set use default mapping */
+	for (i = 0; i < NR_CPUS; i++)
+		if (cpu_2_hwthread_id[i] == BAD_HWTHREAD_ID) {
+			cpu_2_hwthread_id[i] = i;
+			hwthread_id_2_cpu[i] = i;
+		}
+
+	for (i = 0; i < ncores; i++)
+		set_cpu_possible(i, true);
+
+	kick_register_func(&ipi_irq);
+}
diff --git a/arch/metag/kernel/stacktrace.c b/arch/metag/kernel/stacktrace.c
new file mode 100644
index 0000000..5510361
--- /dev/null
+++ b/arch/metag/kernel/stacktrace.c
@@ -0,0 +1,187 @@
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+
+#include <asm/stacktrace.h>
+
+#if defined(CONFIG_FRAME_POINTER)
+
+#ifdef CONFIG_KALLSYMS
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+
+static unsigned long tbi_boing_addr;
+static unsigned long tbi_boing_size;
+
+static void tbi_boing_init(void)
+{
+	/* We need to know where TBIBoingVec is and it's size */
+	unsigned long size;
+	unsigned long offset;
+	char modname[MODULE_NAME_LEN];
+	char name[KSYM_NAME_LEN];
+	tbi_boing_addr = kallsyms_lookup_name("___TBIBoingVec");
+	if (!tbi_boing_addr)
+		tbi_boing_addr = 1;
+	else if (!lookup_symbol_attrs(tbi_boing_addr, &size,
+				      &offset, modname, name))
+		tbi_boing_size = size;
+}
+#endif
+
+#define ALIGN_DOWN(addr, size)  ((addr)&(~((size)-1)))
+
+/*
+ * Unwind the current stack frame and store the new register values in the
+ * structure passed as argument. Unwinding is equivalent to a function return,
+ * hence the new PC value rather than LR should be used for backtrace.
+ */
+int notrace unwind_frame(struct stackframe *frame)
+{
+	struct metag_frame *fp = (struct metag_frame *)frame->fp;
+	unsigned long lr;
+	unsigned long fpnew;
+
+	if (frame->fp & 0x7)
+		return -EINVAL;
+
+	fpnew = fp->fp;
+	lr = fp->lr - 4;
+
+#ifdef CONFIG_KALLSYMS
+	/* If we've reached TBIBoingVec then we're at an interrupt
+	 * entry point or a syscall entry point. The frame pointer
+	 * points to a pt_regs which can be used to continue tracing on
+	 * the other side of the boing.
+	 */
+	if (!tbi_boing_addr)
+		tbi_boing_init();
+	if (tbi_boing_size && lr >= tbi_boing_addr &&
+	    lr < tbi_boing_addr + tbi_boing_size) {
+		struct pt_regs *regs = (struct pt_regs *)fpnew;
+		if (user_mode(regs))
+			return -EINVAL;
+		fpnew = regs->ctx.AX[1].U0;
+		lr = regs->ctx.DX[4].U1;
+	}
+#endif
+
+	/* stack grows up, so frame pointers must decrease */
+	if (fpnew < (ALIGN_DOWN((unsigned long)fp, THREAD_SIZE) +
+		     sizeof(struct thread_info)) || fpnew >= (unsigned long)fp)
+		return -EINVAL;
+
+	/* restore the registers from the stack frame */
+	frame->fp = fpnew;
+	frame->pc = lr;
+
+	return 0;
+}
+#else
+int notrace unwind_frame(struct stackframe *frame)
+{
+	struct metag_frame *sp = (struct metag_frame *)frame->sp;
+
+	if (frame->sp & 0x7)
+		return -EINVAL;
+
+	while (!kstack_end(sp)) {
+		unsigned long addr = sp->lr - 4;
+		sp--;
+
+		if (__kernel_text_address(addr)) {
+			frame->sp = (unsigned long)sp;
+			frame->pc = addr;
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+#endif
+
+void notrace walk_stackframe(struct stackframe *frame,
+		     int (*fn)(struct stackframe *, void *), void *data)
+{
+	while (1) {
+		int ret;
+
+		if (fn(frame, data))
+			break;
+		ret = unwind_frame(frame);
+		if (ret < 0)
+			break;
+	}
+}
+EXPORT_SYMBOL(walk_stackframe);
+
+#ifdef CONFIG_STACKTRACE
+struct stack_trace_data {
+	struct stack_trace *trace;
+	unsigned int no_sched_functions;
+	unsigned int skip;
+};
+
+static int save_trace(struct stackframe *frame, void *d)
+{
+	struct stack_trace_data *data = d;
+	struct stack_trace *trace = data->trace;
+	unsigned long addr = frame->pc;
+
+	if (data->no_sched_functions && in_sched_functions(addr))
+		return 0;
+	if (data->skip) {
+		data->skip--;
+		return 0;
+	}
+
+	trace->entries[trace->nr_entries++] = addr;
+
+	return trace->nr_entries >= trace->max_entries;
+}
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	struct stack_trace_data data;
+	struct stackframe frame;
+
+	data.trace = trace;
+	data.skip = trace->skip;
+
+	if (tsk != current) {
+#ifdef CONFIG_SMP
+		/*
+		 * What guarantees do we have here that 'tsk' is not
+		 * running on another CPU?  For now, ignore it as we
+		 * can't guarantee we won't explode.
+		 */
+		if (trace->nr_entries < trace->max_entries)
+			trace->entries[trace->nr_entries++] = ULONG_MAX;
+		return;
+#else
+		data.no_sched_functions = 1;
+		frame.fp = thread_saved_fp(tsk);
+		frame.sp = thread_saved_sp(tsk);
+		frame.lr = 0;		/* recovered from the stack */
+		frame.pc = thread_saved_pc(tsk);
+#endif
+	} else {
+		register unsigned long current_sp asm ("A0StP");
+
+		data.no_sched_functions = 0;
+		frame.fp = (unsigned long)__builtin_frame_address(0);
+		frame.sp = current_sp;
+		frame.lr = (unsigned long)__builtin_return_address(0);
+		frame.pc = (unsigned long)save_stack_trace_tsk;
+	}
+
+	walk_stackframe(&frame, save_trace, &data);
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
+void save_stack_trace(struct stack_trace *trace)
+{
+	save_stack_trace_tsk(current, trace);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+#endif
diff --git a/arch/metag/kernel/sys_metag.c b/arch/metag/kernel/sys_metag.c
new file mode 100644
index 0000000..efe833a
--- /dev/null
+++ b/arch/metag/kernel/sys_metag.c
@@ -0,0 +1,180 @@
+/*
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/Meta
+ * platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/unistd.h>
+#include <asm/cacheflush.h>
+#include <asm/core_reg.h>
+#include <asm/global_lock.h>
+#include <asm/switch.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
+#include <asm/user_gateway.h>
+
+#define merge_64(hi, lo) ((((unsigned long long)(hi)) << 32) + \
+			  ((lo) & 0xffffffffUL))
+
+int metag_mmap_check(unsigned long addr, unsigned long len,
+		     unsigned long flags)
+{
+	/* We can't have people trying to write to the bottom of the
+	 * memory map, there are mysterious unspecified things there that
+	 * we don't want people trampling on.
+	 */
+	if ((flags & MAP_FIXED) && (addr < TASK_UNMAPPED_BASE))
+		return -EINVAL;
+
+	return 0;
+}
+
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+			  unsigned long prot, unsigned long flags,
+			  unsigned long fd, unsigned long pgoff)
+{
+	/* The shift for mmap2 is constant, regardless of PAGE_SIZE setting. */
+	if (pgoff & ((1 << (PAGE_SHIFT - 12)) - 1))
+		return -EINVAL;
+
+	pgoff >>= PAGE_SHIFT - 12;
+
+	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
+}
+
+asmlinkage int sys_metag_setglobalbit(char __user *addr, int mask)
+{
+	char tmp;
+	int ret = 0;
+	unsigned int flags;
+
+	if (!((__force unsigned int)addr >= LINCORE_BASE))
+		return -EFAULT;
+
+	__global_lock2(flags);
+
+	metag_data_cache_flush((__force void *)addr, sizeof(mask));
+
+	ret = __get_user(tmp, addr);
+	if (ret)
+		goto out;
+	tmp |= mask;
+	ret = __put_user(tmp, addr);
+
+	metag_data_cache_flush((__force void *)addr, sizeof(mask));
+
+out:
+	__global_unlock2(flags);
+
+	return ret;
+}
+
+#define TXDEFR_FPU_MASK ((0x1f << 16) | 0x1f)
+
+asmlinkage void sys_metag_set_fpu_flags(unsigned int flags)
+{
+	unsigned int temp;
+
+	flags &= TXDEFR_FPU_MASK;
+
+	temp = __core_reg_get(TXDEFR);
+	temp &= ~TXDEFR_FPU_MASK;
+	temp |= flags;
+	__core_reg_set(TXDEFR, temp);
+}
+
+asmlinkage int sys_metag_set_tls(void __user *ptr)
+{
+	current->thread.tls_ptr = ptr;
+	set_gateway_tls(ptr);
+
+	return 0;
+}
+
+asmlinkage void *sys_metag_get_tls(void)
+{
+	return (__force void *)current->thread.tls_ptr;
+}
+
+asmlinkage long sys_truncate64_metag(const char __user *path, unsigned long lo,
+				     unsigned long hi)
+{
+	return sys_truncate64(path, merge_64(hi, lo));
+}
+
+asmlinkage long sys_ftruncate64_metag(unsigned int fd, unsigned long lo,
+				      unsigned long hi)
+{
+	return sys_ftruncate64(fd, merge_64(hi, lo));
+}
+
+asmlinkage long sys_fadvise64_64_metag(int fd, unsigned long offs_lo,
+				       unsigned long offs_hi,
+				       unsigned long len_lo,
+				       unsigned long len_hi, int advice)
+{
+	return sys_fadvise64_64(fd, merge_64(offs_hi, offs_lo),
+				merge_64(len_hi, len_lo), advice);
+}
+
+asmlinkage long sys_readahead_metag(int fd, unsigned long lo, unsigned long hi,
+				    size_t count)
+{
+	return sys_readahead(fd, merge_64(hi, lo), count);
+}
+
+asmlinkage ssize_t sys_pread64_metag(unsigned long fd, char __user *buf,
+				     size_t count, unsigned long lo,
+				     unsigned long hi)
+{
+	return sys_pread64(fd, buf, count, merge_64(hi, lo));
+}
+
+asmlinkage ssize_t sys_pwrite64_metag(unsigned long fd, char __user *buf,
+				      size_t count, unsigned long lo,
+				      unsigned long hi)
+{
+	return sys_pwrite64(fd, buf, count, merge_64(hi, lo));
+}
+
+asmlinkage long sys_sync_file_range_metag(int fd, unsigned long offs_lo,
+					  unsigned long offs_hi,
+					  unsigned long len_lo,
+					  unsigned long len_hi,
+					  unsigned int flags)
+{
+	return sys_sync_file_range(fd, merge_64(offs_hi, offs_lo),
+				   merge_64(len_hi, len_lo), flags);
+}
+
+/* Provide the actual syscall number to call mapping. */
+#undef __SYSCALL
+#define __SYSCALL(nr, call) [nr] = (call),
+
+/*
+ * We need wrappers for anything with unaligned 64bit arguments
+ */
+#define sys_truncate64		sys_truncate64_metag
+#define sys_ftruncate64		sys_ftruncate64_metag
+#define sys_fadvise64_64	sys_fadvise64_64_metag
+#define sys_readahead		sys_readahead_metag
+#define sys_pread64		sys_pread64_metag
+#define sys_pwrite64		sys_pwrite64_metag
+#define sys_sync_file_range	sys_sync_file_range_metag
+
+/*
+ * Note that we can't include <linux/unistd.h> here since the header
+ * guard will defeat us; <asm/unistd.h> checks for __SYSCALL as well.
+ */
+const void *sys_call_table[__NR_syscalls] = {
+	[0 ... __NR_syscalls-1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};
diff --git a/arch/metag/kernel/tbiunexp.S b/arch/metag/kernel/tbiunexp.S
new file mode 100644
index 0000000..907bbe0
--- /dev/null
+++ b/arch/metag/kernel/tbiunexp.S
@@ -0,0 +1,22 @@
+/* Pass a breakpoint through to Codescape */
+
+#include <asm/tbx.h>
+
+	.text
+        .global	___TBIUnExpXXX
+        .type   ___TBIUnExpXXX,function
+___TBIUnExpXXX:
+	TSTT	D0Ar2,#TBICTX_CRIT_BIT	! Result of nestable int call?
+	BZ	$LTBINormCase		! UnExpXXX at background level
+	MOV	D0Re0,TXMASKI		! Read TXMASKI
+	XOR	TXMASKI,D1Re0,D1Re0	! Turn off BGNDHALT handling!
+	OR	D0Ar2,D0Ar2,D0Re0	! Preserve bits cleared
+$LTBINormCase:
+	MSETL 	[A0StP],D0Ar6,D0Ar4,D0Ar2	! Save args on stack
+	SETL 	[A0StP++],D0Ar2,D1Ar1	! Init area for returned values
+	SWITCH 	#0xC20208		! Total stack frame size 8 Dwords
+					!            write back size 2 Dwords
+	GETL 	D0Re0,D1Re0,[--A0StP]	! Get result
+	SUB 	A0StP,A0StP,#(8*3)	! Recover stack frame
+	MOV 	PC,D1RtP
+        .size   	___TBIUnExpXXX,.-___TBIUnExpXXX
diff --git a/arch/metag/kernel/tcm.c b/arch/metag/kernel/tcm.c
new file mode 100644
index 0000000..5d102b3
--- /dev/null
+++ b/arch/metag/kernel/tcm.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2010 Imagination Technologies Ltd.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/genalloc.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <asm/page.h>
+#include <asm/tcm.h>
+
+struct tcm_pool {
+	struct list_head list;
+	unsigned int tag;
+	unsigned long start;
+	unsigned long end;
+	struct gen_pool *pool;
+};
+
+static LIST_HEAD(pool_list);
+
+static struct tcm_pool *find_pool(unsigned int tag)
+{
+	struct list_head *lh;
+	struct tcm_pool *pool;
+
+	list_for_each(lh, &pool_list) {
+		pool = list_entry(lh, struct tcm_pool, list);
+		if (pool->tag == tag)
+			return pool;
+	}
+
+	return NULL;
+}
+
+/**
+ * tcm_alloc - allocate memory from a TCM pool
+ * @tag: tag of the pool to allocate memory from
+ * @len: number of bytes to be allocated
+ *
+ * Allocate the requested number of bytes from the pool matching
+ * the specified tag. Returns the address of the allocated memory
+ * or zero on failure.
+ */
+unsigned long tcm_alloc(unsigned int tag, size_t len)
+{
+	unsigned long vaddr;
+	struct tcm_pool *pool;
+
+	pool = find_pool(tag);
+	if (!pool)
+		return 0;
+
+	vaddr = gen_pool_alloc(pool->pool, len);
+	if (!vaddr)
+		return 0;
+
+	return vaddr;
+}
+
+/**
+ * tcm_free - free a block of memory to a TCM pool
+ * @tag: tag of the pool to free memory to
+ * @addr: address of the memory to be freed
+ * @len: number of bytes to be freed
+ *
+ * Free the requested number of bytes at a specific address to the
+ * pool matching the specified tag.
+ */
+void tcm_free(unsigned int tag, unsigned long addr, size_t len)
+{
+	struct tcm_pool *pool;
+
+	pool = find_pool(tag);
+	if (!pool)
+		return;
+	gen_pool_free(pool->pool, addr, len);
+}
+
+/**
+ * tcm_lookup_tag - find the tag matching an address
+ * @p: memory address to lookup the tag for
+ *
+ * Find the tag of the tcm memory region that contains the
+ * specified address. Returns %TCM_INVALID_TAG if no such
+ * memory region could be found.
+ */
+unsigned int tcm_lookup_tag(unsigned long p)
+{
+	struct list_head *lh;
+	struct tcm_pool *pool;
+	unsigned long addr = (unsigned long) p;
+
+	list_for_each(lh, &pool_list) {
+		pool = list_entry(lh, struct tcm_pool, list);
+		if (addr >= pool->start && addr < pool->end)
+			return pool->tag;
+	}
+
+	return TCM_INVALID_TAG;
+}
+
+/**
+ * tcm_add_region - add a memory region to TCM pool list
+ * @reg: descriptor of region to be added
+ *
+ * Add a region of memory to the TCM pool list. Returns 0 on success.
+ */
+int __init tcm_add_region(struct tcm_region *reg)
+{
+	struct tcm_pool *pool;
+
+	pool = kmalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool) {
+		pr_err("Failed to alloc memory for TCM pool!\n");
+		return -ENOMEM;
+	}
+
+	pool->tag = reg->tag;
+	pool->start = reg->res.start;
+	pool->end = reg->res.end;
+
+	/*
+	 * 2^3 = 8 bytes granularity to allow for 64bit access alignment.
+	 * -1 = NUMA node specifier.
+	 */
+	pool->pool = gen_pool_create(3, -1);
+
+	if (!pool->pool) {
+		pr_err("Failed to create TCM pool!\n");
+		kfree(pool);
+		return -ENOMEM;
+	}
+
+	if (gen_pool_add(pool->pool, reg->res.start,
+			 reg->res.end - reg->res.start + 1, -1)) {
+		pr_err("Failed to add memory to TCM pool!\n");
+		return -ENOMEM;
+	}
+	pr_info("Added %s TCM pool (%08x bytes @ %08x)\n",
+		reg->res.name, reg->res.end - reg->res.start + 1,
+		reg->res.start);
+
+	list_add_tail(&pool->list, &pool_list);
+
+	return 0;
+}
diff --git a/arch/metag/kernel/time.c b/arch/metag/kernel/time.c
new file mode 100644
index 0000000..17dc107
--- /dev/null
+++ b/arch/metag/kernel/time.c
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) 2005-2013 Imagination Technologies Ltd.
+ *
+ * This file contains the Meta-specific time handling details.
+ *
+ */
+
+#include <linux/init.h>
+
+#include <clocksource/metag_generic.h>
+
+void __init time_init(void)
+{
+	metag_generic_timer_init();
+}
diff --git a/arch/metag/kernel/topology.c b/arch/metag/kernel/topology.c
new file mode 100644
index 0000000..bec3dec
--- /dev/null
+++ b/arch/metag/kernel/topology.c
@@ -0,0 +1,77 @@
+/*
+ *  Copyright (C) 2007  Paul Mundt
+ *  Copyright (C) 2010  Imagination Technolohies Ltd.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/node.h>
+#include <linux/nodemask.h>
+#include <linux/topology.h>
+
+#include <asm/cpu.h>
+
+DEFINE_PER_CPU(struct cpuinfo_metag, cpu_data);
+
+cpumask_t cpu_core_map[NR_CPUS];
+
+static cpumask_t cpu_coregroup_map(unsigned int cpu)
+{
+	return *cpu_possible_mask;
+}
+
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu)
+{
+	return &cpu_core_map[cpu];
+}
+
+int arch_update_cpu_topology(void)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu)
+		cpu_core_map[cpu] = cpu_coregroup_map(cpu);
+
+	return 0;
+}
+
+static int __init topology_init(void)
+{
+	int i, ret;
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	for_each_online_node(i)
+		register_one_node(i);
+#endif
+
+	for_each_present_cpu(i) {
+		struct cpuinfo_metag *cpuinfo = &per_cpu(cpu_data, i);
+#ifdef CONFIG_HOTPLUG_CPU
+		cpuinfo->cpu.hotpluggable = 1;
+#endif
+		ret = register_cpu(&cpuinfo->cpu, i);
+		if (unlikely(ret))
+			pr_warn("%s: register_cpu %d failed (%d)\n",
+				__func__, i, ret);
+	}
+
+#if defined(CONFIG_NUMA) && !defined(CONFIG_SMP)
+	/*
+	 * In the UP case, make sure the CPU association is still
+	 * registered under each node. Without this, sysfs fails
+	 * to make the connection between nodes other than node0
+	 * and cpu0.
+	 */
+	for_each_online_node(i)
+		if (i != numa_node_id())
+			register_cpu_under_node(raw_smp_processor_id(), i);
+#endif
+
+	return 0;
+}
+subsys_initcall(topology_init);
diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c
new file mode 100644
index 0000000..8961f24
--- /dev/null
+++ b/arch/metag/kernel/traps.c
@@ -0,0 +1,995 @@
+/*
+ *  Meta exception handling.
+ *
+ *  Copyright (C) 2005,2006,2007,2008,2009,2012 Imagination Technologies Ltd.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/preempt.h>
+#include <linux/ptrace.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/kdebug.h>
+#include <linux/kexec.h>
+#include <linux/unistd.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+
+#include <asm/bug.h>
+#include <asm/core_reg.h>
+#include <asm/irqflags.h>
+#include <asm/siginfo.h>
+#include <asm/traps.h>
+#include <asm/hwthread.h>
+#include <asm/switch.h>
+#include <asm/user_gateway.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
+
+/* Passing syscall arguments as long long is quicker. */
+typedef unsigned int (*LPSYSCALL) (unsigned long long,
+				   unsigned long long,
+				   unsigned long long);
+
+/*
+ * Users of LNKSET should compare the bus error bits obtained from DEFR
+ * against TXDEFR_LNKSET_SUCCESS only as the failure code will vary between
+ * different cores revisions.
+ */
+#define TXDEFR_LNKSET_SUCCESS 0x02000000
+#define TXDEFR_LNKSET_FAILURE 0x04000000
+
+/*
+ * Our global TBI handle.  Initialised from setup.c/setup_arch.
+ */
+DECLARE_PER_CPU(PTBI, pTBI);
+
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(unsigned int, trigger_mask);
+#else
+unsigned int global_trigger_mask;
+EXPORT_SYMBOL(global_trigger_mask);
+#endif
+
+unsigned long per_cpu__stack_save[NR_CPUS];
+
+static const char * const trap_names[] = {
+	[TBIXXF_SIGNUM_IIF] = "Illegal instruction fault",
+	[TBIXXF_SIGNUM_PGF] = "Privilege violation",
+	[TBIXXF_SIGNUM_DHF] = "Unaligned data access fault",
+	[TBIXXF_SIGNUM_IGF] = "Code fetch general read failure",
+	[TBIXXF_SIGNUM_DGF] = "Data access general read/write fault",
+	[TBIXXF_SIGNUM_IPF] = "Code fetch page fault",
+	[TBIXXF_SIGNUM_DPF] = "Data access page fault",
+	[TBIXXF_SIGNUM_IHF] = "Instruction breakpoint",
+	[TBIXXF_SIGNUM_DWF] = "Read-only data access fault",
+};
+
+const char *trap_name(int trapno)
+{
+	if (trapno >= 0 && trapno < ARRAY_SIZE(trap_names)
+			&& trap_names[trapno])
+		return trap_names[trapno];
+	return "Unknown fault";
+}
+
+static DEFINE_SPINLOCK(die_lock);
+
+void die(const char *str, struct pt_regs *regs, long err,
+	 unsigned long addr)
+{
+	static int die_counter;
+
+	oops_enter();
+
+	spin_lock_irq(&die_lock);
+	console_verbose();
+	bust_spinlocks(1);
+	pr_err("%s: err %04lx (%s) addr %08lx [#%d]\n", str, err & 0xffff,
+	       trap_name(err & 0xffff), addr, ++die_counter);
+
+	print_modules();
+	show_regs(regs);
+
+	pr_err("Process: %s (pid: %d, stack limit = %p)\n", current->comm,
+	       task_pid_nr(current), task_stack_page(current) + THREAD_SIZE);
+
+	bust_spinlocks(0);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+	if (kexec_should_crash(current))
+		crash_kexec(regs);
+
+	if (in_interrupt())
+		panic("Fatal exception in interrupt");
+
+	if (panic_on_oops)
+		panic("Fatal exception");
+
+	spin_unlock_irq(&die_lock);
+	oops_exit();
+	do_exit(SIGSEGV);
+}
+
+#ifdef CONFIG_METAG_DSP
+/*
+ * The ECH encoding specifies the size of a DSPRAM as,
+ *
+ *		"slots" / 4
+ *
+ * A "slot" is the size of two DSPRAM bank entries; an entry from
+ * DSPRAM bank A and an entry from DSPRAM bank B. One DSPRAM bank
+ * entry is 4 bytes.
+ */
+#define SLOT_SZ	8
+static inline unsigned int decode_dspram_size(unsigned int size)
+{
+	unsigned int _sz = size & 0x7f;
+
+	return _sz * SLOT_SZ * 4;
+}
+
+static void dspram_save(struct meta_ext_context *dsp_ctx,
+			unsigned int ramA_sz, unsigned int ramB_sz)
+{
+	unsigned int ram_sz[2];
+	int i;
+
+	ram_sz[0] = ramA_sz;
+	ram_sz[1] = ramB_sz;
+
+	for (i = 0; i < 2; i++) {
+		if (ram_sz[i] != 0) {
+			unsigned int sz;
+
+			if (i == 0)
+				sz = decode_dspram_size(ram_sz[i] >> 8);
+			else
+				sz = decode_dspram_size(ram_sz[i]);
+
+			if (dsp_ctx->ram[i] == NULL) {
+				dsp_ctx->ram[i] = kmalloc(sz, GFP_KERNEL);
+
+				if (dsp_ctx->ram[i] == NULL)
+					panic("couldn't save DSP context");
+			} else {
+				if (ram_sz[i] > dsp_ctx->ram_sz[i]) {
+					kfree(dsp_ctx->ram[i]);
+
+					dsp_ctx->ram[i] = kmalloc(sz,
+								  GFP_KERNEL);
+
+					if (dsp_ctx->ram[i] == NULL)
+						panic("couldn't save DSP context");
+				}
+			}
+
+			if (i == 0)
+				__TBIDspramSaveA(ram_sz[i], dsp_ctx->ram[i]);
+			else
+				__TBIDspramSaveB(ram_sz[i], dsp_ctx->ram[i]);
+
+			dsp_ctx->ram_sz[i] = ram_sz[i];
+		}
+	}
+}
+#endif /* CONFIG_METAG_DSP */
+
+/*
+ * Allow interrupts to be nested and save any "extended" register
+ * context state, e.g. DSP regs and RAMs.
+ */
+static void nest_interrupts(TBIRES State, unsigned long mask)
+{
+#ifdef CONFIG_METAG_DSP
+	struct meta_ext_context *dsp_ctx;
+	unsigned int D0_8;
+
+	/*
+	 * D0.8 may contain an ECH encoding. The upper 16 bits
+	 * tell us what DSP resources the current process is
+	 * using. OR the bits into the SaveMask so that
+	 * __TBINestInts() knows what resources to save as
+	 * part of this context.
+	 *
+	 * Don't save the context if we're nesting interrupts in the
+	 * kernel because the kernel doesn't use DSP hardware.
+	 */
+	D0_8 = __core_reg_get(D0.8);
+
+	if (D0_8 && (State.Sig.SaveMask & TBICTX_PRIV_BIT)) {
+		State.Sig.SaveMask |= (D0_8 >> 16);
+
+		dsp_ctx = current->thread.dsp_context;
+		if (dsp_ctx == NULL) {
+			dsp_ctx = kzalloc(sizeof(*dsp_ctx), GFP_KERNEL);
+			if (dsp_ctx == NULL)
+				panic("couldn't save DSP context: ENOMEM");
+
+			current->thread.dsp_context = dsp_ctx;
+		}
+
+		current->thread.user_flags |= (D0_8 & 0xffff0000);
+		__TBINestInts(State, &dsp_ctx->regs, mask);
+		dspram_save(dsp_ctx, D0_8 & 0x7f00, D0_8 & 0x007f);
+	} else
+		__TBINestInts(State, NULL, mask);
+#else
+	__TBINestInts(State, NULL, mask);
+#endif
+}
+
+void head_end(TBIRES State, unsigned long mask)
+{
+	unsigned int savemask = (unsigned short)State.Sig.SaveMask;
+	unsigned int ctx_savemask = (unsigned short)State.Sig.pCtx->SaveMask;
+
+	if (savemask & TBICTX_PRIV_BIT) {
+		ctx_savemask |= TBICTX_PRIV_BIT;
+		current->thread.user_flags = savemask;
+	}
+
+	/* Always undo the sleep bit */
+	ctx_savemask &= ~TBICTX_WAIT_BIT;
+
+	/* Always save the catch buffer and RD pipe if they are dirty */
+	savemask |= TBICTX_XCBF_BIT;
+
+	/* Only save the catch and RD if we have not already done so.
+	 * Note - the RD bits are in the pCtx only, and not in the
+	 * State.SaveMask.
+	 */
+	if ((savemask & TBICTX_CBUF_BIT) ||
+	    (ctx_savemask & TBICTX_CBRP_BIT)) {
+		/* Have we already saved the buffers though?
+		 * - See TestTrack 5071 */
+		if (ctx_savemask & TBICTX_XCBF_BIT) {
+			/* Strip off the bits so the call to __TBINestInts
+			 * won't save the buffers again. */
+			savemask &= ~TBICTX_CBUF_BIT;
+			ctx_savemask &= ~TBICTX_CBRP_BIT;
+		}
+	}
+
+#ifdef CONFIG_METAG_META21
+	{
+		unsigned int depth, txdefr;
+
+		/*
+		 * Save TXDEFR state.
+		 *
+		 * The process may have been interrupted after a LNKSET, but
+		 * before it could read the DEFR state, so we mustn't lose that
+		 * state or it could end up retrying an atomic operation that
+		 * succeeded.
+		 *
+		 * All interrupts are disabled at this point so we
+		 * don't need to perform any locking. We must do this
+		 * dance before we use LNKGET or LNKSET.
+		 */
+		BUG_ON(current->thread.int_depth > HARDIRQ_BITS);
+
+		depth = current->thread.int_depth++;
+
+		txdefr = __core_reg_get(TXDEFR);
+
+		txdefr &= TXDEFR_BUS_STATE_BITS;
+		if (txdefr & TXDEFR_LNKSET_SUCCESS)
+			current->thread.txdefr_failure &= ~(1 << depth);
+		else
+			current->thread.txdefr_failure |= (1 << depth);
+	}
+#endif
+
+	State.Sig.SaveMask = savemask;
+	State.Sig.pCtx->SaveMask = ctx_savemask;
+
+	nest_interrupts(State, mask);
+
+#ifdef CONFIG_METAG_POISON_CATCH_BUFFERS
+	/* Poison the catch registers.  This shows up any mistakes we have
+	 * made in their handling MUCH quicker.
+	 */
+	__core_reg_set(TXCATCH0, 0x87650021);
+	__core_reg_set(TXCATCH1, 0x87654322);
+	__core_reg_set(TXCATCH2, 0x87654323);
+	__core_reg_set(TXCATCH3, 0x87654324);
+#endif /* CONFIG_METAG_POISON_CATCH_BUFFERS */
+}
+
+TBIRES tail_end_sys(TBIRES State, int syscall, int *restart)
+{
+	struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx;
+	unsigned long flags;
+
+	local_irq_disable();
+
+	if (user_mode(regs)) {
+		flags = current_thread_info()->flags;
+		if (flags & _TIF_WORK_MASK &&
+		    do_work_pending(regs, flags, syscall)) {
+			*restart = 1;
+			return State;
+		}
+
+#ifdef CONFIG_METAG_FPU
+		if (current->thread.fpu_context &&
+		    current->thread.fpu_context->needs_restore) {
+			__TBICtxFPURestore(State, current->thread.fpu_context);
+			/*
+			 * Clearing this bit ensures the FP unit is not made
+			 * active again unless it is used.
+			 */
+			State.Sig.SaveMask &= ~TBICTX_FPAC_BIT;
+			current->thread.fpu_context->needs_restore = false;
+		}
+		State.Sig.TrigMask |= TBI_TRIG_BIT(TBID_SIGNUM_DFR);
+#endif
+	}
+
+	/* TBI will turn interrupts back on at some point. */
+	if (!irqs_disabled_flags((unsigned long)State.Sig.TrigMask))
+		trace_hardirqs_on();
+
+#ifdef CONFIG_METAG_DSP
+	/*
+	 * If we previously saved an extended context then restore it
+	 * now. Otherwise, clear D0.8 because this process is not
+	 * using DSP hardware.
+	 */
+	if (State.Sig.pCtx->SaveMask & TBICTX_XEXT_BIT) {
+		unsigned int D0_8;
+		struct meta_ext_context *dsp_ctx = current->thread.dsp_context;
+
+		/* Make sure we're going to return to userland. */
+		BUG_ON(current->thread.int_depth != 1);
+
+		if (dsp_ctx->ram_sz[0] > 0)
+			__TBIDspramRestoreA(dsp_ctx->ram_sz[0],
+					    dsp_ctx->ram[0]);
+		if (dsp_ctx->ram_sz[1] > 0)
+			__TBIDspramRestoreB(dsp_ctx->ram_sz[1],
+					    dsp_ctx->ram[1]);
+
+		State.Sig.SaveMask |= State.Sig.pCtx->SaveMask;
+		__TBICtxRestore(State, current->thread.dsp_context);
+		D0_8 = __core_reg_get(D0.8);
+		D0_8 |= current->thread.user_flags & 0xffff0000;
+		D0_8 |= (dsp_ctx->ram_sz[1] | dsp_ctx->ram_sz[0]) & 0xffff;
+		__core_reg_set(D0.8, D0_8);
+	} else
+		__core_reg_set(D0.8, 0);
+#endif /* CONFIG_METAG_DSP */
+
+#ifdef CONFIG_METAG_META21
+	{
+		unsigned int depth, txdefr;
+
+		/*
+		 * If there hasn't been a LNKSET since the last LNKGET then the
+		 * link flag will be set, causing the next LNKSET to succeed if
+		 * the addresses match. The two LNK operations may not be a pair
+		 * (e.g. see atomic_read()), so the LNKSET should fail.
+		 * We use a conditional-never LNKSET to clear the link flag
+		 * without side effects.
+		 */
+		asm volatile("LNKSETDNV [D0Re0],D0Re0");
+
+		depth = --current->thread.int_depth;
+
+		BUG_ON(user_mode(regs) && depth);
+
+		txdefr = __core_reg_get(TXDEFR);
+
+		txdefr &= ~TXDEFR_BUS_STATE_BITS;
+
+		/* Do we need to restore a failure code into TXDEFR? */
+		if (current->thread.txdefr_failure & (1 << depth))
+			txdefr |= (TXDEFR_LNKSET_FAILURE | TXDEFR_BUS_TRIG_BIT);
+		else
+			txdefr |= (TXDEFR_LNKSET_SUCCESS | TXDEFR_BUS_TRIG_BIT);
+
+		__core_reg_set(TXDEFR, txdefr);
+	}
+#endif
+	return State;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * If we took an interrupt in the middle of __kuser_get_tls then we need
+ * to rewind the PC to the start of the function in case the process
+ * gets migrated to another thread (SMP only) and it reads the wrong tls
+ * data.
+ */
+static inline void _restart_critical_section(TBIRES State)
+{
+	unsigned long get_tls_start;
+	unsigned long get_tls_end;
+
+	get_tls_start = (unsigned long)__kuser_get_tls -
+		(unsigned long)&__user_gateway_start;
+
+	get_tls_start += USER_GATEWAY_PAGE;
+
+	get_tls_end = (unsigned long)__kuser_get_tls_end -
+		(unsigned long)&__user_gateway_start;
+
+	get_tls_end += USER_GATEWAY_PAGE;
+
+	if ((State.Sig.pCtx->CurrPC >= get_tls_start) &&
+	    (State.Sig.pCtx->CurrPC < get_tls_end))
+		State.Sig.pCtx->CurrPC = get_tls_start;
+}
+#else
+/*
+ * If we took an interrupt in the middle of
+ * __kuser_cmpxchg then we need to rewind the PC to the
+ * start of the function.
+ */
+static inline void _restart_critical_section(TBIRES State)
+{
+	unsigned long cmpxchg_start;
+	unsigned long cmpxchg_end;
+
+	cmpxchg_start = (unsigned long)__kuser_cmpxchg -
+		(unsigned long)&__user_gateway_start;
+
+	cmpxchg_start += USER_GATEWAY_PAGE;
+
+	cmpxchg_end = (unsigned long)__kuser_cmpxchg_end -
+		(unsigned long)&__user_gateway_start;
+
+	cmpxchg_end += USER_GATEWAY_PAGE;
+
+	if ((State.Sig.pCtx->CurrPC >= cmpxchg_start) &&
+	    (State.Sig.pCtx->CurrPC < cmpxchg_end))
+		State.Sig.pCtx->CurrPC = cmpxchg_start;
+}
+#endif
+
+/* Used by kick_handler() */
+void restart_critical_section(TBIRES State)
+{
+	_restart_critical_section(State);
+}
+
+TBIRES trigger_handler(TBIRES State, int SigNum, int Triggers, int Inst,
+		       PTBI pTBI)
+{
+	head_end(State, ~INTS_OFF_MASK);
+
+	/* If we interrupted user code handle any critical sections. */
+	if (State.Sig.SaveMask & TBICTX_PRIV_BIT)
+		_restart_critical_section(State);
+
+	trace_hardirqs_off();
+
+	do_IRQ(SigNum, (struct pt_regs *)State.Sig.pCtx);
+
+	return tail_end(State);
+}
+
+static unsigned int load_fault(PTBICTXEXTCB0 pbuf)
+{
+	return pbuf->CBFlags & TXCATCH0_READ_BIT;
+}
+
+static unsigned long fault_address(PTBICTXEXTCB0 pbuf)
+{
+	return pbuf->CBAddr;
+}
+
+static void unhandled_fault(struct pt_regs *regs, unsigned long addr,
+			    int signo, int code, int trapno)
+{
+	if (user_mode(regs)) {
+		siginfo_t info;
+
+		if (show_unhandled_signals && unhandled_signal(current, signo)
+		    && printk_ratelimit()) {
+
+			pr_info("pid %d unhandled fault: pc 0x%08x, addr 0x%08lx, trap %d (%s)\n",
+				current->pid, regs->ctx.CurrPC, addr,
+				trapno, trap_name(trapno));
+			print_vma_addr(" in ", regs->ctx.CurrPC);
+			print_vma_addr(" rtp in ", regs->ctx.DX[4].U1);
+			printk("\n");
+			show_regs(regs);
+		}
+
+		info.si_signo = signo;
+		info.si_errno = 0;
+		info.si_code = code;
+		info.si_addr = (__force void __user *)addr;
+		info.si_trapno = trapno;
+		force_sig_info(signo, &info, current);
+	} else {
+		die("Oops", regs, trapno, addr);
+	}
+}
+
+static int handle_data_fault(PTBICTXEXTCB0 pcbuf, struct pt_regs *regs,
+			     unsigned int data_address, int trapno)
+{
+	int ret;
+
+	ret = do_page_fault(regs, data_address, !load_fault(pcbuf), trapno);
+
+	return ret;
+}
+
+static unsigned long get_inst_fault_address(struct pt_regs *regs)
+{
+	return regs->ctx.CurrPC;
+}
+
+TBIRES fault_handler(TBIRES State, int SigNum, int Triggers,
+		     int Inst, PTBI pTBI)
+{
+	struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx;
+	PTBICTXEXTCB0 pcbuf = (PTBICTXEXTCB0)&regs->extcb0;
+	unsigned long data_address;
+
+	head_end(State, ~INTS_OFF_MASK);
+
+	/* Hardware breakpoint or data watch */
+	if ((SigNum == TBIXXF_SIGNUM_IHF) ||
+	    ((SigNum == TBIXXF_SIGNUM_DHF) &&
+	     (pcbuf[0].CBFlags & (TXCATCH0_WATCH1_BIT |
+				  TXCATCH0_WATCH0_BIT)))) {
+		State = __TBIUnExpXXX(State, SigNum, Triggers, Inst,
+				      pTBI);
+		return tail_end(State);
+	}
+
+	local_irq_enable();
+
+	data_address = fault_address(pcbuf);
+
+	switch (SigNum) {
+	case TBIXXF_SIGNUM_IGF:
+		/* 1st-level entry invalid (instruction fetch) */
+	case TBIXXF_SIGNUM_IPF: {
+		/* 2nd-level entry invalid (instruction fetch) */
+		unsigned long addr = get_inst_fault_address(regs);
+		do_page_fault(regs, addr, 0, SigNum);
+		break;
+	}
+
+	case TBIXXF_SIGNUM_DGF:
+		/* 1st-level entry invalid (data access) */
+	case TBIXXF_SIGNUM_DPF:
+		/* 2nd-level entry invalid (data access) */
+	case TBIXXF_SIGNUM_DWF:
+		/* Write to read only page */
+		handle_data_fault(pcbuf, regs, data_address, SigNum);
+		break;
+
+	case TBIXXF_SIGNUM_IIF:
+		/* Illegal instruction */
+		unhandled_fault(regs, regs->ctx.CurrPC, SIGILL, ILL_ILLOPC,
+				SigNum);
+		break;
+
+	case TBIXXF_SIGNUM_DHF:
+		/* Unaligned access */
+		unhandled_fault(regs, data_address, SIGBUS, BUS_ADRALN,
+				SigNum);
+		break;
+	case TBIXXF_SIGNUM_PGF:
+		/* Privilege violation */
+		unhandled_fault(regs, data_address, SIGSEGV, SEGV_ACCERR,
+				SigNum);
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	return tail_end(State);
+}
+
+static bool switch_is_syscall(unsigned int inst)
+{
+	return inst == __METAG_SW_ENCODING(SYS);
+}
+
+static bool switch_is_legacy_syscall(unsigned int inst)
+{
+	return inst == __METAG_SW_ENCODING(SYS_LEGACY);
+}
+
+static inline void step_over_switch(struct pt_regs *regs, unsigned int inst)
+{
+	regs->ctx.CurrPC += 4;
+}
+
+static inline int test_syscall_work(void)
+{
+	return current_thread_info()->flags & _TIF_WORK_SYSCALL_MASK;
+}
+
+TBIRES switch1_handler(TBIRES State, int SigNum, int Triggers,
+		       int Inst, PTBI pTBI)
+{
+	struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx;
+	unsigned int sysnumber;
+	unsigned long long a1_a2, a3_a4, a5_a6;
+	LPSYSCALL syscall_entry;
+	int restart;
+
+	head_end(State, ~INTS_OFF_MASK);
+
+	/*
+	 * If this is not a syscall SWITCH it could be a breakpoint.
+	 */
+	if (!switch_is_syscall(Inst)) {
+		/*
+		 * Alert the user if they're trying to use legacy system
+		 * calls. This suggests they need to update their C
+		 * library and build against up to date kernel headers.
+		 */
+		if (switch_is_legacy_syscall(Inst))
+			pr_warn_once("WARNING: A legacy syscall was made. Your userland needs updating.\n");
+		/*
+		 * We don't know how to handle the SWITCH and cannot
+		 * safely ignore it, so treat all unknown switches
+		 * (including breakpoints) as traps.
+		 */
+		force_sig(SIGTRAP, current);
+		return tail_end(State);
+	}
+
+	local_irq_enable();
+
+restart_syscall:
+	restart = 0;
+	sysnumber = regs->ctx.DX[0].U1;
+
+	if (test_syscall_work())
+		sysnumber = syscall_trace_enter(regs);
+
+	/* Skip over the SWITCH instruction - or you just get 'stuck' on it! */
+	step_over_switch(regs, Inst);
+
+	if (sysnumber >= __NR_syscalls) {
+		pr_debug("unknown syscall number: %d\n", sysnumber);
+		syscall_entry = (LPSYSCALL) sys_ni_syscall;
+	} else {
+		syscall_entry = (LPSYSCALL) sys_call_table[sysnumber];
+	}
+
+	/* Use 64bit loads for speed. */
+	a5_a6 = *(unsigned long long *)&regs->ctx.DX[1];
+	a3_a4 = *(unsigned long long *)&regs->ctx.DX[2];
+	a1_a2 = *(unsigned long long *)&regs->ctx.DX[3];
+
+	/* here is the actual call to the syscall handler functions */
+	regs->ctx.DX[0].U0 = syscall_entry(a1_a2, a3_a4, a5_a6);
+
+	if (test_syscall_work())
+		syscall_trace_leave(regs);
+
+	State = tail_end_sys(State, sysnumber, &restart);
+	/* Handlerless restarts shouldn't go via userland */
+	if (restart)
+		goto restart_syscall;
+	return State;
+}
+
+TBIRES switchx_handler(TBIRES State, int SigNum, int Triggers,
+		       int Inst, PTBI pTBI)
+{
+	struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx;
+
+	/*
+	 * This can be caused by any user process simply executing an unusual
+	 * SWITCH instruction. If there's no DA, __TBIUnExpXXX will cause the
+	 * thread to stop, so signal a SIGTRAP instead.
+	 */
+	head_end(State, ~INTS_OFF_MASK);
+	if (user_mode(regs))
+		force_sig(SIGTRAP, current);
+	else
+		State = __TBIUnExpXXX(State, SigNum, Triggers, Inst, pTBI);
+	return tail_end(State);
+}
+
+#ifdef CONFIG_METAG_META21
+TBIRES fpe_handler(TBIRES State, int SigNum, int Triggers, int Inst, PTBI pTBI)
+{
+	struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx;
+	unsigned int error_state = Triggers;
+	siginfo_t info;
+
+	head_end(State, ~INTS_OFF_MASK);
+
+	local_irq_enable();
+
+	info.si_signo = SIGFPE;
+
+	if (error_state & TXSTAT_FPE_INVALID_BIT)
+		info.si_code = FPE_FLTINV;
+	else if (error_state & TXSTAT_FPE_DIVBYZERO_BIT)
+		info.si_code = FPE_FLTDIV;
+	else if (error_state & TXSTAT_FPE_OVERFLOW_BIT)
+		info.si_code = FPE_FLTOVF;
+	else if (error_state & TXSTAT_FPE_UNDERFLOW_BIT)
+		info.si_code = FPE_FLTUND;
+	else if (error_state & TXSTAT_FPE_INEXACT_BIT)
+		info.si_code = FPE_FLTRES;
+	else
+		info.si_code = 0;
+	info.si_errno = 0;
+	info.si_addr = (__force void __user *)regs->ctx.CurrPC;
+	force_sig_info(SIGFPE, &info, current);
+
+	return tail_end(State);
+}
+#endif
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+struct traps_context {
+	PTBIAPIFN fnSigs[TBID_SIGNUM_MAX + 1];
+};
+
+static struct traps_context *metag_traps_context;
+
+int traps_save_context(void)
+{
+	unsigned long cpu = smp_processor_id();
+	PTBI _pTBI = per_cpu(pTBI, cpu);
+	struct traps_context *context;
+
+	context = kzalloc(sizeof(*context), GFP_ATOMIC);
+	if (!context)
+		return -ENOMEM;
+
+	memcpy(context->fnSigs, (void *)_pTBI->fnSigs, sizeof(context->fnSigs));
+
+	metag_traps_context = context;
+	return 0;
+}
+
+int traps_restore_context(void)
+{
+	unsigned long cpu = smp_processor_id();
+	PTBI _pTBI = per_cpu(pTBI, cpu);
+	struct traps_context *context = metag_traps_context;
+
+	metag_traps_context = NULL;
+
+	memcpy((void *)_pTBI->fnSigs, context->fnSigs, sizeof(context->fnSigs));
+
+	kfree(context);
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_SMP
+static inline unsigned int _get_trigger_mask(void)
+{
+	unsigned long cpu = smp_processor_id();
+	return per_cpu(trigger_mask, cpu);
+}
+
+unsigned int get_trigger_mask(void)
+{
+	return _get_trigger_mask();
+}
+EXPORT_SYMBOL(get_trigger_mask);
+
+static void set_trigger_mask(unsigned int mask)
+{
+	unsigned long cpu = smp_processor_id();
+	per_cpu(trigger_mask, cpu) = mask;
+}
+
+void arch_local_irq_enable(void)
+{
+	preempt_disable();
+	arch_local_irq_restore(_get_trigger_mask());
+	preempt_enable_no_resched();
+}
+EXPORT_SYMBOL(arch_local_irq_enable);
+#else
+static void set_trigger_mask(unsigned int mask)
+{
+	global_trigger_mask = mask;
+}
+#endif
+
+void __cpuinit per_cpu_trap_init(unsigned long cpu)
+{
+	TBIRES int_context;
+	unsigned int thread = cpu_2_hwthread_id[cpu];
+
+	set_trigger_mask(TBI_INTS_INIT(thread) | /* interrupts */
+			 TBI_TRIG_BIT(TBID_SIGNUM_LWK) | /* low level kick */
+			 TBI_TRIG_BIT(TBID_SIGNUM_SW1) |
+			 TBI_TRIG_BIT(TBID_SIGNUM_SWS));
+
+	/* non-priv - use current stack */
+	int_context.Sig.pCtx = NULL;
+	/* Start with interrupts off */
+	int_context.Sig.TrigMask = INTS_OFF_MASK;
+	int_context.Sig.SaveMask = 0;
+
+	/* And call __TBIASyncTrigger() */
+	__TBIASyncTrigger(int_context);
+}
+
+void __init trap_init(void)
+{
+	unsigned long cpu = smp_processor_id();
+	PTBI _pTBI = per_cpu(pTBI, cpu);
+
+	_pTBI->fnSigs[TBID_SIGNUM_XXF] = fault_handler;
+	_pTBI->fnSigs[TBID_SIGNUM_SW0] = switchx_handler;
+	_pTBI->fnSigs[TBID_SIGNUM_SW1] = switch1_handler;
+	_pTBI->fnSigs[TBID_SIGNUM_SW2] = switchx_handler;
+	_pTBI->fnSigs[TBID_SIGNUM_SW3] = switchx_handler;
+	_pTBI->fnSigs[TBID_SIGNUM_SWK] = kick_handler;
+
+#ifdef CONFIG_METAG_META21
+	_pTBI->fnSigs[TBID_SIGNUM_DFR] = __TBIHandleDFR;
+	_pTBI->fnSigs[TBID_SIGNUM_FPE] = fpe_handler;
+#endif
+
+	per_cpu_trap_init(cpu);
+}
+
+void tbi_startup_interrupt(int irq)
+{
+	unsigned long cpu = smp_processor_id();
+	PTBI _pTBI = per_cpu(pTBI, cpu);
+
+	BUG_ON(irq > TBID_SIGNUM_MAX);
+
+	/* For TR1 and TR2, the thread id is encoded in the irq number */
+	if (irq >= TBID_SIGNUM_T10 && irq < TBID_SIGNUM_TR3)
+		cpu = hwthread_id_2_cpu[(irq - TBID_SIGNUM_T10) % 4];
+
+	set_trigger_mask(get_trigger_mask() | TBI_TRIG_BIT(irq));
+
+	_pTBI->fnSigs[irq] = trigger_handler;
+}
+
+void tbi_shutdown_interrupt(int irq)
+{
+	unsigned long cpu = smp_processor_id();
+	PTBI _pTBI = per_cpu(pTBI, cpu);
+
+	BUG_ON(irq > TBID_SIGNUM_MAX);
+
+	set_trigger_mask(get_trigger_mask() & ~TBI_TRIG_BIT(irq));
+
+	_pTBI->fnSigs[irq] = __TBIUnExpXXX;
+}
+
+int ret_from_fork(TBIRES arg)
+{
+	struct task_struct *prev = arg.Switch.pPara;
+	struct task_struct *tsk = current;
+	struct pt_regs *regs = task_pt_regs(tsk);
+	int (*fn)(void *);
+	TBIRES Next;
+
+	schedule_tail(prev);
+
+	if (tsk->flags & PF_KTHREAD) {
+		fn = (void *)regs->ctx.DX[4].U1;
+		BUG_ON(!fn);
+
+		fn((void *)regs->ctx.DX[3].U1);
+	}
+
+	if (test_syscall_work())
+		syscall_trace_leave(regs);
+
+	preempt_disable();
+
+	Next.Sig.TrigMask = get_trigger_mask();
+	Next.Sig.SaveMask = 0;
+	Next.Sig.pCtx = &regs->ctx;
+
+	set_gateway_tls(current->thread.tls_ptr);
+
+	preempt_enable_no_resched();
+
+	/* And interrupts should come back on when we resume the real usermode
+	 * code. Call __TBIASyncResume()
+	 */
+	__TBIASyncResume(tail_end(Next));
+	/* ASyncResume should NEVER return */
+	BUG();
+	return 0;
+}
+
+void show_trace(struct task_struct *tsk, unsigned long *sp,
+		struct pt_regs *regs)
+{
+	unsigned long addr;
+#ifdef CONFIG_FRAME_POINTER
+	unsigned long fp, fpnew;
+	unsigned long stack;
+#endif
+
+	if (regs && user_mode(regs))
+		return;
+
+	printk("\nCall trace: ");
+#ifdef CONFIG_KALLSYMS
+	printk("\n");
+#endif
+
+	if (!tsk)
+		tsk = current;
+
+#ifdef CONFIG_FRAME_POINTER
+	if (regs) {
+		print_ip_sym(regs->ctx.CurrPC);
+		fp = regs->ctx.AX[1].U0;
+	} else {
+		fp = __core_reg_get(A0FrP);
+	}
+
+	/* detect when the frame pointer has been used for other purposes and
+	 * doesn't point to the stack (it may point completely elsewhere which
+	 * kstack_end may not detect).
+	 */
+	stack = (unsigned long)task_stack_page(tsk);
+	while (fp >= stack && fp + 8 <= stack + THREAD_SIZE) {
+		addr = __raw_readl((unsigned long *)(fp + 4)) - 4;
+		if (kernel_text_address(addr))
+			print_ip_sym(addr);
+		else
+			break;
+		/* stack grows up, so frame pointers must decrease */
+		fpnew = __raw_readl((unsigned long *)(fp + 0));
+		if (fpnew >= fp)
+			break;
+		fp = fpnew;
+	}
+#else
+	while (!kstack_end(sp)) {
+		addr = (*sp--) - 4;
+		if (kernel_text_address(addr))
+			print_ip_sym(addr);
+	}
+#endif
+
+	printk("\n");
+
+	debug_show_held_locks(tsk);
+}
+
+void show_stack(struct task_struct *tsk, unsigned long *sp)
+{
+	if (!tsk)
+		tsk = current;
+	if (tsk == current)
+		sp = (unsigned long *)current_stack_pointer;
+	else
+		sp = (unsigned long *)tsk->thread.kernel_context->AX[0].U0;
+
+	show_trace(tsk, sp, NULL);
+}
+
+void dump_stack(void)
+{
+	show_stack(NULL, NULL);
+}
+EXPORT_SYMBOL(dump_stack);
diff --git a/arch/metag/kernel/user_gateway.S b/arch/metag/kernel/user_gateway.S
new file mode 100644
index 0000000..7167f3e
--- /dev/null
+++ b/arch/metag/kernel/user_gateway.S
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2010 Imagination Technologies Ltd.
+ *
+ * This file contains code that can be accessed from userspace and can
+ * access certain kernel data structures without the overhead of a system
+ * call.
+ */
+
+#include <asm/metag_regs.h>
+#include <asm/user_gateway.h>
+
+/*
+ * User helpers.
+ *
+ * These are segment of kernel provided user code reachable from user space
+ * at a fixed address in kernel memory.  This is used to provide user space
+ * with some operations which require kernel help because of unimplemented
+ * native feature and/or instructions in some Meta CPUs. The idea is for
+ * this code to be executed directly in user mode for best efficiency but
+ * which is too intimate with the kernel counter part to be left to user
+ * libraries.  The kernel reserves the right to change this code as needed
+ * without warning. Only the entry points and their results are guaranteed
+ * to be stable.
+ *
+ * Each segment is 64-byte aligned.  This mechanism should be used only for
+ * for things that are really small and justified, and not be abused freely.
+ */
+	.text
+	.global	___user_gateway_start
+___user_gateway_start:
+
+	/* get_tls
+	 * Offset:	 0
+	 * Description:	 Get the TLS pointer for this process.
+	 */
+	.global	___kuser_get_tls
+	.type	___kuser_get_tls,function
+___kuser_get_tls:
+	MOVT	D1Ar1,#HI(USER_GATEWAY_PAGE + USER_GATEWAY_TLS)
+	ADD	D1Ar1,D1Ar1,#LO(USER_GATEWAY_PAGE + USER_GATEWAY_TLS)
+	MOV	D1Ar3,TXENABLE
+	AND	D1Ar3,D1Ar3,#(TXENABLE_THREAD_BITS)
+	LSR	D1Ar3,D1Ar3,#(TXENABLE_THREAD_S - 2)
+	GETD	D0Re0,[D1Ar1+D1Ar3]
+___kuser_get_tls_end:		/* Beyond this point the read will complete */
+	MOV	PC,D1RtP
+	.size	___kuser_get_tls,.-___kuser_get_tls
+	.global	___kuser_get_tls_end
+
+	/* cmpxchg
+	 * Offset:	 64
+	 * Description:  Replace the value at 'ptr' with 'newval' if the current
+	 *		 value is 'oldval'. Return zero if we succeeded,
+	 *		 non-zero otherwise.
+	 *
+	 * Reference prototype:
+	 *
+	 *	int __kuser_cmpxchg(int oldval, int newval, unsigned long *ptr)
+	 *
+	 */
+	.balign 64
+	.global ___kuser_cmpxchg
+	.type   ___kuser_cmpxchg,function
+___kuser_cmpxchg:
+#ifdef CONFIG_SMP
+	/*
+	 * We must use LNKGET/LNKSET with an SMP kernel because the other method
+	 * does not provide atomicity across multiple CPUs.
+	 */
+0:	LNKGETD	D0Re0,[D1Ar3]
+	CMP	D0Re0,D1Ar1
+	LNKSETDZ [D1Ar3],D0Ar2
+	BNZ	1f
+	DEFR	D0Re0,TXSTAT
+	ANDT	D0Re0,D0Re0,#HI(0x3f000000)
+	CMPT	D0Re0,#HI(0x02000000)
+	BNE	0b
+#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE
+	DCACHE  [D1Ar3], D0Re0
+#endif
+1:	MOV	D0Re0,#1
+	XORZ	D0Re0,D0Re0,D0Re0
+	MOV	PC,D1RtP
+#else
+	GETD	D0Re0,[D1Ar3]
+	CMP	D0Re0,D1Ar1
+	SETDZ	[D1Ar3],D0Ar2
+___kuser_cmpxchg_end:		/* Beyond this point the write will complete */
+	MOV	D0Re0,#1
+	XORZ	D0Re0,D0Re0,D0Re0
+	MOV	PC,D1RtP
+#endif /* CONFIG_SMP */
+	.size	___kuser_cmpxchg,.-___kuser_cmpxchg
+	.global	___kuser_cmpxchg_end
+
+	.global	___user_gateway_end
+___user_gateway_end:
diff --git a/arch/metag/kernel/vmlinux.lds.S b/arch/metag/kernel/vmlinux.lds.S
new file mode 100644
index 0000000..e12055e
--- /dev/null
+++ b/arch/metag/kernel/vmlinux.lds.S
@@ -0,0 +1,71 @@
+/* ld script to make Meta Linux kernel */
+
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf32-metag", "elf32-metag", "elf32-metag")
+OUTPUT_ARCH(metag)
+ENTRY(__start)
+
+_jiffies = _jiffies_64;
+SECTIONS
+{
+  . = CONFIG_PAGE_OFFSET;
+  _text = .;
+  __text = .;
+  __stext = .;
+  HEAD_TEXT_SECTION
+  .text : {
+	TEXT_TEXT
+	SCHED_TEXT
+	LOCK_TEXT
+	KPROBES_TEXT
+	IRQENTRY_TEXT
+	*(.text.*)
+	*(.gnu.warning)
+	}
+
+  __etext = .;			/* End of text section */
+
+  __sdata = .;
+  RO_DATA_SECTION(PAGE_SIZE)
+  RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+  __edata = .;			/* End of data section */
+
+  EXCEPTION_TABLE(16)
+  NOTES
+
+  . = ALIGN(PAGE_SIZE);		/* Init code and data */
+  ___init_begin = .;
+  INIT_TEXT_SECTION(PAGE_SIZE)
+  INIT_DATA_SECTION(16)
+
+  .init.arch.info : {
+	  ___arch_info_begin = .;
+	  *(.arch.info.init)
+	  ___arch_info_end = .;
+  }
+
+  PERCPU_SECTION(L1_CACHE_BYTES)
+
+  ___init_end = .;
+
+  BSS_SECTION(0, PAGE_SIZE, 0)
+
+  __end = .;
+
+  . = ALIGN(PAGE_SIZE);
+  __heap_start = .;
+
+  DWARF_DEBUG
+
+  /* When something in the kernel is NOT compiled as a module, the
+   * module cleanup code and data are put into these segments.  Both
+   * can then be thrown away, as cleanup code is never called unless
+   * it's a module.
+   */
+  DISCARDS
+}
diff --git a/arch/metag/lib/Makefile b/arch/metag/lib/Makefile
new file mode 100644
index 0000000..a41d24e
--- /dev/null
+++ b/arch/metag/lib/Makefile
@@ -0,0 +1,22 @@
+#
+# Makefile for Meta-specific library files.
+#
+
+lib-y += usercopy.o
+lib-y += copy_page.o
+lib-y += clear_page.o
+lib-y += memcpy.o
+lib-y += memmove.o
+lib-y += memset.o
+lib-y += delay.o
+lib-y += div64.o
+lib-y += muldi3.o
+lib-y += ashrdi3.o
+lib-y += ashldi3.o
+lib-y += lshrdi3.o
+lib-y += divsi3.o
+lib-y += modsi3.o
+lib-y += cmpdi2.o
+lib-y += ucmpdi2.o
+lib-y += ip_fast_csum.o
+lib-y += checksum.o
diff --git a/arch/metag/lib/ashldi3.S b/arch/metag/lib/ashldi3.S
new file mode 100644
index 0000000..78d6974
--- /dev/null
+++ b/arch/metag/lib/ashldi3.S
@@ -0,0 +1,33 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit arithmetic shift left routine.
+!
+
+	.text
+	.global ___ashldi3
+	.type   ___ashldi3,function
+
+___ashldi3:
+	MOV     D0Re0,D0Ar2
+	MOV     D1Re0,D1Ar1
+	CMP     D1Ar3,#0                ! COUNT == 0
+	MOVEQ   PC,D1RtP                ! Yes, return
+
+	SUBS    D0Ar4,D1Ar3,#32         ! N = COUNT - 32
+	BGE     $L10
+
+!! Shift < 32
+	NEG     D0Ar4,D0Ar4             ! N = - N
+	LSL     D1Re0,D1Re0,D1Ar3       ! HI = HI << COUNT
+	LSR     D0Ar6,D0Re0,D0Ar4       ! TMP= LO >> -(COUNT - 32)
+	OR      D1Re0,D1Re0,D0Ar6       ! HI = HI | TMP
+	SWAP    D0Ar4,D1Ar3
+	LSL     D0Re0,D0Re0,D0Ar4       ! LO = LO << COUNT
+	MOV     PC,D1RtP
+
+$L10:
+!! Shift >= 32
+	LSL     D1Re0,D0Re0,D0Ar4       ! HI = LO << N
+	MOV     D0Re0,#0                ! LO = 0
+	MOV     PC,D1RtP
+	.size ___ashldi3,.-___ashldi3
diff --git a/arch/metag/lib/ashrdi3.S b/arch/metag/lib/ashrdi3.S
new file mode 100644
index 0000000..7cb7ed3
--- /dev/null
+++ b/arch/metag/lib/ashrdi3.S
@@ -0,0 +1,33 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit arithmetic shift right routine.
+!
+
+	.text
+	.global ___ashrdi3
+	.type   ___ashrdi3,function
+
+___ashrdi3:
+	MOV     D0Re0,D0Ar2
+	MOV     D1Re0,D1Ar1
+	CMP     D1Ar3,#0                ! COUNT == 0
+	MOVEQ   PC,D1RtP                ! Yes, return
+
+	MOV     D0Ar4,D1Ar3
+	SUBS    D1Ar3,D1Ar3,#32         ! N = COUNT - 32
+	BGE     $L20
+
+!! Shift < 32
+	NEG     D1Ar3,D1Ar3             ! N = - N
+	LSR     D0Re0,D0Re0,D0Ar4       ! LO = LO >> COUNT
+	LSL     D0Ar6,D1Re0,D1Ar3       ! TMP= HI << -(COUNT - 32)
+	OR      D0Re0,D0Re0,D0Ar6       ! LO = LO | TMP
+	SWAP    D1Ar3,D0Ar4
+	ASR     D1Re0,D1Re0,D1Ar3       ! HI = HI >> COUNT
+	MOV     PC,D1RtP
+$L20:
+!! Shift >= 32
+	ASR     D0Re0,D1Re0,D1Ar3       ! LO = HI >> N
+	ASR     D1Re0,D1Re0,#31         ! HI = HI >> 31
+	MOV     PC,D1RtP
+	.size ___ashrdi3,.-___ashrdi3
diff --git a/arch/metag/lib/checksum.c b/arch/metag/lib/checksum.c
new file mode 100644
index 0000000..44d2e19
--- /dev/null
+++ b/arch/metag/lib/checksum.c
@@ -0,0 +1,168 @@
+/*
+ *
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		IP/TCP/UDP checksumming routines
+ *
+ * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
+ *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *		Tom May, <ftom@netcom.com>
+ *		Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>
+ *		Lots of code moved from tcp.c and ip.c; see those files
+ *		for more names.
+ *
+ * 03/02/96	Jes Sorensen, Andreas Schwab, Roman Hodek:
+ *		Fixed some nasty bugs, causing some horrible crashes.
+ *		A: At some points, the sum (%0) was used as
+ *		length-counter instead of the length counter
+ *		(%1). Thanks to Roman Hodek for pointing this out.
+ *		B: GCC seems to mess up if one uses too many
+ *		data-registers to hold input values and one tries to
+ *		specify d0 and d1 as scratch registers. Letting gcc
+ *		choose these registers itself solves the problem.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+/* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access
+ kills, so most of the assembly has to go. */
+
+#include <linux/module.h>
+#include <net/checksum.h>
+
+#include <asm/byteorder.h>
+
+static inline unsigned short from32to16(unsigned int x)
+{
+	/* add up 16-bit and 16-bit for 16+c bit */
+	x = (x & 0xffff) + (x >> 16);
+	/* add up carry.. */
+	x = (x & 0xffff) + (x >> 16);
+	return x;
+}
+
+static unsigned int do_csum(const unsigned char *buff, int len)
+{
+	int odd;
+	unsigned int result = 0;
+
+	if (len <= 0)
+		goto out;
+	odd = 1 & (unsigned long) buff;
+	if (odd) {
+#ifdef __LITTLE_ENDIAN
+		result += (*buff << 8);
+#else
+		result = *buff;
+#endif
+		len--;
+		buff++;
+	}
+	if (len >= 2) {
+		if (2 & (unsigned long) buff) {
+			result += *(unsigned short *) buff;
+			len -= 2;
+			buff += 2;
+		}
+		if (len >= 4) {
+			const unsigned char *end = buff + ((unsigned)len & ~3);
+			unsigned int carry = 0;
+			do {
+				unsigned int w = *(unsigned int *) buff;
+				buff += 4;
+				result += carry;
+				result += w;
+				carry = (w > result);
+			} while (buff < end);
+			result += carry;
+			result = (result & 0xffff) + (result >> 16);
+		}
+		if (len & 2) {
+			result += *(unsigned short *) buff;
+			buff += 2;
+		}
+	}
+	if (len & 1)
+#ifdef __LITTLE_ENDIAN
+		result += *buff;
+#else
+		result += (*buff << 8);
+#endif
+	result = from32to16(result);
+	if (odd)
+		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+out:
+	return result;
+}
+EXPORT_SYMBOL(ip_fast_csum);
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+__wsum csum_partial(const void *buff, int len, __wsum wsum)
+{
+	unsigned int sum = (__force unsigned int)wsum;
+	unsigned int result = do_csum(buff, len);
+
+	/* add in old sum, and carry.. */
+	result += sum;
+	if (sum > result)
+		result += 1;
+	return (__force __wsum)result;
+}
+EXPORT_SYMBOL(csum_partial);
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+__sum16 ip_compute_csum(const void *buff, int len)
+{
+	return (__force __sum16)~do_csum(buff, len);
+}
+EXPORT_SYMBOL(ip_compute_csum);
+
+/*
+ * copy from fs while checksumming, otherwise like csum_partial
+ */
+__wsum
+csum_partial_copy_from_user(const void __user *src, void *dst, int len,
+						__wsum sum, int *csum_err)
+{
+	int missing;
+
+	missing = __copy_from_user(dst, src, len);
+	if (missing) {
+		memset(dst + len - missing, 0, missing);
+		*csum_err = -EFAULT;
+	} else
+		*csum_err = 0;
+
+	return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy_from_user);
+
+/*
+ * copy from ds while checksumming, otherwise like csum_partial
+ */
+__wsum
+csum_partial_copy(const void *src, void *dst, int len, __wsum sum)
+{
+	memcpy(dst, src, len);
+	return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy);
diff --git a/arch/metag/lib/clear_page.S b/arch/metag/lib/clear_page.S
new file mode 100644
index 0000000..43144ee
--- /dev/null
+++ b/arch/metag/lib/clear_page.S
@@ -0,0 +1,17 @@
+        ! Copyright 2007,2008,2009 Imagination Technologies Ltd.
+
+#include <asm/page.h>
+
+        .text
+        .global	_clear_page
+        .type   _clear_page,function
+	!! D1Ar1 - page
+_clear_page:
+	MOV  TXRPT,#((PAGE_SIZE / 8) - 1)
+	MOV  D0Re0,#0
+	MOV  D1Re0,#0
+$Lclear_page_loop:
+	SETL [D1Ar1++],D0Re0,D1Re0
+	BR   $Lclear_page_loop
+	MOV  PC,D1RtP
+        .size	_clear_page,.-_clear_page
diff --git a/arch/metag/lib/cmpdi2.S b/arch/metag/lib/cmpdi2.S
new file mode 100644
index 0000000..9c5c663
--- /dev/null
+++ b/arch/metag/lib/cmpdi2.S
@@ -0,0 +1,32 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit signed compare routine.
+!
+
+	.text
+	.global ___cmpdi2
+	.type   ___cmpdi2,function
+
+!         low    high
+! s64 a  (D0Ar2, D1Ar1)
+! s64 b  (D0Ar4, D1Ar3)
+___cmpdi2:
+	! start at 1 (equal) and conditionally increment or decrement
+	MOV	D0Re0,#1
+
+	! high words differ?
+	CMP	D1Ar1,D1Ar3
+	BNE	$Lhigh_differ
+
+	! unsigned compare low words
+	CMP	D0Ar2,D0Ar4
+	SUBLO	D0Re0,D0Re0,#1
+	ADDHI	D0Re0,D0Re0,#1
+	MOV	PC,D1RtP
+
+$Lhigh_differ:
+	! signed compare high words
+	SUBLT	D0Re0,D0Re0,#1
+	ADDGT	D0Re0,D0Re0,#1
+	MOV	PC,D1RtP
+	.size ___cmpdi2,.-___cmpdi2
diff --git a/arch/metag/lib/copy_page.S b/arch/metag/lib/copy_page.S
new file mode 100644
index 0000000..91f7d46
--- /dev/null
+++ b/arch/metag/lib/copy_page.S
@@ -0,0 +1,20 @@
+        ! Copyright 2007,2008 Imagination Technologies Ltd.
+
+#include <asm/page.h>
+
+        .text
+        .global	_copy_page
+        .type   _copy_page,function
+	!! D1Ar1 - to
+	!! D0Ar2 - from
+_copy_page:
+	MOV  D0FrT,#PAGE_SIZE
+$Lcopy_page_loop:
+	GETL D0Re0,D1Re0,[D0Ar2++]
+	GETL D0Ar6,D1Ar5,[D0Ar2++]
+	SETL [D1Ar1++],D0Re0,D1Re0
+	SETL [D1Ar1++],D0Ar6,D1Ar5
+	SUBS D0FrT,D0FrT,#16
+	BNZ  $Lcopy_page_loop
+	MOV  PC,D1RtP
+        .size	_copy_page,.-_copy_page
diff --git a/arch/metag/lib/delay.c b/arch/metag/lib/delay.c
new file mode 100644
index 0000000..0b308f4
--- /dev/null
+++ b/arch/metag/lib/delay.c
@@ -0,0 +1,56 @@
+/*
+ *	Precise Delay Loops for Meta
+ *
+ *	Copyright (C) 1993 Linus Torvalds
+ *	Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *	Copyright (C) 2007,2009 Imagination Technologies Ltd.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+
+#include <asm/core_reg.h>
+#include <asm/processor.h>
+
+/*
+ * TXTACTCYC is only 24 bits, so on chips with fast clocks it will wrap
+ * many times per-second. If it does wrap __delay will return prematurely,
+ * but this is only likely with large delay values.
+ *
+ * We also can't implement read_current_timer() with TXTACTCYC due to
+ * this wrapping behaviour.
+ */
+#define rdtimer(t) t = __core_reg_get(TXTACTCYC)
+
+void __delay(unsigned long loops)
+{
+	unsigned long bclock, now;
+
+	rdtimer(bclock);
+	do {
+		asm("NOP");
+		rdtimer(now);
+	} while ((now-bclock) < loops);
+}
+EXPORT_SYMBOL(__delay);
+
+inline void __const_udelay(unsigned long xloops)
+{
+	u64 loops = (u64)xloops * (u64)loops_per_jiffy * HZ;
+	__delay(loops >> 32);
+}
+EXPORT_SYMBOL(__const_udelay);
+
+void __udelay(unsigned long usecs)
+{
+	__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long nsecs)
+{
+	__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
+}
+EXPORT_SYMBOL(__ndelay);
diff --git a/arch/metag/lib/div64.S b/arch/metag/lib/div64.S
new file mode 100644
index 0000000..1cfc934
--- /dev/null
+++ b/arch/metag/lib/div64.S
@@ -0,0 +1,108 @@
+! Copyright (C) 2012 Imagination Technologies Ltd.
+!
+! Signed/unsigned 64-bit division routines.
+!
+
+	.text
+	.global _div_u64
+	.type   _div_u64,function
+
+_div_u64:
+$L1:
+	ORS     A0.3,D1Ar3,D0Ar4
+	BNE     $L3
+$L2:
+	MOV     D0Re0,D0Ar2
+	MOV     D1Re0,D1Ar1
+	MOV     PC,D1RtP
+$L3:
+	CMP     D1Ar3,D1Ar1
+	CMPEQ   D0Ar4,D0Ar2
+	MOV     D0Re0,#1
+	MOV     D1Re0,#0
+	BHS     $L6
+$L4:
+	ADDS    D0Ar6,D0Ar4,D0Ar4
+	ADD     D1Ar5,D1Ar3,D1Ar3
+	ADDCS   D1Ar5,D1Ar5,#1
+	CMP     D1Ar5,D1Ar3
+	CMPEQ   D0Ar6,D0Ar4
+	BLO     $L6
+$L5:
+	MOV     D0Ar4,D0Ar6
+	MOV     D1Ar3,D1Ar5
+	ADDS    D0Re0,D0Re0,D0Re0
+	ADD     D1Re0,D1Re0,D1Re0
+	ADDCS   D1Re0,D1Re0,#1
+	CMP     D1Ar3,D1Ar1
+	CMPEQ   D0Ar4,D0Ar2
+	BLO     $L4
+$L6:
+	ORS     A0.3,D1Re0,D0Re0
+	MOV     D0Ar6,#0
+	MOV     D1Ar5,D0Ar6
+	BEQ     $L10
+$L7:
+	CMP     D1Ar1,D1Ar3
+	CMPEQ   D0Ar2,D0Ar4
+	BLO     $L9
+$L8:
+	ADDS    D0Ar6,D0Ar6,D0Re0
+	ADD     D1Ar5,D1Ar5,D1Re0
+	ADDCS   D1Ar5,D1Ar5,#1
+
+	SUBS    D0Ar2,D0Ar2,D0Ar4
+	SUB     D1Ar1,D1Ar1,D1Ar3
+	SUBCS   D1Ar1,D1Ar1,#1
+$L9:
+	LSL     A0.3,D1Re0,#31
+	LSR     D0Re0,D0Re0,#1
+	LSR     D1Re0,D1Re0,#1
+	OR      D0Re0,D0Re0,A0.3
+	LSL     A0.3,D1Ar3,#31
+	LSR     D0Ar4,D0Ar4,#1
+	LSR     D1Ar3,D1Ar3,#1
+	OR      D0Ar4,D0Ar4,A0.3
+	ORS     A0.3,D1Re0,D0Re0
+	BNE     $L7
+$L10:
+	MOV     D0Re0,D0Ar6
+	MOV     D1Re0,D1Ar5
+	MOV     PC,D1RtP
+	.size _div_u64,.-_div_u64
+
+	.text
+	.global _div_s64
+	.type   _div_s64,function
+_div_s64:
+	MSETL   [A0StP],D0FrT,D0.5
+	XOR     D0.5,D0Ar2,D0Ar4
+	XOR     D1.5,D1Ar1,D1Ar3
+	TSTT    D1Ar1,#HI(0x80000000)
+	BZ      $L25
+
+	NEGS    D0Ar2,D0Ar2
+	NEG     D1Ar1,D1Ar1
+	SUBCS   D1Ar1,D1Ar1,#1
+$L25:
+	TSTT    D1Ar3,#HI(0x80000000)
+	BZ      $L27
+
+	NEGS    D0Ar4,D0Ar4
+	NEG     D1Ar3,D1Ar3
+	SUBCS   D1Ar3,D1Ar3,#1
+$L27:
+	CALLR   D1RtP,_div_u64
+	TSTT    D1.5,#HI(0x80000000)
+	BZ      $L29
+
+	NEGS    D0Re0,D0Re0
+	NEG     D1Re0,D1Re0
+	SUBCS   D1Re0,D1Re0,#1
+$L29:
+
+	GETL    D0FrT,D1RtP,[A0StP+#(-16)]
+	GETL    D0.5,D1.5,[A0StP+#(-8)]
+	SUB     A0StP,A0StP,#16
+	MOV     PC,D1RtP
+	.size _div_s64,.-_div_s64
diff --git a/arch/metag/lib/divsi3.S b/arch/metag/lib/divsi3.S
new file mode 100644
index 0000000..7c8a8ae
--- /dev/null
+++ b/arch/metag/lib/divsi3.S
@@ -0,0 +1,100 @@
+! Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
+!               Imagination Technologies Ltd
+!
+! Integer divide routines.
+!
+
+	.text
+	.global ___udivsi3
+	.type   ___udivsi3,function
+	.align  2
+___udivsi3:
+!!
+!! Since core is signed divide case, just set control variable
+!!
+	MOV     D1Re0,D0Ar2             ! Au already in A1Ar1, Bu -> D1Re0
+	MOV     D0Re0,#0                ! Result is 0
+	MOV     D0Ar4,#0                ! Return positive result
+	B       $LIDMCUStart
+	.size   ___udivsi3,.-___udivsi3
+
+!!
+!! 32-bit division signed i/p - passed signed 32-bit numbers
+!!
+	.global ___divsi3
+	.type   ___divsi3,function
+	.align  2
+___divsi3:
+!!
+!! A already in D1Ar1, B already in D0Ar2 -> make B abs(B)
+!!
+	MOV     D1Re0,D0Ar2             ! A already in A1Ar1, B -> D1Re0
+	MOV     D0Re0,#0                ! Result is 0
+	XOR     D0Ar4,D1Ar1,D1Re0       ! D0Ar4 -ive if result is -ive
+	ABS     D1Ar1,D1Ar1             ! abs(A) -> Au
+	ABS     D1Re0,D1Re0             ! abs(B) -> Bu
+$LIDMCUStart:
+	CMP     D1Ar1,D1Re0             ! Is ( Au > Bu )?
+	LSR     D1Ar3,D1Ar1,#2          ! Calculate (Au & (~3)) >> 2
+	CMPHI   D1Re0,D1Ar3             ! OR ( (Au & (~3)) <= (Bu << 2) )?
+	LSLSHI  D1Ar3,D1Re0,#1          ! Buq = Bu << 1
+	BLS     $LIDMCUSetup            ! Yes: Do normal divide
+!!
+!! Quick divide setup can assume that CurBit only needs to start at 2
+!!
+$LIDMCQuick:
+	CMP     D1Ar1,D1Ar3             ! ( A >= Buq )?
+	ADDCC   D0Re0,D0Re0,#2          ! If yes result += 2
+	SUBCC   D1Ar1,D1Ar1,D1Ar3       !        and A -= Buq
+	CMP     D1Ar1,D1Re0             ! ( A >= Bu )?
+	ADDCC   D0Re0,D0Re0,#1          ! If yes result += 1
+	SUBCC   D1Ar1,D1Ar1,D1Re0       !        and A -= Bu
+	ORS     D0Ar4,D0Ar4,D0Ar4       ! Return neg result?
+	NEG     D0Ar2,D0Re0             ! Calulate neg result
+	MOVMI   D0Re0,D0Ar2             ! Yes: Take neg result
+$LIDMCRet:
+	MOV     PC,D1RtP
+!!
+!!  Setup for general unsigned divide code
+!!
+!!      D0Re0 is used to form the result, already set to Zero
+!!      D1Re0 is the input Bu value, this gets trashed
+!!      D0Ar6 is curbit which is set to 1 at the start and shifted up
+!!      D0Ar4 is negative if we should return a negative result
+!!      D1Ar1 is the input Au value, eventually this holds the remainder
+!!
+$LIDMCUSetup:
+	CMP     D1Ar1,D1Re0             ! Is ( Au < Bu )?
+	MOV     D0Ar6,#1                ! Set curbit to 1
+	BCS     $LIDMCRet               ! Yes: Return 0 remainder Au
+!!
+!! Calculate alignment using FFB instruction
+!!
+	FFB     D1Ar5,D1Ar1             ! Find first bit of Au
+	ANDN    D1Ar5,D1Ar5,#31         ! Handle exceptional case.
+	ORN     D1Ar5,D1Ar5,#31         ! if N bit set, set to 31
+	FFB     D1Ar3,D1Re0             ! Find first bit of Bu
+	ANDN    D1Ar3,D1Ar3,#31         ! Handle exceptional case.
+	ORN     D1Ar3,D1Ar3,#31         ! if N bit set, set to 31
+	SUBS    D1Ar3,D1Ar5,D1Ar3       ! calculate diff, ffbA - ffbB
+	MOV     D0Ar2,D1Ar3             ! copy into bank 0
+	LSLGT   D1Re0,D1Re0,D1Ar3       ! ( > 0) ? left shift B
+	LSLGT   D0Ar6,D0Ar6,D0Ar2       ! ( > 0) ? left shift curbit
+!!
+!! Now we start the divide proper, logic is
+!!
+!!       if ( A >= B ) add curbit to result and subtract B from A
+!!       shift curbit and B down by 1 in either case
+!!
+$LIDMCLoop:
+	CMP     D1Ar1, D1Re0            ! ( A >= B )?
+	ADDCC   D0Re0, D0Re0, D0Ar6     ! If yes result += curbit
+	SUBCC   D1Ar1, D1Ar1, D1Re0     ! and A -= B
+	LSRS    D0Ar6, D0Ar6, #1        ! Shift down curbit, is it zero?
+	LSR     D1Re0, D1Re0, #1        ! Shift down B
+	BNZ     $LIDMCLoop               ! Was single bit in curbit lost?
+	ORS     D0Ar4,D0Ar4,D0Ar4       ! Return neg result?
+	NEG     D0Ar2,D0Re0             ! Calulate neg result
+	MOVMI   D0Re0,D0Ar2             ! Yes: Take neg result
+	MOV     PC,D1RtP
+	.size   ___divsi3,.-___divsi3
diff --git a/arch/metag/lib/ip_fast_csum.S b/arch/metag/lib/ip_fast_csum.S
new file mode 100644
index 0000000..533b1e7
--- /dev/null
+++ b/arch/metag/lib/ip_fast_csum.S
@@ -0,0 +1,32 @@
+
+	.text
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ *
+ * extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+ *
+ */
+	.global _ip_fast_csum
+	.type   _ip_fast_csum,function
+_ip_fast_csum:
+	!! TXRPT needs loops - 1
+	SUBS	TXRPT,D0Ar2,#1
+	MOV	D0Re0,#0
+	BLO	$Lfast_csum_exit
+$Lfast_csum_loop:
+	GETD	D1Ar3,[D1Ar1++]
+	ADDS	D0Re0,D0Re0,D1Ar3
+	ADDCS   D0Re0,D0Re0,#1
+	BR	$Lfast_csum_loop
+	LSR	D0Ar4,D0Re0,#16
+	AND	D0Re0,D0Re0,#0xffff
+	AND	D0Ar4,D0Ar4,#0xffff
+	ADD	D0Re0,D0Re0,D0Ar4
+	LSR	D0Ar4,D0Re0,#16
+	ADD	D0Re0,D0Re0,D0Ar4
+	XOR	D0Re0,D0Re0,#-1
+	AND	D0Re0,D0Re0,#0xffff
+$Lfast_csum_exit:
+	MOV	PC,D1RtP
+	.size _ip_fast_csum,.-_ip_fast_csum
diff --git a/arch/metag/lib/lshrdi3.S b/arch/metag/lib/lshrdi3.S
new file mode 100644
index 0000000..47f7202
--- /dev/null
+++ b/arch/metag/lib/lshrdi3.S
@@ -0,0 +1,33 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit logical shift right routine.
+!
+
+	.text
+	.global ___lshrdi3
+	.type   ___lshrdi3,function
+
+___lshrdi3:
+	MOV     D0Re0,D0Ar2
+	MOV     D1Re0,D1Ar1
+	CMP     D1Ar3,#0                ! COUNT == 0
+	MOVEQ   PC,D1RtP                ! Yes, return
+
+	MOV     D0Ar4,D1Ar3
+	SUBS    D1Ar3,D1Ar3,#32         ! N = COUNT - 32
+	BGE     $L30
+
+!! Shift < 32
+	NEG     D1Ar3,D1Ar3             ! N = - N
+	LSR     D0Re0,D0Re0,D0Ar4       ! LO = LO >> COUNT
+	LSL     D0Ar6,D1Re0,D1Ar3       ! TMP= HI << -(COUNT - 32)
+	OR      D0Re0,D0Re0,D0Ar6       ! LO = LO | TMP
+	SWAP    D1Ar3,D0Ar4
+	LSR     D1Re0,D1Re0,D1Ar3       ! HI = HI >> COUNT
+	MOV     PC,D1RtP
+$L30:
+!! Shift >= 32
+	LSR     D0Re0,D1Re0,D1Ar3       ! LO = HI >> N
+	MOV     D1Re0,#0                ! HI = 0
+	MOV     PC,D1RtP
+	.size ___lshrdi3,.-___lshrdi3
diff --git a/arch/metag/lib/memcpy.S b/arch/metag/lib/memcpy.S
new file mode 100644
index 0000000..46b7a2b
--- /dev/null
+++ b/arch/metag/lib/memcpy.S
@@ -0,0 +1,185 @@
+!   Copyright (C) 2008-2012 Imagination Technologies Ltd.
+
+	.text
+	.global	_memcpy
+	.type	_memcpy,function
+! D1Ar1 dst
+! D0Ar2 src
+! D1Ar3 cnt
+! D0Re0 dst
+_memcpy:
+	CMP 	D1Ar3, #16
+	MOV 	A1.2, D0Ar2		! source pointer
+	MOV 	A0.2, D1Ar1		! destination pointer
+	MOV 	A0.3, D1Ar1		! for return value
+! If there are less than 16 bytes to copy use the byte copy loop
+	BGE 	$Llong_copy
+
+$Lbyte_copy:
+! Simply copy a byte at a time
+	SUBS	TXRPT, D1Ar3, #1
+	BLT	$Lend
+$Lloop_byte:
+	GETB 	D1Re0, [A1.2++]
+	SETB 	[A0.2++], D1Re0
+	BR	$Lloop_byte
+
+$Lend:
+! Finally set return value and return
+	MOV 	D0Re0, A0.3
+	MOV 	PC, D1RtP
+
+$Llong_copy:
+	ANDS 	D1Ar5, D1Ar1, #7	! test destination alignment
+	BZ	$Laligned_dst
+
+! The destination address is not 8 byte aligned. We will copy bytes from
+! the source to the destination until the remaining data has an 8 byte
+! destination address alignment (i.e we should never copy more than 7
+! bytes here).
+$Lalign_dst:
+	GETB 	D0Re0, [A1.2++]
+	ADD 	D1Ar5, D1Ar5, #1	! dest is aligned when D1Ar5 reaches #8
+	SUB 	D1Ar3, D1Ar3, #1	! decrement count of remaining bytes
+	SETB 	[A0.2++], D0Re0
+	CMP 	D1Ar5, #8
+	BNE 	$Lalign_dst
+
+! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte
+! blocks, then jump to the unaligned copy loop or fall through to the aligned
+! copy loop as appropriate.
+$Laligned_dst:
+	MOV	D0Ar4, A1.2
+	LSR 	D1Ar5, D1Ar3, #3	! D1Ar5 = number of 8 byte blocks
+	ANDS 	D0Ar4, D0Ar4, #7	! test source alignment
+	BNZ 	$Lunaligned_copy	! if unaligned, use unaligned copy loop
+
+! Both source and destination are 8 byte aligned - the easy case.
+$Laligned_copy:
+	LSRS	D1Ar5, D1Ar3, #5	! D1Ar5 = number of 32 byte blocks
+	BZ	$Lbyte_copy
+	SUB	TXRPT, D1Ar5, #1
+
+$Laligned_32:
+	GETL 	D0Re0, D1Re0, [A1.2++]
+	GETL 	D0Ar6, D1Ar5, [A1.2++]
+	SETL 	[A0.2++], D0Re0, D1Re0
+	SETL 	[A0.2++], D0Ar6, D1Ar5
+	GETL 	D0Re0, D1Re0, [A1.2++]
+	GETL 	D0Ar6, D1Ar5, [A1.2++]
+	SETL 	[A0.2++], D0Re0, D1Re0
+	SETL 	[A0.2++], D0Ar6, D1Ar5
+	BR	$Laligned_32
+
+! If there are any remaining bytes use the byte copy loop, otherwise we are done
+	ANDS 	D1Ar3, D1Ar3, #0x1f
+	BNZ	$Lbyte_copy
+	B	$Lend
+
+! The destination is 8 byte aligned but the source is not, and there are 8
+! or more bytes to be copied.
+$Lunaligned_copy:
+! Adjust the source pointer (A1.2) to the 8 byte boundary before its
+! current value
+	MOV 	D0Ar4, A1.2
+	MOV 	D0Ar6, A1.2
+	ANDMB 	D0Ar4, D0Ar4, #0xfff8
+	MOV 	A1.2, D0Ar4
+! Save the number of bytes of mis-alignment in D0Ar4 for use later
+	SUBS 	D0Ar6, D0Ar6, D0Ar4
+	MOV	D0Ar4, D0Ar6
+! if there is no mis-alignment after all, use the aligned copy loop
+	BZ 	$Laligned_copy
+
+! prefetch 8 bytes
+	GETL 	D0Re0, D1Re0, [A1.2]
+
+	SUB	TXRPT, D1Ar5, #1
+
+! There are 3 mis-alignment cases to be considered. Less than 4 bytes, exactly
+! 4 bytes, and more than 4 bytes.
+	CMP 	D0Ar6, #4
+	BLT 	$Lunaligned_1_2_3	! use 1-3 byte mis-alignment loop
+	BZ 	$Lunaligned_4		! use 4 byte mis-alignment loop
+
+! The mis-alignment is more than 4 bytes
+$Lunaligned_5_6_7:
+	SUB 	D0Ar6, D0Ar6, #4
+! Calculate the bit offsets required for the shift operations necesssary
+! to align the data.
+! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
+	MULW 	D0Ar6, D0Ar6, #8
+	MOV	D1Ar5, #32
+	SUB	D1Ar5, D1Ar5, D0Ar6
+! Move data 4 bytes before we enter the main loop
+	MOV 	D0Re0, D1Re0
+
+$Lloop_5_6_7:
+	GETL 	D0Ar2, D1Ar1, [++A1.2]
+! form 64-bit data in D0Re0, D1Re0
+	LSR 	D0Re0, D0Re0, D0Ar6
+	MOV 	D1Re0, D0Ar2
+	LSL 	D1Re0, D1Re0, D1Ar5
+	ADD 	D0Re0, D0Re0, D1Re0
+
+	LSR 	D0Ar2, D0Ar2, D0Ar6
+	LSL 	D1Re0, D1Ar1, D1Ar5
+	ADD 	D1Re0, D1Re0, D0Ar2
+
+	SETL 	[A0.2++], D0Re0, D1Re0
+	MOV 	D0Re0, D1Ar1
+	BR	$Lloop_5_6_7
+
+	B 	$Lunaligned_end
+
+$Lunaligned_1_2_3:
+! Calculate the bit offsets required for the shift operations necesssary
+! to align the data.
+! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
+	MULW 	D0Ar6, D0Ar6, #8
+	MOV	D1Ar5, #32
+	SUB	D1Ar5, D1Ar5, D0Ar6
+
+$Lloop_1_2_3:
+! form 64-bit data in D0Re0,D1Re0
+	LSR 	D0Re0, D0Re0, D0Ar6
+	LSL 	D1Ar1, D1Re0, D1Ar5
+	ADD 	D0Re0, D0Re0, D1Ar1
+	MOV	D0Ar2, D1Re0
+	LSR 	D0FrT, D0Ar2, D0Ar6
+	GETL 	D0Ar2, D1Ar1, [++A1.2]
+
+	MOV 	D1Re0, D0Ar2
+	LSL 	D1Re0, D1Re0, D1Ar5
+	ADD 	D1Re0, D1Re0, D0FrT
+
+	SETL 	[A0.2++], D0Re0, D1Re0
+	MOV 	D0Re0, D0Ar2
+	MOV 	D1Re0, D1Ar1
+	BR	$Lloop_1_2_3
+
+	B 	$Lunaligned_end
+
+! The 4 byte mis-alignment case - this does not require any shifting, just a
+! shuffling of registers.
+$Lunaligned_4:
+	MOV 	D0Re0, D1Re0
+$Lloop_4:
+	GETL 	D0Ar2, D1Ar1, [++A1.2]
+	MOV 	D1Re0, D0Ar2
+	SETL 	[A0.2++], D0Re0, D1Re0
+	MOV 	D0Re0, D1Ar1
+	BR	$Lloop_4
+
+$Lunaligned_end:
+! If there are no remaining bytes to copy, we are done.
+	ANDS 	D1Ar3, D1Ar3, #7
+	BZ	$Lend
+! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte
+! address of the remaining bytes, and fall through to the byte copy loop.
+	MOV 	D0Ar6, A1.2
+	ADD 	D1Ar5, D0Ar4, D0Ar6
+	MOV 	A1.2, D1Ar5
+	B	$Lbyte_copy
+
+	.size _memcpy,.-_memcpy
diff --git a/arch/metag/lib/memmove.S b/arch/metag/lib/memmove.S
new file mode 100644
index 0000000..228ea04
--- /dev/null
+++ b/arch/metag/lib/memmove.S
@@ -0,0 +1,345 @@
+!   Copyright (C) 2008-2012 Imagination Technologies Ltd.
+
+	.text
+	.global	_memmove
+	.type	_memmove,function
+! D1Ar1 dst
+! D0Ar2 src
+! D1Ar3 cnt
+! D0Re0 dst
+_memmove:
+	CMP 	D1Ar3, #0
+	MOV 	D0Re0, D1Ar1
+	BZ 	$LEND2
+	MSETL 	[A0StP], D0.5, D0.6, D0.7
+	MOV 	D1Ar5, D0Ar2
+	CMP 	D1Ar1, D1Ar5
+	BLT 	$Lforwards_copy
+	SUB 	D0Ar4, D1Ar1, D1Ar3
+	ADD 	D0Ar4, D0Ar4, #1
+	CMP 	D0Ar2, D0Ar4
+	BLT 	$Lforwards_copy
+	! should copy backwards
+	MOV 	D1Re0, D0Ar2
+	! adjust pointer to the end of mem
+	ADD 	D0Ar2, D1Re0, D1Ar3
+	ADD 	D1Ar1, D1Ar1, D1Ar3
+
+	MOV 	A1.2, D0Ar2
+	MOV 	A0.2, D1Ar1
+	CMP 	D1Ar3, #8
+	BLT 	$Lbbyte_loop
+
+	MOV 	D0Ar4, D0Ar2
+	MOV 	D1Ar5, D1Ar1
+
+	! test 8 byte alignment
+	ANDS 	D1Ar5, D1Ar5, #7
+	BNE 	$Lbdest_unaligned
+
+	ANDS 	D0Ar4, D0Ar4, #7
+	BNE 	$Lbsrc_unaligned
+
+	LSR 	D1Ar5, D1Ar3, #3
+
+$Lbaligned_loop:
+	GETL 	D0Re0, D1Re0, [--A1.2]
+	SETL 	[--A0.2], D0Re0, D1Re0
+	SUBS 	D1Ar5, D1Ar5, #1
+	BNE 	$Lbaligned_loop
+
+	ANDS 	D1Ar3, D1Ar3, #7
+	BZ 	$Lbbyte_loop_exit
+$Lbbyte_loop:
+	GETB 	D1Re0, [--A1.2]
+	SETB 	[--A0.2], D1Re0
+	SUBS 	D1Ar3, D1Ar3, #1
+	BNE 	$Lbbyte_loop
+$Lbbyte_loop_exit:
+	MOV 	D0Re0, A0.2
+$LEND:
+	SUB 	A0.2, A0StP, #24
+	MGETL 	D0.5, D0.6, D0.7, [A0.2]
+	SUB 	A0StP, A0StP, #24
+$LEND2:
+	MOV 	PC, D1RtP
+
+$Lbdest_unaligned:
+	GETB 	D0Re0, [--A1.2]
+	SETB 	[--A0.2], D0Re0
+	SUBS 	D1Ar5, D1Ar5, #1
+	SUB 	D1Ar3, D1Ar3, #1
+	BNE 	$Lbdest_unaligned
+	CMP 	D1Ar3, #8
+	BLT 	$Lbbyte_loop
+$Lbsrc_unaligned:
+	LSR 	D1Ar5, D1Ar3, #3
+	! adjust A1.2
+	MOV 	D0Ar4, A1.2
+	! save original address
+	MOV 	D0Ar6, A1.2
+
+	ADD 	D0Ar4, D0Ar4, #7
+	ANDMB 	D0Ar4, D0Ar4, #0xfff8
+	! new address is the 8-byte aligned one above the original
+	MOV 	A1.2, D0Ar4
+
+	! A0.2 dst 64-bit is aligned
+	! measure the gap size
+	SUB 	D0Ar6, D0Ar4, D0Ar6
+	MOVS 	D0Ar4, D0Ar6
+	! keep this information for the later adjustment
+	! both aligned
+	BZ 	$Lbaligned_loop
+
+	! prefetch
+	GETL 	D0Re0, D1Re0, [--A1.2]
+
+	CMP 	D0Ar6, #4
+	BLT 	$Lbunaligned_1_2_3
+	! 32-bit aligned
+	BZ 	$Lbaligned_4
+
+	SUB 	D0Ar6, D0Ar6, #4
+	! D1.6 stores the gap size in bits
+	MULW 	D1.6, D0Ar6, #8
+	MOV 	D0.6, #32
+	! D0.6 stores the complement of the gap size
+	SUB 	D0.6, D0.6, D1.6
+
+$Lbunaligned_5_6_7:
+	GETL 	D0.7, D1.7, [--A1.2]
+	! form 64-bit data in D0Re0, D1Re0
+	MOV 	D1Re0, D0Re0
+	! D1Re0 << gap-size
+	LSL 	D1Re0, D1Re0, D1.6
+	MOV 	D0Re0, D1.7
+	! D0Re0 >> complement
+	LSR 	D0Re0, D0Re0, D0.6
+	MOV 	D1.5, D0Re0
+	! combine the both
+	ADD 	D1Re0, D1Re0, D1.5
+
+	MOV 	D1.5, D1.7
+	LSL 	D1.5, D1.5, D1.6
+	MOV 	D0Re0, D0.7
+	LSR 	D0Re0, D0Re0, D0.6
+	MOV 	D0.5, D1.5
+	ADD 	D0Re0, D0Re0, D0.5
+
+	SETL 	[--A0.2], D0Re0, D1Re0
+	MOV 	D0Re0, D0.7
+	MOV 	D1Re0, D1.7
+	SUBS 	D1Ar5, D1Ar5, #1
+	BNE 	$Lbunaligned_5_6_7
+
+	ANDS 	D1Ar3, D1Ar3, #7
+	BZ 	$Lbbyte_loop_exit
+	! Adjust A1.2
+	! A1.2 <- A1.2 +8 - gapsize
+	ADD 	A1.2, A1.2, #8
+	SUB 	A1.2, A1.2, D0Ar4
+	B 	$Lbbyte_loop
+
+$Lbunaligned_1_2_3:
+	MULW 	D1.6, D0Ar6, #8
+	MOV 	D0.6, #32
+	SUB 	D0.6, D0.6, D1.6
+
+$Lbunaligned_1_2_3_loop:
+	GETL 	D0.7, D1.7, [--A1.2]
+	! form 64-bit data in D0Re0, D1Re0
+	LSL 	D1Re0, D1Re0, D1.6
+	! save D0Re0 for later use
+	MOV 	D0.5, D0Re0
+	LSR 	D0Re0, D0Re0, D0.6
+	MOV 	D1.5, D0Re0
+	ADD 	D1Re0, D1Re0, D1.5
+
+	! orignal data in D0Re0
+	MOV 	D1.5, D0.5
+	LSL 	D1.5, D1.5, D1.6
+	MOV 	D0Re0, D1.7
+	LSR 	D0Re0, D0Re0, D0.6
+	MOV 	D0.5, D1.5
+	ADD 	D0Re0, D0Re0, D0.5
+
+	SETL 	[--A0.2], D0Re0, D1Re0
+	MOV 	D0Re0, D0.7
+	MOV 	D1Re0, D1.7
+	SUBS 	D1Ar5, D1Ar5, #1
+	BNE 	$Lbunaligned_1_2_3_loop
+
+	ANDS 	D1Ar3, D1Ar3, #7
+	BZ 	$Lbbyte_loop_exit
+	! Adjust A1.2
+	ADD 	A1.2, A1.2, #8
+	SUB 	A1.2, A1.2, D0Ar4
+	B 	$Lbbyte_loop
+
+$Lbaligned_4:
+	GETL 	D0.7, D1.7, [--A1.2]
+	MOV 	D1Re0, D0Re0
+	MOV 	D0Re0, D1.7
+	SETL 	[--A0.2], D0Re0, D1Re0
+	MOV 	D0Re0, D0.7
+	MOV 	D1Re0, D1.7
+	SUBS 	D1Ar5, D1Ar5, #1
+	BNE 	$Lbaligned_4
+	ANDS 	D1Ar3, D1Ar3, #7
+	BZ 	$Lbbyte_loop_exit
+	! Adjust A1.2
+	ADD 	A1.2, A1.2, #8
+	SUB 	A1.2, A1.2, D0Ar4
+	B 	$Lbbyte_loop
+
+$Lforwards_copy:
+	MOV 	A1.2, D0Ar2
+	MOV 	A0.2, D1Ar1
+	CMP 	D1Ar3, #8
+	BLT 	$Lfbyte_loop
+
+	MOV 	D0Ar4, D0Ar2
+	MOV 	D1Ar5, D1Ar1
+
+	ANDS 	D1Ar5, D1Ar5, #7
+	BNE 	$Lfdest_unaligned
+
+	ANDS 	D0Ar4, D0Ar4, #7
+	BNE 	$Lfsrc_unaligned
+
+	LSR 	D1Ar5, D1Ar3, #3
+
+$Lfaligned_loop:
+	GETL 	D0Re0, D1Re0, [A1.2++]
+	SUBS 	D1Ar5, D1Ar5, #1
+	SETL 	[A0.2++], D0Re0, D1Re0
+	BNE 	$Lfaligned_loop
+
+	ANDS 	D1Ar3, D1Ar3, #7
+	BZ 	$Lfbyte_loop_exit
+$Lfbyte_loop:
+	GETB 	D1Re0, [A1.2++]
+	SETB 	[A0.2++], D1Re0
+	SUBS 	D1Ar3, D1Ar3, #1
+	BNE 	$Lfbyte_loop
+$Lfbyte_loop_exit:
+	MOV 	D0Re0, D1Ar1
+	B 	$LEND
+
+$Lfdest_unaligned:
+	GETB 	D0Re0, [A1.2++]
+	ADD 	D1Ar5, D1Ar5, #1
+	SUB 	D1Ar3, D1Ar3, #1
+	SETB 	[A0.2++], D0Re0
+	CMP 	D1Ar5, #8
+	BNE 	$Lfdest_unaligned
+	CMP 	D1Ar3, #8
+	BLT 	$Lfbyte_loop
+$Lfsrc_unaligned:
+	! adjust A1.2
+	LSR 	D1Ar5, D1Ar3, #3
+
+	MOV 	D0Ar4, A1.2
+	MOV 	D0Ar6, A1.2
+	ANDMB 	D0Ar4, D0Ar4, #0xfff8
+	MOV 	A1.2, D0Ar4
+
+	! A0.2 dst 64-bit is aligned
+	SUB 	D0Ar6, D0Ar6, D0Ar4
+	! keep the information for the later adjustment
+	MOVS 	D0Ar4, D0Ar6
+
+	! both aligned
+	BZ 	$Lfaligned_loop
+
+	! prefetch
+	GETL 	D0Re0, D1Re0, [A1.2]
+
+	CMP 	D0Ar6, #4
+	BLT 	$Lfunaligned_1_2_3
+	BZ 	$Lfaligned_4
+
+	SUB 	D0Ar6, D0Ar6, #4
+	MULW 	D0.6, D0Ar6, #8
+	MOV 	D1.6, #32
+	SUB 	D1.6, D1.6, D0.6
+
+$Lfunaligned_5_6_7:
+	GETL 	D0.7, D1.7, [++A1.2]
+	! form 64-bit data in D0Re0, D1Re0
+	MOV 	D0Re0, D1Re0
+	LSR 	D0Re0, D0Re0, D0.6
+	MOV 	D1Re0, D0.7
+	LSL 	D1Re0, D1Re0, D1.6
+	MOV 	D0.5, D1Re0
+	ADD 	D0Re0, D0Re0, D0.5
+
+	MOV 	D0.5, D0.7
+	LSR 	D0.5, D0.5, D0.6
+	MOV 	D1Re0, D1.7
+	LSL 	D1Re0, D1Re0, D1.6
+	MOV 	D1.5, D0.5
+	ADD 	D1Re0, D1Re0, D1.5
+
+	SETL 	[A0.2++], D0Re0, D1Re0
+	MOV 	D0Re0, D0.7
+	MOV 	D1Re0, D1.7
+	SUBS 	D1Ar5, D1Ar5, #1
+	BNE 	$Lfunaligned_5_6_7
+
+	ANDS 	D1Ar3, D1Ar3, #7
+	BZ 	$Lfbyte_loop_exit
+	! Adjust A1.2
+	ADD	A1.2, A1.2, D0Ar4
+	B 	$Lfbyte_loop
+
+$Lfunaligned_1_2_3:
+	MULW 	D0.6, D0Ar6, #8
+	MOV 	D1.6, #32
+	SUB 	D1.6, D1.6, D0.6
+
+$Lfunaligned_1_2_3_loop:
+	GETL 	D0.7, D1.7, [++A1.2]
+	! form 64-bit data in D0Re0, D1Re0
+	LSR 	D0Re0, D0Re0, D0.6
+	MOV 	D1.5, D1Re0
+	LSL 	D1Re0, D1Re0, D1.6
+	MOV 	D0.5, D1Re0
+	ADD 	D0Re0, D0Re0, D0.5
+
+	MOV 	D0.5, D1.5
+	LSR 	D0.5, D0.5, D0.6
+	MOV 	D1Re0, D0.7
+	LSL 	D1Re0, D1Re0, D1.6
+	MOV 	D1.5, D0.5
+	ADD 	D1Re0, D1Re0, D1.5
+
+	SETL 	[A0.2++], D0Re0, D1Re0
+	MOV 	D0Re0, D0.7
+	MOV 	D1Re0, D1.7
+	SUBS 	D1Ar5, D1Ar5, #1
+	BNE 	$Lfunaligned_1_2_3_loop
+
+	ANDS 	D1Ar3, D1Ar3, #7
+	BZ 	$Lfbyte_loop_exit
+	! Adjust A1.2
+	ADD	A1.2, A1.2, D0Ar4
+	B 	$Lfbyte_loop
+
+$Lfaligned_4:
+	GETL 	D0.7, D1.7, [++A1.2]
+	MOV 	D0Re0, D1Re0
+	MOV 	D1Re0, D0.7
+	SETL 	[A0.2++], D0Re0, D1Re0
+	MOV 	D0Re0, D0.7
+	MOV 	D1Re0, D1.7
+	SUBS 	D1Ar5, D1Ar5, #1
+	BNE 	$Lfaligned_4
+	ANDS 	D1Ar3, D1Ar3, #7
+	BZ 	$Lfbyte_loop_exit
+	! Adjust A1.2
+	ADD	A1.2, A1.2, D0Ar4
+	B 	$Lfbyte_loop
+
+	.size _memmove,.-_memmove
diff --git a/arch/metag/lib/memset.S b/arch/metag/lib/memset.S
new file mode 100644
index 0000000..721085b
--- /dev/null
+++ b/arch/metag/lib/memset.S
@@ -0,0 +1,86 @@
+!   Copyright (C) 2008-2012 Imagination Technologies Ltd.
+
+	.text
+	.global	_memset
+	.type	_memset,function
+! D1Ar1 dst
+! D0Ar2 c
+! D1Ar3 cnt
+! D0Re0 dst
+_memset:
+	AND	D0Ar2,D0Ar2,#0xFF	! Ensure a byte input value
+	MULW 	D0Ar2,D0Ar2,#0x0101	! Duplicate byte value into  0-15
+	ANDS	D0Ar4,D1Ar1,#7		! Extract bottom LSBs of dst
+	LSL 	D0Re0,D0Ar2,#16		! Duplicate byte value into 16-31
+	ADD	A0.2,D0Ar2,D0Re0	! Duplicate byte value into 4 (A0.2)
+	MOV	D0Re0,D1Ar1		! Return dst
+	BZ	$LLongStub		! if start address is aligned
+	! start address is not aligned on an 8 byte boundary, so we
+	! need the number of bytes up to the next 8 byte address
+	! boundary, or the length of the string if less than 8, in D1Ar5
+	MOV	D0Ar2,#8		! Need 8 - N in D1Ar5 ...
+	SUB	D1Ar5,D0Ar2,D0Ar4	!            ... subtract N
+	CMP	D1Ar3,D1Ar5
+	MOVMI	D1Ar5,D1Ar3
+	B	$LByteStub		! dst is mis-aligned, do $LByteStub
+
+!
+! Preamble to LongLoop which generates 4*8 bytes per interation (5 cycles)
+!
+$LLongStub:
+	LSRS	D0Ar2,D1Ar3,#5
+	AND	D1Ar3,D1Ar3,#0x1F
+	MOV	A1.2,A0.2
+	BEQ	$LLongishStub
+	SUB	TXRPT,D0Ar2,#1
+	CMP	D1Ar3,#0
+$LLongLoop:
+	SETL 	[D1Ar1++],A0.2,A1.2
+	SETL 	[D1Ar1++],A0.2,A1.2
+	SETL 	[D1Ar1++],A0.2,A1.2
+	SETL 	[D1Ar1++],A0.2,A1.2
+	BR	$LLongLoop
+	BZ	$Lexit
+!
+! Preamble to LongishLoop which generates 1*8 bytes per interation (2 cycles)
+!
+$LLongishStub:
+	LSRS	D0Ar2,D1Ar3,#3
+	AND	D1Ar3,D1Ar3,#0x7
+	MOV	D1Ar5,D1Ar3
+	BEQ	$LByteStub
+	SUB	TXRPT,D0Ar2,#1
+	CMP	D1Ar3,#0
+$LLongishLoop:
+	SETL 	[D1Ar1++],A0.2,A1.2
+	BR	$LLongishLoop
+	BZ	$Lexit
+!
+! This does a byte structured burst of up to 7 bytes
+!
+!	D1Ar1 should point to the location required
+!	D1Ar3 should be the remaining total byte count
+!	D1Ar5 should be burst size (<= D1Ar3)
+!
+$LByteStub:
+	SUBS	D1Ar3,D1Ar3,D1Ar5	! Reduce count
+	ADD	D1Ar1,D1Ar1,D1Ar5	! Advance pointer to end of area
+	MULW	D1Ar5,D1Ar5,#4		! Scale to (1*4), (2*4), (3*4)
+	SUB	D1Ar5,D1Ar5,#(8*4)	! Rebase to -(7*4), -(6*4), -(5*4), ...
+	MOV	A1.2,D1Ar5
+	SUB	PC,CPC1,A1.2		! Jump into table below
+	SETB 	[D1Ar1+#(-7)],A0.2
+	SETB 	[D1Ar1+#(-6)],A0.2
+	SETB 	[D1Ar1+#(-5)],A0.2
+	SETB 	[D1Ar1+#(-4)],A0.2
+	SETB 	[D1Ar1+#(-3)],A0.2
+	SETB 	[D1Ar1+#(-2)],A0.2
+	SETB 	[D1Ar1+#(-1)],A0.2
+!
+! Return if all data has been output, otherwise do $LLongStub
+!
+	BNZ	$LLongStub
+$Lexit:
+	MOV	PC,D1RtP
+        .size _memset,.-_memset
+
diff --git a/arch/metag/lib/modsi3.S b/arch/metag/lib/modsi3.S
new file mode 100644
index 0000000..210cfa8
--- /dev/null
+++ b/arch/metag/lib/modsi3.S
@@ -0,0 +1,38 @@
+! Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
+!               Imagination Technologies Ltd
+!
+! Integer modulus routines.
+!
+!!
+!! 32-bit modulus unsigned i/p - passed unsigned 32-bit numbers
+!!
+	.text
+	.global ___umodsi3
+	.type   ___umodsi3,function
+	.align  2
+___umodsi3:
+	MOV     D0FrT,D1RtP             ! Save original return address
+	CALLR   D1RtP,___udivsi3
+	MOV     D1RtP,D0FrT             ! Recover return address
+	MOV     D0Re0,D1Ar1             ! Return remainder
+	MOV     PC,D1RtP
+	.size   ___umodsi3,.-___umodsi3
+
+!!
+!! 32-bit modulus signed i/p - passed signed 32-bit numbers
+!!
+	.global ___modsi3
+	.type   ___modsi3,function
+	.align  2
+___modsi3:
+	MOV     D0FrT,D1RtP             ! Save original return address
+	MOV     A0.2,D1Ar1              ! Save A in A0.2
+	CALLR   D1RtP,___divsi3
+	MOV     D1RtP,D0FrT             ! Recover return address
+	MOV     D1Re0,A0.2              ! Recover A
+	MOV     D0Re0,D1Ar1             ! Return remainder
+	ORS     D1Re0,D1Re0,D1Re0       ! Was A negative?
+	NEG     D1Ar1,D1Ar1             ! Negate remainder
+	MOVMI   D0Re0,D1Ar1             ! Return neg remainder
+	MOV     PC, D1RtP
+	.size   ___modsi3,.-___modsi3
diff --git a/arch/metag/lib/muldi3.S b/arch/metag/lib/muldi3.S
new file mode 100644
index 0000000..ee66ca8
--- /dev/null
+++ b/arch/metag/lib/muldi3.S
@@ -0,0 +1,44 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit multiply routine.
+!
+
+!
+! 64-bit signed/unsigned multiply
+!
+! A = D1Ar1:D0Ar2 = a 2^48 + b 2^32 +  c 2^16 + d 2^0
+!
+! B = D1Ar3:D0Ar4 = w 2^48 + x 2^32 +  y 2^16 + z 2^0
+!
+	.text
+	.global ___muldi3
+	.type   ___muldi3,function
+
+___muldi3:
+	MULD    D1Re0,D1Ar1,D0Ar4       ! (a 2^48 + b 2^32)(y 2^16 + z 2^0)
+	MULD    D0Re0,D0Ar2,D1Ar3       ! (w 2^48 + x 2^32)(c 2^16 + d 2^0)
+	ADD     D1Re0,D1Re0,D0Re0
+
+	MULW    D0Re0,D0Ar2,D0Ar4       ! (d 2^0)  * (z 2^0)
+
+	RTDW    D0Ar2,D0Ar2
+	MULW    D0Ar6,D0Ar2,D0Ar4       ! (c 2^16)(z 2^0)
+	LSR     D1Ar5,D0Ar6,#16
+	LSL     D0Ar6,D0Ar6,#16
+	ADDS    D0Re0,D0Re0,D0Ar6
+	ADDCS   D1Re0,D1Re0,#1
+	RTDW    D0Ar4,D0Ar4
+	ADD     D1Re0,D1Re0,D1Ar5
+
+	MULW    D0Ar6,D0Ar2,D0Ar4       ! (c 2^16)(y 2^16)
+	ADD     D1Re0,D1Re0,D0Ar6
+
+	RTDW    D0Ar2,D0Ar2
+	MULW    D0Ar6,D0Ar2,D0Ar4       ! (d 2^0)(y 2^16)
+	LSR     D1Ar5,D0Ar6,#16
+	LSL     D0Ar6,D0Ar6,#16
+	ADDS    D0Re0,D0Re0,D0Ar6
+	ADD     D1Re0,D1Re0,D1Ar5
+	ADDCS   D1Re0,D1Re0,#1
+	MOV     PC, D1RtP
+	.size ___muldi3,.-___muldi3
diff --git a/arch/metag/lib/ucmpdi2.S b/arch/metag/lib/ucmpdi2.S
new file mode 100644
index 0000000..6f3347f
--- /dev/null
+++ b/arch/metag/lib/ucmpdi2.S
@@ -0,0 +1,27 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit unsigned compare routine.
+!
+
+	.text
+	.global ___ucmpdi2
+	.type   ___ucmpdi2,function
+
+!         low    high
+! u64 a  (D0Ar2, D1Ar1)
+! u64 b  (D0Ar4, D1Ar3)
+___ucmpdi2:
+	! start at 1 (equal) and conditionally increment or decrement
+	MOV	D0Re0,#1
+
+	! high words
+	CMP	D1Ar1,D1Ar3
+	! or if equal, low words
+	CMPEQ	D0Ar2,D0Ar4
+
+	! unsigned compare
+	SUBLO	D0Re0,D0Re0,#1
+	ADDHI	D0Re0,D0Re0,#1
+
+	MOV	PC,D1RtP
+	.size ___ucmpdi2,.-___ucmpdi2
diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c
new file mode 100644
index 0000000..b3ebfe9
--- /dev/null
+++ b/arch/metag/lib/usercopy.c
@@ -0,0 +1,1354 @@
+/*
+ * User address space access functions.
+ * The non-inlined parts of asm-metag/uaccess.h are here.
+ *
+ * Copyright (C) 2006, Imagination Technologies.
+ * Copyright (C) 2000, Axis Communications AB.
+ *
+ * Written by Hans-Peter Nilsson.
+ * Pieces used from memcpy, originally by Kenny Ranerup long time ago.
+ * Modified for Meta by Will Newton.
+ */
+
+#include <linux/export.h>
+#include <linux/uaccess.h>
+#include <asm/cache.h>			/* def of L1_CACHE_BYTES */
+
+#define USE_RAPF
+#define RAPF_MIN_BUF_SIZE	(3*L1_CACHE_BYTES)
+
+
+/* The "double write" in this code is because the Meta will not fault
+ * immediately unless the memory pipe is forced to by e.g. a data stall or
+ * another memory op. The second write should be discarded by the write
+ * combiner so should have virtually no cost.
+ */
+
+#define __asm_copy_user_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	asm volatile (						 \
+		COPY						 \
+		"1:\n"						 \
+		"	.section .fixup,\"ax\"\n"		 \
+		"	MOV D1Ar1,#0\n"				 \
+		FIXUP						 \
+		"	MOVT    D1Ar1,#HI(1b)\n"		 \
+		"	JUMP    D1Ar1,#LO(1b)\n"		 \
+		"	.previous\n"				 \
+		"	.section __ex_table,\"a\"\n"		 \
+		TENTRY						 \
+		"	.previous\n"				 \
+		: "=r" (to), "=r" (from), "=r" (ret)		 \
+		: "0" (to), "1" (from), "2" (ret)		 \
+		: "D1Ar1", "memory")
+
+
+#define __asm_copy_to_user_1(to, from, ret)	\
+	__asm_copy_user_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"	\
+		"	SETB [%0],D1Ar1\n"	\
+		"2:	SETB [%0++],D1Ar1\n",	\
+		"3:	ADD  %2,%2,#1\n",	\
+		"	.long 2b,3b\n")
+
+#define __asm_copy_to_user_2x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_user_cont(to, from, ret,		\
+		"	GETW D1Ar1,[%1++]\n"		\
+		"	SETW [%0],D1Ar1\n"		\
+		"2:	SETW [%0++],D1Ar1\n" COPY,	\
+		"3:	ADD  %2,%2,#2\n" FIXUP,		\
+		"	.long 2b,3b\n" TENTRY)
+
+#define __asm_copy_to_user_2(to, from, ret) \
+	__asm_copy_to_user_2x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_3(to, from, ret) \
+	__asm_copy_to_user_2x_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"		\
+		"	SETB [%0],D1Ar1\n"		\
+		"4:	SETB [%0++],D1Ar1\n",		\
+		"5:	ADD  %2,%2,#1\n",		\
+		"	.long 4b,5b\n")
+
+#define __asm_copy_to_user_4x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_user_cont(to, from, ret,		\
+		"	GETD D1Ar1,[%1++]\n"		\
+		"	SETD [%0],D1Ar1\n"		\
+		"2:	SETD [%0++],D1Ar1\n" COPY,	\
+		"3:	ADD  %2,%2,#4\n" FIXUP,		\
+		"	.long 2b,3b\n" TENTRY)
+
+#define __asm_copy_to_user_4(to, from, ret) \
+	__asm_copy_to_user_4x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_5(to, from, ret) \
+	__asm_copy_to_user_4x_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"		\
+		"	SETB [%0],D1Ar1\n"		\
+		"4:	SETB [%0++],D1Ar1\n",		\
+		"5:	ADD  %2,%2,#1\n",		\
+		"	.long 4b,5b\n")
+
+#define __asm_copy_to_user_6x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_to_user_4x_cont(to, from, ret,	\
+		"	GETW D1Ar1,[%1++]\n"		\
+		"	SETW [%0],D1Ar1\n"		\
+		"4:	SETW [%0++],D1Ar1\n" COPY,	\
+		"5:	ADD  %2,%2,#2\n" FIXUP,		\
+		"	.long 4b,5b\n" TENTRY)
+
+#define __asm_copy_to_user_6(to, from, ret) \
+	__asm_copy_to_user_6x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_7(to, from, ret) \
+	__asm_copy_to_user_6x_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"		\
+		"	SETB [%0],D1Ar1\n"		\
+		"6:	SETB [%0++],D1Ar1\n",		\
+		"7:	ADD  %2,%2,#1\n",		\
+		"	.long 6b,7b\n")
+
+#define __asm_copy_to_user_8x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_to_user_4x_cont(to, from, ret,	\
+		"	GETD D1Ar1,[%1++]\n"		\
+		"	SETD [%0],D1Ar1\n"		\
+		"4:	SETD [%0++],D1Ar1\n" COPY,	\
+		"5:	ADD  %2,%2,#4\n"  FIXUP,	\
+		"	.long 4b,5b\n" TENTRY)
+
+#define __asm_copy_to_user_8(to, from, ret) \
+	__asm_copy_to_user_8x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_9(to, from, ret) \
+	__asm_copy_to_user_8x_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"		\
+		"	SETB [%0],D1Ar1\n"		\
+		"6:	SETB [%0++],D1Ar1\n",		\
+		"7:	ADD  %2,%2,#1\n",		\
+		"	.long 6b,7b\n")
+
+#define __asm_copy_to_user_10x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_to_user_8x_cont(to, from, ret,	\
+		"	GETW D1Ar1,[%1++]\n"		\
+		"	SETW [%0],D1Ar1\n"		\
+		"6:	SETW [%0++],D1Ar1\n" COPY,	\
+		"7:	ADD  %2,%2,#2\n" FIXUP,		\
+		"	.long 6b,7b\n" TENTRY)
+
+#define __asm_copy_to_user_10(to, from, ret) \
+	__asm_copy_to_user_10x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_11(to, from, ret) \
+	__asm_copy_to_user_10x_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"		\
+		"	SETB [%0],D1Ar1\n"		\
+		"8:	SETB [%0++],D1Ar1\n",		\
+		"9:	ADD  %2,%2,#1\n",		\
+		"	.long 8b,9b\n")
+
+#define __asm_copy_to_user_12x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_to_user_8x_cont(to, from, ret,	\
+		"	GETD D1Ar1,[%1++]\n"		\
+		"	SETD [%0],D1Ar1\n"		\
+		"6:	SETD [%0++],D1Ar1\n" COPY,	\
+		"7:	ADD  %2,%2,#4\n" FIXUP,		\
+		"	.long 6b,7b\n" TENTRY)
+#define __asm_copy_to_user_12(to, from, ret) \
+	__asm_copy_to_user_12x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_13(to, from, ret) \
+	__asm_copy_to_user_12x_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"		\
+		"	SETB [%0],D1Ar1\n"		\
+		"8:	SETB [%0++],D1Ar1\n",		\
+		"9:	ADD  %2,%2,#1\n",		\
+		"	.long 8b,9b\n")
+
+#define __asm_copy_to_user_14x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_to_user_12x_cont(to, from, ret,	\
+		"	GETW D1Ar1,[%1++]\n"		\
+		"	SETW [%0],D1Ar1\n"		\
+		"8:	SETW [%0++],D1Ar1\n" COPY,	\
+		"9:	ADD  %2,%2,#2\n" FIXUP,		\
+		"	.long 8b,9b\n" TENTRY)
+
+#define __asm_copy_to_user_14(to, from, ret) \
+	__asm_copy_to_user_14x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_15(to, from, ret) \
+	__asm_copy_to_user_14x_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"		\
+		"	SETB [%0],D1Ar1\n"		\
+		"10:	SETB [%0++],D1Ar1\n",		\
+		"11:	ADD  %2,%2,#1\n",		\
+		"	.long 10b,11b\n")
+
+#define __asm_copy_to_user_16x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_to_user_12x_cont(to, from, ret,	\
+		"	GETD D1Ar1,[%1++]\n"		\
+		"	SETD [%0],D1Ar1\n"		\
+		"8:	SETD [%0++],D1Ar1\n" COPY,	\
+		"9:	ADD  %2,%2,#4\n" FIXUP,		\
+		"	.long 8b,9b\n" TENTRY)
+
+#define __asm_copy_to_user_16(to, from, ret) \
+		__asm_copy_to_user_16x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_8x64(to, from, ret) \
+	asm volatile (					\
+		"	GETL D0Ar2,D1Ar1,[%1++]\n"	\
+		"	SETL [%0],D0Ar2,D1Ar1\n"	\
+		"2:	SETL [%0++],D0Ar2,D1Ar1\n"	\
+		"1:\n"					\
+		"	.section .fixup,\"ax\"\n"	\
+		"3:	ADD  %2,%2,#8\n"		\
+		"	MOVT    D0Ar2,#HI(1b)\n"	\
+		"	JUMP    D0Ar2,#LO(1b)\n"	\
+		"	.previous\n"			\
+		"	.section __ex_table,\"a\"\n"	\
+		"	.long 2b,3b\n"			\
+		"	.previous\n"			\
+		: "=r" (to), "=r" (from), "=r" (ret)	\
+		: "0" (to), "1" (from), "2" (ret)	\
+		: "D1Ar1", "D0Ar2", "memory")
+
+/*
+ *	optimized copying loop using RAPF when 64 bit aligned
+ *
+ *	n		will be automatically decremented inside the loop
+ *	ret		will be left intact. if error occurs we will rewind
+ *			so that the original non optimized code will fill up
+ *			this value correctly.
+ *
+ *	on fault:
+ *		>	n will hold total number of uncopied bytes
+ *
+ *		>	{'to','from'} will be rewind back so that
+ *			the non-optimized code will do the proper fix up
+ *
+ *	DCACHE drops the cacheline which helps in reducing cache
+ *	pollution.
+ *
+ *	We introduce an extra SETL at the end of the loop to
+ *	ensure we don't fall off the loop before we catch all
+ *	erros.
+ *
+ *	NOTICE:
+ *		LSM_STEP in TXSTATUS must be cleared in fix up code.
+ *		since we're using M{S,G}ETL, a fault might happen at
+ *		any address in the middle of M{S,G}ETL causing
+ *		the value of LSM_STEP to be incorrect which can
+ *		cause subsequent use of M{S,G}ET{L,D} to go wrong.
+ *		ie: if LSM_STEP was 1 when a fault occurs, the
+ *		next call to M{S,G}ET{L,D} will skip the first
+ *		copy/getting as it think that the first 1 has already
+ *		been done.
+ *
+ */
+#define __asm_copy_user_64bit_rapf_loop(				\
+		to, from, ret, n, id, FIXUP)				\
+	asm volatile (							\
+		".balign 8\n"						\
+		"MOV	RAPF, %1\n"					\
+		"MSETL	[A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n"	\
+		"MOV	D0Ar6, #0\n"					\
+		"LSR	D1Ar5, %3, #6\n"				\
+		"SUB	TXRPT, D1Ar5, #2\n"				\
+		"MOV	RAPF, %1\n"					\
+		"$Lloop"id":\n"						\
+		"ADD	RAPF, %1, #64\n"				\
+		"21:\n"							\
+		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"22:\n"							\
+		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"SUB	%3, %3, #32\n"					\
+		"23:\n"							\
+		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"24:\n"							\
+		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"SUB	%3, %3, #32\n"					\
+		"DCACHE	[%1+#-64], D0Ar6\n"				\
+		"BR	$Lloop"id"\n"					\
+									\
+		"MOV	RAPF, %1\n"					\
+		"25:\n"							\
+		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"26:\n"							\
+		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"SUB	%3, %3, #32\n"					\
+		"27:\n"							\
+		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"28:\n"							\
+		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"SUB	%0, %0, #8\n"					\
+		"29:\n"							\
+		"SETL	[%0++], D0.7, D1.7\n"				\
+		"SUB	%3, %3, #32\n"					\
+		"1:"							\
+		"DCACHE	[%1+#-64], D0Ar6\n"				\
+		"GETL    D0Ar6, D1Ar5, [A0StP+#-40]\n"			\
+		"GETL    D0FrT, D1RtP, [A0StP+#-32]\n"			\
+		"GETL    D0.5, D1.5, [A0StP+#-24]\n"			\
+		"GETL    D0.6, D1.6, [A0StP+#-16]\n"			\
+		"GETL    D0.7, D1.7, [A0StP+#-8]\n"			\
+		"SUB A0StP, A0StP, #40\n"				\
+		"	.section .fixup,\"ax\"\n"			\
+		"4:\n"							\
+		"	ADD	%0, %0, #8\n"				\
+		"3:\n"							\
+		"	MOV	D0Ar2, TXSTATUS\n"			\
+		"	MOV	D1Ar1, TXSTATUS\n"			\
+		"	AND	D1Ar1, D1Ar1, #0xFFFFF8FF\n"		\
+		"	MOV	TXSTATUS, D1Ar1\n"			\
+			FIXUP						\
+		"	MOVT    D0Ar2,#HI(1b)\n"			\
+		"	JUMP    D0Ar2,#LO(1b)\n"			\
+		"	.previous\n"					\
+		"	.section __ex_table,\"a\"\n"			\
+		"	.long 21b,3b\n"					\
+		"	.long 22b,3b\n"					\
+		"	.long 23b,3b\n"					\
+		"	.long 24b,3b\n"					\
+		"	.long 25b,3b\n"					\
+		"	.long 26b,3b\n"					\
+		"	.long 27b,3b\n"					\
+		"	.long 28b,3b\n"					\
+		"	.long 29b,4b\n"					\
+		"	.previous\n"					\
+		: "=r" (to), "=r" (from), "=r" (ret), "=d" (n)		\
+		: "0" (to), "1" (from), "2" (ret), "3" (n)		\
+		: "D1Ar1", "D0Ar2", "memory")
+
+/*	rewind 'to' and 'from'  pointers when a fault occurs
+ *
+ *	Rationale:
+ *		A fault always occurs on writing to user buffer. A fault
+ *		is at a single address, so we need to rewind by only 4
+ *		bytes.
+ *		Since we do a complete read from kernel buffer before
+ *		writing, we need to rewind it also. The amount to be
+ *		rewind equals the number of faulty writes in MSETD
+ *		which is: [4 - (LSM_STEP-1)]*8
+ *		LSM_STEP is bits 10:8 in TXSTATUS which is already read
+ *		and stored in D0Ar2
+ *
+ *		NOTE: If a fault occurs at the last operation in M{G,S}ETL
+ *			LSM_STEP will be 0. ie: we do 4 writes in our case, if
+ *			a fault happens at the 4th write, LSM_STEP will be 0
+ *			instead of 4. The code copes with that.
+ *
+ *		n is updated by the number of successful writes, which is:
+ *		n = n - (LSM_STEP-1)*8
+ */
+#define __asm_copy_to_user_64bit_rapf_loop(to,	from, ret, n, id)\
+	__asm_copy_user_64bit_rapf_loop(to, from, ret, n, id,		\
+		"LSR	D0Ar2, D0Ar2, #8\n"				\
+		"AND	D0Ar2, D0Ar2, #0x7\n"				\
+		"ADDZ	D0Ar2, D0Ar2, #4\n"				\
+		"SUB	D0Ar2, D0Ar2, #1\n"				\
+		"MOV	D1Ar1, #4\n"					\
+		"SUB	D0Ar2, D1Ar1, D0Ar2\n"				\
+		"LSL	D0Ar2, D0Ar2, #3\n"				\
+		"LSL	D1Ar1, D1Ar1, #3\n"				\
+		"SUB	D1Ar1, D1Ar1, D0Ar2\n"				\
+		"SUB	%0, %0, #8\n"					\
+		"SUB	%1,	%1,D0Ar2\n"				\
+		"SUB	%3, %3, D1Ar1\n")
+
+/*
+ *	optimized copying loop using RAPF when 32 bit aligned
+ *
+ *	n		will be automatically decremented inside the loop
+ *	ret		will be left intact. if error occurs we will rewind
+ *			so that the original non optimized code will fill up
+ *			this value correctly.
+ *
+ *	on fault:
+ *		>	n will hold total number of uncopied bytes
+ *
+ *		>	{'to','from'} will be rewind back so that
+ *			the non-optimized code will do the proper fix up
+ *
+ *	DCACHE drops the cacheline which helps in reducing cache
+ *	pollution.
+ *
+ *	We introduce an extra SETD at the end of the loop to
+ *	ensure we don't fall off the loop before we catch all
+ *	erros.
+ *
+ *	NOTICE:
+ *		LSM_STEP in TXSTATUS must be cleared in fix up code.
+ *		since we're using M{S,G}ETL, a fault might happen at
+ *		any address in the middle of M{S,G}ETL causing
+ *		the value of LSM_STEP to be incorrect which can
+ *		cause subsequent use of M{S,G}ET{L,D} to go wrong.
+ *		ie: if LSM_STEP was 1 when a fault occurs, the
+ *		next call to M{S,G}ET{L,D} will skip the first
+ *		copy/getting as it think that the first 1 has already
+ *		been done.
+ *
+ */
+#define __asm_copy_user_32bit_rapf_loop(				\
+			to,	from, ret, n, id, FIXUP)		\
+	asm volatile (							\
+		".balign 8\n"						\
+		"MOV	RAPF, %1\n"					\
+		"MSETL	[A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n"	\
+		"MOV	D0Ar6, #0\n"					\
+		"LSR	D1Ar5, %3, #6\n"				\
+		"SUB	TXRPT, D1Ar5, #2\n"				\
+		"MOV	RAPF, %1\n"					\
+	"$Lloop"id":\n"							\
+		"ADD	RAPF, %1, #64\n"				\
+		"21:\n"							\
+		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"22:\n"							\
+		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"SUB	%3, %3, #16\n"					\
+		"23:\n"							\
+		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"24:\n"							\
+		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"SUB	%3, %3, #16\n"					\
+		"25:\n"							\
+		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"26:\n"							\
+		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"SUB	%3, %3, #16\n"					\
+		"27:\n"							\
+		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"28:\n"							\
+		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"SUB	%3, %3, #16\n"					\
+		"DCACHE	[%1+#-64], D0Ar6\n"				\
+		"BR	$Lloop"id"\n"					\
+									\
+		"MOV	RAPF, %1\n"					\
+		"29:\n"							\
+		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"30:\n"							\
+		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"SUB	%3, %3, #16\n"					\
+		"31:\n"							\
+		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"32:\n"							\
+		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"SUB	%3, %3, #16\n"					\
+		"33:\n"							\
+		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"34:\n"							\
+		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"SUB	%3, %3, #16\n"					\
+		"35:\n"							\
+		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"36:\n"							\
+		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"SUB	%0, %0, #4\n"					\
+		"37:\n"							\
+		"SETD	[%0++], D0.7\n"					\
+		"SUB	%3, %3, #16\n"					\
+		"1:"							\
+		"DCACHE	[%1+#-64], D0Ar6\n"				\
+		"GETL    D0Ar6, D1Ar5, [A0StP+#-40]\n"			\
+		"GETL    D0FrT, D1RtP, [A0StP+#-32]\n"			\
+		"GETL    D0.5, D1.5, [A0StP+#-24]\n"			\
+		"GETL    D0.6, D1.6, [A0StP+#-16]\n"			\
+		"GETL    D0.7, D1.7, [A0StP+#-8]\n"			\
+		"SUB A0StP, A0StP, #40\n"				\
+		"	.section .fixup,\"ax\"\n"			\
+		"4:\n"							\
+		"	ADD		%0, %0, #4\n"			\
+		"3:\n"							\
+		"	MOV	D0Ar2, TXSTATUS\n"			\
+		"	MOV	D1Ar1, TXSTATUS\n"			\
+		"	AND	D1Ar1, D1Ar1, #0xFFFFF8FF\n"		\
+		"	MOV	TXSTATUS, D1Ar1\n"			\
+			FIXUP						\
+		"	MOVT    D0Ar2,#HI(1b)\n"			\
+		"	JUMP    D0Ar2,#LO(1b)\n"			\
+		"	.previous\n"					\
+		"	.section __ex_table,\"a\"\n"			\
+		"	.long 21b,3b\n"					\
+		"	.long 22b,3b\n"					\
+		"	.long 23b,3b\n"					\
+		"	.long 24b,3b\n"					\
+		"	.long 25b,3b\n"					\
+		"	.long 26b,3b\n"					\
+		"	.long 27b,3b\n"					\
+		"	.long 28b,3b\n"					\
+		"	.long 29b,3b\n"					\
+		"	.long 30b,3b\n"					\
+		"	.long 31b,3b\n"					\
+		"	.long 32b,3b\n"					\
+		"	.long 33b,3b\n"					\
+		"	.long 34b,3b\n"					\
+		"	.long 35b,3b\n"					\
+		"	.long 36b,3b\n"					\
+		"	.long 37b,4b\n"					\
+		"	.previous\n"					\
+		: "=r" (to), "=r" (from), "=r" (ret), "=d" (n)		\
+		: "0" (to), "1" (from), "2" (ret), "3" (n)		\
+		: "D1Ar1", "D0Ar2", "memory")
+
+/*	rewind 'to' and 'from'  pointers when a fault occurs
+ *
+ *	Rationale:
+ *		A fault always occurs on writing to user buffer. A fault
+ *		is at a single address, so we need to rewind by only 4
+ *		bytes.
+ *		Since we do a complete read from kernel buffer before
+ *		writing, we need to rewind it also. The amount to be
+ *		rewind equals the number of faulty writes in MSETD
+ *		which is: [4 - (LSM_STEP-1)]*4
+ *		LSM_STEP is bits 10:8 in TXSTATUS which is already read
+ *		and stored in D0Ar2
+ *
+ *		NOTE: If a fault occurs at the last operation in M{G,S}ETL
+ *			LSM_STEP will be 0. ie: we do 4 writes in our case, if
+ *			a fault happens at the 4th write, LSM_STEP will be 0
+ *			instead of 4. The code copes with that.
+ *
+ *		n is updated by the number of successful writes, which is:
+ *		n = n - (LSM_STEP-1)*4
+ */
+#define __asm_copy_to_user_32bit_rapf_loop(to, from, ret, n, id)\
+	__asm_copy_user_32bit_rapf_loop(to, from, ret, n, id,		\
+		"LSR	D0Ar2, D0Ar2, #8\n"				\
+		"AND	D0Ar2, D0Ar2, #0x7\n"				\
+		"ADDZ	D0Ar2, D0Ar2, #4\n"				\
+		"SUB	D0Ar2, D0Ar2, #1\n"				\
+		"MOV	D1Ar1, #4\n"					\
+		"SUB	D0Ar2, D1Ar1, D0Ar2\n"				\
+		"LSL	D0Ar2, D0Ar2, #2\n"				\
+		"LSL	D1Ar1, D1Ar1, #2\n"				\
+		"SUB	D1Ar1, D1Ar1, D0Ar2\n"				\
+		"SUB	%0, %0, #4\n"					\
+		"SUB	%1,	%1,	D0Ar2\n"			\
+		"SUB	%3, %3, D1Ar1\n")
+
+unsigned long __copy_user(void __user *pdst, const void *psrc,
+			  unsigned long n)
+{
+	register char __user *dst asm ("A0.2") = pdst;
+	register const char *src asm ("A1.2") = psrc;
+	unsigned long retn = 0;
+
+	if (n == 0)
+		return 0;
+
+	if ((unsigned long) src & 1) {
+		__asm_copy_to_user_1(dst, src, retn);
+		n--;
+	}
+	if ((unsigned long) dst & 1) {
+		/* Worst case - byte copy */
+		while (n > 0) {
+			__asm_copy_to_user_1(dst, src, retn);
+			n--;
+		}
+	}
+	if (((unsigned long) src & 2) && n >= 2) {
+		__asm_copy_to_user_2(dst, src, retn);
+		n -= 2;
+	}
+	if ((unsigned long) dst & 2) {
+		/* Second worst case - word copy */
+		while (n >= 2) {
+			__asm_copy_to_user_2(dst, src, retn);
+			n -= 2;
+		}
+	}
+
+#ifdef USE_RAPF
+	/* 64 bit copy loop */
+	if (!(((unsigned long) src | (__force unsigned long) dst) & 7)) {
+		if (n >= RAPF_MIN_BUF_SIZE) {
+			/* copy user using 64 bit rapf copy */
+			__asm_copy_to_user_64bit_rapf_loop(dst, src, retn,
+							n, "64cu");
+		}
+		while (n >= 8) {
+			__asm_copy_to_user_8x64(dst, src, retn);
+			n -= 8;
+		}
+	}
+	if (n >= RAPF_MIN_BUF_SIZE) {
+		/* copy user using 32 bit rapf copy */
+		__asm_copy_to_user_32bit_rapf_loop(dst, src, retn, n, "32cu");
+	}
+#else
+	/* 64 bit copy loop */
+	if (!(((unsigned long) src | (__force unsigned long) dst) & 7)) {
+		while (n >= 8) {
+			__asm_copy_to_user_8x64(dst, src, retn);
+			n -= 8;
+		}
+	}
+#endif
+
+	while (n >= 16) {
+		__asm_copy_to_user_16(dst, src, retn);
+		n -= 16;
+	}
+
+	while (n >= 4) {
+		__asm_copy_to_user_4(dst, src, retn);
+		n -= 4;
+	}
+
+	switch (n) {
+	case 0:
+		break;
+	case 1:
+		__asm_copy_to_user_1(dst, src, retn);
+		break;
+	case 2:
+		__asm_copy_to_user_2(dst, src, retn);
+		break;
+	case 3:
+		__asm_copy_to_user_3(dst, src, retn);
+		break;
+	}
+
+	return retn;
+}
+EXPORT_SYMBOL(__copy_user);
+
+#define __asm_copy_from_user_1(to, from, ret) \
+	__asm_copy_user_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"	\
+		"2:	SETB [%0++],D1Ar1\n",	\
+		"3:	ADD  %2,%2,#1\n"	\
+		"	SETB [%0++],D1Ar1\n",	\
+		"	.long 2b,3b\n")
+
+#define __asm_copy_from_user_2x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_user_cont(to, from, ret,		\
+		"	GETW D1Ar1,[%1++]\n"		\
+		"2:	SETW [%0++],D1Ar1\n" COPY,	\
+		"3:	ADD  %2,%2,#2\n"		\
+		"	SETW [%0++],D1Ar1\n" FIXUP,	\
+		"	.long 2b,3b\n" TENTRY)
+
+#define __asm_copy_from_user_2(to, from, ret) \
+	__asm_copy_from_user_2x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_3(to, from, ret)		\
+	__asm_copy_from_user_2x_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"		\
+		"4:	SETB [%0++],D1Ar1\n",		\
+		"5:	ADD  %2,%2,#1\n"		\
+		"	SETB [%0++],D1Ar1\n",		\
+		"	.long 4b,5b\n")
+
+#define __asm_copy_from_user_4x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_user_cont(to, from, ret,		\
+		"	GETD D1Ar1,[%1++]\n"		\
+		"2:	SETD [%0++],D1Ar1\n" COPY,	\
+		"3:	ADD  %2,%2,#4\n"		\
+		"	SETD [%0++],D1Ar1\n" FIXUP,	\
+		"	.long 2b,3b\n" TENTRY)
+
+#define __asm_copy_from_user_4(to, from, ret) \
+	__asm_copy_from_user_4x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_5(to, from, ret) \
+	__asm_copy_from_user_4x_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"		\
+		"4:	SETB [%0++],D1Ar1\n",		\
+		"5:	ADD  %2,%2,#1\n"		\
+		"	SETB [%0++],D1Ar1\n",		\
+		"	.long 4b,5b\n")
+
+#define __asm_copy_from_user_6x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_from_user_4x_cont(to, from, ret,	\
+		"	GETW D1Ar1,[%1++]\n"		\
+		"4:	SETW [%0++],D1Ar1\n" COPY,	\
+		"5:	ADD  %2,%2,#2\n"		\
+		"	SETW [%0++],D1Ar1\n" FIXUP,	\
+		"	.long 4b,5b\n" TENTRY)
+
+#define __asm_copy_from_user_6(to, from, ret) \
+	__asm_copy_from_user_6x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_7(to, from, ret) \
+	__asm_copy_from_user_6x_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"		\
+		"6:	SETB [%0++],D1Ar1\n",		\
+		"7:	ADD  %2,%2,#1\n"		\
+		"	SETB [%0++],D1Ar1\n",		\
+		"	.long 6b,7b\n")
+
+#define __asm_copy_from_user_8x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_from_user_4x_cont(to, from, ret,	\
+		"	GETD D1Ar1,[%1++]\n"		\
+		"4:	SETD [%0++],D1Ar1\n" COPY,	\
+		"5:	ADD  %2,%2,#4\n"			\
+		"	SETD [%0++],D1Ar1\n" FIXUP,		\
+		"	.long 4b,5b\n" TENTRY)
+
+#define __asm_copy_from_user_8(to, from, ret) \
+	__asm_copy_from_user_8x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_9(to, from, ret) \
+	__asm_copy_from_user_8x_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"		\
+		"6:	SETB [%0++],D1Ar1\n",		\
+		"7:	ADD  %2,%2,#1\n"		\
+		"	SETB [%0++],D1Ar1\n",		\
+		"	.long 6b,7b\n")
+
+#define __asm_copy_from_user_10x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_from_user_8x_cont(to, from, ret,	\
+		"	GETW D1Ar1,[%1++]\n"		\
+		"6:	SETW [%0++],D1Ar1\n" COPY,	\
+		"7:	ADD  %2,%2,#2\n"		\
+		"	SETW [%0++],D1Ar1\n" FIXUP,	\
+		"	.long 6b,7b\n" TENTRY)
+
+#define __asm_copy_from_user_10(to, from, ret) \
+	__asm_copy_from_user_10x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_11(to, from, ret)		\
+	__asm_copy_from_user_10x_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"		\
+		"8:	SETB [%0++],D1Ar1\n",		\
+		"9:	ADD  %2,%2,#1\n"		\
+		"	SETB [%0++],D1Ar1\n",		\
+		"	.long 8b,9b\n")
+
+#define __asm_copy_from_user_12x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_from_user_8x_cont(to, from, ret,	\
+		"	GETD D1Ar1,[%1++]\n"		\
+		"6:	SETD [%0++],D1Ar1\n" COPY,	\
+		"7:	ADD  %2,%2,#4\n"		\
+		"	SETD [%0++],D1Ar1\n" FIXUP,	\
+		"	.long 6b,7b\n" TENTRY)
+
+#define __asm_copy_from_user_12(to, from, ret) \
+	__asm_copy_from_user_12x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_13(to, from, ret) \
+	__asm_copy_from_user_12x_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"		\
+		"8:	SETB [%0++],D1Ar1\n",		\
+		"9:	ADD  %2,%2,#1\n"		\
+		"	SETB [%0++],D1Ar1\n",		\
+		"	.long 8b,9b\n")
+
+#define __asm_copy_from_user_14x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_from_user_12x_cont(to, from, ret,	\
+		"	GETW D1Ar1,[%1++]\n"		\
+		"8:	SETW [%0++],D1Ar1\n" COPY,	\
+		"9:	ADD  %2,%2,#2\n"		\
+		"	SETW [%0++],D1Ar1\n" FIXUP,	\
+		"	.long 8b,9b\n" TENTRY)
+
+#define __asm_copy_from_user_14(to, from, ret) \
+	__asm_copy_from_user_14x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_15(to, from, ret) \
+	__asm_copy_from_user_14x_cont(to, from, ret,	\
+		"	GETB D1Ar1,[%1++]\n"		\
+		"10:	SETB [%0++],D1Ar1\n",		\
+		"11:	ADD  %2,%2,#1\n"		\
+		"	SETB [%0++],D1Ar1\n",		\
+		"	.long 10b,11b\n")
+
+#define __asm_copy_from_user_16x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+	__asm_copy_from_user_12x_cont(to, from, ret,	\
+		"	GETD D1Ar1,[%1++]\n"		\
+		"8:	SETD [%0++],D1Ar1\n" COPY,	\
+		"9:	ADD  %2,%2,#4\n"		\
+		"	SETD [%0++],D1Ar1\n" FIXUP,	\
+		"	.long 8b,9b\n" TENTRY)
+
+#define __asm_copy_from_user_16(to, from, ret) \
+	__asm_copy_from_user_16x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_8x64(to, from, ret) \
+	asm volatile (				\
+		"	GETL D0Ar2,D1Ar1,[%1++]\n"	\
+		"2:	SETL [%0++],D0Ar2,D1Ar1\n"	\
+		"1:\n"					\
+		"	.section .fixup,\"ax\"\n"	\
+		"	MOV D1Ar1,#0\n"			\
+		"	MOV D0Ar2,#0\n"			\
+		"3:	ADD  %2,%2,#8\n"		\
+		"	SETL [%0++],D0Ar2,D1Ar1\n"	\
+		"	MOVT    D0Ar2,#HI(1b)\n"	\
+		"	JUMP    D0Ar2,#LO(1b)\n"	\
+		"	.previous\n"			\
+		"	.section __ex_table,\"a\"\n"	\
+		"	.long 2b,3b\n"			\
+		"	.previous\n"			\
+		: "=a" (to), "=r" (from), "=r" (ret)	\
+		: "0" (to), "1" (from), "2" (ret)	\
+		: "D1Ar1", "D0Ar2", "memory")
+
+/*	rewind 'from' pointer when a fault occurs
+ *
+ *	Rationale:
+ *		A fault occurs while reading from user buffer, which is the
+ *		source. Since the fault is at a single address, we only
+ *		need to rewind by 8 bytes.
+ *		Since we don't write to kernel buffer until we read first,
+ *		the kernel buffer is at the right state and needn't be
+ *		corrected.
+ */
+#define __asm_copy_from_user_64bit_rapf_loop(to, from, ret, n, id)	\
+	__asm_copy_user_64bit_rapf_loop(to, from, ret, n, id,		\
+		"SUB	%1, %1, #8\n")
+
+/*	rewind 'from' pointer when a fault occurs
+ *
+ *	Rationale:
+ *		A fault occurs while reading from user buffer, which is the
+ *		source. Since the fault is at a single address, we only
+ *		need to rewind by 4 bytes.
+ *		Since we don't write to kernel buffer until we read first,
+ *		the kernel buffer is at the right state and needn't be
+ *		corrected.
+ */
+#define __asm_copy_from_user_32bit_rapf_loop(to, from, ret, n, id)	\
+	__asm_copy_user_32bit_rapf_loop(to, from, ret, n, id,		\
+		"SUB	%1, %1, #4\n")
+
+
+/* Copy from user to kernel, zeroing the bytes that were inaccessible in
+   userland.  The return-value is the number of bytes that were
+   inaccessible.  */
+unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
+				  unsigned long n)
+{
+	register char *dst asm ("A0.2") = pdst;
+	register const char __user *src asm ("A1.2") = psrc;
+	unsigned long retn = 0;
+
+	if (n == 0)
+		return 0;
+
+	if ((unsigned long) src & 1) {
+		__asm_copy_from_user_1(dst, src, retn);
+		n--;
+	}
+	if ((unsigned long) dst & 1) {
+		/* Worst case - byte copy */
+		while (n > 0) {
+			__asm_copy_from_user_1(dst, src, retn);
+			n--;
+			if (retn)
+				goto copy_exception_bytes;
+		}
+	}
+	if (((unsigned long) src & 2) && n >= 2) {
+		__asm_copy_from_user_2(dst, src, retn);
+		n -= 2;
+	}
+	if ((unsigned long) dst & 2) {
+		/* Second worst case - word copy */
+		while (n >= 2) {
+			__asm_copy_from_user_2(dst, src, retn);
+			n -= 2;
+			if (retn)
+				goto copy_exception_bytes;
+		}
+	}
+
+	/* We only need one check after the unalignment-adjustments,
+	   because if both adjustments were done, either both or
+	   neither reference had an exception.  */
+	if (retn != 0)
+		goto copy_exception_bytes;
+
+#ifdef USE_RAPF
+	/* 64 bit copy loop */
+	if (!(((unsigned long) src | (unsigned long) dst) & 7)) {
+		if (n >= RAPF_MIN_BUF_SIZE) {
+			/* Copy using fast 64bit rapf */
+			__asm_copy_from_user_64bit_rapf_loop(dst, src, retn,
+							n, "64cuz");
+		}
+		while (n >= 8) {
+			__asm_copy_from_user_8x64(dst, src, retn);
+			n -= 8;
+			if (retn)
+				goto copy_exception_bytes;
+		}
+	}
+
+	if (n >= RAPF_MIN_BUF_SIZE) {
+		/* Copy using fast 32bit rapf */
+		__asm_copy_from_user_32bit_rapf_loop(dst, src, retn,
+						n, "32cuz");
+	}
+#else
+	/* 64 bit copy loop */
+	if (!(((unsigned long) src | (unsigned long) dst) & 7)) {
+		while (n >= 8) {
+			__asm_copy_from_user_8x64(dst, src, retn);
+			n -= 8;
+			if (retn)
+				goto copy_exception_bytes;
+		}
+	}
+#endif
+
+	while (n >= 4) {
+		__asm_copy_from_user_4(dst, src, retn);
+		n -= 4;
+
+		if (retn)
+			goto copy_exception_bytes;
+	}
+
+	/* If we get here, there were no memory read faults.  */
+	switch (n) {
+		/* These copies are at least "naturally aligned" (so we don't
+		   have to check each byte), due to the src alignment code.
+		   The *_3 case *will* get the correct count for retn.  */
+	case 0:
+		/* This case deliberately left in (if you have doubts check the
+		   generated assembly code).  */
+		break;
+	case 1:
+		__asm_copy_from_user_1(dst, src, retn);
+		break;
+	case 2:
+		__asm_copy_from_user_2(dst, src, retn);
+		break;
+	case 3:
+		__asm_copy_from_user_3(dst, src, retn);
+		break;
+	}
+
+	/* If we get here, retn correctly reflects the number of failing
+	   bytes.  */
+	return retn;
+
+ copy_exception_bytes:
+	/* We already have "retn" bytes cleared, and need to clear the
+	   remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
+	   memset is preferred here, since this isn't speed-critical code and
+	   we'd rather have this a leaf-function than calling memset.  */
+	{
+		char *endp;
+		for (endp = dst + n; dst < endp; dst++)
+			*dst = 0;
+	}
+
+	return retn + n;
+}
+EXPORT_SYMBOL(__copy_user_zeroing);
+
+#define __asm_clear_8x64(to, ret) \
+	asm volatile (					\
+		"	MOV  D0Ar2,#0\n"		\
+		"	MOV  D1Ar1,#0\n"		\
+		"	SETL [%0],D0Ar2,D1Ar1\n"	\
+		"2:	SETL [%0++],D0Ar2,D1Ar1\n"	\
+		"1:\n"					\
+		"	.section .fixup,\"ax\"\n"	\
+		"3:	ADD  %1,%1,#8\n"		\
+		"	MOVT    D0Ar2,#HI(1b)\n"	\
+		"	JUMP    D0Ar2,#LO(1b)\n"	\
+		"	.previous\n"			\
+		"	.section __ex_table,\"a\"\n"	\
+		"	.long 2b,3b\n"			\
+		"	.previous\n"			\
+		: "=r" (to), "=r" (ret)	\
+		: "0" (to), "1" (ret)	\
+		: "D1Ar1", "D0Ar2", "memory")
+
+/* Zero userspace.  */
+
+#define __asm_clear(to, ret, CLEAR, FIXUP, TENTRY) \
+	asm volatile (					\
+		"	MOV D1Ar1,#0\n"			\
+			CLEAR				\
+		"1:\n"					\
+		"	.section .fixup,\"ax\"\n"	\
+			FIXUP				\
+		"	MOVT    D1Ar1,#HI(1b)\n"	\
+		"	JUMP    D1Ar1,#LO(1b)\n"	\
+		"	.previous\n"			\
+		"	.section __ex_table,\"a\"\n"	\
+			TENTRY				\
+		"	.previous"			\
+		: "=r" (to), "=r" (ret)			\
+		: "0" (to), "1" (ret)			\
+		: "D1Ar1", "memory")
+
+#define __asm_clear_1(to, ret) \
+	__asm_clear(to, ret,			\
+		"	SETB [%0],D1Ar1\n"	\
+		"2:	SETB [%0++],D1Ar1\n",	\
+		"3:	ADD  %1,%1,#1\n",	\
+		"	.long 2b,3b\n")
+
+#define __asm_clear_2(to, ret) \
+	__asm_clear(to, ret,			\
+		"	SETW [%0],D1Ar1\n"	\
+		"2:	SETW [%0++],D1Ar1\n",	\
+		"3:	ADD  %1,%1,#2\n",	\
+		"	.long 2b,3b\n")
+
+#define __asm_clear_3(to, ret) \
+	__asm_clear(to, ret,			\
+		 "2:	SETW [%0++],D1Ar1\n"	\
+		 "	SETB [%0],D1Ar1\n"	\
+		 "3:	SETB [%0++],D1Ar1\n",	\
+		 "4:	ADD  %1,%1,#2\n"	\
+		 "5:	ADD  %1,%1,#1\n",	\
+		 "	.long 2b,4b\n"		\
+		 "	.long 3b,5b\n")
+
+#define __asm_clear_4x_cont(to, ret, CLEAR, FIXUP, TENTRY) \
+	__asm_clear(to, ret,				\
+		"	SETD [%0],D1Ar1\n"		\
+		"2:	SETD [%0++],D1Ar1\n" CLEAR,	\
+		"3:	ADD  %1,%1,#4\n" FIXUP,		\
+		"	.long 2b,3b\n" TENTRY)
+
+#define __asm_clear_4(to, ret) \
+	__asm_clear_4x_cont(to, ret, "", "", "")
+
+#define __asm_clear_8x_cont(to, ret, CLEAR, FIXUP, TENTRY) \
+	__asm_clear_4x_cont(to, ret,			\
+		"	SETD [%0],D1Ar1\n"		\
+		"4:	SETD [%0++],D1Ar1\n" CLEAR,	\
+		"5:	ADD  %1,%1,#4\n" FIXUP,		\
+		"	.long 4b,5b\n" TENTRY)
+
+#define __asm_clear_8(to, ret) \
+	__asm_clear_8x_cont(to, ret, "", "", "")
+
+#define __asm_clear_12x_cont(to, ret, CLEAR, FIXUP, TENTRY) \
+	__asm_clear_8x_cont(to, ret,			\
+		"	SETD [%0],D1Ar1\n"		\
+		"6:	SETD [%0++],D1Ar1\n" CLEAR,	\
+		"7:	ADD  %1,%1,#4\n" FIXUP,		\
+		"	.long 6b,7b\n" TENTRY)
+
+#define __asm_clear_12(to, ret) \
+	__asm_clear_12x_cont(to, ret, "", "", "")
+
+#define __asm_clear_16x_cont(to, ret, CLEAR, FIXUP, TENTRY) \
+	__asm_clear_12x_cont(to, ret,			\
+		"	SETD [%0],D1Ar1\n"		\
+		"8:	SETD [%0++],D1Ar1\n" CLEAR,	\
+		"9:	ADD  %1,%1,#4\n" FIXUP,		\
+		"	.long 8b,9b\n" TENTRY)
+
+#define __asm_clear_16(to, ret) \
+	__asm_clear_16x_cont(to, ret, "", "", "")
+
+unsigned long __do_clear_user(void __user *pto, unsigned long pn)
+{
+	register char __user *dst asm ("D0Re0") = pto;
+	register unsigned long n asm ("D1Re0") = pn;
+	register unsigned long retn asm ("D0Ar6") = 0;
+
+	if ((unsigned long) dst & 1) {
+		__asm_clear_1(dst, retn);
+		n--;
+	}
+
+	if ((unsigned long) dst & 2) {
+		__asm_clear_2(dst, retn);
+		n -= 2;
+	}
+
+	/* 64 bit copy loop */
+	if (!((__force unsigned long) dst & 7)) {
+		while (n >= 8) {
+			__asm_clear_8x64(dst, retn);
+			n -= 8;
+		}
+	}
+
+	while (n >= 16) {
+		__asm_clear_16(dst, retn);
+		n -= 16;
+	}
+
+	while (n >= 4) {
+		__asm_clear_4(dst, retn);
+		n -= 4;
+	}
+
+	switch (n) {
+	case 0:
+		break;
+	case 1:
+		__asm_clear_1(dst, retn);
+		break;
+	case 2:
+		__asm_clear_2(dst, retn);
+		break;
+	case 3:
+		__asm_clear_3(dst, retn);
+		break;
+	}
+
+	return retn;
+}
+EXPORT_SYMBOL(__do_clear_user);
+
+unsigned char __get_user_asm_b(const void __user *addr, long *err)
+{
+	register unsigned char x asm ("D0Re0") = 0;
+	asm volatile (
+		"	GETB %0,[%2]\n"
+		"1:\n"
+		"	GETB %0,[%2]\n"
+		"2:\n"
+		"	.section .fixup,\"ax\"\n"
+		"3:	MOV     D0FrT,%3\n"
+		"	SETD    [%1],D0FrT\n"
+		"	MOVT    D0FrT,#HI(2b)\n"
+		"	JUMP    D0FrT,#LO(2b)\n"
+		"	.previous\n"
+		"	.section __ex_table,\"a\"\n"
+		"	.long 1b,3b\n"
+		"	.previous\n"
+		: "=r" (x)
+		: "r" (err), "r" (addr), "P" (-EFAULT)
+		: "D0FrT");
+	return x;
+}
+EXPORT_SYMBOL(__get_user_asm_b);
+
+unsigned short __get_user_asm_w(const void __user *addr, long *err)
+{
+	register unsigned short x asm ("D0Re0") = 0;
+	asm volatile (
+		"	GETW %0,[%2]\n"
+		"1:\n"
+		"	GETW %0,[%2]\n"
+		"2:\n"
+		"	.section .fixup,\"ax\"\n"
+		"3:	MOV     D0FrT,%3\n"
+		"	SETD    [%1],D0FrT\n"
+		"	MOVT    D0FrT,#HI(2b)\n"
+		"	JUMP    D0FrT,#LO(2b)\n"
+		"	.previous\n"
+		"	.section __ex_table,\"a\"\n"
+		"	.long 1b,3b\n"
+		"	.previous\n"
+		: "=r" (x)
+		: "r" (err), "r" (addr), "P" (-EFAULT)
+		: "D0FrT");
+	return x;
+}
+EXPORT_SYMBOL(__get_user_asm_w);
+
+unsigned int __get_user_asm_d(const void __user *addr, long *err)
+{
+	register unsigned int x asm ("D0Re0") = 0;
+	asm volatile (
+		"	GETD %0,[%2]\n"
+		"1:\n"
+		"	GETD %0,[%2]\n"
+		"2:\n"
+		"	.section .fixup,\"ax\"\n"
+		"3:	MOV     D0FrT,%3\n"
+		"	SETD    [%1],D0FrT\n"
+		"	MOVT    D0FrT,#HI(2b)\n"
+		"	JUMP    D0FrT,#LO(2b)\n"
+		"	.previous\n"
+		"	.section __ex_table,\"a\"\n"
+		"	.long 1b,3b\n"
+		"	.previous\n"
+		: "=r" (x)
+		: "r" (err), "r" (addr), "P" (-EFAULT)
+		: "D0FrT");
+	return x;
+}
+EXPORT_SYMBOL(__get_user_asm_d);
+
+long __put_user_asm_b(unsigned int x, void __user *addr)
+{
+	register unsigned int err asm ("D0Re0") = 0;
+	asm volatile (
+		"	MOV  %0,#0\n"
+		"	SETB [%2],%1\n"
+		"1:\n"
+		"	SETB [%2],%1\n"
+		"2:\n"
+		".section .fixup,\"ax\"\n"
+		"3:	MOV     %0,%3\n"
+		"	MOVT    D0FrT,#HI(2b)\n"
+		"	JUMP    D0FrT,#LO(2b)\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.long 1b,3b\n"
+		".previous"
+		: "=r"(err)
+		: "d" (x), "a" (addr), "P"(-EFAULT)
+		: "D0FrT");
+	return err;
+}
+EXPORT_SYMBOL(__put_user_asm_b);
+
+long __put_user_asm_w(unsigned int x, void __user *addr)
+{
+	register unsigned int err asm ("D0Re0") = 0;
+	asm volatile (
+		"	MOV  %0,#0\n"
+		"	SETW [%2],%1\n"
+		"1:\n"
+		"	SETW [%2],%1\n"
+		"2:\n"
+		".section .fixup,\"ax\"\n"
+		"3:	MOV     %0,%3\n"
+		"	MOVT    D0FrT,#HI(2b)\n"
+		"	JUMP    D0FrT,#LO(2b)\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.long 1b,3b\n"
+		".previous"
+		: "=r"(err)
+		: "d" (x), "a" (addr), "P"(-EFAULT)
+		: "D0FrT");
+	return err;
+}
+EXPORT_SYMBOL(__put_user_asm_w);
+
+long __put_user_asm_d(unsigned int x, void __user *addr)
+{
+	register unsigned int err asm ("D0Re0") = 0;
+	asm volatile (
+		"	MOV  %0,#0\n"
+		"	SETD [%2],%1\n"
+		"1:\n"
+		"	SETD [%2],%1\n"
+		"2:\n"
+		".section .fixup,\"ax\"\n"
+		"3:	MOV     %0,%3\n"
+		"	MOVT    D0FrT,#HI(2b)\n"
+		"	JUMP    D0FrT,#LO(2b)\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.long 1b,3b\n"
+		".previous"
+		: "=r"(err)
+		: "d" (x), "a" (addr), "P"(-EFAULT)
+		: "D0FrT");
+	return err;
+}
+EXPORT_SYMBOL(__put_user_asm_d);
+
+long __put_user_asm_l(unsigned long long x, void __user *addr)
+{
+	register unsigned int err asm ("D0Re0") = 0;
+	asm volatile (
+		"	MOV  %0,#0\n"
+		"	SETL [%2],%1,%t1\n"
+		"1:\n"
+		"	SETL [%2],%1,%t1\n"
+		"2:\n"
+		".section .fixup,\"ax\"\n"
+		"3:	MOV     %0,%3\n"
+		"	MOVT    D0FrT,#HI(2b)\n"
+		"	JUMP    D0FrT,#LO(2b)\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.long 1b,3b\n"
+		".previous"
+		: "=r"(err)
+		: "d" (x), "a" (addr), "P"(-EFAULT)
+		: "D0FrT");
+	return err;
+}
+EXPORT_SYMBOL(__put_user_asm_l);
+
+long strnlen_user(const char __user *src, long count)
+{
+	long res;
+
+	if (!access_ok(VERIFY_READ, src, 0))
+		return 0;
+
+	asm volatile ("	MOV     D0Ar4, %1\n"
+		      "	MOV     D0Ar6, %2\n"
+		      "0:\n"
+		      "	SUBS    D0FrT, D0Ar6, #0\n"
+		      "	SUB     D0Ar6, D0Ar6, #1\n"
+		      "	BLE     2f\n"
+		      "	GETB    D0FrT, [D0Ar4+#1++]\n"
+		      "1:\n"
+		      "	TST     D0FrT, #255\n"
+		      "	BNE     0b\n"
+		      "2:\n"
+		      "	SUB     %0, %2, D0Ar6\n"
+		      "3:\n"
+		      "	.section .fixup,\"ax\"\n"
+		      "4:\n"
+		      "	MOV     %0, #0\n"
+		      "	MOVT    D0FrT,#HI(3b)\n"
+		      "	JUMP    D0FrT,#LO(3b)\n"
+		      "	.previous\n"
+		      "	.section __ex_table,\"a\"\n"
+		      "	.long 1b,4b\n"
+		      "	.previous\n"
+		      : "=r" (res)
+		      : "r" (src), "r" (count)
+		      : "D0FrT", "D0Ar4", "D0Ar6", "cc");
+
+	return res;
+}
+EXPORT_SYMBOL(strnlen_user);
+
+long __strncpy_from_user(char *dst, const char __user *src, long count)
+{
+	long res;
+
+	if (count == 0)
+		return 0;
+
+	/*
+	 * Currently, in 2.4.0-test9, most ports use a simple byte-copy loop.
+	 *  So do we.
+	 *
+	 *  This code is deduced from:
+	 *
+	 *      char tmp2;
+	 *      long tmp1, tmp3;
+	 *      tmp1 = count;
+	 *      while ((*dst++ = (tmp2 = *src++)) != 0
+	 *             && --tmp1)
+	 *        ;
+	 *
+	 *      res = count - tmp1;
+	 *
+	 *  with tweaks.
+	 */
+
+	asm volatile ("	MOV  %0,%3\n"
+		      "1:\n"
+		      "	GETB D0FrT,[%2++]\n"
+		      "2:\n"
+		      "	CMP  D0FrT,#0\n"
+		      "	SETB [%1++],D0FrT\n"
+		      "	BEQ  3f\n"
+		      "	SUBS %0,%0,#1\n"
+		      "	BNZ  1b\n"
+		      "3:\n"
+		      "	SUB  %0,%3,%0\n"
+		      "4:\n"
+		      "	.section .fixup,\"ax\"\n"
+		      "5:\n"
+		      "	MOV  %0,%7\n"
+		      "	MOVT    D0FrT,#HI(4b)\n"
+		      "	JUMP    D0FrT,#LO(4b)\n"
+		      "	.previous\n"
+		      "	.section __ex_table,\"a\"\n"
+		      "	.long 2b,5b\n"
+		      "	.previous"
+		      : "=r" (res), "=r" (dst), "=r" (src), "=r" (count)
+		      : "3" (count), "1" (dst), "2" (src), "P" (-EFAULT)
+		      : "D0FrT", "memory", "cc");
+
+	return res;
+}
+EXPORT_SYMBOL(__strncpy_from_user);
diff --git a/arch/metag/mm/Kconfig b/arch/metag/mm/Kconfig
new file mode 100644
index 0000000..cd7f2f2
--- /dev/null
+++ b/arch/metag/mm/Kconfig
@@ -0,0 +1,153 @@
+menu "Memory management options"
+
+config PAGE_OFFSET
+	hex "Kernel page offset address"
+	default "0x40000000"
+	help
+	  This option allows you to set the virtual address at which the
+	  kernel will be mapped to.
+endmenu
+
+config KERNEL_4M_PAGES
+	bool "Map kernel with 4MB pages"
+	depends on METAG_META21_MMU
+	default y
+	help
+	  Map the kernel with large pages to reduce TLB pressure.
+
+choice
+	prompt "User page size"
+	default PAGE_SIZE_4K
+
+config PAGE_SIZE_4K
+	bool "4kB"
+	help
+	  This is the default page size used by all Meta cores.
+
+config PAGE_SIZE_8K
+	bool "8kB"
+	depends on METAG_META21_MMU
+	help
+	  This enables 8kB pages as supported by Meta 2.x and later MMUs.
+
+config PAGE_SIZE_16K
+	bool "16kB"
+	depends on METAG_META21_MMU
+	help
+	  This enables 16kB pages as supported by Meta 2.x and later MMUs.
+
+endchoice
+
+config NUMA
+	bool "Non Uniform Memory Access (NUMA) Support"
+	help
+	  Some Meta systems have MMU-mappable on-chip memories with
+	  lower latencies than main memory. This enables support for
+	  these blocks by binding them to nodes and allowing
+	  memory policies to be used for prioritizing and controlling
+	  allocation behaviour.
+
+config FORCE_MAX_ZONEORDER
+	int "Maximum zone order"
+	range 10 32
+	default "10"
+	help
+	  The kernel memory allocator divides physically contiguous memory
+	  blocks into "zones", where each zone is a power of two number of
+	  pages.  This option selects the largest power of two that the kernel
+	  keeps in the memory allocator.  If you need to allocate very large
+	  blocks of physically contiguous memory, then you may need to
+	  increase this value.
+
+	  This config option is actually maximum order plus one. For example,
+	  a value of 11 means that the largest free memory block is 2^10 pages.
+
+	  The page size is not necessarily 4KB.  Keep this in mind
+	  when choosing a value for this option.
+
+config METAG_L2C
+	bool "Level 2 Cache Support"
+	depends on METAG_META21
+	help
+	  Press y here to enable support for the Meta Level 2 (L2) cache. This
+	  will enable the cache at start up if it hasn't already been enabled
+	  by the bootloader.
+
+	  If the bootloader enables the L2 you must press y here to ensure the
+	  kernel takes the appropriate actions to keep the cache coherent.
+
+config NODES_SHIFT
+	int
+	default "1"
+	depends on NEED_MULTIPLE_NODES
+
+config ARCH_FLATMEM_ENABLE
+	def_bool y
+	depends on !NUMA
+
+config ARCH_SPARSEMEM_ENABLE
+	def_bool y
+	select SPARSEMEM_STATIC
+
+config ARCH_SPARSEMEM_DEFAULT
+	def_bool y
+
+config MAX_ACTIVE_REGIONS
+	int
+	default "2" if SPARSEMEM
+	default "1"
+
+config ARCH_POPULATES_NODE_MAP
+	def_bool y
+
+config ARCH_SELECT_MEMORY_MODEL
+	def_bool y
+
+config SYS_SUPPORTS_HUGETLBFS
+	def_bool y
+	depends on METAG_META21_MMU
+
+choice
+	prompt "HugeTLB page size"
+	depends on METAG_META21_MMU && HUGETLB_PAGE
+	default HUGETLB_PAGE_SIZE_1M
+
+config HUGETLB_PAGE_SIZE_8K
+	bool "8kB"
+	depends on PAGE_SIZE_4K
+
+config HUGETLB_PAGE_SIZE_16K
+	bool "16kB"
+	depends on PAGE_SIZE_4K || PAGE_SIZE_8K
+
+config HUGETLB_PAGE_SIZE_32K
+	bool "32kB"
+
+config HUGETLB_PAGE_SIZE_64K
+	bool "64kB"
+
+config HUGETLB_PAGE_SIZE_128K
+	bool "128kB"
+
+config HUGETLB_PAGE_SIZE_256K
+	bool "256kB"
+
+config HUGETLB_PAGE_SIZE_512K
+	bool "512kB"
+
+config HUGETLB_PAGE_SIZE_1M
+	bool "1MB"
+
+config HUGETLB_PAGE_SIZE_2M
+	bool "2MB"
+
+config HUGETLB_PAGE_SIZE_4M
+	bool "4MB"
+
+endchoice
+
+config METAG_COREMEM
+	bool
+	default y if SUSPEND
+
+source "mm/Kconfig"
diff --git a/arch/metag/mm/Makefile b/arch/metag/mm/Makefile
new file mode 100644
index 0000000..9943311
--- /dev/null
+++ b/arch/metag/mm/Makefile
@@ -0,0 +1,19 @@
+#
+# Makefile for the linux Meta-specific parts of the memory manager.
+#
+
+obj-y				+= cache.o
+obj-y				+= extable.o
+obj-y				+= fault.o
+obj-y				+= init.o
+obj-y				+= ioremap.o
+obj-y				+= maccess.o
+
+mmu-y				:= mmu-meta1.o
+mmu-$(CONFIG_METAG_META21_MMU)	:= mmu-meta2.o
+obj-y				+= $(mmu-y)
+
+obj-$(CONFIG_HIGHMEM)		+= highmem.o
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+obj-$(CONFIG_METAG_L2C)		+= l2cache.o
+obj-$(CONFIG_NUMA)		+= numa.o
diff --git a/arch/metag/mm/cache.c b/arch/metag/mm/cache.c
new file mode 100644
index 0000000..b5d3b2e
--- /dev/null
+++ b/arch/metag/mm/cache.c
@@ -0,0 +1,521 @@
+/*
+ * arch/metag/mm/cache.c
+ *
+ * Copyright (C) 2001, 2002, 2005, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Cache control code
+ */
+
+#include <linux/export.h>
+#include <linux/io.h>
+#include <asm/cacheflush.h>
+#include <asm/core_reg.h>
+#include <asm/global_lock.h>
+#include <asm/metag_isa.h>
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+
+#define DEFAULT_CACHE_WAYS_LOG2	2
+
+/*
+ * Size of a set in the caches. Initialised for default 16K stride, adjusted
+ * according to values passed through TBI global heap segment via LDLK (on ATP)
+ * or config registers (on HTP/MTP)
+ */
+static int dcache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2
+					- DEFAULT_CACHE_WAYS_LOG2;
+static int icache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2
+					- DEFAULT_CACHE_WAYS_LOG2;
+/*
+ * The number of sets in the caches. Initialised for HTP/ATP, adjusted
+ * according to NOMMU setting in config registers
+ */
+static unsigned char dcache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2;
+static unsigned char icache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2;
+
+#ifndef CONFIG_METAG_META12
+/**
+ * metag_lnkget_probe() - Probe whether lnkget/lnkset go around the cache
+ */
+static volatile u32 lnkget_testdata[16] __initdata __aligned(64);
+
+#define LNKGET_CONSTANT 0xdeadbeef
+
+void __init metag_lnkget_probe(void)
+{
+	int temp;
+	long flags;
+
+	/*
+	 * It's conceivable the user has configured a globally coherent cache
+	 * shared with non-Linux hardware threads, so use LOCK2 to prevent them
+	 * from executing and causing cache eviction during the test.
+	 */
+	__global_lock2(flags);
+
+	/* read a value to bring it into the cache */
+	(void)lnkget_testdata[0];
+	lnkget_testdata[0] = 0;
+
+	/* lnkget/lnkset it to modify it */
+	asm volatile(
+		"1:	LNKGETD %0, [%1]\n"
+		"	LNKSETD [%1], %2\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		: "=&d" (temp)
+		: "da" (&lnkget_testdata[0]), "bd" (LNKGET_CONSTANT)
+		: "cc");
+
+	/* re-read it to see if the cached value changed */
+	temp = lnkget_testdata[0];
+
+	__global_unlock2(flags);
+
+	/* flush the cache line to fix any incoherency */
+	__builtin_dcache_flush((void *)&lnkget_testdata[0]);
+
+#if defined(CONFIG_METAG_LNKGET_AROUND_CACHE)
+	/* if the cache is right, LNKGET_AROUND_CACHE is unnecessary */
+	if (temp == LNKGET_CONSTANT)
+		pr_info("LNKGET/SET go through cache but CONFIG_METAG_LNKGET_AROUND_CACHE=y\n");
+#elif defined(CONFIG_METAG_ATOMICITY_LNKGET)
+	/*
+	 * if the cache is wrong, LNKGET_AROUND_CACHE is really necessary
+	 * because the kernel is configured to use LNKGET/SET for atomicity
+	 */
+	WARN(temp != LNKGET_CONSTANT,
+	     "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"
+	     "Expect kernel failure as it's used for atomicity primitives\n");
+#elif defined(CONFIG_SMP)
+	/*
+	 * if the cache is wrong, LNKGET_AROUND_CACHE should be used or the
+	 * gateway page won't flush and userland could break.
+	 */
+	WARN(temp != LNKGET_CONSTANT,
+	     "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"
+	     "Expect userland failure as it's used for user gateway page\n");
+#else
+	/*
+	 * if the cache is wrong, LNKGET_AROUND_CACHE is set wrong, but it
+	 * doesn't actually matter as it doesn't have any effect on !SMP &&
+	 * !ATOMICITY_LNKGET.
+	 */
+	if (temp != LNKGET_CONSTANT)
+		pr_warn("LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n");
+#endif
+}
+#endif /* !CONFIG_METAG_META12 */
+
+/**
+ * metag_cache_probe() - Probe L1 cache configuration.
+ *
+ * Probe the L1 cache configuration to aid the L1 physical cache flushing
+ * functions.
+ */
+void __init metag_cache_probe(void)
+{
+#ifndef CONFIG_METAG_META12
+	int coreid = metag_in32(METAC_CORE_ID);
+	int config = metag_in32(METAC_CORE_CONFIG2);
+	int cfgcache = coreid & METAC_COREID_CFGCACHE_BITS;
+
+	if (cfgcache == METAC_COREID_CFGCACHE_TYPE0 ||
+	    cfgcache == METAC_COREID_CFGCACHE_PRIVNOMMU) {
+		icache_sets_log2 = 1;
+		dcache_sets_log2 = 1;
+	}
+
+	/* For normal size caches, the smallest size is 4Kb.
+	   For small caches, the smallest size is 64b */
+	icache_set_shift = (config & METAC_CORECFG2_ICSMALL_BIT)
+				? 6 : 12;
+	icache_set_shift += (config & METAC_CORE_C2ICSZ_BITS)
+				>> METAC_CORE_C2ICSZ_S;
+	icache_set_shift -= icache_sets_log2;
+
+	dcache_set_shift = (config & METAC_CORECFG2_DCSMALL_BIT)
+				? 6 : 12;
+	dcache_set_shift += (config & METAC_CORECFG2_DCSZ_BITS)
+				>> METAC_CORECFG2_DCSZ_S;
+	dcache_set_shift -= dcache_sets_log2;
+
+	metag_lnkget_probe();
+#else
+	/* Extract cache sizes from global heap segment */
+	unsigned long val, u;
+	int width, shift, addend;
+	PTBISEG seg;
+
+	seg = __TBIFindSeg(NULL, TBID_SEG(TBID_THREAD_GLOBAL,
+					  TBID_SEGSCOPE_GLOBAL,
+					  TBID_SEGTYPE_HEAP));
+	if (seg != NULL) {
+		val = seg->Data[1];
+
+		/* Work out width of I-cache size bit-field */
+		u = ((unsigned long) METAG_TBI_ICACHE_SIZE_BITS)
+		       >> METAG_TBI_ICACHE_SIZE_S;
+		width = 0;
+		while (u & 1) {
+			width++;
+			u >>= 1;
+		}
+		/* Extract sign-extended size addend value */
+		shift = 32 - (METAG_TBI_ICACHE_SIZE_S + width);
+		addend = (long) ((val & METAG_TBI_ICACHE_SIZE_BITS)
+				 << shift)
+			>> (shift + METAG_TBI_ICACHE_SIZE_S);
+		/* Now calculate I-cache set size */
+		icache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2
+				    - DEFAULT_CACHE_WAYS_LOG2)
+					+ addend;
+
+		/* Similarly for D-cache */
+		u = ((unsigned long) METAG_TBI_DCACHE_SIZE_BITS)
+		       >> METAG_TBI_DCACHE_SIZE_S;
+		width = 0;
+		while (u & 1) {
+			width++;
+			u >>= 1;
+		}
+		shift = 32 - (METAG_TBI_DCACHE_SIZE_S + width);
+		addend = (long) ((val & METAG_TBI_DCACHE_SIZE_BITS)
+				 << shift)
+			>> (shift + METAG_TBI_DCACHE_SIZE_S);
+		dcache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2
+				    - DEFAULT_CACHE_WAYS_LOG2)
+					+ addend;
+	}
+#endif
+}
+
+static void metag_phys_data_cache_flush(const void *start)
+{
+	unsigned long flush0, flush1, flush2, flush3;
+	int loops, step;
+	int thread;
+	int part, offset;
+	int set_shift;
+
+	/* Use a sequence of writes to flush the cache region requested */
+	thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS)
+					  >> TXENABLE_THREAD_S;
+
+	/* Cache is broken into sets which lie in contiguous RAMs */
+	set_shift = dcache_set_shift;
+
+	/* Move to the base of the physical cache flush region */
+	flush0 = LINSYSCFLUSH_DCACHE_LINE;
+	step   = 64;
+
+	/* Get partition data for this thread */
+	part = metag_in32(SYSC_DCPART0 +
+			      (SYSC_xCPARTn_STRIDE * thread));
+
+	if ((int)start < 0)
+		/* Access Global vs Local partition */
+		part >>= SYSC_xCPARTG_AND_S
+			- SYSC_xCPARTL_AND_S;
+
+	/* Extract offset and move SetOff */
+	offset = (part & SYSC_xCPARTL_OR_BITS)
+			>> SYSC_xCPARTL_OR_S;
+	flush0 += (offset << (set_shift - 4));
+
+	/* Shrink size */
+	part = (part & SYSC_xCPARTL_AND_BITS)
+			>> SYSC_xCPARTL_AND_S;
+	loops = ((part + 1) << (set_shift - 4));
+
+	/* Reduce loops by step of cache line size */
+	loops /= step;
+
+	flush1 = flush0 + (1 << set_shift);
+	flush2 = flush0 + (2 << set_shift);
+	flush3 = flush0 + (3 << set_shift);
+
+	if (dcache_sets_log2 == 1) {
+		flush2 = flush1;
+		flush3 = flush1 + step;
+		flush1 = flush0 + step;
+		step  <<= 1;
+		loops >>= 1;
+	}
+
+	/* Clear loops ways in cache */
+	while (loops-- != 0) {
+		/* Clear the ways. */
+#if 0
+		/*
+		 * GCC doesn't generate very good code for this so we
+		 * provide inline assembly instead.
+		 */
+		metag_out8(0, flush0);
+		metag_out8(0, flush1);
+		metag_out8(0, flush2);
+		metag_out8(0, flush3);
+
+		flush0 += step;
+		flush1 += step;
+		flush2 += step;
+		flush3 += step;
+#else
+		asm volatile (
+			"SETB\t[%0+%4++],%5\n"
+			"SETB\t[%1+%4++],%5\n"
+			"SETB\t[%2+%4++],%5\n"
+			"SETB\t[%3+%4++],%5\n"
+			: "+e" (flush0),
+			  "+e" (flush1),
+			  "+e" (flush2),
+			  "+e" (flush3)
+			: "e" (step), "a" (0));
+#endif
+	}
+}
+
+void metag_data_cache_flush_all(const void *start)
+{
+	if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0)
+		/* No need to flush the data cache it's not actually enabled */
+		return;
+
+	metag_phys_data_cache_flush(start);
+}
+
+void metag_data_cache_flush(const void *start, int bytes)
+{
+	unsigned long flush0;
+	int loops, step;
+
+	if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0)
+		/* No need to flush the data cache it's not actually enabled */
+		return;
+
+	if (bytes >= 4096) {
+		metag_phys_data_cache_flush(start);
+		return;
+	}
+
+	/* Use linear cache flush mechanism on META IP */
+	flush0 = (int)start;
+	loops  = ((int)start & (DCACHE_LINE_BYTES - 1)) + bytes +
+					(DCACHE_LINE_BYTES - 1);
+	loops  >>= DCACHE_LINE_S;
+
+#define PRIM_FLUSH(addr, offset) do {			\
+	int __addr = ((int) (addr)) + ((offset) * 64);	\
+	__builtin_dcache_flush((void *)(__addr));	\
+	} while (0)
+
+#define LOOP_INC (4*64)
+
+	do {
+		/* By default stop */
+		step = 0;
+
+		switch (loops) {
+		/* Drop Thru Cases! */
+		default:
+			PRIM_FLUSH(flush0, 3);
+			loops -= 4;
+			step = 1;
+		case 3:
+			PRIM_FLUSH(flush0, 2);
+		case 2:
+			PRIM_FLUSH(flush0, 1);
+		case 1:
+			PRIM_FLUSH(flush0, 0);
+			flush0 += LOOP_INC;
+		case 0:
+			break;
+		}
+	} while (step);
+}
+EXPORT_SYMBOL(metag_data_cache_flush);
+
+static void metag_phys_code_cache_flush(const void *start, int bytes)
+{
+	unsigned long flush0, flush1, flush2, flush3, end_set;
+	int loops, step;
+	int thread;
+	int set_shift, set_size;
+	int part, offset;
+
+	/* Use a sequence of writes to flush the cache region requested */
+	thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS)
+					  >> TXENABLE_THREAD_S;
+	set_shift = icache_set_shift;
+
+	/* Move to the base of the physical cache flush region */
+	flush0 = LINSYSCFLUSH_ICACHE_LINE;
+	step   = 64;
+
+	/* Get partition code for this thread */
+	part = metag_in32(SYSC_ICPART0 +
+			  (SYSC_xCPARTn_STRIDE * thread));
+
+	if ((int)start < 0)
+		/* Access Global vs Local partition */
+		part >>= SYSC_xCPARTG_AND_S-SYSC_xCPARTL_AND_S;
+
+	/* Extract offset and move SetOff */
+	offset = (part & SYSC_xCPARTL_OR_BITS)
+			>> SYSC_xCPARTL_OR_S;
+	flush0 += (offset << (set_shift - 4));
+
+	/* Shrink size */
+	part = (part & SYSC_xCPARTL_AND_BITS)
+			>> SYSC_xCPARTL_AND_S;
+	loops = ((part + 1) << (set_shift - 4));
+
+	/* Where does the Set end? */
+	end_set = flush0 + loops;
+	set_size = loops;
+
+#ifdef CONFIG_METAG_META12
+	if ((bytes < 4096) && (bytes < loops)) {
+		/* Unreachable on HTP/MTP */
+		/* Only target the sets that could be relavent */
+		flush0 += (loops - step) & ((int) start);
+		loops = (((int) start) & (step-1)) + bytes + step - 1;
+	}
+#endif
+
+	/* Reduce loops by step of cache line size */
+	loops /= step;
+
+	flush1 = flush0 + (1<<set_shift);
+	flush2 = flush0 + (2<<set_shift);
+	flush3 = flush0 + (3<<set_shift);
+
+	if (icache_sets_log2 == 1) {
+		flush2 = flush1;
+		flush3 = flush1 + step;
+		flush1 = flush0 + step;
+#if 0
+		/* flush0 will stop one line early in this case
+		 * (flush1 will do the final line).
+		 * However we don't correct end_set here at the moment
+		 * because it will never wrap on HTP/MTP
+		 */
+		end_set -= step;
+#endif
+		step  <<= 1;
+		loops >>= 1;
+	}
+
+	/* Clear loops ways in cache */
+	while (loops-- != 0) {
+#if 0
+		/*
+		 * GCC doesn't generate very good code for this so we
+		 * provide inline assembly instead.
+		 */
+		/* Clear the ways */
+		metag_out8(0, flush0);
+		metag_out8(0, flush1);
+		metag_out8(0, flush2);
+		metag_out8(0, flush3);
+
+		flush0 += step;
+		flush1 += step;
+		flush2 += step;
+		flush3 += step;
+#else
+		asm volatile (
+			"SETB\t[%0+%4++],%5\n"
+			"SETB\t[%1+%4++],%5\n"
+			"SETB\t[%2+%4++],%5\n"
+			"SETB\t[%3+%4++],%5\n"
+			: "+e" (flush0),
+			  "+e" (flush1),
+			  "+e" (flush2),
+			  "+e" (flush3)
+			: "e" (step), "a" (0));
+#endif
+
+		if (flush0 == end_set) {
+			/* Wrap within Set 0 */
+			flush0 -= set_size;
+			flush1 -= set_size;
+			flush2 -= set_size;
+			flush3 -= set_size;
+		}
+	}
+}
+
+void metag_code_cache_flush_all(const void *start)
+{
+	if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0)
+		/* No need to flush the code cache it's not actually enabled */
+		return;
+
+	metag_phys_code_cache_flush(start, 4096);
+}
+EXPORT_SYMBOL(metag_code_cache_flush_all);
+
+void metag_code_cache_flush(const void *start, int bytes)
+{
+#ifndef CONFIG_METAG_META12
+	void *flush;
+	int loops, step;
+#endif /* !CONFIG_METAG_META12 */
+
+	if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0)
+		/* No need to flush the code cache it's not actually enabled */
+		return;
+
+#ifdef CONFIG_METAG_META12
+	/* CACHEWD isn't available on Meta1, so always do full cache flush */
+	metag_phys_code_cache_flush(start, bytes);
+
+#else /* CONFIG_METAG_META12 */
+	/* If large size do full physical cache flush */
+	if (bytes >= 4096) {
+		metag_phys_code_cache_flush(start, bytes);
+		return;
+	}
+
+	/* Use linear cache flush mechanism on META IP */
+	flush = (void *)((int)start & ~(ICACHE_LINE_BYTES-1));
+	loops  = ((int)start & (ICACHE_LINE_BYTES-1)) + bytes +
+		(ICACHE_LINE_BYTES-1);
+	loops  >>= ICACHE_LINE_S;
+
+#define PRIM_IFLUSH(addr, offset) \
+	__builtin_meta2_cachewd(((addr) + ((offset) * 64)), CACHEW_ICACHE_BIT)
+
+#define LOOP_INC (4*64)
+
+	do {
+		/* By default stop */
+		step = 0;
+
+		switch (loops) {
+		/* Drop Thru Cases! */
+		default:
+			PRIM_IFLUSH(flush, 3);
+			loops -= 4;
+			step = 1;
+		case 3:
+			PRIM_IFLUSH(flush, 2);
+		case 2:
+			PRIM_IFLUSH(flush, 1);
+		case 1:
+			PRIM_IFLUSH(flush, 0);
+			flush += LOOP_INC;
+		case 0:
+			break;
+		}
+	} while (step);
+#endif /* !CONFIG_METAG_META12 */
+}
+EXPORT_SYMBOL(metag_code_cache_flush);
diff --git a/arch/metag/mm/extable.c b/arch/metag/mm/extable.c
new file mode 100644
index 0000000..2a21eae
--- /dev/null
+++ b/arch/metag/mm/extable.c
@@ -0,0 +1,15 @@
+
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+int fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+	unsigned long pc = instruction_pointer(regs);
+
+	fixup = search_exception_tables(pc);
+	if (fixup)
+		regs->ctx.CurrPC = fixup->fixup;
+
+	return fixup != NULL;
+}
diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c
new file mode 100644
index 0000000..2c75bf7
--- /dev/null
+++ b/arch/metag/mm/fault.c
@@ -0,0 +1,239 @@
+/*
+ *  Meta page fault handling.
+ *
+ *  Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ */
+
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <linux/interrupt.h>
+#include <linux/uaccess.h>
+
+#include <asm/tlbflush.h>
+#include <asm/mmu.h>
+#include <asm/traps.h>
+
+/* Clear any pending catch buffer state. */
+static void clear_cbuf_entry(struct pt_regs *regs, unsigned long addr,
+			     unsigned int trapno)
+{
+	PTBICTXEXTCB0 cbuf = regs->extcb0;
+
+	switch (trapno) {
+		/* Instruction fetch faults leave no catch buffer state. */
+	case TBIXXF_SIGNUM_IGF:
+	case TBIXXF_SIGNUM_IPF:
+		return;
+	default:
+		if (cbuf[0].CBAddr == addr) {
+			cbuf[0].CBAddr = 0;
+			cbuf[0].CBFlags &= ~TXCATCH0_FAULT_BITS;
+
+			/* And, as this is the ONLY catch entry, we
+			 * need to clear the cbuf bit from the context!
+			 */
+			regs->ctx.SaveMask &= ~(TBICTX_CBUF_BIT |
+						TBICTX_XCBF_BIT);
+
+			return;
+		}
+		pr_err("Failed to clear cbuf entry!\n");
+	}
+}
+
+int show_unhandled_signals = 1;
+
+int do_page_fault(struct pt_regs *regs, unsigned long address,
+		  unsigned int write_access, unsigned int trapno)
+{
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma, *prev_vma;
+	siginfo_t info;
+	int fault;
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
+				(write_access ? FAULT_FLAG_WRITE : 0);
+
+	tsk = current;
+
+	if ((address >= VMALLOC_START) && (address < VMALLOC_END)) {
+		/*
+		 * Synchronize this task's top level page-table
+		 * with the 'reference' page table.
+		 *
+		 * Do _not_ use "tsk" here. We might be inside
+		 * an interrupt in the middle of a task switch..
+		 */
+		int offset = pgd_index(address);
+		pgd_t *pgd, *pgd_k;
+		pud_t *pud, *pud_k;
+		pmd_t *pmd, *pmd_k;
+		pte_t *pte_k;
+
+		pgd = ((pgd_t *)mmu_get_base()) + offset;
+		pgd_k = swapper_pg_dir + offset;
+
+		/* This will never happen with the folded page table. */
+		if (!pgd_present(*pgd)) {
+			if (!pgd_present(*pgd_k))
+				goto bad_area_nosemaphore;
+			set_pgd(pgd, *pgd_k);
+			return 0;
+		}
+
+		pud = pud_offset(pgd, address);
+		pud_k = pud_offset(pgd_k, address);
+		if (!pud_present(*pud_k))
+			goto bad_area_nosemaphore;
+		set_pud(pud, *pud_k);
+
+		pmd = pmd_offset(pud, address);
+		pmd_k = pmd_offset(pud_k, address);
+		if (!pmd_present(*pmd_k))
+			goto bad_area_nosemaphore;
+		set_pmd(pmd, *pmd_k);
+
+		pte_k = pte_offset_kernel(pmd_k, address);
+		if (!pte_present(*pte_k))
+			goto bad_area_nosemaphore;
+
+		/* May only be needed on Chorus2 */
+		flush_tlb_all();
+		return 0;
+	}
+
+	mm = tsk->mm;
+
+	if (in_atomic() || !mm)
+		goto no_context;
+
+retry:
+	down_read(&mm->mmap_sem);
+
+	vma = find_vma_prev(mm, address, &prev_vma);
+
+	if (!vma || address < vma->vm_start)
+		goto check_expansion;
+
+good_area:
+	if (write_access) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	} else {
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
+			goto bad_area;
+	}
+
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+	fault = handle_mm_fault(mm, vma, address, flags);
+
+	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+		return 0;
+
+	if (unlikely(fault & VM_FAULT_ERROR)) {
+		if (fault & VM_FAULT_OOM)
+			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGBUS)
+			goto do_sigbus;
+		BUG();
+	}
+	if (flags & FAULT_FLAG_ALLOW_RETRY) {
+		if (fault & VM_FAULT_MAJOR)
+			tsk->maj_flt++;
+		else
+			tsk->min_flt++;
+		if (fault & VM_FAULT_RETRY) {
+			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
+
+			/*
+			 * No need to up_read(&mm->mmap_sem) as we would
+			 * have already released it in __lock_page_or_retry
+			 * in mm/filemap.c.
+			 */
+
+			goto retry;
+		}
+	}
+
+	up_read(&mm->mmap_sem);
+	return 0;
+
+check_expansion:
+	vma = prev_vma;
+	if (vma && (expand_stack(vma, address) == 0))
+		goto good_area;
+
+bad_area:
+	up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+	if (user_mode(regs)) {
+		info.si_signo = SIGSEGV;
+		info.si_errno = 0;
+		info.si_code = SEGV_MAPERR;
+		info.si_addr = (__force void __user *)address;
+		info.si_trapno = trapno;
+
+		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+		    printk_ratelimit()) {
+			pr_info("%s%s[%d]: segfault at %lx pc %08x sp %08x write %d trap %#x (%s)",
+			       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+			       tsk->comm, task_pid_nr(tsk), address,
+			       regs->ctx.CurrPC, regs->ctx.AX[0].U0,
+			       write_access, trapno, trap_name(trapno));
+			print_vma_addr(" in ", regs->ctx.CurrPC);
+			print_vma_addr(" rtp in ", regs->ctx.DX[4].U1);
+			printk("\n");
+			show_regs(regs);
+		}
+		force_sig_info(SIGSEGV, &info, tsk);
+		return 1;
+	}
+	goto no_context;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+
+	/*
+	 * Send a sigbus, regardless of whether we were in kernel
+	 * or user mode.
+	 */
+	info.si_signo = SIGBUS;
+	info.si_errno = 0;
+	info.si_code = BUS_ADRERR;
+	info.si_addr = (__force void __user *)address;
+	info.si_trapno = trapno;
+	force_sig_info(SIGBUS, &info, tsk);
+
+	/* Kernel mode? Handle exceptions or die */
+	if (!user_mode(regs))
+		goto no_context;
+
+	return 1;
+
+	/*
+	 * We ran out of memory, or some other thing happened to us that made
+	 * us unable to handle the page fault gracefully.
+	 */
+out_of_memory:
+	up_read(&mm->mmap_sem);
+	if (user_mode(regs))
+		do_group_exit(SIGKILL);
+
+no_context:
+	/* Are we prepared to handle this kernel fault?  */
+	if (fixup_exception(regs)) {
+		clear_cbuf_entry(regs, address, trapno);
+		return 1;
+	}
+
+	die("Oops", regs, (write_access << 15) | trapno, address);
+	do_exit(SIGKILL);
+}
diff --git a/arch/metag/mm/highmem.c b/arch/metag/mm/highmem.c
new file mode 100644
index 0000000..d71f621
--- /dev/null
+++ b/arch/metag/mm/highmem.c
@@ -0,0 +1,133 @@
+#include <linux/export.h>
+#include <linux/highmem.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <asm/fixmap.h>
+#include <asm/tlbflush.h>
+
+static pte_t *kmap_pte;
+
+unsigned long highstart_pfn, highend_pfn;
+
+void *kmap(struct page *page)
+{
+	might_sleep();
+	if (!PageHighMem(page))
+		return page_address(page);
+	return kmap_high(page);
+}
+EXPORT_SYMBOL(kmap);
+
+void kunmap(struct page *page)
+{
+	BUG_ON(in_interrupt());
+	if (!PageHighMem(page))
+		return;
+	kunmap_high(page);
+}
+EXPORT_SYMBOL(kunmap);
+
+/*
+ * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
+ * no global lock is needed and because the kmap code must perform a global TLB
+ * invalidation when the kmap pool wraps.
+ *
+ * However when holding an atomic kmap is is not legal to sleep, so atomic
+ * kmaps are appropriate for short, tight code paths only.
+ */
+
+void *kmap_atomic(struct page *page)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr;
+	int type;
+
+	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+	pagefault_disable();
+	if (!PageHighMem(page))
+		return page_address(page);
+
+	type = kmap_atomic_idx_push();
+	idx = type + KM_TYPE_NR * smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+	BUG_ON(!pte_none(*(kmap_pte - idx)));
+#endif
+	set_pte(kmap_pte - idx, mk_pte(page, PAGE_KERNEL));
+
+	return (void *)vaddr;
+}
+EXPORT_SYMBOL(kmap_atomic);
+
+void __kunmap_atomic(void *kvaddr)
+{
+	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+	int idx, type;
+
+	if (kvaddr >= (void *)FIXADDR_START) {
+		type = kmap_atomic_idx();
+		idx = type + KM_TYPE_NR * smp_processor_id();
+
+		/*
+		 * Force other mappings to Oops if they'll try to access this
+		 * pte without first remap it.  Keeping stale mappings around
+		 * is a bad idea also, in case the page changes cacheability
+		 * attributes or becomes a protected page in a hypervisor.
+		 */
+		pte_clear(&init_mm, vaddr, kmap_pte-idx);
+		flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
+
+		kmap_atomic_idx_pop();
+	}
+
+	pagefault_enable();
+}
+EXPORT_SYMBOL(__kunmap_atomic);
+
+/*
+ * This is the same as kmap_atomic() but can map memory that doesn't
+ * have a struct page associated with it.
+ */
+void *kmap_atomic_pfn(unsigned long pfn)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr;
+	int type;
+
+	pagefault_disable();
+
+	type = kmap_atomic_idx_push();
+	idx = type + KM_TYPE_NR * smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+	BUG_ON(!pte_none(*(kmap_pte - idx)));
+#endif
+	set_pte(kmap_pte - idx, pfn_pte(pfn, PAGE_KERNEL));
+	flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
+
+	return (void *)vaddr;
+}
+
+struct page *kmap_atomic_to_page(void *ptr)
+{
+	unsigned long vaddr = (unsigned long)ptr;
+	int idx;
+	pte_t *pte;
+
+	if (vaddr < FIXADDR_START)
+		return virt_to_page(ptr);
+
+	idx = virt_to_fix(vaddr);
+	pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
+	return pte_page(*pte);
+}
+
+void __init kmap_init(void)
+{
+	unsigned long kmap_vstart;
+
+	/* cache the first kmap pte */
+	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
+	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+}
diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c
new file mode 100644
index 0000000..3c52fa6
--- /dev/null
+++ b/arch/metag/mm/hugetlbpage.c
@@ -0,0 +1,259 @@
+/*
+ * arch/metag/mm/hugetlbpage.c
+ *
+ * METAG HugeTLB page support.
+ *
+ * Cloned from SuperH
+ *
+ * Cloned from sparc64 by Paul Mundt.
+ *
+ * Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/sysctl.h>
+
+#include <asm/mman.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+
+/*
+ * If the arch doesn't supply something else, assume that hugepage
+ * size aligned regions are ok without further preparation.
+ */
+int prepare_hugepage_range(struct file *file, unsigned long addr,
+						unsigned long len)
+{
+	struct mm_struct *mm = current->mm;
+	struct hstate *h = hstate_file(file);
+	struct vm_area_struct *vma;
+
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (addr & ~huge_page_mask(h))
+		return -EINVAL;
+	if (TASK_SIZE - len < addr)
+		return -EINVAL;
+
+	vma = find_vma(mm, ALIGN_HUGEPT(addr));
+	if (vma && !(vma->vm_flags & MAP_HUGETLB))
+		return -EINVAL;
+
+	vma = find_vma(mm, addr);
+	if (vma) {
+		if (addr + len > vma->vm_start)
+			return -EINVAL;
+		if (!(vma->vm_flags & MAP_HUGETLB) &&
+		    (ALIGN_HUGEPT(addr + len) > vma->vm_start))
+			return -EINVAL;
+	}
+	return 0;
+}
+
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_offset(mm, addr);
+	pud = pud_offset(pgd, addr);
+	pmd = pmd_offset(pud, addr);
+	pte = pte_alloc_map(mm, NULL, pmd, addr);
+	pgd->pgd &= ~_PAGE_SZ_MASK;
+	pgd->pgd |= _PAGE_SZHUGE;
+
+	return pte;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	pud = pud_offset(pgd, addr);
+	pmd = pmd_offset(pud, addr);
+	pte = pte_offset_kernel(pmd, addr);
+
+	return pte;
+}
+
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+
+struct page *follow_huge_addr(struct mm_struct *mm,
+			      unsigned long address, int write)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return pmd_page_shift(pmd) > PAGE_SHIFT;
+}
+
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
+struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+			     pmd_t *pmd, int write)
+{
+	return NULL;
+}
+
+#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+
+/*
+ * Look for an unmapped area starting after another hugetlb vma.
+ * There are guaranteed to be no huge pte's spare if all the huge pages are
+ * full size (4MB), so in that case compile out this search.
+ */
+#if HPAGE_SHIFT == HUGEPT_SHIFT
+static inline unsigned long
+hugetlb_get_unmapped_area_existing(unsigned long len)
+{
+	return 0;
+}
+#else
+static unsigned long
+hugetlb_get_unmapped_area_existing(unsigned long len)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long start_addr, addr;
+	int after_huge;
+
+	if (mm->context.part_huge) {
+		start_addr = mm->context.part_huge;
+		after_huge = 1;
+	} else {
+		start_addr = TASK_UNMAPPED_BASE;
+		after_huge = 0;
+	}
+new_search:
+	addr = start_addr;
+
+	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+		if ((!vma && !after_huge) || TASK_SIZE - len < addr) {
+			/*
+			 * Start a new search - just in case we missed
+			 * some holes.
+			 */
+			if (start_addr != TASK_UNMAPPED_BASE) {
+				start_addr = TASK_UNMAPPED_BASE;
+				goto new_search;
+			}
+			return 0;
+		}
+		/* skip ahead if we've aligned right over some vmas */
+		if (vma && vma->vm_end <= addr)
+			continue;
+		/* space before the next vma? */
+		if (after_huge && (!vma || ALIGN_HUGEPT(addr + len)
+			    <= vma->vm_start)) {
+			unsigned long end = addr + len;
+			if (end & HUGEPT_MASK)
+				mm->context.part_huge = end;
+			else if (addr == mm->context.part_huge)
+				mm->context.part_huge = 0;
+			return addr;
+		}
+		if (vma && (vma->vm_flags & MAP_HUGETLB)) {
+			/* space after a huge vma in 2nd level page table? */
+			if (vma->vm_end & HUGEPT_MASK) {
+				after_huge = 1;
+				/* no need to align to the next PT block */
+				addr = vma->vm_end;
+				continue;
+			}
+		}
+		after_huge = 0;
+		addr = ALIGN_HUGEPT(vma->vm_end);
+	}
+}
+#endif
+
+/* Do a full search to find an area without any nearby normal pages. */
+static unsigned long
+hugetlb_get_unmapped_area_new_pmd(unsigned long len)
+{
+	struct vm_unmapped_area_info info;
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = TASK_UNMAPPED_BASE;
+	info.high_limit = TASK_SIZE;
+	info.align_mask = PAGE_MASK & HUGEPT_MASK;
+	info.align_offset = 0;
+	return vm_unmapped_area(&info);
+}
+
+unsigned long
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct hstate *h = hstate_file(file);
+
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED) {
+		if (prepare_hugepage_range(file, addr, len))
+			return -EINVAL;
+		return addr;
+	}
+
+	if (addr) {
+		addr = ALIGN(addr, huge_page_size(h));
+		if (!prepare_hugepage_range(file, addr, len))
+			return addr;
+	}
+
+	/*
+	 * Look for an existing hugetlb vma with space after it (this is to to
+	 * minimise fragmentation caused by huge pages.
+	 */
+	addr = hugetlb_get_unmapped_area_existing(len);
+	if (addr)
+		return addr;
+
+	/*
+	 * Find an unmapped naturally aligned set of 4MB blocks that we can use
+	 * for huge pages.
+	 */
+	return hugetlb_get_unmapped_area_new_pmd(len);
+}
+
+#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
+
+/* necessary for boot time 4MB huge page allocation */
+static __init int setup_hugepagesz(char *opt)
+{
+	unsigned long ps = memparse(opt, &opt);
+	if (ps == (1 << HPAGE_SHIFT)) {
+		hugetlb_add_hstate(HPAGE_SHIFT - PAGE_SHIFT);
+	} else {
+		pr_err("hugepagesz: Unsupported page size %lu M\n",
+		       ps >> 20);
+		return 0;
+	}
+	return 1;
+}
+__setup("hugepagesz=", setup_hugepagesz);
diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c
new file mode 100644
index 0000000..504a398
--- /dev/null
+++ b/arch/metag/mm/init.c
@@ -0,0 +1,451 @@
+/*
+ *  Copyright (C) 2005,2006,2007,2008,2009,2010 Imagination Technologies
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/pagemap.h>
+#include <linux/percpu.h>
+#include <linux/memblock.h>
+#include <linux/initrd.h>
+#include <linux/of_fdt.h>
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/sections.h>
+#include <asm/tlb.h>
+#include <asm/user_gateway.h>
+#include <asm/mmzone.h>
+#include <asm/fixmap.h>
+
+unsigned long pfn_base;
+EXPORT_SYMBOL(pfn_base);
+
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_data;
+
+unsigned long empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
+
+extern char __user_gateway_start;
+extern char __user_gateway_end;
+
+void *gateway_page;
+
+/*
+ * Insert the gateway page into a set of page tables, creating the
+ * page tables if necessary.
+ */
+static void insert_gateway_page(pgd_t *pgd, unsigned long address)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	BUG_ON(!pgd_present(*pgd));
+
+	pud = pud_offset(pgd, address);
+	BUG_ON(!pud_present(*pud));
+
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd)) {
+		pte = alloc_bootmem_pages(PAGE_SIZE);
+		set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)));
+	}
+
+	pte = pte_offset_kernel(pmd, address);
+	set_pte(pte, pfn_pte(__pa(gateway_page) >> PAGE_SHIFT, PAGE_READONLY));
+}
+
+/* Alloc and map a page in a known location accessible to userspace. */
+static void __init user_gateway_init(void)
+{
+	unsigned long address = USER_GATEWAY_PAGE;
+	int offset = pgd_index(address);
+	pgd_t *pgd;
+
+	gateway_page = alloc_bootmem_pages(PAGE_SIZE);
+
+	pgd = swapper_pg_dir + offset;
+	insert_gateway_page(pgd, address);
+
+#ifdef CONFIG_METAG_META12
+	/*
+	 * Insert the gateway page into our current page tables even
+	 * though we've already inserted it into our reference page
+	 * table (swapper_pg_dir). This is because with a META1 mmu we
+	 * copy just the user address range and not the gateway page
+	 * entry on context switch, see switch_mmu().
+	 */
+	pgd = (pgd_t *)mmu_get_base() + offset;
+	insert_gateway_page(pgd, address);
+#endif /* CONFIG_METAG_META12 */
+
+	BUG_ON((&__user_gateway_end - &__user_gateway_start) > PAGE_SIZE);
+
+	gateway_page += (address & ~PAGE_MASK);
+
+	memcpy(gateway_page, &__user_gateway_start,
+	       &__user_gateway_end - &__user_gateway_start);
+
+	/*
+	 * We don't need to flush the TLB here, there should be no mapping
+	 * present at boot for this address and only valid mappings are in
+	 * the TLB (apart from on Meta 1.x, but those cached invalid
+	 * mappings should be impossible to hit here).
+	 *
+	 * We don't flush the code cache here even though we have written
+	 * code through the data cache and they may not be coherent. At
+	 * this point we assume there is no stale data in the code cache
+	 * for this address so there is no need to flush.
+	 */
+}
+
+static void __init allocate_pgdat(unsigned int nid)
+{
+	unsigned long start_pfn, end_pfn;
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	unsigned long phys;
+#endif
+
+	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	phys = __memblock_alloc_base(sizeof(struct pglist_data),
+				SMP_CACHE_BYTES, end_pfn << PAGE_SHIFT);
+	/* Retry with all of system memory */
+	if (!phys)
+		phys = __memblock_alloc_base(sizeof(struct pglist_data),
+					     SMP_CACHE_BYTES,
+					     memblock_end_of_DRAM());
+	if (!phys)
+		panic("Can't allocate pgdat for node %d\n", nid);
+
+	NODE_DATA(nid) = __va(phys);
+	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
+	NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
+#endif
+
+	NODE_DATA(nid)->node_start_pfn = start_pfn;
+	NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+}
+
+static void __init bootmem_init_one_node(unsigned int nid)
+{
+	unsigned long total_pages, paddr;
+	unsigned long end_pfn;
+	struct pglist_data *p;
+
+	p = NODE_DATA(nid);
+
+	/* Nothing to do.. */
+	if (!p->node_spanned_pages)
+		return;
+
+	end_pfn = p->node_start_pfn + p->node_spanned_pages;
+#ifdef CONFIG_HIGHMEM
+	if (end_pfn > max_low_pfn)
+		end_pfn = max_low_pfn;
+#endif
+
+	total_pages = bootmem_bootmap_pages(end_pfn - p->node_start_pfn);
+
+	paddr = memblock_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE);
+	if (!paddr)
+		panic("Can't allocate bootmap for nid[%d]\n", nid);
+
+	init_bootmem_node(p, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn);
+
+	free_bootmem_with_active_regions(nid, end_pfn);
+
+	/*
+	 * XXX Handle initial reservations for the system memory node
+	 * only for the moment, we'll refactor this later for handling
+	 * reservations in other nodes.
+	 */
+	if (nid == 0) {
+		struct memblock_region *reg;
+
+		/* Reserve the sections we're already using. */
+		for_each_memblock(reserved, reg) {
+			unsigned long size = reg->size;
+
+#ifdef CONFIG_HIGHMEM
+			/* ...but not highmem */
+			if (PFN_DOWN(reg->base) >= highstart_pfn)
+				continue;
+
+			if (PFN_UP(reg->base + size) > highstart_pfn)
+				size = (highstart_pfn - PFN_DOWN(reg->base))
+				       << PAGE_SHIFT;
+#endif
+
+			reserve_bootmem(reg->base, size, BOOTMEM_DEFAULT);
+		}
+	}
+
+	sparse_memory_present_with_active_regions(nid);
+}
+
+static void __init do_init_bootmem(void)
+{
+	struct memblock_region *reg;
+	int i;
+
+	/* Add active regions with valid PFNs. */
+	for_each_memblock(memory, reg) {
+		unsigned long start_pfn, end_pfn;
+		start_pfn = memblock_region_memory_base_pfn(reg);
+		end_pfn = memblock_region_memory_end_pfn(reg);
+		memblock_set_node(PFN_PHYS(start_pfn),
+				  PFN_PHYS(end_pfn - start_pfn), 0);
+	}
+
+	/* All of system RAM sits in node 0 for the non-NUMA case */
+	allocate_pgdat(0);
+	node_set_online(0);
+
+	soc_mem_setup();
+
+	for_each_online_node(i)
+		bootmem_init_one_node(i);
+
+	sparse_init();
+}
+
+extern char _heap_start[];
+
+static void __init init_and_reserve_mem(void)
+{
+	unsigned long start_pfn, heap_start;
+	u64 base = min_low_pfn << PAGE_SHIFT;
+	u64 size = (max_low_pfn << PAGE_SHIFT) - base;
+
+	heap_start = (unsigned long) &_heap_start;
+
+	memblock_add(base, size);
+
+	/*
+	 * Partially used pages are not usable - thus
+	 * we are rounding upwards:
+	 */
+	start_pfn = PFN_UP(__pa(heap_start));
+
+	/*
+	 * Reserve the kernel text.
+	 */
+	memblock_reserve(base, (PFN_PHYS(start_pfn) + PAGE_SIZE - 1) - base);
+
+#ifdef CONFIG_HIGHMEM
+	/*
+	 * Add & reserve highmem, so page structures are initialised.
+	 */
+	base = highstart_pfn << PAGE_SHIFT;
+	size = (highend_pfn << PAGE_SHIFT) - base;
+	if (size) {
+		memblock_add(base, size);
+		memblock_reserve(base, size);
+	}
+#endif
+}
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * Ensure we have allocated page tables in swapper_pg_dir for the
+ * fixed mappings range from 'start' to 'end'.
+ */
+static void __init allocate_pgtables(unsigned long start, unsigned long end)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	int i, j;
+	unsigned long vaddr;
+
+	vaddr = start;
+	i = pgd_index(vaddr);
+	j = pmd_index(vaddr);
+	pgd = swapper_pg_dir + i;
+
+	for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
+		pmd = (pmd_t *)pgd;
+		for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
+			vaddr += PMD_SIZE;
+
+			if (!pmd_none(*pmd))
+				continue;
+
+			pte = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+			pmd_populate_kernel(&init_mm, pmd, pte);
+		}
+		j = 0;
+	}
+}
+
+static void __init fixedrange_init(void)
+{
+	unsigned long vaddr, end;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	/*
+	 * Fixed mappings:
+	 */
+	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+	end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
+	allocate_pgtables(vaddr, end);
+
+	/*
+	 * Permanent kmaps:
+	 */
+	vaddr = PKMAP_BASE;
+	allocate_pgtables(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP);
+
+	pgd = swapper_pg_dir + pgd_index(vaddr);
+	pud = pud_offset(pgd, vaddr);
+	pmd = pmd_offset(pud, vaddr);
+	pte = pte_offset_kernel(pmd, vaddr);
+	pkmap_page_table = pte;
+}
+#endif /* CONFIG_HIGHMEM */
+
+/*
+ * paging_init() continues the virtual memory environment setup which
+ * was begun by the code in arch/metag/kernel/setup.c.
+ */
+void __init paging_init(unsigned long mem_end)
+{
+	unsigned long max_zone_pfns[MAX_NR_ZONES];
+	int nid;
+
+	init_and_reserve_mem();
+
+	memblock_allow_resize();
+
+	memblock_dump_all();
+
+	nodes_clear(node_online_map);
+
+	init_new_context(&init_task, &init_mm);
+
+	memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
+
+	do_init_bootmem();
+	mmu_init(mem_end);
+
+#ifdef CONFIG_HIGHMEM
+	fixedrange_init();
+	kmap_init();
+#endif
+
+	/* Initialize the zero page to a bootmem page, already zeroed. */
+	empty_zero_page = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
+
+	user_gateway_init();
+
+	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+
+	for_each_online_node(nid) {
+		pg_data_t *pgdat = NODE_DATA(nid);
+		unsigned long low, start_pfn;
+
+		start_pfn = pgdat->bdata->node_min_pfn;
+		low = pgdat->bdata->node_low_pfn;
+
+		if (max_zone_pfns[ZONE_NORMAL] < low)
+			max_zone_pfns[ZONE_NORMAL] = low;
+
+#ifdef CONFIG_HIGHMEM
+		max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
+#endif
+		pr_info("Node %u: start_pfn = 0x%lx, low = 0x%lx\n",
+			nid, start_pfn, low);
+	}
+
+	free_area_init_nodes(max_zone_pfns);
+}
+
+void __init mem_init(void)
+{
+	int nid;
+
+#ifdef CONFIG_HIGHMEM
+	unsigned long tmp;
+	for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
+		struct page *page = pfn_to_page(tmp);
+		ClearPageReserved(page);
+		init_page_count(page);
+		__free_page(page);
+		totalhigh_pages++;
+	}
+	totalram_pages += totalhigh_pages;
+	num_physpages += totalhigh_pages;
+#endif /* CONFIG_HIGHMEM */
+
+	for_each_online_node(nid) {
+		pg_data_t *pgdat = NODE_DATA(nid);
+		unsigned long node_pages = 0;
+
+		num_physpages += pgdat->node_present_pages;
+
+		if (pgdat->node_spanned_pages)
+			node_pages = free_all_bootmem_node(pgdat);
+
+		totalram_pages += node_pages;
+	}
+
+	pr_info("Memory: %luk/%luk available\n",
+		(unsigned long)nr_free_pages() << (PAGE_SHIFT - 10),
+		num_physpages << (PAGE_SHIFT - 10));
+
+	show_mem(0);
+
+	return;
+}
+
+static void free_init_pages(char *what, unsigned long begin, unsigned long end)
+{
+	unsigned long addr;
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(addr));
+		init_page_count(virt_to_page(addr));
+		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
+		free_page(addr);
+		totalram_pages++;
+	}
+	pr_info("Freeing %s: %luk freed\n", what, (end - begin) >> 10);
+}
+
+void free_initmem(void)
+{
+	free_init_pages("unused kernel memory",
+			(unsigned long)(&__init_begin),
+			(unsigned long)(&__init_end));
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	end = end & PAGE_MASK;
+	free_init_pages("initrd memory", start, end);
+}
+#endif
+
+#ifdef CONFIG_OF_FLATTREE
+void __init early_init_dt_setup_initrd_arch(unsigned long start,
+					    unsigned long end)
+{
+	pr_err("%s(%lx, %lx)\n",
+	       __func__, start, end);
+}
+#endif /* CONFIG_OF_FLATTREE */
diff --git a/arch/metag/mm/ioremap.c b/arch/metag/mm/ioremap.c
new file mode 100644
index 0000000..a136a43
--- /dev/null
+++ b/arch/metag/mm/ioremap.c
@@ -0,0 +1,89 @@
+/*
+ * Re-map IO memory to kernel address space so that we can access it.
+ * Needed for memory-mapped I/O devices mapped outside our normal DRAM
+ * window (that is, all memory-mapped I/O devices).
+ *
+ * Copyright (C) 1995,1996 Linus Torvalds
+ *
+ * Meta port based on CRIS-port by Axis Communications AB
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/io.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include <asm/pgtable.h>
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+void __iomem *__ioremap(unsigned long phys_addr, size_t size,
+			unsigned long flags)
+{
+	unsigned long addr;
+	struct vm_struct *area;
+	unsigned long offset, last_addr;
+	pgprot_t prot;
+
+	/* Don't allow wraparound or zero size */
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr)
+		return NULL;
+
+	/* Custom region addresses are accessible and uncached by default. */
+	if (phys_addr >= LINSYSCUSTOM_BASE &&
+	    phys_addr < (LINSYSCUSTOM_BASE + LINSYSCUSTOM_LIMIT))
+		return (__force void __iomem *) phys_addr;
+
+	/*
+	 * Mappings have to be page-aligned
+	 */
+	offset = phys_addr & ~PAGE_MASK;
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(last_addr+1) - phys_addr;
+	prot = __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_DIRTY |
+			_PAGE_ACCESSED | _PAGE_KERNEL | _PAGE_CACHE_WIN0 |
+			flags);
+
+	/*
+	 * Ok, go for it..
+	 */
+	area = get_vm_area(size, VM_IOREMAP);
+	if (!area)
+		return NULL;
+	area->phys_addr = phys_addr;
+	addr = (unsigned long) area->addr;
+	if (ioremap_page_range(addr, addr + size, phys_addr, prot)) {
+		vunmap((void *) addr);
+		return NULL;
+	}
+	return (__force void __iomem *) (offset + (char *)addr);
+}
+EXPORT_SYMBOL(__ioremap);
+
+void __iounmap(void __iomem *addr)
+{
+	struct vm_struct *p;
+
+	if ((__force unsigned long)addr >= LINSYSCUSTOM_BASE &&
+	    (__force unsigned long)addr < (LINSYSCUSTOM_BASE +
+					   LINSYSCUSTOM_LIMIT))
+		return;
+
+	p = remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr));
+	if (unlikely(!p)) {
+		pr_err("iounmap: bad address %p\n", addr);
+		return;
+	}
+
+	kfree(p);
+}
+EXPORT_SYMBOL(__iounmap);
diff --git a/arch/metag/mm/l2cache.c b/arch/metag/mm/l2cache.c
new file mode 100644
index 0000000..c64ee61
--- /dev/null
+++ b/arch/metag/mm/l2cache.c
@@ -0,0 +1,192 @@
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+
+#include <asm/l2cache.h>
+#include <asm/metag_isa.h>
+
+/* If non-0, then initialise the L2 cache */
+static int l2cache_init = 1;
+/* If non-0, then initialise the L2 cache prefetch */
+static int l2cache_init_pf = 1;
+
+int l2c_pfenable;
+
+static volatile u32 l2c_testdata[16] __initdata __aligned(64);
+
+static int __init parse_l2cache(char *p)
+{
+	char *cp = p;
+
+	if (get_option(&cp, &l2cache_init) != 1) {
+		pr_err("Bad l2cache parameter (%s)\n", p);
+		return 1;
+	}
+	return 0;
+}
+early_param("l2cache", parse_l2cache);
+
+static int __init parse_l2cache_pf(char *p)
+{
+	char *cp = p;
+
+	if (get_option(&cp, &l2cache_init_pf) != 1) {
+		pr_err("Bad l2cache_pf parameter (%s)\n", p);
+		return 1;
+	}
+	return 0;
+}
+early_param("l2cache_pf", parse_l2cache_pf);
+
+static int __init meta_l2c_setup(void)
+{
+	/*
+	 * If the L2 cache isn't even present, don't do anything, but say so in
+	 * the log.
+	 */
+	if (!meta_l2c_is_present()) {
+		pr_info("L2 Cache: Not present\n");
+		return 0;
+	}
+
+	/*
+	 * Check whether the line size is recognised.
+	 */
+	if (!meta_l2c_linesize()) {
+		pr_warn_once("L2 Cache: unknown line size id (config=0x%08x)\n",
+			     meta_l2c_config());
+	}
+
+	/*
+	 * Initialise state.
+	 */
+	l2c_pfenable = _meta_l2c_pf_is_enabled();
+
+	/*
+	 * Enable the L2 cache and print to log whether it was already enabled
+	 * by the bootloader.
+	 */
+	if (l2cache_init) {
+		pr_info("L2 Cache: Enabling... ");
+		if (meta_l2c_enable())
+			pr_cont("already enabled\n");
+		else
+			pr_cont("done\n");
+	} else {
+		pr_info("L2 Cache: Not enabling\n");
+	}
+
+	/*
+	 * Enable L2 cache prefetch.
+	 */
+	if (l2cache_init_pf) {
+		pr_info("L2 Cache: Enabling prefetch... ");
+		if (meta_l2c_pf_enable(1))
+			pr_cont("already enabled\n");
+		else
+			pr_cont("done\n");
+	} else {
+		pr_info("L2 Cache: Not enabling prefetch\n");
+	}
+
+	return 0;
+}
+core_initcall(meta_l2c_setup);
+
+int meta_l2c_disable(void)
+{
+	unsigned long flags;
+	int en;
+
+	if (!meta_l2c_is_present())
+		return 1;
+
+	/*
+	 * Prevent other threads writing during the writeback, otherwise the
+	 * writes will get "lost" when the L2 is disabled.
+	 */
+	__global_lock2(flags);
+	en = meta_l2c_is_enabled();
+	if (likely(en)) {
+		_meta_l2c_pf_enable(0);
+		wr_fence();
+		_meta_l2c_purge();
+		_meta_l2c_enable(0);
+	}
+	__global_unlock2(flags);
+
+	return !en;
+}
+
+int meta_l2c_enable(void)
+{
+	unsigned long flags;
+	int en;
+
+	if (!meta_l2c_is_present())
+		return 0;
+
+	/*
+	 * Init (clearing the L2) can happen while the L2 is disabled, so other
+	 * threads are safe to continue executing, however we must not init the
+	 * cache if it's already enabled (dirty lines would be discarded), so
+	 * this operation should still be atomic with other threads.
+	 */
+	__global_lock1(flags);
+	en = meta_l2c_is_enabled();
+	if (likely(!en)) {
+		_meta_l2c_init();
+		_meta_l2c_enable(1);
+		_meta_l2c_pf_enable(l2c_pfenable);
+	}
+	__global_unlock1(flags);
+
+	return en;
+}
+
+int meta_l2c_pf_enable(int pfenable)
+{
+	unsigned long flags;
+	int en = l2c_pfenable;
+
+	if (!meta_l2c_is_present())
+		return 0;
+
+	/*
+	 * We read modify write the enable register, so this operation must be
+	 * atomic with other threads.
+	 */
+	__global_lock1(flags);
+	en = l2c_pfenable;
+	l2c_pfenable = pfenable;
+	if (meta_l2c_is_enabled())
+		_meta_l2c_pf_enable(pfenable);
+	__global_unlock1(flags);
+
+	return en;
+}
+
+int meta_l2c_flush(void)
+{
+	unsigned long flags;
+	int en;
+
+	/*
+	 * Prevent other threads writing during the writeback. This also
+	 * involves read modify writes.
+	 */
+	__global_lock2(flags);
+	en = meta_l2c_is_enabled();
+	if (likely(en)) {
+		_meta_l2c_pf_enable(0);
+		wr_fence();
+		_meta_l2c_purge();
+		_meta_l2c_enable(0);
+		_meta_l2c_init();
+		_meta_l2c_enable(1);
+		_meta_l2c_pf_enable(l2c_pfenable);
+	}
+	__global_unlock2(flags);
+
+	return !en;
+}
diff --git a/arch/metag/mm/maccess.c b/arch/metag/mm/maccess.c
new file mode 100644
index 0000000..eba2cfc
--- /dev/null
+++ b/arch/metag/mm/maccess.c
@@ -0,0 +1,68 @@
+/*
+ * safe read and write memory routines callable while atomic
+ *
+ * Copyright 2012 Imagination Technologies
+ */
+
+#include <linux/uaccess.h>
+#include <asm/io.h>
+
+/*
+ * The generic probe_kernel_write() uses the user copy code which can split the
+ * writes if the source is unaligned, and repeats writes to make exceptions
+ * precise. We override it here to avoid these things happening to memory mapped
+ * IO memory where they could have undesired effects.
+ * Due to the use of CACHERD instruction this only works on Meta2 onwards.
+ */
+#ifdef CONFIG_METAG_META21
+long probe_kernel_write(void *dst, const void *src, size_t size)
+{
+	unsigned long ldst = (unsigned long)dst;
+	void __iomem *iodst = (void __iomem *)dst;
+	unsigned long lsrc = (unsigned long)src;
+	const u8 *psrc = (u8 *)src;
+	unsigned int pte, i;
+	u8 bounce[8] __aligned(8);
+
+	if (!size)
+		return 0;
+
+	/* Use the write combine bit to decide is the destination is MMIO. */
+	pte = __builtin_meta2_cacherd(dst);
+
+	/* Check the mapping is valid and writeable. */
+	if ((pte & (MMCU_ENTRY_WR_BIT | MMCU_ENTRY_VAL_BIT))
+	    != (MMCU_ENTRY_WR_BIT | MMCU_ENTRY_VAL_BIT))
+		return -EFAULT;
+
+	/* Fall back to generic version for cases we're not interested in. */
+	if (pte & MMCU_ENTRY_WRC_BIT	|| /* write combined memory */
+	    (ldst & (size - 1))		|| /* destination unaligned */
+	    size > 8			|| /* more than max write size */
+	    (size & (size - 1)))	   /* non power of 2 size */
+		return __probe_kernel_write(dst, src, size);
+
+	/* If src is unaligned, copy to the aligned bounce buffer first. */
+	if (lsrc & (size - 1)) {
+		for (i = 0; i < size; ++i)
+			bounce[i] = psrc[i];
+		psrc = bounce;
+	}
+
+	switch (size) {
+	case 1:
+		writeb(*psrc, iodst);
+		break;
+	case 2:
+		writew(*(const u16 *)psrc, iodst);
+		break;
+	case 4:
+		writel(*(const u32 *)psrc, iodst);
+		break;
+	case 8:
+		writeq(*(const u64 *)psrc, iodst);
+		break;
+	}
+	return 0;
+}
+#endif
diff --git a/arch/metag/mm/mmu-meta1.c b/arch/metag/mm/mmu-meta1.c
new file mode 100644
index 0000000..91f4255
--- /dev/null
+++ b/arch/metag/mm/mmu-meta1.c
@@ -0,0 +1,157 @@
+/*
+ *  Copyright (C) 2005,2006,2007,2008,2009 Imagination Technologies
+ *
+ * Meta 1 MMU handling code.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+
+#include <asm/mmu.h>
+
+#define DM3_BASE (LINSYSDIRECT_BASE + (MMCU_DIRECTMAPn_ADDR_SCALE * 3))
+
+/*
+ * This contains the physical address of the top level 2k pgd table.
+ */
+static unsigned long mmu_base_phys;
+
+/*
+ * Given a physical address, return a mapped virtual address that can be used
+ * to access that location.
+ * In practice, we use the DirectMap region to make this happen.
+ */
+static unsigned long map_addr(unsigned long phys)
+{
+	static unsigned long dm_base = 0xFFFFFFFF;
+	int offset;
+
+	offset = phys - dm_base;
+
+	/* Are we in the current map range ? */
+	if ((offset < 0) || (offset >= MMCU_DIRECTMAPn_ADDR_SCALE)) {
+		/* Calculate new DM area */
+		dm_base = phys & ~(MMCU_DIRECTMAPn_ADDR_SCALE - 1);
+
+		/* Actually map it in! */
+		metag_out32(dm_base, MMCU_DIRECTMAP3_ADDR);
+
+		/* And calculate how far into that area our reference is */
+		offset = phys - dm_base;
+	}
+
+	return DM3_BASE + offset;
+}
+
+/*
+ * Return the physical address of the base of our pgd table.
+ */
+static inline unsigned long __get_mmu_base(void)
+{
+	unsigned long base_phys;
+	unsigned int stride;
+
+	if (is_global_space(PAGE_OFFSET))
+		stride = 4;
+	else
+		stride = hard_processor_id();	/* [0..3] */
+
+	base_phys = metag_in32(MMCU_TABLE_PHYS_ADDR);
+	base_phys += (0x800 * stride);
+
+	return base_phys;
+}
+
+/* Given a virtual address, return the virtual address of the relevant pgd */
+static unsigned long pgd_entry_addr(unsigned long virt)
+{
+	unsigned long pgd_phys;
+	unsigned long pgd_virt;
+
+	if (!mmu_base_phys)
+		mmu_base_phys = __get_mmu_base();
+
+	/*
+	 * Are we trying to map a global address.  If so, then index
+	 * the global pgd table instead of our local one.
+	 */
+	if (is_global_space(virt)) {
+		/* Scale into 2gig map */
+		virt &= ~0x80000000;
+	}
+
+	/* Base of the pgd table plus our 4Meg entry, 4bytes each */
+	pgd_phys = mmu_base_phys + ((virt >> PGDIR_SHIFT) * 4);
+
+	pgd_virt = map_addr(pgd_phys);
+
+	return pgd_virt;
+}
+
+/* Given a virtual address, return the virtual address of the relevant pte */
+static unsigned long pgtable_entry_addr(unsigned long virt)
+{
+	unsigned long pgtable_phys;
+	unsigned long pgtable_virt, pte_virt;
+
+	/* Find the physical address of the 4MB page table*/
+	pgtable_phys = metag_in32(pgd_entry_addr(virt)) & MMCU_ENTRY_ADDR_BITS;
+
+	/* Map it to a virtual address */
+	pgtable_virt = map_addr(pgtable_phys);
+
+	/* And index into it for our pte */
+	pte_virt = pgtable_virt + ((virt >> PAGE_SHIFT) & 0x3FF) * 4;
+
+	return pte_virt;
+}
+
+unsigned long mmu_read_first_level_page(unsigned long vaddr)
+{
+	return metag_in32(pgd_entry_addr(vaddr));
+}
+
+unsigned long mmu_read_second_level_page(unsigned long vaddr)
+{
+	return metag_in32(pgtable_entry_addr(vaddr));
+}
+
+unsigned long mmu_get_base(void)
+{
+	static unsigned long __base;
+
+	/* Find the base of our MMU pgd table */
+	if (!__base)
+		__base = pgd_entry_addr(0);
+
+	return __base;
+}
+
+void __init mmu_init(unsigned long mem_end)
+{
+	unsigned long entry, addr;
+	pgd_t *p_swapper_pg_dir;
+
+	/*
+	 * Now copy over any MMU pgd entries already in the mmu page tables
+	 * over to our root init process (swapper_pg_dir) map.  This map is
+	 * then inherited by all other processes, which means all processes
+	 * inherit a map of the kernel space.
+	 */
+	addr = PAGE_OFFSET;
+	entry = pgd_index(PAGE_OFFSET);
+	p_swapper_pg_dir = pgd_offset_k(0) + entry;
+
+	while (addr <= META_MEMORY_LIMIT) {
+		unsigned long pgd_entry;
+		/* copy over the current MMU value */
+		pgd_entry = mmu_read_first_level_page(addr);
+		pgd_val(*p_swapper_pg_dir) = pgd_entry;
+
+		p_swapper_pg_dir++;
+		addr += PGDIR_SIZE;
+		entry++;
+	}
+}
diff --git a/arch/metag/mm/mmu-meta2.c b/arch/metag/mm/mmu-meta2.c
new file mode 100644
index 0000000..81dcbb0
--- /dev/null
+++ b/arch/metag/mm/mmu-meta2.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2008,2009,2010,2011 Imagination Technologies Ltd.
+ *
+ * Meta 2 enhanced mode MMU handling code.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/bootmem.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+
+unsigned long mmu_read_first_level_page(unsigned long vaddr)
+{
+	unsigned int cpu = hard_processor_id();
+	unsigned long offset, linear_base, linear_limit;
+	unsigned int phys0;
+	pgd_t *pgd, entry;
+
+	if (is_global_space(vaddr))
+		vaddr &= ~0x80000000;
+
+	offset = vaddr >> PGDIR_SHIFT;
+
+	phys0 = metag_in32(mmu_phys0_addr(cpu));
+
+	/* Top bit of linear base is always zero. */
+	linear_base = (phys0 >> PGDIR_SHIFT) & 0x1ff;
+
+	/* Limit in the range 0 (4MB) to 9 (2GB). */
+	linear_limit = 1 << ((phys0 >> 8) & 0xf);
+	linear_limit += linear_base;
+
+	/*
+	 * If offset is below linear base or above the limit then no
+	 * mapping exists.
+	 */
+	if (offset < linear_base || offset > linear_limit)
+		return 0;
+
+	offset -= linear_base;
+	pgd = (pgd_t *)mmu_get_base();
+	entry = pgd[offset];
+
+	return pgd_val(entry);
+}
+
+unsigned long mmu_read_second_level_page(unsigned long vaddr)
+{
+	return __builtin_meta2_cacherd((void *)(vaddr & PAGE_MASK));
+}
+
+unsigned long mmu_get_base(void)
+{
+	unsigned int cpu = hard_processor_id();
+	unsigned long stride;
+
+	stride = cpu * LINSYSMEMTnX_STRIDE;
+
+	/*
+	 * Bits 18:2 of the MMCU_TnLocal_TABLE_PHYS1 register should be
+	 * used as an offset to the start of the top-level pgd table.
+	 */
+	stride += (metag_in32(mmu_phys1_addr(cpu)) & 0x7fffc);
+
+	if (is_global_space(PAGE_OFFSET))
+		stride += LINSYSMEMTXG_OFFSET;
+
+	return LINSYSMEMT0L_BASE + stride;
+}
+
+#define FIRST_LEVEL_MASK	0xffffffc0
+#define SECOND_LEVEL_MASK	0xfffff000
+#define SECOND_LEVEL_ALIGN	64
+
+static void repriv_mmu_tables(void)
+{
+	unsigned long phys0_addr;
+	unsigned int g;
+
+	/*
+	 * Check that all the mmu table regions are priv protected, and if not
+	 * fix them and emit a warning. If we left them without priv protection
+	 * then userland processes would have access to a 2M window into
+	 * physical memory near where the page tables are.
+	 */
+	phys0_addr = MMCU_T0LOCAL_TABLE_PHYS0;
+	for (g = 0; g < 2; ++g) {
+		unsigned int t, phys0;
+		unsigned long flags;
+		for (t = 0; t < 4; ++t) {
+			__global_lock2(flags);
+			phys0 = metag_in32(phys0_addr);
+			if ((phys0 & _PAGE_PRESENT) && !(phys0 & _PAGE_PRIV)) {
+				pr_warn("Fixing priv protection on T%d %s MMU table region\n",
+					t,
+					g ? "global" : "local");
+				phys0 |= _PAGE_PRIV;
+				metag_out32(phys0, phys0_addr);
+			}
+			__global_unlock2(flags);
+
+			phys0_addr += MMCU_TnX_TABLE_PHYSX_STRIDE;
+		}
+
+		phys0_addr += MMCU_TXG_TABLE_PHYSX_OFFSET
+			    - 4*MMCU_TnX_TABLE_PHYSX_STRIDE;
+	}
+}
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+static void mmu_resume(void)
+{
+	/*
+	 * If a full suspend to RAM has happened then the original bad MMU table
+	 * priv may have been restored, so repriv them again.
+	 */
+	repriv_mmu_tables();
+}
+#else
+#define mmu_resume NULL
+#endif	/* CONFIG_METAG_SUSPEND_MEM */
+
+static struct syscore_ops mmu_syscore_ops = {
+	.resume  = mmu_resume,
+};
+
+void __init mmu_init(unsigned long mem_end)
+{
+	unsigned long entry, addr;
+	pgd_t *p_swapper_pg_dir;
+#ifdef CONFIG_KERNEL_4M_PAGES
+	unsigned long mem_size = mem_end - PAGE_OFFSET;
+	unsigned int pages = DIV_ROUND_UP(mem_size, 1 << 22);
+	unsigned int second_level_entry = 0;
+	unsigned long *second_level_table;
+#endif
+
+	/*
+	 * Now copy over any MMU pgd entries already in the mmu page tables
+	 * over to our root init process (swapper_pg_dir) map.  This map is
+	 * then inherited by all other processes, which means all processes
+	 * inherit a map of the kernel space.
+	 */
+	addr = META_MEMORY_BASE;
+	entry = pgd_index(META_MEMORY_BASE);
+	p_swapper_pg_dir = pgd_offset_k(0) + entry;
+
+	while (entry < (PTRS_PER_PGD - pgd_index(META_MEMORY_BASE))) {
+		unsigned long pgd_entry;
+		/* copy over the current MMU value */
+		pgd_entry = mmu_read_first_level_page(addr);
+		pgd_val(*p_swapper_pg_dir) = pgd_entry;
+
+		p_swapper_pg_dir++;
+		addr += PGDIR_SIZE;
+		entry++;
+	}
+
+#ifdef CONFIG_KERNEL_4M_PAGES
+	/*
+	 * At this point we can also map the kernel with 4MB pages to
+	 * reduce TLB pressure.
+	 */
+	second_level_table = alloc_bootmem_pages(SECOND_LEVEL_ALIGN * pages);
+
+	addr = PAGE_OFFSET;
+	entry = pgd_index(PAGE_OFFSET);
+	p_swapper_pg_dir = pgd_offset_k(0) + entry;
+
+	while (pages > 0) {
+		unsigned long phys_addr, second_level_phys;
+		pte_t *pte = (pte_t *)&second_level_table[second_level_entry];
+
+		phys_addr = __pa(addr);
+
+		second_level_phys = __pa(pte);
+
+		pgd_val(*p_swapper_pg_dir) = ((second_level_phys &
+					       FIRST_LEVEL_MASK) |
+					      _PAGE_SZ_4M |
+					      _PAGE_PRESENT);
+
+		pte_val(*pte) = ((phys_addr & SECOND_LEVEL_MASK) |
+				 _PAGE_PRESENT | _PAGE_DIRTY |
+				 _PAGE_ACCESSED | _PAGE_WRITE |
+				 _PAGE_CACHEABLE | _PAGE_KERNEL);
+
+		p_swapper_pg_dir++;
+		addr += PGDIR_SIZE;
+		/* Second level pages must be 64byte aligned. */
+		second_level_entry += (SECOND_LEVEL_ALIGN /
+				       sizeof(unsigned long));
+		pages--;
+	}
+	load_pgd(swapper_pg_dir, hard_processor_id());
+	flush_tlb_all();
+#endif
+
+	repriv_mmu_tables();
+	register_syscore_ops(&mmu_syscore_ops);
+}
diff --git a/arch/metag/mm/numa.c b/arch/metag/mm/numa.c
new file mode 100644
index 0000000..9ae578c
--- /dev/null
+++ b/arch/metag/mm/numa.c
@@ -0,0 +1,81 @@
+/*
+ *  Multiple memory node support for Meta machines
+ *
+ *  Copyright (C) 2007  Paul Mundt
+ *  Copyright (C) 2010  Imagination Technologies Ltd.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/export.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/numa.h>
+#include <linux/pfn.h>
+#include <asm/sections.h>
+
+struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL_GPL(node_data);
+
+extern char _heap_start[];
+
+/*
+ * On Meta machines the conventional approach is to stash system RAM
+ * in node 0, and other memory blocks in to node 1 and up, ordered by
+ * latency. Each node's pgdat is node-local at the beginning of the node,
+ * immediately followed by the node mem map.
+ */
+void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
+{
+	unsigned long bootmap_pages, bootmem_paddr;
+	unsigned long start_pfn, end_pfn;
+	unsigned long pgdat_paddr;
+
+	/* Don't allow bogus node assignment */
+	BUG_ON(nid > MAX_NUMNODES || nid <= 0);
+
+	start_pfn = start >> PAGE_SHIFT;
+	end_pfn = end >> PAGE_SHIFT;
+
+	memblock_add(start, end - start);
+
+	memblock_set_node(PFN_PHYS(start_pfn),
+			  PFN_PHYS(end_pfn - start_pfn), nid);
+
+	/* Node-local pgdat */
+	pgdat_paddr = memblock_alloc_base(sizeof(struct pglist_data),
+					  SMP_CACHE_BYTES, end);
+	NODE_DATA(nid) = __va(pgdat_paddr);
+	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
+	NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
+	NODE_DATA(nid)->node_start_pfn = start_pfn;
+	NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+
+	/* Node-local bootmap */
+	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
+	bootmem_paddr = memblock_alloc_base(bootmap_pages << PAGE_SHIFT,
+					    PAGE_SIZE, end);
+	init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
+			  start_pfn, end_pfn);
+
+	free_bootmem_with_active_regions(nid, end_pfn);
+
+	/* Reserve the pgdat and bootmap space with the bootmem allocator */
+	reserve_bootmem_node(NODE_DATA(nid), pgdat_paddr & PAGE_MASK,
+			     sizeof(struct pglist_data), BOOTMEM_DEFAULT);
+	reserve_bootmem_node(NODE_DATA(nid), bootmem_paddr,
+			     bootmap_pages << PAGE_SHIFT, BOOTMEM_DEFAULT);
+
+	/* It's up */
+	node_set_online(nid);
+
+	/* Kick sparsemem */
+	sparse_memory_present_with_active_regions(nid);
+}
+
+void __init __weak soc_mem_setup(void)
+{
+}
diff --git a/arch/metag/tbx/Makefile b/arch/metag/tbx/Makefile
new file mode 100644
index 0000000..e994239
--- /dev/null
+++ b/arch/metag/tbx/Makefile
@@ -0,0 +1,21 @@
+#
+# Makefile for TBX library files..
+#
+
+asflags-y		+= -mmetac=2.1 -Wa,-mfpu=metac21 -mdsp
+asflags-$(CONFIG_SMP)	+= -DTBX_PERCPU_SP_SAVE
+
+ccflags-y		+= -mmetac=2.1
+
+lib-y			+= tbicore.o
+lib-y			+= tbictx.o
+lib-y			+= tbidefr.o
+lib-y			+= tbilogf.o
+lib-y			+= tbipcx.o
+lib-y			+= tbiroot.o
+lib-y			+= tbisoft.o
+lib-y			+= tbistring.o
+lib-y			+= tbitimer.o
+
+lib-$(CONFIG_METAG_DSP)	+= tbidspram.o
+lib-$(CONFIG_METAG_FPU)	+= tbictxfpu.o
diff --git a/arch/metag/tbx/tbicore.S b/arch/metag/tbx/tbicore.S
new file mode 100644
index 0000000..a0838eb
--- /dev/null
+++ b/arch/metag/tbx/tbicore.S
@@ -0,0 +1,136 @@
+/*
+ * tbicore.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Core functions needed to support use of the thread binary interface for META
+ * processors
+ */
+
+	.file	"tbicore.S"
+/* Get data structures and defines from the TBI C header */
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+#include <asm/tbx.h>
+
+	.data
+	.balign	8
+	.global	___pTBISegs
+	.type	___pTBISegs,object
+___pTBISegs:
+	.quad	0		/* Segment list pointer with it's */
+	.size	___pTBISegs,.-___pTBISegs
+					/* own id or spin-lock location */
+/*
+ * Return ___pTBISegs value specific to privilege level - not very complicated
+ * at the moment
+ *
+ * Register Usage: D0Re0 is the result, D1Re0 is used as a scratch
+ */
+	.text
+	.balign	4
+	.global	___TBISegList
+	.type	___TBISegList,function
+___TBISegList:
+	MOVT	A1LbP,#HI(___pTBISegs)
+	ADD	A1LbP,A1LbP,#LO(___pTBISegs)
+	GETL	D0Re0,D1Re0,[A1LbP]
+	MOV	PC,D1RtP
+	.size	___TBISegList,.-___TBISegList
+
+/*
+ * Search the segment list for a match given Id, pStart can be NULL
+ *
+ * Register Usage: D1Ar1 is pSeg, D0Ar2 is Id, D0Re0 is the result
+ *                 D0Ar4, D1Ar3 are used as a scratch
+ *                 NB: The PSTAT bit if Id in D0Ar2 may be toggled
+ */
+	.text
+	.balign	4
+	.global	___TBIFindSeg
+	.type	___TBIFindSeg,function
+___TBIFindSeg:
+	MOVT	A1LbP,#HI(___pTBISegs)
+	ADD	A1LbP,A1LbP,#LO(___pTBISegs)
+	GETL	D1Ar3,D0Ar4,[A1LbP]	/* Read segment list head */
+	MOV	D0Re0,TXSTATUS		/* What priv level are we at? */
+	CMP	D1Ar1,#0		/* Is pStart provided? */
+/* Disable privilege adaption for now */
+	ANDT	D0Re0,D0Re0,#0	/*HI(TXSTATUS_PSTAT_BIT)  ; Is PSTAT set? Zero if not */
+	LSL	D0Re0,D0Re0,#(TBID_PSTAT_S-TXSTATUS_PSTAT_S)
+	XOR	D0Ar2,D0Ar2,D0Re0	/* Toggle Id PSTAT if privileged */
+	MOVNZ	D1Ar3,D1Ar1		/* Use pStart if provided */
+$LFindSegLoop:			
+	ADDS	D0Re0,D1Ar3,#0		/* End of list? Load result into D0Re0 */
+	MOVZ	PC,D1RtP		/* If result is NULL we leave */
+	GETL	D1Ar3,D0Ar4,[D1Ar3]	/* Read pLink and Id */
+	CMP	D0Ar4,D0Ar2		/* Does it match? */
+	BNZ	$LFindSegLoop		/* Loop if there is no match */
+	TST	D0Re0,D0Re0		/* Clear zero flag - we found it! */
+	MOV	PC,D1RtP		/* Return */
+	.size	___TBIFindSeg,.-___TBIFindSeg
+
+/* Useful offsets to encode the lower bits of the lock/unlock addresses */
+#define UON  (LINSYSEVENT_WR_ATOMIC_LOCK   & 0xFFF8)
+#define UOFF (LINSYSEVENT_WR_ATOMIC_UNLOCK & 0xFFF8)
+
+/*
+ * Perform a whole spin-lock sequence as used by the TBISignal routine
+ *
+ * Register Usage: D1Ar1 is pLock, D0Ar2 is Mask, D0Re0 is the result
+ *                 (All other usage due to ___TBIPoll - D0Ar6, D1Re0)
+ */
+	.text
+	.balign	4
+	.global	___TBISpin
+	.type	___TBISpin,function
+___TBISpin:
+	SETL	[A0StP++],D0FrT,D1RtP	/* Save our return address */
+	ORS	D0Re0,D0Re0,#1		/* Clear zero flag */
+	MOV	D1RtP,PC		/* Setup return address to form loop */
+$LSpinLoop:
+	BNZ	___TBIPoll		/* Keep repeating if fail to set */
+	GETL	D0FrT,D1RtP,[--A0StP]	/* Restore return address */
+	MOV	PC,D1RtP		/* Return */
+	.size	___TBISpin,.-___TBISpin
+
+/*
+ * Perform an attempt to gain access to a spin-lock and set some bits
+ * 
+ * Register Usage: D1Ar1 is pLock, D0Ar2 is Mask, D0Re0 is the result
+ *                 !!On return Zero flag is SET if we are sucessfull!!
+ *                 A0.3 is used to hold base address of system event region
+ *                 D1Re0 use to hold TXMASKI while interrupts are off
+ */
+	.text
+	.balign	4
+	.global	___TBIPoll
+	.type	___TBIPoll,function
+___TBIPoll:
+	MOV	D1Re0,#0		/* Prepare to disable ints */
+	MOVT	A0.3,#HI(LINSYSEVENT_WR_ATOMIC_LOCK)
+	SWAP	D1Re0,TXMASKI		/* Really stop ints */
+	LOCK2				/* Gain all locks */
+	SET	[A0.3+#UON],D1RtP	/* Stop shared memory access too */
+	DCACHE	[D1Ar1],A0.3		/* Flush Cache line */
+	GETD	D0Re0,[D1Ar1]		/* Get new state from memory or hit */
+	DCACHE	[D1Ar1],A0.3		/* Flush Cache line */
+	GETD	D0Re0,[D1Ar1]		/* Get current state */
+	TST	D0Re0,D0Ar2		/* Are we clear to send? */
+	ORZ	D0Re0,D0Re0,D0Ar2	/* Yes: So set bits and */
+	SETDZ	[D1Ar1],D0Re0		/*      transmit new state */
+	SET	[A0.3+#UOFF],D1RtP	/* Allow shared memory access */
+	LOCK0				/* Release all locks */
+	MOV	TXMASKI,D1Re0		/* Allow ints */
+$LPollEnd:
+	XORNZ	D0Re0,D0Re0,D0Re0	/* No: Generate zero result */
+	MOV	PC,D1RtP		/* Return (NZ indicates failure) */
+	.size	___TBIPoll,.-___TBIPoll
+
+/*
+ * End of tbicore.S
+ */
diff --git a/arch/metag/tbx/tbictx.S b/arch/metag/tbx/tbictx.S
new file mode 100644
index 0000000..19af983
--- /dev/null
+++ b/arch/metag/tbx/tbictx.S
@@ -0,0 +1,366 @@
+/*
+ * tbictx.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Explicit state save and restore routines forming part of the thread binary
+ * interface for META processors
+ */
+
+	.file	"tbictx.S"
+#include <asm/metag_regs.h>
+#include <asm/tbx.h>
+
+#ifdef METAC_1_0
+/* Ax.4 is NOT saved in XAX3 */
+#define A0_4
+#else
+/* Ax.4 is saved in XAX4 */
+#define A0_4 A0.4,
+#endif
+
+
+/* Size of the TBICTX structure */
+#define TBICTX_BYTES ((TBICTX_AX_REGS*8)+TBICTX_AX)
+
+/*
+ * TBIRES __TBINestInts( TBIRES State, void *pExt, int NoNestMask )
+ */
+	.text
+	.balign	4
+	.global	___TBINestInts
+	.type	___TBINestInts,function
+___TBINestInts:
+	XOR	D0Ar4,D0Ar4,#-1			/* D0Ar4 = ~TrigBit */
+	AND	D0Ar4,D0Ar4,#0xFFFF		/* D0Ar4 &= 0xFFFF */
+	MOV	D0Ar6,TXMASKI			/* BGNDHALT currently enabled? */
+	TSTT	D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XCBF_BIT
+	AND	D0Ar4,D0Ar2,D0Ar4		/* D0Ar4 = Ints to allow */
+	XOR	D0Ar2,D0Ar2,D0Ar4		/* Less Ints in TrigMask */
+	BNZ	___TBINestInts2			/* Jump if ctx save required! */
+	TSTT	D0Ar2,#TBICTX_CBUF_BIT+TBICTX_CBRP_BIT	/* Is catch state dirty? */
+	OR	D0Ar4,D0Ar4,D0Ar6		/* Or in TXMASKI BGNDHALT if set */
+	TSTNZ	D0Ar4,D0Ar4			/* Yes: AND triggers enabled */
+	MOV	D0Re0,D0Ar2			/* Update State argument */
+	MOV	D1Re0,D1Ar1			/*  with less Ints in TrigMask */
+	MOVZ	TXMASKI,D0Ar4			/* Early return: Enable Ints */
+	MOVZ	PC,D1RtP			/* Early return */
+	.size	___TBINestInts,.-___TBINestInts
+/*
+ * Drop thru into sub-function-
+ */
+	.global	___TBINestInts2
+	.type	___TBINestInts2,function
+___TBINestInts2:
+	MOV	D0FrT,A0FrP			/* Full entry sequence so we */
+	ADD	A0FrP,A0StP,#0			/*     can make sub-calls */
+	MSETL	[A0StP],D0FrT,D0.5,D0.6		/*     and preserve our result */
+	ORT	D0Ar2,D0Ar2,#TBICTX_XCBF_BIT	/* Add in XCBF save request */
+	MOV	D0.5,D0Ar2			/* Save State in DX.5 */
+	MOV	D1.5,D1Ar1
+	OR	D0.6,D0Ar4,D0Ar6		/* Save TrigMask in D0.6 */
+	MOVT	D1RtP,#HI(___TBICtxSave)	/* Save catch buffer */
+	CALL	D1RtP,#LO(___TBICtxSave)
+	MOV	TXMASKI,D0.6			/* Allow Ints */
+	MOV	D0Re0,D0.5			/* Return State */
+	MOV	D1Re0,D1.5
+	MGETL	D0FrT,D0.5,D0.6,[A0FrP]		/* Full exit sequence */
+	SUB	A0StP,A0FrP,#(8*3)
+	MOV	A0FrP,D0FrT
+	MOV	PC,D1RtP
+	.size	___TBINestInts2,.-___TBINestInts2
+
+/*
+ * void *__TBICtxSave( TBIRES State, void *pExt )
+ *
+ *       D0Ar2 contains TBICTX_*_BIT values that control what
+ *          extended data is to be saved beyond the end of D1Ar1.
+ *       These bits must be ored into the SaveMask of this structure.
+ *
+ *       Virtually all possible scratch registers are used.
+ *
+ *       The D1Ar1 parameter is only used as the basis for saving
+ *       CBUF state.
+ */
+/*
+ *       If TBICTX_XEXT_BIT is specified in State. then State.pCtx->Ext is
+ *       utilised to save the base address of the context save area and
+ *       the extended states saved. The XEXT flag then indicates that the
+ *       original state of the A0.2 and A1.2 registers from TBICTX.Ext.AX2
+ *       are stored as the first part of the extended state structure.
+ */
+	.balign	4
+	.global	___TBICtxSave
+	.type	___TBICtxSave,function
+___TBICtxSave:
+	GETD	D0Re0,[D1Ar1+#TBICTX_SaveMask-2]	/* Get SaveMask */
+	TSTT	D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT
+						/* Just XCBF to save? */
+	MOV	A0.2,D1Ar3			/* Save pointer into A0.2 */
+	MOV	A1.2,D1RtP			/* Free off D0FrT:D1RtP pair */
+	BZ	$LCtxSaveCBUF			/* Yes: Only XCBF may be saved */
+	TSTT	D0Ar2,#TBICTX_XEXT_BIT		/* Extended base-state model? */
+	BZ	$LCtxSaveXDX8
+	GETL	D0Ar6,D1Ar5,[D1Ar1+#TBICTX_Ext_AX2]	/* Get A0.2, A1.2 state */
+	MOV	D0Ar4,D0Ar2			/* Extract Ctx.SaveFlags value */
+	ANDMT	D0Ar4,D0Ar4,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT
+	SETD	[D1Ar1+#TBICTX_Ext_Ctx_pExt],A0.2
+	SETD	[D1Ar1+#TBICTX_Ext_Ctx_SaveMask-2],D0Ar4
+	SETL	[A0.2++],D0Ar6,D1Ar5		/* Save A0.2, A1.2 state */
+$LCtxSaveXDX8:
+	TSTT	D0Ar2,#TBICTX_XDX8_BIT		/* Save extended DX regs? */
+	BZ	$LCtxSaveXAXX
+/*
+ * Save 8 extra DX registers
+ */
+	MSETL	[A0.2],D0.8,D0.9,D0.10,D0.11,D0.12,D0.13,D0.14,D0.15
+$LCtxSaveXAXX:
+	TSTT	D0Ar2,#TBICTX_XAXX_BIT		/* Save extended AX regs? */
+	SWAP	D0Re0,A0.2			/* pDst into D0Re0 */
+	BZ	$LCtxSaveXHL2
+/*
+ * Save 4 extra AX registers
+ */
+	MSETL	[D0Re0], A0_4 A0.5,A0.6,A0.7	/* Save 8*3 bytes */
+$LCtxSaveXHL2:
+	TSTT	D0Ar2,#TBICTX_XHL2_BIT		/* Save hardware-loop regs? */
+	SWAP	D0Re0,A0.2			/* pDst back into A0.2 */
+	MOV	D0Ar6,TXL1START
+	MOV	D1Ar5,TXL2START
+	BZ	$LCtxSaveXTDP
+/*
+ * Save hardware loop registers
+ */
+	SETL	[A0.2++],D0Ar6,D1Ar5		/* Save 8*1 bytes */
+	MOV	D0Ar6,TXL1END
+	MOV	D1Ar5,TXL2END
+	MOV	D0FrT,TXL1COUNT
+	MOV	D1RtP,TXL2COUNT
+	MSETL	[A0.2],D0Ar6,D0FrT		/* Save 8*2 bytes */
+/*
+ * Clear loop counters to disable any current loops
+ */
+	XOR	TXL1COUNT,D0FrT,D0FrT
+	XOR	TXL2COUNT,D1RtP,D1RtP
+$LCtxSaveXTDP:
+	TSTT	D0Ar2,#TBICTX_XTDP_BIT		/* Save per-thread DSP regs? */
+	BZ	$LCtxSaveCBUF
+/*
+ * Save per-thread DSP registers; ACC.0, PR.0, PI.1-3 (PI.0 is zero)
+ */
+#ifndef CTX_NO_DSP
+D	SETL	[A0.2++],AC0.0,AC1.0		/* Save ACx.0 lower 32-bits */
+DH	SETL	[A0.2++],AC0.0,AC1.0		/* Save ACx.0 upper 32-bits */
+D	SETL	[A0.2++],D0AR.0,D1AR.0		/* Save DSP RAM registers */
+D	SETL	[A0.2++],D0AR.1,D1AR.1
+D	SETL	[A0.2++],D0AW.0,D1AW.0
+D	SETL	[A0.2++],D0AW.1,D1AW.1
+D	SETL	[A0.2++],D0BR.0,D1BR.0
+D	SETL	[A0.2++],D0BR.1,D1BR.1
+D	SETL	[A0.2++],D0BW.0,D1BW.0
+D	SETL	[A0.2++],D0BW.1,D1BW.1
+D	SETL	[A0.2++],D0ARI.0,D1ARI.0
+D	SETL	[A0.2++],D0ARI.1,D1ARI.1
+D	SETL	[A0.2++],D0AWI.0,D1AWI.0
+D	SETL	[A0.2++],D0AWI.1,D1AWI.1
+D	SETL	[A0.2++],D0BRI.0,D1BRI.0
+D	SETL	[A0.2++],D0BRI.1,D1BRI.1
+D	SETL	[A0.2++],D0BWI.0,D1BWI.0
+D	SETL	[A0.2++],D0BWI.1,D1BWI.1
+D	SETD	[A0.2++],T0
+D	SETD	[A0.2++],T1
+D	SETD	[A0.2++],T2
+D	SETD	[A0.2++],T3
+D	SETD	[A0.2++],T4
+D	SETD	[A0.2++],T5
+D	SETD	[A0.2++],T6
+D	SETD	[A0.2++],T7
+D	SETD	[A0.2++],T8
+D	SETD	[A0.2++],T9
+D	SETD	[A0.2++],TA
+D	SETD	[A0.2++],TB
+D	SETD	[A0.2++],TC
+D	SETD	[A0.2++],TD
+D	SETD	[A0.2++],TE
+D	SETD	[A0.2++],TF
+#else
+	ADD	A0.2,A0.2,#(8*18+4*16)
+#endif
+	MOV	D0Ar6,TXMRSIZE
+	MOV	D1Ar5,TXDRSIZE
+	SETL	[A0.2++],D0Ar6,D1Ar5		/* Save 8*1 bytes */
+	
+$LCtxSaveCBUF:
+#ifdef TBI_1_3
+	MOV	D0Ar4,D0Re0			/* Copy Ctx Flags */
+	ANDT	D0Ar4,D0Ar4,#TBICTX_XCBF_BIT	/*   mask XCBF if already set */
+	XOR	D0Ar4,D0Ar4,#-1
+	AND	D0Ar2,D0Ar2,D0Ar4		/*   remove XCBF if already set */
+#endif
+	TSTT	D0Ar2,#TBICTX_XCBF_BIT		/* Want to save CBUF? */
+	ANDT	D0Ar2,D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT
+	OR	D0Ar2,D0Ar2,D0Re0		/* Generate new SaveMask */
+	SETD	[D1Ar1+#TBICTX_SaveMask-2],D0Ar2/* Add in bits saved to TBICTX */
+	MOV	D0Re0,A0.2			/* Return end of save area */
+	MOV	D0Ar4,TXDIVTIME			/* Get TXDIVTIME */
+	MOVZ	PC,A1.2				/* No: Early return */
+	TSTT	D0Ar2,#TBICTX_CBUF_BIT+TBICTX_CBRP_BIT	/* Need to save CBUF? */
+	MOVZ	PC,A1.2				/* No: Early return */
+	ORT	D0Ar2,D0Ar2,#TBICTX_XCBF_BIT
+	SETD	[D1Ar1+#TBICTX_SaveMask-2],D0Ar2/* Add in XCBF bit to TBICTX */
+	ADD	A0.2,D1Ar1,#TBICTX_BYTES	/* Dump CBUF state after TBICTX */
+/*
+ * Save CBUF
+ */
+	SETD	[A0.2+# 0],TXCATCH0		/* Restore TXCATCHn */
+	SETD	[A0.2+# 4],TXCATCH1
+	TSTT	D0Ar2,#TBICTX_CBRP_BIT		/* ... RDDIRTY was/is set */
+	SETD	[A0.2+# 8],TXCATCH2
+	SETD	[A0.2+#12],TXCATCH3
+	BZ	$LCtxSaveComplete
+	SETL	[A0.2+#(2*8)],RD		/* Save read pipeline */
+	SETL	[A0.2+#(3*8)],RD		/* Save read pipeline */
+	SETL	[A0.2+#(4*8)],RD		/* Save read pipeline */
+	SETL	[A0.2+#(5*8)],RD		/* Save read pipeline */
+	SETL	[A0.2+#(6*8)],RD		/* Save read pipeline */
+	SETL	[A0.2+#(7*8)],RD		/* Save read pipeline */
+	AND	TXDIVTIME,D0Ar4,#TXDIVTIME_DIV_BITS /* Clear RPDIRTY */
+$LCtxSaveComplete:
+	MOV	PC,A1.2				/* Return */
+	.size	___TBICtxSave,.-___TBICtxSave
+
+/*
+ * void *__TBICtxRestore( TBIRES State, void *pExt )
+ *
+ *                 D0Ar2 contains TBICTX_*_BIT values that control what
+ *                    extended data is to be recovered from D1Ar3 (pExt).
+ *
+ *                 Virtually all possible scratch registers are used.
+ */
+/*
+ *	If TBICTX_XEXT_BIT is specified in State. Then the saved state of
+ *       the orginal A0.2 and A1.2 is restored from pExt and the XEXT
+ *       related flags are removed from State.pCtx->SaveMask.
+ *
+ */
+	.balign	4
+	.global	___TBICtxRestore
+	.type	___TBICtxRestore,function
+___TBICtxRestore:
+	GETD	D0Ar6,[D1Ar1+#TBICTX_CurrMODE]	/* Get TXMODE Value */
+	ANDST	D0Ar2,D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT
+	MOV	D1Re0,D0Ar2			/* Keep flags in D1Re0 */
+	MOV	D0Re0,D1Ar3			/* D1Ar3 is default result */
+	MOVZ	PC,D1RtP			/* Early return, nothing to do */
+	ANDT	D0Ar6,D0Ar6,#0xE000		/* Top bits of TXMODE required */
+	MOV	A0.3,D0Ar6			/* Save TXMODE for later */
+	TSTT	D1Re0,#TBICTX_XEXT_BIT		/* Check for XEXT bit */
+	BZ	$LCtxRestXDX8
+	GETD	D0Ar4,[D1Ar1+#TBICTX_SaveMask-2]/* Get current SaveMask */
+	GETL	D0Ar6,D1Ar5,[D0Re0++]		/* Restore A0.2, A1.2 state */
+	ANDMT	D0Ar4,D0Ar4,#(0xFFFF-(TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT))
+	SETD	[D1Ar1+#TBICTX_SaveMask-2],D0Ar4/* New SaveMask */
+#ifdef METAC_1_0
+	SETD	[D1Ar1+#TBICTX_Ext_AX2_U0],D0Ar6
+	MOV	D0Ar6,D1Ar1
+	SETD	[D0Ar6+#TBICTX_Ext_AX2_U1],D1Ar5
+#else
+	SETL	[D1Ar1+#TBICTX_Ext_AX2],D0Ar6,D1Ar5
+#endif
+$LCtxRestXDX8:
+	TSTT	D1Re0,#TBICTX_XDX8_BIT		/* Get extended DX regs? */
+	MOV	A1.2,D1RtP			/* Free off D1RtP register */
+	BZ	$LCtxRestXAXX
+/*
+ * Restore 8 extra DX registers
+ */
+	MGETL	D0.8,D0.9,D0.10,D0.11,D0.12,D0.13,D0.14,D0.15,[D0Re0]
+$LCtxRestXAXX:
+	TSTT	D1Re0,#TBICTX_XAXX_BIT		/* Get extended AX regs? */
+	BZ	$LCtxRestXHL2
+/*
+ * Restore 3 extra AX registers
+ */
+	MGETL	A0_4 A0.5,A0.6,A0.7,[D0Re0]	/* Get 8*3 bytes */
+$LCtxRestXHL2:
+	TSTT	D1Re0,#TBICTX_XHL2_BIT		/* Get hardware-loop regs? */
+	BZ	$LCtxRestXTDP
+/*
+ * Get hardware loop registers
+ */
+	MGETL	D0Ar6,D0Ar4,D0Ar2,[D0Re0]	/* Get 8*3 bytes */
+	MOV	TXL1START,D0Ar6
+	MOV	TXL2START,D1Ar5
+	MOV	TXL1END,D0Ar4
+	MOV	TXL2END,D1Ar3
+	MOV	TXL1COUNT,D0Ar2
+	MOV	TXL2COUNT,D1Ar1
+$LCtxRestXTDP:
+	TSTT	D1Re0,#TBICTX_XTDP_BIT		/* Get per-thread DSP regs? */
+	MOVZ	PC,A1.2				/* No: Early return */
+/*
+ * Get per-thread DSP registers; ACC.0, PR.0, PI.1-3 (PI.0 is zero)
+ */
+	MOV	A0.2,D0Re0
+	GETL	D0Ar6,D1Ar5,[D0Re0++#((16*4)+(18*8))]
+#ifndef CTX_NO_DSP
+D	GETL	AC0.0,AC1.0,[A0.2++]		/* Restore ACx.0 lower 32-bits */
+DH	GETL	AC0.0,AC1.0,[A0.2++]		/* Restore ACx.0 upper 32-bits */
+#else
+	ADD	A0.2,A0.2,#(2*8)
+#endif
+	ADD	D0Re0,D0Re0,#(2*4)
+	MOV	TXMODE,A0.3			/* Some TXMODE bits needed */
+	MOV	TXMRSIZE,D0Ar6
+	MOV	TXDRSIZE,D1Ar5
+#ifndef CTX_NO_DSP
+D	GETL	D0AR.0,D1AR.0,[A0.2++]		/* Restore DSP RAM registers */
+D	GETL	D0AR.1,D1AR.1,[A0.2++]
+D	GETL	D0AW.0,D1AW.0,[A0.2++]
+D	GETL	D0AW.1,D1AW.1,[A0.2++]
+D	GETL	D0BR.0,D1BR.0,[A0.2++]
+D	GETL	D0BR.1,D1BR.1,[A0.2++]
+D	GETL	D0BW.0,D1BW.0,[A0.2++]
+D	GETL	D0BW.1,D1BW.1,[A0.2++]
+#else
+	ADD	A0.2,A0.2,#(8*8)
+#endif
+	MOV	TXMODE,#0			/* Restore TXMODE */
+#ifndef CTX_NO_DSP
+D	GETL	D0ARI.0,D1ARI.0,[A0.2++]
+D	GETL	D0ARI.1,D1ARI.1,[A0.2++]
+D	GETL	D0AWI.0,D1AWI.0,[A0.2++]
+D	GETL	D0AWI.1,D1AWI.1,[A0.2++]
+D	GETL	D0BRI.0,D1BRI.0,[A0.2++]
+D	GETL	D0BRI.1,D1BRI.1,[A0.2++]
+D	GETL	D0BWI.0,D1BWI.0,[A0.2++]
+D	GETL	D0BWI.1,D1BWI.1,[A0.2++]
+D	GETD	T0,[A0.2++]
+D	GETD	T1,[A0.2++]
+D	GETD	T2,[A0.2++]
+D	GETD	T3,[A0.2++]
+D	GETD	T4,[A0.2++]
+D	GETD	T5,[A0.2++]
+D	GETD	T6,[A0.2++]
+D	GETD	T7,[A0.2++]
+D	GETD	T8,[A0.2++]
+D	GETD	T9,[A0.2++]
+D	GETD	TA,[A0.2++]
+D	GETD	TB,[A0.2++]
+D	GETD	TC,[A0.2++]
+D	GETD	TD,[A0.2++]
+D	GETD	TE,[A0.2++]
+D	GETD	TF,[A0.2++]
+#else
+	ADD	A0.2,A0.2,#(8*8+4*16)
+#endif
+	MOV	PC,A1.2				/* Return */
+	.size	___TBICtxRestore,.-___TBICtxRestore
+
+/*
+ * End of tbictx.S
+ */
diff --git a/arch/metag/tbx/tbictxfpu.S b/arch/metag/tbx/tbictxfpu.S
new file mode 100644
index 0000000..e773bea
--- /dev/null
+++ b/arch/metag/tbx/tbictxfpu.S
@@ -0,0 +1,190 @@
+/*
+ * tbictxfpu.S
+ *
+ * Copyright (C) 2009, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Explicit state save and restore routines forming part of the thread binary
+ * interface for META processors
+ */
+
+	.file	"tbifpuctx.S"
+
+#include <asm/metag_regs.h>
+#include <asm/tbx.h>
+
+#ifdef TBI_1_4
+/*
+ * void *__TBICtxFPUSave( TBIRES State, void *pExt )
+ *
+ *                 D0Ar2 contains TBICTX_*_BIT values that control what
+ *                    extended data is to be saved.
+ *                 These bits must be ored into the SaveMask of this structure.
+ *
+ *                 Virtually all possible scratch registers are used.
+ */
+	.text
+	.balign	4
+	.global	___TBICtxFPUSave
+	.type	___TBICtxFPUSave,function
+___TBICtxFPUSave:
+
+	/* D1Ar1:D0Ar2 - State
+	 * D1Ar3       - pExt
+	 * D0Ar4       - Value of METAC_CORE_ID
+	 * D1Ar5       - Scratch
+	 * D0Ar6       - Scratch
+	 */
+	
+	/* If the FPAC bit isnt set then there is nothing to do */
+	TSTT	D0Ar2,#TBICTX_FPAC_BIT
+	MOVZ	PC, D1RtP
+
+	/* Obtain the Core config */
+	MOVT	D0Ar4,        #HI(METAC_CORE_ID)
+	ADD	D0Ar4, D0Ar4, #LO(METAC_CORE_ID)
+	GETD	D0Ar4, [D0Ar4]
+
+	/* Detect FX.8 - FX.15 and add to core config */
+	MOV	D0Ar6, TXENABLE
+	AND	D0Ar6, D0Ar6, #(TXENABLE_CLASSALT_FPUR8 << TXENABLE_CLASS_S)
+	AND	D0Ar4, D0Ar4, #LO(0x0000FFFF)
+	ORT	D0Ar4, D0Ar4, #HI(TBICTX_CFGFPU_FX16_BIT)
+	XOR	D0Ar4, D0Ar4, D0Ar6
+
+	/* Save the relevant bits to the buffer */
+	SETD	[D1Ar3++], D0Ar4
+
+	/* Save the relevant bits of TXDEFR (Assumes TXDEFR is coherent) ... */
+	MOV	D0Ar6, TXDEFR
+	LSR	D0Re0, D0Ar6, #8
+	AND	D0Re0, D0Re0, #LO(TXDEFR_FPE_FE_BITS>>8)
+	AND	D0Ar6, D0Ar6, #LO(TXDEFR_FPE_ICTRL_BITS)
+	OR	D0Re0, D0Re0, D0Ar6
+
+	/* ... along with relevant bits of TXMODE to buffer */
+	MOV	D0Ar6, TXMODE
+	ANDT	D0Ar6, D0Ar6, #HI(TXMODE_FPURMODE_BITS)
+	ORT	D0Ar6, D0Ar6, #HI(TXMODE_FPURMODEWRITE_BIT)
+	OR	D0Ar6, D0Ar6, D0Re0
+	SETD	[D1Ar3++], D0Ar6
+
+	GETD	D0Ar6,[D1Ar1+#TBICTX_SaveMask-2] /* Get the current SaveMask */
+	/* D0Ar6       - pCtx->SaveMask */
+
+	TSTT	D0Ar4, #HI(TBICTX_CFGFPU_FX16_BIT) /* Perform test here for extended FPU registers
+	    	                                    * to avoid stalls
+	    	                                    */
+	/* Save the standard FPU registers */
+F	MSETL	[D1Ar3++], FX.0, FX.2, FX.4, FX.6
+
+	/* Save the extended FPU registers if they are present */
+	BZ	$Lskip_save_fx8_fx16
+F	MSETL	[D1Ar3++], FX.8, FX.10, FX.12, FX.14
+$Lskip_save_fx8_fx16:
+
+	/* Save the FPU Accumulator if it is present */
+	TST	D0Ar4, #METAC_COREID_NOFPACC_BIT
+	BNZ	$Lskip_save_fpacc
+F	SETL	[D1Ar3++], ACF.0
+F	SETL	[D1Ar3++], ACF.1
+F	SETL	[D1Ar3++], ACF.2
+$Lskip_save_fpacc:
+
+	/* Update pCtx->SaveMask */
+	ANDT	D0Ar2, D0Ar2, #TBICTX_FPAC_BIT
+	OR	D0Ar6, D0Ar6, D0Ar2
+	SETD	[D1Ar1+#TBICTX_SaveMask-2],D0Ar6/* Add in XCBF bit to TBICTX */
+
+	MOV	D0Re0, D1Ar3 /* Return end of save area */
+	MOV	PC, D1RtP
+
+	.size	___TBICtxFPUSave,.-___TBICtxFPUSave
+
+/*
+ * void *__TBICtxFPURestore( TBIRES State, void *pExt )
+ *
+ *                 D0Ar2 contains TBICTX_*_BIT values that control what
+ *                    extended data is to be recovered from D1Ar3 (pExt).
+ *
+ *                 Virtually all possible scratch registers are used.
+ */
+/*
+ * If TBICTX_XEXT_BIT is specified in State. Then the saved state of
+ *       the orginal A0.2 and A1.2 is restored from pExt and the XEXT
+ *       related flags are removed from State.pCtx->SaveMask.
+ *
+ */
+	.balign	4
+	.global	___TBICtxFPURestore
+	.type	___TBICtxFPURestore,function
+___TBICtxFPURestore:
+
+	/* D1Ar1:D0Ar2 - State
+	 * D1Ar3       - pExt
+	 * D0Ar4       - Value of METAC_CORE_ID
+	 * D1Ar5       - Scratch
+	 * D0Ar6       - Scratch
+	 * D1Re0       - Scratch
+	 */
+
+	/* If the FPAC bit isnt set then there is nothing to do */
+	TSTT	D0Ar2,#TBICTX_FPAC_BIT
+	MOVZ	PC, D1RtP
+
+	/* Obtain the relevant bits of the Core config */
+	GETD	D0Ar4, [D1Ar3++]
+
+	/* Restore FPU related parts of TXDEFR. Assumes TXDEFR is coherent */
+	GETD	D1Ar5, [D1Ar3++]
+	MOV	D0Ar6, D1Ar5
+	LSL	D1Re0, D1Ar5, #8
+	ANDT	D1Re0, D1Re0, #HI(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS)
+	AND	D1Ar5, D1Ar5, #LO(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS)
+	OR	D1Re0, D1Re0, D1Ar5
+
+	MOV	D1Ar5, TXDEFR
+	ANDMT	D1Ar5, D1Ar5, #HI(~(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS))
+	ANDMB	D1Ar5, D1Ar5, #LO(~(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS))
+	OR	D1Re0, D1Re0, D1Ar5
+	MOV	TXDEFR, D1Re0
+
+	/* Restore relevant bits of TXMODE */
+	MOV	D1Ar5, TXMODE
+	ANDMT	D1Ar5, D1Ar5, #HI(~TXMODE_FPURMODE_BITS)
+	ANDT	D0Ar6, D0Ar6, #HI(TXMODE_FPURMODE_BITS|TXMODE_FPURMODEWRITE_BIT)
+	OR	D0Ar6, D0Ar6, D1Ar5
+	MOV	TXMODE, D0Ar6
+
+	TSTT	D0Ar4, #HI(TBICTX_CFGFPU_FX16_BIT) /* Perform test here for extended FPU registers
+	    	                                    * to avoid stalls
+	    	                                    */
+	/* Save the standard FPU registers */
+F	MGETL	FX.0, FX.2, FX.4, FX.6, [D1Ar3++]
+
+	/* Save the extended FPU registers if they are present */
+	BZ	$Lskip_restore_fx8_fx16
+F	MGETL	FX.8, FX.10, FX.12, FX.14, [D1Ar3++]
+$Lskip_restore_fx8_fx16:
+
+	/* Save the FPU Accumulator if it is present */
+	TST	D0Ar4, #METAC_COREID_NOFPACC_BIT
+	BNZ	$Lskip_restore_fpacc
+F	GETL	ACF.0, [D1Ar3++]
+F	GETL	ACF.1, [D1Ar3++]
+F	GETL	ACF.2, [D1Ar3++]
+$Lskip_restore_fpacc:
+
+	MOV	D0Re0, D1Ar3 /* Return end of save area */
+	MOV	PC, D1RtP
+
+	.size	___TBICtxFPURestore,.-___TBICtxFPURestore
+
+#endif /* TBI_1_4 */
+
+/*
+ * End of tbictx.S
+ */
diff --git a/arch/metag/tbx/tbidefr.S b/arch/metag/tbx/tbidefr.S
new file mode 100644
index 0000000..3eb165e
--- /dev/null
+++ b/arch/metag/tbx/tbidefr.S
@@ -0,0 +1,175 @@
+/*
+ * tbidefr.S
+ *
+ * Copyright (C) 2009, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Routing deferred exceptions
+ */
+
+#include <asm/metag_regs.h>
+#include <asm/tbx.h>
+
+	.text
+	.balign	4
+	.global	___TBIHandleDFR
+	.type	___TBIHandleDFR,function
+/* D1Ar1:D0Ar2 -- State
+ * D0Ar3       -- SigNum
+ * D0Ar4       -- Triggers
+ * D1Ar5       -- InstOrSWSId
+ * D0Ar6       -- pTBI (volatile)
+ */
+___TBIHandleDFR:
+#ifdef META_BUG_MBN100212
+	MSETL	[A0StP++], D0FrT, D0.5
+
+	/* D1Ar1,D0Ar2,D1Ar5,D0Ar6 -- Arguments to handler, must be preserved
+	 * D0Ar4       -- The deferred exceptions
+	 * D1Ar3       -- As per D0Ar4 but just the trigger bits
+	 * D0.5        -- The bgnd deferred exceptions
+	 * D1.5        -- TXDEFR with bgnd re-added
+	 */
+
+	/* - Collect the pending deferred exceptions using TXSTAT,
+	 *   (ack's the bgnd exceptions as a side-effect)
+	 * - Manually collect remaining (interrupt) deferred exceptions
+	 *   using TXDEFR
+	 * - Replace the triggers (from TXSTATI) with the int deferred
+	 *   exceptions DEFR ..., TXSTATI would have returned if it was valid
+	 *   from bgnd code
+	 * - Reconstruct TXDEFR by or'ing bgnd deferred exceptions (except
+	 *   the DEFER bit) and the int deferred exceptions. This will be
+	 *   restored later
+	 */
+	DEFR	D0.5,  TXSTAT
+	MOV	D1.5,  TXDEFR
+	ANDT	D0.5,  D0.5, #HI(0xFFFF0000)
+	MOV	D1Ar3, D1.5
+	ANDT	D1Ar3, D1Ar3, #HI(0xFFFF0000)
+	OR	D0Ar4, D1Ar3, #TXSTAT_DEFER_BIT
+	OR	D1.5, D1.5, D0.5
+
+	/* Mask off anything unrelated to the deferred exception triggers */
+	ANDT	D1Ar3, D1Ar3, #HI(TXSTAT_BUSERR_BIT | TXSTAT_FPE_BITS)
+
+	/* Can assume that at least one exception happened since this
+	 * handler wouldnt have been called otherwise.
+	 * 
+	 * Replace the signal number and at the same time, prepare
+	 * the mask to acknowledge the exception
+	 *
+	 * D1Re0 -- The bits to acknowledge
+	 * D1Ar3 -- The signal number
+	 * D1RtP -- Scratch to deal with non-conditional insns
+	 */
+	MOVT	D1Re0, #HI(TXSTAT_FPE_BITS & ~TXSTAT_FPE_DENORMAL_BIT)
+	MOV	D1RtP, #TXSTAT_FPE_INVALID_S
+	FFB	D1Ar3, D1Ar3
+	CMP	D1Ar3, #TXSTAT_FPE_INVALID_S
+	MOVLE	D1Ar3, D1RtP /* Collapse FPE triggers to a single signal */
+	MOV	D1RtP, #1
+	LSLGT	D1Re0, D1RtP, D1Ar3
+
+	/* Get the handler using the signal number
+	 *
+	 * D1Ar3 -- The signal number
+	 * D0Re0 -- Offset into TBI struct containing handler address
+	 * D1Re0 -- Mask of triggers to keep
+	 * D1RtP -- Address of handler
+	 */
+	SUB	D1Ar3, D1Ar3, #(TXSTAT_FPE_INVALID_S - TBID_SIGNUM_FPE)
+	LSL	D0Re0, D1Ar3, #2
+	XOR	D1Re0, D1Re0, #-1   /* Prepare mask for acknowledge (avoids stall) */
+	ADD	D0Re0,D0Re0,#TBI_fnSigs
+	GETD	D1RtP, [D0Ar6+D0Re0]
+
+	/* Acknowledge triggers */
+	AND	D1.5, D1.5, D1Re0
+
+	/* Restore remaining exceptions
+	 * Do this here in case the handler enables nested interrupts
+	 *
+	 * D1.5 -- TXDEFR with this exception ack'd
+	 */
+	MOV	TXDEFR, D1.5
+
+	/* Call the handler */
+	SWAP	D1RtP, PC
+
+	GETL	D0.5,  D1.5,  [--A0StP]
+	GETL	D0FrT, D1RtP, [--A0StP]
+	MOV	PC,D1RtP
+#else  /* META_BUG_MBN100212 */
+
+	/* D1Ar1,D0Ar2,D1Ar5,D0Ar6 -- Arguments to handler, must be preserved
+	 * D0Ar4       -- The deferred exceptions
+	 * D1Ar3       -- As per D0Ar4 but just the trigger bits
+	 */
+
+	/* - Collect the pending deferred exceptions using TXSTAT,
+	 *   (ack's the interrupt exceptions as a side-effect)
+	 */
+	DEFR	D0Ar4, TXSTATI
+
+	/* Mask off anything unrelated to the deferred exception triggers */
+	MOV	D1Ar3, D0Ar4
+	ANDT	D1Ar3, D1Ar3, #HI(TXSTAT_BUSERR_BIT | TXSTAT_FPE_BITS)
+
+	/* Can assume that at least one exception happened since this
+	 * handler wouldnt have been called otherwise.
+	 * 
+	 * Replace the signal number and at the same time, prepare
+	 * the mask to acknowledge the exception
+	 *
+	 * The unusual code for 1<<D1Ar3 may need explanation.
+	 * Normally this would be done using 'MOV rs,#1' and 'LSL rd,rs,D1Ar3'
+	 * but only D1Re0 is available in D1 and no crossunit insns are available
+	 * Even worse, there is no conditional 'MOV r,#uimm8'.
+	 * Since the CMP proves that D1Ar3 >= 20, we can reuse the bottom 12-bits
+	 * of D1Re0 (using 'ORGT r,#1') in the knowledge that the top 20-bits will
+	 * be discarded without affecting the result.
+	 *
+	 * D1Re0 -- The bits to acknowledge
+	 * D1Ar3 -- The signal number
+	 */
+	MOVT	D1Re0, #HI(TXSTAT_FPE_BITS & ~TXSTAT_FPE_DENORMAL_BIT)
+	MOV	D0Re0, #TXSTAT_FPE_INVALID_S
+	FFB	D1Ar3, D1Ar3
+	CMP	D1Ar3, #TXSTAT_FPE_INVALID_S
+	MOVLE	D1Ar3, D0Re0 /* Collapse FPE triggers to a single signal */
+	ORGT	D1Re0, D1Re0, #1
+	LSLGT	D1Re0, D1Re0, D1Ar3
+
+	SUB	D1Ar3, D1Ar3, #(TXSTAT_FPE_INVALID_S - TBID_SIGNUM_FPE)
+
+	/* Acknowledge triggers and restore remaining exceptions
+	 * Do this here in case the handler enables nested interrupts
+	 *
+	 * (x | y) ^ y == x & ~y. It avoids the restrictive XOR ...,#-1 insn
+	 * and is the same length
+	 */
+	MOV	D0Re0, TXDEFR
+	OR	D0Re0, D0Re0, D1Re0
+	XOR	TXDEFR, D0Re0, D1Re0
+
+	/* Get the handler using the signal number
+	 *
+	 * D1Ar3 -- The signal number
+	 * D0Re0 -- Address of handler
+	 */
+	LSL	D0Re0, D1Ar3, #2
+	ADD	D0Re0,D0Re0,#TBI_fnSigs
+	GETD	D0Re0, [D0Ar6+D0Re0]
+
+	/* Tailcall the handler */
+	MOV	PC,D0Re0
+
+#endif /* META_BUG_MBN100212 */
+	.size	___TBIHandleDFR,.-___TBIHandleDFR
+/*
+ * End of tbidefr.S
+ */
diff --git a/arch/metag/tbx/tbidspram.S b/arch/metag/tbx/tbidspram.S
new file mode 100644
index 0000000..2f27c03
--- /dev/null
+++ b/arch/metag/tbx/tbidspram.S
@@ -0,0 +1,161 @@
+/*
+ * tbidspram.S
+ *
+ * Copyright (C) 2009, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Explicit state save and restore routines forming part of the thread binary
+ * interface for META processors
+ */
+
+	.file	"tbidspram.S"
+
+/* These aren't generally useful to a user so for now, they arent publically available */
+#define _TBIECH_DSPRAM_DUA_S    8
+#define _TBIECH_DSPRAM_DUA_BITS 0x7f00
+#define _TBIECH_DSPRAM_DUB_S    0
+#define _TBIECH_DSPRAM_DUB_BITS 0x007f
+
+/*
+ * void *__TBIDspramSaveA( short DspramSizes, void *pExt )
+ */
+	.text
+	.balign	4
+	.global	___TBIDspramSaveA
+	.type	___TBIDspramSaveA,function
+___TBIDspramSaveA:
+
+	SETL	[A0StP++], D0.5, D1.5
+	MOV	A0.3, D0Ar2
+
+	/* D1Ar1 - Dspram Sizes
+	 * A0.4  - Pointer to buffer
+	 */
+
+	/* Save the specified amount of dspram DUA */
+DL	MOV	D0AR.0, #0
+	LSR	D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUA_S
+	AND	D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUA_BITS >> _TBIECH_DSPRAM_DUA_S)
+	SUB	TXRPT, D1Ar1, #1
+$L1:
+DL	MOV	D0Re0, [D0AR.0++]
+DL	MOV	D0Ar6, [D0AR.0++]
+DL	MOV	D0Ar4, [D0AR.0++]
+DL	MOV	D0.5,  [D0AR.0++]
+	MSETL	[A0.3++], D0Re0, D0Ar6, D0Ar4, D0.5
+
+	BR	$L1
+
+	GETL	D0.5, D1.5, [--A0StP]
+	MOV	PC, D1RtP
+
+	.size	___TBIDspramSaveA,.-___TBIDspramSaveA
+
+/*
+ * void *__TBIDspramSaveB( short DspramSizes, void *pExt )
+ */
+	.balign	4
+	.global	___TBIDspramSaveB
+	.type	___TBIDspramSaveB,function
+___TBIDspramSaveB:
+
+	SETL	[A0StP++], D0.5, D1.5
+	MOV	A0.3, D0Ar2
+
+	/* D1Ar1 - Dspram Sizes
+	 * A0.3  - Pointer to buffer
+	 */
+
+	/* Save the specified amount of dspram DUA */
+DL	MOV	D0BR.0, #0
+	LSR	D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUB_S
+	AND	D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUB_BITS >> _TBIECH_DSPRAM_DUB_S)
+	SUB	TXRPT, D1Ar1, #1
+$L2:
+DL	MOV	D0Re0, [D0BR.0++]
+DL	MOV	D0Ar6, [D0BR.0++]
+DL	MOV	D0Ar4, [D0BR.0++]
+DL	MOV	D0.5,  [D0BR.0++]
+	MSETL	[A0.3++], D0Re0, D0Ar6, D0Ar4, D0.5
+
+	BR	$L2
+
+	GETL	D0.5, D1.5, [--A0StP]
+	MOV	PC, D1RtP
+
+	.size	___TBIDspramSaveB,.-___TBIDspramSaveB
+
+/*
+ * void *__TBIDspramRestoreA( short DspramSizes, void *pExt )
+ */
+	.balign	4
+	.global	___TBIDspramRestoreA
+	.type	___TBIDspramRestoreA,function
+___TBIDspramRestoreA:
+
+	SETL	[A0StP++], D0.5, D1.5
+	MOV	A0.3, D0Ar2
+
+	/* D1Ar1 - Dspram Sizes
+	 * A0.3 - Pointer to buffer
+	 */
+
+	/* Restore the specified amount of dspram DUA */
+DL	MOV	D0AW.0, #0
+	LSR	D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUA_S
+	AND	D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUA_BITS >> _TBIECH_DSPRAM_DUA_S)
+	SUB	TXRPT, D1Ar1, #1
+$L3:
+	MGETL	D0Re0, D0Ar6, D0Ar4, D0.5, [A0.3++]
+DL	MOV	[D0AW.0++], D0Re0
+DL	MOV	[D0AW.0++], D0Ar6
+DL	MOV	[D0AW.0++], D0Ar4
+DL	MOV	[D0AW.0++], D0.5
+
+	BR	$L3
+
+	GETL	D0.5, D1.5, [--A0StP]
+	MOV	PC, D1RtP
+
+	.size	___TBIDspramRestoreA,.-___TBIDspramRestoreA
+
+/*
+ * void *__TBIDspramRestoreB( short DspramSizes, void *pExt )
+ */
+	.balign	4
+	.global	___TBIDspramRestoreB
+	.type	___TBIDspramRestoreB,function
+___TBIDspramRestoreB:
+
+	SETL	[A0StP++], D0.5, D1.5
+	MOV	A0.3, D0Ar2
+
+	/* D1Ar1 - Dspram Sizes
+	 * A0.3 - Pointer to buffer
+	 */
+
+	/* Restore the specified amount of dspram DUA */
+DL	MOV	D0BW.0, #0
+	LSR	D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUB_S
+	AND	D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUB_BITS >> _TBIECH_DSPRAM_DUB_S)
+	SUB	TXRPT, D1Ar1, #1
+$L4:
+	MGETL	D0Re0, D0Ar6, D0Ar4, D0.5, [A0.3++]
+DL	MOV	[D0BW.0++], D0Re0
+DL	MOV	[D0BW.0++], D0Ar6
+DL	MOV	[D0BW.0++], D0Ar4
+DL	MOV	[D0BW.0++], D0.5
+
+	BR	$L4
+
+	GETL	D0.5, D1.5, [--A0StP]
+	MOV	PC, D1RtP
+
+	.size	___TBIDspramRestoreB,.-___TBIDspramRestoreB
+
+/*
+ * End of tbidspram.S
+ */
diff --git a/arch/metag/tbx/tbilogf.S b/arch/metag/tbx/tbilogf.S
new file mode 100644
index 0000000..4a34d80
--- /dev/null
+++ b/arch/metag/tbx/tbilogf.S
@@ -0,0 +1,48 @@
+/*
+ * tbilogf.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Defines __TBILogF trap code for debugging messages and __TBICont for debug
+ * assert to be implemented on.
+ */
+
+	.file	"tbilogf.S"
+
+/*
+ * Perform console printf using external debugger or host support
+ */
+	.text
+	.balign	4
+	.global	___TBILogF
+	.type	___TBILogF,function
+___TBILogF:
+	MSETL 	[A0StP],D0Ar6,D0Ar4,D0Ar2
+	SWITCH 	#0xC10020
+	MOV	D0Re0,#0
+	SUB 	A0StP,A0StP,#24
+	MOV	PC,D1RtP
+	.size	___TBILogF,.-___TBILogF
+
+/*
+ * Perform wait for continue under control of the debugger
+ */
+	.text
+	.balign	4
+	.global	___TBICont
+	.type	___TBICont,function
+___TBICont:
+	MOV 	D0Ar6,#1
+	MSETL 	[A0StP],D0Ar6,D0Ar4,D0Ar2
+	SWITCH 	#0xC30006 	/* Returns if we are to continue */
+	SUB	A0StP,A0StP,#(8*3)
+	MOV	PC,D1RtP	/* Return */
+	.size	___TBICont,.-___TBICont
+
+/*
+ * End of tbilogf.S
+ */
diff --git a/arch/metag/tbx/tbipcx.S b/arch/metag/tbx/tbipcx.S
new file mode 100644
index 0000000..de0626f
--- /dev/null
+++ b/arch/metag/tbx/tbipcx.S
@@ -0,0 +1,451 @@
+/*
+ * tbipcx.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2009, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Asyncronous trigger handling including exceptions
+ */
+
+	.file	"tbipcx.S"
+#include <asm/metag_regs.h>
+#include <asm/tbx.h>
+
+/* BEGIN HACK */
+/* define these for now while doing inital conversion to GAS 
+   will fix properly later */
+
+/* Signal identifiers always have the TBID_SIGNAL_BIT set and contain the
+   following related bit-fields */
+#define TBID_SIGNUM_S       2
+
+/* END HACK */
+
+#ifdef METAC_1_0
+/* Ax.4 is saved in TBICTX */
+#define A0_4  ,A0.4
+#else
+/* Ax.4 is NOT saved in TBICTX */
+#define A0_4
+#endif
+
+/* Size of the TBICTX structure */
+#define TBICTX_BYTES ((TBICTX_AX_REGS*8)+TBICTX_AX)
+
+#ifdef METAC_1_1
+#ifndef BOOTROM
+#ifndef SPECIAL_BUILD
+/* Jump straight into the boot ROM version of this code */
+#define CODE_USES_BOOTROM
+#endif
+#endif
+#endif
+
+/* Define space needed for CATCH buffer state in traditional units */
+#define CATCH_ENTRIES      5
+#define CATCH_ENTRY_BYTES 16
+
+#ifndef CODE_USES_BOOTROM
+#define A0GblIStP	A0.15  /* PTBICTX for current thread in PRIV system */
+#define A1GblIGbP 	A1.15  /* Interrupt A1GbP value in PRIV system */
+#endif
+
+/*
+ * TBIRES __TBIASyncTrigger( TBIRES State )
+ */
+	.text
+	.balign	4
+	.global	___TBIASyncTrigger
+	.type	___TBIASyncTrigger,function
+___TBIASyncTrigger:
+#ifdef CODE_USES_BOOTROM
+	MOVT	D0Re0,#HI(LINCORE_BASE)
+	JUMP	D0Re0,#0xA0
+#else
+	MOV	D0FrT,A0FrP			/* Boing entry sequence */
+	ADD	A0FrP,A0StP,#0
+	SETL	[A0StP++],D0FrT,D1RtP
+	MOV	D0Re0,PCX			/* Check for repeat call */
+	MOVT	D0FrT,#HI(___TBIBoingRTI+4)
+	ADD	D0FrT,D0FrT,#LO(___TBIBoingRTI+4)
+	CMP	D0Re0,D0FrT
+	BEQ	___TBIBoingExit			/* Already set up - come out */
+	ADD	D1Ar1,D1Ar1,#7			/* PRIV system stack here */
+	MOV	A0.2,A0StP			/*  else push context here */
+	MOVS	D0Re0,D0Ar2			/* Return in user mode? */
+	ANDMB	D1Ar1,D1Ar1,#0xfff8		/*  align priv stack to 64-bit */
+	MOV	D1Re0,D1Ar1			/*   and set result to arg */
+	MOVMI	A0.2,D1Ar1			/*  use priv stack if PRIV set			 */
+/*
+ * Generate an initial TBICTX to return to our own current call context
+ */
+	MOVT	D1Ar5,#HI(___TBIBoingExit)	/* Go here to return */
+	ADD	D1Ar5,D1Ar5,#LO(___TBIBoingExit)
+	ADD	A0.3,A0.2,#TBICTX_DX		/* DX Save area */
+	ANDT	D0Ar2,D0Ar2,#TBICTX_PRIV_BIT	/* Extract PRIV bit */
+	MOVT	D0Ar6,#TBICTX_SOFT_BIT		/* Only soft thread state */
+	ADD	D0Ar6,D0Ar6,D0Ar2		/* Add in PRIV bit if requested */
+	SETL	[A0.2],D0Ar6,D1Ar5		/* Push header fields */
+	ADD	D0FrT,A0.2,#TBICTX_AX		/* Address AX save area */
+	MSETL	[A0.3],D0Re0,D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+	MOV	D0Ar6,#0
+	MOV	D1Ar5,#0
+	SETL	[A0.3++],D0Ar6,D1Ar5		/* Zero CT register states */
+	SETL	[A0.3++],D0Ar6,D1Ar5
+	MSETL	[D0FrT],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX regs */
+	MOV	A0FrP,A0.2			/* Restore me! */
+	B	___TBIResume
+	.size	___TBIASyncTrigger,.-___TBIASyncTrigger
+
+/*
+ * Optimised return to handler for META Core
+ */
+___TBIBoingRTH:
+	RTH					/* Go to background level */
+	MOVT	A0.2,     #HI($Lpcx_target)
+	ADD	A0.2,A0.2,#LO($Lpcx_target)
+	MOV	PCX,A0.2                        /* Setup PCX for interrupts */
+	MOV	PC,D1Re0			/* Jump to handler */
+/*
+ * This is where the code below needs to jump to wait for outermost interrupt
+ * event in a non-privilege mode system (single shared interrupt stack).
+ */
+___TBIBoingPCX:
+	MGETL	A0StP,A0FrP,A0.2,A0.3 A0_4,[D1Re0] /* Restore AX regs */
+	MOV	TXSTATUS,D0Re0			/* Restore flags */
+	GETL	D0Re0,D1Re0,[D1Re0+#TBICTX_DX-TBICTX_BYTES]
+___TBIBoingRTI:
+	RTI					/* Wait for interrupt */
+$Lpcx_target:
+/*
+ * Save initial interrupt state on current stack
+ */
+	SETL	[A0StP+#TBICTX_DX],D0Re0,D1Re0	/* Save key registers */
+	ADD	D1Re0,A0StP,#TBICTX_AX		/* Address AX save area */
+	MOV	D0Re0,TXSTATUS			/* Read TXSTATUS into D0Re0 */
+	MOV	TXSTATUS,#0			/* Clear TXSTATUS */
+	MSETL	[D1Re0],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX critical regs */
+/*
+ * Register state at this point is-
+ *
+ *	D0Re0 - Old TXSTATUS with PRIV and CBUF bits set if appropriate
+ *	A0StP - Is call stack frame and base of TBICTX being generated
+ *	A1GbP - Is valid static access link
+ */
+___TBIBoing:
+	LOCK0					/* Make sure we have no locks! */
+	ADD	A1.2,A0StP,#TBICTX_DX+(8*1)	/* Address DX.1 save area */
+	MOV	A0FrP,A0StP			/* Setup frame pointer */
+	MSETL	[A1.2],D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+	MOV	D0Ar4,TXRPT			/* Save critical CT regs */
+	MOV	D1Ar3,TXBPOBITS
+	MOV	D1Ar1,TXDIVTIME			/* Calc catch buffer pSrc */
+	MOV	D0Ar2,TXMODE
+	MOV	TXMODE,#0			/* Clear TXMODE */
+#ifdef TXDIVTIME_RPDIRTY_BIT
+	TSTT	D1Ar1,#HI(TXDIVTIME_RPDIRTY_BIT)/* NZ = RPDIRTY */
+	MOVT	D0Ar6,#TBICTX_CBRP_BIT
+	ORNZ	D0Re0,D0Re0,D0Ar6		/* Set CBRP if RPDIRTY set */
+#endif
+	MSETL	[A1.2],D0Ar4,D0Ar2		/* Save CT regs state */
+	MOV	D0Ar2,D0Re0			/* Copy TXSTATUS */
+	ANDMT	D0Ar2,D0Ar2,#TBICTX_CBUF_BIT+TBICTX_CBRP_BIT
+#ifdef TBI_1_4
+	MOVT	D1Ar1,#TBICTX_FPAC_BIT		/* Copy FPActive into FPAC */
+	TSTT	D0Re0,#HI(TXSTATUS_FPACTIVE_BIT)
+	ORNZ	D0Ar2,D0Ar2,D1Ar1
+#endif
+	MOV	D1Ar1,PCX			/* Read CurrPC */
+	ORT	D0Ar2,D0Ar2,#TBICTX_CRIT_BIT	/* SaveMask + CRIT bit */
+	SETL	[A0FrP+#TBICTX_Flags],D0Ar2,D1Ar1 /* Set pCtx header fields */
+/*
+ * Completed context save, now we need to make a call to an interrupt handler
+ *
+ *	D0Re0 - holds PRIV, WAIT, CBUF flags, HALT reason if appropriate
+ *	A0FrP - interrupt stack frame and base of TBICTX being generated
+ *	A0StP - same as A0FrP
+ */
+___TBIBoingWait:
+				/* Reserve space for TBICTX and CBUF */
+	ADD	A0StP,A0StP,#TBICTX_BYTES+(CATCH_ENTRY_BYTES*CATCH_ENTRIES)
+	MOV	D0Ar4,TXSTATI			/* Read the Triggers data */
+	MOV	D1Ar3,TXDIVTIME			/* Read IRQEnc bits */
+	MOV	D0Ar2,D0Re0			/* Copy PRIV and WAIT flags */
+	ANDT	D0Ar2,D0Ar2,#TBICTX_PRIV_BIT+TBICTX_WAIT_BIT+TBICTX_CBUF_BIT
+#ifdef TBI_1_4
+	MOVT	D1Ar5,#TBICTX_FPAC_BIT		/* Copy FPActive into FPAC */
+	TSTT	D0Re0,#HI(TXSTATUS_FPACTIVE_BIT)
+	ORNZ	D0Ar2,D0Ar2,D1Ar5
+#endif
+	ANDT	D1Ar3,D1Ar3,#HI(TXDIVTIME_IRQENC_BITS)
+	LSR	D1Ar3,D1Ar3,#TXDIVTIME_IRQENC_S
+	AND	TXSTATI,D0Ar4,#TXSTATI_BGNDHALT_BIT/* Ack any HALT seen */
+	ANDS	D0Ar4,D0Ar4,#0xFFFF-TXSTATI_BGNDHALT_BIT /* Only seen HALT? */
+	ORT	D0Ar2,D0Ar2,#TBICTX_CRIT_BIT	/* Set CRIT */
+#ifndef BOOTROM
+	MOVT	A1LbP,#HI(___pTBIs)
+	ADD	A1LbP,A1LbP,#LO(___pTBIs)
+	GETL	D1Ar5,D0Ar6,[A1LbP]		/* D0Ar6 = ___pTBIs[1] */
+#else
+/*
+ * For BOOTROM support ___pTBIs must be allocated at offset 0 vs A1GbP
+ */
+	GETL	D1Ar5,D0Ar6,[A1GbP]			/* D0Ar6 = ___pTBIs[1] */
+#endif
+	BZ	___TBIBoingHalt			/* Yes: Service HALT */
+/*
+ * Encode interrupt as signal vector, strip away same/lower TXMASKI bits
+ */
+	MOV	D1Ar1,#1			/* Generate mask for this bit */
+	MOV	D0Re0,TXMASKI			/* Get interrupt mask */
+	LSL	TXSTATI,D1Ar1,D1Ar3		/* Acknowledge trigger */
+	AND	TXMASKI,D0Re0,#TXSTATI_BGNDHALT_BIT	/* Only allow HALTs */
+	OR	D0Ar2,D0Ar2,D0Re0		/* Set TBIRES.Sig.TrigMask */
+	ADD	D1Ar3,D1Ar3,#TBID_SIGNUM_TRT	/* Offset into interrupt sigs */
+	LSL	D0Re0,D1Ar3,#TBID_SIGNUM_S	/* Generate offset from SigNum */
+/*
+ * This is a key moment we are about to call the handler, register state is
+ * as follows-
+ *
+ *	D0Re0 - Handler vector (SigNum<<TBID_SIGNUM_S)
+ *	D0Ar2 - TXMASKI:TBICTX_CRIT_BIT with optional CBUF and PRIV bits
+ *	D1Ar3 - SigNum
+ *	D0Ar4 - State read from TXSTATI
+ *	D1Ar5 - Inst for SWITCH trigger case only, otherwise undefined
+ *	D0Ar6 - pTBI
+ */
+___TBIBoingVec:
+	ADD	D0Re0,D0Re0,#TBI_fnSigs		/* Offset into signal table */
+	GETD	D1Re0,[D0Ar6+D0Re0]		/* Get address for Handler */
+/*
+ * Call handler at interrupt level, when it returns simply resume execution
+ * of state indicated by D1Re0.
+ */
+	MOV	D1Ar1,A0FrP			/* Pass in pCtx */
+	CALLR	D1RtP,___TBIBoingRTH		/* Use RTH to invoke handler */
+	
+/*
+ * Perform critical state restore and execute background thread.
+ *
+ *	A0FrP - is pointer to TBICTX structure to resume
+ *	D0Re0 - contains additional TXMASKI triggers
+ */
+	.text
+	.balign	4
+#ifdef BOOTROM
+	.global	___TBIResume
+#endif
+___TBIResume:
+/*
+ * New META IP method
+ */
+	RTH					/* Go to interrupt level */
+	MOV	D0Ar4,TXMASKI			/* Read TXMASKI */
+	OR	TXMASKI,D0Ar4,D0Re0		/* -Write-Modify TXMASKI */
+	GETL	D0Re0,D1Re0,[A0FrP+#TBICTX_Flags]/* Get Flags:SaveMask, CurrPC */
+	MOV	A0StP,A0FrP			/* Position stack pointer */
+	MOV	D0Ar2,TXPOLLI			/* Read pending triggers */
+	MOV	PCX,D1Re0			/* Set resumption PC */
+	TST	D0Ar2,#0xFFFF			/* Any pending triggers? */
+	BNZ	___TBIBoingWait			/* Yes: Go for triggers */
+	TSTT	D0Re0,#TBICTX_WAIT_BIT		/* Do we WAIT anyway? */
+	BNZ	___TBIBoingWait			/* Yes: Go for triggers */
+	LSLS	D1Ar5,D0Re0,#1			/* Test XCBF (MI) & PRIV (CS)? */
+	ADD	D1Re0,A0FrP,#TBICTX_CurrRPT	/* Address CT save area */
+	ADD	A0StP,A0FrP,#TBICTX_DX+(8*1)	/* Address DX.1 save area */
+	MGETL	A0.2,A0.3,[D1Re0]		/* Get CT reg states */
+	MOV	D1Ar3,A1.3			/* Copy old TXDIVTIME */
+	BPL	___TBIResCrit			/* No: Skip logic */
+	ADD	D0Ar4,A0FrP,#TBICTX_BYTES	/* Source is after TBICTX */
+	ANDST	D1Ar3,D1Ar3,#HI(TXDIVTIME_RPMASK_BITS)/* !Z if RPDIRTY */
+	MGETL	D0.5,D0.6,[D0Ar4]		/* Read Catch state */
+	MOV	TXCATCH0,D0.5			/* Restore TXCATCHn */
+	MOV	TXCATCH1,D1.5
+	MOV	TXCATCH2,D0.6
+	MOV	TXCATCH3,D1.6
+	BZ	___TBIResCrit
+	MOV	D0Ar2,#(1*8)
+	LSRS	D1Ar3,D1Ar3,#TXDIVTIME_RPMASK_S+1 /* 2nd RPMASK bit -> bit 0 */
+	ADD	RA,D0Ar4,#(0*8)			/* Re-read read pipeline */
+	ADDNZ	RA,D0Ar4,D0Ar2			/* If Bit 0 set issue RA */
+	LSRS	D1Ar3,D1Ar3,#2			/* Bit 1 -> C, Bit 2 -> Bit 0 */
+	ADD	D0Ar2,D0Ar2,#8
+	ADDCS	RA,D0Ar4,D0Ar2			/* If C issue RA */
+	ADD	D0Ar2,D0Ar2,#8
+	ADDNZ	RA,D0Ar4,D0Ar2			/* If Bit 0 set issue RA */
+	LSRS	D1Ar3,D1Ar3,#2			/* Bit 1 -> C, Bit 2 -> Bit 0 */
+	ADD	D0Ar2,D0Ar2,#8
+	ADDCS	RA,D0Ar4,D0Ar2			/* If C issue RA */
+	ADD	D0Ar2,D0Ar2,#8
+	ADDNZ	RA,D0Ar4,D0Ar2			/* If Bit 0 set issue RA */
+	MOV	TXDIVTIME,A1.3			/* Set RPDIRTY again */
+___TBIResCrit:
+	LSLS	D1Ar5,D0Re0,#1			/* Test XCBF (MI) & PRIV (CS)? */
+#ifdef TBI_1_4
+	ANDT	D1Ar5,D1Ar5,#(TBICTX_FPAC_BIT*2)
+	LSL	D0Ar6,D1Ar5,#3			/* Convert FPAC into FPACTIVE */
+#endif
+ 	ANDMT	D0Re0,D0Re0,#TBICTX_CBUF_BIT	/* Keep CBUF bit from SaveMask */
+#ifdef TBI_1_4
+	OR	D0Re0,D0Re0,D0Ar6		/* Combine FPACTIVE with others */
+#endif
+	MGETL	D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7,[A0StP] /* Restore DX */
+	MOV	TXRPT,A0.2			/* Restore CT regs */
+	MOV	TXBPOBITS,A1.2
+	MOV	TXMODE,A0.3
+	BCC	___TBIBoingPCX			/* Do non-PRIV wait! */
+	MOV	A1GblIGbP,A1GbP			/* Save A1GbP too */
+	MGETL	A0StP,A0FrP,A0.2,A0.3 A0_4,[D1Re0] /* Restore AX regs */
+/*
+ * Wait for the first interrupt/exception trigger in a privilege mode system
+ * (interrupt stack area for current TASK to be pointed to by A0GblIStP
+ * or per_cpu__stack_save[hwthread_id]).
+ */
+	MOV	TXSTATUS,D0Re0			/* Restore flags */
+	MOV	D0Re0,TXPRIVEXT			/* Set TXPRIVEXT_TXTOGGLEI_BIT */
+	SUB	D1Re0,D1Re0,#TBICTX_BYTES	/* TBICTX is top of int stack */
+#ifdef TBX_PERCPU_SP_SAVE
+	SWAP	D1Ar3,A1GbP
+	MOV	D1Ar3,TXENABLE			/* Which thread are we? */
+	AND	D1Ar3,D1Ar3,#TXENABLE_THREAD_BITS
+	LSR	D1Ar3,D1Ar3,#TXENABLE_THREAD_S-2
+	ADDT	D1Ar3,D1Ar3,#HI(_per_cpu__stack_save)
+	ADD	D1Ar3,D1Ar3,#LO(_per_cpu__stack_save)
+	SETD	[D1Ar3],D1Re0
+	SWAP	D1Ar3,A1GbP
+#else
+	MOV	A0GblIStP, D1Re0
+#endif
+	OR	D0Re0,D0Re0,#TXPRIVEXT_TXTOGGLEI_BIT
+	MOV	TXPRIVEXT,D0Re0			/* Cannot set TXPRIVEXT if !priv */
+	GETL	D0Re0,D1Re0,[D1Re0+#TBICTX_DX]
+	RTI					/* Wait for interrupt */
+/*
+ * Save initial interrupt state on A0GblIStP, switch to A0GblIStP if
+ * BOOTROM code, save and switch to [A1GbP] otherwise.
+ */
+___TBIBoingPCXP:
+#ifdef TBX_PERCPU_SP_SAVE
+	SWAP	D1Ar3,A1GbP			/* Get PRIV stack base */
+	MOV	D1Ar3,TXENABLE			/* Which thread are we? */
+	AND	D1Ar3,D1Ar3,#TXENABLE_THREAD_BITS
+	LSR	D1Ar3,D1Ar3,#TXENABLE_THREAD_S-2
+	ADDT	D1Ar3,D1Ar3,#HI(_per_cpu__stack_save)
+	ADD	D1Ar3,D1Ar3,#LO(_per_cpu__stack_save)
+	GETD	D1Ar3,[D1Ar3]
+#else
+	SWAP	D1Ar3,A0GblIStP			/* Get PRIV stack base */
+#endif
+	SETL	[D1Ar3+#TBICTX_DX],D0Re0,D1Re0 /* Save key registers */
+	MOV	D0Re0,TXPRIVEXT			/* Clear TXPRIVEXT_TXTOGGLEI_BIT */
+	ADD	D1Re0,D1Ar3,#TBICTX_AX	/* Address AX save area */
+	ANDMB	D0Re0,D0Re0,#0xFFFF-TXPRIVEXT_TXTOGGLEI_BIT
+	MOV	TXPRIVEXT,D0Re0			/* Cannot set TXPRIVEXT if !priv */
+	MOV	D0Re0,TXSTATUS			/* Read TXSTATUS into D0Re0 */
+	MOV	TXSTATUS,#0			/* Clear TXSTATUS */
+	MSETL	[D1Re0],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX critical regs */
+	MOV	A0StP,D1Ar3			/* Switch stacks */
+#ifdef TBX_PERCPU_SP_SAVE
+	MOV	D1Ar3,A1GbP			/* Get D1Ar2 back */
+#else
+	MOV	D1Ar3,A0GblIStP			/* Get D1Ar2 back */
+#endif
+	ORT	D0Re0,D0Re0,#TBICTX_PRIV_BIT	/* Add PRIV to TXSTATUS */
+	MOV	A1GbP,A1GblIGbP			/* Restore A1GbP */
+	B	___TBIBoing			/* Enter common handler code */
+/*
+ * At this point we know it's a background HALT case we are handling.
+ * The restored TXSTATUS always needs to have zero in the reason bits.
+ */
+___TBIBoingHalt:
+	MOV	D0Ar4,TXMASKI			/* Get interrupt mask */
+	ANDST	D0Re0,D0Re0,#HI(TXSTATUS_MAJOR_HALT_BITS+TXSTATUS_MEM_FAULT_BITS)
+	AND	TXMASKI,D0Ar4,#TXSTATI_BGNDHALT_BIT /* Only allow HALTs */
+	AND	D0Ar4,D0Ar4,#0xFFFF-TXSTATI_BGNDHALT_BIT /* What ints are off? */
+	OR	D0Ar2,D0Ar2,D0Ar4		/* Set TBIRES.Sig.TrigMask */
+	MOV	D0Ar4,#TXSTATI_BGNDHALT_BIT	/* This was the trigger state */
+	LSR	D1Ar3,D0Re0,#TXSTATUS_MAJOR_HALT_S
+	MOV	D0Re0,#TBID_SIGNUM_XXF<<TBID_SIGNUM_S
+	BNZ	___TBIBoingVec			/* Jump to XXF exception handler */
+/*
+ * Only the SWITCH cases are left, PCX must be valid
+ */
+#ifdef TBI_1_4
+	MOV	D1Ar5,TXPRIVEXT
+	TST	D1Ar5,#TXPRIVEXT_MINIMON_BIT
+	LSR	D1Ar3,D1Ar1,#1                  /* Shift needed for MINIM paths (fill stall) */
+	BZ	$Lmeta                          /* If META only, skip */
+	TSTT	D1Ar1,#HI(0x00800000)
+	ANDMT	D1Ar3,D1Ar3,#HI(0x007FFFFF >> 1)/* Shifted mask for large MINIM */
+	ANDT	D1Ar1,D1Ar1,#HI(0xFFE00000)     /* Static mask for small MINIM */
+	BZ	$Llarge_minim                   /* If large MINIM */
+$Lsmall_minim:
+	TSTT	D1Ar3,#HI(0x00100000 >> 1)
+	ANDMT	D1Ar3,D1Ar3,#HI(0x001FFFFF >> 1)/* Correct shifted mask for large MINIM */
+	ADDZ	D1Ar1,D1Ar1,D1Ar3               /* If META rgn, add twice to undo LSR #1 */
+	B	$Lrecombine
+$Llarge_minim:
+	ANDST	D1Ar1,D1Ar1,#HI(0xFF800000)     /* Correct static mask for small MINIM */
+	                                        /* Z=0 (Cannot place code at NULL) */
+$Lrecombine:
+	ADD	D1Ar1,D1Ar1,D1Ar3               /* Combine static and shifted parts */
+$Lmeta:
+	GETW	D1Ar5,[D1Ar1++]			/* META: lo-16, MINIM: lo-16 (all-16 if short) */
+	GETW	D1Ar3,[D1Ar1]			/* META: hi-16, MINIM: hi-16 (only if long) */
+	MOV	D1Re0,D1Ar5
+	XOR	D1Re0,D1Re0,#0x4000
+	LSLSNZ	D1Re0,D1Re0,#(32-14)		/* MINIM: If long C=0, if short C=1 */
+	LSLCC	D1Ar3,D1Ar3,#16			/* META/MINIM long: Move hi-16 up */
+	LSLCS	D1Ar3,D1Ar5,#16			/* MINIM short: Dup all-16 */
+	ADD	D1Ar5,D1Ar5,D1Ar3		/* ALL: Combine both 16-bit parts */
+#else
+	GETD	D1Ar5,[D1Ar1]			/* Read instruction for switch */
+#endif
+	LSR	D1Ar3,D1Ar5,#22			/* Convert into signal number */
+	AND	D1Ar3,D1Ar3,#TBID_SIGNUM_SW3-TBID_SIGNUM_SW0
+	LSL	D0Re0,D1Ar3,#TBID_SIGNUM_S	/* Generate offset from SigNum */
+	B	___TBIBoingVec			/* Jump to switch handler */
+/*
+ * Exit from TBIASyncTrigger call
+ */
+___TBIBoingExit:
+	GETL	D0FrT,D1RtP,[A0FrP++] 		/* Restore state from frame */
+	SUB	A0StP,A0FrP,#8			/* Unwind stack */
+	MOV	A0FrP,D0FrT			/* Last memory read completes */
+	MOV	PC,D1RtP			/* Return to caller */
+#endif /* ifdef CODE_USES_BOOTROM */
+	.size	___TBIResume,.-___TBIResume
+
+#ifndef BOOTROM
+/*
+ * void __TBIASyncResume( TBIRES State )
+ */
+	.text
+	.balign	4
+	.global	___TBIASyncResume
+	.type	___TBIASyncResume,function
+___TBIASyncResume:
+/*
+ * Perform CRIT|SOFT state restore and execute background thread.
+ */
+	MOV	D1Ar3,D1Ar1			/* Restore this context */
+	MOV	D0Re0,D0Ar2			/* Carry in additional triggers */
+						/* Reserve space for TBICTX */
+	ADD	D1Ar3,D1Ar3,#TBICTX_BYTES+(CATCH_ENTRY_BYTES*CATCH_ENTRIES)
+	MOV	A0StP,D1Ar3			/* Enter with protection of */
+	MOV	A0FrP,D1Ar1			/*   TBICTX on our stack */
+#ifdef CODE_USES_BOOTROM
+	MOVT	D1Ar1,#HI(LINCORE_BASE)
+	JUMP	D1Ar1,#0xA4
+#else
+	B	___TBIResume
+#endif
+	.size	___TBIASyncResume,.-___TBIASyncResume
+#endif /* ifndef BOOTROM */
+
+/*
+ * End of tbipcx.S
+ */
diff --git a/arch/metag/tbx/tbiroot.S b/arch/metag/tbx/tbiroot.S
new file mode 100644
index 0000000..7d84daf
--- /dev/null
+++ b/arch/metag/tbx/tbiroot.S
@@ -0,0 +1,87 @@
+/*
+ * tbiroot.S
+ *
+ * Copyright (C) 2001, 2002, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Module that creates and via ___TBI function returns a TBI Root Block for
+ * interrupt and background processing on the current thread.
+ */
+
+	.file	"tbiroot.S"
+#include <asm/metag_regs.h>
+
+/*
+ * Get data structures and defines from the TBI C header
+ */
+#include <asm/tbx.h>
+
+
+/* If signals need to be exchanged we must create a TBI Root Block */
+
+	.data
+	.balign	8
+	.global	___pTBIs
+	.type	___pTBIs,object
+___pTBIs:
+	.long	0 /* Bgnd+Int root block ptrs */
+	.long	0
+	.size	___pTBIs,.-___pTBIs
+
+
+/*
+ * Return ___pTBIs value specific to execution level with promotion/demotion
+ *
+ * Register Usage: D1Ar1 is Id, D0Re0 is the primary result
+ *                 D1Re0 is secondary result (___pTBIs for other exec level)
+ */
+	.text
+	.balign	4
+	.global	___TBI
+	.type	___TBI,function
+___TBI:
+	TSTT	D1Ar1,#HI(TBID_ISTAT_BIT)	/* Bgnd or Int level? */
+	MOVT	A1LbP,#HI(___pTBIs)
+	ADD	A1LbP,A1LbP,#LO(___pTBIs)
+	GETL	D0Re0,D1Re0,[A1LbP] /* Base of root block table */
+	SWAPNZ	D0Re0,D1Re0			/* Swap if asked */
+	MOV	PC,D1RtP
+	.size	___TBI,.-___TBI
+
+
+/*
+ * Return identifier of the current thread in TBI segment or signal format with
+ * secondary mask to indicate privilege and interrupt level of thread
+ */
+	.text
+	.balign	4
+	.global	___TBIThrdPrivId
+	.type	___TBIThrdPrivId,function
+___TBIThrdPrivId:
+	.global	___TBIThreadId
+	.type	___TBIThreadId,function
+___TBIThreadId:
+#ifndef METAC_0_1
+	MOV	D1Re0,TXSTATUS			/* Are we privileged or int? */
+	MOV	D0Re0,TXENABLE			/* Which thread are we? */
+/* Disable privilege adaption for now */
+	ANDT	D1Re0,D1Re0,#HI(TXSTATUS_ISTAT_BIT) /* +TXSTATUS_PSTAT_BIT) */
+	LSL	D1Re0,D1Re0,#TBID_ISTAT_S-TXSTATUS_ISTAT_S
+	AND	D0Re0,D0Re0,#TXENABLE_THREAD_BITS
+	LSL	D0Re0,D0Re0,#TBID_THREAD_S-TXENABLE_THREAD_S
+#else
+/* Thread 0 only */
+	XOR	D0Re0,D0Re0,D0Re0
+	XOR	D1Re0,D1Re0,D1Re0
+#endif
+	MOV 	PC,D1RtP			/* Return */
+	.size	___TBIThrdPrivId,.-___TBIThrdPrivId
+	.size	___TBIThreadId,.-___TBIThreadId 
+
+
+/*
+ * End of tbiroot.S
+ */
diff --git a/arch/metag/tbx/tbisoft.S b/arch/metag/tbx/tbisoft.S
new file mode 100644
index 0000000..0346fe8
--- /dev/null
+++ b/arch/metag/tbx/tbisoft.S
@@ -0,0 +1,237 @@
+/*
+ * tbisoft.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Support for soft threads and soft context switches
+ */
+
+	.file	"tbisoft.S"
+
+#include <asm/tbx.h>
+
+#ifdef METAC_1_0
+/* Ax.4 is saved in TBICTX */
+#define A0_4  ,A0.4
+#define D0_5  ,D0.5
+#else
+/* Ax.4 is NOT saved in TBICTX */
+#define A0_4
+#define D0_5
+#endif
+
+/* Size of the TBICTX structure */
+#define TBICTX_BYTES ((TBICTX_AX_REGS*8)+TBICTX_AX)
+
+	.text
+	.balign	4
+	.global	___TBISwitchTail
+	.type	___TBISwitchTail,function
+___TBISwitchTail:
+	B	$LSwitchTail
+	.size	___TBISwitchTail,.-___TBISwitchTail
+
+/* 
+ * TBIRES __TBIJumpX( TBIX64 ArgsA, PTBICTX *rpSaveCtx, int TrigsMask,
+ *                                    void (*fnMain)(), void *pStack );
+ *
+ * This is a combination of __TBISwitch and __TBIJump with the context of
+ * the calling thread being saved in the rpSaveCtx location with a drop-thru
+ *  effect into the __TBIJump logic. ArgsB passes via __TBIJump to the
+ *  routine eventually invoked will reflect the rpSaveCtx value specified.
+ */
+	.text
+	.balign	4
+	.global	___TBIJumpX
+	.type	___TBIJumpX,function
+___TBIJumpX:
+	CMP	D1RtP,#-1
+	B	$LSwitchStart
+	.size	___TBIJumpX,.-___TBIJumpX
+
+/*
+ * TBIRES __TBISwitch( TBIRES Switch, PTBICTX *rpSaveCtx )
+ *
+ * Software syncronous context switch between soft threads, save only the
+ * registers which are actually valid on call entry.
+ *
+ *	A0FrP, D0RtP, D0.5, D0.6, D0.7      - Saved on stack
+ *	A1GbP is global to all soft threads so not virtualised
+ *	A0StP is then saved as the base of the TBICTX of the thread
+ *	
+ */
+	.text
+	.balign	4
+	.global	___TBISwitch
+	.type	___TBISwitch,function
+___TBISwitch:
+	XORS	D0Re0,D0Re0,D0Re0		/* Set ZERO flag */
+$LSwitchStart:
+	MOV	D0FrT,A0FrP			/* Boing entry sequence */
+	ADD	A0FrP,A0StP,#0			
+	SETL	[A0StP+#8++],D0FrT,D1RtP
+/*
+ * Save current frame state - we save all regs because we don't want
+ * uninitialised crap in the TBICTX structure that the asyncronous resumption
+ * of a thread will restore.
+ */
+	MOVT	D1Re0,#HI($LSwitchExit)		/* ASync resume point here */
+	ADD	D1Re0,D1Re0,#LO($LSwitchExit)
+	SETD	[D1Ar3],A0StP			/* Record pCtx of this thread */
+	MOVT	D0Re0,#TBICTX_SOFT_BIT		/* Only soft thread state */
+	SETL	[A0StP++],D0Re0,D1Re0		/* Push header fields */
+	ADD	D0FrT,A0StP,#TBICTX_AX-TBICTX_DX /* Address AX save area */
+	MOV	D0Re0,#0			/* Setup 0:0 result for ASync */
+	MOV	D1Re0,#0			/* resume of the thread */
+	MSETL	[A0StP],D0Re0,D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+	SETL	[A0StP++],D0Re0,D1Re0		/* Zero CurrRPT, CurrBPOBITS, */
+	SETL	[A0StP++],D0Re0,D1Re0		/* Zero CurrMODE, CurrDIVTIME */
+	ADD	A0StP,A0StP,#(TBICTX_AX_REGS*8)	/* Reserve AX save space */
+	MSETL	[D0FrT],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX regs */
+	BNZ	___TBIJump
+/*
+ * NextThread MUST be in TBICTX_SOFT_BIT state!
+ */
+$LSwitchTail:
+	MOV	D0Re0,D0Ar2			/* Result from args */
+	MOV	D1Re0,D1Ar1
+	ADD	D1RtP,D1Ar1,#TBICTX_AX
+	MGETL	A0StP,A0FrP,[D1RtP]		/* Get frame values */
+$LSwitchCmn:
+	ADD	A0.2,D1Ar1,#TBICTX_DX+(8*5)
+	MGETL	D0.5,D0.6,D0.7,[A0.2]		/* Get caller-saved DX regs */
+$LSwitchExit:
+	GETL	D0FrT,D1RtP,[A0FrP++] 		/* Restore state from frame */
+	SUB	A0StP,A0FrP,#8			/* Unwind stack */
+	MOV	A0FrP,D0FrT			/* Last memory read completes */
+	MOV	PC,D1RtP			/* Return to caller */
+	.size	___TBISwitch,.-___TBISwitch
+
+/*
+ * void __TBISyncResume( TBIRES State, int TrigMask );
+ *
+ * This routine causes the TBICTX structure specified in State.Sig.pCtx to
+ * be restored. This implies that execution will not return to the caller.
+ * The State.Sig.TrigMask field will be ored into TXMASKI during the
+ * context switch such that any immediately occuring interrupts occur in
+ * the context of the newly specified task. The State.Sig.SaveMask parameter
+ * is ignored.
+ */
+	.text
+	.balign	4
+	.global	___TBISyncResume
+	.type	___TBISyncResume,function
+___TBISyncResume:
+	MOV	D0Re0,D0Ar2			/* Result from args */
+	MOV	D1Re0,D1Ar1
+	XOR	D1Ar5,D1Ar5,D1Ar5		/* D1Ar5 = 0 */
+	ADD	D1RtP,D1Ar1,#TBICTX_AX
+	SWAP	D1Ar5,TXMASKI			/* D1Ar5 <-> TXMASKI */
+	MGETL	A0StP,A0FrP,[D1RtP]		/* Get frame values */
+	OR	TXMASKI,D1Ar5,D1Ar3		/* New TXMASKI */
+	B 	$LSwitchCmn
+	.size	___TBISyncResume,.-___TBISyncResume
+
+/*
+ * void __TBIJump( TBIX64 ArgsA, TBIX32 ArgsB, int TrigsMask,
+ *                               void (*fnMain)(), void *pStack );
+ *
+ * Jump directly to a new routine on an arbitrary stack with arbitrary args
+ * oring bits back into TXMASKI on route.
+ */
+	.text
+	.balign	4
+	.global	___TBIJump
+	.type	___TBIJump,function
+___TBIJump:
+	XOR	D0Re0,D0Re0,D0Re0		/* D0Re0 = 0 */
+	MOV	A0StP,D0Ar6			/* Stack = Frame */
+	SWAP	D0Re0,TXMASKI			/* D0Re0 <-> TXMASKI */
+	MOV	A0FrP,D0Ar6			
+	MOVT	A1LbP,#HI(__exit)
+	ADD	A1LbP,A1LbP,#LO(__exit)
+	MOV	D1RtP,A1LbP			/* D1RtP = __exit */
+	OR	TXMASKI,D0Re0,D0Ar4		/* New TXMASKI */
+	MOV	PC,D1Ar5			/* Jump to fnMain */
+	.size	___TBIJump,.-___TBIJump
+
+/*
+ *	PTBICTX __TBISwitchInit( void *pStack, int (*fnMain)(),
+ *                             .... 4 extra 32-bit args .... );
+ *                             
+ * Generate a new soft thread context ready for it's first outing.
+ *
+ *	D1Ar1 - Region of memory to be used as the new soft thread stack
+ *	D0Ar2 - Main line routine for new soft thread
+ *	D1Ar3, D0Ar4, D1Ar5, D0Ar6 - arguments to be passed on stack
+ *	The routine returns the initial PTBICTX value for the new thread
+ */
+	.text
+	.balign	4
+	.global	___TBISwitchInit
+	.type	___TBISwitchInit,function
+___TBISwitchInit:
+	MOV	D0FrT,A0FrP			/* Need save return point */
+	ADD	A0FrP,A0StP,#0
+	SETL	[A0StP++],D0FrT,D1RtP		/* Save return to caller */
+	MOVT	A1LbP,#HI(__exit)
+	ADD	A1LbP,A1LbP,#LO(__exit)
+	MOV	D1RtP,A1LbP			/* Get address of __exit */
+	ADD	D1Ar1,D1Ar1,#7			/* Align stack to 64-bits */
+	ANDMB	D1Ar1,D1Ar1,#0xfff8		/*   by rounding base up */
+	MOV	A0.2,D1Ar1			/* A0.2 is new stack */
+	MOV	D0FrT,D1Ar1			/* Initial puesdo-frame pointer */
+	SETL	[A0.2++],D0FrT,D1RtP		/* Save return to __exit */
+	MOV	D1RtP,D0Ar2
+	SETL	[A0.2++],D0FrT,D1RtP		/* Save return to fnMain */
+	ADD	D0FrT,D0FrT,#8			/* Advance puesdo-frame pointer */
+	MSETL	[A0.2],D0Ar6,D0Ar4		/* Save extra initial args */
+	MOVT	D1RtP,#HI(___TBIStart)		/* Start up code for new stack */
+	ADD	D1RtP,D1RtP,#LO(___TBIStart)
+	SETL	[A0.2++],D0FrT,D1RtP		/* Save return to ___TBIStart */
+	ADD	D0FrT,D0FrT,#(8*3)		/* Advance puesdo-frame pointer */
+	MOV	D0Re0,A0.2			/* Return pCtx for new thread */
+	MOV	D1Re0,#0			/* pCtx:0 is default Arg1:Arg2 */
+/*
+ * Generate initial TBICTX state
+ */
+	MOVT	D1Ar1,#HI($LSwitchExit)		/* Async restore code */
+	ADD	D1Ar1,D1Ar1,#LO($LSwitchExit)
+	MOVT	D0Ar2,#TBICTX_SOFT_BIT		/* Only soft thread state */
+	ADD	D0Ar6,A0.2,#TBICTX_BYTES	/* New A0StP */
+	MOV	D1Ar5,A1GbP			/* Same A1GbP */
+	MOV	D0Ar4,D0FrT			/* Initial A0FrP */
+	MOV	D1Ar3,A1LbP			/* Same A1LbP */
+	SETL	[A0.2++],D0Ar2,D1Ar1		/* Set header fields */
+	MSETL	[A0.2],D0Re0,D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+	MOV	D0Ar2,#0			/* Zero values */
+	MOV	D1Ar1,#0
+	SETL	[A0.2++],D0Ar2,D1Ar1		/* Zero CurrRPT, CurrBPOBITS, */
+	SETL	[A0.2++],D0Ar2,D1Ar1		/*      CurrMODE, and pCurrCBuf */
+	MSETL	[A0.2],D0Ar6,D0Ar4,D0Ar2,D0FrT D0_5 /* Set DX and then AX regs */
+	B	$LSwitchExit			/* All done! */
+	.size	___TBISwitchInit,.-___TBISwitchInit
+
+	.text
+	.balign	4
+	.global	___TBIStart
+	.type	___TBIStart,function
+___TBIStart:
+	MOV	D1Ar1,D1Re0			/* Pass TBIRES args to call */
+	MOV	D0Ar2,D0Re0
+	MGETL	D0Re0,D0Ar6,D0Ar4,[A0FrP]	/* Get hidden args */
+	SUB	A0StP,A0FrP,#(8*3)		/* Entry stack pointer */
+	MOV	A0FrP,D0Re0			/* Entry frame pointer */
+	MOVT	A1LbP,#HI(__exit)
+	ADD	A1LbP,A1LbP,#LO(__exit)
+	MOV	D1RtP,A1LbP			/* D1RtP = __exit */
+	MOV	PC,D1Re0			/* Jump into fnMain */
+	.size	___TBIStart,.-___TBIStart
+
+/*
+ * End of tbisoft.S
+ */
diff --git a/arch/metag/tbx/tbistring.c b/arch/metag/tbx/tbistring.c
new file mode 100644
index 0000000..f90cd08
--- /dev/null
+++ b/arch/metag/tbx/tbistring.c
@@ -0,0 +1,114 @@
+/*
+ * tbistring.c
+ *
+ * Copyright (C) 2001, 2002, 2003, 2005, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * String table functions provided as part of the thread binary interface for
+ * Meta processors
+ */
+
+#include <linux/export.h>
+#include <linux/string.h>
+#include <asm/tbx.h>
+
+/*
+ * There are not any functions to modify the string table currently, if these
+ * are required at some later point I suggest having a seperate module and
+ * ensuring that creating new entries does not interfere with reading old
+ * entries in any way.
+ */
+
+const TBISTR *__TBIFindStr(const TBISTR *start,
+			   const char *str, int match_len)
+{
+	const TBISTR *search = start;
+	bool exact = true;
+	const TBISEG *seg;
+
+	if (match_len < 0) {
+		/* Make match_len always positive for the inner loop */
+		match_len = -match_len;
+		exact = false;
+	} else {
+		/*
+		 * Also support historic behaviour, which expected match_len to
+		 * include null terminator
+		 */
+		if (match_len && str[match_len-1] == '\0')
+			match_len--;
+	}
+
+	if (!search) {
+		/* Find global string table segment */
+		seg = __TBIFindSeg(NULL, TBID_SEG(TBID_THREAD_GLOBAL,
+						  TBID_SEGSCOPE_GLOBAL,
+						  TBID_SEGTYPE_STRING));
+
+		if (!seg || seg->Bytes < sizeof(TBISTR))
+			/* No string table! */
+			return NULL;
+
+		/* Start of string table */
+		search = seg->pGAddr;
+	}
+
+	for (;;) {
+		while (!search->Tag)
+			/* Allow simple gaps which are just zero initialised */
+			search = (const TBISTR *)((const char *)search + 8);
+
+		if (search->Tag == METAG_TBI_STRE) {
+			/* Reached the end of the table */
+			search = NULL;
+			break;
+		}
+
+		if ((search->Len >= match_len) &&
+		    (!exact || (search->Len == match_len + 1)) &&
+		    (search->Tag != METAG_TBI_STRG)) {
+			/* Worth searching */
+			if (!strncmp(str, (const char *)search->String,
+				     match_len))
+				break;
+		}
+
+		/* Next entry */
+		search = (const TBISTR *)((const char *)search + search->Bytes);
+	}
+
+	return search;
+}
+
+const void *__TBITransStr(const char *str, int len)
+{
+	const TBISTR *search = NULL;
+	const void *res = NULL;
+
+	for (;;) {
+		/* Search onwards */
+		search = __TBIFindStr(search, str, len);
+
+		/* No translation returns NULL */
+		if (!search)
+			break;
+
+		/* Skip matching entries with no translation data */
+		if (search->TransLen != METAG_TBI_STRX) {
+			/* Calculate base of translation string */
+			res = (const char *)search->String +
+				((search->Len + 7) & ~7);
+			break;
+		}
+
+		/* Next entry */
+		search = (const TBISTR *)((const char *)search + search->Bytes);
+	}
+
+	/* Return base address of translation data or NULL */
+	return res;
+}
+EXPORT_SYMBOL(__TBITransStr);
diff --git a/arch/metag/tbx/tbitimer.S b/arch/metag/tbx/tbitimer.S
new file mode 100644
index 0000000..5dbedde
--- /dev/null
+++ b/arch/metag/tbx/tbitimer.S
@@ -0,0 +1,207 @@
+/*
+ * tbitimer.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * TBI timer support routines and data values
+ */
+
+	.file	"tbitimer.S"
+/*
+ * Get data structures and defines from the main C header
+ */
+#include <asm/tbx.h>
+
+	.data
+	.balign	8
+	.global	___TBITimeB
+	.type	___TBITimeB,object
+___TBITimeB:
+	.quad	0		/* Background 'lost' ticks */
+	.size	___TBITimeB,.-___TBITimeB
+
+	.data
+	.balign	8
+	.global	___TBITimeI
+	.type	___TBITimeI,object
+___TBITimeI:
+	.quad	0		/* Interrupt 'lost' ticks */
+	.size	___TBITimeI,.-___TBITimeI
+
+	.data
+	.balign	8
+	.global	___TBITimes
+	.type	___TBITimes,object
+___TBITimes:
+	.long	___TBITimeB	/* Table of 'lost' tick values */
+	.long	___TBITimeI
+	.size	___TBITimes,.-___TBITimes
+
+/*
+ * Flag bits for control of ___TBITimeCore
+ */
+#define TIMER_SET_BIT  1
+#define TIMER_ADD_BIT  2
+
+/*
+ * Initialise or stop timer support
+ *
+ * Register Usage: D1Ar1 holds Id, D1Ar2 is initial delay or 0
+ *                 D0FrT is used to call ___TBITimeCore
+ *                 D0Re0 is used for the result which is TXSTAT_TIMER_BIT
+ *                 D0Ar4, D1Ar5, D0Ar6 are all used as scratch
+ *		  Other registers are those set by ___TBITimeCore
+ *			A0.3 is assumed to point at ___TBITime(I/B)
+ */
+	.text
+	.balign	4
+	.global	___TBITimerCtrl
+	.type	___TBITimerCtrl,function
+___TBITimerCtrl:
+	MOV	D1Ar5,#TIMER_SET_BIT		/* Timer SET request */
+	MOVT	D0FrT,#HI(___TBITimeCore)	/* Get timer core reg values */
+	CALL	D0FrT,#LO(___TBITimeCore)	/* and perform register update */
+	NEGS	D0Ar6,D0Ar2			/* Set flags from time-stamp */
+	ASR	D1Ar5,D0Ar6,#31			/* Sign extend D0Ar6 into D1Ar5 */
+	SETLNZ	[A0.3],D0Ar6,D1Ar5		/* ___TBITime(B/I)=-Start if enable */
+	MOV	PC,D1RtP			/* Return */
+	.size	___TBITimerCtrl,.-___TBITimerCtrl
+	
+/*
+ * Return ___TBITimeStamp value
+ *
+ * Register Usage: D1Ar1 holds Id
+ *                 D0FrT is used to call ___TBITimeCore
+ *                 D0Re0, D1Re0 is used for the result
+ *                 D1Ar3, D0Ar4, D1Ar5
+ *		  Other registers are those set by ___TBITimeCore
+ *			D0Ar6 is assumed to be the timer value read
+ *			A0.3 is assumed to point at ___TBITime(I/B)
+ */
+	.text
+	.balign	4
+	.global	___TBITimeStamp
+	.type	___TBITimeStamp,function
+___TBITimeStamp:
+	MOV	D1Ar5,#0			/* Timer GET request */
+	MOVT	D0FrT,#HI(___TBITimeCore)	/* Get timer core reg values */
+	CALL	D0FrT,#LO(___TBITimeCore)	/* with no register update */
+	ADDS	D0Re0,D0Ar4,D0Ar6		/* Add current time value */
+	ADD	D1Re0,D1Ar3,D1Ar5		/*  to 64-bit signed extend time */
+	ADDCS	D1Re0,D1Re0,#1			/* Support borrow too */
+	MOV	PC,D1RtP			/* Return */
+	.size	___TBITimeStamp,.-___TBITimeStamp
+
+/*
+ * Perform ___TBITimerAdd logic
+ *
+ * Register Usage: D1Ar1 holds Id, D0Ar2 holds value to be added to the timer
+ *                 D0Re0 is used for the result - new TIMER value
+ *                 D1Ar5, D0Ar6 are used as scratch
+ *		  Other registers are those set by ___TBITimeCore
+ *			D0Ar6 is assumed to be the timer value read
+ *			D0Ar4, D1Ar3 is the current value of ___TBITime(B/I)
+ */
+	.text
+	.balign	4
+	.global	___TBITimerAdd
+	.type	___TBITimerAdd,function
+___TBITimerAdd:
+	MOV	D1Ar5,#TIMER_ADD_BIT		/* Timer ADD request */
+	MOVT	D0FrT,#HI(___TBITimeCore)	/* Get timer core reg values */
+	CALL	D0FrT,#LO(___TBITimeCore)	/* with no register update */
+	ADD	D0Re0,D0Ar2,D0Ar6		/* Regenerate new value = result */
+	NEG	D0Ar2,D0Ar2			/* Negate delta */
+	ASR	D1Re0,D0Ar2,#31			/* Sign extend negated delta */
+	ADDS	D0Ar4,D0Ar4,D0Ar2		/* Add time added to ... */
+	ADD	D1Ar3,D1Ar3,D1Re0		/* ... real timer ... */
+	ADDCS	D1Ar3,D1Ar3,#1			/* ... with carry */
+	SETL	[A0.3],D0Ar4,D1Ar3		/* Update ___TBITime(B/I) */
+	MOV	PC,D1RtP			/* Return */
+	.size	___TBITimerAdd,.-___TBITimerAdd
+
+#ifdef TBI_1_4
+/*
+ * Perform ___TBITimerDeadline logic
+ *    NB: Delays are positive compared to the Wait values which are -ive
+ *
+ * Register Usage: D1Ar1 holds Id
+ *                 D0Ar2 holds Delay requested
+ *                 D0Re0 is used for the result - old TIMER Delay value
+ *                 D1Ar5, D0Ar6 are used as scratch
+ *                 Other registers are those set by ___TBITimeCore
+ *                 D0Ar6 is assumed to be the timer value read
+ *                 D0Ar4, D1Ar3 is the current value of ___TBITime(B/I)
+ *
+ */
+        .text
+        .type   ___TBITimerDeadline,function
+        .global ___TBITimerDeadline
+        .align  2
+___TBITimerDeadline:
+	MOV	D1Ar5,#TIMER_SET_BIT		/* Timer SET request */
+	MOVT	D0FrT,#HI(___TBITimeCore)	/* Get timer core reg values */
+	CALL	D0FrT,#LO(___TBITimeCore)	/* with no register update */
+	MOV	D0Re0,D0Ar6			/* Old value read = result */
+	SUB	D0Ar2,D0Ar6,D0Ar2		/* Delta from (old - new) */
+	ASR	D1Re0,D0Ar2,#31			/* Sign extend delta */
+	ADDS	D0Ar4,D0Ar4,D0Ar2		/* Add time added to ... */
+	ADD	D1Ar3,D1Ar3,D1Re0		/* ... real timer ... */
+	ADDCS	D1Ar3,D1Ar3,#1			/* ... with carry */
+	SETL	[A0.3],D0Ar4,D1Ar3		/* Update ___TBITime(B/I) */
+	MOV	PC,D1RtP			/* Return */
+        .size   ___TBITimerDeadline,.-___TBITimerDeadline
+#endif /* TBI_1_4 */
+
+/*
+ * Perform core timer access logic
+ *
+ * Register Usage: D1Ar1 holds Id, D0Ar2 holds input value for SET and
+ *                                             input value for ADD
+ *                 D1Ar5 controls op as SET or ADD as bit values
+ *                 On return D0Ar6, D1Ar5 holds the old 64-bit timer value
+ *                 A0.3 is setup to point at ___TBITime(I/B)
+ *                 A1.3 is setup to point at ___TBITimes
+ *                 D0Ar4, D1Ar3 is setup to value of ___TBITime(I/B)
+ */
+	.text
+	.balign	4
+	.global	___TBITimeCore
+	.type	___TBITimeCore,function
+___TBITimeCore:
+#ifndef METAC_0_1
+	TSTT	D1Ar1,#HI(TBID_ISTAT_BIT)	/* Interrupt level timer? */
+#endif
+	MOVT	A1LbP,#HI(___TBITimes)
+	ADD	A1LbP,A1LbP,#LO(___TBITimes)
+	MOV	A1.3,A1LbP			/* Get ___TBITimes address */
+#ifndef METAC_0_1
+	BNZ	$LTimeCoreI			/* Yes: Service TXTIMERI! */
+#endif
+	LSRS	D1Ar5,D1Ar5,#1			/* Carry = SET, Zero = !ADD */
+	GETD	A0.3,[A1.3+#0]			/* A0.3 == &___TBITimeB */
+	MOV	D0Ar6,TXTIMER			/* Always GET old value */
+	MOVCS	TXTIMER,D0Ar2			/* Conditional SET operation */
+	ADDNZ	TXTIMER,D0Ar2,D0Ar6		/* Conditional ADD operation */
+#ifndef METAC_0_1
+	B	$LTimeCoreEnd
+$LTimeCoreI:
+	LSRS	D1Ar5,D1Ar5,#1			/* Carry = SET, Zero = !ADD */
+	GETD	A0.3,[A1.3+#4]			/* A0.3 == &___TBITimeI */
+	MOV	D0Ar6,TXTIMERI			/* Always GET old value */
+	MOVCS	TXTIMERI,D0Ar2			/* Conditional SET operation */
+	ADDNZ	TXTIMERI,D0Ar2,D0Ar6		/* Conditional ADD operation */
+$LTimeCoreEnd:
+#endif
+	ASR	D1Ar5,D0Ar6,#31			/* Sign extend D0Ar6 into D1Ar5 */
+	GETL	D0Ar4,D1Ar3,[A0.3]		/* Read ___TBITime(B/I) */
+	MOV	PC,D0FrT			/* Return quickly */
+	.size	___TBITimeCore,.-___TBITimeCore
+
+/*
+ * End of tbitimer.S
+ */
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index e920cbe..e507ab7 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -62,3 +62,8 @@ config CLKSRC_DBX500_PRCMU_SCHED_CLOCK
 
 config ARM_ARCH_TIMER
 	bool
+
+config CLKSRC_METAG_GENERIC
+	def_bool y if METAG
+	help
+	  This option enables support for the Meta per-thread timers.
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 7d671b8..4d8283a 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_ARCH_TEGRA)	+= tegra20_timer.o
 obj-$(CONFIG_VT8500_TIMER)	+= vt8500_timer.o
 
 obj-$(CONFIG_ARM_ARCH_TIMER)		+= arm_arch_timer.o
+obj-$(CONFIG_CLKSRC_METAG_GENERIC)	+= metag_generic.o
diff --git a/drivers/clocksource/metag_generic.c b/drivers/clocksource/metag_generic.c
new file mode 100644
index 0000000..ade7513
--- /dev/null
+++ b/drivers/clocksource/metag_generic.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2005-2013 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Support for Meta per-thread timers.
+ *
+ * Meta hardware threads have 2 timers. The background timer (TXTIMER) is used
+ * as a free-running time base (hz clocksource), and the interrupt timer
+ * (TXTIMERI) is used for the timer interrupt (clock event). Both counters
+ * traditionally count at approximately 1MHz.
+ */
+
+#include <clocksource/metag_generic.h>
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+
+#include <asm/clock.h>
+#include <asm/hwthread.h>
+#include <asm/core_reg.h>
+#include <asm/metag_mem.h>
+#include <asm/tbx.h>
+
+#define HARDWARE_FREQ		1000000	/* 1MHz */
+#define HARDWARE_DIV		1	/* divide by 1 = 1MHz clock */
+#define HARDWARE_TO_NS_SHIFT	10	/* convert ticks to ns */
+
+static unsigned int hwtimer_freq = HARDWARE_FREQ;
+static DEFINE_PER_CPU(struct clock_event_device, local_clockevent);
+static DEFINE_PER_CPU(char [11], local_clockevent_name);
+
+static int metag_timer_set_next_event(unsigned long delta,
+				      struct clock_event_device *dev)
+{
+	__core_reg_set(TXTIMERI, -delta);
+	return 0;
+}
+
+static void metag_timer_set_mode(enum clock_event_mode mode,
+				 struct clock_event_device *evt)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_ONESHOT:
+	case CLOCK_EVT_MODE_RESUME:
+		break;
+
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		/* We should disable the IRQ here */
+		break;
+
+	case CLOCK_EVT_MODE_PERIODIC:
+	case CLOCK_EVT_MODE_UNUSED:
+		WARN_ON(1);
+		break;
+	};
+}
+
+static cycle_t metag_clocksource_read(struct clocksource *cs)
+{
+	return __core_reg_get(TXTIMER);
+}
+
+static struct clocksource clocksource_metag = {
+	.name = "META",
+	.rating = 200,
+	.mask = CLOCKSOURCE_MASK(32),
+	.read = metag_clocksource_read,
+	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static irqreturn_t metag_timer_interrupt(int irq, void *dummy)
+{
+	struct clock_event_device *evt = &__get_cpu_var(local_clockevent);
+
+	evt->event_handler(evt);
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction metag_timer_irq = {
+	.name = "META core timer",
+	.handler = metag_timer_interrupt,
+	.flags = IRQF_TIMER | IRQF_IRQPOLL | IRQF_PERCPU,
+};
+
+unsigned long long sched_clock(void)
+{
+	unsigned long long ticks = __core_reg_get(TXTIMER);
+	return ticks << HARDWARE_TO_NS_SHIFT;
+}
+
+static void __cpuinit arch_timer_setup(unsigned int cpu)
+{
+	unsigned int txdivtime;
+	struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
+	char *name = per_cpu(local_clockevent_name, cpu);
+
+	txdivtime = __core_reg_get(TXDIVTIME);
+
+	txdivtime &= ~TXDIVTIME_DIV_BITS;
+	txdivtime |= (HARDWARE_DIV & TXDIVTIME_DIV_BITS);
+
+	__core_reg_set(TXDIVTIME, txdivtime);
+
+	sprintf(name, "META %d", cpu);
+	clk->name = name;
+	clk->features = CLOCK_EVT_FEAT_ONESHOT,
+
+	clk->rating = 200,
+	clk->shift = 12,
+	clk->irq = tbisig_map(TBID_SIGNUM_TRT),
+	clk->set_mode = metag_timer_set_mode,
+	clk->set_next_event = metag_timer_set_next_event,
+
+	clk->mult = div_sc(hwtimer_freq, NSEC_PER_SEC, clk->shift);
+	clk->max_delta_ns = clockevent_delta2ns(0x7fffffff, clk);
+	clk->min_delta_ns = clockevent_delta2ns(0xf, clk);
+	clk->cpumask = cpumask_of(cpu);
+
+	clockevents_register_device(clk);
+
+	/*
+	 * For all non-boot CPUs we need to synchronize our free
+	 * running clock (TXTIMER) with the boot CPU's clock.
+	 *
+	 * While this won't be accurate, it should be close enough.
+	 */
+	if (cpu) {
+		unsigned int thread0 = cpu_2_hwthread_id[0];
+		unsigned long val;
+
+		val = core_reg_read(TXUCT_ID, TXTIMER_REGNUM, thread0);
+		__core_reg_set(TXTIMER, val);
+	}
+}
+
+static int __cpuinit arch_timer_cpu_notify(struct notifier_block *self,
+					   unsigned long action, void *hcpu)
+{
+	int cpu = (long)hcpu;
+
+	switch (action) {
+	case CPU_STARTING:
+	case CPU_STARTING_FROZEN:
+		arch_timer_setup(cpu);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata arch_timer_cpu_nb = {
+	.notifier_call = arch_timer_cpu_notify,
+};
+
+int __init metag_generic_timer_init(void)
+{
+	/*
+	 * On Meta 2 SoCs, the actual frequency of the timer is based on the
+	 * Meta core clock speed divided by an integer, so it is only
+	 * approximately 1MHz. Calculating the real frequency here drastically
+	 * reduces clock skew on these SoCs.
+	 */
+#ifdef CONFIG_METAG_META21
+	hwtimer_freq = get_coreclock() / (metag_in32(EXPAND_TIMER_DIV) + 1);
+#endif
+	clocksource_register_hz(&clocksource_metag, hwtimer_freq);
+
+	setup_irq(tbisig_map(TBID_SIGNUM_TRT), &metag_timer_irq);
+
+	/* Configure timer on boot CPU */
+	arch_timer_setup(smp_processor_id());
+
+	/* Hook cpu boot to configure other CPU's timers */
+	register_cpu_notifier(&arch_timer_cpu_nb);
+
+	return 0;
+}
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index e65fbf2..98e3b87 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -2,6 +2,8 @@ obj-$(CONFIG_IRQCHIP)			+= irqchip.o
 
 obj-$(CONFIG_ARCH_BCM2835)		+= irq-bcm2835.o
 obj-$(CONFIG_ARCH_EXYNOS)		+= exynos-combiner.o
+obj-$(CONFIG_METAG)			+= irq-metag-ext.o
+obj-$(CONFIG_METAG_PERFCOUNTER_IRQS)	+= irq-metag.o
 obj-$(CONFIG_ARCH_SUNXI)		+= irq-sunxi.o
 obj-$(CONFIG_ARCH_SPEAR3XX)		+= spear-shirq.o
 obj-$(CONFIG_ARM_GIC)			+= irq-gic.o
diff --git a/drivers/irqchip/irq-metag-ext.c b/drivers/irqchip/irq-metag-ext.c
new file mode 100644
index 0000000..92c41ab
--- /dev/null
+++ b/drivers/irqchip/irq-metag-ext.c
@@ -0,0 +1,868 @@
+/*
+ * Meta External interrupt code.
+ *
+ * Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ *
+ * External interrupts on Meta are configured at two-levels, in the CPU core and
+ * in the external trigger block. Interrupts from SoC peripherals are
+ * multiplexed onto a single Meta CPU "trigger" - traditionally it has always
+ * been trigger 2 (TR2). For info on how de-multiplexing happens check out
+ * meta_intc_irq_demux().
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqchip/metag-ext.h>
+#include <linux/irqdomain.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/irq.h>
+#include <asm/hwthread.h>
+
+#define HWSTAT_STRIDE 8
+#define HWVEC_BLK_STRIDE 0x1000
+
+/**
+ * struct meta_intc_priv - private meta external interrupt data
+ * @nr_banks:		Number of interrupt banks
+ * @domain:		IRQ domain for all banks of external IRQs
+ * @unmasked:		Record of unmasked IRQs
+ * @levels_altered:	Record of altered level bits
+ */
+struct meta_intc_priv {
+	unsigned int		nr_banks;
+	struct irq_domain	*domain;
+
+	unsigned long		unmasked[4];
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+	unsigned long		levels_altered[4];
+#endif
+};
+
+/* Private data for the one and only external interrupt controller */
+static struct meta_intc_priv meta_intc_priv;
+
+/**
+ * meta_intc_offset() - Get the offset into the bank of a hardware IRQ number
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Bit offset into the IRQ's bank registers
+ */
+static unsigned int meta_intc_offset(irq_hw_number_t hw)
+{
+	return hw & 0x1f;
+}
+
+/**
+ * meta_intc_bank() - Get the bank number of a hardware IRQ number
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Bank number indicating which register the IRQ's bits are
+ */
+static unsigned int meta_intc_bank(irq_hw_number_t hw)
+{
+	return hw >> 5;
+}
+
+/**
+ * meta_intc_stat_addr() - Get the address of a HWSTATEXT register
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Address of a HWSTATEXT register containing the status bit for
+ *		the specified hardware IRQ number
+ */
+static void __iomem *meta_intc_stat_addr(irq_hw_number_t hw)
+{
+	return (void __iomem *)(HWSTATEXT +
+				HWSTAT_STRIDE * meta_intc_bank(hw));
+}
+
+/**
+ * meta_intc_level_addr() - Get the address of a HWLEVELEXT register
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Address of a HWLEVELEXT register containing the sense bit for
+ *		the specified hardware IRQ number
+ */
+static void __iomem *meta_intc_level_addr(irq_hw_number_t hw)
+{
+	return (void __iomem *)(HWLEVELEXT +
+				HWSTAT_STRIDE * meta_intc_bank(hw));
+}
+
+/**
+ * meta_intc_mask_addr() - Get the address of a HWMASKEXT register
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Address of a HWMASKEXT register containing the mask bit for the
+ *		specified hardware IRQ number
+ */
+static void __iomem *meta_intc_mask_addr(irq_hw_number_t hw)
+{
+	return (void __iomem *)(HWMASKEXT +
+				HWSTAT_STRIDE * meta_intc_bank(hw));
+}
+
+/**
+ * meta_intc_vec_addr() - Get the vector address of a hardware interrupt
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Address of a HWVECEXT register controlling the core trigger to
+ *		vector the IRQ onto
+ */
+static inline void __iomem *meta_intc_vec_addr(irq_hw_number_t hw)
+{
+	return (void __iomem *)(HWVEC0EXT +
+				HWVEC_BLK_STRIDE * meta_intc_bank(hw) +
+				HWVECnEXT_STRIDE * meta_intc_offset(hw));
+}
+
+/**
+ * meta_intc_startup_irq() - set up an external irq
+ * @data:	data for the external irq to start up
+ *
+ * Multiplex interrupts for irq onto TR2. Clear any pending interrupts and
+ * unmask irq, both using the appropriate callbacks.
+ */
+static unsigned int meta_intc_startup_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+	int thread = hard_processor_id();
+
+	/* Perform any necessary acking. */
+	if (data->chip->irq_ack)
+		data->chip->irq_ack(data);
+
+	/* Wire up this interrupt to the core with HWVECxEXT. */
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+	/* Perform any necessary unmasking. */
+	data->chip->irq_unmask(data);
+
+	return 0;
+}
+
+/**
+ * meta_intc_shutdown_irq() - turn off an external irq
+ * @data:	data for the external irq to turn off
+ *
+ * Mask irq using the appropriate callback and stop muxing it onto TR2.
+ */
+static void meta_intc_shutdown_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+
+	/* Mask the IRQ */
+	data->chip->irq_mask(data);
+
+	/*
+	 * Disable the IRQ at the core by removing the interrupt from
+	 * the HW vector mapping.
+	 */
+	metag_out32(0, vec_addr);
+}
+
+/**
+ * meta_intc_ack_irq() - acknowledge an external irq
+ * @data:	data for the external irq to ack
+ *
+ * Clear down an edge interrupt in the status register.
+ */
+static void meta_intc_ack_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *stat_addr = meta_intc_stat_addr(hw);
+
+	/* Ack the int, if it is still 'on'.
+	 * NOTE - this only works for edge triggered interrupts.
+	 */
+	if (metag_in32(stat_addr) & bit)
+		metag_out32(bit, stat_addr);
+}
+
+/**
+ * record_irq_is_masked() - record the IRQ masked so it doesn't get handled
+ * @data:	data for the external irq to record
+ *
+ * This should get called whenever an external IRQ is masked (by whichever
+ * callback is used). It records the IRQ masked so that it doesn't get handled
+ * if it still shows up in the status register.
+ */
+static void record_irq_is_masked(struct irq_data *data)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	irq_hw_number_t hw = data->hwirq;
+
+	clear_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]);
+}
+
+/**
+ * record_irq_is_unmasked() - record the IRQ unmasked so it can be handled
+ * @data:	data for the external irq to record
+ *
+ * This should get called whenever an external IRQ is unmasked (by whichever
+ * callback is used). It records the IRQ unmasked so that it gets handled if it
+ * shows up in the status register.
+ */
+static void record_irq_is_unmasked(struct irq_data *data)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	irq_hw_number_t hw = data->hwirq;
+
+	set_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]);
+}
+
+/*
+ * For use by wrapper IRQ drivers
+ */
+
+/**
+ * meta_intc_mask_irq_simple() - minimal mask used by wrapper IRQ drivers
+ * @data:	data for the external irq being masked
+ *
+ * This should be called by any wrapper IRQ driver mask functions. it doesn't do
+ * any masking but records the IRQ as masked so that the core code knows the
+ * mask has taken place. It is the callers responsibility to ensure that the IRQ
+ * won't trigger an interrupt to the core.
+ */
+void meta_intc_mask_irq_simple(struct irq_data *data)
+{
+	record_irq_is_masked(data);
+}
+
+/**
+ * meta_intc_unmask_irq_simple() - minimal unmask used by wrapper IRQ drivers
+ * @data:	data for the external irq being unmasked
+ *
+ * This should be called by any wrapper IRQ driver unmask functions. it doesn't
+ * do any unmasking but records the IRQ as unmasked so that the core code knows
+ * the unmask has taken place. It is the callers responsibility to ensure that
+ * the IRQ can now trigger an interrupt to the core.
+ */
+void meta_intc_unmask_irq_simple(struct irq_data *data)
+{
+	record_irq_is_unmasked(data);
+}
+
+
+/**
+ * meta_intc_mask_irq() - mask an external irq using HWMASKEXT
+ * @data:	data for the external irq to mask
+ *
+ * This is a default implementation of a mask function which makes use of the
+ * HWMASKEXT registers available in newer versions.
+ *
+ * Earlier versions without these registers should use SoC level IRQ masking
+ * which call the meta_intc_*_simple() functions above, or if that isn't
+ * available should use the fallback meta_intc_*_nomask() functions below.
+ */
+static void meta_intc_mask_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *mask_addr = meta_intc_mask_addr(hw);
+	unsigned long flags;
+
+	record_irq_is_masked(data);
+
+	/* update the interrupt mask */
+	__global_lock2(flags);
+	metag_out32(metag_in32(mask_addr) & ~bit, mask_addr);
+	__global_unlock2(flags);
+}
+
+/**
+ * meta_intc_unmask_irq() - unmask an external irq using HWMASKEXT
+ * @data:	data for the external irq to unmask
+ *
+ * This is a default implementation of an unmask function which makes use of the
+ * HWMASKEXT registers available on new versions. It should be paired with
+ * meta_intc_mask_irq() above.
+ */
+static void meta_intc_unmask_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *mask_addr = meta_intc_mask_addr(hw);
+	unsigned long flags;
+
+	record_irq_is_unmasked(data);
+
+	/* update the interrupt mask */
+	__global_lock2(flags);
+	metag_out32(metag_in32(mask_addr) | bit, mask_addr);
+	__global_unlock2(flags);
+}
+
+/**
+ * meta_intc_mask_irq_nomask() - mask an external irq by unvectoring
+ * @data:	data for the external irq to mask
+ *
+ * This is the version of the mask function for older versions which don't have
+ * HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the IRQ is
+ * unvectored from the core and retriggered if necessary later.
+ */
+static void meta_intc_mask_irq_nomask(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+
+	record_irq_is_masked(data);
+
+	/* there is no interrupt mask, so unvector the interrupt */
+	metag_out32(0, vec_addr);
+}
+
+/**
+ * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring
+ * @data:	data for the external irq to unmask
+ *
+ * This is the version of the unmask function for older versions which don't
+ * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the
+ * IRQ is revectored back to the core and retriggered if necessary.
+ *
+ * The retriggering done by this function is specific to edge interrupts.
+ */
+static void meta_intc_unmask_edge_irq_nomask(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *stat_addr = meta_intc_stat_addr(hw);
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+	unsigned int thread = hard_processor_id();
+
+	record_irq_is_unmasked(data);
+
+	/* there is no interrupt mask, so revector the interrupt */
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+	/*
+	 * Re-trigger interrupt
+	 *
+	 * Writing a 1 toggles, and a 0->1 transition triggers. We only
+	 * retrigger if the status bit is already set, which means we
+	 * need to clear it first. Retriggering is fundamentally racy
+	 * because if the interrupt fires again after we clear it we
+	 * could end up clearing it again and the interrupt handler
+	 * thinking it hasn't fired. Therefore we need to keep trying to
+	 * retrigger until the bit is set.
+	 */
+	if (metag_in32(stat_addr) & bit) {
+		metag_out32(bit, stat_addr);
+		while (!(metag_in32(stat_addr) & bit))
+			metag_out32(bit, stat_addr);
+	}
+}
+
+/**
+ * meta_intc_unmask_level_irq_nomask() - unmask a level irq by revectoring
+ * @data:	data for the external irq to unmask
+ *
+ * This is the version of the unmask function for older versions which don't
+ * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the
+ * IRQ is revectored back to the core and retriggered if necessary.
+ *
+ * The retriggering done by this function is specific to level interrupts.
+ */
+static void meta_intc_unmask_level_irq_nomask(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *stat_addr = meta_intc_stat_addr(hw);
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+	unsigned int thread = hard_processor_id();
+
+	record_irq_is_unmasked(data);
+
+	/* there is no interrupt mask, so revector the interrupt */
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+	/* Re-trigger interrupt */
+	/* Writing a 1 triggers interrupt */
+	if (metag_in32(stat_addr) & bit)
+		metag_out32(bit, stat_addr);
+}
+
+/**
+ * meta_intc_irq_set_type() - set the type of an external irq
+ * @data:	data for the external irq to set the type of
+ * @flow_type:	new irq flow type
+ *
+ * Set the flow type of an external interrupt. This updates the irq chip and irq
+ * handler depending on whether the irq is edge or level sensitive (the polarity
+ * is ignored), and also sets up the bit in HWLEVELEXT so the hardware knows
+ * when to trigger.
+ */
+static int meta_intc_irq_set_type(struct irq_data *data, unsigned int flow_type)
+{
+#ifdef CONFIG_METAG_SUSPEND_MEM
+	struct meta_intc_priv *priv = &meta_intc_priv;
+#endif
+	unsigned int irq = data->irq;
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *level_addr = meta_intc_level_addr(hw);
+	unsigned long flags;
+	unsigned int level;
+
+	/* update the chip/handler */
+	if (flow_type & IRQ_TYPE_LEVEL_MASK)
+		__irq_set_chip_handler_name_locked(irq, &meta_intc_level_chip,
+						   handle_level_irq, NULL);
+	else
+		__irq_set_chip_handler_name_locked(irq, &meta_intc_edge_chip,
+						   handle_edge_irq, NULL);
+
+	/* and clear/set the bit in HWLEVELEXT */
+	__global_lock2(flags);
+	level = metag_in32(level_addr);
+	if (flow_type & IRQ_TYPE_LEVEL_MASK)
+		level |= bit;
+	else
+		level &= ~bit;
+	metag_out32(level, level_addr);
+#ifdef CONFIG_METAG_SUSPEND_MEM
+	priv->levels_altered[meta_intc_bank(hw)] |= bit;
+#endif
+	__global_unlock2(flags);
+
+	return 0;
+}
+
+/**
+ * meta_intc_irq_demux() - external irq de-multiplexer
+ * @irq:	the virtual interrupt number
+ * @desc:	the interrupt description structure for this irq
+ *
+ * The cpu receives an interrupt on TR2 when a SoC interrupt has occurred. It is
+ * this function's job to demux this irq and figure out exactly which external
+ * irq needs servicing.
+ *
+ * Whilst using TR2 to detect external interrupts is a software convention it is
+ * (hopefully) unlikely to change.
+ */
+static void meta_intc_irq_demux(unsigned int irq, struct irq_desc *desc)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	irq_hw_number_t hw;
+	unsigned int bank, irq_no, status;
+	void __iomem *stat_addr = meta_intc_stat_addr(0);
+
+	/*
+	 * Locate which interrupt has caused our handler to run.
+	 */
+	for (bank = 0; bank < priv->nr_banks; ++bank) {
+		/* Which interrupts are currently pending in this bank? */
+recalculate:
+		status = metag_in32(stat_addr) & priv->unmasked[bank];
+
+		for (hw = bank*32; status; status >>= 1, ++hw) {
+			if (status & 0x1) {
+				/*
+				 * Map the hardware IRQ number to a virtual
+				 * Linux IRQ number.
+				 */
+				irq_no = irq_linear_revmap(priv->domain, hw);
+
+				/*
+				 * Only fire off external interrupts that are
+				 * registered to be handled by the kernel.
+				 * Other external interrupts are probably being
+				 * handled by other Meta hardware threads.
+				 */
+				generic_handle_irq(irq_no);
+
+				/*
+				 * The handler may have re-enabled interrupts
+				 * which could have caused a nested invocation
+				 * of this code and make the copy of the
+				 * status register we are using invalid.
+				 */
+				goto recalculate;
+			}
+		}
+		stat_addr += HWSTAT_STRIDE;
+	}
+}
+
+#ifdef CONFIG_SMP
+/**
+ * meta_intc_set_affinity() - set the affinity for an interrupt
+ * @data:	data for the external irq to set the affinity of
+ * @cpumask:	cpu mask representing cpus which can handle the interrupt
+ * @force:	whether to force (ignored)
+ *
+ * Revector the specified external irq onto a specific cpu's TR2 trigger, so
+ * that that cpu tends to be the one who handles it.
+ */
+static int meta_intc_set_affinity(struct irq_data *data,
+				  const struct cpumask *cpumask, bool force)
+{
+	irq_hw_number_t hw = data->hwirq;
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+	unsigned int cpu, thread;
+
+	/*
+	 * Wire up this interrupt from HWVECxEXT to the Meta core.
+	 *
+	 * Note that we can't wire up HWVECxEXT to interrupt more than
+	 * one cpu (the interrupt code doesn't support it), so we just
+	 * pick the first cpu we find in 'cpumask'.
+	 */
+	cpu = cpumask_any(cpumask);
+	thread = cpu_2_hwthread_id[cpu];
+
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+	return 0;
+}
+#else
+#define meta_intc_set_affinity	NULL
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+#define META_INTC_CHIP_FLAGS	(IRQCHIP_MASK_ON_SUSPEND \
+				| IRQCHIP_SKIP_SET_WAKE)
+#else
+#define META_INTC_CHIP_FLAGS	0
+#endif
+
+/* public edge/level irq chips which SoCs can override */
+
+struct irq_chip meta_intc_edge_chip = {
+	.irq_startup		= meta_intc_startup_irq,
+	.irq_shutdown		= meta_intc_shutdown_irq,
+	.irq_ack		= meta_intc_ack_irq,
+	.irq_mask		= meta_intc_mask_irq,
+	.irq_unmask		= meta_intc_unmask_irq,
+	.irq_set_type		= meta_intc_irq_set_type,
+	.irq_set_affinity	= meta_intc_set_affinity,
+	.flags			= META_INTC_CHIP_FLAGS,
+};
+
+struct irq_chip meta_intc_level_chip = {
+	.irq_startup		= meta_intc_startup_irq,
+	.irq_shutdown		= meta_intc_shutdown_irq,
+	.irq_set_type		= meta_intc_irq_set_type,
+	.irq_mask		= meta_intc_mask_irq,
+	.irq_unmask		= meta_intc_unmask_irq,
+	.irq_set_affinity	= meta_intc_set_affinity,
+	.flags			= META_INTC_CHIP_FLAGS,
+};
+
+/**
+ * meta_intc_map() - map an external irq
+ * @d:		irq domain of external trigger block
+ * @irq:	virtual irq number
+ * @hw:		hardware irq number within external trigger block
+ *
+ * This sets up a virtual irq for a specified hardware interrupt. The irq chip
+ * and handler is configured, using the HWLEVELEXT registers to determine
+ * edge/level flow type. These registers will have been set when the irq type is
+ * set (or set to a default at init time).
+ */
+static int meta_intc_map(struct irq_domain *d, unsigned int irq,
+			 irq_hw_number_t hw)
+{
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *level_addr = meta_intc_level_addr(hw);
+
+	/* Go by the current sense in the HWLEVELEXT register */
+	if (metag_in32(level_addr) & bit)
+		irq_set_chip_and_handler(irq, &meta_intc_level_chip,
+					 handle_level_irq);
+	else
+		irq_set_chip_and_handler(irq, &meta_intc_edge_chip,
+					 handle_edge_irq);
+	return 0;
+}
+
+static const struct irq_domain_ops meta_intc_domain_ops = {
+	.map = meta_intc_map,
+	.xlate = irq_domain_xlate_twocell,
+};
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+
+/**
+ * struct meta_intc_context - suspend context
+ * @levels:	State of HWLEVELEXT registers
+ * @masks:	State of HWMASKEXT registers
+ * @vectors:	State of HWVECEXT registers
+ * @txvecint:	State of TxVECINT registers
+ *
+ * This structure stores the IRQ state across suspend.
+ */
+struct meta_intc_context {
+	u32 levels[4];
+	u32 masks[4];
+	u8 vectors[4*32];
+
+	u8 txvecint[4][4];
+};
+
+/* suspend context */
+static struct meta_intc_context *meta_intc_context;
+
+/**
+ * meta_intc_suspend() - store irq state
+ *
+ * To avoid interfering with other threads we only save the IRQ state of IRQs in
+ * use by Linux.
+ */
+static int meta_intc_suspend(void)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	int i, j;
+	irq_hw_number_t hw;
+	unsigned int bank;
+	unsigned long flags;
+	struct meta_intc_context *context;
+	void __iomem *level_addr, *mask_addr, *vec_addr;
+	u32 mask, bit;
+
+	context = kzalloc(sizeof(*context), GFP_ATOMIC);
+	if (!context)
+		return -ENOMEM;
+
+	hw = 0;
+	level_addr = meta_intc_level_addr(0);
+	mask_addr = meta_intc_mask_addr(0);
+	for (bank = 0; bank < priv->nr_banks; ++bank) {
+		vec_addr = meta_intc_vec_addr(hw);
+
+		/* create mask of interrupts in use */
+		mask = 0;
+		for (bit = 1; bit; bit <<= 1) {
+			i = irq_linear_revmap(priv->domain, hw);
+			/* save mapped irqs which are enabled or have actions */
+			if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) ||
+				  irq_has_action(i))) {
+				mask |= bit;
+
+				/* save trigger vector */
+				context->vectors[hw] = metag_in32(vec_addr);
+			}
+
+			++hw;
+			vec_addr += HWVECnEXT_STRIDE;
+		}
+
+		/* save level state if any IRQ levels altered */
+		if (priv->levels_altered[bank])
+			context->levels[bank] = metag_in32(level_addr);
+		/* save mask state if any IRQs in use */
+		if (mask)
+			context->masks[bank] = metag_in32(mask_addr);
+
+		level_addr += HWSTAT_STRIDE;
+		mask_addr += HWSTAT_STRIDE;
+	}
+
+	/* save trigger matrixing */
+	__global_lock2(flags);
+	for (i = 0; i < 4; ++i)
+		for (j = 0; j < 4; ++j)
+			context->txvecint[i][j] = metag_in32(T0VECINT_BHALT +
+							     TnVECINT_STRIDE*i +
+							     8*j);
+	__global_unlock2(flags);
+
+	meta_intc_context = context;
+	return 0;
+}
+
+/**
+ * meta_intc_resume() - restore saved irq state
+ *
+ * Restore the saved IRQ state and drop it.
+ */
+static void meta_intc_resume(void)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	int i, j;
+	irq_hw_number_t hw;
+	unsigned int bank;
+	unsigned long flags;
+	struct meta_intc_context *context = meta_intc_context;
+	void __iomem *level_addr, *mask_addr, *vec_addr;
+	u32 mask, bit, tmp;
+
+	meta_intc_context = NULL;
+
+	hw = 0;
+	level_addr = meta_intc_level_addr(0);
+	mask_addr = meta_intc_mask_addr(0);
+	for (bank = 0; bank < priv->nr_banks; ++bank) {
+		vec_addr = meta_intc_vec_addr(hw);
+
+		/* create mask of interrupts in use */
+		mask = 0;
+		for (bit = 1; bit; bit <<= 1) {
+			i = irq_linear_revmap(priv->domain, hw);
+			/* restore mapped irqs, enabled or with actions */
+			if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) ||
+				  irq_has_action(i))) {
+				mask |= bit;
+
+				/* restore trigger vector */
+				metag_out32(context->vectors[hw], vec_addr);
+			}
+
+			++hw;
+			vec_addr += HWVECnEXT_STRIDE;
+		}
+
+		if (mask) {
+			/* restore mask state */
+			__global_lock2(flags);
+			tmp = metag_in32(mask_addr);
+			tmp = (tmp & ~mask) | (context->masks[bank] & mask);
+			metag_out32(tmp, mask_addr);
+			__global_unlock2(flags);
+		}
+
+		mask = priv->levels_altered[bank];
+		if (mask) {
+			/* restore level state */
+			__global_lock2(flags);
+			tmp = metag_in32(level_addr);
+			tmp = (tmp & ~mask) | (context->levels[bank] & mask);
+			metag_out32(tmp, level_addr);
+			__global_unlock2(flags);
+		}
+
+		level_addr += HWSTAT_STRIDE;
+		mask_addr += HWSTAT_STRIDE;
+	}
+
+	/* restore trigger matrixing */
+	__global_lock2(flags);
+	for (i = 0; i < 4; ++i) {
+		for (j = 0; j < 4; ++j) {
+			metag_out32(context->txvecint[i][j],
+				    T0VECINT_BHALT +
+				    TnVECINT_STRIDE*i +
+				    8*j);
+		}
+	}
+	__global_unlock2(flags);
+
+	kfree(context);
+}
+
+static struct syscore_ops meta_intc_syscore_ops = {
+	.suspend = meta_intc_suspend,
+	.resume = meta_intc_resume,
+};
+
+static void __init meta_intc_init_syscore_ops(struct meta_intc_priv *priv)
+{
+	register_syscore_ops(&meta_intc_syscore_ops);
+}
+#else
+#define meta_intc_init_syscore_ops(priv) do {} while (0)
+#endif
+
+/**
+ * meta_intc_init_cpu() - register with a Meta cpu
+ * @priv:	private interrupt controller data
+ * @cpu:	the CPU to register on
+ *
+ * Configure @cpu's TR2 irq so that we can demux external irqs.
+ */
+static void __init meta_intc_init_cpu(struct meta_intc_priv *priv, int cpu)
+{
+	unsigned int thread = cpu_2_hwthread_id[cpu];
+	unsigned int signum = TBID_SIGNUM_TR2(thread);
+	int irq = tbisig_map(signum);
+
+	/* Register the multiplexed IRQ handler */
+	irq_set_chained_handler(irq, meta_intc_irq_demux);
+	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
+}
+
+/**
+ * meta_intc_no_mask() - indicate lack of HWMASKEXT registers
+ *
+ * Called from SoC code (or init code below) to dynamically indicate the lack of
+ * HWMASKEXT registers (for example depending on some SoC revision register).
+ * This alters the irq mask and unmask callbacks to use the fallback
+ * unvectoring/retriggering technique instead of using HWMASKEXT registers.
+ */
+void __init meta_intc_no_mask(void)
+{
+	meta_intc_edge_chip.irq_mask	= meta_intc_mask_irq_nomask;
+	meta_intc_edge_chip.irq_unmask	= meta_intc_unmask_edge_irq_nomask;
+	meta_intc_level_chip.irq_mask	= meta_intc_mask_irq_nomask;
+	meta_intc_level_chip.irq_unmask	= meta_intc_unmask_level_irq_nomask;
+}
+
+/**
+ * init_external_IRQ() - initialise the external irq controller
+ *
+ * Set up the external irq controller using device tree properties. This is
+ * called from init_IRQ().
+ */
+int __init init_external_IRQ(void)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	struct device_node *node;
+	int ret, cpu;
+	u32 val;
+	bool no_masks = false;
+
+	node = of_find_compatible_node(NULL, NULL, "img,meta-intc");
+	if (!node)
+		return -ENOENT;
+
+	/* Get number of banks */
+	ret = of_property_read_u32(node, "num-banks", &val);
+	if (ret) {
+		pr_err("meta-intc: No num-banks property found\n");
+		return ret;
+	}
+	if (val < 1 || val > 4) {
+		pr_err("meta-intc: num-banks (%u) out of range\n", val);
+		return -EINVAL;
+	}
+	priv->nr_banks = val;
+
+	/* Are any mask registers present? */
+	if (of_get_property(node, "no-mask", NULL))
+		no_masks = true;
+
+	/* No HWMASKEXT registers present? */
+	if (no_masks)
+		meta_intc_no_mask();
+
+	/* Set up an IRQ domain */
+	/*
+	 * This is a legacy IRQ domain for now until all the platform setup code
+	 * has been converted to devicetree.
+	 */
+	priv->domain = irq_domain_add_linear(node, priv->nr_banks*32,
+					     &meta_intc_domain_ops, priv);
+	if (unlikely(!priv->domain)) {
+		pr_err("meta-intc: cannot add IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	/* Setup TR2 for all cpus. */
+	for_each_possible_cpu(cpu)
+		meta_intc_init_cpu(priv, cpu);
+
+	/* Set up system suspend/resume callbacks */
+	meta_intc_init_syscore_ops(priv);
+
+	pr_info("meta-intc: External IRQ controller initialised (%u IRQs)\n",
+		priv->nr_banks*32);
+
+	return 0;
+}
diff --git a/drivers/irqchip/irq-metag.c b/drivers/irqchip/irq-metag.c
new file mode 100644
index 0000000..8e94d7a
--- /dev/null
+++ b/drivers/irqchip/irq-metag.c
@@ -0,0 +1,343 @@
+/*
+ * Meta internal (HWSTATMETA) interrupt code.
+ *
+ * Copyright (C) 2011-2012 Imagination Technologies Ltd.
+ *
+ * This code is based on the code in SoC/common/irq.c and SoC/comet/irq.c
+ * The code base could be generalised/merged as a lot of the functionality is
+ * similar. Until this is done, we try to keep the code simple here.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irqdomain.h>
+
+#include <asm/irq.h>
+#include <asm/hwthread.h>
+
+#define PERF0VECINT		0x04820580
+#define PERF1VECINT		0x04820588
+#define PERF0TRIG_OFFSET	16
+#define PERF1TRIG_OFFSET	17
+
+/**
+ * struct metag_internal_irq_priv - private meta internal interrupt data
+ * @domain:		IRQ domain for all internal Meta IRQs (HWSTATMETA)
+ * @unmasked:		Record of unmasked IRQs
+ */
+struct metag_internal_irq_priv {
+	struct irq_domain	*domain;
+
+	unsigned long		unmasked;
+};
+
+/* Private data for the one and only internal interrupt controller */
+static struct metag_internal_irq_priv metag_internal_irq_priv;
+
+static unsigned int metag_internal_irq_startup(struct irq_data *data);
+static void metag_internal_irq_shutdown(struct irq_data *data);
+static void metag_internal_irq_ack(struct irq_data *data);
+static void metag_internal_irq_mask(struct irq_data *data);
+static void metag_internal_irq_unmask(struct irq_data *data);
+#ifdef CONFIG_SMP
+static int metag_internal_irq_set_affinity(struct irq_data *data,
+			const struct cpumask *cpumask, bool force);
+#endif
+
+static struct irq_chip internal_irq_edge_chip = {
+	.name = "HWSTATMETA-IRQ",
+	.irq_startup = metag_internal_irq_startup,
+	.irq_shutdown = metag_internal_irq_shutdown,
+	.irq_ack = metag_internal_irq_ack,
+	.irq_mask = metag_internal_irq_mask,
+	.irq_unmask = metag_internal_irq_unmask,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = metag_internal_irq_set_affinity,
+#endif
+};
+
+/*
+ *	metag_hwvec_addr - get the address of *VECINT regs of irq
+ *
+ *	This function is a table of supported triggers on HWSTATMETA
+ *	Could do with a structure, but better keep it simple. Changes
+ *	in this code should be rare.
+ */
+static inline void __iomem *metag_hwvec_addr(irq_hw_number_t hw)
+{
+	void __iomem *addr;
+
+	switch (hw) {
+	case PERF0TRIG_OFFSET:
+		addr = (void __iomem *)PERF0VECINT;
+		break;
+	case PERF1TRIG_OFFSET:
+		addr = (void __iomem *)PERF1VECINT;
+		break;
+	default:
+		addr = NULL;
+		break;
+	}
+	return addr;
+}
+
+/*
+ *	metag_internal_startup - setup an internal irq
+ *	@irq:	the irq to startup
+ *
+ *	Multiplex interrupts for @irq onto TR1. Clear any pending
+ *	interrupts.
+ */
+static unsigned int metag_internal_irq_startup(struct irq_data *data)
+{
+	/* Clear (toggle) the bit in HWSTATMETA for our interrupt. */
+	metag_internal_irq_ack(data);
+
+	/* Enable the interrupt by unmasking it */
+	metag_internal_irq_unmask(data);
+
+	return 0;
+}
+
+/*
+ *	metag_internal_irq_shutdown - turn off the irq
+ *	@irq:	the irq number to turn off
+ *
+ *	Mask @irq and clear any pending interrupts.
+ *	Stop muxing @irq onto TR1.
+ */
+static void metag_internal_irq_shutdown(struct irq_data *data)
+{
+	/* Disable the IRQ at the core by masking it. */
+	metag_internal_irq_mask(data);
+
+	/* Clear (toggle) the bit in HWSTATMETA for our interrupt. */
+	metag_internal_irq_ack(data);
+}
+
+/*
+ *	metag_internal_irq_ack - acknowledge irq
+ *	@irq:	the irq to ack
+ */
+static void metag_internal_irq_ack(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << hw;
+
+	if (metag_in32(HWSTATMETA) & bit)
+		metag_out32(bit, HWSTATMETA);
+}
+
+/**
+ * metag_internal_irq_mask() - mask an internal irq by unvectoring
+ * @data:	data for the internal irq to mask
+ *
+ * HWSTATMETA has no mask register. Instead the IRQ is unvectored from the core
+ * and retriggered if necessary later.
+ */
+static void metag_internal_irq_mask(struct irq_data *data)
+{
+	struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+	irq_hw_number_t hw = data->hwirq;
+	void __iomem *vec_addr = metag_hwvec_addr(hw);
+
+	clear_bit(hw, &priv->unmasked);
+
+	/* there is no interrupt mask, so unvector the interrupt */
+	metag_out32(0, vec_addr);
+}
+
+/**
+ * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring
+ * @data:	data for the internal irq to unmask
+ *
+ * HWSTATMETA has no mask register. Instead the IRQ is revectored back to the
+ * core and retriggered if necessary.
+ */
+static void metag_internal_irq_unmask(struct irq_data *data)
+{
+	struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << hw;
+	void __iomem *vec_addr = metag_hwvec_addr(hw);
+	unsigned int thread = hard_processor_id();
+
+	set_bit(hw, &priv->unmasked);
+
+	/* there is no interrupt mask, so revector the interrupt */
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)), vec_addr);
+
+	/*
+	 * Re-trigger interrupt
+	 *
+	 * Writing a 1 toggles, and a 0->1 transition triggers. We only
+	 * retrigger if the status bit is already set, which means we
+	 * need to clear it first. Retriggering is fundamentally racy
+	 * because if the interrupt fires again after we clear it we
+	 * could end up clearing it again and the interrupt handler
+	 * thinking it hasn't fired. Therefore we need to keep trying to
+	 * retrigger until the bit is set.
+	 */
+	if (metag_in32(HWSTATMETA) & bit) {
+		metag_out32(bit, HWSTATMETA);
+		while (!(metag_in32(HWSTATMETA) & bit))
+			metag_out32(bit, HWSTATMETA);
+	}
+}
+
+#ifdef CONFIG_SMP
+/*
+ *	metag_internal_irq_set_affinity - set the affinity for an interrupt
+ */
+static int metag_internal_irq_set_affinity(struct irq_data *data,
+			const struct cpumask *cpumask, bool force)
+{
+	unsigned int cpu, thread;
+	irq_hw_number_t hw = data->hwirq;
+	/*
+	 * Wire up this interrupt from *VECINT to the Meta core.
+	 *
+	 * Note that we can't wire up *VECINT to interrupt more than
+	 * one cpu (the interrupt code doesn't support it), so we just
+	 * pick the first cpu we find in 'cpumask'.
+	 */
+	cpu = cpumask_any(cpumask);
+	thread = cpu_2_hwthread_id[cpu];
+
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)),
+		    metag_hwvec_addr(hw));
+
+	return 0;
+}
+#endif
+
+/*
+ *	metag_internal_irq_demux - irq de-multiplexer
+ *	@irq:	the interrupt number
+ *	@desc:	the interrupt description structure for this irq
+ *
+ *	The cpu receives an interrupt on TR1 when an interrupt has
+ *	occurred. It is this function's job to demux this irq and
+ *	figure out exactly which trigger needs servicing.
+ */
+static void metag_internal_irq_demux(unsigned int irq, struct irq_desc *desc)
+{
+	struct metag_internal_irq_priv *priv = irq_desc_get_handler_data(desc);
+	irq_hw_number_t hw;
+	unsigned int irq_no;
+	u32 status;
+
+recalculate:
+	status = metag_in32(HWSTATMETA) & priv->unmasked;
+
+	for (hw = 0; status != 0; status >>= 1, ++hw) {
+		if (status & 0x1) {
+			/*
+			 * Map the hardware IRQ number to a virtual Linux IRQ
+			 * number.
+			 */
+			irq_no = irq_linear_revmap(priv->domain, hw);
+
+			/*
+			 * Only fire off interrupts that are
+			 * registered to be handled by the kernel.
+			 * Other interrupts are probably being
+			 * handled by other Meta hardware threads.
+			 */
+			generic_handle_irq(irq_no);
+
+			/*
+			 * The handler may have re-enabled interrupts
+			 * which could have caused a nested invocation
+			 * of this code and make the copy of the
+			 * status register we are using invalid.
+			 */
+			goto recalculate;
+		}
+	}
+}
+
+/**
+ * internal_irq_map() - Map an internal meta IRQ to a virtual IRQ number.
+ * @hw:		Number of the internal IRQ. Must be in range.
+ *
+ * Returns:	The virtual IRQ number of the Meta internal IRQ specified by
+ *		@hw.
+ */
+int internal_irq_map(unsigned int hw)
+{
+	struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+	if (!priv->domain)
+		return -ENODEV;
+	return irq_create_mapping(priv->domain, hw);
+}
+
+/**
+ *	metag_internal_irq_init_cpu - regsister with the Meta cpu
+ *	@cpu:	the CPU to register on
+ *
+ *	Configure @cpu's TR1 irq so that we can demux irqs.
+ */
+static void metag_internal_irq_init_cpu(struct metag_internal_irq_priv *priv,
+					int cpu)
+{
+	unsigned int thread = cpu_2_hwthread_id[cpu];
+	unsigned int signum = TBID_SIGNUM_TR1(thread);
+	int irq = tbisig_map(signum);
+
+	/* Register the multiplexed IRQ handler */
+	irq_set_handler_data(irq, priv);
+	irq_set_chained_handler(irq, metag_internal_irq_demux);
+	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
+}
+
+/**
+ * metag_internal_intc_map() - map an internal irq
+ * @d:		irq domain of internal trigger block
+ * @irq:	virtual irq number
+ * @hw:		hardware irq number within internal trigger block
+ *
+ * This sets up a virtual irq for a specified hardware interrupt. The irq chip
+ * and handler is configured.
+ */
+static int metag_internal_intc_map(struct irq_domain *d, unsigned int irq,
+				   irq_hw_number_t hw)
+{
+	/* only register interrupt if it is mapped */
+	if (!metag_hwvec_addr(hw))
+		return -EINVAL;
+
+	irq_set_chip_and_handler(irq, &internal_irq_edge_chip,
+				 handle_edge_irq);
+	return 0;
+}
+
+static const struct irq_domain_ops metag_internal_intc_domain_ops = {
+	.map	= metag_internal_intc_map,
+};
+
+/**
+ *	metag_internal_irq_register - register internal IRQs
+ *
+ *	Register the irq chip and handler function for all internal IRQs
+ */
+int __init init_internal_IRQ(void)
+{
+	struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+	unsigned int cpu;
+
+	/* Set up an IRQ domain */
+	priv->domain = irq_domain_add_linear(NULL, 32,
+					     &metag_internal_intc_domain_ops,
+					     priv);
+	if (unlikely(!priv->domain)) {
+		pr_err("meta-internal-intc: cannot add IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	/* Setup TR1 for all cpus. */
+	for_each_possible_cpu(cpu)
+		metag_internal_irq_init_cpu(priv, cpu);
+
+	return 0;
+};
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index a5702d7..3939829 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -322,6 +322,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	return 0;
 }
 
+#ifndef elf_map
+
 static unsigned long elf_map(struct file *filep, unsigned long addr,
 		struct elf_phdr *eppnt, int prot, int type,
 		unsigned long total_size)
@@ -356,6 +358,8 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
 	return(map_addr);
 }
 
+#endif /* !elf_map */
+
 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
 {
 	int i, first_idx = -1, last_idx = -1;
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index aba5308..ac9da00 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -346,6 +346,7 @@ extern void ioport_unmap(void __iomem *p);
 #define xlate_dev_kmem_ptr(p)	p
 #define xlate_dev_mem_ptr(p)	__va(p)
 
+#ifdef CONFIG_VIRT_TO_BUS
 #ifndef virt_to_bus
 static inline unsigned long virt_to_bus(volatile void *address)
 {
@@ -357,6 +358,7 @@ static inline void *bus_to_virt(unsigned long address)
 	return (void *) address;
 }
 #endif
+#endif
 
 #ifndef memset_io
 #define memset_io(a, b, c)	memset(__io_virt(a), (b), (c))
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 257c55e..4077b5d 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -17,5 +17,12 @@
  * but it doesn't work on all toolchains, so we just do it by hand
  */
 #ifndef cond_syscall
-#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
+#ifdef CONFIG_SYMBOL_PREFIX
+#define __SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX
+#else
+#define __SYMBOL_PREFIX
+#endif
+#define cond_syscall(x) asm(".weak\t" __SYMBOL_PREFIX #x "\n\t" \
+			    ".set\t" __SYMBOL_PREFIX #x "," \
+			    __SYMBOL_PREFIX "sys_ni_syscall")
 #endif
diff --git a/include/clocksource/metag_generic.h b/include/clocksource/metag_generic.h
new file mode 100644
index 0000000..ac17e7d
--- /dev/null
+++ b/include/clocksource/metag_generic.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2013 Imaginaton Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __CLKSOURCE_METAG_GENERIC_H
+#define __CLKSOURCE_METAG_GENERIC_H
+
+extern int metag_generic_timer_init(void);
+
+#endif /* __CLKSOURCE_METAG_GENERIC_H */
diff --git a/include/linux/irqchip/metag-ext.h b/include/linux/irqchip/metag-ext.h
new file mode 100644
index 0000000..697af0f
--- /dev/null
+++ b/include/linux/irqchip/metag-ext.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies
+ */
+
+#ifndef _LINUX_IRQCHIP_METAG_EXT_H_
+#define _LINUX_IRQCHIP_METAG_EXT_H_
+
+struct irq_data;
+struct platform_device;
+
+/* called from core irq code at init */
+int init_external_IRQ(void);
+
+/*
+ * called from SoC init_irq() callback to dynamically indicate the lack of
+ * HWMASKEXT registers.
+ */
+void meta_intc_no_mask(void);
+
+/*
+ * These allow SoCs to specialise the interrupt controller from their init_irq
+ * callbacks.
+ */
+
+extern struct irq_chip meta_intc_edge_chip;
+extern struct irq_chip meta_intc_level_chip;
+
+/* this should be called in the mask callback */
+void meta_intc_mask_irq_simple(struct irq_data *data);
+/* this should be called in the unmask callback */
+void meta_intc_unmask_irq_simple(struct irq_data *data);
+
+#endif /* _LINUX_IRQCHIP_METAG_EXT_H_ */
diff --git a/include/linux/irqchip/metag.h b/include/linux/irqchip/metag.h
new file mode 100644
index 0000000..4ebdfb3
--- /dev/null
+++ b/include/linux/irqchip/metag.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2011 Imagination Technologies
+ */
+
+#ifndef _LINUX_IRQCHIP_METAG_H_
+#define _LINUX_IRQCHIP_METAG_H_
+
+#include <linux/errno.h>
+
+#ifdef CONFIG_METAG_PERFCOUNTER_IRQS
+extern int init_internal_IRQ(void);
+extern int internal_irq_map(unsigned int hw);
+#else
+static inline int init_internal_IRQ(void)
+{
+	return 0;
+}
+static inline int internal_irq_map(unsigned int hw)
+{
+	return -EINVAL;
+}
+#endif
+
+#endif /* _LINUX_IRQCHIP_METAG_H_ */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1ede55f..7acc9dc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -115,6 +115,8 @@ extern unsigned int kobjsize(const void *objp);
 # define VM_SAO		VM_ARCH_1	/* Strong Access Ordering (powerpc) */
 #elif defined(CONFIG_PARISC)
 # define VM_GROWSUP	VM_ARCH_1
+#elif defined(CONFIG_METAG)
+# define VM_GROWSUP	VM_ARCH_1
 #elif defined(CONFIG_IA64)
 # define VM_GROWSUP	VM_ARCH_1
 #elif !defined(CONFIG_MMU)
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 900b948..8072d35 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -395,6 +395,8 @@ typedef struct elf64_shdr {
 #define NT_ARM_TLS	0x401		/* ARM TLS register */
 #define NT_ARM_HW_BREAK	0x402		/* ARM hardware breakpoint registers */
 #define NT_ARM_HW_WATCH	0x403		/* ARM hardware watchpoint registers */
+#define NT_METAG_CBUF	0x500		/* Metag catch buffer registers */
+#define NT_METAG_RPIPE	0x501		/* Metag read pipeline state */
 
 
 /* Note header in a PT_NOTE section */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7244acd..6989df2 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -178,7 +178,7 @@ void tracing_off_permanent(void)
 #define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 #define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */
 
-#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
 # define RB_FORCE_8BYTE_ALIGNMENT	0
 # define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
 #else
@@ -186,6 +186,8 @@ void tracing_off_permanent(void)
 # define RB_ARCH_ALIGNMENT		8U
 #endif
 
+#define RB_ALIGN_DATA		__aligned(RB_ARCH_ALIGNMENT)
+
 /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
 #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 
@@ -334,7 +336,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 struct buffer_data_page {
 	u64		 time_stamp;	/* page time stamp */
 	local_t		 commit;	/* write committed index */
-	unsigned char	 data[];	/* data of buffer page */
+	unsigned char	 data[] RB_ALIGN_DATA;	/* data of buffer page */
 };
 
 /*
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e4a7f80..28be08c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -674,7 +674,7 @@ config STACKTRACE
 
 config DEBUG_STACK_USAGE
 	bool "Stack utilization instrumentation"
-	depends on DEBUG_KERNEL && !IA64 && !PARISC
+	depends on DEBUG_KERNEL && !IA64 && !PARISC && !METAG
 	help
 	  Enables the display of the minimum amount of free stack which each
 	  task has ever had available in the sysrq-T and sysrq-P debug output.
@@ -855,7 +855,7 @@ config FRAME_POINTER
 	bool "Compile the kernel with frame pointers"
 	depends on DEBUG_KERNEL && \
 		(CRIS || M68K || FRV || UML || \
-		 AVR32 || SUPERH || BLACKFIN || MN10300) || \
+		 AVR32 || SUPERH || BLACKFIN || MN10300 || METAG) || \
 		ARCH_WANT_FRAME_POINTERS
 	default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS
 	help
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index 17e3843..544aa56 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -34,7 +34,7 @@ use strict;
 # $1 (first bracket) matches the dynamic amount of the stack growth
 #
 # use anything else and feel the pain ;)
-my (@stack, $re, $dre, $x, $xs);
+my (@stack, $re, $dre, $x, $xs, $funcre);
 {
 	my $arch = shift;
 	if ($arch eq "") {
@@ -44,6 +44,7 @@ my (@stack, $re, $dre, $x, $xs);
 
 	$x	= "[0-9a-f]";	# hex character
 	$xs	= "[0-9a-f ]";	# hex character or space
+	$funcre = qr/^$x* <(.*)>:$/;
 	if ($arch eq 'arm') {
 		#c0008ffc:	e24dd064	sub	sp, sp, #100	; 0x64
 		$re = qr/.*sub.*sp, sp, #(([0-9]{2}|[3-9])[0-9]{2})/o;
@@ -66,6 +67,10 @@ my (@stack, $re, $dre, $x, $xs);
 		#    2b6c:       4e56 fb70       linkw %fp,#-1168
 		#  1df770:       defc ffe4       addaw #-28,%sp
 		$re = qr/.*(?:linkw %fp,|addaw )#-([0-9]{1,4})(?:,%sp)?$/o;
+	} elsif ($arch eq 'metag') {
+		#400026fc:       40 00 00 82     ADD       A0StP,A0StP,#0x8
+		$re = qr/.*ADD.*A0StP,A0StP,\#(0x$x{1,8})/o;
+		$funcre = qr/^$x* <[^\$](.*)>:$/;
 	} elsif ($arch eq 'mips64') {
 		#8800402c:       67bdfff0        daddiu  sp,sp,-16
 		$re = qr/.*daddiu.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o;
@@ -109,7 +114,6 @@ my (@stack, $re, $dre, $x, $xs);
 #
 # main()
 #
-my $funcre = qr/^$x* <(.*)>:$/;
 my ($func, $file, $lastslash);
 
 while (my $line = <STDIN>) {
diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index 8a10649..d25e4a1 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -826,7 +826,8 @@ int main(int argc, char **argv)
 			genksyms_usage();
 			return 1;
 		}
-	if ((strcmp(arch, "h8300") == 0) || (strcmp(arch, "blackfin") == 0))
+	if ((strcmp(arch, "h8300") == 0) || (strcmp(arch, "blackfin") == 0) ||
+	    (strcmp(arch, "metag") == 0))
 		mod_prefix = "_";
 	{
 		extern int yydebug;
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index ee52cb8..9c22317 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -33,6 +33,13 @@
 #include <string.h>
 #include <unistd.h>
 
+#ifndef EM_METAG
+/* Remove this when these make it to the standard system elf.h. */
+#define EM_METAG      174
+#define R_METAG_ADDR32                   2
+#define R_METAG_NONE                     3
+#endif
+
 static int fd_map;	/* File descriptor for file being modified. */
 static int mmap_failed; /* Boolean flag. */
 static void *ehdr_curr; /* current ElfXX_Ehdr *  for resource cleanup */
@@ -341,6 +348,12 @@ do_file(char const *const fname)
 			 altmcount = "__gnu_mcount_nc";
 			 break;
 	case EM_IA_64:	 reltype = R_IA64_IMM64;   gpfx = '_'; break;
+	case EM_METAG:	 reltype = R_METAG_ADDR32;
+			 altmcount = "_mcount_wrapper";
+			 rel_type_nop = R_METAG_NONE;
+			 /* We happen to have the same requirement as MIPS */
+			 is_fake_mcount32 = MIPS32_is_fake_mcount;
+			 break;
 	case EM_MIPS:	 /* reltype: e_class    */ gpfx = '_'; break;
 	case EM_PPC:	 reltype = R_PPC_ADDR32;   gpfx = '_'; break;
 	case EM_PPC64:	 reltype = R_PPC64_ADDR64; gpfx = '_'; break;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index d5818c9..74659ec 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -100,6 +100,12 @@
 #define CPUINFO_PROC	"Processor"
 #endif
 
+#ifdef __metag__
+#define rmb()		asm volatile("" ::: "memory")
+#define cpu_relax()	asm volatile("" ::: "memory")
+#define CPUINFO_PROC	"CPU"
+#endif
+
 #include <time.h>
 #include <unistd.h>
 #include <sys/types.h>