summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2008-11-11 13:24:58 -0800
committerH. Peter Anvin <hpa@zytor.com>2008-11-11 13:24:58 -0800
commitb7c6244f13d37592003b46e12500a90e9781ad9d (patch)
tree24b19efe784a1a9975610228ddfa98cee47dac06 /arch
parent4687518c4cb7807fbeff21770e309080f9eb7f2f (diff)
downloadop-kernel-dev-b7c6244f13d37592003b46e12500a90e9781ad9d.zip
op-kernel-dev-b7c6244f13d37592003b46e12500a90e9781ad9d.tar.gz
x86: 32 bits: shrink and align IRQ stubs
Shrink the IRQ stubs on 32 bits down to just over four bytes per (we fit seven into a 32-byte chunk.) This shrinks the total icache consumption of the IRQ stubs down to an even kilobyte, if all of them are in active use. The downside is that we end up with a double jump, which could have a negative effect on some pipelines. The double jump is always inside the same cacheline on any modern chips (the exception being 486/Elan/Geode which have only 16-byte cachelines, but are unlikely to have too many interrupt sources.) To get the most effect, cache-align the IRQ stubs. Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/kernel/entry_32.S34
1 files changed, 22 insertions, 12 deletions
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 4aea956..dae81b9 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -619,28 +619,37 @@ END(syscall_badsys)
27:;
/*
- * Build the entry stubs and pointer table with
- * some assembler magic.
+ * Build the entry stubs and pointer table with some assembler magic.
+ * We pack 7 stubs into a single 32-byte chunk, which will fit in a
+ * single cache line on all modern x86 implementations.
*/
.section .init.rodata,"a"
ENTRY(interrupt)
.text
-
+ .p2align 5
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
RING0_INT_FRAME
vector=FIRST_EXTERNAL_VECTOR
-.rept NR_VECTORS
- ALIGN
- .if vector != FIRST_EXTERNAL_VECTOR
+.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+ .balign 32
+ .rept 7
+ .if vector < NR_VECTORS
+ .if vector != FIRST_EXTERNAL_VECTOR
CFI_ADJUST_CFA_OFFSET -4
- .endif
-1: pushl $~(vector)
+ .endif
+1: pushl $(~vector+0x80) /* Note: always in signed byte range */
CFI_ADJUST_CFA_OFFSET 4
- jmp common_interrupt
- .previous
+ .if ((vector-FIRST_EXTERNAL_VECTOR)%7) != 6
+ jmp 2f
+ .endif
+ .previous
.long 1b
- .text
+ .text
vector=vector+1
+ .endif
+ .endr
+2: jmp common_interrupt
.endr
END(irq_entries_start)
@@ -652,8 +661,9 @@ END(interrupt)
* the CPU automatically disables interrupts when executing an IRQ vector,
* so IRQ-flags tracing has to follow that:
*/
- ALIGN
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
+ addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
SAVE_ALL
TRACE_IRQS_OFF
movl %esp,%eax
OpenPOWER on IntegriCloud