summaryrefslogtreecommitdiffstats
path: root/arch/ia64/kernel/gate.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ia64/kernel/gate.S')
-rw-r--r--arch/ia64/kernel/gate.S62
1 files changed, 33 insertions, 29 deletions
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
index facf75a..86948ce 100644
--- a/arch/ia64/kernel/gate.S
+++ b/arch/ia64/kernel/gate.S
@@ -72,38 +72,40 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
* bundle get executed. The remaining code must be safe even if
* they do not get executed.
*/
- adds r17=-1024,r15
- mov r10=0 // default to successful syscall execution
- epc
+ adds r17=-1024,r15 // A
+ mov r10=0 // A default to successful syscall execution
+ epc // B causes split-issue
}
;;
- rsm psr.be // note: on McKinley "rsm psr.be/srlz.d" is slightly faster than "rum psr.be"
- LOAD_FSYSCALL_TABLE(r14)
-
- mov r16=IA64_KR(CURRENT) // 12 cycle read latency
- tnat.nz p10,p9=r15
- mov r19=NR_syscalls-1
+ rsm psr.be | psr.i // M2 (5 cyc to srlz.d)
+ LOAD_FSYSCALL_TABLE(r14) // X
;;
- shladd r18=r17,3,r14
-
- srlz.d
- cmp.ne p8,p0=r0,r0 // p8 <- FALSE
- /* Note: if r17 is a NaT, p6 will be set to zero. */
- cmp.geu p6,p7=r19,r17 // (syscall > 0 && syscall < 1024+NR_syscalls)?
- ;;
-(p6) ld8 r18=[r18]
- mov r21=ar.fpsr
- add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
- ;;
-(p6) mov b7=r18
-(p6) tbit.z p8,p0=r18,0
-(p8) br.dptk.many b7
-
-(p6) rsm psr.i
- mov r27=ar.rsc
- mov r26=ar.pfs
+ mov r16=IA64_KR(CURRENT) // M2 (12 cyc)
+ shladd r18=r17,3,r14 // A
+ mov r19=NR_syscalls-1 // A
+ ;;
+ lfetch [r18] // M0|1
+ mov r29=psr // M2 (12 cyc)
+ // If r17 is a NaT, p6 will be zero
+ cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)?
+ ;;
+ mov r21=ar.fpsr // M2 (12 cyc)
+ tnat.nz p10,p9=r15 // I0
+ mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...)
+ ;;
+ srlz.d // M0 (forces split-issue) ensure PSR.BE==0
+(p6) ld8 r18=[r18] // M0|1
+ nop.i 0
+ ;;
+ nop.m 0
+(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!)
+ nop.i 0
;;
- mov r29=psr // read psr (12 cyc load latency)
+(p8) ssm psr.i
+(p6) mov b7=r18 // I0
+(p8) br.dptk.many b7 // B
+
+ mov r27=ar.rsc // M2 (12 cyc)
/*
* brl.cond doesn't work as intended because the linker would convert this branch
* into a branch to a PLT. Perhaps there will be a way to avoid this with some
@@ -111,6 +113,8 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
* instead.
*/
#ifdef CONFIG_ITANIUM
+(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
+ ;;
(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
;;
(p6) mov b7=r14
@@ -118,7 +122,7 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
#else
BRL_COND_FSYS_BUBBLE_DOWN(p6)
#endif
-
+ ssm psr.i
mov r10=-1
(p10) mov r8=EINVAL
(p9) mov r8=ENOSYS
OpenPOWER on IntegriCloud