summary | refs | log | tree | commit | diff | stats
path: root/secure/lib/libcrypto/amd64/vpaes-x86_64.S
diff options
context:
space:
mode:
Diffstat (limited to 'secure/lib/libcrypto/amd64/vpaes-x86_64.S')
-rw-r--r-- secure/lib/libcrypto/amd64/vpaes-x86_64.S 105
1 files changed, 52 insertions, 53 deletions
diff --git a/secure/lib/libcrypto/amd64/vpaes-x86_64.S b/secure/lib/libcrypto/amd64/vpaes-x86_64.S
index 8cb9644..8ec5c40 100644
--- a/secure/lib/libcrypto/amd64/vpaes-x86_64.S
+++ b/secure/lib/libcrypto/amd64/vpaes-x86_64.S
@@ -32,8 +32,8 @@ _vpaes_encrypt_core:
movdqa .Lk_ipt+16(%rip),%xmm0
.byte 102,15,56,0,193
pxor %xmm5,%xmm2
- pxor %xmm2,%xmm0
addq $16,%r9
+ pxor %xmm2,%xmm0
leaq .Lk_mc_backward(%rip),%r10
jmp .Lenc_entry
@@ -41,19 +41,19 @@ _vpaes_encrypt_core:
.Lenc_loop:
movdqa %xmm13,%xmm4
-.byte 102,15,56,0,226
- pxor %xmm5,%xmm4
movdqa %xmm12,%xmm0
+.byte 102,15,56,0,226
.byte 102,15,56,0,195
- pxor %xmm4,%xmm0
+ pxor %xmm5,%xmm4
movdqa %xmm15,%xmm5
-.byte 102,15,56,0,234
+ pxor %xmm4,%xmm0
movdqa -64(%r11,%r10,1),%xmm1
+.byte 102,15,56,0,234
+ movdqa (%r11,%r10,1),%xmm4
movdqa %xmm14,%xmm2
.byte 102,15,56,0,211
- pxor %xmm5,%xmm2
- movdqa (%r11,%r10,1),%xmm4
movdqa %xmm0,%xmm3
+ pxor %xmm5,%xmm2
.byte 102,15,56,0,193
addq $16,%r9
pxor %xmm2,%xmm0
@@ -62,30 +62,30 @@ _vpaes_encrypt_core:
pxor %xmm0,%xmm3
.byte 102,15,56,0,193
andq $48,%r11
- pxor %xmm3,%xmm0
subq $1,%rax
+ pxor %xmm3,%xmm0
.Lenc_entry:
movdqa %xmm9,%xmm1
+ movdqa %xmm11,%xmm5
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm9,%xmm0
- movdqa %xmm11,%xmm5
.byte 102,15,56,0,232
- pxor %xmm1,%xmm0
movdqa %xmm10,%xmm3
+ pxor %xmm1,%xmm0
.byte 102,15,56,0,217
- pxor %xmm5,%xmm3
movdqa %xmm10,%xmm4
+ pxor %xmm5,%xmm3
.byte 102,15,56,0,224
- pxor %xmm5,%xmm4
movdqa %xmm10,%xmm2
+ pxor %xmm5,%xmm4
.byte 102,15,56,0,211
- pxor %xmm0,%xmm2
movdqa %xmm10,%xmm3
- movdqu (%r9),%xmm5
+ pxor %xmm0,%xmm2
.byte 102,15,56,0,220
+ movdqu (%r9),%xmm5
pxor %xmm1,%xmm3
jnz .Lenc_loop
@@ -138,62 +138,61 @@ _vpaes_decrypt_core:
movdqa -32(%r10),%xmm4
+ movdqa -16(%r10),%xmm1
.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa -16(%r10),%xmm0
-.byte 102,15,56,0,195
+.byte 102,15,56,0,203
pxor %xmm4,%xmm0
- addq $16,%r9
-
-.byte 102,15,56,0,197
movdqa 0(%r10),%xmm4
-.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa 16(%r10),%xmm0
-.byte 102,15,56,0,195
- pxor %xmm4,%xmm0
- subq $1,%rax
+ pxor %xmm1,%xmm0
+ movdqa 16(%r10),%xmm1
-.byte 102,15,56,0,197
- movdqa 32(%r10),%xmm4
.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa 48(%r10),%xmm0
-.byte 102,15,56,0,195
+.byte 102,15,56,0,197
+.byte 102,15,56,0,203
pxor %xmm4,%xmm0
+ movdqa 32(%r10),%xmm4
+ pxor %xmm1,%xmm0
+ movdqa 48(%r10),%xmm1
+.byte 102,15,56,0,226
.byte 102,15,56,0,197
+.byte 102,15,56,0,203
+ pxor %xmm4,%xmm0
movdqa 64(%r10),%xmm4
+ pxor %xmm1,%xmm0
+ movdqa 80(%r10),%xmm1
+
.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa 80(%r10),%xmm0
-.byte 102,15,56,0,195
+.byte 102,15,56,0,197
+.byte 102,15,56,0,203
pxor %xmm4,%xmm0
-
+ addq $16,%r9
.byte 102,15,58,15,237,12
+ pxor %xmm1,%xmm0
+ subq $1,%rax
.Ldec_entry:
movdqa %xmm9,%xmm1
pandn %xmm0,%xmm1
+ movdqa %xmm11,%xmm2
psrld $4,%xmm1
pand %xmm9,%xmm0
- movdqa %xmm11,%xmm2
.byte 102,15,56,0,208
- pxor %xmm1,%xmm0
movdqa %xmm10,%xmm3
+ pxor %xmm1,%xmm0
.byte 102,15,56,0,217
- pxor %xmm2,%xmm3
movdqa %xmm10,%xmm4
+ pxor %xmm2,%xmm3
.byte 102,15,56,0,224
pxor %xmm2,%xmm4
movdqa %xmm10,%xmm2
.byte 102,15,56,0,211
- pxor %xmm0,%xmm2
movdqa %xmm10,%xmm3
+ pxor %xmm0,%xmm2
.byte 102,15,56,0,220
- pxor %xmm1,%xmm3
movdqu (%r9),%xmm0
+ pxor %xmm1,%xmm3
jnz .Ldec_loop
@@ -221,7 +220,7 @@ _vpaes_schedule_core:
- call _vpaes_preheat
+ call _vpaes_preheat
movdqa .Lk_rcon(%rip),%xmm8
movdqu (%rdi),%xmm0
@@ -267,7 +266,7 @@ _vpaes_schedule_core:
call _vpaes_schedule_round
decq %rsi
jz .Lschedule_mangle_last
- call _vpaes_schedule_mangle
+ call _vpaes_schedule_mangle
jmp .Loop_schedule_128
@@ -288,7 +287,7 @@ _vpaes_schedule_core:
.align 16
.Lschedule_192:
movdqu 8(%rdi),%xmm0
- call _vpaes_schedule_transform
+ call _vpaes_schedule_transform
movdqa %xmm0,%xmm6
pxor %xmm4,%xmm4
movhlps %xmm4,%xmm6
@@ -297,13 +296,13 @@ _vpaes_schedule_core:
.Loop_schedule_192:
call _vpaes_schedule_round
.byte 102,15,58,15,198,8
- call _vpaes_schedule_mangle
+ call _vpaes_schedule_mangle
call _vpaes_schedule_192_smear
- call _vpaes_schedule_mangle
+ call _vpaes_schedule_mangle
call _vpaes_schedule_round
decq %rsi
jz .Lschedule_mangle_last
- call _vpaes_schedule_mangle
+ call _vpaes_schedule_mangle
call _vpaes_schedule_192_smear
jmp .Loop_schedule_192
@@ -320,18 +319,18 @@ _vpaes_schedule_core:
.align 16
.Lschedule_256:
movdqu 16(%rdi),%xmm0
- call _vpaes_schedule_transform
+ call _vpaes_schedule_transform
movl $7,%esi
.Loop_schedule_256:
- call _vpaes_schedule_mangle
+ call _vpaes_schedule_mangle
movdqa %xmm0,%xmm6
call _vpaes_schedule_round
decq %rsi
jz .Lschedule_mangle_last
- call _vpaes_schedule_mangle
+ call _vpaes_schedule_mangle
pshufd $255,%xmm0,%xmm0
@@ -369,7 +368,7 @@ _vpaes_schedule_core:
.Lschedule_mangle_last_dec:
addq $-16,%rdx
pxor .Lk_s63(%rip),%xmm0
- call _vpaes_schedule_transform
+ call _vpaes_schedule_transform
movdqu %xmm0,(%rdx)
@@ -401,12 +400,12 @@ _vpaes_schedule_core:
.type _vpaes_schedule_192_smear,@function
.align 16
_vpaes_schedule_192_smear:
- pshufd $128,%xmm6,%xmm0
- pxor %xmm0,%xmm6
+ pshufd $128,%xmm6,%xmm1
pshufd $254,%xmm7,%xmm0
+ pxor %xmm1,%xmm6
+ pxor %xmm1,%xmm1
pxor %xmm0,%xmm6
movdqa %xmm6,%xmm0
- pxor %xmm1,%xmm1
movhlps %xmm1,%xmm6
.byte 0xf3,0xc3
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
OpenPOWER on IntegriCloud