diff options
Diffstat (limited to 'test')
443 files changed, 18712 insertions, 8378 deletions
diff --git a/test/Analysis/BasicAA/args-rets-allocas-loads.ll b/test/Analysis/BasicAA/args-rets-allocas-loads.ll index 5d3f67e..7555a4c 100644 --- a/test/Analysis/BasicAA/args-rets-allocas-loads.ll +++ b/test/Analysis/BasicAA/args-rets-allocas-loads.ll @@ -168,132 +168,132 @@ define void @caller_a(double* %arg_a0, ; CHECK: NoAlias: double* %noalias_ret_a1, double* %normal_ret_a0 ; CHECK: NoAlias: double* %noalias_ret_a1, double* %normal_ret_a1 ; CHECK: NoAlias: double* %noalias_ret_a0, double* %noalias_ret_a1 -; CHECK: ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1] -; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @callee(double* %escape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @callee(double* %escape_alloca_a0) -; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @callee(double* %escape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @callee(double* %escape_alloca_a1) -; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0) -; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) -; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) +; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1) ; CHECK: ===== Alias Analysis Evaluator Report ===== @@ -302,9 +302,9 @@ define void @caller_a(double* %arg_a0, ; CHECK: 36 may alias responses (30.0%) ; CHECK: 0 must alias responses (0.0%) ; CHECK: Alias Analysis Evaluator Pointer Alias Summary: 70%/30%/0% -; CHECK: 128 Total ModRef Queries Performed -; CHECK: 44 no mod/ref responses (34.3%) +; CHECK: 184 Total ModRef Queries Performed +; CHECK: 44 no mod/ref responses (23.9%) ; CHECK: 0 mod responses (0.0%) ; CHECK: 0 ref responses (0.0%) -; CHECK: 84 mod & ref responses (65.6%) -; CHECK: Alias Analysis Evaluator Mod/Ref Summary: 34%/0%/0%/65% +; CHECK: 140 mod & ref responses (76.0%) +; CHECK: Alias Analysis Evaluator Mod/Ref Summary: 23%/0%/0%/76% diff --git a/test/Analysis/BasicAA/constant-over-index.ll b/test/Analysis/BasicAA/constant-over-index.ll index 95f94d0..0e0c45c 100644 --- a/test/Analysis/BasicAA/constant-over-index.ll +++ b/test/Analysis/BasicAA/constant-over-index.ll @@ -1,7 +1,8 @@ -; RUN: opt < %s -aa-eval -print-all-alias-modref-info \ -; RUN: |& grep {MayAlias: double\\* \[%\]p.0.i.0, double\\* \[%\]p3\$} +; RUN: opt < %s -aa-eval -print-all-alias-modref-info |& FileCheck %s ; PR4267 +; CHECK: MayAlias: double* %p.0.i.0, double* %p3 + ; %p3 is equal to %p.0.i.0 on the second iteration of the loop, ; so MayAlias is needed. diff --git a/test/Analysis/BasicAA/featuretest.ll b/test/Analysis/BasicAA/featuretest.ll index 50dc886..47d278f 100644 --- a/test/Analysis/BasicAA/featuretest.ll +++ b/test/Analysis/BasicAA/featuretest.ll @@ -1,17 +1,22 @@ ; This testcase tests for various features the basicaa test should be able to ; determine, as noted in the comments. -; RUN: opt < %s -basicaa -gvn -instcombine -dce -S | not grep REMOVE +; RUN: opt < %s -basicaa -gvn -instcombine -dce -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @Global = external global { i32 } +declare void @external(i32*) + ; Array test: Test that operations on one local array do not invalidate ; operations on another array. Important for scientific codes. ; define i32 @different_array_test(i64 %A, i64 %B) { %Array1 = alloca i32, i32 100 %Array2 = alloca i32, i32 200 + + call void @external(i32* %Array1) + call void @external(i32* %Array2) %pointer = getelementptr i32* %Array1, i64 %A %val = load i32* %pointer @@ -22,6 +27,8 @@ define i32 @different_array_test(i64 %A, i64 %B) { %REMOVE = load i32* %pointer ; redundant with above load %retval = sub i32 %REMOVE, %val ret i32 %retval +; CHECK: @different_array_test +; CHECK: ret i32 0 } ; Constant index test: Constant indexes into the same array should not @@ -29,6 +36,8 @@ define i32 @different_array_test(i64 %A, i64 %B) { ; define i32 @constant_array_index_test() { %Array = alloca i32, i32 100 + call void @external(i32* %Array) + %P1 = getelementptr i32* %Array, i64 7 %P2 = getelementptr i32* %Array, i64 6 @@ -37,6 +46,8 @@ define i32 @constant_array_index_test() { %BREMOVE = load i32* %P1 %Val = sub i32 %A, %BREMOVE ret i32 %Val +; CHECK: @constant_array_index_test +; CHECK: ret i32 0 } ; Test that if two pointers are spaced out by a constant getelementptr, that @@ -48,6 +59,8 @@ define i32 @gep_distance_test(i32* %A) { %REMOVEv = load i32* %A %r = sub i32 %REMOVEu, %REMOVEv ret i32 %r +; CHECK: @gep_distance_test +; CHECK: ret i32 0 } ; Test that if two pointers are spaced out by a constant offset, that they @@ -60,6 +73,8 @@ define i32 @gep_distance_test2({i32,i32}* %A, i64 %distance) { %REMOVEv = load i32* %A1 %r = sub i32 %REMOVEu, %REMOVEv ret i32 %r +; CHECK: @gep_distance_test2 +; CHECK: ret i32 0 } ; Test that we can do funny pointer things and that distance calc will still @@ -68,16 +83,45 @@ define i32 @gep_distance_test3(i32 * %A) { %X = load i32* %A %B = bitcast i32* %A to i8* %C = getelementptr i8* %B, i64 4 - %Y = load i8* %C - ret i32 8 + store i8 42, i8* %C + %Y = load i32* %A + %R = sub i32 %X, %Y + ret i32 %R +; CHECK: @gep_distance_test3 +; CHECK: ret i32 0 } ; Test that we can disambiguate globals reached through constantexpr geps define i32 @constexpr_test() { %X = alloca i32 + call void @external(i32* %X) + %Y = load i32* %X store i32 5, i32* getelementptr ({ i32 }* @Global, i64 0, i32 0) %REMOVE = load i32* %X %retval = sub i32 %Y, %REMOVE ret i32 %retval +; CHECK: @constexpr_test +; CHECK: ret i32 0 +} + + + +; PR7589 +; These two index expressions are different, this cannot be CSE'd. +define i16 @zext_sext_confusion(i16* %row2col, i5 %j) nounwind{ +entry: + %sum5.cast = zext i5 %j to i64 ; <i64> [#uses=1] + %P1 = getelementptr i16* %row2col, i64 %sum5.cast + %row2col.load.1.2 = load i16* %P1, align 1 ; <i16> [#uses=1] + + %sum13.cast31 = sext i5 %j to i6 ; <i6> [#uses=1] + %sum13.cast = zext i6 %sum13.cast31 to i64 ; <i64> [#uses=1] + %P2 = getelementptr i16* %row2col, i64 %sum13.cast + %row2col.load.1.6 = load i16* %P2, align 1 ; <i16> [#uses=1] + + %.ret = sub i16 %row2col.load.1.6, %row2col.load.1.2 ; <i16> [#uses=1] + ret i16 %.ret +; CHECK: @zext_sext_confusion +; CHECK: ret i16 %.ret } diff --git a/test/Analysis/BasicAA/gep-alias.ll b/test/Analysis/BasicAA/gep-alias.ll index 1ed0312..eba9599 100644 --- a/test/Analysis/BasicAA/gep-alias.ll +++ b/test/Analysis/BasicAA/gep-alias.ll @@ -117,12 +117,12 @@ define i32 @test7(i32* %p, i64 %i) { ; P[zext(i)] != p[zext(i+1)] ; PR1143 -define i32 @test8(i32* %p, i32 %i) { - %i1 = zext i32 %i to i64 - %pi = getelementptr i32* %p, i64 %i1 - %i.next = add i32 %i, 1 - %i.next2 = zext i32 %i.next to i64 - %pi.next = getelementptr i32* %p, i64 %i.next2 +define i32 @test8(i32* %p, i16 %i) { + %i1 = zext i16 %i to i32 + %pi = getelementptr i32* %p, i32 %i1 + %i.next = add i16 %i, 1 + %i.next2 = zext i16 %i.next to i32 + %pi.next = getelementptr i32* %p, i32 %i.next2 %x = load i32* %pi store i32 42, i32* %pi.next %y = load i32* %pi diff --git a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll new file mode 100644 index 0000000..12b088b --- /dev/null +++ b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -aa-eval -print-all-alias-modref-info -disable-output |& FileCheck %s + + +; CHECK: Just Ref: call void @ro() <-> call void @f0() + +declare void @f0() +declare void @ro() readonly + +define void @test0() { + call void @f0() + call void @ro() + ret void +} + +; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) +; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) + +declare void @llvm.memset.i64(i8*, i8, i64, i32) + +@A = external global i8 +@B = external global i8 +define void @test1() { + call void @llvm.memset.i64(i8* @A, i8 0, i64 1, i32 1) + call void @llvm.memset.i64(i8* @B, i8 0, i64 1, i32 1) + ret void +} diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll index a2aabf1..b9a3c5e 100644 --- a/test/Analysis/BasicAA/modref.ll +++ b/test/Analysis/BasicAA/modref.ll @@ -123,3 +123,14 @@ define i32 @test5(i8* %P, i32 %Len) { ; CHECK: sub i32 %tmp, %tmp } +define i8 @test6(i8* %p, i8* noalias %a) { + %x = load i8* %a + %t = va_arg i8* %p, float + %y = load i8* %a + %z = add i8 %x, %y + ret i8 %z +; CHECK: @test6 +; CHECK: load i8* %a +; CHECK-NOT: load +; CHECK: ret +} diff --git a/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll b/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll new file mode 100644 index 0000000..218b437 --- /dev/null +++ b/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll @@ -0,0 +1,20 @@ +; RUN: opt -regions %s +define i32 @main() nounwind { +entry: + br label %for.cond + +test: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + br i1 true, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %for.inc + +for.inc: ; preds = %for.body + br label %for.cond + +for.end: ; preds = %for.cond + ret i32 0 +} diff --git a/test/Analysis/RegionInfo/block_sort.ll b/test/Analysis/RegionInfo/block_sort.ll new file mode 100644 index 0000000..faec45a --- /dev/null +++ b/test/Analysis/RegionInfo/block_sort.ll @@ -0,0 +1,42 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats -analyze < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @BZ2_blockSort() nounwind { +start: + br label %while + +while: + br label %while.body134.i.i + +while.body134.i.i: + br i1 1, label %end, label %w + +w: + br label %if.end140.i.i + +if.end140.i.i: + br i1 1, label %while.end186.i.i, label %if.end183.i.i + +if.end183.i.i: + br label %while.body134.i.i + +while.end186.i.i: + br label %while + +end: + ret void +} +; CHECK-NOT: => +; CHECK: [0] start => <Function Return> +; CHECK: [1] while => end + +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: start, while, while.body134.i.i, end, w, if.end140.i.i, while.end186.i.i, if.end183.i.i, +; BBIT: while, while.body134.i.i, w, if.end140.i.i, while.end186.i.i, if.end183.i.i, + +; RNIT: start, while => end, end, +; RNIT: while, while.body134.i.i, w, if.end140.i.i, while.end186.i.i, if.end183.i.i, diff --git a/test/Analysis/RegionInfo/cond_loop.ll b/test/Analysis/RegionInfo/cond_loop.ll new file mode 100644 index 0000000..2ce57c3 --- /dev/null +++ b/test/Analysis/RegionInfo/cond_loop.ll @@ -0,0 +1,33 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +5: + br label %"0" + +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + ret void +3: + br i1 1, label %"1", label %"4" +4: + br label %"0" +} + +; CHECK-NOT: => +; CHECK: [0] 5 => <Function Return> +; CHECK: [1] 0 => 2 + +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 5, 0, 1, 2, 3, 4, +; BBIT: 0, 1, 3, 4, + +; RNIT: 5, 0 => 2, 2, +; RNIT: 0, 1, 3, 4, diff --git a/test/Analysis/RegionInfo/condition_complicated.ll b/test/Analysis/RegionInfo/condition_complicated.ll new file mode 100644 index 0000000..7ca5c7c --- /dev/null +++ b/test/Analysis/RegionInfo/condition_complicated.ll @@ -0,0 +1,60 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @handle_compress() nounwind { +end165: + br i1 1, label %false239, label %true181 + +true181: + br i1 1, label %then187, label %else232 + +then187: + br label %end265 + +else232: + br i1 1, label %false239, label %then245 + +false239: + br i1 1, label %then245, label %else259 + +then245: + br i1 1, label %then251, label %end253 + +then251: + br label %end253 + +end253: + br label %end265 + +else259: + br label %end265 + +end265: + br i1 1, label %then291, label %end298 + +then291: + br label %end298 + +end298: + ret i8 1 +} + +; CHECK-NOT: => +; CHECK: [0] end165 => <Function Return> +; CHECK-NEXT: [1] end165 => end265 +; CHECK-NEXT: [2] then245 => end253 +; CHECK-NEXT: [1] end265 => end298 + +; STAT: 4 region - The # of regions + +; BBIT: end165, false239, then245, then251, end253, end265, then291, end298, else259, true181, then187, else232, +; BBIT: end165, false239, then245, then251, end253, else259, true181, then187, else232, +; BBIT: then245, then251, +; BBIT: end265, then291, + +; RNIT: end165 => end265, end265 => end298, end298, +; RNIT: end165, false239, then245 => end253, end253, else259, true181, then187, else232, +; RNIT: then245, then251, +; RNIT: end265, then291, diff --git a/test/Analysis/RegionInfo/condition_complicated_2.ll b/test/Analysis/RegionInfo/condition_complicated_2.ll new file mode 100644 index 0000000..5fa940a --- /dev/null +++ b/test/Analysis/RegionInfo/condition_complicated_2.ll @@ -0,0 +1,44 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc void @compress() nounwind { +end33: + br i1 1, label %end124, label %lor.lhs.false95 + +lor.lhs.false95: + br i1 1, label %then107, label %end172 + +then107: + br i1 1, label %end124, label %then113 + +then113: + br label %end124 + +end124: + br label %exit + +end172: + br label %exit + + +exit: + unreachable + + +} +; CHECK-NOT: => +; CHECK: [0] end33 => <Function Return> +; CHECK-NEXT: [1] end33 => exit +; CHECK-NEXT: [2] then107 => end124 + +; STAT: 3 region - The # of regions + +; BBIT: end33, end124, exit, lor.lhs.false95, then107, then113, end172, +; BBIT: end33, end124, lor.lhs.false95, then107, then113, end172, +; BBIT: then107, then113, + +; RNIT: end33 => exit, exit, +; RNIT: end33, end124, lor.lhs.false95, then107 => end124, end172, +; RNIT: then107, then113, diff --git a/test/Analysis/RegionInfo/condition_forward_edge.ll b/test/Analysis/RegionInfo/condition_forward_edge.ll new file mode 100644 index 0000000..098c9b6 --- /dev/null +++ b/test/Analysis/RegionInfo/condition_forward_edge.ll @@ -0,0 +1,26 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"3" +3: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK: [1] 1 => 3 + +; STAT: 2 region - The # of regions + +; BBIT: 0, 1, 2, 3, +; BBIT: 1, 2, + +; RNIT: 0, 1 => 3, 3, +; RNIT: 1, 2, diff --git a/test/Analysis/RegionInfo/condition_same_exit.ll b/test/Analysis/RegionInfo/condition_same_exit.ll new file mode 100644 index 0000000..1b88596 --- /dev/null +++ b/test/Analysis/RegionInfo/condition_same_exit.ll @@ -0,0 +1,31 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br i1 1, label %"1", label %"4" + +1: + br i1 1, label %"2", label %"3" +2: + br label %"4" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 0 => 4 +; CHECK-NEXT: [2] 1 => 4 +; STAT: 3 region - The # of regions + +; BBIT: 0, 1, 2, 4, 3, +; BBIT: 0, 1, 2, 3, +; BBIT: 1, 2, 3, + +; RNIT: 0 => 4, 4, +; RNIT: 0, 1 => 4, +; RNIT: 1, 2, 3, diff --git a/test/Analysis/RegionInfo/condition_simple.ll b/test/Analysis/RegionInfo/condition_simple.ll new file mode 100644 index 0000000..19b154b --- /dev/null +++ b/test/Analysis/RegionInfo/condition_simple.ll @@ -0,0 +1,28 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"4" +3: + br label %"4" +4: + ret void +} + +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 1 => 4 +; STAT: 2 region - The # of regions + +; BBIT: 0, 1, 2, 4, 3, +; BBIT: 1, 2, 3, + +; RNIT: 0, 1 => 4, 4, +; RNIT: 1, 2, 3, diff --git a/test/Transforms/ABCD/dg.exp b/test/Analysis/RegionInfo/dg.exp index f200589..f200589 100644 --- a/test/Transforms/ABCD/dg.exp +++ b/test/Analysis/RegionInfo/dg.exp diff --git a/test/Analysis/RegionInfo/exit_in_condition.ll b/test/Analysis/RegionInfo/exit_in_condition.ll new file mode 100644 index 0000000..3b152d2 --- /dev/null +++ b/test/Analysis/RegionInfo/exit_in_condition.ll @@ -0,0 +1,38 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @handle_compress() nounwind { +entry: + br label %outer + +outer: + br label %body + +body: + br i1 1, label %body.i, label %if.end + +body.i: + br i1 1, label %end, label %if.end + +if.end: + br label %if.then64 + +if.then64: + br label %outer + +end: + ret i8 1 +} +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK-NEXT: [1] outer => end +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: entry, outer, body, body.i, end, if.end, if.then64, +; BBIT: outer, body, body.i, if.end, if.then64, + +; RNIT: entry, outer => end, end, +; RNIT: outer, body, body.i, if.end, if.then64, diff --git a/test/Analysis/RegionInfo/infinite_loop.ll b/test/Analysis/RegionInfo/infinite_loop.ll new file mode 100644 index 0000000..59cead4 --- /dev/null +++ b/test/Analysis/RegionInfo/infinite_loop.ll @@ -0,0 +1,20 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"2" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK: [1] 1 => 4 +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions diff --git a/test/Analysis/RegionInfo/infinite_loop_2.ll b/test/Analysis/RegionInfo/infinite_loop_2.ll new file mode 100644 index 0000000..80c69b7a --- /dev/null +++ b/test/Analysis/RegionInfo/infinite_loop_2.ll @@ -0,0 +1,36 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"5" +5: + br i1 1, label %"11", label %"12" +11: + br label %"6" +12: + br label %"6" +6: + br label %"2" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK: [1] 1 => 3 +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 2, 5, 11, 6, 12, 3, 4, +; BBIT: 1, 2, 5, 11, 6, 12, + +; RNIT: 0, 1 => 3, 3, 4, +; RNIT: 1, 2, 5, 11, 6, 12, diff --git a/test/Analysis/RegionInfo/infinite_loop_3.ll b/test/Analysis/RegionInfo/infinite_loop_3.ll new file mode 100644 index 0000000..74ceafb --- /dev/null +++ b/test/Analysis/RegionInfo/infinite_loop_3.ll @@ -0,0 +1,52 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"7" +7: + br i1 1, label %"1", label %"8" +1: + br i1 1, label %"2", label %"3" +2: + br label %"5" +5: + br i1 1, label %"11", label %"12" +11: + br label %"6" +12: + br label %"6" +6: + br label %"2" +8: + br label %"9" +9: + br i1 1, label %"13", label %"14" +13: + br label %"10" +14: + br label %"10" +10: + br label %"8" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 1 => 3 +; CHECK-NEXT: [1] 7 => 1 +; STAT: 3 region - The # of regions +; STAT: 2 region - The # of simple regions + +; BBIT: 0, 7, 1, 2, 5, 11, 6, 12, 3, 4, 8, 9, 13, 10, 14, +; BBIT: 7, 8, 9, 13, 10, 14, +; BBIT: 1, 2, 5, 11, 6, 12, + +; RNIT: 0, 7 => 1, 1 => 3, 3, 4, +; RNIT: 7, 8, 9, 13, 10, 14, +; RNIT: 1, 2, 5, 11, 6, 12, diff --git a/test/Analysis/RegionInfo/infinite_loop_4.ll b/test/Analysis/RegionInfo/infinite_loop_4.ll new file mode 100644 index 0000000..fd56af1 --- /dev/null +++ b/test/Analysis/RegionInfo/infinite_loop_4.ll @@ -0,0 +1,48 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"7" +7: + br i1 1, label %"1", label %"8" +1: + br i1 1, label %"2", label %"3" +2: + br label %"5" +5: + br i1 1, label %"11", label %"12" +11: + br label %"6" +12: + br label %"6" +6: + br i1 1, label %"2", label %"10" +8: + br label %"9" +9: + br i1 1, label %"13", label %"14" +13: + br label %"10" +14: + br label %"10" +10: + br label %"8" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 7 => 3 +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 7, 1, 2, 5, 11, 6, 10, 8, 9, 13, 14, 12, 3, 4, +; BBIT: 7, 1, 2, 5, 11, 6, 10, 8, 9, 13, 14, 12, + +; RNIT: 0, 7 => 3, 3, 4, +; RNIT: 7, 1, 2, 5, 11, 6, 10, 8, 9, 13, 14, 12, diff --git a/test/Analysis/RegionInfo/loop_with_condition.ll b/test/Analysis/RegionInfo/loop_with_condition.ll new file mode 100644 index 0000000..d1d6898 --- /dev/null +++ b/test/Analysis/RegionInfo/loop_with_condition.ll @@ -0,0 +1,46 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"6", label %"2" +2: + br i1 1, label %"3", label %"4" +3: + br label %"5" +4: + br label %"5" +5: + br label %"8" +8: + br i1 1, label %"7", label %"9" +9: + br label %"2" +7: + br label %"6" +6: + ret void +} + +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 1 => 6 +; CHECK-NEXT: [2] 2 => 7 +; CHECK-NEXT: [3] 2 => 5 +; STAT: 4 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 6, 2, 3, 5, 8, 7, 9, 4, +; BBIT: 1, 2, 3, 5, 8, 7, 9, 4, +; BBIT: 2, 3, 5, 8, 9, 4, +; BBIT: 2, 3, 4, + +; RNIT: 0, 1 => 6, 6, +; RNIT: 1, 2 => 7, 7, +; RNIT: 2 => 5, 5, 8, 9, +; RNIT: 2, 3, 4, diff --git a/test/Analysis/RegionInfo/loops_1.ll b/test/Analysis/RegionInfo/loops_1.ll new file mode 100644 index 0000000..d4bf3cc --- /dev/null +++ b/test/Analysis/RegionInfo/loops_1.ll @@ -0,0 +1,40 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @loops_1() nounwind { +entry: + br i1 1, label %outer , label %a + +a: + br label %body + +outer: + br label %body + +body: + br i1 1, label %land, label %if + +land: + br i1 1, label %exit, label %end + +exit: + br i1 1, label %if, label %end + +if: + br label %outer + +end: + ret i8 1 +} +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK-NEXT: [1] entry => end +; STAT: 2 region - The # of regions + +; BBIT: entry, outer, body, land, exit, if, end, a, +; BBIT: entry, outer, body, land, exit, if, a, + +; RNIT: entry => end, end, +; RNIT: entry, outer, body, land, exit, if, a, diff --git a/test/Analysis/RegionInfo/loops_2.ll b/test/Analysis/RegionInfo/loops_2.ll new file mode 100644 index 0000000..07aa7c3 --- /dev/null +++ b/test/Analysis/RegionInfo/loops_2.ll @@ -0,0 +1,49 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @meread_() nounwind { +entry: + br label %bb23 + +bb23: + br label %bb.i + +bb.i: ; preds = %bb.i, %bb54 + br label %pflini_.exit + +pflini_.exit: ; preds = %bb.i + br label %bb58thread-split + +bb58thread-split: ; preds = %bb64, %bb61, %pflini_.exit + br label %bb58 + +bb58: ; preds = %bb60, %bb58thread-split + br i1 1, label %bb59, label %bb23 + +bb59: ; preds = %bb58 + switch i32 1, label %bb60 [ + i32 1, label %l98 + ] + +bb60: ; preds = %bb59 + br i1 1, label %bb61, label %bb58 + +bb61: ; preds = %bb60 + br label %bb58thread-split + +l98: ; preds = %bb69, %bb59 + ret void +} +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK: [1] bb23 => l98 +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: entry, bb23, bb.i, pflini_.exit, bb58thread-split, bb58, bb59, bb60, bb61, l98, +; BBIT: bb23, bb.i, pflini_.exit, bb58thread-split, bb58, bb59, bb60, bb61, + +; RNIT: entry, bb23 => l98, l98, +; RNIT: bb23, bb.i, pflini_.exit, bb58thread-split, bb58, bb59, bb60, bb61, diff --git a/test/Analysis/RegionInfo/mix_1.ll b/test/Analysis/RegionInfo/mix_1.ll new file mode 100644 index 0000000..829c157 --- /dev/null +++ b/test/Analysis/RegionInfo/mix_1.ll @@ -0,0 +1,69 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @a_linear_impl_fig_1() nounwind { +0: + + br i1 1, label %"1", label %"15" +1: + switch i32 0, label %"2" [ i32 0, label %"3" + i32 1, label %"7"] +2: + br label %"4" +3: + br label %"5" +4: + br label %"6" +5: + br label %"6" +6: + br label %"7" +7: + br label %"15" +15: + br label %"8" +8: + br label %"16" +16: + br label %"9" +9: + br i1 1, label %"10", label %"11" +11: + br i1 1, label %"13", label %"12" +13: + br label %"14" +12: + br label %"14" +14: + br label %"8" +10: + br label %"17" +17: + br label %"18" +18: + ret void +} + +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 0 => 15 +; CHECK-NEXT: [2] 1 => 7 +; CHECK-NEXT: [1] 8 => 10 +; CHECK-NEXT: [2] 11 => 14 +; STAT: 5 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 2, 4, 6, 7, 15, 8, 16, 9, 10, 17, 18, 11, 13, 14, 12, 3, 5, +; BBIT: 0, 1, 2, 4, 6, 7, 3, 5, +; BBIT: 1, 2, 4, 6, 3, 5, +; BBIT: 8, 16, 9, 11, 13, 14, 12, +; BBIT: 11, 13, 12, + +; RNIT: 0 => 15, 15, 8 => 10, 10, 17, 18, +; RNIT: 0, 1 => 7, 7, +; RNIT: 1, 2, 4, 6, 3, 5, +; RNIT: 8, 16, 9, 11 => 14, 14, +; RNIT: 11, 13, 12, diff --git a/test/Analysis/RegionInfo/multiple_exiting_edge.ll b/test/Analysis/RegionInfo/multiple_exiting_edge.ll new file mode 100644 index 0000000..7bc0e46 --- /dev/null +++ b/test/Analysis/RegionInfo/multiple_exiting_edge.ll @@ -0,0 +1,38 @@ +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition_0() nounwind { +bb38: ; preds = %bb34, %bb34, %bb37 + switch i32 undef, label %bb42 [ + i32 67, label %bb42 + i32 90, label %bb41 + ] +bb41: ; preds = %bb38 + br label %bb42 +bb42: ; preds = %bb38, %bb38, %bb41 + ret void +} + +; BBIT: bb38, bb42, bb41, +; BBIT: bb38, bb41, + +; RNIT: bb38 => bb42, bb42, +; RNIT: bb38, bb41, + +define void @normal_condition_1() nounwind { +bb38: ; preds = %bb34, %bb34, %bb37 + switch i32 undef, label %bb41 [ + i32 67, label %bb42 + i32 90, label %bb42 + ] +bb41: ; preds = %bb38 + br label %bb42 +bb42: ; preds = %bb38, %bb38, %bb41 + ret void +} + +; BBIT: bb38, bb41, bb42, +; BBIT: bb38, bb41, + +; RNIT: bb38 => bb42, bb42, +; RNIT: bb38, bb41, diff --git a/test/Analysis/RegionInfo/nested_loops.ll b/test/Analysis/RegionInfo/nested_loops.ll new file mode 100644 index 0000000..9d8c455 --- /dev/null +++ b/test/Analysis/RegionInfo/nested_loops.ll @@ -0,0 +1,33 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @handle_compress() nounwind { +entry: + br label %outer + +outer: + br label %body + +body: + br i1 1, label %exit172, label %end + +exit172: + br i1 1, label %end, label %outer + +end: + ret i8 1 +} +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK-NEXT: [1] outer => end + +; STAT: 2 region - The # of regions + +; BBIT: entry, outer, body, exit172, end, +; BBIT: outer, body, exit172, + +; RNIT: entry, outer => end, end, +; RNIT: outer, body, exit172, diff --git a/test/Analysis/RegionInfo/next.ll b/test/Analysis/RegionInfo/next.ll new file mode 100644 index 0000000..d986387 --- /dev/null +++ b/test/Analysis/RegionInfo/next.ll @@ -0,0 +1,49 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @MAIN__() nounwind { +entry: + br label %__label_002001.outer + +__label_002001.outer: ; preds = %bb236, %bb92 + br label %__label_002001 + +__label_002001: ; preds = %bb229, %__label_002001.outer + br i1 1, label %bb93, label %__label_000020 + +bb93: ; preds = %__label_002001 + br i1 1, label %__label_000020, label %bb197 + +bb197: ; preds = %bb193 + br i1 1, label %bb229, label %bb224 + +bb224: ; preds = %bb223, %bb227 + br i1 1, label %bb229, label %bb224 + +bb229: ; preds = %bb227, %bb223 + br i1 1, label %__label_002001, label %__label_002001.outer + +__label_000020: ; preds = %__label_002001, %bb194 + ret void +} + +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK-NEXT: [1] __label_002001.outer => __label_000020 +; CHECK-NEXT; [2] bb197 => bb229 +; CHECK-NEXT; [3] bb224 => bb229 + +; STAT: 4 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: entry, __label_002001.outer, __label_002001, bb93, __label_000020, bb197, bb229, bb224, +; BBIT: __label_002001.outer, __label_002001, bb93, bb197, bb229, bb224, +; BBIT: bb197, bb224, +; BBIT: bb224, + +; RNIT: entry, __label_002001.outer => __label_000020, __label_000020, +; RNIT: __label_002001.outer, __label_002001, bb93, bb197 => bb229, bb229, +; RNIT: bb197, bb224 => bb229, +; RNIT: bb224, diff --git a/test/Analysis/RegionInfo/paper.ll b/test/Analysis/RegionInfo/paper.ll new file mode 100644 index 0000000..00b544b --- /dev/null +++ b/test/Analysis/RegionInfo/paper.ll @@ -0,0 +1,55 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @a_linear_impl_fig_1() nounwind { +0: + br label %"1" +1: + br label %"2" +2: + br label %"3" +3: + br i1 1, label %"13", label %"4" +4: + br i1 1, label %"5", label %"1" +5: + br i1 1, label %"8", label %"6" +6: + br i1 1, label %"7", label %"4" +7: + ret void +8: + br i1 1, label %"9", label %"1" +9: + br label %"10" +10: + br i1 1, label %"12", label %"11" +11: + br i1 1, label %"9", label %"8" +13: + br i1 1, label %"2", label %"1" +12: + switch i32 0, label %"1" [ i32 0, label %"9" + i32 1, label %"8"] +} + +; CHECK-NOT: => +; CHECK: [0] 0 => <Function Return> +; CHECK-NEXT: [1] 1 => 7 +; CHECK-NEXT: [2] 1 => 4 +; CHECK-NEXT: [2] 8 => 1 + +; STAT: 4 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 2, 3, 13, 4, 5, 8, 9, 10, 12, 11, 6, 7, +; BBIT: 1, 2, 3, 13, 4, 5, 8, 9, 10, 12, 11, 6, +; BBIT: 1, 2, 3, 13, +; BBIT: 8, 9, 10, 12, 11, + +; RNIT: 0, 1 => 7, 7, +; RNIT: 1 => 4, 4, 5, 8 => 1, 6, +; RNIT: 1, 2, 3, 13, +; RNIT: 8, 9, 10, 12, 11, diff --git a/test/Analysis/RegionInfo/two_loops_same_header.ll b/test/Analysis/RegionInfo/two_loops_same_header.ll new file mode 100644 index 0000000..a97182b --- /dev/null +++ b/test/Analysis/RegionInfo/two_loops_same_header.ll @@ -0,0 +1,46 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @handle_compress() nounwind { +entry: + br label %outer + +outer: + br label %body + +body: + br i1 1, label %else, label %true77 + +true77: + br i1 1, label %then83, label %else + +then83: + br label %outer + +else: + br label %else106 + +else106: + br i1 1, label %end, label %outer + +end: + ret i8 1 +} + +; CHECK-NOT: => +; CHECK: [0] entry => <Function Return> +; CHECK-NEXT: [1] outer => end +; CHECK-NEXT: [2] outer => else + +; STAT: 3 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: entry, outer, body, else, else106, end, true77, then83, +; BBIT: outer, body, else, else106, true77, then83, +; BBIT: outer, body, true77, then83, + +; RNIT: entry, outer => end, end, +; RNIT: outer => else, else, else106, +; RNIT: outer, body, true77, then83, diff --git a/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/test/Analysis/ScalarEvolution/avoid-smax-1.ll index 0bc9ce8..89e8b98 100644 --- a/test/Analysis/ScalarEvolution/avoid-smax-1.ll +++ b/test/Analysis/ScalarEvolution/avoid-smax-1.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -indvars -S > %t ; RUN: grep select %t | count 2 -; RUN: grep {icmp ne i32.\* %w } %t +; RUN: grep {icmp ne i32.\* } %t ; Indvars should be able to insert a canonical induction variable ; for the bb6 loop without using a maximum calculation (icmp, select) diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll index a8966be..843fb07 100644 --- a/test/Analysis/ScalarEvolution/max-trip-count.ll +++ b/test/Analysis/ScalarEvolution/max-trip-count.ll @@ -1,8 +1,9 @@ -; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: | grep {\{%d,+,\[^\{\}\]\*\}<%bb>} +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s ; ScalarEvolution should be able to understand the loop and eliminate the casts. +; CHECK: {%d,+,sizeof(i32)} + define void @foo(i32* nocapture %d, i32 %n) nounwind { entry: %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1] @@ -32,3 +33,40 @@ bb1.return_crit_edge: ; preds = %bb1 return: ; preds = %bb1.return_crit_edge, %entry ret void } + +; ScalarEvolution should be able to find the maximum tripcount +; of this multiple-exit loop, and if it doesn't know the exact +; count, it should say so. + +; PR7845 +; CHECK: Loop %for.cond: <multiple exits> Unpredictable backedge-taken count. +; CHECK: Loop %for.cond: max backedge-taken count is 5 + +@.str = private constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=2] + +define i32 @main() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %g_4.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ] ; <i32> [#uses=5] + %cmp = icmp slt i32 %g_4.0, 5 ; <i1> [#uses=1] + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %conv = trunc i32 %g_4.0 to i16 ; <i16> [#uses=1] + %tobool.not = icmp eq i16 %conv, 0 ; <i1> [#uses=1] + %tobool3 = icmp ne i32 %g_4.0, 0 ; <i1> [#uses=1] + %or.cond = and i1 %tobool.not, %tobool3 ; <i1> [#uses=1] + br i1 %or.cond, label %for.end, label %for.inc + +for.inc: ; preds = %for.body + %add = add nsw i32 %g_4.0, 1 ; <i32> [#uses=1] + br label %for.cond + +for.end: ; preds = %for.body, %for.cond + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %g_4.0) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8*, ...) diff --git a/test/Archive/README.txt b/test/Archive/README.txt index da6cfa4..6810bef 100644 --- a/test/Archive/README.txt +++ b/test/Archive/README.txt @@ -5,7 +5,7 @@ This directory contains various tests of llvm-ar and llvm-ranlib to ensure compatibility reading other ar(1) formats. It also provides a basic functionality test for these tools. -There are four archives stored in CVS with these tests: +There are four archives accompanying these tests: GNU.a - constructed on Linux with GNU ar MacOSX.a - constructed on Mac OS X with its native BSD4.4 ar diff --git a/test/Assembler/2010-01-06-UnionType.ll b/test/Assembler/2010-01-06-UnionType.ll deleted file mode 100644 index 37130d6..0000000 --- a/test/Assembler/2010-01-06-UnionType.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: llvm-as %s -o /dev/null - -%X = type union { i32, i32* } diff --git a/test/Assembler/align-inst-alloca.ll b/test/Assembler/align-inst-alloca.ll new file mode 100644 index 0000000..0343beb --- /dev/null +++ b/test/Assembler/align-inst-alloca.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as %s -o /dev/null 2>/dev/null + +define void @foo() { + %p = alloca i1, align 1073741824 + ret void +} diff --git a/test/Assembler/align-inst-load.ll b/test/Assembler/align-inst-load.ll new file mode 100644 index 0000000..3586be2 --- /dev/null +++ b/test/Assembler/align-inst-load.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as %s -o /dev/null 2>/dev/null + +define void @foo() { + load i1* %p, align 1073741824 + ret void +} diff --git a/test/Assembler/align-inst-store.ll b/test/Assembler/align-inst-store.ll new file mode 100644 index 0000000..8c3b712 --- /dev/null +++ b/test/Assembler/align-inst-store.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as %s -o /dev/null 2>/dev/null + +define void @foo() { + store i1 false, i1* %p, align 1073741824 + ret void +} diff --git a/test/Assembler/align-inst.ll b/test/Assembler/align-inst.ll new file mode 100644 index 0000000..6f7100e --- /dev/null +++ b/test/Assembler/align-inst.ll @@ -0,0 +1,10 @@ +; RUN: llvm-as %s -o /dev/null + +@A = global i1 0, align 536870912 + +define void @foo() { + %p = alloca i1, align 536870912 + load i1* %p, align 536870912 + store i1 false, i1* %p, align 536870912 + ret void +} diff --git a/test/Assembler/comment.ll b/test/Assembler/comment.ll new file mode 100644 index 0000000..fe23d26 --- /dev/null +++ b/test/Assembler/comment.ll @@ -0,0 +1,20 @@ +; RUN: llvm-as < %s | llvm-dis -show-annotations | FileCheck -check-prefix=ANNOT %s +; RUN: llvm-as < %s | llvm-dis | FileCheck -check-prefix=BARE %s + +; The bare version of this file should not have any #uses lines. +; BARE: @B = +; BARE-NOT: #uses +; BARE: } + +@B = external global i32 +; ANNOT: @B = external global i32 ; [#uses=0] + +define <4 x i1> @foo(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %cmp = fcmp olt <4 x float> %a, %b ; [#uses=1] + ret <4 x i1> %cmp +} + +; ANNOT: %cmp = fcmp olt <4 x float> %a, %b ; [#uses=1] + + diff --git a/test/Assembler/getelementptr.ll b/test/Assembler/getelementptr.ll index 803d6d3..ebef58f 100644 --- a/test/Assembler/getelementptr.ll +++ b/test/Assembler/getelementptr.ll @@ -3,9 +3,9 @@ ; Verify that over-indexed getelementptrs are folded. @A = external global [2 x [3 x [5 x [7 x i32]]]] @B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 0, i64 0, i64 2, i64 1, i64 7523) -; CHECK: @B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 36, i64 0, i64 1, i64 0, i64 5) ; <i32**> [#uses=0] +; CHECK: @B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 36, i64 0, i64 1, i64 0, i64 5) @C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 3, i64 2, i64 0, i64 0, i64 7523) -; CHECK: @C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 39, i64 1, i64 1, i64 4, i64 5) ; <i32**> [#uses=0] +; CHECK: @C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 39, i64 1, i64 1, i64 4, i64 5) ;; Verify that i16 indices work. @x = external global {i32, i32} diff --git a/test/Bindings/Ocaml/analysis.ml b/test/Bindings/Ocaml/analysis.ml index e830106..bf21782 100644 --- a/test/Bindings/Ocaml/analysis.ml +++ b/test/Bindings/Ocaml/analysis.ml @@ -1,5 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa %s -o %t - * RUN: ./%t %t.bc + * RUN: %t *) open Llvm diff --git a/test/Bindings/Ocaml/bitreader.ml b/test/Bindings/Ocaml/bitreader.ml index 112ca61..30b07d2 100644 --- a/test/Bindings/Ocaml/bitreader.ml +++ b/test/Bindings/Ocaml/bitreader.ml @@ -1,5 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_bitwriter.cmxa %s -o %t - * RUN: ./%t %t.bc + * RUN: %t %t.bc * RUN: llvm-dis < %t.bc | grep caml_int_ty *) diff --git a/test/Bindings/Ocaml/bitwriter.ml b/test/Bindings/Ocaml/bitwriter.ml index ef1c9ab..8eb923e 100644 --- a/test/Bindings/Ocaml/bitwriter.ml +++ b/test/Bindings/Ocaml/bitwriter.ml @@ -1,5 +1,5 @@ (* RUN: %ocamlopt -warn-error A unix.cmxa llvm.cmxa llvm_bitwriter.cmxa %s -o %t - * RUN: ./%t %t.bc + * RUN: %t %t.bc * RUN: llvm-dis < %t.bc | grep caml_int_ty *) diff --git a/test/Bindings/Ocaml/executionengine.ml b/test/Bindings/Ocaml/executionengine.ml index 2caeb82..63040e4 100644 --- a/test/Bindings/Ocaml/executionengine.ml +++ b/test/Bindings/Ocaml/executionengine.ml @@ -1,5 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa llvm_executionengine.cmxa %s -o %t - * RUN: ./%t %t.bc + * RUN: %t *) open Llvm diff --git a/test/Bindings/Ocaml/scalar_opts.ml b/test/Bindings/Ocaml/scalar_opts.ml index f28eff2..8a6af01 100644 --- a/test/Bindings/Ocaml/scalar_opts.ml +++ b/test/Bindings/Ocaml/scalar_opts.ml @@ -1,4 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_scalar_opts.cmxa llvm_target.cmxa %s -o %t + * RUN: %t %t.bc *) (* Note: It takes several seconds for ocamlopt to link an executable with @@ -13,8 +14,11 @@ let context = global_context () let void_type = Llvm.void_type context (* Tiny unit test framework - really just to help find which line is busted *) +let print_checkpoints = false + let suite name f = - prerr_endline (name ^ ":"); + if print_checkpoints then + prerr_endline (name ^ ":"); f () diff --git a/test/Bindings/Ocaml/target.ml b/test/Bindings/Ocaml/target.ml index 3c3b733..bfaf37c 100644 --- a/test/Bindings/Ocaml/target.ml +++ b/test/Bindings/Ocaml/target.ml @@ -1,4 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa %s -o %t + * RUN: %t %t.bc *) (* Note: It takes several seconds for ocamlopt to link an executable with @@ -8,13 +9,17 @@ open Llvm open Llvm_target + let context = global_context () let i32_type = Llvm.i32_type context let i64_type = Llvm.i64_type context (* Tiny unit test framework - really just to help find which line is busted *) +let print_checkpoints = false + let suite name f = - prerr_endline (name ^ ":"); + if print_checkpoints then + prerr_endline (name ^ ":"); f () diff --git a/test/Bindings/Ocaml/vmcore.ml b/test/Bindings/Ocaml/vmcore.ml index 506bf50..e55ab96 100644 --- a/test/Bindings/Ocaml/vmcore.ml +++ b/test/Bindings/Ocaml/vmcore.ml @@ -1,5 +1,5 @@ (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa llvm_bitwriter.cmxa %s -o %t - * RUN: ./%t %t.bc + * RUN: %t %t.bc * RUN: llvm-dis < %t.bc > %t.ll *) @@ -296,12 +296,6 @@ let test_constants () = insist ((struct_type context [| i16_type; i16_type; i32_type; i32_type |]) = (type_of c)); - group "union"; - let t = union_type context [| i1_type; i16_type; i64_type; double_type |] in - let c = const_union t one in - ignore (define_global "const_union" c m); - insist (t = (type_of c)); - (* RUN: grep {const_null.*zeroinit} < %t.ll *) group "null"; @@ -436,7 +430,7 @@ let test_constants () = * RUN: grep {const_select.*select} < %t.ll * RUN: grep {const_extractelement.*extractelement} < %t.ll * RUN: grep {const_insertelement.*insertelement} < %t.ll - * RUN: grep {const_shufflevector.*shufflevector} < %t.ll + * RUN: grep {const_shufflevector = global <4 x i32> <i32 0, i32 1, i32 1, i32 0>} < %t.ll *) ignore (define_global "const_size_of" (size_of (pointer_type i8_type)) m); ignore (define_global "const_gep" (const_gep foldbomb_gv [| five |]) m); @@ -455,7 +449,8 @@ let test_constants () = ignore (define_global "const_shufflevector" (const_shufflevector (const_vector [| zero; one |]) (const_vector [| one; zero |]) - (const_bitcast foldbomb (vector_type i32_type 2))) m); + (const_vector [| const_int i32_type 0; const_int i32_type 1; + const_int i32_type 2; const_int i32_type 3 |])) m); group "asm"; begin let ft = function_type void_type [| i32_type; i32_type; i32_type |] in @@ -642,11 +637,18 @@ let test_users () = let p1 = param fn 0 in let p2 = param fn 1 in + let a3 = build_alloca i32_type "user_alloca" b in + let p3 = build_load a3 "user_load" b in let i = build_add p1 p2 "sum" b in + insist ((num_operands i) = 2); insist ((operand i 0) = p1); insist ((operand i 1) = p2); + set_operand i 1 p3; + insist ((operand i 1) != p2); + insist ((operand i 1) = p3); + ignore (build_unreachable b) @@ -1154,13 +1156,13 @@ let test_builder () = group "comparisons"; begin (* RUN: grep {%build_icmp_ne = icmp ne i32 %P1, %P2} < %t.ll * RUN: grep {%build_icmp_sle = icmp sle i32 %P2, %P1} < %t.ll - * RUN: grep {%build_icmp_false = fcmp false float %F1, %F2} < %t.ll - * RUN: grep {%build_icmp_true = fcmp true float %F2, %F1} < %t.ll + * RUN: grep {%build_fcmp_false = fcmp false float %F1, %F2} < %t.ll + * RUN: grep {%build_fcmp_true = fcmp true float %F2, %F1} < %t.ll *) ignore (build_icmp Icmp.Ne p1 p2 "build_icmp_ne" atentry); ignore (build_icmp Icmp.Sle p2 p1 "build_icmp_sle" atentry); - ignore (build_fcmp Fcmp.False f1 f2 "build_icmp_false" atentry); - ignore (build_fcmp Fcmp.True f2 f1 "build_icmp_true" atentry) + ignore (build_fcmp Fcmp.False f1 f2 "build_fcmp_false" atentry); + ignore (build_fcmp Fcmp.True f2 f1 "build_fcmp_true" atentry) end; group "miscellaneous"; begin @@ -1229,13 +1231,19 @@ let test_builder () = group "dbg"; begin (* RUN: grep {%dbg = add i32 %P1, %P2, !dbg !1} < %t.ll - * RUN: grep {!1 = metadata !\{i32 2, metadata !"dbg test"\}} < %t.ll + * RUN: grep {!1 = metadata !\{i32 2, i32 3, metadata !2, metadata !2\}} < %t.ll *) - let m1 = const_int i32_type 2 in - let m2 = mdstring context "dbg test" in - let md = mdnode context [| m1; m2 |] in + insist ((current_debug_location atentry) = None); + + let m_line = const_int i32_type 2 in + let m_col = const_int i32_type 3 in + let m_scope = mdnode context [| |] in + let m_inlined = mdnode context [| |] in + let md = mdnode context [| m_line; m_col; m_scope; m_inlined |] in set_current_debug_location atentry md; + insist ((current_debug_location atentry) = Some md); + let i = build_add p1 p2 "dbg" atentry in insist ((has_metadata i) = true); diff --git a/test/Bitcode/AutoUpgradeGlobals.ll b/test/Bitcode/AutoUpgradeGlobals.ll new file mode 100644 index 0000000..8a87673 --- /dev/null +++ b/test/Bitcode/AutoUpgradeGlobals.ll @@ -0,0 +1,3 @@ +; This isn't really an assembly file. It just runs test on bitcode to ensure +; it is auto-upgraded. +; RUN: llvm-dis < %s.bc | not grep {i32 @\\.llvm\\.eh} diff --git a/test/Bitcode/AutoUpgradeGlobals.ll.bc b/test/Bitcode/AutoUpgradeGlobals.ll.bc Binary files differnew file mode 100644 index 0000000..1abe968 --- /dev/null +++ b/test/Bitcode/AutoUpgradeGlobals.ll.bc diff --git a/test/Bitcode/neon-intrinsics.ll b/test/Bitcode/neon-intrinsics.ll new file mode 100644 index 0000000..272cd42 --- /dev/null +++ b/test/Bitcode/neon-intrinsics.ll @@ -0,0 +1,213 @@ +; RUN: llvm-dis < %s.bc | FileCheck %s + +; vmovls should be auto-upgraded to sext + +; CHECK: vmovls8 +; CHECK-NOT: arm.neon.vmovls.v8i16 +; CHECK: sext <8 x i8> + +; CHECK: vmovls16 +; CHECK-NOT: arm.neon.vmovls.v4i32 +; CHECK: sext <4 x i16> + +; CHECK: vmovls32 +; CHECK-NOT: arm.neon.vmovls.v2i64 +; CHECK: sext <2 x i32> + +; vmovlu should be auto-upgraded to zext + +; CHECK: vmovlu8 +; CHECK-NOT: arm.neon.vmovlu.v8i16 +; CHECK: zext <8 x i8> + +; CHECK: vmovlu16 +; CHECK-NOT: arm.neon.vmovlu.v4i32 +; CHECK: zext <4 x i16> + +; CHECK: vmovlu32 +; CHECK-NOT: arm.neon.vmovlu.v2i64 +; CHECK: zext <2 x i32> + +; vaddl/vaddw should be auto-upgraded to add with sext/zext + +; CHECK: vaddls16 +; CHECK-NOT: arm.neon.vaddls.v4i32 +; CHECK: sext <4 x i16> +; CHECK-NEXT: sext <4 x i16> +; CHECK-NEXT: add <4 x i32> + +; CHECK: vaddlu32 +; CHECK-NOT: arm.neon.vaddlu.v2i64 +; CHECK: zext <2 x i32> +; CHECK-NEXT: zext <2 x i32> +; CHECK-NEXT: add <2 x i64> + +; CHECK: vaddws8 +; CHECK-NOT: arm.neon.vaddws.v8i16 +; CHECK: sext <8 x i8> +; CHECK-NEXT: add <8 x i16> + +; CHECK: vaddwu16 +; CHECK-NOT: arm.neon.vaddwu.v4i32 +; CHECK: zext <4 x i16> +; CHECK-NEXT: add <4 x i32> + +; vsubl/vsubw should be auto-upgraded to subtract with sext/zext + +; CHECK: vsubls16 +; CHECK-NOT: arm.neon.vsubls.v4i32 +; CHECK: sext <4 x i16> +; CHECK-NEXT: sext <4 x i16> +; CHECK-NEXT: sub <4 x i32> + +; CHECK: vsublu32 +; CHECK-NOT: arm.neon.vsublu.v2i64 +; CHECK: zext <2 x i32> +; CHECK-NEXT: zext <2 x i32> +; CHECK-NEXT: sub <2 x i64> + +; CHECK: vsubws8 +; CHECK-NOT: arm.neon.vsubws.v8i16 +; CHECK: sext <8 x i8> +; CHECK-NEXT: sub <8 x i16> + +; CHECK: vsubwu16 +; CHECK-NOT: arm.neon.vsubwu.v4i32 +; CHECK: zext <4 x i16> +; CHECK-NEXT: sub <4 x i32> + +; vmull should be auto-upgraded to multiply with sext/zext +; (but vmullp should remain an intrinsic) + +; CHECK: vmulls8 +; CHECK-NOT: arm.neon.vmulls.v8i16 +; CHECK: sext <8 x i8> +; CHECK-NEXT: sext <8 x i8> +; CHECK-NEXT: mul <8 x i16> + +; CHECK: vmullu16 +; CHECK-NOT: arm.neon.vmullu.v4i32 +; CHECK: zext <4 x i16> +; CHECK-NEXT: zext <4 x i16> +; CHECK-NEXT: mul <4 x i32> + +; CHECK: vmullp8 +; CHECK: arm.neon.vmullp.v8i16 + +; vmlal should be auto-upgraded to multiply/add with sext/zext + +; CHECK: vmlals32 +; CHECK-NOT: arm.neon.vmlals.v2i64 +; CHECK: sext <2 x i32> +; CHECK-NEXT: sext <2 x i32> +; CHECK-NEXT: mul <2 x i64> +; CHECK-NEXT: add <2 x i64> + +; CHECK: vmlalu8 +; CHECK-NOT: arm.neon.vmlalu.v8i16 +; CHECK: zext <8 x i8> +; CHECK-NEXT: zext <8 x i8> +; CHECK-NEXT: mul <8 x i16> +; CHECK-NEXT: add <8 x i16> + +; vmlsl should be auto-upgraded to multiply/sub with sext/zext + +; CHECK: vmlsls16 +; CHECK-NOT: arm.neon.vmlsls.v4i32 +; CHECK: sext <4 x i16> +; CHECK-NEXT: sext <4 x i16> +; CHECK-NEXT: mul <4 x i32> +; CHECK-NEXT: sub <4 x i32> + +; CHECK: vmlslu32 +; CHECK-NOT: arm.neon.vmlslu.v2i64 +; CHECK: zext <2 x i32> +; CHECK-NEXT: zext <2 x i32> +; CHECK-NEXT: mul <2 x i64> +; CHECK-NEXT: sub <2 x i64> + +; vaba should be auto-upgraded to vabd + add + +; CHECK: vabas32 +; CHECK-NOT: arm.neon.vabas.v2i32 +; CHECK: arm.neon.vabds.v2i32 +; CHECK-NEXT: add <2 x i32> + +; CHECK: vabaQu8 +; CHECK-NOT: arm.neon.vabau.v16i8 +; CHECK: arm.neon.vabdu.v16i8 +; CHECK-NEXT: add <16 x i8> + +; vabal should be auto-upgraded to vabd with zext + add + +; CHECK: vabals16 +; CHECK-NOT: arm.neon.vabals.v4i32 +; CHECK: arm.neon.vabds.v4i16 +; CHECK-NEXT: zext <4 x i16> +; CHECK-NEXT: add <4 x i32> + +; CHECK: vabalu32 +; CHECK-NOT: arm.neon.vabalu.v2i64 +; CHECK: arm.neon.vabdu.v2i32 +; CHECK-NEXT: zext <2 x i32> +; CHECK-NEXT: add <2 x i64> + +; vabdl should be auto-upgraded to vabd with zext + +; CHECK: vabdls8 +; CHECK-NOT: arm.neon.vabdls.v8i16 +; CHECK: arm.neon.vabds.v8i8 +; CHECK-NEXT: zext <8 x i8> + +; CHECK: vabdlu16 +; CHECK-NOT: arm.neon.vabdlu.v4i32 +; CHECK: arm.neon.vabdu.v4i16 +; CHECK-NEXT: zext <4 x i16> + +; vmovn should be auto-upgraded to trunc + +; CHECK: vmovni16 +; CHECK-NOT: arm.neon.vmovn.v8i8 +; CHECK: trunc <8 x i16> + +; CHECK: vmovni32 +; CHECK-NOT: arm.neon.vmovn.v4i16 +; CHECK: trunc <4 x i32> + +; CHECK: vmovni64 +; CHECK-NOT: arm.neon.vmovn.v2i32 +; CHECK: trunc <2 x i64> + +; vld* and vst* intrinsic calls need an alignment argument (defaulted to 1) + +; CHECK: vld1i8 +; CHECK: i32 1 +; CHECK: vld2Qi16 +; CHECK: i32 1 +; CHECK: vld3i32 +; CHECK: i32 1 +; CHECK: vld4Qf +; CHECK: i32 1 + +; CHECK: vst1i8 +; CHECK: i32 1 +; CHECK: vst2Qi16 +; CHECK: i32 1 +; CHECK: vst3i32 +; CHECK: i32 1 +; CHECK: vst4Qf +; CHECK: i32 1 + +; CHECK: vld2laneQi16 +; CHECK: i32 1 +; CHECK: vld3lanei32 +; CHECK: i32 1 +; CHECK: vld4laneQf +; CHECK: i32 1 + +; CHECK: vst2laneQi16 +; CHECK: i32 1 +; CHECK: vst3lanei32 +; CHECK: i32 1 +; CHECK: vst4laneQf +; CHECK: i32 1 diff --git a/test/Bitcode/neon-intrinsics.ll.bc b/test/Bitcode/neon-intrinsics.ll.bc Binary files differnew file mode 100644 index 0000000..cabc3c9 --- /dev/null +++ b/test/Bitcode/neon-intrinsics.ll.bc diff --git a/test/BugPoint/crash-narrowfunctiontest.ll b/test/BugPoint/crash-narrowfunctiontest.ll index 6ad09d2..18a31eb 100644 --- a/test/BugPoint/crash-narrowfunctiontest.ll +++ b/test/BugPoint/crash-narrowfunctiontest.ll @@ -1,6 +1,8 @@ ; Test that bugpoint can narrow down the testcase to the important function +; FIXME: This likely fails on windows ; -; RUN: bugpoint %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null +; RUN: bugpoint -load %llvmlibsdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null +; XFAIL: mingw define i32 @foo() { ret i32 1 } diff --git a/test/BugPoint/metadata.ll b/test/BugPoint/metadata.ll new file mode 100644 index 0000000..f2541ee --- /dev/null +++ b/test/BugPoint/metadata.ll @@ -0,0 +1,35 @@ +; RUN: bugpoint -load %llvmlibsdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null +; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s +; XFAIL: mingw + +; Bugpoint should keep the call's metadata attached to the call. + +; CHECK: call void @foo(), !dbg !0, !attach !2 +; CHECK: !0 = metadata !{i32 104, i32 105, metadata !1, metadata !1} +; CHECK: !1 = metadata !{i32 0, i32 0, i32 0, metadata !"source.c", metadata !"/dir", metadata !"me", i1 true, i1 false, metadata !"", i32 0} +; CHECK: !2 = metadata !{metadata !"the call to foo"} + +%rust_task = type {} +define void @test(i32* %a, i8* %b) { + %s = mul i8 22, 9, !attach !0, !dbg !10 + store i8 %s, i8* %b, !attach !1, !dbg !11 + call void @foo(), !attach !2, !dbg !12 + store i32 7, i32* %a, !attach !3, !dbg !13 + %t = add i32 0, 5, !attach !4, !dbg !14 + ret void +} + +declare void @foo() + +!0 = metadata !{metadata !"boring"} +!1 = metadata !{metadata !"uninteresting"} +!2 = metadata !{metadata !"the call to foo"} +!3 = metadata !{metadata !"noise"} +!4 = metadata !{metadata !"filler"} + +!9 = metadata !{i32 0, i32 0, i32 0, metadata !"source.c", metadata !"/dir", metadata !"me", i1 true, i1 false, metadata !"", i32 0} +!10 = metadata !{i32 100, i32 101, metadata !9, metadata !9} +!11 = metadata !{i32 102, i32 103, metadata !9, metadata !9} +!12 = metadata !{i32 104, i32 105, metadata !9, metadata !9} +!13 = metadata !{i32 106, i32 107, metadata !9, metadata !9} +!14 = metadata !{i32 108, i32 109, metadata !9, metadata !9} diff --git a/test/BugPoint/remove_arguments_test.ll b/test/BugPoint/remove_arguments_test.ll index 439ea54..791ec69 100644 --- a/test/BugPoint/remove_arguments_test.ll +++ b/test/BugPoint/remove_arguments_test.ll @@ -1,5 +1,7 @@ -; RUN: bugpoint %s -output-prefix %t -bugpoint-crashcalls -silence-passes +; FIXME: This likely fails on windows +; RUN: bugpoint -load %llvmlibsdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes ; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s +; XFAIL: mingw ; Test to make sure that arguments are removed from the function if they are ; unnecessary. And clean up any types that that frees up too. diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 433af90..ad9a243 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -24,6 +24,23 @@ endif() include(FindPythonInterp) if(PYTHONINTERP_FOUND) + get_directory_property(DEFINITIONS COMPILE_DEFINITIONS) + foreach(DEF ${DEFINITIONS}) + set(DEFS "${DEFS} -D${DEF}") + endforeach() + get_directory_property(INC_DIRS INCLUDE_DIRECTORIES) + foreach(INC_DIR ${INC_DIRS}) + set(IDIRS "${IDIRS} -I${INC_DIR}") + endforeach() + string(REPLACE "<CMAKE_CXX_COMPILER>" "${CMAKE_CXX_COMPILER}" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT}) + string(REPLACE "<DEFINES>" "${DEFS}" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + string(REPLACE "<FLAGS>" "${CMAKE_CXX_FLAGS}" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + string(REPLACE "-o" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + string(REGEX REPLACE "<[^>]+>" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} ${IDIRS}") + if(NOT MSVC) + set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} -x c++") + endif() configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in ${CMAKE_CURRENT_BINARY_DIR}/site.exp) diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll index 81483cb..ee63656 100644 --- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll +++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll @@ -1,11 +1,15 @@ -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | FileCheck %s @quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] @dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] @A = external global [4 x [4 x i32]] ; <[4 x [4 x i32]]*> [#uses=1] +; CHECK: dct_luma_sp: define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) { entry: +; Make sure to use base-updating stores for saving callee-saved registers. +; CHECK-NOT: sub sp +; CHECK: vstmdb sp! %predicted_block = alloca [4 x [4 x i32]], align 4 ; <[4 x [4 x i32]]*> [#uses=1] br label %cond_next489 diff --git a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll index d741112..76fa364 100644 --- a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll +++ b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll @@ -2,7 +2,7 @@ ; PR1266 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "arm-linux-gnueabi" +target triple = "arm-unknown-linux-gnueabi" %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 } %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] } %struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] } diff --git a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll index 030486a..7ba2a19 100644 --- a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll +++ b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll @@ -2,7 +2,7 @@ ; PR1424 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "arm-linux-gnueabi" +target triple = "arm-unknown-linux-gnueabi" %struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* } %struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* } %struct.AVCodecContext = type { %struct.AVClass*, i32, i32, i32, i32, i32, i8*, i32, %struct.AVRational, i32, i32, i32, i32, i32, void (%struct.AVCodecContext*, %struct.AVFrame*, i32*, i32, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, float, float, i32, i32, i32, i32, float, i32, i32, i32, %struct.AVCodec*, i8*, i32, i32, void (%struct.AVCodecContext*, i8*, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [32 x i8], i32, i32, i32, i32, i32, i32, i32, float, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, void (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i8*, i8*, float, float, i32, %struct.RcOverride*, i32, i8*, i32, i32, i32, float, float, float, float, i32, float, float, float, float, float, i32, i32, i32, i32*, i32, i32, i32, i32, %struct.AVRational, %struct.AVFrame*, i32, i32, [4 x i64], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32*)*, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i16*, i16*, i32, i32, i32, i32, %struct.AVPaletteControl*, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*)*, i8**, i32*, i32)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64 } diff --git a/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll index 198faeb..f89a5de 100644 --- a/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll +++ b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll @@ -17,3 +17,17 @@ entry: store <8 x i16> %1, <8 x i16>* %agg.result12.1.0, align 16 ret void } + +; Radar 8290937: Ignore undef shuffle indices. +; CHECK: t2 +; CHECK: vtrn.16 +define void @t2(%struct.int16x8x2_t* nocapture %ptr, <4 x i16> %a.0, <4 x i16> %b.0) nounwind { +entry: + %0 = shufflevector <4 x i16> %a.0, <4 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef> + %1 = shufflevector <4 x i16> %a.0, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> + %ptr26.0 = getelementptr inbounds %struct.int16x8x2_t* %ptr, i32 0, i32 0, i32 0, i32 0 + store <8 x i16> %0, <8 x i16>* %ptr26.0, align 16 + %ptr20.1.0 = getelementptr inbounds %struct.int16x8x2_t* %ptr, i32 0, i32 0, i32 1, i32 0 + store <8 x i16> %1, <8 x i16>* %ptr20.1.0, align 16 + ret void +} diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll index ff60fa8..e47c038 100644 --- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll +++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll @@ -5,32 +5,32 @@ %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } -declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind { - %tmp1b = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A2) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1b = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A2, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 0 ; <<8 x i8>> [#uses=1] %tmp4b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 1 ; <<8 x i8>> [#uses=1] - %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1] %tmp4d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 1 ; <<8 x i8>> [#uses=1] - %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A5) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A5, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2e = extractvalue %struct.__neon_int8x8x3_t %tmp1e, 0 ; <<8 x i8>> [#uses=1] - %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1] - %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A7) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A7, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 0 ; <<8 x i8>> [#uses=1] %tmp4g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 1 ; <<8 x i8>> [#uses=1] - %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A8) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A8, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 0 ; <<8 x i8>> [#uses=1] %tmp3h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 2 ; <<8 x i8>> [#uses=1] %tmp2bd = add <8 x i8> %tmp2b, %tmp2d ; <<8 x i8>> [#uses=1] %tmp4bd = add <8 x i8> %tmp4b, %tmp4d ; <<8 x i8>> [#uses=1] %tmp2abcd = mul <8 x i8> undef, %tmp2bd ; <<8 x i8>> [#uses=1] %tmp4abcd = mul <8 x i8> undef, %tmp4bd ; <<8 x i8>> [#uses=2] - call void @llvm.arm.neon.vst3.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd) + call void @llvm.arm.neon.vst3.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd, i32 1) %tmp2ef = sub <8 x i8> %tmp2e, %tmp2f ; <<8 x i8>> [#uses=1] %tmp2gh = sub <8 x i8> %tmp2g, %tmp2h ; <<8 x i8>> [#uses=1] %tmp3gh = sub <8 x i8> zeroinitializer, %tmp3h ; <<8 x i8>> [#uses=1] @@ -38,8 +38,8 @@ define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A %tmp2efgh = mul <8 x i8> %tmp2ef, %tmp2gh ; <<8 x i8>> [#uses=1] %tmp3efgh = mul <8 x i8> undef, %tmp3gh ; <<8 x i8>> [#uses=1] %tmp4efgh = mul <8 x i8> %tmp4ef, undef ; <<8 x i8>> [#uses=2] - call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh) + call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh, i32 1) %tmp4 = sub <8 x i8> %tmp4efgh, %tmp4abcd ; <<8 x i8>> [#uses=1] - tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef) + tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef, i32 1) ret <8 x i8> %tmp4 } diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll index ce959d1..cd1c9c8 100644 --- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll +++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll @@ -36,8 +36,8 @@ entry: %tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3 %19 = fmul <4 x float> %tmp5, %2 %20 = bitcast float* %fltp to i8* - tail call void @llvm.arm.neon.vst1.v4f32(i8* %20, <4 x float> %19) + tail call void @llvm.arm.neon.vst1.v4f32(i8* %20, <4 x float> %19, i32 1) ret void } -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll index e4f2099..6f48796 100644 --- a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll +++ b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll @@ -12,8 +12,8 @@ entry: %tmp9 = trunc i128 %tmp8 to i64 ; <i64> [#uses=1] %tmp16.i = bitcast i64 %tmp6 to <8 x i8> ; <<8 x i8>> [#uses=1] %tmp20.i = bitcast i64 %tmp9 to <8 x i8> ; <<8 x i8>> [#uses=1] - tail call void @llvm.arm.neon.vst2.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i) nounwind + tail call void @llvm.arm.neon.vst2.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i, i32 1) nounwind ret void } -declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind +declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind diff --git a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll index 7650d88..ac8e809 100755 --- a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll +++ b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=arm -mtriple=armv4t-unknown-linux-gnueabi | FileCheck %s ; PR 7433 +; XFAIL: * %0 = type { i8*, i8* } %1 = type { i8*, i8*, i8* } diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll index 0c5b180..ffc47eb 100644 --- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll +++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll @@ -16,10 +16,10 @@ target triple = "thumbv7-apple-darwin10" define i32 @test(i8* %arg) nounwind { entry: - %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %arg) + %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %arg, i32 1) %1 = shufflevector <2 x i64> undef, <2 x i64> %0, <2 x i32> <i32 1, i32 2> store <2 x i64> %1, <2 x i64>* undef, align 16 ret i32 undef } -declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly +declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly diff --git a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll new file mode 100644 index 0000000..c03c815 --- /dev/null +++ b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll @@ -0,0 +1,95 @@ +; RUN: llc -enable-correct-eh-support < %s +; PR7716 +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10.0.0" + +%0 = type { i8*, i8* } +%struct.A = type { i32 } + +@d = internal global i32 0, align 4 ; <i32*> [#uses=6] +@_ZTVN10__cxxabiv117__class_type_infoE = external global i8* ; <i8**> [#uses=1] +@_ZTS1A = internal constant [3 x i8] c"1A\00" ; <[3 x i8]*> [#uses=1] +@_ZTI1A = internal constant %0 { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([3 x i8]* @_ZTS1A, i32 0, i32 0) } ; <%0*> [#uses=1] +@.str2 = private constant [18 x i8] c"c == %d, d == %d\0A\00" ; <[18 x i8]*> [#uses=1] +@.str3 = private constant [16 x i8] c"A(const A&) %d\0A\00" ; <[16 x i8]*> [#uses=1] +@.str4 = private constant [9 x i8] c"~A() %d\0A\00" ; <[9 x i8]*> [#uses=1] +@.str5 = private constant [8 x i8] c"A() %d\0A\00" ; <[8 x i8]*> [#uses=1] +@str = internal constant [14 x i8] c"Throwing 1...\00" ; <[14 x i8]*> [#uses=1] +@str1 = internal constant [8 x i8] c"Caught.\00" ; <[8 x i8]*> [#uses=1] + +declare i32 @printf(i8* nocapture, ...) nounwind + +declare i8* @__cxa_allocate_exception(i32) + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @__gxx_personality_sj0(...) + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare i32 @llvm.eh.typeid.for(i8*) nounwind + +declare void @_Unwind_SjLj_Resume(i8*) + +define internal void @_ZN1AD1Ev(%struct.A* nocapture %this) nounwind ssp align 2 { +entry: + %tmp.i = getelementptr inbounds %struct.A* %this, i32 0, i32 0 ; <i32*> [#uses=1] + %tmp2.i = load i32* %tmp.i ; <i32> [#uses=1] + %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 %tmp2.i) nounwind ; <i32> [#uses=0] + %tmp3.i = load i32* @d ; <i32> [#uses=1] + %inc.i = add nsw i32 %tmp3.i, 1 ; <i32> [#uses=1] + store i32 %inc.i, i32* @d + ret void +} + +declare void @__cxa_throw(i8*, i8*, i8*) + +define i32 @main() ssp { +entry: + %puts.i = tail call i32 @puts(i8* getelementptr inbounds ([14 x i8]* @str, i32 0, i32 0)) ; <i32> [#uses=0] + %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind ; <i8*> [#uses=2] + %tmp2.i.i.i = bitcast i8* %exception.i to i32* ; <i32*> [#uses=1] + store i32 1, i32* %tmp2.i.i.i + %call.i.i.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str5, i32 0, i32 0), i32 1) nounwind ; <i32> [#uses=0] + invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (%0* @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1AD1Ev to i8*)) noreturn + to label %.noexc unwind label %lpad + +.noexc: ; preds = %entry + unreachable + +try.cont: ; preds = %lpad + %0 = tail call i8* @__cxa_get_exception_ptr(i8* %exn) nounwind ; <i8*> [#uses=0] + %call.i.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str3, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0] + %1 = tail call i8* @__cxa_begin_catch(i8* %exn) nounwind ; <i8*> [#uses=0] + %puts = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @str1, i32 0, i32 0)) ; <i32> [#uses=0] + %call.i.i3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0] + %tmp3.i.i = load i32* @d ; <i32> [#uses=1] + %inc.i.i4 = add nsw i32 %tmp3.i.i, 1 ; <i32> [#uses=1] + store i32 %inc.i.i4, i32* @d + tail call void @__cxa_end_catch() + %tmp13 = load i32* @d ; <i32> [#uses=1] + %call14 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str2, i32 0, i32 0), i32 2, i32 %tmp13) ; <i32> [#uses=0] + %tmp16 = load i32* @d ; <i32> [#uses=1] + %cmp = icmp ne i32 %tmp16, 2 ; <i1> [#uses=1] + %conv = zext i1 %cmp to i32 ; <i32> [#uses=1] + ret i32 %conv + +lpad: ; preds = %entry + %exn = tail call i8* @llvm.eh.exception() nounwind ; <i8*> [#uses=4] + %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* bitcast (%0* @_ZTI1A to i8*), i8* null) nounwind ; <i32> [#uses=1] + %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%0* @_ZTI1A to i8*)) nounwind ; <i32> [#uses=1] + %3 = icmp eq i32 %eh.selector, %2 ; <i1> [#uses=1] + br i1 %3, label %try.cont, label %eh.resume + +eh.resume: ; preds = %lpad + tail call void @_Unwind_SjLj_Resume(i8* %exn) noreturn + unreachable +} + +declare i8* @__cxa_get_exception_ptr(i8*) + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +declare i32 @puts(i8* nocapture) nounwind diff --git a/test/CodeGen/ARM/2010-08-04-EHCrash.ll b/test/CodeGen/ARM/2010-08-04-EHCrash.ll new file mode 100644 index 0000000..f57b7e6 --- /dev/null +++ b/test/CodeGen/ARM/2010-08-04-EHCrash.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 +; <rdar://problem/8264008> + +define linkonce_odr arm_apcscc void @func1() { +entry: + %save_filt.936 = alloca i32 ; <i32*> [#uses=2] + %save_eptr.935 = alloca i8* ; <i8**> [#uses=2] + %eh_exception = alloca i8* ; <i8**> [#uses=5] + %eh_selector = alloca i32 ; <i32*> [#uses=3] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call arm_apcscc void @func2() + br label %return + +bb: ; No predecessors! + %eh_select = load i32* %eh_selector ; <i32> [#uses=1] + store i32 %eh_select, i32* %save_filt.936, align 4 + %eh_value = load i8** %eh_exception ; <i8*> [#uses=1] + store i8* %eh_value, i8** %save_eptr.935, align 4 + invoke arm_apcscc void @func3() + to label %invcont unwind label %lpad + +invcont: ; preds = %bb + %tmp6 = load i8** %save_eptr.935, align 4 ; <i8*> [#uses=1] + store i8* %tmp6, i8** %eh_exception, align 4 + %tmp7 = load i32* %save_filt.936, align 4 ; <i32> [#uses=1] + store i32 %tmp7, i32* %eh_selector, align 4 + br label %Unwind + +bb12: ; preds = %ppad + call arm_apcscc void @_ZSt9terminatev() noreturn nounwind + unreachable + +return: ; preds = %entry + ret void + +lpad: ; preds = %bb + %eh_ptr = call i8* @llvm.eh.exception() ; <i8*> [#uses=1] + store i8* %eh_ptr, i8** %eh_exception + %eh_ptr13 = load i8** %eh_exception ; <i8*> [#uses=1] + %eh_select14 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr13, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 1) + store i32 %eh_select14, i32* %eh_selector + br label %ppad + +ppad: + br label %bb12 + +Unwind: + %eh_ptr15 = load i8** %eh_exception + call arm_apcscc void @_Unwind_SjLj_Resume(i8* %eh_ptr15) + unreachable +} + +declare arm_apcscc void @func2() + +declare arm_apcscc void @_ZSt9terminatev() noreturn nounwind + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare arm_apcscc void @_Unwind_SjLj_Resume(i8*) + +declare arm_apcscc void @func3() + +declare arm_apcscc i32 @__gxx_personality_sj0(...) diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll index cc71839..bb7853e 100644 --- a/test/CodeGen/ARM/arguments.ll +++ b/test/CodeGen/ARM/arguments.ll @@ -1,11 +1,43 @@ -; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ELF -; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 | FileCheck %s -check-prefix=ELF +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+vfp2 | FileCheck %s -check-prefix=DARWIN -define i32 @f(i32 %a, i64 %b) { +define i32 @f1(i32 %a, i64 %b) { +; ELF: f1: ; ELF: mov r0, r2 +; DARWIN: f1: ; DARWIN: mov r0, r1 - %tmp = call i32 @g(i64 %b) + %tmp = call i32 @g1(i64 %b) ret i32 %tmp } -declare i32 @g(i64) +; test that allocating the double to r2/r3 makes r1 unavailable on gnueabi. +define i32 @f2() nounwind optsize { +; ELF: f2: +; ELF: mov r0, #128 +; ELF: str r0, [sp] +; DARWIN: f2: +; DARWIN: mov r3, #128 +entry: + %0 = tail call i32 (i32, ...)* @g2(i32 5, double 1.600000e+01, i32 128) nounwind optsize ; <i32> [#uses=1] + %not. = icmp ne i32 %0, 128 ; <i1> [#uses=1] + %.0 = zext i1 %not. to i32 ; <i32> [#uses=1] + ret i32 %.0 +} + +; test that on gnueabi a 64 bit value at this position will cause r3 to go +; unused and the value stored in [sp] +; ELF: f3: +; ELF: ldr r0, [sp] +; ELF-NEXT: mov pc, lr +; DARWIN: f3: +; DARWIN: mov r0, r3 +; DARWIN-NEXT: mov pc, lr +define i32 @f3(i32 %i, i32 %j, i32 %k, i64 %l, ...) { +entry: + %0 = trunc i64 %l to i32 + ret i32 %0 +} + +declare i32 @g1(i64) + +declare i32 @g2(i32 %i, ...) diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll new file mode 100644 index 0000000..59e2b43 --- /dev/null +++ b/test/CodeGen/ARM/bfi.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=arm -mattr=+v6t2 < %s | FileCheck %s + +%struct.F = type { [3 x i8], i8 } + +@X = common global %struct.F zeroinitializer, align 4 ; <%struct.F*> [#uses=1] + +define void @f1([1 x i32] %f.coerce0) nounwind { +entry: +; CHECK: f1 +; CHECK: mov r2, #10 +; CHECK: bfi r1, r2, #22, #4 + %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1] + %1 = and i32 %0, -62914561 ; <i32> [#uses=1] + %2 = or i32 %1, 41943040 ; <i32> [#uses=1] + store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4 + ret void +} + +define i32 @f2(i32 %A, i32 %B) nounwind readnone optsize { +entry: +; CHECK: f2 +; CHECK: mov r1, r1, lsr #7 +; CHECK: bfi r0, r1, #7, #16 + %and = and i32 %A, -8388481 ; <i32> [#uses=1] + %and2 = and i32 %B, 8388480 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} + +define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize { +entry: +; CHECK: f3 +; CHECK: mov r2, r0, lsr #7 +; CHECK: mov r0, r1 +; CHECK: bfi r0, r2, #7, #16 + %and = and i32 %A, 8388480 ; <i32> [#uses=1] + %and2 = and i32 %B, -8388481 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll index f1269d5..db5afe3 100644 --- a/test/CodeGen/ARM/call-tc.ll +++ b/test/CodeGen/ARM/call-tc.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5 ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\ ; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF +; XFAIL: * @t = weak global i32 ()* null ; <i32 ()**> [#uses=1] diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll new file mode 100644 index 0000000..25c5568 --- /dev/null +++ b/test/CodeGen/ARM/code-placement.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; PHI elimination shouldn't break backedge. +; rdar://8263994 + +%struct.list_data_s = type { i16, i16 } +%struct.list_head = type { %struct.list_head*, %struct.list_data_s* } + +define arm_apcscc %struct.list_head* @t(%struct.list_head* %list) nounwind { +entry: + %0 = icmp eq %struct.list_head* %list, null + br i1 %0, label %bb2, label %bb + +bb: +; CHECK: LBB0_2: +; CHECK: bne LBB0_2 +; CHECK-NOT: b LBB0_2 +; CHECK: bx lr + %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ] + %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ] + %1 = getelementptr inbounds %struct.list_head* %list_addr.05, i32 0, i32 0 + %2 = load %struct.list_head** %1, align 4 + store %struct.list_head* %next.04, %struct.list_head** %1, align 4 + %3 = icmp eq %struct.list_head* %2, null + br i1 %3, label %bb2, label %bb + +bb2: + %next.0.lcssa = phi %struct.list_head* [ null, %entry ], [ %list_addr.05, %bb ] + ret %struct.list_head* %next.0.lcssa +} diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll index d833afa..448b437 100644 --- a/test/CodeGen/ARM/div.ll +++ b/test/CodeGen/ARM/div.ll @@ -1,13 +1,9 @@ ; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=CHECK-ARM -; RUN: llc < %s -march=arm -mcpu=cortex-m3 \ -; RUN: | FileCheck %s -check-prefix=CHECK-ARMV7M define i32 @f1(i32 %a, i32 %b) { entry: ; CHECK-ARM: f1 ; CHECK-ARM: __divsi3 -; CHECK-ARMV7M: f1 -; CHECK-ARMV7M: sdiv %tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } @@ -16,8 +12,6 @@ define i32 @f2(i32 %a, i32 %b) { entry: ; CHECK-ARM: f2 ; CHECK-ARM: __udivsi3 -; CHECK-ARMV7M: f2 -; CHECK-ARMV7M: udiv %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } @@ -26,8 +20,6 @@ define i32 @f3(i32 %a, i32 %b) { entry: ; CHECK-ARM: f3 ; CHECK-ARM: __modsi3 -; CHECK-ARMV7M: f3 -; CHECK-ARMV7M: sdiv %tmp1 = srem i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } @@ -36,8 +28,6 @@ define i32 @f4(i32 %a, i32 %b) { entry: ; CHECK-ARM: f4 ; CHECK-ARM: __umodsi3 -; CHECK-ARMV7M: f4 -; CHECK-ARMV7M: udiv %tmp1 = urem i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll new file mode 100644 index 0000000..3bee84d --- /dev/null +++ b/test/CodeGen/ARM/fast-isel.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -O0 -arm-fast-isel -fast-isel-abort -mtriple=armv7-apple-darwin +; RUN: llc < %s -O0 -arm-fast-isel -fast-isel-abort -mtriple=thumbv7-apple-darwin + +; Very basic fast-isel functionality. + +define i32 @add(i32 %a, i32 %b) nounwind ssp { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr + store i32 %b, i32* %b.addr + %tmp = load i32* %a.addr + %tmp1 = load i32* %b.addr + %add = add nsw i32 %tmp, %tmp1 + ret i32 %add +} + +define i32* @foo(i32* %p, i32* %q, i32** %z) nounwind { +entry: + %r = load i32* %p + %s = load i32* %q + %y = load i32** %z + br label %fast + +fast: + %t0 = add i32 %r, %s + %t1 = mul i32 %t0, %s + %t2 = sub i32 %t1, %s + %t3 = and i32 %t2, %s + %t4 = xor i32 %t3, 3 + %t5 = xor i32 %t4, %s + %t6 = add i32 %t5, 2 + %t7 = getelementptr i32* %y, i32 1 + %t8 = getelementptr i32* %t7, i32 %t6 + br label %exit + +exit: + ret i32* %t8 +} diff --git a/test/CodeGen/ARM/fnmuls.ll b/test/CodeGen/ARM/fnmuls.ll index efd87d2..3223885 100644 --- a/test/CodeGen/ARM/fnmuls.ll +++ b/test/CodeGen/ARM/fnmuls.ll @@ -1,20 +1,18 @@ -; XFAIL: * ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -define float @test1(float %a, float %b) nounwind { -; CHECK: fnmscs s2, s1, s0 +define arm_aapcs_vfpcc float @test1(float %a, float %b) nounwind { +; CHECK: vnmul.f32 s0, s0, s1 entry: %0 = fmul float %a, %b %1 = fsub float -0.0, %0 ret float %1 } -define float @test2(float %a, float %b) nounwind { -; CHECK: fnmscs s2, s1, s0 +define arm_aapcs_vfpcc float @test2(float %a, float %b) nounwind { +; CHECK: vnmul.f32 s0, s0, s1 entry: %0 = fmul float %a, %b %1 = fmul float -1.0, %0 diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll index 6875288..6435059 100644 --- a/test/CodeGen/ARM/fpcmp-opt.ll +++ b/test/CodeGen/ARM/fpcmp-opt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s ; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s ; rdar://7461510 diff --git a/test/CodeGen/ARM/fpowi.ll b/test/CodeGen/ARM/fpowi.ll index 7f9d62a..5614637 100644 --- a/test/CodeGen/ARM/fpowi.ll +++ b/test/CodeGen/ARM/fpowi.ll @@ -3,7 +3,7 @@ ; ModuleID = '<stdin>' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "arm-linux-gnueabi" +target triple = "arm-unknown-linux-gnueabi" define double @_ZSt3powdi(double %__x, i32 %__i) { entry: diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll index 688b7bc..1ec4d15 100644 --- a/test/CodeGen/ARM/long_shift.ll +++ b/test/CodeGen/ARM/long_shift.ll @@ -24,8 +24,7 @@ define i32 @f2(i64 %x, i64 %y) { ; CHECK: f2 ; CHECK: mov r0, r0, lsr r2 ; CHECK-NEXT: rsb r3, r2, #32 -; CHECK-NEXT: sub r2, r2, #32 -; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: subs r2, r2, #32 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 ; CHECK-NEXT: movge r0, r1, asr r2 %a = ashr i64 %x, %y @@ -37,8 +36,7 @@ define i32 @f3(i64 %x, i64 %y) { ; CHECK: f3 ; CHECK: mov r0, r0, lsr r2 ; CHECK-NEXT: rsb r3, r2, #32 -; CHECK-NEXT: sub r2, r2, #32 -; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: subs r2, r2, #32 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 ; CHECK-NEXT: movge r0, r1, lsr r2 %a = lshr i64 %x, %y diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll index 25cf135..866be42 100644 --- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll +++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll @@ -4,14 +4,14 @@ ; constant offset addressing, so that each of the following stores ; uses the same register. -; CHECK: vstr.32 s0, [r9, #-128] -; CHECK: vstr.32 s0, [r9, #-96] -; CHECK: vstr.32 s0, [r9, #-64] -; CHECK: vstr.32 s0, [r9, #-32] -; CHECK: vstr.32 s0, [r9] -; CHECK: vstr.32 s0, [r9, #32] -; CHECK: vstr.32 s0, [r9, #64] -; CHECK: vstr.32 s0, [r9, #96] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-128] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-96] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-64] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-32] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #32] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #64] +; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #96] target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" @@ -628,8 +628,7 @@ bb24: ; preds = %bb23 ; CHECK: @ %bb24 ; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: sub{{.*}} [[REGISTER:r[0-9]+]], #1 -; CHECK-NEXT: cmp{{.*}} [[REGISTER]], #0 +; CHECK-NEXT: sub{{.*}} [[REGISTER:(r[0-9]+)|(lr)]], #1 ; CHECK-NEXT: bne.w %92 = icmp eq i32 %tmp81, %indvar78 ; <i1> [#uses=1] diff --git a/test/CodeGen/ARM/pack.ll b/test/CodeGen/ARM/pack.ll index 1e2e7aa..4905dc2 100644 --- a/test/CodeGen/ARM/pack.ll +++ b/test/CodeGen/ARM/pack.ll @@ -1,8 +1,7 @@ -; RUN: llc < %s -march=arm -mattr=+v6 | \ -; RUN: grep pkhbt | count 5 -; RUN: llc < %s -march=arm -mattr=+v6 | \ -; RUN: grep pkhtb | count 4 +; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s +; CHECK: test1 +; CHECK: pkhbt r0, r0, r1, lsl #16 define i32 @test1(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp4 = shl i32 %Y, 16 ; <i32> [#uses=1] @@ -10,6 +9,8 @@ define i32 @test1(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test1a +; CHECK: pkhbt r0, r0, r1, lsl #16 define i32 @test1a(i32 %X, i32 %Y) { %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp37 = shl i32 %Y, 16 ; <i32> [#uses=1] @@ -17,6 +18,8 @@ define i32 @test1a(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test2 +; CHECK: pkhbt r0, r0, r1, lsl #12 define i32 @test2(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp3 = shl i32 %Y, 12 ; <i32> [#uses=1] @@ -25,6 +28,8 @@ define i32 @test2(i32 %X, i32 %Y) { ret i32 %tmp57 } +; CHECK: test3 +; CHECK: pkhbt r0, r0, r1, lsl #18 define i32 @test3(i32 %X, i32 %Y) { %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp37 = shl i32 %Y, 18 ; <i32> [#uses=1] @@ -32,6 +37,8 @@ define i32 @test3(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test4 +; CHECK: pkhbt r0, r0, r1 define i32 @test4(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp3 = and i32 %Y, -65536 ; <i32> [#uses=1] @@ -39,6 +46,8 @@ define i32 @test4(i32 %X, i32 %Y) { ret i32 %tmp46 } +; CHECK: test5 +; CHECK: pkhtb r0, r0, r1, asr #16 define i32 @test5(i32 %X, i32 %Y) { %tmp17 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp2 = bitcast i32 %Y to i32 ; <i32> [#uses=1] @@ -47,6 +56,8 @@ define i32 @test5(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test5a +; CHECK: pkhtb r0, r0, r1, asr #16 define i32 @test5a(i32 %X, i32 %Y) { %tmp110 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp37 = lshr i32 %Y, 16 ; <i32> [#uses=1] @@ -55,6 +66,8 @@ define i32 @test5a(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test6 +; CHECK: pkhtb r0, r0, r1, asr #12 define i32 @test6(i32 %X, i32 %Y) { %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp37 = lshr i32 %Y, 12 ; <i32> [#uses=1] @@ -64,6 +77,8 @@ define i32 @test6(i32 %X, i32 %Y) { ret i32 %tmp59 } +; CHECK: test7 +; CHECK: pkhtb r0, r0, r1, asr #18 define i32 @test7(i32 %X, i32 %Y) { %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp3 = ashr i32 %Y, 18 ; <i32> [#uses=1] @@ -71,3 +86,12 @@ define i32 @test7(i32 %X, i32 %Y) { %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1] ret i32 %tmp57 } + +; CHECK: test8 +; CHECK: pkhtb r0, r0, r1, asr #22 +define i32 @test8(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, -65536 + %tmp3 = lshr i32 %Y, 22 + %tmp57 = or i32 %tmp3, %tmp1 + ret i32 %tmp57 +} diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index 89b6577..2e4f10d 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ b/test/CodeGen/ARM/reg_sequence.ll @@ -23,21 +23,21 @@ entry: %2 = getelementptr inbounds %struct.int32x4_t* %vT1ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1] %3 = load <4 x i32>* %2, align 16 ; <<4 x i32>> [#uses=1] %4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1] - %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4) ; <<8 x i16>> [#uses=1] + %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1] %6 = bitcast <8 x i16> %5 to <2 x double> ; <<2 x double>> [#uses=2] %7 = extractelement <2 x double> %6, i32 0 ; <double> [#uses=1] %8 = bitcast double %7 to <4 x i16> ; <<4 x i16>> [#uses=1] - %9 = tail call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %8) ; <<4 x i32>> [#uses=1] + %9 = sext <4 x i16> %8 to <4 x i32> ; <<4 x i32>> [#uses=1] %10 = extractelement <2 x double> %6, i32 1 ; <double> [#uses=1] %11 = bitcast double %10 to <4 x i16> ; <<4 x i16>> [#uses=1] - %12 = tail call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %11) ; <<4 x i32>> [#uses=1] + %12 = sext <4 x i16> %11 to <4 x i32> ; <<4 x i32>> [#uses=1] %13 = mul <4 x i32> %1, %9 ; <<4 x i32>> [#uses=1] %14 = mul <4 x i32> %3, %12 ; <<4 x i32>> [#uses=1] %15 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %13, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1] %16 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %14, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1] %17 = shufflevector <4 x i16> %15, <4 x i16> %16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; <<8 x i16>> [#uses=1] %18 = bitcast i16* %o_ptr to i8* ; <i8*> [#uses=1] - tail call void @llvm.arm.neon.vst1.v8i16(i8* %18, <8 x i16> %17) + tail call void @llvm.arm.neon.vst1.v8i16(i8* %18, <8 x i16> %17, i32 1) ret void } @@ -45,10 +45,10 @@ define void @t2(i16* %i_ptr, i16* %o_ptr, %struct.int16x8_t* nocapture %vT0ptr, entry: ; CHECK: t2: ; CHECK: vld1.16 -; CHECK: vmul.i16 ; CHECK-NOT: vmov ; CHECK: vld1.16 ; CHECK: vmul.i16 +; CHECK: vmul.i16 ; CHECK-NOT: vmov ; CHECK: vst1.16 ; CHECK: vst1.16 @@ -57,17 +57,17 @@ entry: %2 = getelementptr inbounds %struct.int16x8_t* %vT1ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] %3 = load <8 x i16>* %2, align 16 ; <<8 x i16>> [#uses=1] %4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1] - %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4) ; <<8 x i16>> [#uses=1] + %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1] %6 = getelementptr inbounds i16* %i_ptr, i32 8 ; <i16*> [#uses=1] %7 = bitcast i16* %6 to i8* ; <i8*> [#uses=1] - %8 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %7) ; <<8 x i16>> [#uses=1] + %8 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %7, i32 1) ; <<8 x i16>> [#uses=1] %9 = mul <8 x i16> %1, %5 ; <<8 x i16>> [#uses=1] %10 = mul <8 x i16> %3, %8 ; <<8 x i16>> [#uses=1] %11 = bitcast i16* %o_ptr to i8* ; <i8*> [#uses=1] - tail call void @llvm.arm.neon.vst1.v8i16(i8* %11, <8 x i16> %9) + tail call void @llvm.arm.neon.vst1.v8i16(i8* %11, <8 x i16> %9, i32 1) %12 = getelementptr inbounds i16* %o_ptr, i32 8 ; <i16*> [#uses=1] %13 = bitcast i16* %12 to i8* ; <i8*> [#uses=1] - tail call void @llvm.arm.neon.vst1.v8i16(i8* %13, <8 x i16> %10) + tail call void @llvm.arm.neon.vst1.v8i16(i8* %13, <8 x i16> %10, i32 1) ret void } @@ -77,14 +77,14 @@ define <8 x i8> @t3(i8* %A, i8* %B) nounwind { ; CHECK: vmul.i8 ; CHECK-NOT: vmov ; CHECK: vst3.8 - %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 ; <<8 x i8>> [#uses=1] %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 ; <<8 x i8>> [#uses=1] %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 1 ; <<8 x i8>> [#uses=1] %tmp5 = sub <8 x i8> %tmp3, %tmp4 %tmp6 = add <8 x i8> %tmp2, %tmp3 ; <<8 x i8>> [#uses=1] %tmp7 = mul <8 x i8> %tmp4, %tmp2 - tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7) + tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7, i32 1) ret <8 x i8> %tmp4 } @@ -97,10 +97,10 @@ entry: ; CHECK-NOT: vmov ; CHECK: bne %tmp1 = bitcast i32* %in to i8* ; <i8*> [#uses=1] - %tmp2 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp1) ; <%struct.__neon_int32x4x2_t> [#uses=2] + %tmp2 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp1, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2] %tmp3 = getelementptr inbounds i32* %in, i32 8 ; <i32*> [#uses=1] %tmp4 = bitcast i32* %tmp3 to i8* ; <i8*> [#uses=1] - %tmp5 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp4) ; <%struct.__neon_int32x4x2_t> [#uses=2] + %tmp5 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp4, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2] %tmp8 = bitcast i32* %out to i8* ; <i8*> [#uses=1] br i1 undef, label %return1, label %return2 @@ -116,7 +116,7 @@ return1: %tmp39 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1] %tmp6 = add <4 x i32> %tmp52, %tmp ; <<4 x i32>> [#uses=1] %tmp7 = add <4 x i32> %tmp57, %tmp39 ; <<4 x i32>> [#uses=1] - tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp6, <4 x i32> %tmp7) + tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp6, <4 x i32> %tmp7, i32 1) ret void return2: @@ -128,7 +128,7 @@ return2: %tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1] %tmp101 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1] %tmp102 = add <4 x i32> %tmp100, %tmp101 ; <<4 x i32>> [#uses=1] - tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp102, <4 x i32> %tmp101) + tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp102, <4 x i32> %tmp101, i32 1) call void @llvm.trap() unreachable } @@ -143,7 +143,7 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind { ; CHECK: vadd.i16 %tmp0 = bitcast i16* %A to i8* ; <i8*> [#uses=1] %tmp1 = load <8 x i16>* %B ; <<8 x i16>> [#uses=2] - %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ; <%struct.__neon_int16x8x2_t> [#uses=2] + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) ; <%struct.__neon_int16x8x2_t> [#uses=2] %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 ; <<8 x i16>> [#uses=1] %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 ; <<8 x i16>> [#uses=1] %tmp5 = add <8 x i16> %tmp3, %tmp4 ; <<8 x i16>> [#uses=1] @@ -156,7 +156,7 @@ define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind { ; CHECK: vmov d1, d0 ; CHECK-NEXT: vld2.8 {d0[1], d1[1]} %tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2] - %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2] + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2] %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1] %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 ; <<8 x i8>> [#uses=1] %tmp5 = add <8 x i8> %tmp3, %tmp4 ; <<8 x i8>> [#uses=1] @@ -174,14 +174,14 @@ entry: ; CHECK: vuzp.32 q0, q1 ; CHECK: vst1.32 %0 = bitcast i32* %iptr to i8* ; <i8*> [#uses=2] - %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0) ; <%struct.__neon_int32x4x2_t> [#uses=2] + %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2] %tmp57 = extractvalue %struct.__neon_int32x4x2_t %1, 0 ; <<4 x i32>> [#uses=1] %tmp60 = extractvalue %struct.__neon_int32x4x2_t %1, 1 ; <<4 x i32>> [#uses=1] %2 = bitcast i32* %optr to i8* ; <i8*> [#uses=2] - tail call void @llvm.arm.neon.vst2.v4i32(i8* %2, <4 x i32> %tmp57, <4 x i32> %tmp60) - %3 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %0) ; <<4 x i32>> [#uses=1] + tail call void @llvm.arm.neon.vst2.v4i32(i8* %2, <4 x i32> %tmp57, <4 x i32> %tmp60, i32 1) + %3 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %0, i32 1) ; <<4 x i32>> [#uses=1] %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2> ; <<4 x i32>> [#uses=1] - tail call void @llvm.arm.neon.vst1.v4i32(i8* %2, <4 x i32> %4) + tail call void @llvm.arm.neon.vst1.v4i32(i8* %2, <4 x i32> %4, i32 1) ret void } @@ -304,44 +304,43 @@ bb14: ; preds = %bb6 ; This test crashes the coalescer because live variables were not updated properly. define <8 x i8> @t11(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind { - %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1] - %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1] %tmp2bd = add <8 x i8> zeroinitializer, %tmp2d ; <<8 x i8>> [#uses=1] %tmp2abcd = mul <8 x i8> zeroinitializer, %tmp2bd ; <<8 x i8>> [#uses=1] %tmp2ef = sub <8 x i8> zeroinitializer, %tmp2f ; <<8 x i8>> [#uses=1] %tmp2efgh = mul <8 x i8> %tmp2ef, undef ; <<8 x i8>> [#uses=2] - call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp2efgh) + call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp2efgh, i32 1) %tmp2 = sub <8 x i8> %tmp2efgh, %tmp2abcd ; <<8 x i8>> [#uses=1] %tmp7 = mul <8 x i8> undef, %tmp2 ; <<8 x i8>> [#uses=1] - tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp7) + tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp7, i32 1) ret <8 x i8> undef } -declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly - -declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly +declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly -declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind +declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) +nounwind -declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly -declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly -declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly -declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly -declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind +declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll index 1e780e6..6b86f1a 100644 --- a/test/CodeGen/ARM/remat.ll +++ b/test/CodeGen/ARM/remat.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -stats -info-output-file - | grep "Number of re-materialization" +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -o /dev/null -stats -info-output-file - | grep "Number of re-materialization" define i32 @main(i32 %argc, i8** nocapture %argv, double %d1, double %d2) nounwind { entry: diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll index 29c55c6..7413bed 100644 --- a/test/CodeGen/ARM/select.ll +++ b/test/CodeGen/ARM/select.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=arm | FileCheck %s ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP +; RUN: llc < %s -mattr=+neon,+thumb2 -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=CHECK-NEON define i32 @f1(i32 %a.s) { ;CHECK: f1: @@ -65,3 +66,27 @@ define double @f7(double %a, double %b) { %tmp1 = select i1 %tmp, double -1.000e+00, double %b ret double %tmp1 } + +; <rdar://problem/7260094> +; +; We used to generate really horrible code for this function. The main cause was +; a lack of a custom lowering routine for an ISD::SELECT. This would result in +; two "it" blocks in the code: one for the "icmp" and another to move the index +; into the constant pool based on the value of the "icmp". If we have one "it" +; block generated, odds are good that we have close to the ideal code for this: +; +; CHECK-NEON: _f8: +; CHECK-NEON: movw [[REGISTER_1:r[0-9]+]], #1123 +; CHECK-NEON-NEXT: movs [[REGISTER_2:r[0-9]+]], #0 +; CHECK-NEON-NEXT: cmp r0, [[REGISTER_1]] +; CHECK-NEON-NEXT: adr [[REGISTER_3:r[0-9]+]], #LCPI +; CHECK-NEON-NEXT: it eq +; CHECK-NEON-NEXT: moveq [[REGISTER_2]], #4 +; CHECK-NEON-NEXT: ldr +; CHECK-NEON: bx + +define arm_apcscc float @f8(i32 %a) nounwind { + %tmp = icmp eq i32 %a, 1123 + %tmp1 = select i1 %tmp, float 0x3FF3BE76C0000000, float 0x40030E9A20000000 + ret float %tmp1 +} diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll index 792ef79..ae1ba2f 100644 --- a/test/CodeGen/ARM/spill-q.ll +++ b/test/CodeGen/ARM/spill-q.ll @@ -7,7 +7,7 @@ %quux = type { i32 (...)**, %baz*, i32 } %quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo } -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly define void @aaa(%quuz* %this, i8* %block) { ; CHECK: aaa: @@ -15,11 +15,11 @@ define void @aaa(%quuz* %this, i8* %block) { ; CHECK: vst1.64 {{.*}}sp, :128 ; CHECK: vld1.64 {{.*}}sp, :128 entry: - %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 6.300000e+01, float* undef, align 4 - %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 0.000000e+00, float* undef, align 4 - %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1] br label %bb4 diff --git a/test/CodeGen/ARM/t2-imm.ll b/test/CodeGen/ARM/t2-imm.ll index 848a4df..8b41459 100644 --- a/test/CodeGen/ARM/t2-imm.ll +++ b/test/CodeGen/ARM/t2-imm.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s define i32 @f6(i32 %a) { ; CHECK:f6 -; CHECK: movw r0, #:lower16:65537123 -; CHECK: movt r0, #:upper16:65537123 +; CHECK: movw r0, #1123 +; CHECK: movt r0, #1000 %tmp = add i32 0, 65537123 ret i32 %tmp } diff --git a/test/CodeGen/ARM/vaba.ll b/test/CodeGen/ARM/vaba.ll index e2dca46..4fe1c43 100644 --- a/test/CodeGen/ARM/vaba.ll +++ b/test/CodeGen/ARM/vaba.ll @@ -6,8 +6,9 @@ define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i8> @llvm.arm.neon.vabas.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i8> %tmp4 + %tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = add <8 x i8> %tmp1, %tmp4 + ret <8 x i8> %tmp5 } define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -16,8 +17,9 @@ define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i16> @llvm.arm.neon.vabas.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i16> %tmp4 + %tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = add <4 x i16> %tmp1, %tmp4 + ret <4 x i16> %tmp5 } define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -26,8 +28,9 @@ define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i32> @llvm.arm.neon.vabas.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i32> %tmp4 + %tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = add <2 x i32> %tmp1, %tmp4 + ret <2 x i32> %tmp5 } define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { @@ -36,8 +39,9 @@ define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i8> @llvm.arm.neon.vabau.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i8> %tmp4 + %tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = add <8 x i8> %tmp1, %tmp4 + ret <8 x i8> %tmp5 } define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -46,8 +50,9 @@ define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i16> @llvm.arm.neon.vabau.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i16> %tmp4 + %tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = add <4 x i16> %tmp1, %tmp4 + ret <4 x i16> %tmp5 } define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -56,8 +61,9 @@ define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i32> @llvm.arm.neon.vabau.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i32> %tmp4 + %tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = add <2 x i32> %tmp1, %tmp4 + ret <2 x i32> %tmp5 } define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { @@ -66,8 +72,9 @@ define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = load <16 x i8>* %C - %tmp4 = call <16 x i8> @llvm.arm.neon.vabas.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> %tmp3) - ret <16 x i8> %tmp4 + %tmp4 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3) + %tmp5 = add <16 x i8> %tmp1, %tmp4 + ret <16 x i8> %tmp5 } define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { @@ -76,8 +83,9 @@ define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = load <8 x i16>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vabas.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3) + %tmp5 = add <8 x i16> %tmp1, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { @@ -86,8 +94,9 @@ define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3) + %tmp5 = add <4 x i32> %tmp1, %tmp4 + ret <4 x i32> %tmp5 } define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { @@ -96,8 +105,9 @@ define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = load <16 x i8>* %C - %tmp4 = call <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> %tmp3) - ret <16 x i8> %tmp4 + %tmp4 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3) + %tmp5 = add <16 x i8> %tmp1, %tmp4 + ret <16 x i8> %tmp5 } define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { @@ -106,8 +116,9 @@ define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = load <8 x i16>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3) + %tmp5 = add <8 x i16> %tmp1, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { @@ -116,25 +127,26 @@ define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3) + %tmp5 = add <4 x i32> %tmp1, %tmp4 + ret <4 x i32> %tmp5 } -declare <8 x i8> @llvm.arm.neon.vabas.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vabas.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vabas.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm.neon.vabau.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vabau.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vabau.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vabas.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vabas.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK: vabals8: @@ -142,8 +154,10 @@ define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = zext <8 x i8> %tmp4 to <8 x i16> + %tmp6 = add <8 x i16> %tmp1, %tmp5 + ret <8 x i16> %tmp6 } define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -152,8 +166,10 @@ define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = zext <4 x i16> %tmp4 to <4 x i32> + %tmp6 = add <4 x i32> %tmp1, %tmp5 + ret <4 x i32> %tmp6 } define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -162,8 +178,10 @@ define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = zext <2 x i32> %tmp4 to <2 x i64> + %tmp6 = add <2 x i64> %tmp1, %tmp5 + ret <2 x i64> %tmp6 } define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { @@ -172,8 +190,10 @@ define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = zext <8 x i8> %tmp4 to <8 x i16> + %tmp6 = add <8 x i16> %tmp1, %tmp5 + ret <8 x i16> %tmp6 } define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -182,8 +202,10 @@ define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = zext <4 x i16> %tmp4 to <4 x i32> + %tmp6 = add <4 x i32> %tmp1, %tmp5 + ret <4 x i32> %tmp6 } define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -192,14 +214,8 @@ define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = zext <2 x i32> %tmp4 to <2 x i64> + %tmp6 = add <2 x i64> %tmp1, %tmp5 + ret <2 x i64> %tmp6 } - -declare <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll index 2b45393..9ec734f 100644 --- a/test/CodeGen/ARM/vabd.ll +++ b/test/CodeGen/ARM/vabd.ll @@ -151,8 +151,9 @@ define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vabdl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> + ret <8 x i16> %tmp4 } define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -160,8 +161,9 @@ define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vabdl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 } define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -169,8 +171,9 @@ define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vabdl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> + ret <2 x i64> %tmp4 } define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -178,8 +181,9 @@ define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vabdl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> + ret <8 x i16> %tmp4 } define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -187,8 +191,9 @@ define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vabdl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 } define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -196,14 +201,7 @@ define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vabdl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> + ret <2 x i64> %tmp4 } - -declare <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll index 9bb8bf5..a830e96 100644 --- a/test/CodeGen/ARM/vadd.ll +++ b/test/CodeGen/ARM/vadd.ll @@ -157,8 +157,10 @@ define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vaddl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -166,8 +168,10 @@ define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vaddl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -175,8 +179,10 @@ define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vaddl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = add <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -184,8 +190,10 @@ define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vaddl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -193,8 +201,10 @@ define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vaddl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -202,25 +212,20 @@ define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vaddl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = add <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } -declare <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK: vaddws8: ;CHECK: vaddw.s8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = add <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 } define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { @@ -228,8 +233,9 @@ define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK: vaddw.s16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = add <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 } define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { @@ -237,8 +243,9 @@ define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK: vaddw.s32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = add <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 } define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { @@ -246,8 +253,9 @@ define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK: vaddw.u8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = add <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 } define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { @@ -255,8 +263,9 @@ define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK: vaddw.u16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = add <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 } define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { @@ -264,14 +273,7 @@ define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK: vaddw.u32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = add <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 } - -declare <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll index c11a67c..e460a84 100644 --- a/test/CodeGen/ARM/vext.ll +++ b/test/CodeGen/ARM/vext.ll @@ -54,3 +54,23 @@ define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ret <4 x i32> %tmp3 } +; Undef shuffle indices should not prevent matching to VEXT: + +define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: test_vextd_undef: +;CHECK: vext + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10> + ret <8 x i8> %tmp3 +} + +define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: test_vextRq_undef: +;CHECK: vext + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6> + ret <16 x i8> %tmp3 +} + diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll index c61ea8c9..2488e8a 100644 --- a/test/CodeGen/ARM/vld1.ll +++ b/test/CodeGen/ARM/vld1.ll @@ -3,7 +3,7 @@ define <8 x i8> @vld1i8(i8* %A) nounwind { ;CHECK: vld1i8: ;CHECK: vld1.8 - %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A) + %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 1) ret <8 x i8> %tmp1 } @@ -11,7 +11,7 @@ define <4 x i16> @vld1i16(i16* %A) nounwind { ;CHECK: vld1i16: ;CHECK: vld1.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0) + %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1) ret <4 x i16> %tmp1 } @@ -19,7 +19,7 @@ define <2 x i32> @vld1i32(i32* %A) nounwind { ;CHECK: vld1i32: ;CHECK: vld1.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0) + %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1) ret <2 x i32> %tmp1 } @@ -27,7 +27,7 @@ define <2 x float> @vld1f(float* %A) nounwind { ;CHECK: vld1f: ;CHECK: vld1.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %tmp0) + %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %tmp0, i32 1) ret <2 x float> %tmp1 } @@ -35,14 +35,14 @@ define <1 x i64> @vld1i64(i64* %A) nounwind { ;CHECK: vld1i64: ;CHECK: vld1.64 %tmp0 = bitcast i64* %A to i8* - %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0) + %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0, i32 1) ret <1 x i64> %tmp1 } define <16 x i8> @vld1Qi8(i8* %A) nounwind { ;CHECK: vld1Qi8: ;CHECK: vld1.8 - %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A) + %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 1) ret <16 x i8> %tmp1 } @@ -50,7 +50,7 @@ define <8 x i16> @vld1Qi16(i16* %A) nounwind { ;CHECK: vld1Qi16: ;CHECK: vld1.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0) + %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 1) ret <8 x i16> %tmp1 } @@ -58,7 +58,7 @@ define <4 x i32> @vld1Qi32(i32* %A) nounwind { ;CHECK: vld1Qi32: ;CHECK: vld1.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0) + %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0, i32 1) ret <4 x i32> %tmp1 } @@ -66,7 +66,7 @@ define <4 x float> @vld1Qf(float* %A) nounwind { ;CHECK: vld1Qf: ;CHECK: vld1.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %tmp0) + %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %tmp0, i32 1) ret <4 x float> %tmp1 } @@ -74,18 +74,31 @@ define <2 x i64> @vld1Qi64(i64* %A) nounwind { ;CHECK: vld1Qi64: ;CHECK: vld1.64 %tmp0 = bitcast i64* %A to i8* - %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0) + %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0, i32 1) ret <2 x i64> %tmp1 } -declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*) nounwind readonly -declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*) nounwind readonly -declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*) nounwind readonly -declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*) nounwind readonly -declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*) nounwind readonly +declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly +declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) nounwind readonly +declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) nounwind readonly +declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) nounwind readonly +declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly -declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*) nounwind readonly -declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly -declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly -declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly +declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly +declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly + +; Radar 8355607 +; Do not crash if the vld1 result is not used. +define void @unused_vld1_result() { +entry: +;CHECK: unused_vld1_result +;CHECK: vld1.32 + %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) + call void @llvm.trap() + unreachable +} + +declare void @llvm.trap() nounwind diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll index 0838636..811f6e6 100644 --- a/test/CodeGen/ARM/vld2.ll +++ b/test/CodeGen/ARM/vld2.ll @@ -14,7 +14,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind { ;CHECK: vld2i8: ;CHECK: vld2.8 - %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A) + %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1 %tmp4 = add <8 x i8> %tmp2, %tmp3 @@ -25,7 +25,7 @@ define <4 x i16> @vld2i16(i16* %A) nounwind { ;CHECK: vld2i16: ;CHECK: vld2.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1 %tmp4 = add <4 x i16> %tmp2, %tmp3 @@ -36,7 +36,7 @@ define <2 x i32> @vld2i32(i32* %A) nounwind { ;CHECK: vld2i32: ;CHECK: vld2.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1 %tmp4 = add <2 x i32> %tmp2, %tmp3 @@ -47,7 +47,7 @@ define <2 x float> @vld2f(float* %A) nounwind { ;CHECK: vld2f: ;CHECK: vld2.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1 %tmp4 = fadd <2 x float> %tmp2, %tmp3 @@ -58,7 +58,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind { ;CHECK: vld2i64: ;CHECK: vld1.64 %tmp0 = bitcast i64* %A to i8* - %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0) + %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1 %tmp4 = add <1 x i64> %tmp2, %tmp3 @@ -68,7 +68,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind { define <16 x i8> @vld2Qi8(i8* %A) nounwind { ;CHECK: vld2Qi8: ;CHECK: vld2.8 - %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A) + %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1 %tmp4 = add <16 x i8> %tmp2, %tmp3 @@ -79,7 +79,7 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind { ;CHECK: vld2Qi16: ;CHECK: vld2.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1 %tmp4 = add <8 x i16> %tmp2, %tmp3 @@ -90,7 +90,7 @@ define <4 x i32> @vld2Qi32(i32* %A) nounwind { ;CHECK: vld2Qi32: ;CHECK: vld2.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1 %tmp4 = add <4 x i32> %tmp2, %tmp3 @@ -101,20 +101,20 @@ define <4 x float> @vld2Qf(float* %A) nounwind { ;CHECK: vld2Qf: ;CHECK: vld2.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 1 %tmp4 = fadd <4 x float> %tmp2, %tmp3 ret <4 x float> %tmp4 } -declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly -declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly -declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly -declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly -declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*, i32) nounwind readonly -declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*) nounwind readonly -declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*) nounwind readonly -declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly -declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*) nounwind readonly +declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll index 65a2448..92538c3 100644 --- a/test/CodeGen/ARM/vld3.ll +++ b/test/CodeGen/ARM/vld3.ll @@ -14,7 +14,7 @@ define <8 x i8> @vld3i8(i8* %A) nounwind { ;CHECK: vld3i8: ;CHECK: vld3.8 - %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A) + %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 %tmp4 = add <8 x i8> %tmp2, %tmp3 @@ -25,7 +25,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind { ;CHECK: vld3i16: ;CHECK: vld3.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2 %tmp4 = add <4 x i16> %tmp2, %tmp3 @@ -36,7 +36,7 @@ define <2 x i32> @vld3i32(i32* %A) nounwind { ;CHECK: vld3i32: ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2 %tmp4 = add <2 x i32> %tmp2, %tmp3 @@ -47,7 +47,7 @@ define <2 x float> @vld3f(float* %A) nounwind { ;CHECK: vld3f: ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 2 %tmp4 = fadd <2 x float> %tmp2, %tmp3 @@ -58,7 +58,7 @@ define <1 x i64> @vld3i64(i64* %A) nounwind { ;CHECK: vld3i64: ;CHECK: vld1.64 %tmp0 = bitcast i64* %A to i8* - %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0) + %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2 %tmp4 = add <1 x i64> %tmp2, %tmp3 @@ -69,7 +69,7 @@ define <16 x i8> @vld3Qi8(i8* %A) nounwind { ;CHECK: vld3Qi8: ;CHECK: vld3.8 ;CHECK: vld3.8 - %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A) + %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2 %tmp4 = add <16 x i8> %tmp2, %tmp3 @@ -81,7 +81,7 @@ define <8 x i16> @vld3Qi16(i16* %A) nounwind { ;CHECK: vld3.16 ;CHECK: vld3.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 2 %tmp4 = add <8 x i16> %tmp2, %tmp3 @@ -93,7 +93,7 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind { ;CHECK: vld3.32 ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2 %tmp4 = add <4 x i32> %tmp2, %tmp3 @@ -105,20 +105,20 @@ define <4 x float> @vld3Qf(float* %A) nounwind { ;CHECK: vld3.32 ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 2 %tmp4 = fadd <4 x float> %tmp2, %tmp3 ret <4 x float> %tmp4 } -declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly -declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly -declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly -declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly -declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*, i32) nounwind readonly -declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*) nounwind readonly -declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*) nounwind readonly -declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*) nounwind readonly -declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*) nounwind readonly +declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*, i32) nounwind readonly diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll index e0b8706..d1bf957 100644 --- a/test/CodeGen/ARM/vld4.ll +++ b/test/CodeGen/ARM/vld4.ll @@ -14,7 +14,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind { ;CHECK: vld4i8: ;CHECK: vld4.8 - %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A) + %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2 %tmp4 = add <8 x i8> %tmp2, %tmp3 @@ -25,7 +25,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind { ;CHECK: vld4i16: ;CHECK: vld4.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2 %tmp4 = add <4 x i16> %tmp2, %tmp3 @@ -36,7 +36,7 @@ define <2 x i32> @vld4i32(i32* %A) nounwind { ;CHECK: vld4i32: ;CHECK: vld4.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2 %tmp4 = add <2 x i32> %tmp2, %tmp3 @@ -47,7 +47,7 @@ define <2 x float> @vld4f(float* %A) nounwind { ;CHECK: vld4f: ;CHECK: vld4.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 2 %tmp4 = fadd <2 x float> %tmp2, %tmp3 @@ -58,7 +58,7 @@ define <1 x i64> @vld4i64(i64* %A) nounwind { ;CHECK: vld4i64: ;CHECK: vld1.64 %tmp0 = bitcast i64* %A to i8* - %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0) + %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2 %tmp4 = add <1 x i64> %tmp2, %tmp3 @@ -69,7 +69,7 @@ define <16 x i8> @vld4Qi8(i8* %A) nounwind { ;CHECK: vld4Qi8: ;CHECK: vld4.8 ;CHECK: vld4.8 - %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A) + %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 1) %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2 %tmp4 = add <16 x i8> %tmp2, %tmp3 @@ -81,7 +81,7 @@ define <8 x i16> @vld4Qi16(i16* %A) nounwind { ;CHECK: vld4.16 ;CHECK: vld4.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0) + %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2 %tmp4 = add <8 x i16> %tmp2, %tmp3 @@ -93,7 +93,7 @@ define <4 x i32> @vld4Qi32(i32* %A) nounwind { ;CHECK: vld4.32 ;CHECK: vld4.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8* %tmp0) + %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 2 %tmp4 = add <4 x i32> %tmp2, %tmp3 @@ -105,20 +105,20 @@ define <4 x float> @vld4Qf(float* %A) nounwind { ;CHECK: vld4.32 ;CHECK: vld4.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8* %tmp0) + %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 2 %tmp4 = fadd <4 x float> %tmp2, %tmp3 ret <4 x float> %tmp4 } -declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly -declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly -declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly -declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly -declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*) nounwind readonly +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*, i32) nounwind readonly -declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*) nounwind readonly -declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*) nounwind readonly -declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*) nounwind readonly -declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*) nounwind readonly +declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*, i32) nounwind readonly diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll index b32c590..31ee64f 100644 --- a/test/CodeGen/ARM/vldlane.ll +++ b/test/CodeGen/ARM/vldlane.ll @@ -13,7 +13,7 @@ define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld2lanei8: ;CHECK: vld2.8 %tmp1 = load <8 x i8>* %B - %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 %tmp5 = add <8 x i8> %tmp3, %tmp4 @@ -25,7 +25,7 @@ define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vld2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1 %tmp5 = add <4 x i16> %tmp3, %tmp4 @@ -37,7 +37,7 @@ define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vld2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 %tmp5 = add <2 x i32> %tmp3, %tmp4 @@ -49,7 +49,7 @@ define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vld2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1 %tmp5 = fadd <2 x float> %tmp3, %tmp4 @@ -61,7 +61,7 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vld2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 %tmp5 = add <8 x i16> %tmp3, %tmp4 @@ -73,7 +73,7 @@ define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vld2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2) + %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1 %tmp5 = add <4 x i32> %tmp3, %tmp4 @@ -85,21 +85,21 @@ define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vld2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1 %tmp5 = fadd <4 x float> %tmp3, %tmp4 ret <4 x float> %tmp5 } -declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly -declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly -declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } @@ -114,7 +114,7 @@ define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld3lanei8: ;CHECK: vld3.8 %tmp1 = load <8 x i8>* %B - %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 @@ -128,7 +128,7 @@ define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vld3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2 @@ -142,7 +142,7 @@ define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2 @@ -156,7 +156,7 @@ define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2 @@ -170,7 +170,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vld3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2 @@ -184,7 +184,7 @@ define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3) + %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1) %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2 @@ -198,7 +198,7 @@ define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2 @@ -207,14 +207,14 @@ define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind { ret <4 x float> %tmp7 } -declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly -declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly -declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @@ -229,7 +229,7 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld4lanei8: ;CHECK: vld4.8 %tmp1 = load <8 x i8>* %B - %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 @@ -245,7 +245,7 @@ define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vld4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2 @@ -261,7 +261,7 @@ define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vld4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2 @@ -277,7 +277,7 @@ define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vld4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2 @@ -293,7 +293,7 @@ define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vld4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2 @@ -309,7 +309,7 @@ define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vld4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) + %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2 @@ -325,7 +325,7 @@ define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vld4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2 @@ -336,11 +336,11 @@ define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind { ret <4 x float> %tmp9 } -declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly -declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly -declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly -declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly -declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly diff --git a/test/CodeGen/ARM/vmla.ll b/test/CodeGen/ARM/vmla.ll index 77cf10a..9c6b210 100644 --- a/test/CodeGen/ARM/vmla.ll +++ b/test/CodeGen/ARM/vmla.ll @@ -94,8 +94,11 @@ define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = add <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 } define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -104,8 +107,11 @@ define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = add <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 } define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -114,8 +120,11 @@ define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = add <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 } define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { @@ -124,8 +133,11 @@ define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = zext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = add <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 } define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -134,8 +146,11 @@ define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = zext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = add <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 } define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -144,8 +159,11 @@ define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = zext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = add <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 } define arm_aapcs_vfpcc <4 x i32> @test_vmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { @@ -153,8 +171,11 @@ entry: ; CHECK: test_vmlal_lanes16 ; CHECK: vmlal.s16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = sext <4 x i16> %arg1_int16x4_t to <4 x i32> + %2 = sext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = add <4 x i32> %arg0_int32x4_t, %3 + ret <4 x i32> %4 } define arm_aapcs_vfpcc <2 x i64> @test_vmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { @@ -162,8 +183,11 @@ entry: ; CHECK: test_vmlal_lanes32 ; CHECK: vmlal.s32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = sext <2 x i32> %arg1_int32x2_t to <2 x i64> + %2 = sext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + %4 = add <2 x i64> %arg0_int64x2_t, %3 + ret <2 x i64> %4 } define arm_aapcs_vfpcc <4 x i32> @test_vmlal_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone { @@ -171,8 +195,11 @@ entry: ; CHECK: test_vmlal_laneu16 ; CHECK: vmlal.u16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = zext <4 x i16> %arg1_uint16x4_t to <4 x i32> + %2 = zext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = add <4 x i32> %arg0_uint32x4_t, %3 + ret <4 x i32> %4 } define arm_aapcs_vfpcc <2 x i64> @test_vmlal_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone { @@ -180,14 +207,9 @@ entry: ; CHECK: test_vmlal_laneu32 ; CHECK: vmlal.u32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = zext <2 x i32> %arg1_uint32x2_t to <2 x i64> + %2 = zext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + %4 = add <2 x i64> %arg0_uint64x2_t, %3 + ret <2 x i64> %4 } - -declare <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vmls.ll b/test/CodeGen/ARM/vmls.ll index 2b70a78..65e7fe4 100644 --- a/test/CodeGen/ARM/vmls.ll +++ b/test/CodeGen/ARM/vmls.ll @@ -94,8 +94,11 @@ define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = sub <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 } define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -104,8 +107,11 @@ define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = sub <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 } define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -114,8 +120,11 @@ define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = sub <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 } define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { @@ -124,8 +133,11 @@ define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i8>* %C - %tmp4 = call <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) - ret <8 x i16> %tmp4 + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = zext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = sub <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 } define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { @@ -134,8 +146,11 @@ define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = zext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = sub <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 } define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { @@ -144,8 +159,11 @@ define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = zext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = sub <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 } define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { @@ -153,8 +171,11 @@ entry: ; CHECK: test_vmlsl_lanes16 ; CHECK: vmlsl.s16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = sext <4 x i16> %arg1_int16x4_t to <4 x i32> + %2 = sext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = sub <4 x i32> %arg0_int32x4_t, %3 + ret <4 x i32> %4 } define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { @@ -162,8 +183,11 @@ entry: ; CHECK: test_vmlsl_lanes32 ; CHECK: vmlsl.s32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = sext <2 x i32> %arg1_int32x2_t to <2 x i64> + %2 = sext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + %4 = sub <2 x i64> %arg0_int64x2_t, %3 + ret <2 x i64> %4 } define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone { @@ -171,8 +195,11 @@ entry: ; CHECK: test_vmlsl_laneu16 ; CHECK: vmlsl.u16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = zext <4 x i16> %arg1_uint16x4_t to <4 x i32> + %2 = zext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = sub <4 x i32> %arg0_uint32x4_t, %3 + ret <4 x i32> %4 } define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone { @@ -180,14 +207,9 @@ entry: ; CHECK: test_vmlsl_laneu32 ; CHECK: vmlsl.u32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = zext <2 x i32> %arg1_uint32x2_t to <2 x i64> + %2 = zext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + %4 = sub <2 x i64> %arg0_uint64x2_t, %3 + ret <2 x i64> %4 } - -declare <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll index 5e872ab..8cd9457 100644 --- a/test/CodeGen/ARM/vmov.ll +++ b/test/CodeGen/ARM/vmov.ll @@ -192,7 +192,7 @@ define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind { ;CHECK: vmovls8: ;CHECK: vmovl.s8 %tmp1 = load <8 x i8>* %A - %tmp2 = call <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8> %tmp1) + %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> ret <8 x i16> %tmp2 } @@ -200,7 +200,7 @@ define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind { ;CHECK: vmovls16: ;CHECK: vmovl.s16 %tmp1 = load <4 x i16>* %A - %tmp2 = call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %tmp1) + %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> ret <4 x i32> %tmp2 } @@ -208,7 +208,7 @@ define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind { ;CHECK: vmovls32: ;CHECK: vmovl.s32 %tmp1 = load <2 x i32>* %A - %tmp2 = call <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32> %tmp1) + %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> ret <2 x i64> %tmp2 } @@ -216,7 +216,7 @@ define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind { ;CHECK: vmovlu8: ;CHECK: vmovl.u8 %tmp1 = load <8 x i8>* %A - %tmp2 = call <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8> %tmp1) + %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> ret <8 x i16> %tmp2 } @@ -224,7 +224,7 @@ define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind { ;CHECK: vmovlu16: ;CHECK: vmovl.u16 %tmp1 = load <4 x i16>* %A - %tmp2 = call <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16> %tmp1) + %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> ret <4 x i32> %tmp2 } @@ -232,23 +232,15 @@ define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind { ;CHECK: vmovlu32: ;CHECK: vmovl.u32 %tmp1 = load <2 x i32>* %A - %tmp2 = call <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32> %tmp1) + %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> ret <2 x i64> %tmp2 } -declare <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32>) nounwind readnone - define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind { ;CHECK: vmovni16: ;CHECK: vmovn.i16 %tmp1 = load <8 x i16>* %A - %tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1) + %tmp2 = trunc <8 x i16> %tmp1 to <8 x i8> ret <8 x i8> %tmp2 } @@ -256,7 +248,7 @@ define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind { ;CHECK: vmovni32: ;CHECK: vmovn.i32 %tmp1 = load <4 x i32>* %A - %tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1) + %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16> ret <4 x i16> %tmp2 } @@ -264,14 +256,10 @@ define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind { ;CHECK: vmovni64: ;CHECK: vmovn.i64 %tmp1 = load <2 x i64>* %A - %tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1) + %tmp2 = trunc <2 x i64> %tmp1 to <2 x i32> ret <2 x i32> %tmp2 } -declare <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone - define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind { ;CHECK: vqmovns16: ;CHECK: vqmovn.s16 diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll index 1d91680..5383425 100644 --- a/test/CodeGen/ARM/vmul.ll +++ b/test/CodeGen/ARM/vmul.ll @@ -152,8 +152,10 @@ define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vmull.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = mul <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -161,8 +163,10 @@ define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vmull.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = mul <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -170,8 +174,10 @@ define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vmull.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = mul <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -179,8 +185,10 @@ define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vmull.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = mul <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -188,8 +196,10 @@ define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vmull.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = mul <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -197,8 +207,10 @@ define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vmull.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = mul <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -215,8 +227,10 @@ entry: ; CHECK: test_vmull_lanes16 ; CHECK: vmull.s16 q0, d0, d1[1] %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = sext <4 x i16> %arg0_int16x4_t to <4 x i32> + %2 = sext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + ret <4 x i32> %3 } define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { @@ -224,8 +238,10 @@ entry: ; CHECK: test_vmull_lanes32 ; CHECK: vmull.s32 q0, d0, d1[1] %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = sext <2 x i32> %arg0_int32x2_t to <2 x i64> + %2 = sext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + ret <2 x i64> %3 } define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone { @@ -233,8 +249,10 @@ entry: ; CHECK: test_vmull_laneu16 ; CHECK: vmull.u16 q0, d0, d1[1] %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = zext <4 x i16> %arg0_uint16x4_t to <4 x i32> + %2 = zext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + ret <4 x i32> %3 } define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone { @@ -242,16 +260,10 @@ entry: ; CHECK: test_vmull_laneu32 ; CHECK: vmull.u32 q0, d0, d1[1] %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = zext <2 x i32> %arg0_uint32x2_t to <2 x i64> + %2 = zext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + ret <2 x i64> %3 } -declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll index deed554..e1fe64b 100644 --- a/test/CodeGen/ARM/vrev.ll +++ b/test/CodeGen/ARM/vrev.ll @@ -111,3 +111,21 @@ define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind { %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> ret <16 x i8> %tmp2 } + +; Undef shuffle indices should not prevent matching to VREV: + +define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind { +;CHECK: test_vrev64D8_undef: +;CHECK: vrev64.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %tmp2 +} + +define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind { +;CHECK: test_vrev32Q16_undef: +;CHECK: vrev32.16 + %tmp1 = load <8 x i16>* %A + %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef> + ret <8 x i16> %tmp2 +} diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll index 95414c3..2b535ad 100644 --- a/test/CodeGen/ARM/vst1.ll +++ b/test/CodeGen/ARM/vst1.ll @@ -4,7 +4,7 @@ define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst1i8: ;CHECK: vst1.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1) + call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 1) ret void } @@ -13,7 +13,7 @@ define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst1.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1) + call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1, i32 1) ret void } @@ -22,7 +22,7 @@ define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst1.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1) + call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1, i32 1) ret void } @@ -31,7 +31,7 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst1.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1) + call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1) ret void } @@ -40,7 +40,7 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst1.64 %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B - call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1) + call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1, i32 1) ret void } @@ -48,7 +48,7 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst1Qi8: ;CHECK: vst1.8 %tmp1 = load <16 x i8>* %B - call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1) + call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 1) ret void } @@ -57,7 +57,7 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst1.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1) + call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 1) ret void } @@ -66,7 +66,7 @@ define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst1.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1) + call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1) ret void } @@ -75,7 +75,7 @@ define void @vst1Qf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst1.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1) + call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1) ret void } @@ -84,18 +84,18 @@ define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind { ;CHECK: vst1.64 %tmp0 = bitcast i64* %A to i8* %tmp1 = load <2 x i64>* %B - call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1) + call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1) ret void } -declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>) nounwind -declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>) nounwind -declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>) nounwind -declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>) nounwind -declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>) nounwind +declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32) nounwind -declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>) nounwind -declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind -declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind -declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>) nounwind +declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32) nounwind diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll index 3c98a2c..aed15fd 100644 --- a/test/CodeGen/ARM/vst2.ll +++ b/test/CodeGen/ARM/vst2.ll @@ -4,7 +4,7 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst2i8: ;CHECK: vst2.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1) + call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ret void } @@ -13,7 +13,7 @@ define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1) + call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) ret void } @@ -22,7 +22,7 @@ define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1) + call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) ret void } @@ -31,7 +31,7 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1) + call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) ret void } @@ -40,7 +40,7 @@ define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst1.64 %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B - call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1) + call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) ret void } @@ -48,7 +48,7 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst2Qi8: ;CHECK: vst2.8 %tmp1 = load <16 x i8>* %B - call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1) + call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1) ret void } @@ -57,7 +57,7 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1) + call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ret void } @@ -66,7 +66,7 @@ define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1) + call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) ret void } @@ -75,17 +75,17 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1) + call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) ret void } -declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind -declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>) nounwind -declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>) nounwind -declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>) nounwind -declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>) nounwind +declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) nounwind -declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>) nounwind -declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>) nounwind -declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind -declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>) nounwind +declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll index 2599bc0..1feaed5 100644 --- a/test/CodeGen/ARM/vst3.ll +++ b/test/CodeGen/ARM/vst3.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -O0 | FileCheck %s define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst3i8: ;CHECK: vst3.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1) + call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ret void } @@ -13,7 +13,7 @@ define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1) + call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) ret void } @@ -22,7 +22,7 @@ define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1) + call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) ret void } @@ -31,7 +31,7 @@ define void @vst3f(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst3.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1) + call void @llvm.arm.neon.vst3.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) ret void } @@ -40,7 +40,7 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst1.64 %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B - call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1) + call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) ret void } @@ -49,7 +49,7 @@ define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst3.8 ;CHECK: vst3.8 %tmp1 = load <16 x i8>* %B - call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1) + call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1) ret void } @@ -59,7 +59,7 @@ define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1) + call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ret void } @@ -69,7 +69,7 @@ define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst3.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1) + call void @llvm.arm.neon.vst3.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) ret void } @@ -79,17 +79,17 @@ define void @vst3Qf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst3.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1) + call void @llvm.arm.neon.vst3.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) ret void } -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind -declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind -declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>) nounwind -declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>) nounwind -declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>) nounwind +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind -declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>) nounwind -declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>) nounwind -declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>) nounwind -declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>) nounwind +declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll index 878f0ef..d302f09 100644 --- a/test/CodeGen/ARM/vst4.ll +++ b/test/CodeGen/ARM/vst4.ll @@ -4,7 +4,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst4i8: ;CHECK: vst4.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1) + call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ret void } @@ -13,7 +13,7 @@ define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1) + call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) ret void } @@ -22,7 +22,7 @@ define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1) + call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) ret void } @@ -31,7 +31,7 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1) + call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) ret void } @@ -40,7 +40,7 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst1.64 %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B - call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1) + call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) ret void } @@ -49,7 +49,7 @@ define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst4.8 ;CHECK: vst4.8 %tmp1 = load <16 x i8>* %B - call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1) + call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1) ret void } @@ -59,7 +59,7 @@ define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1) + call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ret void } @@ -69,7 +69,7 @@ define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1) + call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) ret void } @@ -79,17 +79,17 @@ define void @vst4Qf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1) + call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) ret void } -declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind -declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind -declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind -declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind -declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>) nounwind +declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind -declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind -declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>) nounwind -declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind -declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) nounwind +declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll index cf50756..30ec52a 100644 --- a/test/CodeGen/ARM/vstlane.ll +++ b/test/CodeGen/ARM/vstlane.ll @@ -4,7 +4,7 @@ define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst2lanei8: ;CHECK: vst2.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ret void } @@ -13,7 +13,7 @@ define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) ret void } @@ -22,7 +22,7 @@ define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) ret void } @@ -31,7 +31,7 @@ define void @vst2lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) ret void } @@ -40,7 +40,7 @@ define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst2.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) ret void } @@ -49,7 +49,7 @@ define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2) + call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) ret void } @@ -58,24 +58,24 @@ define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst2.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3) + call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3, i32 1) ret void } -declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst3lanei8: ;CHECK: vst3.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ret void } @@ -84,7 +84,7 @@ define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) ret void } @@ -93,7 +93,7 @@ define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) ret void } @@ -102,7 +102,7 @@ define void @vst3lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) ret void } @@ -111,7 +111,7 @@ define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst3.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6) + call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 1) ret void } @@ -120,7 +120,7 @@ define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0) + call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1) ret void } @@ -129,25 +129,25 @@ define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst3.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) ret void } -declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst4lanei8: ;CHECK: vst4.8 %tmp1 = load <8 x i8>* %B - call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ret void } @@ -156,7 +156,7 @@ define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B - call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) ret void } @@ -165,7 +165,7 @@ define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B - call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) ret void } @@ -174,7 +174,7 @@ define void @vst4lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <2 x float>* %B - call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) ret void } @@ -183,7 +183,7 @@ define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst4.16 %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B - call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7) + call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 1) ret void } @@ -192,7 +192,7 @@ define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B - call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2) + call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) ret void } @@ -201,15 +201,15 @@ define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B - call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) ret void } -declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind -declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll index 3416de7..df77bb3 100644 --- a/test/CodeGen/ARM/vsub.ll +++ b/test/CodeGen/ARM/vsub.ll @@ -157,8 +157,10 @@ define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vsubl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sub <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -166,8 +168,10 @@ define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vsubl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sub <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -175,8 +179,10 @@ define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vsubl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sub <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -184,8 +190,10 @@ define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vsubl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sub <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { @@ -193,8 +201,10 @@ define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vsubl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sub <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { @@ -202,25 +212,20 @@ define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vsubl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sub <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 } -declare <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone - define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK: vsubws8: ;CHECK: vsubw.s8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = sub <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 } define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { @@ -228,8 +233,9 @@ define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK: vsubw.s16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = sub <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 } define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { @@ -237,8 +243,9 @@ define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK: vsubw.s32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = sub <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 } define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { @@ -246,8 +253,9 @@ define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK: vsubw.u8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) - ret <8 x i16> %tmp3 + %tmp3 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = sub <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 } define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { @@ -255,8 +263,9 @@ define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK: vsubw.u16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2) - ret <4 x i32> %tmp3 + %tmp3 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = sub <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 } define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { @@ -264,14 +273,7 @@ define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK: vsubw.u32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2) - ret <2 x i64> %tmp3 + %tmp3 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = sub <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 } - -declare <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone - -declare <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll index 10bb10a..b1c2f93 100644 --- a/test/CodeGen/ARM/vtrn.ll +++ b/test/CodeGen/ARM/vtrn.ll @@ -95,3 +95,30 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind { %tmp5 = fadd <4 x float> %tmp3, %tmp4 ret <4 x float> %tmp5 } + +; Undef shuffle indices should not prevent matching to VTRN: + +define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vtrni8_undef: +;CHECK: vtrn.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vtrnQi16_undef: +;CHECK: vtrn.16 +;CHECK-NEXT: vadd.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14> + %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll index 6cef188..9130f62 100644 --- a/test/CodeGen/ARM/vuzp.ll +++ b/test/CodeGen/ARM/vuzp.ll @@ -73,3 +73,30 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind { %tmp5 = fadd <4 x float> %tmp3, %tmp4 ret <4 x float> %tmp5 } + +; Undef shuffle indices should not prevent matching to VUZP: + +define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vuzpi8_undef: +;CHECK: vuzp.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vuzpQi16_undef: +;CHECK: vuzp.16 +;CHECK-NEXT: vadd.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14> + %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll index a9ecdca..926970a 100644 --- a/test/CodeGen/ARM/vzip.ll +++ b/test/CodeGen/ARM/vzip.ll @@ -73,3 +73,30 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind { %tmp5 = fadd <4 x float> %tmp3, %tmp4 ret <4 x float> %tmp5 } + +; Undef shuffle indices should not prevent matching to VZIP: + +define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vzipi8_undef: +;CHECK: vzip.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vzipQi8_undef: +;CHECK: vzip.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> + %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31> + %tmp5 = add <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 +} + diff --git a/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll b/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll new file mode 100644 index 0000000..b838ec9 --- /dev/null +++ b/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=alpha | FileCheck %s + +define fastcc i64 @getcount(i64 %s) { + %tmp431 = mul i64 %s, 12884901888 + ret i64 %tmp431 +} + +; CHECK: sll $16,33,$0 +; CHECK-NEXT: sll $16,32,$1 +; CHECK-NEXT: addq $0,$1,$0 + diff --git a/test/CodeGen/CellSPU/arg_ret.ll b/test/CodeGen/CellSPU/arg_ret.ll new file mode 100644 index 0000000..743292a --- /dev/null +++ b/test/CodeGen/CellSPU/arg_ret.ll @@ -0,0 +1,33 @@ +; Test parameter passing and return values +;RUN: llc --march=cellspu %s -o - | FileCheck %s + +; this fits into registers r3-r74 +%paramstruct = type { i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32} +define ccc i32 @test_regs( %paramstruct %prm ) +{ +;CHECK: lr $3, $74 +;CHECK: bi $lr + %1 = extractvalue %paramstruct %prm, 71 + ret i32 %1 +} + +define ccc i32 @test_regs_and_stack( %paramstruct %prm, i32 %stackprm ) +{ +;CHECK-NOT: a $3, $74, $75 + %1 = extractvalue %paramstruct %prm, 71 + %2 = add i32 %1, %stackprm + ret i32 %2 +} + +define ccc %paramstruct @test_return( i32 %param, %paramstruct %prm ) +{ +;CHECK: lqd $75, 80($sp) +;CHECK: lr $3, $4 + ret %paramstruct %prm +} + diff --git a/test/CodeGen/CellSPU/bigstack.ll b/test/CodeGen/CellSPU/bigstack.ll index 5483f46..63293e2 100644 --- a/test/CodeGen/CellSPU/bigstack.ll +++ b/test/CodeGen/CellSPU/bigstack.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=cellspu -o %t1.s -; RUN: grep lqx %t1.s | count 4 -; RUN: grep il %t1.s | grep -v file | count 7 -; RUN: grep stqx %t1.s | count 2 +; RUN: grep lqx %t1.s | count 3 +; RUN: grep il %t1.s | grep -v file | count 5 +; RUN: grep stqx %t1.s | count 1 define i32 @bigstack() nounwind { entry: diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll index eb7cf2c..559b266 100644 --- a/test/CodeGen/CellSPU/call.ll +++ b/test/CodeGen/CellSPU/call.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=cellspu -regalloc=linearscan > %t1.s ; RUN: grep brsl %t1.s | count 1 -; RUN: grep brasl %t1.s | count 1 -; RUN: grep stqd %t1.s | count 80 +; RUN: grep brasl %t1.s | count 2 +; RUN: grep stqd %t1.s | count 82 ; RUN: llc < %s -march=cellspu | FileCheck %s target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" @@ -29,3 +29,25 @@ define i32 @stub_2(...) { entry: ret i32 0 } + +; check that struct is passed in r3-> +; assert this by changing the second field in the struct +%0 = type { i32, i32, i32 } +declare %0 @callee() +define %0 @test_structret() +{ +;CHECK: stqd $lr, 16($sp) +;CHECK: stqd $sp, -48($sp) +;CHECK: ai $sp, $sp, -48 +;CHECK: brasl $lr, callee + %rv = call %0 @callee() +;CHECK: ai $4, $4, 1 +;CHECK: lqd $lr, 64($sp) +;CHECK: ai $sp, $sp, 48 +;CHECK: bi $lr + %oldval = extractvalue %0 %rv, 1 + %newval = add i32 %oldval,1 + %newrv = insertvalue %0 %rv, i32 %newval, 1 + ret %0 %newrv +} + diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll index d94d77c..141361d 100644 --- a/test/CodeGen/CellSPU/call_indirect.ll +++ b/test/CodeGen/CellSPU/call_indirect.ll @@ -12,7 +12,7 @@ ; RUN: grep rotqby %t2.s | count 5 ; RUN: grep lqd %t2.s | count 13 ; RUN: grep ilhu %t2.s | count 2 -; RUN: grep ai %t2.s | count 8 +; RUN: grep ai %t2.s | count 9 ; RUN: grep dispatch_tab %t2.s | count 6 ; ModuleID = 'call_indirect.bc' diff --git a/test/CodeGen/CellSPU/shuffles.ll b/test/CodeGen/CellSPU/shuffles.ll index 04accb9..f37d2ae 100644 --- a/test/CodeGen/CellSPU/shuffles.ll +++ b/test/CodeGen/CellSPU/shuffles.ll @@ -16,3 +16,26 @@ define <4 x float> @splat(float %param1) { ret <4 x float> %val } +define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) { + %sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0 +;CHECK: lqa $6, +;CHECK: shufb $4, $4, $5, $6 + %sl2_17 = insertelement <2 x float> %sl2_17_tmp1, float %val2, i32 1 + +;CHECK: cdd $5, 0($3) +;CHECK: lqd $6, 0($3) +;CHECK: shufb $4, $4, $6, $5 +;CHECK: stqd $4, 0($3) +;CHECK: bi $lr + store <2 x float> %sl2_17, <2 x float>* %ptr + ret void +} + +define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) { +;CHECK: cwd $5, 4($sp) +;CHECK: shufb $3, $4, $3, $5 +;CHECK: bi $lr + %rv = insertelement <4 x float> %vparam, float %eltparam, i32 1 + ret <4 x float> %rv +} + diff --git a/test/CodeGen/CellSPU/v2f32.ll b/test/CodeGen/CellSPU/v2f32.ll new file mode 100644 index 0000000..b81c0cd --- /dev/null +++ b/test/CodeGen/CellSPU/v2f32.ll @@ -0,0 +1,75 @@ +;RUN: llc --march=cellspu %s -o - | FileCheck %s +%vec = type <2 x float> + +define %vec @test_ret(%vec %param) +{ +;CHECK: bi $lr + ret %vec %param +} + +define %vec @test_add(%vec %param) +{ +;CHECK: fa {{\$.}}, $3, $3 + %1 = fadd %vec %param, %param +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_sub(%vec %param) +{ +;CHECK: fs {{\$.}}, $3, $3 + %1 = fsub %vec %param, %param + +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_mul(%vec %param) +{ +;CHECK: fm {{\$.}}, $3, $3 + %1 = fmul %vec %param, %param + +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_splat(float %param ) { +;CHECK: lqa +;CHECK: shufb + %sv = insertelement <1 x float> undef, float %param, i32 0 + %rv = shufflevector <1 x float> %sv, <1 x float> undef, <2 x i32> zeroinitializer +;CHECK: bi $lr + ret %vec %rv +} + +define void @test_store(%vec %val, %vec* %ptr){ + +;CHECK: stqd + store %vec undef, %vec* null + +;CHECK: stqd $3, 0(${{.}}) +;CHECK: bi $lr + store %vec %val, %vec* %ptr + ret void +} + +define %vec @test_insert(){ +;CHECK: cwd +;CHECK: shufb $3 + %rv = insertelement %vec undef, float 0.0e+00, i32 undef +;CHECK: bi $lr + ret %vec %rv +} + +define void @test_unaligned_store() { +;CHECK: cdd $3, 8($3) +;CHECK: lqd +;CHECK: shufb +;CHECK: stqd + %data = alloca [4 x float], align 16 ; <[4 x float]*> [#uses=1] + %ptr = getelementptr [4 x float]* %data, i32 0, i32 2 ; <float*> [#uses=1] + %vptr = bitcast float* %ptr to <2 x float>* ; <[1 x <2 x float>]*> [#uses=1] + store <2 x float> undef, <2 x float>* %vptr + ret void +} + diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll new file mode 100644 index 0000000..dd51be5 --- /dev/null +++ b/test/CodeGen/CellSPU/v2i32.ll @@ -0,0 +1,64 @@ +;RUN: llc --march=cellspu %s -o - | FileCheck %s +%vec = type <2 x i32> + +define %vec @test_ret(%vec %param) +{ +;CHECK: bi $lr + ret %vec %param +} + +define %vec @test_add(%vec %param) +{ +;CHECK: a {{\$.}}, $3, $3 + %1 = add %vec %param, %param +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_sub(%vec %param) +{ +;CHECK: sf {{\$.}}, $4, $3 + %1 = sub %vec %param, <i32 1, i32 1> + +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_mul(%vec %param) +{ +;CHECK: mpyu +;CHECK: mpyh +;CHECK: a {{\$., \$., \$.}} +;CHECK: a {{\$., \$., \$.}} + %1 = mul %vec %param, %param + +;CHECK: bi $lr + ret %vec %1 +} + +define <2 x i32> @test_splat(i32 %param ) { +;TODO insertelement transforms to a PREFSLOT2VEC, that trasforms to the +; somewhat redundant: +;CHECK-NOT or $3, $3, $3 +;CHECK: lqa +;CHECK: shufb + %sv = insertelement <1 x i32> undef, i32 %param, i32 0 + %rv = shufflevector <1 x i32> %sv, <1 x i32> undef, <2 x i32> zeroinitializer +;CHECK: bi $lr + ret <2 x i32> %rv +} + +define i32 @test_extract() { +;CHECK: shufb $3 + %rv = extractelement <2 x i32> zeroinitializer, i32 undef ; <i32> [#uses=1] +;CHECK: bi $lr + ret i32 %rv +} + +define void @test_store( %vec %val, %vec* %ptr) +{ +;CHECK: stqd $3, 0(${{.}}) +;CHECK: bi $lr + store %vec %val, %vec* %ptr + ret void +} diff --git a/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll b/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll new file mode 100644 index 0000000..a2945aa --- /dev/null +++ b/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s + +define float @test1() +{ + ret float extractelement (<2 x float> bitcast (<1 x double> <double 0x3f800000> to <2 x float>), i32 1); +} diff --git a/test/CodeGen/Mips/2008-06-05-Carry.ll b/test/CodeGen/Mips/2008-06-05-Carry.ll index 8e7b70e..9d8e391 100644 --- a/test/CodeGen/Mips/2008-06-05-Carry.ll +++ b/test/CodeGen/Mips/2008-06-05-Carry.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define i64 @add64(i64 %u, i64 %v) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-03-SRet.ll b/test/CodeGen/Mips/2008-07-03-SRet.ll index b2aaa00..b1d20d9 100644 --- a/test/CodeGen/Mips/2008-07-03-SRet.ll +++ b/test/CodeGen/Mips/2008-07-03-SRet.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep {sw.*(\$4)} | count 3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" %struct.sret0 = type { i32, i32, i32 } define void @test0(%struct.sret0* noalias sret %agg.result, i32 %dummy) nounwind { diff --git a/test/CodeGen/Mips/2008-07-05-ByVal.ll b/test/CodeGen/Mips/2008-07-05-ByVal.ll index 6bb6bd8..a1f0504 100644 --- a/test/CodeGen/Mips/2008-07-05-ByVal.ll +++ b/test/CodeGen/Mips/2008-07-05-ByVal.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep {lw.*(\$4)} | count 2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" %struct.byval0 = type { i32, i32 } define i64 @test0(%struct.byval0* byval %b, i64 %sum) nounwind { diff --git a/test/CodeGen/Mips/2008-07-06-fadd64.ll b/test/CodeGen/Mips/2008-07-06-fadd64.ll index 808ce16..ecd8521 100644 --- a/test/CodeGen/Mips/2008-07-06-fadd64.ll +++ b/test/CodeGen/Mips/2008-07-06-fadd64.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep __adddf3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define double @dofloat(double %a, double %b) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-07-FPExtend.ll b/test/CodeGen/Mips/2008-07-07-FPExtend.ll index 7ac0f5f..681788e 100644 --- a/test/CodeGen/Mips/2008-07-07-FPExtend.ll +++ b/test/CodeGen/Mips/2008-07-07-FPExtend.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep __extendsfdf2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define double @dofloat(float %a) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-07-Float2Int.ll b/test/CodeGen/Mips/2008-07-07-Float2Int.ll index ca99636..d804c7d 100644 --- a/test/CodeGen/Mips/2008-07-07-Float2Int.ll +++ b/test/CodeGen/Mips/2008-07-07-Float2Int.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep trunc.w.s | count 3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define i32 @fptoint(float %a) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll b/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll index 20de18a..b8b4c5c 100644 --- a/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll +++ b/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll @@ -5,7 +5,7 @@ ; RUN: grep __fixunsdfsi %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define double @int2fp(i32 %a) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll index f6b2045..bda4a31 100644 --- a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll +++ b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll @@ -6,7 +6,7 @@ ; RUN: not grep {gp_rel} %t target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" @.str = internal constant [10 x i8] c"AAAAAAAAA\00" @i0 = internal constant [5 x i32] [ i32 0, i32 1, i32 2, i32 3, i32 4 ] diff --git a/test/CodeGen/Mips/2008-07-15-SmallSection.ll b/test/CodeGen/Mips/2008-07-15-SmallSection.ll index 26eb4db..91efd68 100644 --- a/test/CodeGen/Mips/2008-07-15-SmallSection.ll +++ b/test/CodeGen/Mips/2008-07-15-SmallSection.ll @@ -10,7 +10,7 @@ ; RUN: grep {\%lo} %t1 | count 2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" %struct.anon = type { i32, i32 } @s0 = global [8 x i8] c"AAAAAAA\00", align 4 diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll index 59599b3..41ae5dd 100644 --- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll +++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll @@ -3,7 +3,7 @@ ; RUN: grep seb %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define i8 @A(i8 %e.0, i8 signext %sum) signext nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll index 21ff960..20bd888 100644 --- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll +++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll @@ -2,7 +2,7 @@ ; RUN: grep {CPI\[01\]_\[01\]:} %t | count 2 ; RUN: grep {rodata.cst4,"aM",@progbits} %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @F(float %a) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/test/CodeGen/Mips/2008-07-23-fpcmp.ll index 80101fa..ca837ff 100644 --- a/test/CodeGen/Mips/2008-07-23-fpcmp.ll +++ b/test/CodeGen/Mips/2008-07-23-fpcmp.ll @@ -3,7 +3,7 @@ ; RUN: grep {bc1\[tf\]} %t | count 3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @A(float %a, float %b) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-29-icmp.ll b/test/CodeGen/Mips/2008-07-29-icmp.ll index 042cad6..52a4b08 100644 --- a/test/CodeGen/Mips/2008-07-29-icmp.ll +++ b/test/CodeGen/Mips/2008-07-29-icmp.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep {b\[ne\]\[eq\]} | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @A(float %a, float %b, i32 %j) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-07-31-fcopysign.ll b/test/CodeGen/Mips/2008-07-31-fcopysign.ll index 77680bc..47382f9 100644 --- a/test/CodeGen/Mips/2008-07-31-fcopysign.ll +++ b/test/CodeGen/Mips/2008-07-31-fcopysign.ll @@ -3,7 +3,7 @@ ; RUN: grep neg.s %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @A(float %i, float %j) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll index cd35cca..23ed64a 100644 --- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll +++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll @@ -4,7 +4,7 @@ ; RUN: grep multu %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" %struct.DWstruct = type { i32, i32 } define i32 @A0(i32 %u, i32 %v) nounwind { diff --git a/test/CodeGen/Mips/2008-08-03-fabs64.ll b/test/CodeGen/Mips/2008-08-03-fabs64.ll index 2f33e9b..0fc45f7 100644 --- a/test/CodeGen/Mips/2008-08-03-fabs64.ll +++ b/test/CodeGen/Mips/2008-08-03-fabs64.ll @@ -3,7 +3,7 @@ ; RUN: grep {ori.*65535} %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define double @A(double %c, double %d) nounwind readnone { entry: diff --git a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll index ca90b50..f8eb028 100644 --- a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll +++ b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll @@ -3,7 +3,7 @@ ; RUN: grep mfc1 %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @A(i32 %u) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-06-Alloca.ll b/test/CodeGen/Mips/2008-08-06-Alloca.ll index 79e49a3..7be7974 100644 --- a/test/CodeGen/Mips/2008-08-06-Alloca.ll +++ b/test/CodeGen/Mips/2008-08-06-Alloca.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep {subu.*sp} | count 2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define i32 @twoalloca(i32 %size) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-07-CC.ll b/test/CodeGen/Mips/2008-08-07-CC.ll index 54d454c..63c2595 100644 --- a/test/CodeGen/Mips/2008-08-07-CC.ll +++ b/test/CodeGen/Mips/2008-08-07-CC.ll @@ -3,7 +3,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define internal fastcc i32 @A(i32 %u) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-07-FPRound.ll b/test/CodeGen/Mips/2008-08-07-FPRound.ll index f3bb965..67f86d7 100644 --- a/test/CodeGen/Mips/2008-08-07-FPRound.ll +++ b/test/CodeGen/Mips/2008-08-07-FPRound.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep __truncdfsf2 | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define float @round2float(double %a) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-08-ctlz.ll b/test/CodeGen/Mips/2008-08-08-ctlz.ll index 1da1db2..fb33323 100644 --- a/test/CodeGen/Mips/2008-08-08-ctlz.ll +++ b/test/CodeGen/Mips/2008-08-08-ctlz.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=mips | grep clz | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "mipsallegrexel-psp-elf" +target triple = "mipsallegrexel-unknown-psp-elf" define i32 @A0(i32 %u) nounwind { entry: diff --git a/test/CodeGen/Mips/2010-07-20-Select.ll b/test/CodeGen/Mips/2010-07-20-Select.ll new file mode 100644 index 0000000..8b7f9a9 --- /dev/null +++ b/test/CodeGen/Mips/2010-07-20-Select.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s +; Fix PR7473 + +define i32 @main() nounwind readnone { +entry: + %a = alloca i32, align 4 ; <i32*> [#uses=2] + %c = alloca i32, align 4 ; <i32*> [#uses=2] + volatile store i32 1, i32* %a, align 4 + volatile store i32 0, i32* %c, align 4 + %0 = volatile load i32* %a, align 4 ; <i32> [#uses=1] + %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] +; CHECK: addiu $4, $zero, 3 + %iftmp.0.0 = select i1 %1, i32 3, i32 0 ; <i32> [#uses=1] + %2 = volatile load i32* %c, align 4 ; <i32> [#uses=1] + %3 = icmp eq i32 %2, 0 ; <i1> [#uses=1] +; CHECK: addu $4, $zero, $3 +; CHECK: addu $2, $5, $4 + %iftmp.2.0 = select i1 %3, i32 0, i32 5 ; <i32> [#uses=1] + %4 = add nsw i32 %iftmp.2.0, %iftmp.0.0 ; <i32> [#uses=1] + ret i32 %4 +} diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll new file mode 100644 index 0000000..07fc10c --- /dev/null +++ b/test/CodeGen/Mips/2010-07-20-Switch.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s + +define i32 @main() nounwind readnone { +entry: + %x = alloca i32, align 4 ; <i32*> [#uses=2] + volatile store i32 2, i32* %x, align 4 + %0 = volatile load i32* %x, align 4 ; <i32> [#uses=1] +; CHECK: lui $3, %hi($JTI0_0) +; CHECK: sll $2, $2, 2 +; CHECK: addiu $3, $3, %lo($JTI0_0) + switch i32 %0, label %bb4 [ + i32 0, label %bb5 + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + ] + +bb1: ; preds = %entry + ret i32 2 + +; CHECK: $BB0_2 +bb2: ; preds = %entry + ret i32 0 + +bb3: ; preds = %entry + ret i32 3 + +bb4: ; preds = %entry + ret i32 4 + +bb5: ; preds = %entry + ret i32 1 +} diff --git a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll b/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll deleted file mode 100644 index db2ab87..0000000 --- a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll +++ /dev/null @@ -1,8 +0,0 @@ -; RUN: llc < %s -march=ppc32 | grep nop -target triple = "powerpc-apple-darwin8" - - -define void @bork() noreturn nounwind { -entry: - unreachable -} diff --git a/test/CodeGen/PowerPC/empty-functions.ll b/test/CodeGen/PowerPC/empty-functions.ll new file mode 100644 index 0000000..3a2907d --- /dev/null +++ b/test/CodeGen/PowerPC/empty-functions.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck -check-prefix=CHECK-NO-FP %s +; RUN: llc < %s -mtriple=powerpc-apple-darwin -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s + +define void @func() { +entry: + unreachable +} +; CHECK-NO-FP: _func: +; CHECK-NO-FP: nop + +; CHECK-FP: _func: +; CHECK-FP: nop diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll index 32c6f48..399f19f 100644 --- a/test/CodeGen/PowerPC/vec_constants.ll +++ b/test/CodeGen/PowerPC/vec_constants.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep CPI -define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) { +define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind { %tmp = load <4 x i32>* %P1 ; <<4 x i32>> [#uses=1] %tmp4 = and <4 x i32> %tmp, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1] store <4 x i32> %tmp4, <4 x i32>* %P1 @@ -15,26 +15,30 @@ define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) { ret void } -define <4 x i32> @test_30() { +define <4 x i32> @test_30() nounwind { ret <4 x i32> < i32 30, i32 30, i32 30, i32 30 > } -define <4 x i32> @test_29() { +define <4 x i32> @test_29() nounwind { ret <4 x i32> < i32 29, i32 29, i32 29, i32 29 > } -define <8 x i16> @test_n30() { +define <8 x i16> @test_n30() nounwind { ret <8 x i16> < i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30 > } -define <16 x i8> @test_n104() { +define <16 x i8> @test_n104() nounwind { ret <16 x i8> < i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104 > } -define <4 x i32> @test_vsldoi() { +define <4 x i32> @test_vsldoi() nounwind { ret <4 x i32> < i32 512, i32 512, i32 512, i32 512 > } -define <4 x i32> @test_rol() { +define <8 x i16> @test_vsldoi_65023() nounwind { + ret <8 x i16> < i16 65023, i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023 > +} + +define <4 x i32> @test_rol() nounwind { ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 > } diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores.ll b/test/CodeGen/SystemZ/05-MemLoadsStores.ll index cf12063..eabeb0a 100644 --- a/test/CodeGen/SystemZ/05-MemLoadsStores.ll +++ b/test/CodeGen/SystemZ/05-MemLoadsStores.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s | grep {st %} | count 2 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define void @foo1(i32* nocapture %foo, i32* nocapture %bar) nounwind { entry: diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll index 1e6232a..53bb641 100644 --- a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll +++ b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s | grep {sth.%} | count 2 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define void @foo1(i16* nocapture %foo, i16* nocapture %bar) nounwind { entry: diff --git a/test/CodeGen/SystemZ/07-BrUnCond.ll b/test/CodeGen/SystemZ/07-BrUnCond.ll index e0bc302..ac6067a 100644 --- a/test/CodeGen/SystemZ/07-BrUnCond.ll +++ b/test/CodeGen/SystemZ/07-BrUnCond.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define void @foo() noreturn nounwind { entry: diff --git a/test/CodeGen/SystemZ/09-DynamicAlloca.ll b/test/CodeGen/SystemZ/09-DynamicAlloca.ll index 27189ab..30810ce 100644 --- a/test/CodeGen/SystemZ/09-DynamicAlloca.ll +++ b/test/CodeGen/SystemZ/09-DynamicAlloca.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define void @foo(i64 %N) nounwind { entry: diff --git a/test/CodeGen/SystemZ/09-Globals.ll b/test/CodeGen/SystemZ/09-Globals.ll index 6e0c1ab..50a26e2 100644 --- a/test/CodeGen/SystemZ/09-Globals.ll +++ b/test/CodeGen/SystemZ/09-Globals.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s | grep larl | count 3 target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" @bar = common global i64 0, align 8 ; <i64*> [#uses=3] define i64 @foo() nounwind readonly { diff --git a/test/CodeGen/SystemZ/10-FuncsPic.ll b/test/CodeGen/SystemZ/10-FuncsPic.ll index cc32538..f291e5f 100644 --- a/test/CodeGen/SystemZ/10-FuncsPic.ll +++ b/test/CodeGen/SystemZ/10-FuncsPic.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -relocation-model=pic | grep PLT | count 1 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" @ptr = external global void (...)* ; <void (...)**> [#uses=2] define void @foo1() nounwind { diff --git a/test/CodeGen/SystemZ/10-GlobalsPic.ll b/test/CodeGen/SystemZ/10-GlobalsPic.ll index a77671e..c581ad9 100644 --- a/test/CodeGen/SystemZ/10-GlobalsPic.ll +++ b/test/CodeGen/SystemZ/10-GlobalsPic.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 6 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" @src = external global i32 ; <i32*> [#uses=2] @dst = external global i32 ; <i32*> [#uses=2] @ptr = external global i32* ; <i32**> [#uses=2] diff --git a/test/CodeGen/SystemZ/11-BSwap.ll b/test/CodeGen/SystemZ/11-BSwap.ll index 609d9dc..b170a80 100644 --- a/test/CodeGen/SystemZ/11-BSwap.ll +++ b/test/CodeGen/SystemZ/11-BSwap.ll @@ -2,7 +2,7 @@ target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define i16 @foo(i16 zeroext %a) zeroext { diff --git a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll index 07a164d..54424e1 100644 --- a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll +++ b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=systemz | grep rll target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone { entry: diff --git a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll index 99d0ee7..89b2225 100644 --- a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll +++ b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define void @compdecomp(i8* nocapture %data, i64 %data_len) nounwind { entry: diff --git a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll index a35167f..68ccb84 100644 --- a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll +++ b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define signext i32 @bit_place_piece(i32 signext %col, i32 signext %player, i64* nocapture %b1, i64* nocapture %b2) nounwind { entry: diff --git a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll index b37f7e9..98feb83 100644 --- a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll +++ b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s | FileCheck %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" declare void @rdft(i32 signext, i32 signext, double*, i32* nocapture, double*) nounwind diff --git a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll index 5457b12..f4e176e 100644 --- a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll +++ b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define float @foo(i32 signext %a) { entry: diff --git a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll index a91e29e..63fd855 100644 --- a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll +++ b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" -target triple = "s390x-linux" +target triple = "s390x-ibm-linux" define signext i32 @dfg_parse() nounwind { entry: diff --git a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll index 2074bfd..929c472 100644 --- a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll +++ b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s | not grep r11 -target triple = "thumb-linux-gnueabi" +target triple = "thumb-unknown-linux-gnueabi" %struct.__sched_param = type { i32 } %struct.pthread_attr_t = type { i32, i32, %struct.__sched_param, i32, i32, i32, i32, i8*, i32 } @i.1882 = internal global i32 1 ; <i32*> [#uses=2] diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll new file mode 100644 index 0000000..9a6321b --- /dev/null +++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll @@ -0,0 +1,147 @@ +; RUN: llc -mtriple=thumbv6-apple-darwin10 < %s | FileCheck %s +; RUN: opt -strip-debug < %s | llc -mtriple=thumbv6-apple-darwin10 | FileCheck %s +; Stripping out debug info formerly caused the last two multiplies to be emitted in +; the other order. 7797940 (part of it dated 6/29/2010..7/15/2010). + +%0 = type { [3 x double] } + +@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%0*, i32, i32)* @_Z19getClosestDiagonal3ii to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind { +; CHECK: blx ___muldf3 +; CHECK: blx ___muldf3 +; CHECK: beq LBB0_8 +; CHECK: blx ___muldf3 +; <label>:3 + switch i32 %1, label %4 [ + i32 0, label %5 + i32 3, label %5 + ] + +; <label>:4 ; preds = %3 + br label %5, !dbg !0 + +; <label>:5 ; preds = %4, %3, %3 + %storemerge = phi double [ -1.000000e+00, %4 ], [ 1.000000e+00, %3 ], [ 1.000000e+00, %3 ] ; <double> [#uses=1] + %v_6 = icmp slt i32 %1, 2 ; <i1> [#uses=1] + %storemerge1 = select i1 %v_6, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=3] + call void @llvm.dbg.value(metadata !{double %storemerge}, i64 0, metadata !91), !dbg !0 + %v_7 = icmp eq i32 %2, 1, !dbg !92 ; <i1> [#uses=1] + %storemerge2 = select i1 %v_7, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=3] + %v_8 = getelementptr inbounds %0* %0, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %v_10 = getelementptr inbounds %0* %0, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %v_11 = fmul double %storemerge1, %storemerge1, !dbg !93 ; <double> [#uses=1] + %v_15 = tail call double @sqrt(double %v_11) nounwind readonly, !dbg !93 ; <double> [#uses=1] + %v_16 = fdiv double 1.000000e+00, %v_15, !dbg !93 ; <double> [#uses=3] + %v_17 = fmul double %storemerge, %v_16, !dbg !97 ; <double> [#uses=1] + store double %v_17, double* %v_8, align 4, !dbg !97 + %v_19 = fmul double %storemerge2, %v_16, !dbg !97 ; <double> [#uses=1] + store double %v_19, double* %v_10, align 4, !dbg !97 + ret void, !dbg !98 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare double @sqrt(double) nounwind readonly + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!0 = metadata !{i32 46, i32 0, metadata !1, null} +!1 = metadata !{i32 524299, metadata !2, i32 44, i32 0} ; [ DW_TAG_lexical_block ] +!2 = metadata !{i32 524299, metadata !3, i32 44, i32 0} ; [ DW_TAG_lexical_block ] +!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"getClosestDiagonal3", metadata !"getClosestDiagonal3", metadata !"_Z19getClosestDiagonal3ii", metadata !4, i32 44, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!4 = metadata !{i32 524329, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 524305, i32 0, i32 4, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{metadata !8, metadata !22, metadata !22} +!8 = metadata !{i32 524307, metadata !4, metadata !"ggVector3", metadata !9, i32 66, i64 192, i64 32, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_structure_type ] +!9 = metadata !{i32 524329, metadata !"ggVector3.h", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ] +!10 = metadata !{metadata !11, metadata !16, metadata !23, metadata !26, metadata !29, metadata !30, metadata !35, metadata !36, metadata !37, metadata !41, metadata !42, metadata !43, metadata !46, metadata !47, metadata !48, metadata !52, metadata !53, metadata !54, metadata !57, metadata !60, metadata !63, metadata !66, metadata !70, metadata !71, metadata !74, metadata !75, metadata !76, metadata !77, metadata !78, metadata !81, metadata !82, metadata !83, metadata !84, metadata !85, metadata !88, metadata !89, metadata !90} +!11 = metadata !{i32 524301, metadata !8, metadata !"e", metadata !9, i32 160, i64 192, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ] +!12 = metadata !{i32 524289, metadata !4, metadata !"", metadata !4, i32 0, i64 192, i64 32, i64 0, i32 0, metadata !13, metadata !14, i32 0, null} ; [ DW_TAG_array_type ] +!13 = metadata !{i32 524324, metadata !4, metadata !"double", metadata !4, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!14 = metadata !{metadata !15} +!15 = metadata !{i32 524321, i64 0, i64 2} ; [ DW_TAG_subrange_type ] +!16 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 72, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!17 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null} ; [ DW_TAG_subroutine_type ] +!18 = metadata !{null, metadata !19, metadata !20} +!19 = metadata !{i32 524303, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ] +!20 = metadata !{i32 524310, metadata !21, metadata !"ggBoolean", metadata !21, i32 478, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_typedef ] +!21 = metadata !{i32 524329, metadata !"math.h", metadata !"/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm", metadata !5} ; [ DW_TAG_file_type ] +!22 = metadata !{i32 524324, metadata !4, metadata !"int", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!23 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 73, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!24 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ] +!25 = metadata !{null, metadata !19} +!26 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 74, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!27 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null} ; [ DW_TAG_subroutine_type ] +!28 = metadata !{null, metadata !19, metadata !13, metadata !13, metadata !13} +!29 = metadata !{i32 524334, i32 0, metadata !8, metadata !"Set", metadata !"Set", metadata !"_ZN9ggVector33SetEddd", metadata !9, i32 81, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!30 = metadata !{i32 524334, i32 0, metadata !8, metadata !"x", metadata !"x", metadata !"_ZNK9ggVector31xEv", metadata !9, i32 82, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!31 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !32, i32 0, null} ; [ DW_TAG_subroutine_type ] +!32 = metadata !{metadata !13, metadata !33} +!33 = metadata !{i32 524303, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 64, metadata !34} ; [ DW_TAG_pointer_type ] +!34 = metadata !{i32 524326, metadata !4, metadata !"", metadata !4, i32 0, i64 192, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] +!35 = metadata !{i32 524334, i32 0, metadata !8, metadata !"y", metadata !"y", metadata !"_ZNK9ggVector31yEv", metadata !9, i32 83, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!36 = metadata !{i32 524334, i32 0, metadata !8, metadata !"z", metadata !"z", metadata !"_ZNK9ggVector31zEv", metadata !9, i32 84, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!37 = metadata !{i32 524334, i32 0, metadata !8, metadata !"x", metadata !"x", metadata !"_ZN9ggVector31xEv", metadata !9, i32 85, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!38 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !39, i32 0, null} ; [ DW_TAG_subroutine_type ] +!39 = metadata !{metadata !40, metadata !19} +!40 = metadata !{i32 524304, metadata !4, metadata !"double", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !13} ; [ DW_TAG_reference_type ] +!41 = metadata !{i32 524334, i32 0, metadata !8, metadata !"y", metadata !"y", metadata !"_ZN9ggVector31yEv", metadata !9, i32 86, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!42 = metadata !{i32 524334, i32 0, metadata !8, metadata !"z", metadata !"z", metadata !"_ZN9ggVector31zEv", metadata !9, i32 87, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!43 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetX", metadata !"SetX", metadata !"_ZN9ggVector34SetXEd", metadata !9, i32 88, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!44 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !45, i32 0, null} ; [ DW_TAG_subroutine_type ] +!45 = metadata !{null, metadata !19, metadata !13} +!46 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetY", metadata !"SetY", metadata !"_ZN9ggVector34SetYEd", metadata !9, i32 89, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!47 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetZ", metadata !"SetZ", metadata !"_ZN9ggVector34SetZEd", metadata !9, i32 90, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!48 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 92, metadata !49, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!49 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !50, i32 0, null} ; [ DW_TAG_subroutine_type ] +!50 = metadata !{null, metadata !19, metadata !51} +!51 = metadata !{i32 524304, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !34} ; [ DW_TAG_reference_type ] +!52 = metadata !{i32 524334, i32 0, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZNK9ggVector39toleranceEv", metadata !9, i32 100, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!53 = metadata !{i32 524334, i32 0, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZN9ggVector39toleranceEv", metadata !9, i32 101, metadata !38, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!54 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator+", metadata !"operator+", metadata !"_ZNK9ggVector3psEv", metadata !9, i32 107, metadata !55, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!55 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !56, i32 0, null} ; [ DW_TAG_subroutine_type ] +!56 = metadata !{metadata !51, metadata !33} +!57 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator-", metadata !"operator-", metadata !"_ZNK9ggVector3ngEv", metadata !9, i32 108, metadata !58, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!58 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !59, i32 0, null} ; [ DW_TAG_subroutine_type ] +!59 = metadata !{metadata !8, metadata !33} +!60 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZNK9ggVector3ixEi", metadata !9, i32 290, metadata !61, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!61 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !62, i32 0, null} ; [ DW_TAG_subroutine_type ] +!62 = metadata !{metadata !13, metadata !33, metadata !22} +!63 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZN9ggVector3ixEi", metadata !9, i32 278, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!64 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, null} ; [ DW_TAG_subroutine_type ] +!65 = metadata !{metadata !40, metadata !19, metadata !22} +!66 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator+=", metadata !"operator+=", metadata !"_ZN9ggVector3pLERKS_", metadata !9, i32 303, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!67 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !68, i32 0, null} ; [ DW_TAG_subroutine_type ] +!68 = metadata !{metadata !69, metadata !19, metadata !51} +!69 = metadata !{i32 524304, metadata !4, metadata !"ggVector3", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_reference_type ] +!70 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator-=", metadata !"operator-=", metadata !"_ZN9ggVector3mIERKS_", metadata !9, i32 310, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!71 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator*=", metadata !"operator*=", metadata !"_ZN9ggVector3mLEd", metadata !9, i32 317, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!72 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !73, i32 0, null} ; [ DW_TAG_subroutine_type ] +!73 = metadata !{metadata !69, metadata !19, metadata !13} +!74 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator/=", metadata !"operator/=", metadata !"_ZN9ggVector3dVEd", metadata !9, i32 324, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!75 = metadata !{i32 524334, i32 0, metadata !8, metadata !"length", metadata !"length", metadata !"_ZNK9ggVector36lengthEv", metadata !9, i32 121, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!76 = metadata !{i32 524334, i32 0, metadata !8, metadata !"squaredLength", metadata !"squaredLength", metadata !"_ZNK9ggVector313squaredLengthEv", metadata !9, i32 122, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!77 = metadata !{i32 524334, i32 0, metadata !8, metadata !"MakeUnitVector", metadata !"MakeUnitVector", metadata !"_ZN9ggVector314MakeUnitVectorEv", metadata !9, i32 217, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!78 = metadata !{i32 524334, i32 0, metadata !8, metadata !"Perturb", metadata !"Perturb", metadata !"_ZNK9ggVector37PerturbEdd", metadata !9, i32 126, metadata !79, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!79 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !80, i32 0, null} ; [ DW_TAG_subroutine_type ] +!80 = metadata !{metadata !8, metadata !33, metadata !13, metadata !13} +!81 = metadata !{i32 524334, i32 0, metadata !8, metadata !"maxComponent", metadata !"maxComponent", metadata !"_ZNK9ggVector312maxComponentEv", metadata !9, i32 128, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!82 = metadata !{i32 524334, i32 0, metadata !8, metadata !"minComponent", metadata !"minComponent", metadata !"_ZNK9ggVector312minComponentEv", metadata !9, i32 129, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!83 = metadata !{i32 524334, i32 0, metadata !8, metadata !"maxAbsComponent", metadata !"maxAbsComponent", metadata !"_ZNK9ggVector315maxAbsComponentEv", metadata !9, i32 131, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!84 = metadata !{i32 524334, i32 0, metadata !8, metadata !"minAbsComponent", metadata !"minAbsComponent", metadata !"_ZNK9ggVector315minAbsComponentEv", metadata !9, i32 132, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!85 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMinComponent", metadata !"indexOfMinComponent", metadata !"_ZNK9ggVector319indexOfMinComponentEv", metadata !9, i32 133, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!86 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !87, i32 0, null} ; [ DW_TAG_subroutine_type ] +!87 = metadata !{metadata !22, metadata !33} +!88 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMinAbsComponent", metadata !"indexOfMinAbsComponent", metadata !"_ZNK9ggVector322indexOfMinAbsComponentEv", metadata !9, i32 137, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!89 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMaxComponent", metadata !"indexOfMaxComponent", metadata !"_ZNK9ggVector319indexOfMaxComponentEv", metadata !9, i32 146, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!90 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMaxAbsComponent", metadata !"indexOfMaxAbsComponent", metadata !"_ZNK9ggVector322indexOfMaxAbsComponentEv", metadata !9, i32 150, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!91 = metadata !{i32 524544, metadata !1, metadata !"vx", metadata !4, i32 46, metadata !13} ; [ DW_TAG_auto_variable ] +!92 = metadata !{i32 48, i32 0, metadata !1, null} +!93 = metadata !{i32 218, i32 0, metadata !94, metadata !96} +!94 = metadata !{i32 524299, metadata !95, i32 217, i32 0} ; [ DW_TAG_lexical_block ] +!95 = metadata !{i32 524299, metadata !77, i32 217, i32 0} ; [ DW_TAG_lexical_block ] +!96 = metadata !{i32 51, i32 0, metadata !1, null} +!97 = metadata !{i32 227, i32 0, metadata !94, metadata !96} +!98 = metadata !{i32 52, i32 0, metadata !1, null} diff --git a/test/CodeGen/Thumb/barrier.ll b/test/CodeGen/Thumb/barrier.ll new file mode 100644 index 0000000..c611b86 --- /dev/null +++ b/test/CodeGen/Thumb/barrier.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s -check-prefix=V6 +; RUN: llc < %s -march=thumb -mattr=+v6m | FileCheck %s -check-prefix=V6M + +declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 ) + +define void @t1() { +; V6: t1: +; V6: blx {{_*}}sync_synchronize + +; V6M: t1: +; V6M: dsb + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true ) + ret void +} + +define void @t2() { +; V6: t2: +; V6: blx {{_*}}sync_synchronize + +; V6M: t2: +; V6M: dmb + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false ) + ret void +} diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll index acfdc91..5c8ad97 100644 --- a/test/CodeGen/Thumb/dyn-stackalloc.ll +++ b/test/CodeGen/Thumb/dyn-stackalloc.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=thumb | not grep {ldr sp} ; RUN: llc < %s -mtriple=thumb-apple-darwin | \ ; RUN: not grep {sub.*r7} -; RUN: llc < %s -march=thumb | grep 4294967280 +; RUN: llc < %s -march=thumb | grep {mov.*r6, sp} %struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* } %struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* } diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll index 02de36a..b289484 100644 --- a/test/CodeGen/Thumb/large-stack.ll +++ b/test/CodeGen/Thumb/large-stack.ll @@ -1,20 +1,35 @@ -; RUN: llc < %s -march=thumb | grep {ldr.*LCP} | count 5 +; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s define void @test1() { +; CHECK: test1: +; CHECK: sub sp, #256 +; CHECK: add sp, #256 %tmp = alloca [ 64 x i32 ] , align 4 ret void } define void @test2() { +; CHECK: test2: +; CHECK: ldr r0, LCPI +; CHECK: add sp, r0 +; CHECK: mov sp, r7 +; CHECK: sub sp, #4 %tmp = alloca [ 4168 x i8 ] , align 4 ret void } define i32 @test3() { - %retval = alloca i32, align 4 - %tmp = alloca i32, align 4 - %a = alloca [805306369 x i8], align 16 - store i32 0, i32* %tmp - %tmp1 = load i32* %tmp - ret i32 %tmp1 +; CHECK: test3: +; CHECK: ldr r2, LCPI +; CHECK: add sp, r2 +; CHECK: ldr r1, LCPI +; CHECK: add r1, sp +; CHECK: mov sp, r7 +; CHECK: sub sp, #4 + %retval = alloca i32, align 4 + %tmp = alloca i32, align 4 + %a = alloca [805306369 x i8], align 16 + store i32 0, i32* %tmp + %tmp1 = load i32* %tmp + ret i32 %tmp1 } diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll index 16a9c44..c2ba208 100644 --- a/test/CodeGen/Thumb/vargs.ll +++ b/test/CodeGen/Thumb/vargs.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=thumb -; RUN: llc < %s -mtriple=thumb-linux | grep pop | count 1 +; RUN: llc < %s -mtriple=thumb-linux | grep pop | count 2 ; RUN: llc < %s -mtriple=thumb-darwin | grep pop | count 2 @str = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll index 98a5263..45d356c 100644 --- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll +++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll @@ -11,8 +11,8 @@ define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) { ; CHECK: _ZNKSs7compareERKSs: ; CHECK: it eq -; CHECK-NEXT: subeq.w r0, r6, r8 -; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r8, r9, pc} +; CHECK-NEXT: subeq r0, r6, r7 +; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r7, r8, pc} entry: %0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3] %1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3] diff --git a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll index 3f1b9eb..2246de3 100644 --- a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll +++ b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll @@ -7,17 +7,12 @@ define void @t() nounwind ssp { entry: ; CHECK: t: -; CHECK: mov r0, sp -; CHECK: bfc r0, #0, #3 -; CHECK: subs r0, #16 -; CHECK: mov sp, r0 -; Yes, this is stupid codegen, but it's correct. -; CHECK: mov r0, sp -; CHECK: bfc r0, #0, #3 -; CHECK: subs r0, #16 -; CHECK: mov sp, r0 %size = mul i32 8, 2 +; CHECK: subs r0, #16 +; CHECK: mov sp, r0 %vla_a = alloca i8, i32 %size, align 8 +; CHECK: subs r0, #16 +; CHECK: mov sp, r0 %vla_b = alloca i8, i32 %size, align 8 unreachable } diff --git a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll new file mode 100644 index 0000000..abcf13a --- /dev/null +++ b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -O3 | FileCheck %s + +@.str = private constant [4 x i8] c"%d\0A\00", align 4 ; <[4 x i8]*> [#uses=1] + +define internal fastcc i32 @Callee(i32 %i) nounwind { +entry: +; CHECK: Callee: + %0 = icmp eq i32 %i, 0 ; <i1> [#uses=1] + br i1 %0, label %bb2, label %bb + +bb: ; preds = %entry + %1 = alloca [1000 x i8], align 4 ; <[1000 x i8]*> [#uses=1] + %.sub = getelementptr inbounds [1000 x i8]* %1, i32 0, i32 0 ; <i8*> [#uses=2] + %2 = call i32 (i8*, i32, i32, i8*, ...)* @__sprintf_chk(i8* %.sub, i32 0, i32 1000, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %i) nounwind ; <i32> [#uses=0] + %3 = load i8* %.sub, align 4 ; <i8> [#uses=1] + %4 = sext i8 %3 to i32 ; <i32> [#uses=1] + ret i32 %4 + +bb2: ; preds = %entry +; Must restore sp from fp here +; CHECK: mov sp, r7 +; CHECK: sub sp, #8 +; CHECK: pop + ret i32 0 +} + +declare i32 @__sprintf_chk(i8*, i32, i32, i8*, ...) nounwind + +define i32 @main() nounwind { +; CHECK: main: +bb.nph: + br label %bb + +bb: ; preds = %bb, %bb.nph + %0 = phi i32 [ 0, %bb.nph ], [ %3, %bb ] ; <i32> [#uses=2] + %j.01 = phi i32 [ 0, %bb.nph ], [ %2, %bb ] ; <i32> [#uses=1] + %1 = tail call fastcc i32 @Callee(i32 %0) nounwind ; <i32> [#uses=1] + %2 = add nsw i32 %1, %j.01 ; <i32> [#uses=2] + %3 = add nsw i32 %0, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %3, 10000 ; <i1> [#uses=1] + br i1 %exitcond, label %bb2, label %bb + +bb2: ; preds = %bb +; No need to restore sp from fp here. +; CHECK: printf +; CHECK-NOT: mov sp, r7 +; CHECK-NOT: sub sp, #12 +; CHECK: pop + %4 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/Thumb2/bfi.ll b/test/CodeGen/Thumb2/bfi.ll new file mode 100644 index 0000000..22473bb --- /dev/null +++ b/test/CodeGen/Thumb2/bfi.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=thumb -mattr=+v6t2 < %s | FileCheck %s + +%struct.F = type { [3 x i8], i8 } + +@X = common global %struct.F zeroinitializer, align 4 ; <%struct.F*> [#uses=1] + +define void @f1([1 x i32] %f.coerce0) nounwind { +entry: +; CHECK: f1 +; CHECK: movs r2, #10 +; CHECK: bfi r1, r2, #22, #4 + %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1] + %1 = and i32 %0, -62914561 ; <i32> [#uses=1] + %2 = or i32 %1, 41943040 ; <i32> [#uses=1] + store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4 + ret void +} + +define i32 @f2(i32 %A, i32 %B) nounwind readnone optsize { +entry: +; CHECK: f2 +; CHECK: lsrs r1, r1, #7 +; CHECK: bfi r0, r1, #7, #16 + %and = and i32 %A, -8388481 ; <i32> [#uses=1] + %and2 = and i32 %B, 8388480 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} + +define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize { +entry: +; CHECK: f3 +; CHECK: lsrs r2, r0, #7 +; CHECK: mov r0, r1 +; CHECK: bfi r0, r2, #7, #16 + %and = and i32 %A, 8388480 ; <i32> [#uses=1] + %and2 = and i32 %B, -8388481 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll new file mode 100644 index 0000000..f7ec5a3 --- /dev/null +++ b/test/CodeGen/Thumb2/cortex-fp.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=CORTEXM3 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m4 | FileCheck %s -check-prefix=CORTEXM4 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 + + +define float @foo(float %a, float %b) { +entry: +; CHECK: foo +; CORTEXM3: blx ___mulsf3 +; CORTEXM4: vmul.f32 s0, s1, s0 +; CORTEXA8: vmul.f32 d0, d1, d0 + %0 = fmul float %a, %b + ret float %0 +} + +define double @bar(double %a, double %b) { +entry: +; CHECK: bar + %0 = fmul double %a, %b +; CORTEXM3: blx ___muldf3 +; CORTEXM4: blx ___muldf3 +; CORTEXA8: vmul.f64 d0, d1, d0 + ret double %0 +} diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll index 87af9d1..d8b51ec 100644 --- a/test/CodeGen/Thumb2/crash.ll +++ b/test/CodeGen/Thumb2/crash.ll @@ -14,11 +14,11 @@ entry: %6 = bitcast i32* %sp3 to <4 x i32>* ; <<4 x i32>*> [#uses=1] %7 = load <4 x i32>* %6, align 16 ; <<4 x i32>> [#uses=1] %8 = bitcast i32* %dp to i8* ; <i8*> [#uses=1] - tail call void @llvm.arm.neon.vst4.v4i32(i8* %8, <4 x i32> %1, <4 x i32> %3, <4 x i32> %5, <4 x i32> %7) + tail call void @llvm.arm.neon.vst4.v4i32(i8* %8, <4 x i32> %1, <4 x i32> %3, <4 x i32> %5, <4 x i32> %7, i32 1) ret void } -declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind +declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind @sbuf = common global [16 x i32] zeroinitializer, align 16 ; <[16 x i32]*> [#uses=5] @dbuf = common global [16 x i32] zeroinitializer ; <[16 x i32]*> [#uses=2] @@ -44,6 +44,6 @@ bb2: ; preds = %bb %3 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 4) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1] %4 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 8) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1] %5 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 12) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1] - tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5) nounwind + tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5, i32 1) nounwind ret i32 0 } diff --git a/test/CodeGen/Thumb2/div.ll b/test/CodeGen/Thumb2/div.ll index 0cddd48..e63a115 100644 --- a/test/CodeGen/Thumb2/div.ll +++ b/test/CodeGen/Thumb2/div.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=thumb -mattr=+thumb2 \ ; RUN: | FileCheck %s -check-prefix=CHECK-THUMB -; RUN: llc < %s -march=arm -mcpu=cortex-m3 -mattr=+thumb2 \ +; RUN: llc < %s -march=thumb -mcpu=cortex-m3 -mattr=+thumb2 \ ; RUN: | FileCheck %s -check-prefix=CHECK-THUMBV7M define i32 @f1(i32 %a, i32 %b) { diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll index 29b8e75..650d788 100644 --- a/test/CodeGen/Thumb2/ldr-str-imm12.ll +++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll @@ -22,7 +22,7 @@ define %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind { entry: -; CHECK: ldr.w r9, [r7, #28] +; CHECK: ldr.w {{(r[0-9])|(lr)}}, [r7, #28] %xgaps.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0] %ycomp.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0] br label %bb20 @@ -46,9 +46,9 @@ bb119: ; preds = %bb20, %bb20 bb420: ; preds = %bb20, %bb20 ; CHECK: bb420 -; CHECK: str r{{[0-7]}}, [sp] -; CHECK: str r{{[0-7]}}, [sp, #4] -; CHECK: str r{{[0-7]}}, [sp, #8] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #4] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #8] ; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #24] store %union.rec* null, %union.rec** @zz_hold, align 4 store %union.rec* null, %union.rec** @zz_res, align 4 diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll index 7fa782f..ad957a1 100644 --- a/test/CodeGen/Thumb2/lsr-deficiency.ll +++ b/test/CodeGen/Thumb2/lsr-deficiency.ll @@ -21,8 +21,8 @@ entry: bb: ; preds = %bb, %entry ; CHECK: LBB0_1: ; CHECK: cmp r2, #0 -; CHECK: sub.w r9, r2, #1 -; CHECK: mov r2, r9 +; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], r2, #1 +; CHECK: mov r2, [[REGISTER]] %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1] %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] diff --git a/test/CodeGen/Thumb2/machine-licm-vdup.ll b/test/CodeGen/Thumb2/machine-licm-vdup.ll new file mode 100644 index 0000000..fde2ee0 --- /dev/null +++ b/test/CodeGen/Thumb2/machine-licm-vdup.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-fp-elim -arm-vdup-splat | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s +; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375. +; Eventually this should become the default and be moved into machine-licm.ll. +; FIXME: the vdup should be hoisted out of the loop, 8248029. + +define void @t2(i8* %ptr1, i8* %ptr2) nounwind { +entry: +; CHECK: t2: +; CHECK: mov.w r3, #1065353216 + br i1 undef, label %bb1, label %bb2 + +bb1: +; CHECK-NEXT: %bb1 +; CHECK: vdup.32 q1, r3 + %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] + %tmp1 = shl i32 %indvar, 2 + %gep1 = getelementptr i8* %ptr1, i32 %tmp1 + %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1) + %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2) + %gep2 = getelementptr i8* %ptr2, i32 %tmp1 + call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1) + %indvar.next = add i32 %indvar, 1 + %cond = icmp eq i32 %indvar.next, 10 + br i1 %cond, label %bb2, label %bb1 + +bb2: + ret void +} + +; CHECK-NOT: LCPI1_0: +; CHECK: .subsections_via_symbols + +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly + +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind + +declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll index cdb3041..b949b2f 100644 --- a/test/CodeGen/Thumb2/machine-licm.ll +++ b/test/CodeGen/Thumb2/machine-licm.ll @@ -64,10 +64,10 @@ bb1: %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] %tmp1 = shl i32 %indvar, 2 %gep1 = getelementptr i8* %ptr1, i32 %tmp1 - %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1) + %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1) %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2) %gep2 = getelementptr i8* %ptr2, i32 %tmp1 - call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3) + call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1) %indvar.next = add i32 %indvar, 1 %cond = icmp eq i32 %indvar.next, 10 br i1 %cond, label %bb2, label %bb1 @@ -79,8 +79,8 @@ bb2: ; CHECK: LCPI1_0: ; CHECK: .section -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone diff --git a/test/CodeGen/Thumb2/thumb2-and2.ll b/test/CodeGen/Thumb2/thumb2-and2.ll index 76c56d0..7b0432d 100644 --- a/test/CodeGen/Thumb2/thumb2-and2.ll +++ b/test/CodeGen/Thumb2/thumb2-and2.ll @@ -30,7 +30,7 @@ define i32 @f4(i32 %a) { ret i32 %tmp } ; CHECK: f4: -; CHECK: and r0, r0, #1448498774 +; CHECK: bic r0, r0, #-1448498775 ; 66846720 = 0x03fc0000 define i32 @f5(i32 %a) { diff --git a/test/CodeGen/Thumb2/thumb2-badreg-operands.ll b/test/CodeGen/Thumb2/thumb2-badreg-operands.ll new file mode 100644 index 0000000..4df06b8 --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-badreg-operands.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s + +define void @b(i32 %x) nounwind optsize { +entry: +; CHECK: b +; CHECK: mov r2, sp +; CHECK: mls r0, r0, r1, r2 +; CHECK: mov sp, r0 + %0 = mul i32 %x, 24 ; <i32> [#uses=1] + %vla = alloca i8, i32 %0, align 1 ; <i8*> [#uses=1] + call arm_aapcscc void @a(i8* %vla) nounwind optsize + ret void +} + +declare void @a(i8*) optsize diff --git a/test/CodeGen/Thumb2/thumb2-barrier.ll b/test/CodeGen/Thumb2/thumb2-barrier.ll new file mode 100644 index 0000000..a54d09e --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-barrier.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s + +declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 ) + +define void @t1() { +; CHECK: t1: +; CHECK: dsb + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true ) + ret void +} + +define void @t2() { +; CHECK: t2: +; CHECK: dmb + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false ) + ret void +} diff --git a/test/CodeGen/Thumb2/thumb2-call-tc.ll b/test/CodeGen/Thumb2/thumb2-call-tc.ll index 24502b0..2e4da1b 100644 --- a/test/CodeGen/Thumb2/thumb2-call-tc.ll +++ b/test/CodeGen/Thumb2/thumb2-call-tc.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -check-prefix=DARWIN ; RUN: llc < %s -mtriple=thumbv7-linux -mattr=+thumb2 | FileCheck %s -check-prefix=LINUX +; XFAIL: * @t = weak global i32 ()* null ; <i32 ()**> [#uses=1] diff --git a/test/CodeGen/Thumb2/thumb2-cmp.ll b/test/CodeGen/Thumb2/thumb2-cmp.ll index d4773bb..63249f4 100644 --- a/test/CodeGen/Thumb2/thumb2-cmp.ll +++ b/test/CodeGen/Thumb2/thumb2-cmp.ll @@ -39,3 +39,17 @@ define i1 @f5(i32 %a) { %tmp = icmp eq i32 %a, 1114112 ret i1 %tmp } + +; Check that we don't do an invalid (a > b) --> !(a < b + 1) transform. +; +; CHECK: f6: +; CHECK-NOT: cmp.w r0, #-2147483648 +; CHECK: bx lr +define i32 @f6(i32 %a) { + %tmp = icmp sgt i32 %a, 2147483647 + br i1 %tmp, label %true, label %false +true: + ret i32 2 +false: + ret i32 0 +} diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll index c024415..5315535 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s +; XFAIL: * define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK: t1: diff --git a/test/CodeGen/Thumb2/thumb2-pack.ll b/test/CodeGen/Thumb2/thumb2-pack.ll index c8302df..2e8bb1d 100644 --- a/test/CodeGen/Thumb2/thumb2-pack.ll +++ b/test/CodeGen/Thumb2/thumb2-pack.ll @@ -1,8 +1,7 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | \ -; RUN: grep pkhbt | count 5 -; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | \ -; RUN: grep pkhtb | count 4 +; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s +; CHECK: test1 +; CHECK: pkhbt r0, r0, r1, lsl #16 define i32 @test1(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp4 = shl i32 %Y, 16 ; <i32> [#uses=1] @@ -10,6 +9,8 @@ define i32 @test1(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test1a +; CHECK: pkhbt r0, r0, r1, lsl #16 define i32 @test1a(i32 %X, i32 %Y) { %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp37 = shl i32 %Y, 16 ; <i32> [#uses=1] @@ -17,6 +18,8 @@ define i32 @test1a(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test2 +; CHECK: pkhbt r0, r0, r1, lsl #12 define i32 @test2(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp3 = shl i32 %Y, 12 ; <i32> [#uses=1] @@ -25,6 +28,8 @@ define i32 @test2(i32 %X, i32 %Y) { ret i32 %tmp57 } +; CHECK: test3 +; CHECK: pkhbt r0, r0, r1, lsl #18 define i32 @test3(i32 %X, i32 %Y) { %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp37 = shl i32 %Y, 18 ; <i32> [#uses=1] @@ -32,6 +37,8 @@ define i32 @test3(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test4 +; CHECK: pkhbt r0, r0, r1 define i32 @test4(i32 %X, i32 %Y) { %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] %tmp3 = and i32 %Y, -65536 ; <i32> [#uses=1] @@ -39,6 +46,8 @@ define i32 @test4(i32 %X, i32 %Y) { ret i32 %tmp46 } +; CHECK: test5 +; CHECK: pkhtb r0, r0, r1, asr #16 define i32 @test5(i32 %X, i32 %Y) { %tmp17 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp2 = bitcast i32 %Y to i32 ; <i32> [#uses=1] @@ -47,6 +56,8 @@ define i32 @test5(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test5a +; CHECK: pkhtb r0, r0, r1, asr #16 define i32 @test5a(i32 %X, i32 %Y) { %tmp110 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp37 = lshr i32 %Y, 16 ; <i32> [#uses=1] @@ -55,6 +66,8 @@ define i32 @test5a(i32 %X, i32 %Y) { ret i32 %tmp5 } +; CHECK: test6 +; CHECK: pkhtb r0, r0, r1, asr #12 define i32 @test6(i32 %X, i32 %Y) { %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp37 = lshr i32 %Y, 12 ; <i32> [#uses=1] @@ -64,6 +77,8 @@ define i32 @test6(i32 %X, i32 %Y) { ret i32 %tmp59 } +; CHECK: test7 +; CHECK: pkhtb r0, r0, r1, asr #18 define i32 @test7(i32 %X, i32 %Y) { %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1] %tmp3 = ashr i32 %Y, 18 ; <i32> [#uses=1] @@ -71,3 +86,12 @@ define i32 @test7(i32 %X, i32 %Y) { %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1] ret i32 %tmp57 } + +; CHECK: test8 +; CHECK: pkhtb r0, r0, r1, asr #22 +define i32 @test8(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, -65536 + %tmp3 = lshr i32 %Y, 22 + %tmp57 = or i32 %tmp3, %tmp1 + ret i32 %tmp57 +} diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll index 3946371..4f92c93 100644 --- a/test/CodeGen/Thumb2/thumb2-spill-q.ll +++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll @@ -7,7 +7,7 @@ %quux = type { i32 (...)**, %baz*, i32 } %quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo } -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly define void @aaa(%quuz* %this, i8* %block) { ; CHECK: aaa: @@ -15,11 +15,11 @@ define void @aaa(%quuz* %this, i8* %block) { ; CHECK: vst1.64 {{.*}}[{{.*}}, :128] ; CHECK: vld1.64 {{.*}}[{{.*}}, :128] entry: - %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 6.300000e+01, float* undef, align 4 - %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 0.000000e+00, float* undef, align 4 - %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] + %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1] br label %bb4 diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll index 1fa4e5c..2074f98 100644 --- a/test/CodeGen/Thumb2/thumb2-uxtb.ll +++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll @@ -6,7 +6,7 @@ define i32 @test1(i32 %x) { ; ARMv7A: uxtb16 r0, r0 ; ARMv7M: test1 -; ARMv7M: and r0, r0, #16711935 +; ARMv7M: bic r0, r0, #-16711936 %tmp1 = and i32 %x, 16711935 ; <i32> [#uses=1] ret i32 %tmp1 } diff --git a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll index 2d7bd27..35b0159 100644 --- a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll +++ b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 | grep setnp -; RUN: llc < %s -march=x86 -enable-unsafe-fp-math -enable-finite-only-fp-math | \ +; RUN: llc < %s -march=x86 -enable-unsafe-fp-math -enable-no-nans-fp-math | \ ; RUN: not grep setnp define i32 @test(float %f) { diff --git a/test/CodeGen/X86/2007-06-14-branchfold.ll b/test/CodeGen/X86/2007-06-14-branchfold.ll deleted file mode 100644 index 2680b15..0000000 --- a/test/CodeGen/X86/2007-06-14-branchfold.ll +++ /dev/null @@ -1,133 +0,0 @@ -; RUN: llc < %s -march=x86 -mcpu=i686 | not grep jmp -; check that branch folding understands FP_REG_KILL is not a branch - -target triple = "i686-pc-linux-gnu" - %struct.FRAME.c34003a = type { float, float } -@report_E = global i8 0 ; <i8*> [#uses=0] - -define void @main() { -entry: - %FRAME.31 = alloca %struct.FRAME.c34003a, align 8 ; <%struct.FRAME.c34003a*> [#uses=4] - %tmp20 = call i32 @report__ident_int( i32 -50 ) ; <i32> [#uses=1] - %tmp2021 = sitofp i32 %tmp20 to float ; <float> [#uses=5] - %tmp23 = fcmp ult float %tmp2021, 0xC7EFFFFFE0000000 ; <i1> [#uses=1] - %tmp26 = fcmp ugt float %tmp2021, 0x47EFFFFFE0000000 ; <i1> [#uses=1] - %bothcond = or i1 %tmp23, %tmp26 ; <i1> [#uses=1] - br i1 %bothcond, label %bb, label %bb30 - -bb: ; preds = %entry - unwind - -bb30: ; preds = %entry - %tmp35 = call i32 @report__ident_int( i32 50 ) ; <i32> [#uses=1] - %tmp3536 = sitofp i32 %tmp35 to float ; <float> [#uses=4] - %tmp38 = fcmp ult float %tmp3536, 0xC7EFFFFFE0000000 ; <i1> [#uses=1] - %tmp44 = fcmp ugt float %tmp3536, 0x47EFFFFFE0000000 ; <i1> [#uses=1] - %bothcond226 = or i1 %tmp38, %tmp44 ; <i1> [#uses=1] - br i1 %bothcond226, label %bb47, label %bb49 - -bb47: ; preds = %bb30 - unwind - -bb49: ; preds = %bb30 - %tmp60 = fcmp ult float %tmp3536, %tmp2021 ; <i1> [#uses=1] - %tmp60.not = xor i1 %tmp60, true ; <i1> [#uses=1] - %tmp65 = fcmp olt float %tmp2021, 0xC7EFFFFFE0000000 ; <i1> [#uses=1] - %bothcond227 = and i1 %tmp65, %tmp60.not ; <i1> [#uses=1] - br i1 %bothcond227, label %cond_true68, label %cond_next70 - -cond_true68: ; preds = %bb49 - unwind - -cond_next70: ; preds = %bb49 - %tmp71 = call i32 @report__ident_int( i32 -30 ) ; <i32> [#uses=1] - %tmp7172 = sitofp i32 %tmp71 to float ; <float> [#uses=3] - %tmp74 = fcmp ult float %tmp7172, 0xC7EFFFFFE0000000 ; <i1> [#uses=1] - %tmp80 = fcmp ugt float %tmp7172, 0x47EFFFFFE0000000 ; <i1> [#uses=1] - %bothcond228 = or i1 %tmp74, %tmp80 ; <i1> [#uses=1] - br i1 %bothcond228, label %bb83, label %bb85 - -bb83: ; preds = %cond_next70 - unwind - -bb85: ; preds = %cond_next70 - %tmp90 = getelementptr %struct.FRAME.c34003a* %FRAME.31, i32 0, i32 1 ; <float*> [#uses=3] - store float %tmp7172, float* %tmp90 - %tmp92 = call i32 @report__ident_int( i32 30 ) ; <i32> [#uses=1] - %tmp9293 = sitofp i32 %tmp92 to float ; <float> [#uses=7] - %tmp95 = fcmp ult float %tmp9293, 0xC7EFFFFFE0000000 ; <i1> [#uses=1] - %tmp101 = fcmp ugt float %tmp9293, 0x47EFFFFFE0000000 ; <i1> [#uses=1] - %bothcond229 = or i1 %tmp95, %tmp101 ; <i1> [#uses=1] - br i1 %bothcond229, label %bb104, label %bb106 - -bb104: ; preds = %bb85 - unwind - -bb106: ; preds = %bb85 - %tmp111 = getelementptr %struct.FRAME.c34003a* %FRAME.31, i32 0, i32 0 ; <float*> [#uses=2] - store float %tmp9293, float* %tmp111 - %tmp123 = load float* %tmp90 ; <float> [#uses=4] - %tmp125 = fcmp ult float %tmp9293, %tmp123 ; <i1> [#uses=1] - br i1 %tmp125, label %cond_next147, label %cond_true128 - -cond_true128: ; preds = %bb106 - %tmp133 = fcmp olt float %tmp123, %tmp2021 ; <i1> [#uses=1] - %tmp142 = fcmp ogt float %tmp9293, %tmp3536 ; <i1> [#uses=1] - %bothcond230 = or i1 %tmp133, %tmp142 ; <i1> [#uses=1] - br i1 %bothcond230, label %bb145, label %cond_next147 - -bb145: ; preds = %cond_true128 - unwind - -cond_next147: ; preds = %cond_true128, %bb106 - %tmp157 = fcmp ugt float %tmp123, -3.000000e+01 ; <i1> [#uses=1] - %tmp165 = fcmp ult float %tmp9293, -3.000000e+01 ; <i1> [#uses=1] - %bothcond231 = or i1 %tmp157, %tmp165 ; <i1> [#uses=1] - br i1 %bothcond231, label %bb168, label %bb169 - -bb168: ; preds = %cond_next147 - unwind - -bb169: ; preds = %cond_next147 - %tmp176 = fcmp ugt float %tmp123, 3.000000e+01 ; <i1> [#uses=1] - %tmp184 = fcmp ult float %tmp9293, 3.000000e+01 ; <i1> [#uses=1] - %bothcond232 = or i1 %tmp176, %tmp184 ; <i1> [#uses=1] - br i1 %bothcond232, label %bb187, label %bb188 - -bb187: ; preds = %bb169 - unwind - -bb188: ; preds = %bb169 - %tmp192 = call fastcc float @c34003a__ident.154( %struct.FRAME.c34003a* %FRAME.31, float 3.000000e+01 ) ; <float> [#uses=2] - %tmp194 = load float* %tmp90 ; <float> [#uses=1] - %tmp196 = fcmp ugt float %tmp194, 0.000000e+00 ; <i1> [#uses=1] - br i1 %tmp196, label %bb207, label %cond_next200 - -cond_next200: ; preds = %bb188 - %tmp202 = load float* %tmp111 ; <float> [#uses=1] - %tmp204 = fcmp ult float %tmp202, 0.000000e+00 ; <i1> [#uses=1] - br i1 %tmp204, label %bb207, label %bb208 - -bb207: ; preds = %cond_next200, %bb188 - unwind - -bb208: ; preds = %cond_next200 - %tmp212 = call fastcc float @c34003a__ident.154( %struct.FRAME.c34003a* %FRAME.31, float 0.000000e+00 ) ; <float> [#uses=1] - %tmp214 = fcmp oge float %tmp212, %tmp192 ; <i1> [#uses=1] - %tmp217 = fcmp oge float %tmp192, 1.000000e+02 ; <i1> [#uses=1] - %tmp221 = or i1 %tmp214, %tmp217 ; <i1> [#uses=1] - br i1 %tmp221, label %cond_true224, label %UnifiedReturnBlock - -cond_true224: ; preds = %bb208 - call void @abort( ) noreturn - ret void - -UnifiedReturnBlock: ; preds = %bb208 - ret void -} - -declare fastcc float @c34003a__ident.154(%struct.FRAME.c34003a* %CHAIN.32, float %x) - -declare i32 @report__ident_int(i32 %x) - -declare void @abort() noreturn diff --git a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll b/test/CodeGen/X86/2008-01-25-EmptyFunction.ll deleted file mode 100644 index b936686..0000000 --- a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll +++ /dev/null @@ -1,8 +0,0 @@ -; RUN: llc < %s -march=x86 | grep nop -target triple = "i686-apple-darwin8" - - -define void @bork() noreturn nounwind { -entry: - unreachable -} diff --git a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll b/test/CodeGen/X86/2008-08-06-CmpStride.ll index 99cb856..99cb856 100644 --- a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll +++ b/test/CodeGen/X86/2008-08-06-CmpStride.ll diff --git a/test/Transforms/LoopStrengthReduce/2009-02-09-ivs-different-sizes.ll b/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll index 36cc535..36cc535 100644 --- a/test/Transforms/LoopStrengthReduce/2009-02-09-ivs-different-sizes.ll +++ b/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index 4a97ac3..bb01e5a 100644 --- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {7 machine-licm} +; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {6 machine-licm} ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s ; rdar://6627786 ; rdar://7792037 diff --git a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll index 8d42627..2853930 100644 --- a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll +++ b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -mtriple=i686-linux | FileCheck %s ; Check the register copy comes after the call to f and before the call to g ; PR3784 diff --git a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll index da493d4..b13d33e 100644 --- a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll +++ b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -asm-verbose | FileCheck %s +; RUN: llc < %s -mtriple=i686-linux -asm-verbose | FileCheck %s ; Check that register copies in the landing pad come after the EH_LABEL declare i32 @f() diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll index b5873ba..90dabb8 100644 --- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll +++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -stats -info-output-file - > %t +; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -o /dev/null -stats -info-output-file - > %t ; RUN: not grep spill %t ; RUN: not grep {%rsp} %t ; RUN: not grep {%rbp} %t diff --git a/test/DebugInfo/2010-01-18-DbgValue.ll b/test/CodeGen/X86/2010-01-18-DbgValue.ll index 001f853..85ee091 100644 --- a/test/DebugInfo/2010-01-18-DbgValue.ll +++ b/test/CodeGen/X86/2010-01-18-DbgValue.ll @@ -1,7 +1,4 @@ -; RUN: llc -O0 < %s | FileCheck %s -; ModuleID = 'try.c' -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" -target triple = "i386-apple-darwin9.8" +; RUN: llc -march=x86 -O0 < %s | FileCheck %s ; Currently, dbg.declare generates a DEBUG_VALUE comment. Eventually it will ; generate DWARF and this test will need to be modified or removed. diff --git a/test/DebugInfo/2010-02-01-DbgValueCrash.ll b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll index 70103e5..2113263 100644 --- a/test/DebugInfo/2010-02-01-DbgValueCrash.ll +++ b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll @@ -1,6 +1,5 @@ ; RUN: llc -O1 < %s ; ModuleID = 'pr6157.bc' -target triple = "x86_64-unknown-linux-gnu" ; formerly crashed in SelectionDAGBuilder %tart.reflect.ComplexType = type { double, double } diff --git a/test/DebugInfo/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll index 52e9484..d211549 100644 --- a/test/DebugInfo/2010-05-25-DotDebugLoc.ll +++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll @@ -1,4 +1,4 @@ -; RUN: llc -O2 < %s -mtriple=x86_64-apple-darwin | grep debug_loc12 +; RUN: llc -march=x86-64 -O2 < %s | grep debug_loc12 ; Test to check .debug_loc support. This test case emits 13 debug_loc entries. %0 = type { double } diff --git a/test/DebugInfo/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll index 80643d0..80643d0 100644 --- a/test/DebugInfo/2010-05-28-Crash.ll +++ b/test/CodeGen/X86/2010-05-28-Crash.ll diff --git a/test/DebugInfo/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll index 812d372..812d372 100644 --- a/test/DebugInfo/2010-06-01-DeadArg-DbgInfo.ll +++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll diff --git a/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll b/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll new file mode 100644 index 0000000..be7d94c --- /dev/null +++ b/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -mcpu=i486 +; PR7375 +; +; This function contains a block (while.cond) with a lonely RFP use that is +; not a kill. We still need an FP_REG_KILL for that block since the register +; allocator will insert a reload. +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define void @_ZN7QVectorIdE4fillERKdi(double* nocapture %t) nounwind ssp align 2 { +entry: + %tmp2 = load double* %t ; <double> [#uses=1] + br i1 undef, label %if.end, label %if.then + +if.then: ; preds = %entry + br i1 undef, label %if.end, label %bb.nph + +while.cond: ; preds = %bb.nph, %while.cond + store double %tmp2, double* undef + br i1 undef, label %if.end, label %while.cond + +bb.nph: ; preds = %if.then + br label %while.cond + +if.end: ; preds = %while.cond, %if.then, %entry + ret void +} diff --git a/test/CodeGen/X86/2010-07-15-Crash.ll b/test/CodeGen/X86/2010-07-15-Crash.ll new file mode 100644 index 0000000..3ac4cf5 --- /dev/null +++ b/test/CodeGen/X86/2010-07-15-Crash.ll @@ -0,0 +1,12 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s -o /dev/null +; PR7653 + +@__FUNCTION__.1623 = external constant [4 x i8] ; <[4 x i8]*> [#uses=1] + +define void @foo() nounwind { +entry: + tail call void asm sideeffect "", "s,i,~{fpsr},~{flags}"(i8* getelementptr +inbounds ([4 x i8]* @__FUNCTION__.1623, i64 0, i64 0), i8* getelementptr +inbounds ([4 x i8]* @__FUNCTION__.1623, i64 0, i64 0)) nounwind + ret void +} diff --git a/test/CodeGen/X86/2010-07-29-SetccSimplify.ll b/test/CodeGen/X86/2010-07-29-SetccSimplify.ll new file mode 100644 index 0000000..96016cf --- /dev/null +++ b/test/CodeGen/X86/2010-07-29-SetccSimplify.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +define i32 @extend2bit_v2(i32 %val) { +entry: + %0 = trunc i32 %val to i2 ; <i2> [#uses=1] + %1 = sext i2 %0 to i32 ; <i32> [#uses=1] + %2 = icmp eq i32 %1, 3 ; <i1> [#uses=1] + %3 = zext i1 %2 to i32 ; <i32> [#uses=1] + ret i32 %3 +} + +; CHECK: extend2bit_v2: +; CHECK: xorl %eax, %eax +; CHECK-NEXT: ret diff --git a/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll new file mode 100644 index 0000000..1919d2e --- /dev/null +++ b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; PR7814 + +@g_16 = global i64 -3738643449681751625, align 8 ; <i64*> [#uses=1] +@g_38 = global i32 0, align 4 ; <i32*> [#uses=2] +@.str = private constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] + +define i32 @main() nounwind { +entry: + %tmp = load i64* @g_16 ; <i64> [#uses=1] + %not.lnot = icmp ne i64 %tmp, 0 ; <i1> [#uses=1] + %conv = sext i1 %not.lnot to i64 ; <i64> [#uses=1] + %and = and i64 %conv, 150 ; <i64> [#uses=1] + %conv.i = trunc i64 %and to i8 ; <i8> [#uses=1] + %cmp = icmp sgt i8 %conv.i, 0 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %entry.if.end_crit_edge + +; CHECK: andl $150 +; CHECK-NEXT: testb +; CHECK-NEXT: jg + +entry.if.end_crit_edge: ; preds = %entry + %tmp4.pre = load i32* @g_38 ; <i32> [#uses=1] + br label %if.end + +if.then: ; preds = %entry + store i32 1, i32* @g_38 + br label %if.end + +if.end: ; preds = %entry.if.end_crit_edge, %if.then + %tmp4 = phi i32 [ %tmp4.pre, %entry.if.end_crit_edge ], [ 1, %if.then ] ; <i32> [#uses=1] + %call5 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %tmp4) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/X86/2010-08-04-MingWCrash.ll b/test/CodeGen/X86/2010-08-04-MingWCrash.ll new file mode 100644 index 0000000..98a0887 --- /dev/null +++ b/test/CodeGen/X86/2010-08-04-MingWCrash.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=i386-pc-mingw32 + +define void @func() nounwind { +invoke.cont: + %call = tail call i8* @malloc() + %a = invoke i32 @bar() + to label %bb1 unwind label %lpad + +bb1: + ret void + +lpad: + %exn = tail call i8* @llvm.eh.exception() nounwind + %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1, i8* null) nounwind + %ehspec.fails = icmp slt i32 %eh.selector, 0 + br i1 %ehspec.fails, label %ehspec.unexpected, label %cleanup + +cleanup: + tail call void @_Unwind_Resume_or_Rethrow(i8* %exn) noreturn nounwind + unreachable + +ehspec.unexpected: + tail call void @__cxa_call_unexpected(i8* %exn) noreturn nounwind + unreachable +} + +declare noalias i8* @malloc() + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @__gxx_personality_v0(...) + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare void @_Unwind_Resume_or_Rethrow(i8*) + +declare void @__cxa_call_unexpected(i8*) + +declare i32 @bar() diff --git a/test/CodeGen/X86/2010-08-10-DbgConstant.ll b/test/CodeGen/X86/2010-08-10-DbgConstant.ll new file mode 100644 index 0000000..d98ef14 --- /dev/null +++ b/test/CodeGen/X86/2010-08-10-DbgConstant.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=x86 -O0 < %s | FileCheck %s +; CHECK: DW_TAG_constant +; CHECK-NEXT: ascii "ro" #{{#?}} DW_AT_name + +define void @foo() nounwind ssp { +entry: + call void @bar(i32 201), !dbg !8 + ret void, !dbg !8 +} + +declare void @bar(i32) + +!llvm.dbg.sp = !{!0} +!llvm.dbg.gv = !{!5} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524329, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !"clang 2.8", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{null} +!5 = metadata !{i32 524327, i32 0, metadata !1, metadata !"ro", metadata !"ro", metadata !"ro", metadata !1, i32 1, metadata !6, i1 true, i1 true, i32 201} ; [ DW_TAG_constant ] +!6 = metadata !{i32 524326, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_const_type ] +!7 = metadata !{i32 524324, metadata !1, metadata !"unsigned int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 3, i32 14, metadata !9, null} +!9 = metadata !{i32 524299, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] diff --git a/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll b/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll new file mode 100644 index 0000000..e5542ba --- /dev/null +++ b/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -verify-machineinstrs | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +; This test exercises the alias checking in SimpleRegisterCoalescing::RemoveCopyByCommutingDef. + +define void @f(i32* %w, i32* %h, i8* %_this, i8* %image) nounwind ssp { + %x1 = tail call i64 @g(i8* %_this, i8* %image) nounwind ; <i64> [#uses=3] + %tmp1 = trunc i64 %x1 to i32 ; <i32> [#uses=1] +; CHECK: movl (%r{{.*}}), % + %x4 = load i32* %h, align 4 ; <i32> [#uses=1] + +; The imull clobbers a 32-bit register. +; CHECK: imull %{{...}}, %e[[CLOBBER:..]] + %x5 = mul nsw i32 %x4, %tmp1 ; <i32> [#uses=1] + +; So we cannot use the corresponding 64-bit register anymore. +; CHECK-NOT: shrq $32, %r[[CLOBBER]] + %btmp3 = lshr i64 %x1, 32 ; <i64> [#uses=1] + %btmp4 = trunc i64 %btmp3 to i32 ; <i32> [#uses=1] + +; CHECK: idiv + %x6 = sdiv i32 %x5, %btmp4 ; <i32> [#uses=1] + store i32 %x6, i32* %w, align 4 + ret void +} + +declare i64 @g(i8*, i8*) diff --git a/test/CodeGen/X86/GC/dg.exp b/test/CodeGen/X86/GC/dg.exp index f200589..629a147 100644 --- a/test/CodeGen/X86/GC/dg.exp +++ b/test/CodeGen/X86/GC/dg.exp @@ -1,3 +1,5 @@ load_lib llvm.exp -RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +} diff --git a/test/CodeGen/X86/MachineSink-PHIUse.ll b/test/CodeGen/X86/MachineSink-PHIUse.ll new file mode 100644 index 0000000..728e377 --- /dev/null +++ b/test/CodeGen/X86/MachineSink-PHIUse.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=x86_64-appel-darwin -stats |& grep {machine-sink} + +define fastcc void @t() nounwind ssp { +entry: + br i1 undef, label %bb, label %bb4 + +bb: ; preds = %entry + br i1 undef, label %return, label %bb3 + +bb3: ; preds = %bb + unreachable + +bb4: ; preds = %entry + br i1 undef, label %bb.nph, label %return + +bb.nph: ; preds = %bb4 + br label %bb5 + +bb5: ; preds = %bb9, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp12, %bb9 ] ; <i64> [#uses=1] + %tmp12 = add i64 %indvar, 1 ; <i64> [#uses=2] + %tmp13 = trunc i64 %tmp12 to i32 ; <i32> [#uses=0] + br i1 undef, label %bb9, label %bb6 + +bb6: ; preds = %bb5 + br i1 undef, label %bb9, label %bb7 + +bb7: ; preds = %bb6 + br i1 undef, label %bb9, label %bb8 + +bb8: ; preds = %bb7 + unreachable + +bb9: ; preds = %bb7, %bb6, %bb5 + br i1 undef, label %bb5, label %return + +return: ; preds = %bb9, %bb4, %bb + ret void +} diff --git a/test/CodeGen/X86/avx-128.ll b/test/CodeGen/X86/avx-128.ll new file mode 100644 index 0000000..a72160b --- /dev/null +++ b/test/CodeGen/X86/avx-128.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s + +@z = common global <4 x float> zeroinitializer, align 16 + +define void @zero() nounwind ssp { +entry: + ; CHECK: vpxor + ; CHECK: vmovaps + store <4 x float> zeroinitializer, <4 x float>* @z, align 16 + ret void +} + diff --git a/test/CodeGen/X86/avx-256.ll b/test/CodeGen/X86/avx-256.ll new file mode 100644 index 0000000..20d31e7 --- /dev/null +++ b/test/CodeGen/X86/avx-256.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s + +@x = common global <8 x float> zeroinitializer, align 32 +@y = common global <4 x double> zeroinitializer, align 32 + +define void @zero() nounwind ssp { +entry: + ; CHECK: vxorps + ; CHECK: vmovaps + ; CHECK: vmovaps + store <8 x float> zeroinitializer, <8 x float>* @x, align 32 + store <4 x double> zeroinitializer, <4 x double>* @y, align 32 + ret void +} + diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll new file mode 100644 index 0000000..9de9023 --- /dev/null +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -0,0 +1,2587 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s + +define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vaesdec + %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vaesdeclast + %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vaesenc + %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vaesenclast + %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) { + ; CHECK: vaesimc + %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) { + ; CHECK: vaeskeygenassist + %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone + + +define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vaddsd + %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcmpordpd + %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcmpordsd + %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) { + ; CHECK: vcvtdq2pd + %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) { + ; CHECK: vcvtdq2ps + %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) { + ; CHECK: vcvtpd2dq + %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) { + ; CHECK: vcvtpd2ps + %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) { + ; CHECK: vcvtps2dq + %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) { + ; CHECK: vcvtps2pd + %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) { + ; CHECK: vcvtsd2si + %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) { + ; CHECK: vcvtsd2ss + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) { + ; CHECK: movl + ; CHECK: vcvtsi2sd + %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) { + ; CHECK: vcvtss2sd + %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) { + ; CHECK: vcvttpd2dq + %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) { + ; CHECK: vcvttps2dq + %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) { + ; CHECK: vcvttss2si + %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vdivsd + %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovdqu + %res = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly + + +define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovupd + %res = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.loadu.pd(i8*) nounwind readonly + + +define void @test_x86_sse2_maskmov_dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) { + ; CHECK: pushl + ; CHECK: movl + ; CHECK: vmaskmovdqu + ; CHECK: popl + call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) + ret void +} +declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind + + +define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vmaxpd + %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vmaxsd + %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vminpd + %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vminsd + %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) { + ; CHECK: vmovmskpd + %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone + + +define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) { + ; CHECK: movl + ; CHECK: vmovntdq + call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1) + ret void +} +declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind + + +define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: movl + ; CHECK: vmovntpd + call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1) + ret void +} +declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind + + +define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vmulsd + %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpackssdw + %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpacksswb + %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpackuswb + %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpaddsb + %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpaddsw + %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpaddusb + %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpaddusw + %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpavgb + %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpavgw + %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pcmpeq_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpeqb + %res = call <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pcmpeq_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcmpeqd + %res = call <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pcmpeq_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcmpeqw + %res = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pcmpgt_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpgtb + %res = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pcmpgt_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcmpgtd + %res = call <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pcmpgt_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcmpgtw + %res = call <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaddwd + %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaxsw + %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpmaxub + %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpminsw + %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpminub + %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) { + ; CHECK: vpmovmskb + %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmulhw + %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmulhuw + %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmuludq + %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsadbw + %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpslld + %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) { + ; CHECK: vpslldq + %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) { + ; CHECK: vpslldq + %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpsllq + %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsllw + %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) { + ; CHECK: vpslld + %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) { + ; CHECK: vpsllq + %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) { + ; CHECK: vpsllw + %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpsrad + %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsraw + %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) { + ; CHECK: vpsrad + %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) { + ; CHECK: vpsraw + %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpsrld + %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) { + ; CHECK: vpsrldq + %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) { + ; CHECK: vpsrldq + %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpsrlq + %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsrlw + %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) { + ; CHECK: vpsrld + %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) { + ; CHECK: vpsrlq + %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) { + ; CHECK: vpsrlw + %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone + + +define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsubsb + %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsubsw + %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsubusb + %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsubusw + %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) { + ; CHECK: vsqrtpd + %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) { + ; CHECK: vsqrtsd + %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone + + +define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) { + ; CHECK: movl + ; CHECK: vmovq + call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) + ret void +} +declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind + + +define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { + ; CHECK: movl + ; CHECK: vmovdqu + call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1) + ret void +} +declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind + + +define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: movl + ; CHECK: vmovupd + call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1) + ret void +} +declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind + + +define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vsubsd + %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vaddsubpd + %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vaddsubps + %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vhaddpd + %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vhaddps + %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vhsubpd + %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vhsubps + %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) { + ; CHECK: movl + ; CHECK: vlddqu + %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly + + +define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vblendpd + %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vblendps + %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: vblendvpd + %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: vblendvps + %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vdppd + %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vdpps + %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vinsertps + %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse41_movntdqa(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovntdqa + %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readonly + + +define <16 x i8> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vmpsadbw + %res = call <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpackusdw + %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone + + +define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { + ; CHECK: vpblendvb + %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpblendw + %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcmpeqq + %res = call <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) { + ; CHECK: vphminposuw + %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpmaxsb + %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmaxsd + %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmaxud + %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaxuw + %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpminsb + %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpminsd + %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpminud + %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpminuw + %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) { + ; CHECK: vpmovsxbd + %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) { + ; CHECK: vpmovsxbq + %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) { + ; CHECK: vpmovsxbw + %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) { + ; CHECK: vpmovsxdq + %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) { + ; CHECK: vpmovsxwd + %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) { + ; CHECK: vpmovsxwq + %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { + ; CHECK: vpmovzxbd + %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { + ; CHECK: vpmovzxbq + %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { + ; CHECK: vpmovzxbw + %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { + ; CHECK: vpmovzxdq + %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { + ; CHECK: vpmovzxwd + %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) { + ; CHECK: vpmovzxwq + %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmuldq + %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone + + +define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vptest + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vptest + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vptest + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) { + ; CHECK: vroundpd + %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) { + ; CHECK: vroundps + %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vroundsd + %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vroundss + %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestrm + %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define <2 x i64> @test_x86_sse42_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcmpgtq + %res = call <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64>, <2 x i64>) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistrm + %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vaddss + %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcmpordps + %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone + + +define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcmpordss + %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone + + +define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) { + ; CHECK: movl + ; CHECK: vcvtsi2ss + %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone + + +define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) { + ; CHECK: vcvtss2si + %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) { + ; CHECK: vcvttss2si + %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vdivss + %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone + + +define void @test_x86_sse_ldmxcsr(i8* %a0) { + ; CHECK: movl + ; CHECK: vldmxcsr + call void @llvm.x86.sse.ldmxcsr(i8* %a0) + ret void +} +declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind + + +define <4 x float> @test_x86_sse_loadu_ps(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovups + %res = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly + + +define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vmaxps + %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vmaxss + %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vminps + %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vminss + %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) { + ; CHECK: vmovmskps + %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone + + +define void @test_x86_sse_movnt_ps(i8* %a0, <4 x float> %a1) { + ; CHECK: movl + ; CHECK: vmovntps + call void @llvm.x86.sse.movnt.ps(i8* %a0, <4 x float> %a1) + ret void +} +declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind + + +define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vmulss + %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) { + ; CHECK: vrcpps + %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) { + ; CHECK: vrcpss + %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) { + ; CHECK: vrsqrtps + %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) { + ; CHECK: vrsqrtss + %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) { + ; CHECK: vsqrtps + %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) { + ; CHECK: vsqrtss + %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone + + +define void @test_x86_sse_stmxcsr(i8* %a0) { + ; CHECK: movl + ; CHECK: vstmxcsr + call void @llvm.x86.sse.stmxcsr(i8* %a0) + ret void +} +declare void @llvm.x86.sse.stmxcsr(i8*) nounwind + + +define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) { + ; CHECK: movl + ; CHECK: vmovups + call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1) + ret void +} +declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind + + +define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vsubss + %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) { + ; CHECK: vpabsb + %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) { + ; CHECK: vpabsd + %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) { + ; CHECK: vpabsw + %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vphaddd + %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_phadd_sw_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vphaddsw + %res = call <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vphaddw + %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vphsubd + %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vphsubsw + %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vphsubw + %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaddubsw + %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmulhrsw + %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpshufb + %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone + + +define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsignb + %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpsignd + %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsignw + %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vaddsubpd + %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vaddsubps + %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vblendpd + %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vblendps + %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone + + +define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { + ; CHECK: vblendvpd + %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { + ; CHECK: vblendvps + %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vcmpordpd + %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vcmpordps + %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone + + +define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) { + ; CHECK: vcvtpd2psy + %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone + + +define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) { + ; CHECK: vcvtpd2dqy + %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone + + +define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) { + ; CHECK: vcvtps2pd + %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone + + +define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) { + ; CHECK: vcvtps2dq + %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) { + ; CHECK: vcvtdq2pd + %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone + + +define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) { + ; CHECK: vcvtdq2ps + %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) { + ; CHECK: vcvttpd2dqy + %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone + + +define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) { + ; CHECK: vcvttps2dq + %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone + + +define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vdpps + %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone + + +define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vhaddpd + %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vhaddps + %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vhsubpd + %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vhsubps + %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) { + ; CHECK: vlddqu + %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly + + +define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) { + ; CHECK: vmovdqu + %res = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly + + +define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) { + ; CHECK: vmovupd + %res = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly + + +define <8 x float> @test_x86_avx_loadu_ps_256(i8* %a0) { + ; CHECK: vmovups + %res = call <8 x float> @llvm.x86.avx.loadu.ps.256(i8* %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.loadu.ps.256(i8*) nounwind readonly + + +define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: vmaskmovpd + %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly + + +define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) { + ; CHECK: vmaskmovpd + %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly + + +define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) { + ; CHECK: vmaskmovps + %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly + + +define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) { + ; CHECK: vmaskmovps + %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly + + +define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: vmaskmovpd + call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind + + +define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) { + ; CHECK: vmaskmovpd + call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind + + +define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: vmaskmovps + call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind + + +define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) { + ; CHECK: vmaskmovps + call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind + + +define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vmaxpd + %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vmaxps + %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vminpd + %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vminps + %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) { + ; CHECK: vmovmskpd + %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone + + +define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) { + ; CHECK: vmovmskps + %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone + + +define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) { + ; CHECK: vmovntdq + call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a1) + ret void +} +declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind + + +define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) { + ; CHECK: vmovntpd + call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a1) + ret void +} +declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind + + +define void @test_x86_avx_movnt_ps_256(i8* %a0, <8 x float> %a1) { + ; CHECK: vmovntps + call void @llvm.x86.avx.movnt.ps.256(i8* %a0, <8 x float> %a1) + ret void +} +declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind + + +define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) { + ; CHECK: vptest + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) { + ; CHECK: vptest + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) { + ; CHECK: vptest + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) { + ; CHECK: vrcpps + %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) { + ; CHECK: vroundpd + %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) { + ; CHECK: vroundps + %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) { + ; CHECK: vrsqrtps + %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) { + ; CHECK: vsqrtpd + %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) { + ; CHECK: vsqrtps + %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone + + +define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { + ; CHECK: vmovdqu + call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1) + ret void +} +declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind + + +define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) { + ; CHECK: vmovupd + call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a1) + ret void +} +declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind + + +define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) { + ; CHECK: vmovups + call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1) + ret void +} +declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind + + +define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) { + ; CHECK: vbroadcastsd + %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly + + +define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) { + ; CHECK: vbroadcastf128 + %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly + + +define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) { + ; CHECK: vbroadcastf128 + %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly + + +define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) { + ; CHECK: vbroadcastss + %res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly + + +define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) { + ; CHECK: vbroadcastss + %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly + + +define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) { + ; CHECK: vextractf128 + %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone + + +define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) { + ; CHECK: vextractf128 + %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone + + +define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) { + ; CHECK: vextractf128 + %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone + + +define <4 x double> @test_x86_avx_vinsertf128_pd_256(<4 x double> %a0, <2 x double> %a1) { + ; CHECK: vinsertf128 + %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_vinsertf128_ps_256(<8 x float> %a0, <4 x float> %a1) { + ; CHECK: vinsertf128 + %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone + + +define <8 x i32> @test_x86_avx_vinsertf128_si_256(<8 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vinsertf128 + %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone + + +define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vperm2f128 + %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vperm2f128 + %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone + + +define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) { + ; CHECK: vperm2f128 + %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone + + +define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) { + ; CHECK: vpermilpd + %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone + + +define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) { + ; CHECK: vpermilpd + %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone + + +define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) { + ; CHECK: vpermilps + %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) { + ; CHECK: vpermilps + %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone + + +define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) { + ; CHECK: vpermilpd + %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone + + +define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) { + ; CHECK: vpermilpd + %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone + + +define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) { + ; CHECK: vpermilps + %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone + + +define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) { + ; CHECK: vpermilps + %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vtestps + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vtestps + ; CHECK: setb + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vtestps + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vtestps + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vtestps + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vtestps + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define void @test_x86_avx_vzeroall() { + ; CHECK: vzeroall + call void @llvm.x86.avx.vzeroall() + ret void +} +declare void @llvm.x86.avx.vzeroall() nounwind + + +define void @test_x86_avx_vzeroupper() { + ; CHECK: vzeroupper + call void @llvm.x86.avx.vzeroupper() + ret void +} +declare void @llvm.x86.avx.vzeroupper() nounwind + + diff --git a/test/CodeGen/X86/avx-intrinsics-x86_64.ll b/test/CodeGen/X86/avx-intrinsics-x86_64.ll new file mode 100644 index 0000000..b186710 --- /dev/null +++ b/test/CodeGen/X86/avx-intrinsics-x86_64.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86-64 -mcpu=corei7 -mattr=avx | FileCheck %s + +define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) { + ; CHECK: vcvtsd2si + %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) { + ; CHECK: vcvtsi2sd + %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone + + +define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) { + ; CHECK: vcvttss2si + %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone + + +define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) { + ; CHECK: vcvtss2si + %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) { + ; CHECK: vcvtsi2ss + %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone + + +define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) { + ; CHECK: vcvttss2si + %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone + + diff --git a/test/CodeGen/X86/barrier-sse.ll b/test/CodeGen/X86/barrier-sse.ll new file mode 100644 index 0000000..6190c36 --- /dev/null +++ b/test/CodeGen/X86/barrier-sse.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep sfence +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lfence +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep mfence +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep MEMBARRIER + + +declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1) + +define void @test() { + call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false) + call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 false, i1 false) + call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true, i1 false) + + call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 false, i1 false) + call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 true, i1 false) + call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 true, i1 false) + + call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true , i1 false) + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 false , i1 false) + ret void +} diff --git a/test/CodeGen/X86/barrier.ll b/test/CodeGen/X86/barrier.ll new file mode 100644 index 0000000..fad6ef6 --- /dev/null +++ b/test/CodeGen/X86/barrier.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86 -mattr=-sse2 | grep lock +declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1) + +define void @test() { + call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false) + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/X86/call-imm.ll b/test/CodeGen/X86/call-imm.ll index 255adfb..3857fb1 100644 --- a/test/CodeGen/X86/call-imm.ll +++ b/test/CodeGen/X86/call-imm.ll @@ -5,7 +5,7 @@ ; Call to immediate is not safe on x86-64 unless we *know* that the ; call will be within 32-bits pcrel from the dest immediate. -; RUN: llc < %s -march=x86-64 | grep {call.*\*%rax} +; RUN: llc < %s -march=x86-64 | grep {call.*\\*%rax} ; PR3666 ; PR3773 diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll b/test/CodeGen/X86/change-compare-stride-trickiness-0.ll index 1f7f6ec..1f7f6ec 100644 --- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll +++ b/test/CodeGen/X86/change-compare-stride-trickiness-0.ll diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll b/test/CodeGen/X86/change-compare-stride-trickiness-1.ll index cb63809..cb63809 100644 --- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll +++ b/test/CodeGen/X86/change-compare-stride-trickiness-1.ll diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-2.ll b/test/CodeGen/X86/change-compare-stride-trickiness-2.ll index ae27383..ae27383 100644 --- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-2.ll +++ b/test/CodeGen/X86/change-compare-stride-trickiness-2.ll diff --git a/test/CodeGen/X86/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll index 05388f9..2a44463 100644 --- a/test/CodeGen/X86/constant-pool-remat-0.ll +++ b/test/CodeGen/X86/constant-pool-remat-0.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86-64 | grep LCPI | count 3 -; RUN: llc < %s -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 6 +; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep LCPI | count 3 -; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -info-output-file - | grep asm-printer | grep 12 +; RUN: llc < %s -march=x86 -mattr=+sse2 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12 declare float @qux(float %y) diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll index f29cbf3..96fef0f 100644 --- a/test/CodeGen/X86/critical-edge-split.ll +++ b/test/CodeGen/X86/critical-edge-split.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -stats -info-output-file - | grep asm-printer | grep 29 +; RUN: llc < %s -mtriple=i386-apple-darwin -o /dev/null -stats -info-output-file - | grep asm-printer | grep 29 %CC = type { %Register } %II = type { %"struct.XX::II::$_74" } diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll index bdbaac0..bf57e78 100644 --- a/test/CodeGen/X86/dllexport.ll +++ b/test/CodeGen/X86/dllexport.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s | FileCheck %s ; PR2936 -target triple = "i386-mingw32" +target triple = "i386-pc-mingw32" define dllexport x86_fastcallcc i32 @foo() nounwind { entry: diff --git a/test/CodeGen/X86/dyn-stackalloc.ll b/test/CodeGen/X86/dyn-stackalloc.ll index 1df0920..e577611 100644 --- a/test/CodeGen/X86/dyn-stackalloc.ll +++ b/test/CodeGen/X86/dyn-stackalloc.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86 | not egrep {\\\$4294967289|-7} -; RUN: llc < %s -march=x86 | egrep {\\\$4294967280|-16} -; RUN: llc < %s -march=x86-64 | grep {\\-16} +; RUN: llc < %s -mtriple=i686-linux | not egrep {\\\$4294967289|-7} +; RUN: llc < %s -mtriple=i686-linux | egrep {\\\$4294967280|-16} +; RUN: llc < %s -mtriple=x86_64-linux | grep {\\-16} define void @t() nounwind { A: diff --git a/test/CodeGen/X86/empty-functions.ll b/test/CodeGen/X86/empty-functions.ll new file mode 100644 index 0000000..b303cd1 --- /dev/null +++ b/test/CodeGen/X86/empty-functions.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck -check-prefix=CHECK-NO-FP %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s + +define void @func() { +entry: + unreachable +} +; CHECK-NO-FP: _func: +; CHECK-NO-FP-NOT: movq %rsp, %rbp +; CHECK-NO-FP: nop + +; CHECK-FP: _func: +; CHECK-FP: movq %rsp, %rbp +; CHECK-FP-NEXT: Ltmp1: +; CHECK-FP: nop diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll index 23b45eb..9ded7e0 100644 --- a/test/CodeGen/X86/fabs.ll +++ b/test/CodeGen/X86/fabs.ll @@ -1,7 +1,7 @@ ; Make sure this testcase codegens to the fabs instruction, not a call to fabsf ; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \ ; RUN: count 2 -; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-finite-only-fp-math | \ +; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | \ ; RUN: grep fabs\$ | count 3 declare float @fabsf(float) diff --git a/test/CodeGen/X86/fast-isel-atomic.ll b/test/CodeGen/X86/fast-isel-atomic.ll new file mode 100644 index 0000000..74c5868 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-atomic.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -O0 -march=x86-64 +; rdar://8204072 +; PR7652 + +@sc = external global i8 +@uc = external global i8 + +declare i8 @llvm.atomic.load.and.i8.p0i8(i8* nocapture, i8) nounwind + +define void @test_fetch_and_op() nounwind { +entry: + %tmp40 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @sc, i8 11) ; <i8> [#uses=1] + store i8 %tmp40, i8* @sc + %tmp41 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @uc, i8 11) ; <i8> [#uses=1] + store i8 %tmp41, i8* @uc + ret void +} diff --git a/test/CodeGen/X86/fast-isel-cmp-branch.ll b/test/CodeGen/X86/fast-isel-cmp-branch.ll new file mode 100644 index 0000000..4ab1bc6 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-cmp-branch.ll @@ -0,0 +1,29 @@ +; RUN: llc -O0 -march=x86-64 -asm-verbose=false < %s | FileCheck %s +; rdar://8337108 + +; Fast-isel shouldn't try to look through the compare because it's in a +; different basic block, so its operands aren't necessarily exported +; for cross-block usage. + +; CHECK: movb %al, 7(%rsp) +; CHECK: callq {{_?}}bar +; CHECK: movb 7(%rsp), %al + +declare void @bar() + +define void @foo(i32 %a, i32 %b) nounwind { +entry: + %q = add i32 %a, 7 + %r = add i32 %b, 9 + %t = icmp ult i32 %q, %r + invoke void @bar() to label %next unwind label %unw +next: + br i1 %t, label %true, label %return +true: + call void @bar() + br label %return +return: + ret void +unw: + unreachable +} diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll index 1270ab7..577dd72 100644 --- a/test/CodeGen/X86/fast-isel-gep.ll +++ b/test/CodeGen/X86/fast-isel-gep.ll @@ -9,7 +9,7 @@ define i32 @test1(i32 %t3, i32* %t1) nounwind { %t15 = load i32* %t9 ; <i32> [#uses=1] ret i32 %t15 ; X32: test1: -; X32: movl (%ecx,%eax,4), %eax +; X32: movl (%eax,%ecx,4), %eax ; X32: ret ; X64: test1: @@ -23,7 +23,7 @@ define i32 @test2(i64 %t3, i32* %t1) nounwind { %t15 = load i32* %t9 ; <i32> [#uses=1] ret i32 %t15 ; X32: test2: -; X32: movl (%eax,%ecx,4), %eax +; X32: movl (%edx,%ecx,4), %eax ; X32: ret ; X64: test2: diff --git a/test/CodeGen/X86/fast-isel-shift-imm.ll b/test/CodeGen/X86/fast-isel-shift-imm.ll index 7759bb0..5c62c18 100644 --- a/test/CodeGen/X86/fast-isel-shift-imm.ll +++ b/test/CodeGen/X86/fast-isel-shift-imm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -O0 | grep {sarl \$80, %eax} +; RUN: llc < %s -march=x86 -O0 | grep {sarl \$80, %e} ; PR3242 define void @foo(i32 %x, i32* %p) nounwind { diff --git a/test/CodeGen/X86/force-align-stack.ll b/test/CodeGen/X86/force-align-stack.ll new file mode 100644 index 0000000..ffcbf8a --- /dev/null +++ b/test/CodeGen/X86/force-align-stack.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -relocation-model=static -force-align-stack | FileCheck %s +; Tests to make sure that we always align the stack out to the minimum needed - +; in this case 16-bytes. +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10.3" + +define void @a() nounwind ssp { +entry: +; CHECK: _a: +; CHECK: andl $-16, %esp + %z = alloca <16 x i8> ; <<16 x i8>*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store <16 x i8> zeroinitializer, <16 x i8>* %z, align 16 + call void @b(<16 x i8>* %z) nounwind + br label %return + +return: ; preds = %entry + ret void +} + +declare void @b(<16 x i8>*) diff --git a/test/Transforms/LoopStrengthReduce/insert-positions.ll b/test/CodeGen/X86/insert-positions.ll index 1a695f3..1a695f3 100644 --- a/test/Transforms/LoopStrengthReduce/insert-positions.ll +++ b/test/CodeGen/X86/insert-positions.ll diff --git a/test/CodeGen/X86/int-intrinsic.ll b/test/CodeGen/X86/int-intrinsic.ll new file mode 100644 index 0000000..45a9b0f --- /dev/null +++ b/test/CodeGen/X86/int-intrinsic.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86-64 | FileCheck %s + +declare void @llvm.x86.int(i8) nounwind + +; CHECK: int3 +; CHECK: ret +define void @primitive_int3 () { +bb.entry: + call void @llvm.x86.int(i8 3) nounwind + ret void +} + +; CHECK: int $-128 +; CHECK: ret +define void @primitive_int128 () { +bb.entry: + call void @llvm.x86.int(i8 128) nounwind + ret void +} diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll index 71685bb..b0105ac 100644 --- a/test/CodeGen/X86/licm-nested.ll +++ b/test/CodeGen/X86/licm-nested.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -stats -info-output-file - | grep machine-licm | grep 2 +; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep machine-licm | grep 3 ; MachineLICM should be able to hoist the symbolic addresses out of ; the inner loops. diff --git a/test/CodeGen/X86/lock-inst-encoding.ll b/test/CodeGen/X86/lock-inst-encoding.ll new file mode 100644 index 0000000..03468e2 --- /dev/null +++ b/test/CodeGen/X86/lock-inst-encoding.ll @@ -0,0 +1,22 @@ +; RUN: llc -O0 --show-mc-encoding < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +; CHECK: f0: +; CHECK: addq %rax, (%rdi) +; CHECK: # encoding: [0xf0,0x48,0x01,0x07] +; CHECK: ret +define void @f0(i64* %a0) { + %t0 = and i64 1, 1 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) nounwind + %1 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %a0, i64 %t0) nounwind + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) nounwind + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +declare i32 @llvm.atomic.load.and.i32.p0i32(i32* nocapture, i32) nounwind + +declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll index 6c0eb8c..6556fde 100644 --- a/test/CodeGen/X86/loop-strength-reduce4.ll +++ b/test/CodeGen/X86/loop-strength-reduce4.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC -; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC +; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC +; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC ; By starting the IV at -64 instead of 0, a cmp is eliminated, ; as the flags from the add can be used directly. diff --git a/test/CodeGen/X86/lsr-interesting-step.ll b/test/CodeGen/X86/lsr-interesting-step.ll new file mode 100644 index 0000000..4b7050b --- /dev/null +++ b/test/CodeGen/X86/lsr-interesting-step.ll @@ -0,0 +1,51 @@ +; RUN: llc < %s -march=x86-64 -relocation-model=static -mtriple=x86_64-unknown-linux-gnu + +; The inner loop should require only one add (and no leas either). +; rdar://8100380 + +; CHECK: BB0_4: +; CHECK-NEXT: movb $0, flags(%rdx) +; CHECK-NEXT: addq %rcx, %rdx +; CHECK-NEXT: cmpq $8192, %rdx +; CHECK-NEXT: jl + +@flags = external global [8192 x i8], align 16 ; <[8192 x i8]*> [#uses=1] + +define void @foo() nounwind { +entry: + %tmp = icmp slt i64 2, 8192 ; <i1> [#uses=1] + br i1 %tmp, label %bb, label %bb21 + +bb: ; preds = %entry + br label %bb7 + +bb7: ; preds = %bb, %bb17 + %tmp8 = phi i64 [ %tmp18, %bb17 ], [ 2, %bb ] ; <i64> [#uses=2] + %tmp9 = icmp slt i64 2, 8192 ; <i1> [#uses=1] + br i1 %tmp9, label %bb10, label %bb17 + +bb10: ; preds = %bb7 + br label %bb11 + +bb11: ; preds = %bb10, %bb11 + %tmp12 = phi i64 [ %tmp14, %bb11 ], [ 2, %bb10 ] ; <i64> [#uses=2] + %tmp13 = getelementptr inbounds [8192 x i8]* @flags, i64 0, i64 %tmp12 ; <i8*> [#uses=1] + store i8 0, i8* %tmp13, align 1 + %tmp14 = add nsw i64 %tmp12, %tmp8 ; <i64> [#uses=2] + %tmp15 = icmp slt i64 %tmp14, 8192 ; <i1> [#uses=1] + br i1 %tmp15, label %bb11, label %bb16 + +bb16: ; preds = %bb11 + br label %bb17 + +bb17: ; preds = %bb16, %bb7 + %tmp18 = add nsw i64 %tmp8, 1 ; <i64> [#uses=2] + %tmp19 = icmp slt i64 %tmp18, 8192 ; <i1> [#uses=1] + br i1 %tmp19, label %bb7, label %bb20 + +bb20: ; preds = %bb17 + br label %bb21 + +bb21: ; preds = %bb20, %entry + ret void +} diff --git a/test/CodeGen/X86/lsr-normalization.ll b/test/CodeGen/X86/lsr-normalization.ll new file mode 100644 index 0000000..932141d --- /dev/null +++ b/test/CodeGen/X86/lsr-normalization.ll @@ -0,0 +1,99 @@ +; RUN: llc < %s -march=x86-64 | grep div | count 1 +; rdar://8168938 + +; This testcase involves SCEV normalization with the exit value from +; one loop involved with the increment value for an addrec on another +; loop. The expression should be properly normalized and simplified, +; and require only a single division. + +%0 = type { %0*, %0* } + +@0 = private constant [13 x i8] c"Result: %lu\0A\00" ; <[13 x i8]*> [#uses=1] +@1 = internal constant [5 x i8] c"Huh?\00" ; <[5 x i8]*> [#uses=1] + +define i32 @main(i32 %arg, i8** nocapture %arg1) nounwind { +bb: + %tmp = alloca %0, align 8 ; <%0*> [#uses=11] + %tmp2 = bitcast %0* %tmp to i8* ; <i8*> [#uses=1] + call void @llvm.memset.p0i8.i64(i8* %tmp2, i8 0, i64 16, i32 8, i1 false) nounwind + %tmp3 = getelementptr inbounds %0* %tmp, i64 0, i32 0 ; <%0**> [#uses=3] + store %0* %tmp, %0** %tmp3 + %tmp4 = getelementptr inbounds %0* %tmp, i64 0, i32 1 ; <%0**> [#uses=1] + store %0* %tmp, %0** %tmp4 + %tmp5 = call noalias i8* @_Znwm(i64 24) nounwind ; <i8*> [#uses=2] + %tmp6 = getelementptr inbounds i8* %tmp5, i64 16 ; <i8*> [#uses=2] + %tmp7 = icmp eq i8* %tmp6, null ; <i1> [#uses=1] + br i1 %tmp7, label %bb10, label %bb8 + +bb8: ; preds = %bb + %tmp9 = bitcast i8* %tmp6 to i32* ; <i32*> [#uses=1] + store i32 1, i32* %tmp9 + br label %bb10 + +bb10: ; preds = %bb8, %bb + %tmp11 = bitcast i8* %tmp5 to %0* ; <%0*> [#uses=1] + call void @_ZNSt15_List_node_base4hookEPS_(%0* %tmp11, %0* %tmp) nounwind + %tmp12 = load %0** %tmp3 ; <%0*> [#uses=3] + %tmp13 = icmp eq %0* %tmp12, %tmp ; <i1> [#uses=1] + br i1 %tmp13, label %bb14, label %bb16 + +bb14: ; preds = %bb10 + %tmp15 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @1, i64 0, i64 0)) + br label %bb35 + +bb16: ; preds = %bb16, %bb10 + %tmp17 = phi i64 [ %tmp22, %bb16 ], [ 0, %bb10 ] ; <i64> [#uses=1] + %tmp18 = phi %0* [ %tmp20, %bb16 ], [ %tmp12, %bb10 ] ; <%0*> [#uses=1] + %tmp19 = getelementptr inbounds %0* %tmp18, i64 0, i32 0 ; <%0**> [#uses=1] + %tmp20 = load %0** %tmp19 ; <%0*> [#uses=2] + %tmp21 = icmp eq %0* %tmp20, %tmp ; <i1> [#uses=1] + %tmp22 = add i64 %tmp17, 1 ; <i64> [#uses=2] + br i1 %tmp21, label %bb23, label %bb16 + +bb23: ; preds = %bb16 + %tmp24 = udiv i64 100, %tmp22 ; <i64> [#uses=1] + br label %bb25 + +bb25: ; preds = %bb25, %bb23 + %tmp26 = phi i64 [ %tmp31, %bb25 ], [ 0, %bb23 ] ; <i64> [#uses=1] + %tmp27 = phi %0* [ %tmp29, %bb25 ], [ %tmp12, %bb23 ] ; <%0*> [#uses=1] + %tmp28 = getelementptr inbounds %0* %tmp27, i64 0, i32 0 ; <%0**> [#uses=1] + %tmp29 = load %0** %tmp28 ; <%0*> [#uses=2] + %tmp30 = icmp eq %0* %tmp29, %tmp ; <i1> [#uses=1] + %tmp31 = add i64 %tmp26, 1 ; <i64> [#uses=2] + br i1 %tmp30, label %bb32, label %bb25 + +bb32: ; preds = %bb25 + %tmp33 = mul i64 %tmp31, %tmp24 ; <i64> [#uses=1] + %tmp34 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @0, i64 0, i64 0), i64 %tmp33) nounwind + br label %bb35 + +bb35: ; preds = %bb32, %bb14 + %tmp36 = load %0** %tmp3 ; <%0*> [#uses=2] + %tmp37 = icmp eq %0* %tmp36, %tmp ; <i1> [#uses=1] + br i1 %tmp37, label %bb44, label %bb38 + +bb38: ; preds = %bb38, %bb35 + %tmp39 = phi %0* [ %tmp41, %bb38 ], [ %tmp36, %bb35 ] ; <%0*> [#uses=2] + %tmp40 = getelementptr inbounds %0* %tmp39, i64 0, i32 0 ; <%0**> [#uses=1] + %tmp41 = load %0** %tmp40 ; <%0*> [#uses=2] + %tmp42 = bitcast %0* %tmp39 to i8* ; <i8*> [#uses=1] + call void @_ZdlPv(i8* %tmp42) nounwind + %tmp43 = icmp eq %0* %tmp41, %tmp ; <i1> [#uses=1] + br i1 %tmp43, label %bb44, label %bb38 + +bb44: ; preds = %bb38, %bb35 + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind + +declare void @_ZNSt15_List_node_base4hookEPS_(%0*, %0*) + +declare noalias i8* @_Znwm(i64) + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +declare void @_ZdlPv(i8*) nounwind + +declare i32 @puts(i8* nocapture) nounwind diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll index b7e69b8..d2ff58b 100644 --- a/test/CodeGen/X86/lsr-reuse.ll +++ b/test/CodeGen/X86/lsr-reuse.ll @@ -464,7 +464,7 @@ bb5: ; preds = %bb3, %entry ; And the one at %bb68, where we want to be sure to use superhero mode: -; CHECK: BB10_10: +; CHECK: BB10_9: ; CHECK-NEXT: movaps 48(%r{{[^,]*}}), %xmm{{.*}} ; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}} ; CHECK-NEXT: movaps 32(%r{{[^,]*}}), %xmm{{.*}} @@ -484,7 +484,7 @@ bb5: ; preds = %bb3, %entry ; CHECK-NEXT: addq $64, %r{{.*}} ; CHECK-NEXT: addq $64, %r{{.*}} ; CHECK-NEXT: addq $-16, %r{{.*}} -; CHECK-NEXT: BB10_11: +; CHECK-NEXT: BB10_10: ; CHECK-NEXT: cmpq $15, %r{{.*}} ; CHECK-NEXT: jg diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll new file mode 100644 index 0000000..c9ed3e5 --- /dev/null +++ b/test/CodeGen/X86/lsr-static-addr.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s + +; CHECK: xorl %eax, %eax +; CHECK: movsd .LCPI0_0(%rip), %xmm0 +; CHECK: align +; CHECK-NEXT: BB0_2: +; CHECK-NEXT: movsd A(,%rax,8) +; CHECK-NEXT: mulsd +; CHECK-NEXT: movsd +; CHECK-NEXT: incq %rax + +@A = external global [0 x double] + +define void @foo(i64 %n) nounwind { +entry: + %cmp5 = icmp sgt i64 %n, 0 + br i1 %cmp5, label %for.body, label %for.end + +for.body: + %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr [0 x double]* @A, i64 0, i64 %i.06 + %tmp3 = load double* %arrayidx, align 8 + %mul = fmul double %tmp3, 2.300000e+00 + store double %mul, double* %arrayidx, align 8 + %inc = add nsw i64 %i.06, 1 + %exitcond = icmp eq i64 %inc, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/CodeGen/X86/lsr-wrap.ll b/test/CodeGen/X86/lsr-wrap.ll index ec8db50..d605e4f 100644 --- a/test/CodeGen/X86/lsr-wrap.ll +++ b/test/CodeGen/X86/lsr-wrap.ll @@ -3,7 +3,7 @@ ; LSR would like to use a single IV for both of these, however it's ; not safe due to wraparound. -; CHECK: addb $-4, %r +; CHECK: addb $-4, % ; CHECK: decw % @g_19 = common global i32 0 ; <i32*> [#uses=2] diff --git a/test/CodeGen/X86/narrow_op-2.ll b/test/CodeGen/X86/narrow_op-2.ll deleted file mode 100644 index 796ef7a..0000000 --- a/test/CodeGen/X86/narrow_op-2.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s - - %struct.bf = type { i64, i16, i16, i32 } -@bfi = external global %struct.bf* - -define void @t1() nounwind ssp { -entry: - -; CHECK: andb $-2, 10( -; CHECK: andb $-3, 10( - - %0 = load %struct.bf** @bfi, align 8 - %1 = getelementptr %struct.bf* %0, i64 0, i32 1 - %2 = bitcast i16* %1 to i32* - %3 = load i32* %2, align 1 - %4 = and i32 %3, -65537 - store i32 %4, i32* %2, align 1 - %5 = load %struct.bf** @bfi, align 8 - %6 = getelementptr %struct.bf* %5, i64 0, i32 1 - %7 = bitcast i16* %6 to i32* - %8 = load i32* %7, align 1 - %9 = and i32 %8, -131073 - store i32 %9, i32* %7, align 1 - ret void -} diff --git a/test/CodeGen/X86/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll index 9f9f921..8bed624 100644 --- a/test/CodeGen/X86/phi-immediate-factoring.ll +++ b/test/CodeGen/X86/phi-immediate-factoring.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" target triple = "i686-apple-darwin8" -define i32 @foo(i32 %A, i32 %B, i32 %C) { +define i32 @foo(i32 %A, i32 %B, i32 %C) nounwind { entry: switch i32 %A, label %out [ i32 1, label %bb diff --git a/test/CodeGen/X86/pr7882.ll b/test/CodeGen/X86/pr7882.ll new file mode 100644 index 0000000..88404db --- /dev/null +++ b/test/CodeGen/X86/pr7882.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin -pre-RA-sched=fast \ +; RUN: | FileCheck %s +; make sure scheduler honors the flags clobber. PR 7882. + +define i32 @main(i32 %argc, i8** %argv) nounwind +{ +entry: +; CHECK: InlineAsm End +; CHECK: cmpl + %res = icmp slt i32 1, %argc + %tmp = call i32 asm sideeffect alignstack + "push $$0 + popf + mov $$13, $0", "=r,r,~{memory},~{flags}" (i1 %res) + %ret = select i1 %res, i32 %tmp, i32 42 + ret i32 %ret +} diff --git a/test/CodeGen/X86/shl-anyext.ll b/test/CodeGen/X86/shl-anyext.ll new file mode 100644 index 0000000..10d489b --- /dev/null +++ b/test/CodeGen/X86/shl-anyext.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +; Codegen should be able to use a 32-bit shift instead of a 64-bit shift. +; CHECK: shll $16 + +define fastcc void @test(i32 %level, i64 %a, i64 %b, i64 %c, i64 %d, i32* %p) nounwind { +if.end523: ; preds = %if.end453 + %conv7981749 = zext i32 %level to i64 ; <i64> [#uses=1] + %and799 = shl i64 %conv7981749, 16 ; <i64> [#uses=1] + %shl800 = and i64 %and799, 16711680 ; <i64> [#uses=1] + %or801 = or i64 %shl800, %a ; <i64> [#uses=1] + %or806 = or i64 %or801, %b ; <i64> [#uses=1] + %or811 = or i64 %or806, %c ; <i64> [#uses=1] + %or819 = or i64 %or811, %d ; <i64> [#uses=1] + %conv820 = trunc i64 %or819 to i32 ; <i32> [#uses=1] + store i32 %conv820, i32* %p + ret void +} + +; CHECK: foo: + +declare void @bar(i64) + +define fastcc void @foo(i32 %t) { +bb: + %tmp = add i32 %t, -1 ; <i32> [#uses=1] + br label %bb1 + +bb1: ; preds = %bb + %tmp2 = zext i32 %tmp to i64 ; <i64> [#uses=2] + %tmp3 = add i64 %tmp2, 1 ; <i64> [#uses=1] + %tmp4 = xor i64 %tmp2, 536870911 ; <i64> [#uses=1] + %tmp5 = and i64 %tmp3, %tmp4 ; <i64> [#uses=1] + %tmp6 = shl i64 %tmp5, 3 ; <i64> [#uses=1] + %tmp7 = sub i64 64, %tmp6 ; <i64> [#uses=1] + %tmp8 = and i64 %tmp7, 4294967288 ; <i64> [#uses=1] + %tmp9 = lshr i64 -1, %tmp8 ; <i64> [#uses=1] + call void @bar(i64 %tmp9) + ret void +} diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll index 4b27f2e..a3c9957 100644 --- a/test/CodeGen/X86/sibcall.ll +++ b/test/CodeGen/X86/sibcall.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32 ; RUN: llc < %s -march=x86-64 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64 +; Darwin 8 generates stubs, which don't match +; XFAIL: apple-darwin8 define void @t1(i32 %x) nounwind ssp { entry: diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll index ebcdc65..348121a 100644 --- a/test/CodeGen/X86/sse-minmax.ll +++ b/test/CodeGen/X86/sse-minmax.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s -; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=UNSAFE %s -; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s ; Some of these patterns can be matched as SSE min or max. Some of ; then can be matched provided that the operands are swapped. diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll new file mode 100644 index 0000000..73f88ae --- /dev/null +++ b/test/CodeGen/X86/sse1.ll @@ -0,0 +1,45 @@ +; Tests for SSE1 and below, without SSE2+. +; RUN: llc < %s -march=x86 -mcpu=pentium3 -O3 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mcpu=pentium3 -O3 | FileCheck %s + +define <8 x i16> @test1(<8 x i32> %a) nounwind { +; CHECK: test1 + ret <8 x i16> zeroinitializer +} + +define <8 x i16> @test2(<8 x i32> %a) nounwind { +; CHECK: test2 + %c = trunc <8 x i32> %a to <8 x i16> ; <<8 x i16>> [#uses=1] + ret <8 x i16> %c +} + +; PR7993 +;define <4 x i32> @test3(<4 x i16> %a) nounwind { +; %c = sext <4 x i16> %a to <4 x i32> ; <<4 x i32>> [#uses=1] +; ret <4 x i32> %c +;} + +; This should not emit shuffles to populate the top 2 elements of the 4-element +; vector that this ends up returning. +; rdar://8368414 +define <2 x float> @test4(<2 x float> %A, <2 x float> %B) nounwind { +entry: + %tmp7 = extractelement <2 x float> %A, i32 0 + %tmp5 = extractelement <2 x float> %A, i32 1 + %tmp3 = extractelement <2 x float> %B, i32 0 + %tmp1 = extractelement <2 x float> %B, i32 1 + %add.r = fadd float %tmp7, %tmp3 + %add.i = fsub float %tmp5, %tmp1 + %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0 + %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1 + ret <2 x float> %tmp9 +; CHECK: test4: +; CHECK-NOT: shufps $16 +; CHECK: shufps $1, +; CHECK-NOT: shufps $16 +; CHECK: shufps $1, +; CHECK-NOT: shufps $16 +; CHECK: unpcklps +; CHECK-NOT: shufps $16 +; CHECK: ret +} diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll index 20b8eac..6fc0190 100644 --- a/test/CodeGen/X86/sse2.ll +++ b/test/CodeGen/X86/sse2.ll @@ -1,14 +1,14 @@ ; Tests for SSE2 and below, without SSE3+. ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s -define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { +define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { %tmp3 = load <2 x double>* %A, align 16 %tmp7 = insertelement <2 x double> undef, double %B, i32 0 %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 > store <2 x double> %tmp9, <2 x double>* %r, align 16 ret void -; CHECK: t1: +; CHECK: test1: ; CHECK: movl 8(%esp), %eax ; CHECK-NEXT: movapd (%eax), %xmm0 ; CHECK-NEXT: movlpd 12(%esp), %xmm0 @@ -17,14 +17,14 @@ define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { ; CHECK-NEXT: ret } -define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { +define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { %tmp3 = load <2 x double>* %A, align 16 %tmp7 = insertelement <2 x double> undef, double %B, i32 0 %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 > store <2 x double> %tmp9, <2 x double>* %r, align 16 ret void -; CHECK: t2: +; CHECK: test2: ; CHECK: movl 8(%esp), %eax ; CHECK-NEXT: movapd (%eax), %xmm0 ; CHECK-NEXT: movhpd 12(%esp), %xmm0 @@ -32,3 +32,163 @@ define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { ; CHECK-NEXT: movapd %xmm0, (%eax) ; CHECK-NEXT: ret } + + +define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind { + %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2] + %tmp3 = load <4 x float>* %A ; <<4 x float>> [#uses=2] + %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1] + %tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1] + %tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1] + %tmp9 = extractelement <4 x float> %tmp, i32 1 ; <float> [#uses=1] + %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 ; <<4 x float>> [#uses=1] + %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 ; <<4 x float>> [#uses=1] + %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp13, <4 x float>* %res + ret void +; CHECK: @test3 +; CHECK: unpcklps +} + +define void @test4(<4 x float> %X, <4 x float>* %res) nounwind { + %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1] + store <4 x float> %tmp5, <4 x float>* %res + ret void +; CHECK: @test4 +; CHECK: pshufd $50, %xmm0, %xmm0 +} + +define <4 x i32> @test5(i8** %ptr) nounwind { +; CHECK: test5: +; CHECK: pxor +; CHECK: punpcklbw +; CHECK: punpcklwd + + %tmp = load i8** %ptr ; <i8*> [#uses=1] + %tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1] + %tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1] + %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1] + %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1] + %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1] + %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8> ; <<16 x i8>> [#uses=1] + %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1] + %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1] + %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32> ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp36 +} + +define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind { + %tmp1 = load <4 x float>* %A ; <<4 x float>> [#uses=1] + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + store <4 x float> %tmp2, <4 x float>* %res + ret void + +; CHECK: test6: +; CHECK: movaps (%eax), %xmm0 +; CHECK: movaps %xmm0, (%eax) +} + +define void @test7() nounwind { + bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:1 [#uses=1] + shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1] + store <4 x float> %2, <4 x float>* null + ret void + +; CHECK: test7: +; CHECK: pxor %xmm0, %xmm0 +; CHECK: movaps %xmm0, 0 +} + +@x = external global [4 x i32] + +define <2 x i64> @test8() nounwind { + %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1] + %tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1] + %tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1] + %tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1] + %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1] + %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] + %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1] + %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3 ; <<4 x i32>> [#uses=1] + %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp16 +; CHECK: test8: +; CHECK: movups (%eax), %xmm0 +} + +define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind { + %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] + %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1] + %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp13 +; CHECK: test9: +; CHECK: movups 8(%esp), %xmm0 +} + +define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind { + %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] + %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1] + %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp13 +; CHECK: test10: +; CHECK: movaps 4(%esp), %xmm0 +} + +define <2 x double> @test11(double %a, double %b) nounwind { + %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1] + %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1] + ret <2 x double> %tmp7 +; CHECK: test11: +; CHECK: movapd 4(%esp), %xmm0 +} + +define void @test12() nounwind { + %tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2] + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp4, <4 x float>* null + ret void +; CHECK: test12: +; CHECK: movhlps +; CHECK: shufps +} + +define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind { + %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1] + %tmp5 = load <4 x float>* %C ; <<4 x float>> [#uses=1] + %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1] + store <4 x float> %tmp11, <4 x float>* %res + ret void +; CHECK: test13 +; CHECK: shufps $69, (%eax), %xmm0 +; CHECK: pshufd $-40, %xmm0, %xmm0 +} + +define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind { + %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2] + %tmp5 = load <4 x float>* %x ; <<4 x float>> [#uses=2] + %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1] + %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1] + %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp27 +; CHECK: test14: +; CHECK: addps %xmm1, %xmm0 +; CHECK: subps %xmm1, %xmm2 +; CHECK: movlhps %xmm2, %xmm0 +} + +define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind { +entry: + %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=1] + %tmp3 = load <4 x float>* %x ; <<4 x float>> [#uses=1] + %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp4 +; CHECK: test15: +; CHECK: movhlps %xmm1, %xmm0 +} diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll index ef66d1a..3a14fa2 100644 --- a/test/CodeGen/X86/sse41.ll +++ b/test/CodeGen/X86/sse41.ll @@ -224,3 +224,28 @@ declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone +; This used to compile to insertps $0 + insertps $16. insertps $0 is always +; pointless. +define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind { +entry: + %tmp7 = extractelement <2 x float> %A, i32 0 + %tmp5 = extractelement <2 x float> %A, i32 1 + %tmp3 = extractelement <2 x float> %B, i32 0 + %tmp1 = extractelement <2 x float> %B, i32 1 + %add.r = fadd float %tmp7, %tmp3 + %add.i = fadd float %tmp5, %tmp1 + %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0 + %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1 + ret <2 x float> %tmp9 +; X32: buildvector: +; X32-NOT: insertps $0 +; X32: insertps $16 +; X32-NOT: insertps $0 +; X32: ret +; X64: buildvector: +; X64-NOT: insertps $0 +; X64: insertps $16 +; X64-NOT: insertps $0 +; X64: ret +} + diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll deleted file mode 100644 index 001a540..0000000 --- a/test/CodeGen/X86/stack-color-with-reg.ll +++ /dev/null @@ -1,361 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t -; RUN: grep asm-printer %t | grep 166 -; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5 - - type { [62 x %struct.Bitvec*] } ; type %0 - type { i8* } ; type %1 - type { double } ; type %2 - %struct..5sPragmaType = type { i8*, i32 } - %struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 } - %struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* } - %struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 } - %struct.AuxData = type { i8*, void (i8*)* } - %struct.Bitvec = type { i32, i32, i32, %0 } - %struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* } - %struct.BtLock = type { %struct.Btree*, i32, i8, %struct.BtLock* } - %struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* } - %struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* } - %struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] } - %struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 } - %struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 } - %struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* } - %struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 } - %struct.Context = type { i64, i32, %struct.Fifo } - %struct.CountCtx = type { i64 } - %struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, %struct.sqlite3_module*, i32, i32, i32*, i32*, i8* } - %struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* } - %struct.DbPage = type { %struct.Pager*, i32, %struct.DbPage*, %struct.DbPage*, %struct.PagerLruLink, %struct.DbPage*, i8, i8, i8, i8, i8, i16, %struct.DbPage*, %struct.DbPage*, i8* } - %struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..5sPragmaType, %struct..5sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 } - %struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* } - %struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 } - %struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 } - %struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* } - %struct.FifoPage = type { i32, i32, i32, %struct.FifoPage*, [1 x i64] } - %struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] } - %struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* } - %struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 } - %struct.IdList = type { %struct..5sPragmaType*, i32, i32 } - %struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** } - %struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] } - %struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* } - %struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.DbPage*, i32, %struct.MemPage* } - %struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* } - %struct.Op = type { i8, i8, i8, i8, i32, i32, i32, %1 } - %struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.BusyHandler*, %struct.PagerLruList, %struct.DbPage*, %struct.DbPage*, %struct.DbPage*, i64, i64, i64, i64, i64, i32, void (%struct.DbPage*, i32)*, void (%struct.DbPage*, i32)*, i32, %struct.DbPage**, i8*, [16 x i8] } - %struct.PagerLruLink = type { %struct.DbPage*, %struct.DbPage* } - %struct.PagerLruList = type { %struct.DbPage*, %struct.DbPage*, %struct.DbPage* } - %struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* } - %struct.Select = type { %struct.ExprList*, i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] } - %struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] } - %struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 } - %struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* } - %struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..5sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* } - %struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, %struct..5sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* } - %struct.Vdbe = type { %struct.sqlite3*, %struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 } - %struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] } - %struct._OvflCell = type { i8*, i16 } - %struct._ht = type { i32, %struct.HashElem* } - %struct.sColMap = type { i32, i8* } - %struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %2, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 } - %struct.sqlite3InitInfo = type { i32, i32, i8 } - %struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* } - %struct.sqlite3_file = type { %struct.sqlite3_io_methods* } - %struct.sqlite3_index_constraint = type { i32, i8, i8, i32 } - %struct.sqlite3_index_constraint_usage = type { i32, i8 } - %struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, double } - %struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* } - %struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* } - %struct.sqlite3_mutex = type opaque - %struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* } - %struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* } - %struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* } -@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.MemPage*, i32, i32)* @dropCell to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] - -define fastcc void @dropCell(%struct.MemPage* nocapture %pPage, i32 %idx, i32 %sz) nounwind ssp { -entry: - %0 = getelementptr %struct.MemPage* %pPage, i64 0, i32 18 ; <i8**> [#uses=1] - %1 = load i8** %0, align 8 ; <i8*> [#uses=34] - %2 = getelementptr %struct.MemPage* %pPage, i64 0, i32 12 ; <i16*> [#uses=1] - %3 = load i16* %2, align 2 ; <i16> [#uses=1] - %4 = zext i16 %3 to i32 ; <i32> [#uses=2] - %5 = shl i32 %idx, 1 ; <i32> [#uses=2] - %6 = add i32 %4, %5 ; <i32> [#uses=1] - %7 = sext i32 %6 to i64 ; <i64> [#uses=2] - %8 = getelementptr i8* %1, i64 %7 ; <i8*> [#uses=1] - %9 = load i8* %8, align 1 ; <i8> [#uses=2] - %10 = zext i8 %9 to i32 ; <i32> [#uses=1] - %11 = shl i32 %10, 8 ; <i32> [#uses=1] - %.sum3 = add i64 %7, 1 ; <i64> [#uses=1] - %12 = getelementptr i8* %1, i64 %.sum3 ; <i8*> [#uses=1] - %13 = load i8* %12, align 1 ; <i8> [#uses=2] - %14 = zext i8 %13 to i32 ; <i32> [#uses=1] - %15 = or i32 %11, %14 ; <i32> [#uses=3] - %16 = icmp slt i32 %sz, 4 ; <i1> [#uses=1] - %size_addr.0.i = select i1 %16, i32 4, i32 %sz ; <i32> [#uses=3] - %17 = getelementptr %struct.MemPage* %pPage, i64 0, i32 8 ; <i8*> [#uses=5] - %18 = load i8* %17, align 8 ; <i8> [#uses=1] - %19 = zext i8 %18 to i32 ; <i32> [#uses=4] - %20 = add i32 %19, 1 ; <i32> [#uses=2] - br label %bb3.i - -bb3.i: ; preds = %bb3.i, %entry - %addr.0.i = phi i32 [ %20, %entry ], [ %29, %bb3.i ] ; <i32> [#uses=1] - %21 = sext i32 %addr.0.i to i64 ; <i64> [#uses=2] - %22 = getelementptr i8* %1, i64 %21 ; <i8*> [#uses=2] - %23 = load i8* %22, align 1 ; <i8> [#uses=2] - %24 = zext i8 %23 to i32 ; <i32> [#uses=1] - %25 = shl i32 %24, 8 ; <i32> [#uses=1] - %.sum34.i = add i64 %21, 1 ; <i64> [#uses=1] - %26 = getelementptr i8* %1, i64 %.sum34.i ; <i8*> [#uses=2] - %27 = load i8* %26, align 1 ; <i8> [#uses=2] - %28 = zext i8 %27 to i32 ; <i32> [#uses=1] - %29 = or i32 %25, %28 ; <i32> [#uses=3] - %.not.i = icmp uge i32 %29, %15 ; <i1> [#uses=1] - %30 = icmp eq i32 %29, 0 ; <i1> [#uses=1] - %or.cond.i = or i1 %30, %.not.i ; <i1> [#uses=1] - br i1 %or.cond.i, label %bb5.i, label %bb3.i - -bb5.i: ; preds = %bb3.i - store i8 %9, i8* %22, align 1 - store i8 %13, i8* %26, align 1 - %31 = zext i32 %15 to i64 ; <i64> [#uses=2] - %32 = getelementptr i8* %1, i64 %31 ; <i8*> [#uses=1] - store i8 %23, i8* %32, align 1 - %.sum32.i = add i64 %31, 1 ; <i64> [#uses=1] - %33 = getelementptr i8* %1, i64 %.sum32.i ; <i8*> [#uses=1] - store i8 %27, i8* %33, align 1 - %34 = add i32 %15, 2 ; <i32> [#uses=1] - %35 = zext i32 %34 to i64 ; <i64> [#uses=2] - %36 = getelementptr i8* %1, i64 %35 ; <i8*> [#uses=1] - %37 = lshr i32 %size_addr.0.i, 8 ; <i32> [#uses=1] - %38 = trunc i32 %37 to i8 ; <i8> [#uses=1] - store i8 %38, i8* %36, align 1 - %39 = trunc i32 %size_addr.0.i to i8 ; <i8> [#uses=1] - %.sum31.i = add i64 %35, 1 ; <i64> [#uses=1] - %40 = getelementptr i8* %1, i64 %.sum31.i ; <i8*> [#uses=1] - store i8 %39, i8* %40, align 1 - %41 = getelementptr %struct.MemPage* %pPage, i64 0, i32 14 ; <i16*> [#uses=4] - %42 = load i16* %41, align 2 ; <i16> [#uses=1] - %43 = trunc i32 %size_addr.0.i to i16 ; <i16> [#uses=1] - %44 = add i16 %42, %43 ; <i16> [#uses=1] - store i16 %44, i16* %41, align 2 - %45 = load i8* %17, align 8 ; <i8> [#uses=1] - %46 = zext i8 %45 to i32 ; <i32> [#uses=1] - %47 = add i32 %46, 1 ; <i32> [#uses=1] - br label %bb11.outer.i - -bb11.outer.i: ; preds = %bb6.i, %bb5.i - %addr.1.ph.i = phi i32 [ %47, %bb5.i ], [ %111, %bb6.i ] ; <i32> [#uses=1] - %48 = sext i32 %addr.1.ph.i to i64 ; <i64> [#uses=2] - %49 = getelementptr i8* %1, i64 %48 ; <i8*> [#uses=1] - %.sum30.i = add i64 %48, 1 ; <i64> [#uses=1] - %50 = getelementptr i8* %1, i64 %.sum30.i ; <i8*> [#uses=1] - br label %bb11.i - -bb6.i: ; preds = %bb11.i - %51 = zext i32 %111 to i64 ; <i64> [#uses=2] - %52 = getelementptr i8* %1, i64 %51 ; <i8*> [#uses=2] - %53 = load i8* %52, align 1 ; <i8> [#uses=1] - %54 = zext i8 %53 to i32 ; <i32> [#uses=1] - %55 = shl i32 %54, 8 ; <i32> [#uses=1] - %.sum24.i = add i64 %51, 1 ; <i64> [#uses=1] - %56 = getelementptr i8* %1, i64 %.sum24.i ; <i8*> [#uses=2] - %57 = load i8* %56, align 1 ; <i8> [#uses=3] - %58 = zext i8 %57 to i32 ; <i32> [#uses=1] - %59 = or i32 %55, %58 ; <i32> [#uses=5] - %60 = add i32 %111, 2 ; <i32> [#uses=1] - %61 = zext i32 %60 to i64 ; <i64> [#uses=2] - %62 = getelementptr i8* %1, i64 %61 ; <i8*> [#uses=2] - %63 = load i8* %62, align 1 ; <i8> [#uses=1] - %64 = zext i8 %63 to i32 ; <i32> [#uses=1] - %65 = shl i32 %64, 8 ; <i32> [#uses=1] - %.sum23.i = add i64 %61, 1 ; <i64> [#uses=1] - %66 = getelementptr i8* %1, i64 %.sum23.i ; <i8*> [#uses=2] - %67 = load i8* %66, align 1 ; <i8> [#uses=2] - %68 = zext i8 %67 to i32 ; <i32> [#uses=1] - %69 = or i32 %65, %68 ; <i32> [#uses=1] - %70 = add i32 %111, 3 ; <i32> [#uses=1] - %71 = add i32 %70, %69 ; <i32> [#uses=1] - %72 = icmp sge i32 %71, %59 ; <i1> [#uses=1] - %73 = icmp ne i32 %59, 0 ; <i1> [#uses=1] - %74 = and i1 %72, %73 ; <i1> [#uses=1] - br i1 %74, label %bb9.i, label %bb11.outer.i - -bb9.i: ; preds = %bb6.i - %75 = load i8* %17, align 8 ; <i8> [#uses=1] - %76 = zext i8 %75 to i32 ; <i32> [#uses=1] - %77 = add i32 %76, 7 ; <i32> [#uses=1] - %78 = zext i32 %77 to i64 ; <i64> [#uses=1] - %79 = getelementptr i8* %1, i64 %78 ; <i8*> [#uses=2] - %80 = load i8* %79, align 1 ; <i8> [#uses=1] - %81 = sub i8 %109, %57 ; <i8> [#uses=1] - %82 = add i8 %81, %67 ; <i8> [#uses=1] - %83 = add i8 %82, %80 ; <i8> [#uses=1] - store i8 %83, i8* %79, align 1 - %84 = zext i32 %59 to i64 ; <i64> [#uses=2] - %85 = getelementptr i8* %1, i64 %84 ; <i8*> [#uses=1] - %86 = load i8* %85, align 1 ; <i8> [#uses=1] - store i8 %86, i8* %52, align 1 - %.sum22.i = add i64 %84, 1 ; <i64> [#uses=1] - %87 = getelementptr i8* %1, i64 %.sum22.i ; <i8*> [#uses=1] - %88 = load i8* %87, align 1 ; <i8> [#uses=1] - store i8 %88, i8* %56, align 1 - %89 = add i32 %59, 2 ; <i32> [#uses=1] - %90 = zext i32 %89 to i64 ; <i64> [#uses=2] - %91 = getelementptr i8* %1, i64 %90 ; <i8*> [#uses=1] - %92 = load i8* %91, align 1 ; <i8> [#uses=1] - %93 = zext i8 %92 to i32 ; <i32> [#uses=1] - %94 = shl i32 %93, 8 ; <i32> [#uses=1] - %.sum20.i = add i64 %90, 1 ; <i64> [#uses=1] - %95 = getelementptr i8* %1, i64 %.sum20.i ; <i8*> [#uses=2] - %96 = load i8* %95, align 1 ; <i8> [#uses=1] - %97 = zext i8 %96 to i32 ; <i32> [#uses=1] - %98 = or i32 %94, %97 ; <i32> [#uses=1] - %99 = sub i32 %59, %111 ; <i32> [#uses=1] - %100 = add i32 %99, %98 ; <i32> [#uses=1] - %101 = lshr i32 %100, 8 ; <i32> [#uses=1] - %102 = trunc i32 %101 to i8 ; <i8> [#uses=1] - store i8 %102, i8* %62, align 1 - %103 = load i8* %95, align 1 ; <i8> [#uses=1] - %104 = sub i8 %57, %109 ; <i8> [#uses=1] - %105 = add i8 %104, %103 ; <i8> [#uses=1] - store i8 %105, i8* %66, align 1 - br label %bb11.i - -bb11.i: ; preds = %bb9.i, %bb11.outer.i - %106 = load i8* %49, align 1 ; <i8> [#uses=1] - %107 = zext i8 %106 to i32 ; <i32> [#uses=1] - %108 = shl i32 %107, 8 ; <i32> [#uses=1] - %109 = load i8* %50, align 1 ; <i8> [#uses=3] - %110 = zext i8 %109 to i32 ; <i32> [#uses=1] - %111 = or i32 %108, %110 ; <i32> [#uses=6] - %112 = icmp eq i32 %111, 0 ; <i1> [#uses=1] - br i1 %112, label %bb12.i, label %bb6.i - -bb12.i: ; preds = %bb11.i - %113 = zext i32 %20 to i64 ; <i64> [#uses=2] - %114 = getelementptr i8* %1, i64 %113 ; <i8*> [#uses=2] - %115 = load i8* %114, align 1 ; <i8> [#uses=2] - %116 = add i32 %19, 5 ; <i32> [#uses=1] - %117 = zext i32 %116 to i64 ; <i64> [#uses=2] - %118 = getelementptr i8* %1, i64 %117 ; <i8*> [#uses=3] - %119 = load i8* %118, align 1 ; <i8> [#uses=1] - %120 = icmp eq i8 %115, %119 ; <i1> [#uses=1] - br i1 %120, label %bb13.i, label %bb1.preheader - -bb13.i: ; preds = %bb12.i - %121 = add i32 %19, 2 ; <i32> [#uses=1] - %122 = zext i32 %121 to i64 ; <i64> [#uses=1] - %123 = getelementptr i8* %1, i64 %122 ; <i8*> [#uses=1] - %124 = load i8* %123, align 1 ; <i8> [#uses=1] - %125 = add i32 %19, 6 ; <i32> [#uses=1] - %126 = zext i32 %125 to i64 ; <i64> [#uses=1] - %127 = getelementptr i8* %1, i64 %126 ; <i8*> [#uses=1] - %128 = load i8* %127, align 1 ; <i8> [#uses=1] - %129 = icmp eq i8 %124, %128 ; <i1> [#uses=1] - br i1 %129, label %bb14.i, label %bb1.preheader - -bb14.i: ; preds = %bb13.i - %130 = zext i8 %115 to i32 ; <i32> [#uses=1] - %131 = shl i32 %130, 8 ; <i32> [#uses=1] - %.sum29.i = add i64 %113, 1 ; <i64> [#uses=1] - %132 = getelementptr i8* %1, i64 %.sum29.i ; <i8*> [#uses=1] - %133 = load i8* %132, align 1 ; <i8> [#uses=1] - %134 = zext i8 %133 to i32 ; <i32> [#uses=1] - %135 = or i32 %134, %131 ; <i32> [#uses=2] - %136 = zext i32 %135 to i64 ; <i64> [#uses=1] - %137 = getelementptr i8* %1, i64 %136 ; <i8*> [#uses=1] - %138 = bitcast i8* %137 to i16* ; <i16*> [#uses=1] - %139 = bitcast i8* %114 to i16* ; <i16*> [#uses=1] - %tmp.i = load i16* %138, align 1 ; <i16> [#uses=1] - store i16 %tmp.i, i16* %139, align 1 - %140 = load i8* %118, align 1 ; <i8> [#uses=1] - %141 = zext i8 %140 to i32 ; <i32> [#uses=1] - %142 = shl i32 %141, 8 ; <i32> [#uses=1] - %.sum28.i = add i64 %117, 1 ; <i64> [#uses=1] - %143 = getelementptr i8* %1, i64 %.sum28.i ; <i8*> [#uses=2] - %144 = load i8* %143, align 1 ; <i8> [#uses=2] - %145 = zext i8 %144 to i32 ; <i32> [#uses=1] - %146 = or i32 %142, %145 ; <i32> [#uses=1] - %147 = add i32 %135, 2 ; <i32> [#uses=1] - %148 = zext i32 %147 to i64 ; <i64> [#uses=2] - %149 = getelementptr i8* %1, i64 %148 ; <i8*> [#uses=1] - %150 = load i8* %149, align 1 ; <i8> [#uses=1] - %151 = zext i8 %150 to i32 ; <i32> [#uses=1] - %152 = shl i32 %151, 8 ; <i32> [#uses=1] - %.sum27.i = add i64 %148, 1 ; <i64> [#uses=1] - %153 = getelementptr i8* %1, i64 %.sum27.i ; <i8*> [#uses=2] - %154 = load i8* %153, align 1 ; <i8> [#uses=1] - %155 = zext i8 %154 to i32 ; <i32> [#uses=1] - %156 = or i32 %152, %155 ; <i32> [#uses=1] - %157 = add i32 %156, %146 ; <i32> [#uses=1] - %158 = lshr i32 %157, 8 ; <i32> [#uses=1] - %159 = trunc i32 %158 to i8 ; <i8> [#uses=1] - store i8 %159, i8* %118, align 1 - %160 = load i8* %153, align 1 ; <i8> [#uses=1] - %161 = add i8 %160, %144 ; <i8> [#uses=1] - store i8 %161, i8* %143, align 1 - br label %bb1.preheader - -bb1.preheader: ; preds = %bb14.i, %bb13.i, %bb12.i - %i.08 = add i32 %idx, 1 ; <i32> [#uses=2] - %162 = getelementptr %struct.MemPage* %pPage, i64 0, i32 15 ; <i16*> [#uses=4] - %163 = load i16* %162, align 4 ; <i16> [#uses=2] - %164 = zext i16 %163 to i32 ; <i32> [#uses=1] - %165 = icmp sgt i32 %164, %i.08 ; <i1> [#uses=1] - br i1 %165, label %bb, label %bb2 - -bb: ; preds = %bb, %bb1.preheader - %indvar = phi i64 [ 0, %bb1.preheader ], [ %indvar.next, %bb ] ; <i64> [#uses=3] - %tmp16 = add i32 %5, %4 ; <i32> [#uses=1] - %tmp.17 = sext i32 %tmp16 to i64 ; <i64> [#uses=1] - %tmp19 = shl i64 %indvar, 1 ; <i64> [#uses=1] - %ctg2.sum = add i64 %tmp.17, %tmp19 ; <i64> [#uses=4] - %ctg229 = getelementptr i8* %1, i64 %ctg2.sum ; <i8*> [#uses=1] - %ctg229.sum31 = add i64 %ctg2.sum, 2 ; <i64> [#uses=1] - %166 = getelementptr i8* %1, i64 %ctg229.sum31 ; <i8*> [#uses=1] - %167 = load i8* %166, align 1 ; <i8> [#uses=1] - store i8 %167, i8* %ctg229 - %ctg229.sum30 = add i64 %ctg2.sum, 3 ; <i64> [#uses=1] - %168 = getelementptr i8* %1, i64 %ctg229.sum30 ; <i8*> [#uses=1] - %169 = load i8* %168, align 1 ; <i8> [#uses=1] - %ctg229.sum = add i64 %ctg2.sum, 1 ; <i64> [#uses=1] - %170 = getelementptr i8* %1, i64 %ctg229.sum ; <i8*> [#uses=1] - store i8 %169, i8* %170, align 1 - %indvar15 = trunc i64 %indvar to i32 ; <i32> [#uses=1] - %i.09 = add i32 %indvar15, %i.08 ; <i32> [#uses=1] - %i.0 = add i32 %i.09, 1 ; <i32> [#uses=1] - %171 = load i16* %162, align 4 ; <i16> [#uses=2] - %172 = zext i16 %171 to i32 ; <i32> [#uses=1] - %173 = icmp sgt i32 %172, %i.0 ; <i1> [#uses=1] - %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] - br i1 %173, label %bb, label %bb2 - -bb2: ; preds = %bb, %bb1.preheader - %174 = phi i16 [ %163, %bb1.preheader ], [ %171, %bb ] ; <i16> [#uses=1] - %175 = add i16 %174, -1 ; <i16> [#uses=2] - store i16 %175, i16* %162, align 4 - %176 = load i8* %17, align 8 ; <i8> [#uses=1] - %177 = zext i8 %176 to i32 ; <i32> [#uses=1] - %178 = add i32 %177, 3 ; <i32> [#uses=1] - %179 = zext i32 %178 to i64 ; <i64> [#uses=1] - %180 = getelementptr i8* %1, i64 %179 ; <i8*> [#uses=1] - %181 = lshr i16 %175, 8 ; <i16> [#uses=1] - %182 = trunc i16 %181 to i8 ; <i8> [#uses=1] - store i8 %182, i8* %180, align 1 - %183 = load i8* %17, align 8 ; <i8> [#uses=1] - %184 = zext i8 %183 to i32 ; <i32> [#uses=1] - %185 = add i32 %184, 3 ; <i32> [#uses=1] - %186 = zext i32 %185 to i64 ; <i64> [#uses=1] - %187 = load i16* %162, align 4 ; <i16> [#uses=1] - %188 = trunc i16 %187 to i8 ; <i8> [#uses=1] - %.sum = add i64 %186, 1 ; <i64> [#uses=1] - %189 = getelementptr i8* %1, i64 %.sum ; <i8*> [#uses=1] - store i8 %188, i8* %189, align 1 - %190 = load i16* %41, align 2 ; <i16> [#uses=1] - %191 = add i16 %190, 2 ; <i16> [#uses=1] - store i16 %191, i16* %41, align 2 - %192 = getelementptr %struct.MemPage* %pPage, i64 0, i32 1 ; <i8*> [#uses=1] - store i8 1, i8* %192, align 1 - ret void -} diff --git a/test/CodeGen/X86/stdcall.ll b/test/CodeGen/X86/stdcall.ll index 70204bc..a7c2517 100644 --- a/test/CodeGen/X86/stdcall.ll +++ b/test/CodeGen/X86/stdcall.ll @@ -2,7 +2,7 @@ ; PR5851 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" -target triple = "i386-mingw32" +target triple = "i386-pc-mingw32" %0 = type { void (...)* } diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll index 5682e7c..abc5174 100644 --- a/test/CodeGen/X86/store-narrow.ll +++ b/test/CodeGen/X86/store-narrow.ll @@ -1,6 +1,6 @@ ; rdar://7860110 -; RUN: llc < %s | FileCheck %s -check-prefix=X64 -; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=X32 +; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=X64 +; RUN: llc -march=x86 -asm-verbose=false < %s | FileCheck %s -check-prefix=X32 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.2" @@ -125,3 +125,30 @@ entry: ; X32: movb %cl, 5(%{{.*}}) } +; PR7833 + +@g_16 = internal global i32 -1 + +; X64: test8: +; X64-NEXT: movl _g_16(%rip), %eax +; X64-NEXT: movl $0, _g_16(%rip) +; X64-NEXT: orl $1, %eax +; X64-NEXT: movl %eax, _g_16(%rip) +; X64-NEXT: ret +define void @test8() nounwind { + %tmp = load i32* @g_16 + store i32 0, i32* @g_16 + %or = or i32 %tmp, 1 + store i32 %or, i32* @g_16 + ret void +} + +; X64: test9: +; X64-NEXT: orb $1, _g_16(%rip) +; X64-NEXT: ret +define void @test9() nounwind { + %tmp = load i32* @g_16 + %or = or i32 %tmp, 1 + store i32 %or, i32* @g_16 + ret void +} diff --git a/test/CodeGen/X86/tailcall-fastisel.ll b/test/CodeGen/X86/tailcall-fastisel.ll index d54fb41..7f92af4 100644 --- a/test/CodeGen/X86/tailcall-fastisel.ll +++ b/test/CodeGen/X86/tailcall-fastisel.ll @@ -1,8 +1,6 @@ -; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | grep TAILCALL +; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | not grep TAILCALL -; Fast-isel shouldn't attempt to handle this tail call, and it should -; cleanly terminate instruction selection in the block after it's -; done to avoid emitting invalid MachineInstrs. +; Fast-isel shouldn't attempt to cope with tail calls. %0 = type { i64, i32, i8* } @@ -11,3 +9,11 @@ fail: ; preds = %entry %tmp20 = tail call fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 undef) ; <i8*> [#uses=1] ret i8* %tmp20 } + +define i32 @foo() nounwind { +entry: + %0 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1] + ret i32 %0 +} + +declare i32 @bar(...) nounwind diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll index 4c37225..6f6d6f2 100644 --- a/test/CodeGen/X86/twoaddr-coalesce.ll +++ b/test/CodeGen/X86/twoaddr-coalesce.ll @@ -3,7 +3,7 @@ @"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] -define i32 @main() nounwind { +define i32 @foo() nounwind { bb1.thread: br label %bb1 diff --git a/test/CodeGen/X86/v2f32.ll b/test/CodeGen/X86/v2f32.ll index 9c4b773..76c3fdf 100644 --- a/test/CodeGen/X86/v2f32.ll +++ b/test/CodeGen/X86/v2f32.ll @@ -10,15 +10,16 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind { store float %c, float* %P2 ret void ; X64: test1: -; X64-NEXT: addss %xmm1, %xmm0 -; X64-NEXT: movss %xmm0, (%rdi) +; X64-NEXT: pshufd $1, %xmm0, %xmm1 +; X64-NEXT: addss %xmm0, %xmm1 +; X64-NEXT: movss %xmm1, (%rdi) ; X64-NEXT: ret ; X32: test1: -; X32-NEXT: movss 4(%esp), %xmm0 -; X32-NEXT: addss 8(%esp), %xmm0 -; X32-NEXT: movl 12(%esp), %eax -; X32-NEXT: movss %xmm0, (%eax) +; X32-NEXT: pshufd $1, %xmm0, %xmm1 +; X32-NEXT: addss %xmm0, %xmm1 +; X32-NEXT: movl 4(%esp), %eax +; X32-NEXT: movss %xmm1, (%eax) ; X32-NEXT: ret } @@ -28,12 +29,42 @@ define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounw ret <2 x float> %Z ; X64: test2: -; X64-NEXT: insertps $0 -; X64-NEXT: insertps $16 -; X64-NEXT: insertps $0 -; X64-NEXT: insertps $16 -; X64-NEXT: addps -; X64-NEXT: movaps -; X64-NEXT: pshufd +; X64-NEXT: addps %xmm1, %xmm0 ; X64-NEXT: ret } + + +define <2 x float> @test3(<4 x float> %A) nounwind { + %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> + %C = fadd <2 x float> %B, %B + ret <2 x float> %C +; CHECK: test3: +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: ret +} + +define <2 x float> @test4(<2 x float> %A) nounwind { + %C = fadd <2 x float> %A, %A + ret <2 x float> %C +; CHECK: test4: +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: ret +} + +define <4 x float> @test5(<4 x float> %A) nounwind { + %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> + %C = fadd <2 x float> %B, %B + br label %BB + +BB: + %D = fadd <2 x float> %C, %C + %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + ret <4 x float> %E + +; CHECK: _test5: +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: ret +} + + diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll index 6f18d13..f853164 100644 --- a/test/CodeGen/X86/vec_cast.ll +++ b/test/CodeGen/X86/vec_cast.ll @@ -1,15 +1,16 @@ -; RUN: llc < %s -march=x86-64 -; RUN: llc < %s -march=x86-64 -disable-mmx +; RUN: llc < %s -march=x86-64 -mcpu=core2 +; RUN: llc < %s -march=x86-64 -mcpu=core2 -disable-mmx + define <8 x i32> @a(<8 x i16> %a) nounwind { %c = sext <8 x i16> %a to <8 x i32> ret <8 x i32> %c } -define <3 x i32> @b(<3 x i16> %a) nounwind { - %c = sext <3 x i16> %a to <3 x i32> - ret <3 x i32> %c -} +;define <3 x i32> @b(<3 x i16> %a) nounwind { +; %c = sext <3 x i16> %a to <3 x i32> +; ret <3 x i32> %c +;} define <1 x i32> @c(<1 x i16> %a) nounwind { %c = sext <1 x i16> %a to <1 x i32> @@ -21,10 +22,10 @@ define <8 x i32> @d(<8 x i16> %a) nounwind { ret <8 x i32> %c } -define <3 x i32> @e(<3 x i16> %a) nounwind { - %c = zext <3 x i16> %a to <3 x i32> - ret <3 x i32> %c -} +;define <3 x i32> @e(<3 x i16> %a) nounwind { +; %c = zext <3 x i16> %a to <3 x i32> +; ret <3 x i32> %c +;} define <1 x i32> @f(<1 x i16> %a) nounwind { %c = zext <1 x i16> %a to <1 x i32> diff --git a/test/CodeGen/X86/vec_insert-6.ll b/test/CodeGen/X86/vec_insert-6.ll index 54aa43f..de3b36f 100644 --- a/test/CodeGen/X86/vec_insert-6.ll +++ b/test/CodeGen/X86/vec_insert-6.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pslldq -; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 6 +; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6 define <4 x float> @t3(<4 x float>* %P) nounwind { %tmp1 = load <4 x float>* %P diff --git a/test/CodeGen/X86/vec_insert-9.ll b/test/CodeGen/X86/vec_insert-9.ll index 2e829df..e5a7ccc 100644 --- a/test/CodeGen/X86/vec_insert-9.ll +++ b/test/CodeGen/X86/vec_insert-9.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse41 > %t -; RUN: grep pinsrd %t | count 2 +; RUN: grep pinsrd %t | count 1 define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind { entry: diff --git a/test/CodeGen/X86/vec_shift4.ll b/test/CodeGen/X86/vec_shift4.ll new file mode 100644 index 0000000..9ef7fbd --- /dev/null +++ b/test/CodeGen/X86/vec_shift4.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s + +define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp { +entry: +; CHECK-NOT: shll +; CHECK: pslld +; CHECK: paddd +; CHECK: cvttps2dq +; CHECK: pmulld + + %shl = shl <4 x i32> %r, %a ; <<4 x i32>> [#uses=1] + %tmp2 = bitcast <4 x i32> %shl to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp2 +} + +define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp { +entry: +; CHECK-NOT: shlb +; CHECK: pblendvb +; CHECK: pblendvb +; CHECK: pblendvb + %shl = shl <16 x i8> %r, %a ; <<16 x i8>> [#uses=1] + %tmp2 = bitcast <16 x i8> %shl to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp2 +} diff --git a/test/CodeGen/X86/vec_shuffle-10.ll b/test/CodeGen/X86/vec_shuffle-10.ll deleted file mode 100644 index a63e386..0000000 --- a/test/CodeGen/X86/vec_shuffle-10.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep unpcklps %t | count 1 -; RUN: grep pshufd %t | count 1 -; RUN: not grep {sub.*esp} %t - -define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) { - %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2] - %tmp3 = load <4 x float>* %A ; <<4 x float>> [#uses=2] - %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1] - %tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1] - %tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1] - %tmp9 = extractelement <4 x float> %tmp, i32 1 ; <float> [#uses=1] - %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0 ; <<4 x float>> [#uses=1] - %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 ; <<4 x float>> [#uses=1] - %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 ; <<4 x float>> [#uses=1] - %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 ; <<4 x float>> [#uses=1] - store <4 x float> %tmp13, <4 x float>* %res - ret void -} - -define void @test2(<4 x float> %X, <4 x float>* %res) { - %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1] - store <4 x float> %tmp5, <4 x float>* %res - ret void -} diff --git a/test/CodeGen/X86/vec_shuffle-19.ll b/test/CodeGen/X86/vec_shuffle-19.ll index 9fc09df..861a1cc 100644 --- a/test/CodeGen/X86/vec_shuffle-19.ll +++ b/test/CodeGen/X86/vec_shuffle-19.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4 +; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4 ; PR2485 define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind { diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll index 6d1bac0..fc06b95 100644 --- a/test/CodeGen/X86/vec_shuffle-20.ll +++ b/test/CodeGen/X86/vec_shuffle-20.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3 +; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3 define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind { entry: diff --git a/test/CodeGen/X86/vec_shuffle-24.ll b/test/CodeGen/X86/vec_shuffle-24.ll index 7562f1d..1b104de 100644 --- a/test/CodeGen/X86/vec_shuffle-24.ll +++ b/test/CodeGen/X86/vec_shuffle-24.ll @@ -1,7 +1,8 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | grep punpck +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s define i32 @t() nounwind optsize { entry: +; CHECK: punpckldq %a = alloca <4 x i32> ; <<4 x i32>*> [#uses=2] %b = alloca <4 x i32> ; <<4 x i32>*> [#uses=5] volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll deleted file mode 100644 index f4930b0..0000000 --- a/test/CodeGen/X86/vec_shuffle-3.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep movlhps %t | count 1 -; RUN: grep movhlps %t | count 1 - -define <4 x float> @test1(<4 x float>* %x, <4 x float>* %y) { - %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2] - %tmp5 = load <4 x float>* %x ; <<4 x float>> [#uses=2] - %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1] - %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1] - %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1] - ret <4 x float> %tmp27 -} - -define <4 x float> @movhl(<4 x float>* %x, <4 x float>* %y) { -entry: - %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=1] - %tmp3 = load <4 x float>* %x ; <<4 x float>> [#uses=1] - %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] - ret <4 x float> %tmp4 -} diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll new file mode 100644 index 0000000..1ed858d --- /dev/null +++ b/test/CodeGen/X86/vec_shuffle-37.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp { +entry: +; CHECK: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, %xmm1 +; CHECK-NEXT: movlps (%rax), %xmm1 +; CHECK-NEXT: shufps $36, %xmm1, %xmm0 + %0 = load <4 x i32>* undef, align 16 + %1 = load <4 x i32>* %a0, align 16 + %2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4> + ret <4 x i32> %2 +} + diff --git a/test/CodeGen/X86/vec_shuffle-4.ll b/test/CodeGen/X86/vec_shuffle-4.ll deleted file mode 100644 index 829fedf..0000000 --- a/test/CodeGen/X86/vec_shuffle-4.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 > %t -; RUN: grep shuf %t | count 2 -; RUN: not grep unpck %t - -define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) { - %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1] - %tmp5 = load <4 x float>* %C ; <<4 x float>> [#uses=1] - %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1] - store <4 x float> %tmp11, <4 x float>* %res - ret void -} - diff --git a/test/CodeGen/X86/vec_shuffle-5.ll b/test/CodeGen/X86/vec_shuffle-5.ll deleted file mode 100644 index c24167a..0000000 --- a/test/CodeGen/X86/vec_shuffle-5.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep movhlps %t | count 1 -; RUN: grep shufps %t | count 1 - -define void @test() nounwind { - %tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2] - %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1] - %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] - %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1] - store <4 x float> %tmp4, <4 x float>* null - ret void -} - diff --git a/test/CodeGen/X86/vec_shuffle-6.ll b/test/CodeGen/X86/vec_shuffle-6.ll deleted file mode 100644 index 28fd59b..0000000 --- a/test/CodeGen/X86/vec_shuffle-6.ll +++ /dev/null @@ -1,42 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep movapd %t | count 1 -; RUN: grep movaps %t | count 1 -; RUN: grep movups %t | count 2 - -target triple = "i686-apple-darwin" -@x = external global [4 x i32] - -define <2 x i64> @test1() { - %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1] - %tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1] - %tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1] - %tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1] - %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1] - %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] - %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1] - %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3 ; <<4 x i32>> [#uses=1] - %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1] - ret <2 x i64> %tmp16 -} - -define <4 x float> @test2(i32 %dummy, float %a, float %b, float %c, float %d) { - %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] - %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] - %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1] - %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1] - ret <4 x float> %tmp13 -} - -define <4 x float> @test3(float %a, float %b, float %c, float %d) { - %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] - %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] - %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1] - %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1] - ret <4 x float> %tmp13 -} - -define <2 x double> @test4(double %a, double %b) { - %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1] - %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1] - ret <2 x double> %tmp7 -} diff --git a/test/CodeGen/X86/vec_shuffle-7.ll b/test/CodeGen/X86/vec_shuffle-7.ll deleted file mode 100644 index 64bd6a3..0000000 --- a/test/CodeGen/X86/vec_shuffle-7.ll +++ /dev/null @@ -1,11 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t -; RUN: grep pxor %t | count 1 -; RUN: not grep shufps %t - -define void @test() { - bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:1 [#uses=1] - shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1] - store <4 x float> %2, <4 x float>* null - unreachable -} - diff --git a/test/CodeGen/X86/vec_shuffle-8.ll b/test/CodeGen/X86/vec_shuffle-8.ll deleted file mode 100644 index 964ce7b..0000000 --- a/test/CodeGen/X86/vec_shuffle-8.ll +++ /dev/null @@ -1,10 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | \ -; RUN: not grep shufps - -define void @test(<4 x float>* %res, <4 x float>* %A) { - %tmp1 = load <4 x float>* %A ; <<4 x float>> [#uses=1] - %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1] - store <4 x float> %tmp2, <4 x float>* %res - ret void -} - diff --git a/test/CodeGen/X86/vec_shuffle-9.ll b/test/CodeGen/X86/vec_shuffle-9.ll deleted file mode 100644 index 0719586..0000000 --- a/test/CodeGen/X86/vec_shuffle-9.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s - -define <4 x i32> @test(i8** %ptr) { -; CHECK: pxor -; CHECK: punpcklbw -; CHECK: punpcklwd - - %tmp = load i8** %ptr ; <i8*> [#uses=1] - %tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1] - %tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1] - %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1] - %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1] - %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1] - %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1] - %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8> ; <<16 x i8>> [#uses=1] - %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1] - %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16> ; <<8 x i16>> [#uses=1] - %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1] - %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32> ; <<4 x i32>> [#uses=1] - ret <4 x i32> %tmp36 -} diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll index 25dde57..463f522 100644 --- a/test/CodeGen/X86/widen_shuffle-1.ll +++ b/test/CodeGen/X86/widen_shuffle-1.ll @@ -3,7 +3,8 @@ ; widening shuffle v3float and then a add define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { entry: -; CHECK: insertps +; CHECK: shuf: +; CHECK: extractps ; CHECK: extractps %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2> %val = fadd <3 x float> %x, %src2 @@ -15,7 +16,8 @@ entry: ; widening shuffle v3float with a different mask and then a add define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { entry: -; CHECK: insertps +; CHECK: shuf2: +; CHECK: extractps ; CHECK: extractps %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2> %val = fadd <3 x float> %x, %src2 @@ -26,7 +28,7 @@ entry: ; Example of when widening a v3float operation causes the DAG to replace a node ; with the operation that we are currently widening, i.e. when replacing ; opA with opB, the DAG will produce new operations with opA. -define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) { +define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind { entry: ; CHECK: pshufd %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5> diff --git a/test/CodeGen/X86/win_chkstk.ll b/test/CodeGen/X86/win_chkstk.ll new file mode 100644 index 0000000..27d3358 --- /dev/null +++ b/test/CodeGen/X86/win_chkstk.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32 +; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32 +; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64 +; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX + +; Windows and mingw require a prologue helper routine if more than 4096 bytes area +; allocated on the stack. Windows uses __chkstk and mingw uses __alloca. __alloca +; and the 32-bit version of __chkstk will probe the stack and adjust the stack pointer. +; The 64-bit version of __chkstk is only responsible for probing the stack. The 64-bit +; prologue is responsible for adjusting the stack pointer. + +; Stack allocation >= 4096 bytes will require call to __chkstk in the Windows ABI. +define i32 @main4k() nounwind { +entry: +; WIN_X32: call __chkstk +; WIN_X64: call __chkstk +; MINGW_X32: call __alloca +; MINGW_X64: call _alloca +; LINUX-NOT: call __chkstk + %array4096 = alloca [4096 x i8], align 16 ; <[4096 x i8]*> [#uses=0] + ret i32 0 +} + +; Make sure we don't call __chkstk or __alloca when we have less than a 4096 stack +; allocation. +define i32 @main128() nounwind { +entry: +; WIN_X32: # BB#0: +; WIN_X32-NOT: call __chkstk +; WIN_X32: ret + +; WIN_X64: # BB#0: +; WIN_X64-NOT: call __chkstk +; WIN_X64: ret + +; MINGW_X64: # BB#0: +; MINGW_X64-NOT: call _alloca +; MINGW_X64: ret + +; LINUX: # BB#0: +; LINUX-NOT: call __chkstk +; LINUX: ret + %array128 = alloca [128 x i8], align 16 ; <[128 x i8]*> [#uses=0] + ret i32 0 +} diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll index 3e3bb95..4470074 100644 --- a/test/CodeGen/X86/zero-remat.ll +++ b/test/CodeGen/X86/zero-remat.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64 -; RUN: llc < %s -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 12 +; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12 ; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32 declare void @bar(double %x) diff --git a/test/DebugInfo/2010-07-19-Crash.ll b/test/DebugInfo/2010-07-19-Crash.ll new file mode 100644 index 0000000..87a4a89 --- /dev/null +++ b/test/DebugInfo/2010-07-19-Crash.ll @@ -0,0 +1,24 @@ +; RUN: llc -o /dev/null < %s +; PR7662 +; Do not add variables to !11 because it is a declaration entry. + +define i32 @bar() nounwind readnone ssp { +entry: + ret i32 42, !dbg !9 +} + +!llvm.dbg.sp = !{!0, !6, !11} +!llvm.dbg.lv.foo = !{!7} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524329, metadata !"one.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"one.c", metadata !".", metadata !"clang 2.8", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 7, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 7, metadata !3, i1 true, i1 false, i32 0, i32 0, null, i1 false, i1 true, null} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 524544, metadata !8, metadata !"one", metadata !1, i32 8, metadata !5} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 524299, metadata !6, i32 7, i32 18} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 4, i32 3, metadata !10, null} +!10 = metadata !{i32 524299, metadata !0, i32 3, i32 11} ; [ DW_TAG_lexical_block ] diff --git a/test/DebugInfo/2010-08-04-StackVariable.ll b/test/DebugInfo/2010-08-04-StackVariable.ll new file mode 100644 index 0000000..61cd20b --- /dev/null +++ b/test/DebugInfo/2010-08-04-StackVariable.ll @@ -0,0 +1,124 @@ +; RUN: llc -O0 < %s | grep DW_OP_fbreg +; Use DW_OP_fbreg in variable's location expression if the variable is in a stack slot. + +%struct.SVal = type { i8*, i32 } + +define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !23), !dbg !24 + call void @llvm.dbg.value(metadata !{%struct.SVal* %location}, i64 0, metadata !25), !dbg !24 + %0 = icmp ne i32 %i, 0, !dbg !27 ; <i1> [#uses=1] + br i1 %0, label %bb, label %bb1, !dbg !27 + +bb: ; preds = %entry + %1 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !29 ; <i32*> [#uses=1] + %2 = load i32* %1, align 8, !dbg !29 ; <i32> [#uses=1] + %3 = add i32 %2, %i, !dbg !29 ; <i32> [#uses=1] + br label %bb2, !dbg !29 + +bb1: ; preds = %entry + %4 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !30 ; <i32*> [#uses=1] + %5 = load i32* %4, align 8, !dbg !30 ; <i32> [#uses=1] + %6 = sub i32 %5, 1, !dbg !30 ; <i32> [#uses=1] + br label %bb2, !dbg !30 + +bb2: ; preds = %bb1, %bb + %.0 = phi i32 [ %3, %bb ], [ %6, %bb1 ] ; <i32> [#uses=1] + br label %return, !dbg !29 + +return: ; preds = %bb2 + ret i32 %.0, !dbg !29 +} + +define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.value(metadata !{%struct.SVal* %this}, i64 0, metadata !31), !dbg !34 + %0 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 0, !dbg !34 ; <i8**> [#uses=1] + store i8* null, i8** %0, align 8, !dbg !34 + %1 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 1, !dbg !34 ; <i32*> [#uses=1] + store i32 0, i32* %1, align 8, !dbg !34 + br label %return, !dbg !34 + +return: ; preds = %entry + ret void, !dbg !35 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +define i32 @main() nounwind ssp { +entry: + %0 = alloca %struct.SVal ; <%struct.SVal*> [#uses=3] + %v = alloca %struct.SVal ; <%struct.SVal*> [#uses=4] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.declare(metadata !{%struct.SVal* %v}, metadata !38), !dbg !41 + call void @_ZN4SValC1Ev(%struct.SVal* %v) nounwind, !dbg !41 + %1 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !42 ; <i32*> [#uses=1] + store i32 1, i32* %1, align 8, !dbg !42 + %2 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1] + %3 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1] + %4 = load i8** %3, align 8, !dbg !43 ; <i8*> [#uses=1] + store i8* %4, i8** %2, align 8, !dbg !43 + %5 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1] + %6 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1] + %7 = load i32* %6, align 8, !dbg !43 ; <i32> [#uses=1] + store i32 %7, i32* %5, align 8, !dbg !43 + %8 = call i32 @_Z3fooi4SVal(i32 2, %struct.SVal* noalias %0) nounwind, !dbg !43 ; <i32> [#uses=0] + call void @llvm.dbg.value(metadata !{i32 %8}, i64 0, metadata !44), !dbg !43 + br label %return, !dbg !45 + +return: ; preds = %entry + ret i32 0, !dbg !45 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0, !9, !16, !17, !20} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524307, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ] +!2 = metadata !{i32 524329, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 524305, i32 0, i32 4, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9} +!5 = metadata !{i32 524301, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 524301, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] +!8 = metadata !{i32 524324, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{null, metadata !12, metadata !13} +!12 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] +!13 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ] +!15 = metadata !{null, metadata !12} +!16 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ] +!17 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ] +!18 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ] +!19 = metadata !{metadata !13, metadata !13, metadata !1} +!20 = metadata !{i32 524334, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!21 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ] +!22 = metadata !{metadata !13} +!23 = metadata !{i32 524545, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13} ; [ DW_TAG_arg_variable ] +!24 = metadata !{i32 16, i32 0, metadata !17, null} +!25 = metadata !{i32 524545, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26} ; [ DW_TAG_arg_variable ] +!26 = metadata !{i32 524304, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ] +!27 = metadata !{i32 17, i32 0, metadata !28, null} +!28 = metadata !{i32 524299, metadata !17, i32 16, i32 0, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] +!29 = metadata !{i32 18, i32 0, metadata !28, null} +!30 = metadata !{i32 20, i32 0, metadata !28, null} +!31 = metadata !{i32 524545, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32} ; [ DW_TAG_arg_variable ] +!32 = metadata !{i32 524326, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ] +!33 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ] +!34 = metadata !{i32 11, i32 0, metadata !16, null} +!35 = metadata !{i32 11, i32 0, metadata !36, null} +!36 = metadata !{i32 524299, metadata !37, i32 11, i32 0, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!37 = metadata !{i32 524299, metadata !16, i32 11, i32 0, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!38 = metadata !{i32 524544, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1} ; [ DW_TAG_auto_variable ] +!39 = metadata !{i32 524299, metadata !40, i32 23, i32 0, metadata !2, i32 4} ; [ DW_TAG_lexical_block ] +!40 = metadata !{i32 524299, metadata !20, i32 23, i32 0, metadata !2, i32 3} ; [ DW_TAG_lexical_block ] +!41 = metadata !{i32 24, i32 0, metadata !39, null} +!42 = metadata !{i32 25, i32 0, metadata !39, null} +!43 = metadata !{i32 26, i32 0, metadata !39, null} +!44 = metadata !{i32 524544, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13} ; [ DW_TAG_auto_variable ] +!45 = metadata !{i32 27, i32 0, metadata !39, null} diff --git a/test/DebugInfo/printdbginfo2.ll b/test/DebugInfo/printdbginfo2.ll index e19395b..3193791 100644 --- a/test/DebugInfo/printdbginfo2.ll +++ b/test/DebugInfo/printdbginfo2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -print-dbginfo -disable-output | FileCheck %s +; RUN: opt < %s -print-dbginfo -disable-output |& FileCheck %s ; grep {%b is variable b of type x declared at x.c:7} %t1 ; grep {%2 is variable b of type x declared at x.c:7} %t1 ; grep {@c.1442 is variable c of type int declared at x.c:4} %t1 diff --git a/test/Feature/NamedMDNode.ll b/test/Feature/NamedMDNode.ll index 02a79f8..0c6bcd9 100644 --- a/test/Feature/NamedMDNode.ll +++ b/test/Feature/NamedMDNode.ll @@ -3,7 +3,7 @@ ;; Simple NamedMDNode !0 = metadata !{i32 42} !1 = metadata !{metadata !"foo"} -!llvm.stuff = !{!0, !1, null} +!llvm.stuff = !{!0, !1} !samename = !{!0, !1} declare void @samename() diff --git a/test/Feature/linker_private_linkages.ll b/test/Feature/linker_private_linkages.ll index 19bcbb4..f9f2908 100644 --- a/test/Feature/linker_private_linkages.ll +++ b/test/Feature/linker_private_linkages.ll @@ -4,3 +4,4 @@ @foo = linker_private hidden global i32 0 @bar = linker_private_weak hidden global i32 0 +@qux = linker_private_weak_def_auto global i32 0 diff --git a/test/Feature/metadata.ll b/test/Feature/metadata.ll index d43815b..9856b37 100644 --- a/test/Feature/metadata.ll +++ b/test/Feature/metadata.ll @@ -1,9 +1,11 @@ ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis ; PR7105 -define void @foo() { +define void @foo(i32 %x) { call void @llvm.zonk(metadata !1, i64 0, metadata !1) - ret void + store i32 0, i32* null, !whatever !0, !whatever_else !{}, !more !{metadata !"hello"} + store i32 0, i32* null, !whatever !{i32 %x, metadata !"hello", metadata !1, metadata !{}, metadata !2} + ret void, !whatever !{i32 %x} } declare void @llvm.zonk(metadata, i64, metadata) nounwind readnone diff --git a/test/Feature/unions.ll b/test/Feature/unions.ll deleted file mode 100644 index 3cf8c3c..0000000 --- a/test/Feature/unions.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: llvm-as < %s | llvm-dis > %t1.ll -; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll -; RUN: diff %t1.ll %t2.ll - -%union.anon = type union { i8, i32, float } - -@union1 = constant union { i32, i8 } { i32 4 } -@union2 = constant union { i32, i8 } insertvalue(union { i32, i8 } undef, i32 4, 0) -@union3 = common global %union.anon zeroinitializer, align 8 - -define void @"Unions" () { - ret void -} - diff --git a/test/FrontendC++/2009-07-15-LineNumbers.cpp b/test/FrontendC++/2009-07-15-LineNumbers.cpp index a8eda77..e1cc81f 100644 --- a/test/FrontendC++/2009-07-15-LineNumbers.cpp +++ b/test/FrontendC++/2009-07-15-LineNumbers.cpp @@ -1,7 +1,7 @@ // This is a regression test on debug info to make sure that we can // print line numbers in asm. // RUN: %llvmgcc -S -O0 -g %s -o - | \ -// RUN: llc --disable-fp-elim -O0 -relocation-model=pic | grep { 2009-07-15-LineNumbers.cpp:25$} +// RUN: llc --disable-fp-elim -O0 -relocation-model=pic | grep {2009-07-15-LineNumbers.cpp:25$} #include <stdlib.h> diff --git a/test/FrontendC++/2010-07-19-nowarn.cpp b/test/FrontendC++/2010-07-19-nowarn.cpp new file mode 100644 index 0000000..8742bf1 --- /dev/null +++ b/test/FrontendC++/2010-07-19-nowarn.cpp @@ -0,0 +1,21 @@ +// RUN: %llvmgcc %s -c -m32 -fasm-blocks -o /dev/null +// This should not warn about unreferenced label. 8195660. +// XFAIL: * +// XTARGET: x86,i386,i686 + +void quarterAsm(int array[], int len) +{ + __asm + { + mov esi, array; + mov ecx, len; + shr ecx, 2; +loop: + movdqa xmm0, [esi]; + psrad xmm0, 2; + movdqa [esi], xmm0; + add esi, 16; + sub ecx, 1; + jnz loop; + } +} diff --git a/test/FrontendC++/2010-07-23-DeclLoc.cpp b/test/FrontendC++/2010-07-23-DeclLoc.cpp new file mode 100644 index 0000000..c72de3b --- /dev/null +++ b/test/FrontendC++/2010-07-23-DeclLoc.cpp @@ -0,0 +1,86 @@ +// RUN: %llvmgxx -emit-llvm -S -g %s -o - | FileCheck %s +// Require the template function declaration refer to the correct filename. +// First, locate the function decl in metadata, and pluck out the file handle: +// CHECK: {{extract_dwarf_data_from_header.*extract_dwarf_data_from_header.*extract_dwarf_data_from_header.*[^ ]+", metadata !}}[[filehandle:[0-9]+]], +// Second: Require that filehandle refer to the correct filename: +// CHECK: {{^!}}[[filehandle]] = metadata {{![{].*}} metadata !"decl_should_be_here.hpp", +typedef long unsigned int __darwin_size_t; +typedef __darwin_size_t size_t; +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; +namespace std { + template<typename _Tp> class auto_ptr { + _Tp* _M_ptr; + public: + typedef _Tp element_type; + auto_ptr(element_type* __p = 0) throw() : _M_ptr(__p) { } + element_type& operator*() const throw() { } + }; +} +class Pointer32 { +public: + typedef uint32_t ptr_t; + typedef uint32_t size_t; +}; +class Pointer64 { +public: + typedef uint64_t ptr_t; + typedef uint64_t size_t; +}; +class BigEndian {}; +class LittleEndian {}; +template <typename _SIZE, typename _ENDIANNESS> class SizeAndEndianness { +public: + typedef _SIZE SIZE; +}; +typedef SizeAndEndianness<Pointer32, LittleEndian> ISA32Little; +typedef SizeAndEndianness<Pointer32, BigEndian> ISA32Big; +typedef SizeAndEndianness<Pointer64, LittleEndian> ISA64Little; +typedef SizeAndEndianness<Pointer64, BigEndian> ISA64Big; +template <typename SIZE> class TRange { +protected: + typename SIZE::ptr_t _location; + typename SIZE::size_t _length; + TRange(typename SIZE::ptr_t location, typename SIZE::size_t length) : _location(location), _length(length) { } +}; +template <typename SIZE, typename T> class TRangeValue : public TRange<SIZE> { + T _value; +public: + TRangeValue(typename SIZE::ptr_t location, typename SIZE::size_t length, T value) : TRange<SIZE>(location, length), _value(value) {}; +}; +template <typename SIZE> class TAddressRelocator {}; +class CSCppSymbolOwner{}; +class CSCppSymbolOwnerData{}; +template <typename SIZE> class TRawSymbolOwnerData +{ + TRangeValue< SIZE, uint8_t* > _TEXT_text_section; + const char* _dsym_path; + uint32_t _dylib_current_version; + uint32_t _dylib_compatibility_version; +public: + TRawSymbolOwnerData() : + _TEXT_text_section(0, 0, __null), _dsym_path(__null), _dylib_current_version(0), _dylib_compatibility_version(0) {} +}; +template <typename SIZE_AND_ENDIANNESS> class TExtendedMachOHeader {}; +# 16 "decl_should_be_here.hpp" +template <typename SIZE_AND_ENDIANNESS> void extract_dwarf_data_from_header(TExtendedMachOHeader<SIZE_AND_ENDIANNESS>& header, + TRawSymbolOwnerData<typename SIZE_AND_ENDIANNESS::SIZE>& symbol_owner_data, + TAddressRelocator<typename SIZE_AND_ENDIANNESS::SIZE>* address_relocator) {} +struct CSCppSymbolOwnerHashFunctor { + size_t operator()(const CSCppSymbolOwner& symbol_owner) const { +# 97 "wrong_place_for_decl.cpp" + } +}; +template <typename SIZE_AND_ENDIANNESS> CSCppSymbolOwnerData* create_symbol_owner_data_arch_specific(CSCppSymbolOwner* symbol_owner, const char* dsym_path) { + typedef typename SIZE_AND_ENDIANNESS::SIZE SIZE; + std::auto_ptr< TRawSymbolOwnerData<SIZE> > data(new TRawSymbolOwnerData<SIZE>()); + std::auto_ptr< TExtendedMachOHeader<SIZE_AND_ENDIANNESS> > header; + extract_dwarf_data_from_header(*header, *data, (TAddressRelocator<typename SIZE_AND_ENDIANNESS::SIZE>*)__null); +} +CSCppSymbolOwnerData* create_symbol_owner_data2(CSCppSymbolOwner* symbol_owner, const char* dsym_path) { + create_symbol_owner_data_arch_specific< ISA32Little >(symbol_owner, dsym_path); + create_symbol_owner_data_arch_specific< ISA32Big >(symbol_owner, dsym_path); + create_symbol_owner_data_arch_specific< ISA64Little >(symbol_owner, dsym_path); + create_symbol_owner_data_arch_specific< ISA64Big >(symbol_owner, dsym_path); +} diff --git a/test/FrontendC++/2010-08-31-ByValArg.cpp b/test/FrontendC++/2010-08-31-ByValArg.cpp new file mode 100644 index 0000000..be0d354 --- /dev/null +++ b/test/FrontendC++/2010-08-31-ByValArg.cpp @@ -0,0 +1,53 @@ +// This regression test checks byval arguments' debug info. +// Radar 8367011 +// RUN: %llvmgcc -S -O0 -g %s -o - | \ +// RUN: llc --disable-fp-elim -o %t.s -O0 -relocation-model=pic +// RUN: %compile_c %t.s -o %t.o +// RUN: %link %t.o -o %t.exe +// RUN: echo {break get\nrun\np missing_arg.b} > %t.in +// RUN: gdb -q -batch -n -x %t.in %t.exe | tee %t.out | \ +// RUN: grep {1 = 4242} + +// XTARGET: x86_64-apple-darwin + +class EVT { +public: + int a; + int b; + int c; +}; + +class VAL { +public: + int x; + int y; +}; +void foo(EVT e); +EVT bar(); + +void get(int *i, unsigned dl, VAL v, VAL *p, unsigned n, EVT missing_arg) { +//CHECK: .ascii "missing_arg" + EVT e = bar(); + if (dl == n) + foo(missing_arg); +} + + +EVT bar() { + EVT e; + return e; +} + +void foo(EVT e) {} + +int main(){ + VAL v; + EVT ma; + ma.a = 1; + ma.b = 4242; + ma.c = 3; + int i = 42; + get (&i, 1, v, &v, 2, ma); + return 0; +} + diff --git a/test/FrontendC/2008-03-24-BitField-And-Alloca.c b/test/FrontendC/2008-03-24-BitField-And-Alloca.c index 291f036..641bcf1 100644 --- a/test/FrontendC/2008-03-24-BitField-And-Alloca.c +++ b/test/FrontendC/2008-03-24-BitField-And-Alloca.c @@ -1,5 +1,5 @@ // RUN: %llvmgcc -O2 -S %s -o - | not grep alloca -// RUN: %llvmgcc -m32 -O2 -S %s -o - | not grep store +// RUN: %llvmgcc -m32 -O2 -S %s -o - | not grep {store } enum { PP_C, diff --git a/test/FrontendC/2010-05-18-asmsched.c b/test/FrontendC/2010-05-18-asmsched.c index 34abbe3..12e9140 100644 --- a/test/FrontendC/2010-05-18-asmsched.c +++ b/test/FrontendC/2010-05-18-asmsched.c @@ -1,4 +1,4 @@ -// RUN: %llvmgcc %s -c -O3 -m64 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin | FileCheck %s +// RUN: %llvmgcc %s -c -O3 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin | FileCheck %s // r9 used to be clobbered before its value was moved to r10. 7993104. void foo(int x, int y) { @@ -14,4 +14,4 @@ void foo(int x, int y) { lr9 = x; lr10 = foo; asm volatile("bar" : "=r"(lr9) : "r"(lr9), "r"(lr10)); -}
\ No newline at end of file +} diff --git a/test/FrontendC/2010-07-14-overconservative-align.c b/test/FrontendC/2010-07-14-overconservative-align.c index 65fbdb8..1744ba8 100644 --- a/test/FrontendC/2010-07-14-overconservative-align.c +++ b/test/FrontendC/2010-07-14-overconservative-align.c @@ -1,4 +1,4 @@ -// RUN: %llvmgcc %s -emit-llvm -m64 -S -o - | FileCheck %s +// RUN: %llvmgcc %s -emit-llvm -S -o - | FileCheck %s // PR 5995 struct s { int word; @@ -9,6 +9,6 @@ struct s { void func (struct s *s) { -// CHECK: load %struct.s** %s_addr, align 8 +// CHECK: load %struct.s** %s_addr, align {{[48]}} s->word = 0; } diff --git a/test/FrontendC/2010-07-14-ref-off-end.c b/test/FrontendC/2010-07-14-ref-off-end.c index 6ccd05b..c7fdd95 100644 --- a/test/FrontendC/2010-07-14-ref-off-end.c +++ b/test/FrontendC/2010-07-14-ref-off-end.c @@ -17,8 +17,8 @@ return(char)s->c; } main() { -// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 ; <i32*> [#uses=2] -// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 ; <i32*> [#uses=2] +// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 +// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 struct T t; t.i=0xff; t.c=0xffff11; diff --git a/test/FrontendC/2010-07-27-MinNoFoldConst.c b/test/FrontendC/2010-07-27-MinNoFoldConst.c new file mode 100644 index 0000000..7cd8b4c --- /dev/null +++ b/test/FrontendC/2010-07-27-MinNoFoldConst.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -S %s -o - | FileCheck %s +extern int printf(const char *, ...); +static void bad(unsigned int v1, unsigned int v2) { + printf("%u\n", 1631381461u * (((v2 - 1273463329u <= v1 - 1273463329u) ? v2 : v1) - 1273463329u) + 121322179u); +} +// Radar 8198362 +// GCC FE wants to convert the above to +// 1631381461u * MIN(v2 - 1273463329u, v1 - 1273463329u) +// and then to +// MIN(1631381461u * v2 - 4047041419, 1631381461u * v1 - 4047041419) +// +// 1631381461u * 1273463329u = 2077504466193943669, but 32-bit overflow clips +// this to 4047041419. This breaks the comparision implicit in the MIN(). +// Two multiply operations suggests the bad optimization is happening; +// one multiplication, after the MIN(), is correct. +// CHECK: mul +// CHECK-NOT: mul +// CHECK: ret diff --git a/test/FrontendC/2010-08-12-asm-aggr-arg.c b/test/FrontendC/2010-08-12-asm-aggr-arg.c new file mode 100644 index 0000000..81ec14b --- /dev/null +++ b/test/FrontendC/2010-08-12-asm-aggr-arg.c @@ -0,0 +1,16 @@ +// RUN: %llvmgcc %s -S -O0 -o - | FileCheck %s +// Radar 8288710: A small aggregate can be passed as an integer. Make sure +// we don't get an error with "input constraint with a matching output +// constraint of incompatible type!" + +struct wrapper { + int i; +}; + +// CHECK: xyz +int test(int i) { + struct wrapper w; + w.i = i; + __asm__("xyz" : "=r" (w) : "0" (w)); + return w.i; +} diff --git a/test/FrontendC/asm-reg-var-local.c b/test/FrontendC/asm-reg-var-local.c new file mode 100644 index 0000000..22bd43c --- /dev/null +++ b/test/FrontendC/asm-reg-var-local.c @@ -0,0 +1,32 @@ +// RUN: %llvmgcc %s -S -o - | FileCheck %s +// Exercise various use cases for local asm "register variables". +// XFAIL: * +// XTARGET: x86_64,i686,i386 + +int foo() { +// CHECK: %a = alloca i32 + + register int a asm("rsi")=5; +// CHECK: store i32 5, i32* %a, align 4 + + asm volatile("; %0 This asm defines rsi" : "=r"(a)); +// CHECK: %asmtmp = call i32 asm sideeffect "; $0 This asm defines rsi", "={rsi} +// CHECK: store i32 %asmtmp, i32* %a + + a = 42; +// CHECK: store i32 42, i32* %a, align 4 + + asm volatile("; %0 This asm uses rsi" : : "r"(a)); +// CHECK: %1 = load i32* %a, align 4 +// CHECK: call void asm sideeffect "", "{rsi}"(i32 %1) nounwind +// CHECK: %2 = call i32 asm sideeffect "", "={rsi}"() nounwind +// CHECK: call void asm sideeffect "; $0 This asm uses rsi", "{rsi},~{dirflag},~{fpsr},~{flags}"(i32 %2) + + return a; +// CHECK: %3 = load i32* %a, align 4 +// CHECK: call void asm sideeffect "", "{rsi}"(i32 %3) nounwind +// CHECK: %4 = call i32 asm sideeffect "", "={rsi}"() nounwind +// CHECK: store i32 %4, i32* %0, align 4 +// CHECK: %5 = load i32* %0, align 4 +// CHECK: store i32 %5, i32* %retval, align 4 +} diff --git a/test/FrontendC/cstring-align.c b/test/FrontendC/cstring-align.c index b9ec281..764126e 100644 --- a/test/FrontendC/cstring-align.c +++ b/test/FrontendC/cstring-align.c @@ -1,6 +1,4 @@ -// RUN: %llvmgcc %s -c -Os -m32 -emit-llvm -o - | llc -march=x86 -mtriple=i386-apple-darwin10 | FileCheck %s -check-prefix=DARWIN32 -// RUN: %llvmgcc %s -c -Os -m64 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=DARWIN64 -// XTARGET: darwin +// RUN: %llvmgcc %s -c -Os -emit-llvm -o - | llc -march=x86 -mtriple=i386-apple-darwin10 | FileCheck %s extern void func(const char *, const char *); @@ -8,10 +6,6 @@ void long_function_name() { func("%s: the function name", __func__); } -// DARWIN64: .align 4 -// DARWIN64: ___func__. -// DARWIN64: .asciz "long_function_name" - -// DARWIN32: .align 4 -// DARWIN32: ___func__. -// DARWIN32: .asciz "long_function_name" +// CHECK: .align 4 +// CHECK: ___func__. +// CHECK: .asciz "long_function_name" diff --git a/test/FrontendC/misaligned-param.c b/test/FrontendC/misaligned-param.c new file mode 100644 index 0000000..b4fcfe3 --- /dev/null +++ b/test/FrontendC/misaligned-param.c @@ -0,0 +1,15 @@ +// RUN: %llvmgcc %s -m32 -S -o - | FileCheck %s +// Misaligned parameter must be memcpy'd to correctly aligned temporary. +// XFAIL: * +// XTARGET: i386-apple-darwin,i686-apple-darwin,x86_64-apple-darwin + +struct s { int x; long double y; }; +long double foo(struct s x, int i, struct s y) { +// CHECK: foo +// CHECK: %x_addr = alloca %struct.s, align 16 +// CHECK: %y_addr = alloca %struct.s, align 16 +// CHECK: memcpy +// CHECK: memcpy +// CHECK: bar + return bar(&x, &y); +} diff --git a/test/FrontendC/vla-1.c b/test/FrontendC/vla-1.c index 76f6c53..77f78a5 100644 --- a/test/FrontendC/vla-1.c +++ b/test/FrontendC/vla-1.c @@ -1,5 +1,6 @@ -// RUN: true -// %llvmgcc -std=gnu99 %s -S |& grep {error: "is greater than the stack alignment" } +// RUN: %llvmgcc_only -std=gnu99 %s -S |& grep {warning: alignment for} +// ppc does not support this feature, and gets a fatal error at runtime. +// XFAIL: powerpc int foo(int a) { diff --git a/test/FrontendC/vla-2.c b/test/FrontendC/vla-2.c new file mode 100644 index 0000000..555cfc7 --- /dev/null +++ b/test/FrontendC/vla-2.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -std=gnu99 %s -S -o - | grep ".*alloca.*align 16" + +extern void bar(int[]); + +void foo(int a) +{ + int var[a] __attribute__((__aligned__(16))); + bar(var); + return; +} diff --git a/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm b/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm new file mode 100644 index 0000000..298844e --- /dev/null +++ b/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm @@ -0,0 +1,27 @@ +// RUN: not %llvmgcc %s -S -emit-llvm -o - |& FileCheck %s +// This tests for a specific diagnostic in LLVM-GCC. +// Clang compiles this correctly with no diagnostic, +// ergo this test will fail with a Clang-based front-end. +class TFENodeVector { +public: + TFENodeVector(const TFENodeVector& inNodeVector); + TFENodeVector(); +}; + +@interface TWindowHistoryEntry {} +@property (assign, nonatomic) TFENodeVector targetPath; +@end + +@implementation TWindowHistoryEntry +@synthesize targetPath; +- (void) initWithWindowController { + TWindowHistoryEntry* entry; + TFENodeVector newPath; + // CHECK: setting a C++ non-POD object value is not implemented +#ifdef __clang__ +#error setting a C++ non-POD object value is not implemented +#endif + entry.targetPath = newPath; + [entry setTargetPath:newPath]; +} +@end diff --git a/test/FrontendObjC++/2010-08-04-Template.mm b/test/FrontendObjC++/2010-08-04-Template.mm new file mode 100644 index 0000000..d038340 --- /dev/null +++ b/test/FrontendObjC++/2010-08-04-Template.mm @@ -0,0 +1,10 @@ +// RUN: %llvmgcc %s -S -emit-llvm +struct TRunSoon { + template <class P1> static void Post() {} +}; + +@implementation TPrivsTableViewMainController +- (void) applyToEnclosed { + TRunSoon::Post<int>(); +} +@end diff --git a/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm b/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm new file mode 100644 index 0000000..b33d730 --- /dev/null +++ b/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm @@ -0,0 +1,16 @@ +// RUN: %llvmgcc %s -S -emit-llvm +struct TFENode { + TFENode(const TFENode& inNode); +}; + +@interface TIconViewController +- (const TFENode&) target; +@end + +void sortAllChildrenForNode(const TFENode&node); + +@implementation TIconViewController +- (void) setArrangeBy { + sortAllChildrenForNode(self.target); +} +@end diff --git a/test/FrontendObjC/2009-08-17-DebugInfo.m b/test/FrontendObjC/2009-08-17-DebugInfo.m index 2c72e95..8ed7c24 100644 --- a/test/FrontendObjC/2009-08-17-DebugInfo.m +++ b/test/FrontendObjC/2009-08-17-DebugInfo.m @@ -5,7 +5,7 @@ // RUN: %link %t.o -o %t.exe -framework Foundation // RUN: echo {break randomFunc\n} > %t.in // RUN: gdb -q -batch -n -x %t.in %t.exe | tee %t.out | \ -// RUN: grep {Breakpoint 1 at 0x.*: file 2009-08-17-DebugInfo.m, line 21} +// RUN: grep {Breakpoint 1 at 0x.*: file .*2009-08-17-DebugInfo.m, line 21} // XTARGET: darwin @interface MyClass { diff --git a/test/Integer/a15.ll b/test/Integer/a15.ll deleted file mode 100644 index 5c9dc3b..0000000 --- a/test/Integer/a15.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 15 bits -; -@b = constant i15 add(i15 32767, i15 1) -@c = constant i15 add(i15 32767, i15 32767) -@d = constant i15 add(i15 32760, i15 8) -@e = constant i15 sub(i15 0 , i15 1) -@f = constant i15 sub(i15 0 , i15 32767) -@g = constant i15 sub(i15 2 , i15 32767) - -@h = constant i15 shl(i15 1 , i15 15) -@i = constant i15 shl(i15 1 , i15 14) -@j = constant i15 lshr(i15 32767 , i15 14) -@l = constant i15 ashr(i15 32767 , i15 14) - -@n = constant i15 mul(i15 32767, i15 2) -@q = constant i15 mul(i15 -16383,i15 -3) -@r = constant i15 sdiv(i15 -1, i15 16383) -@s = constant i15 udiv(i15 -1, i15 16383) -@t = constant i15 srem(i15 1, i15 32766) -@u = constant i15 urem(i15 32767,i15 -1) -@o = constant i15 trunc( i16 32768 to i15 ) -@p = constant i15 trunc( i16 32767 to i15 ) -@v = constant i15 srem(i15 -1, i15 768) - diff --git a/test/Integer/a15.ll.out b/test/Integer/a15.ll.out deleted file mode 100644 index 5195cdf..0000000 --- a/test/Integer/a15.ll.out +++ /dev/null @@ -1,21 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i15 0 ; <i15*> [#uses=0] -@c = constant i15 -2 ; <i15*> [#uses=0] -@d = constant i15 0 ; <i15*> [#uses=0] -@e = constant i15 -1 ; <i15*> [#uses=0] -@f = constant i15 1 ; <i15*> [#uses=0] -@g = constant i15 3 ; <i15*> [#uses=0] -@h = constant i15 undef ; <i15*> [#uses=0] -@i = constant i15 -16384 ; <i15*> [#uses=0] -@j = constant i15 1 ; <i15*> [#uses=0] -@l = constant i15 -1 ; <i15*> [#uses=0] -@n = constant i15 -2 ; <i15*> [#uses=0] -@q = constant i15 16381 ; <i15*> [#uses=0] -@r = constant i15 0 ; <i15*> [#uses=0] -@s = constant i15 2 ; <i15*> [#uses=0] -@t = constant i15 1 ; <i15*> [#uses=0] -@u = constant i15 0 ; <i15*> [#uses=0] -@o = constant i15 0 ; <i15*> [#uses=0] -@p = constant i15 -1 ; <i15*> [#uses=0] -@v = constant i15 -1 ; <i15*> [#uses=0] diff --git a/test/Integer/a17.ll b/test/Integer/a17.ll deleted file mode 100644 index db03e7c..0000000 --- a/test/Integer/a17.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 17 bits -; -@b = constant i17 add(i17 131071, i17 1) -@c = constant i17 add(i17 131071, i17 131071) -@d = constant i17 add(i17 131064, i17 8) -@e = constant i17 sub(i17 0 , i17 1) -@f = constant i17 sub(i17 0 , i17 131071) -@g = constant i17 sub(i17 2 , i17 131071) - -@h = constant i17 shl(i17 1 , i17 17) -@i = constant i17 shl(i17 1 , i17 16) -@j = constant i17 lshr(i17 131071 , i17 16) -@l = constant i17 ashr(i17 131071 , i17 16) - -@n = constant i17 mul(i17 131071, i17 2) -@q = constant i17 sdiv(i17 -1, i17 65535) -@r = constant i17 udiv(i17 -1, i17 65535) -@s = constant i17 srem(i17 1, i17 131070) -@t = constant i17 urem(i17 131071,i17 -1) -@o = constant i17 trunc( i18 131072 to i17 ) -@p = constant i17 trunc( i18 131071 to i17 ) -@v = constant i17 srem(i17 -1, i17 15) diff --git a/test/Integer/a17.ll.out b/test/Integer/a17.ll.out deleted file mode 100644 index ba66412..0000000 --- a/test/Integer/a17.ll.out +++ /dev/null @@ -1,20 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i17 0 ; <i17*> [#uses=0] -@c = constant i17 -2 ; <i17*> [#uses=0] -@d = constant i17 0 ; <i17*> [#uses=0] -@e = constant i17 -1 ; <i17*> [#uses=0] -@f = constant i17 1 ; <i17*> [#uses=0] -@g = constant i17 3 ; <i17*> [#uses=0] -@h = constant i17 undef ; <i17*> [#uses=0] -@i = constant i17 -65536 ; <i17*> [#uses=0] -@j = constant i17 1 ; <i17*> [#uses=0] -@l = constant i17 -1 ; <i17*> [#uses=0] -@n = constant i17 -2 ; <i17*> [#uses=0] -@q = constant i17 0 ; <i17*> [#uses=0] -@r = constant i17 2 ; <i17*> [#uses=0] -@s = constant i17 1 ; <i17*> [#uses=0] -@t = constant i17 0 ; <i17*> [#uses=0] -@o = constant i17 0 ; <i17*> [#uses=0] -@p = constant i17 -1 ; <i17*> [#uses=0] -@v = constant i17 -1 ; <i17*> [#uses=0] diff --git a/test/Integer/a31.ll b/test/Integer/a31.ll deleted file mode 100644 index c0c571f..0000000 --- a/test/Integer/a31.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 31 bits -; -@b = constant i31 add(i31 2147483647, i31 1) -@c = constant i31 add(i31 2147483647, i31 2147483647) -@d = constant i31 add(i31 2147483640, i31 8) -@e = constant i31 sub(i31 0 , i31 1) -@f = constant i31 sub(i31 0 , i31 2147483647) -@g = constant i31 sub(i31 2 , i31 2147483647) - -@h = constant i31 shl(i31 1 , i31 31) -@i = constant i31 shl(i31 1 , i31 30) -@j = constant i31 lshr(i31 2147483647 , i31 30) -@l = constant i31 ashr(i31 2147483647 , i31 30) - -@n = constant i31 mul(i31 2147483647, i31 2) -@q = constant i31 sdiv(i31 -1, i31 1073741823) -@r = constant i31 udiv(i31 -1, i31 1073741823) -@s = constant i31 srem(i31 1, i31 2147483646) -@t = constant i31 urem(i31 2147483647,i31 -1) -@o = constant i31 trunc( i32 2147483648 to i31 ) -@p = constant i31 trunc( i32 2147483647 to i31 ) -@u = constant i31 srem(i31 -3, i31 17) diff --git a/test/Integer/a31.ll.out b/test/Integer/a31.ll.out deleted file mode 100644 index 7407a74..0000000 --- a/test/Integer/a31.ll.out +++ /dev/null @@ -1,20 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i31 0 ; <i31*> [#uses=0] -@c = constant i31 -2 ; <i31*> [#uses=0] -@d = constant i31 0 ; <i31*> [#uses=0] -@e = constant i31 -1 ; <i31*> [#uses=0] -@f = constant i31 1 ; <i31*> [#uses=0] -@g = constant i31 3 ; <i31*> [#uses=0] -@h = constant i31 undef ; <i31*> [#uses=0] -@i = constant i31 -1073741824 ; <i31*> [#uses=0] -@j = constant i31 1 ; <i31*> [#uses=0] -@l = constant i31 -1 ; <i31*> [#uses=0] -@n = constant i31 -2 ; <i31*> [#uses=0] -@q = constant i31 0 ; <i31*> [#uses=0] -@r = constant i31 2 ; <i31*> [#uses=0] -@s = constant i31 1 ; <i31*> [#uses=0] -@t = constant i31 0 ; <i31*> [#uses=0] -@o = constant i31 0 ; <i31*> [#uses=0] -@p = constant i31 -1 ; <i31*> [#uses=0] -@u = constant i31 -3 ; <i31*> [#uses=0] diff --git a/test/Integer/a33.ll b/test/Integer/a33.ll deleted file mode 100644 index f328907..0000000 --- a/test/Integer/a33.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 33 bits -; -@b = constant i33 add(i33 8589934591, i33 1) -@c = constant i33 add(i33 8589934591, i33 8589934591) -@d = constant i33 add(i33 8589934584, i33 8) -@e = constant i33 sub(i33 0 , i33 1) -@f = constant i33 sub(i33 0 , i33 8589934591) -@g = constant i33 sub(i33 2 , i33 8589934591) - -@h = constant i33 shl(i33 1 , i33 33) -@i = constant i33 shl(i33 1 , i33 32) -@j = constant i33 lshr(i33 8589934591 , i33 32) -@l = constant i33 ashr(i33 8589934591 , i33 32) - -@n = constant i33 mul(i33 8589934591, i33 2) -@q = constant i33 sdiv(i33 -1, i33 4294967295) -@r = constant i33 udiv(i33 -1, i33 4294967295) -@s = constant i33 srem(i33 1, i33 8589934590) -@t = constant i33 urem(i33 8589934591,i33 -1) -@o = constant i33 trunc( i34 8589934592 to i33 ) -@p = constant i33 trunc( i34 8589934591 to i33 ) -@u = constant i33 srem(i33 -1, i33 17) - diff --git a/test/Integer/a33.ll.out b/test/Integer/a33.ll.out deleted file mode 100644 index 6cd61ee..0000000 --- a/test/Integer/a33.ll.out +++ /dev/null @@ -1,20 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i33 0 ; <i33*> [#uses=0] -@c = constant i33 -2 ; <i33*> [#uses=0] -@d = constant i33 0 ; <i33*> [#uses=0] -@e = constant i33 -1 ; <i33*> [#uses=0] -@f = constant i33 1 ; <i33*> [#uses=0] -@g = constant i33 3 ; <i33*> [#uses=0] -@h = constant i33 undef ; <i33*> [#uses=0] -@i = constant i33 -4294967296 ; <i33*> [#uses=0] -@j = constant i33 1 ; <i33*> [#uses=0] -@l = constant i33 -1 ; <i33*> [#uses=0] -@n = constant i33 -2 ; <i33*> [#uses=0] -@q = constant i33 0 ; <i33*> [#uses=0] -@r = constant i33 2 ; <i33*> [#uses=0] -@s = constant i33 1 ; <i33*> [#uses=0] -@t = constant i33 0 ; <i33*> [#uses=0] -@o = constant i33 0 ; <i33*> [#uses=0] -@p = constant i33 -1 ; <i33*> [#uses=0] -@u = constant i33 -1 ; <i33*> [#uses=0] diff --git a/test/Integer/a63.ll b/test/Integer/a63.ll deleted file mode 100644 index 052ecd5..0000000 --- a/test/Integer/a63.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 63 bits -; -@b = constant i63 add(i63 9223372036854775807, i63 1) -@c = constant i63 add(i63 9223372036854775807, i63 9223372036854775807) -@d = constant i63 add(i63 9223372036854775800, i63 8) -@e = constant i63 sub(i63 0 , i63 1) -@f = constant i63 sub(i63 0 , i63 9223372036854775807) -@g = constant i63 sub(i63 2 , i63 9223372036854775807) - -@h = constant i63 shl(i63 1 , i63 63) -@i = constant i63 shl(i63 1 , i63 62) -@j = constant i63 lshr(i63 9223372036854775807 , i63 62) -@l = constant i63 ashr(i63 9223372036854775807 , i63 62) - -@n = constant i63 mul(i63 9223372036854775807, i63 2) -@q = constant i63 sdiv(i63 -1, i63 4611686018427387903) -@u = constant i63 sdiv(i63 -1, i63 1) -@r = constant i63 udiv(i63 -1, i63 4611686018427387903) -@s = constant i63 srem(i63 3, i63 9223372036854775806) -@t = constant i63 urem(i63 9223372036854775807,i63 -1) -@o = constant i63 trunc( i64 9223372036854775808 to i63 ) -@p = constant i63 trunc( i64 9223372036854775807 to i63 ) diff --git a/test/Integer/a63.ll.out b/test/Integer/a63.ll.out deleted file mode 100644 index 18dff5a..0000000 --- a/test/Integer/a63.ll.out +++ /dev/null @@ -1,20 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i63 0 ; <i63*> [#uses=0] -@c = constant i63 -2 ; <i63*> [#uses=0] -@d = constant i63 0 ; <i63*> [#uses=0] -@e = constant i63 -1 ; <i63*> [#uses=0] -@f = constant i63 1 ; <i63*> [#uses=0] -@g = constant i63 3 ; <i63*> [#uses=0] -@h = constant i63 undef ; <i63*> [#uses=0] -@i = constant i63 -4611686018427387904 ; <i63*> [#uses=0] -@j = constant i63 1 ; <i63*> [#uses=0] -@l = constant i63 -1 ; <i63*> [#uses=0] -@n = constant i63 -2 ; <i63*> [#uses=0] -@q = constant i63 0 ; <i63*> [#uses=0] -@u = constant i63 -1 ; <i63*> [#uses=0] -@r = constant i63 2 ; <i63*> [#uses=0] -@s = constant i63 1 ; <i63*> [#uses=0] -@t = constant i63 0 ; <i63*> [#uses=0] -@o = constant i63 0 ; <i63*> [#uses=0] -@p = constant i63 -1 ; <i63*> [#uses=0] diff --git a/test/Integer/a7.ll b/test/Integer/a7.ll deleted file mode 100644 index 1edb35f..0000000 --- a/test/Integer/a7.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 7 bits -; -@b = constant i7 add(i7 127, i7 1) -@q = constant i7 add(i7 -64, i7 -1) -@c = constant i7 add(i7 127, i7 127) -@d = constant i7 add(i7 120, i7 8) -@e = constant i7 sub(i7 0 , i7 1) -@f = constant i7 sub(i7 0 , i7 127) -@g = constant i7 sub(i7 2 , i7 127) -@r = constant i7 sub(i7 -3, i7 120) -@s = constant i7 sub(i7 -3, i7 -8) - -@h = constant i7 shl(i7 1 , i7 7) -@i = constant i7 shl(i7 1 , i7 6) -@j = constant i7 lshr(i7 127 , i7 6) -@l = constant i7 ashr(i7 127 , i7 6) -@m2= constant i7 ashr(i7 -1 , i7 3) - -@n = constant i7 mul(i7 127, i7 2) -@t = constant i7 mul(i7 -63, i7 -2) -@u = constant i7 mul(i7 -32, i7 2) -@v = constant i7 sdiv(i7 -1, i7 63) -@w = constant i7 udiv(i7 -1, i7 63) -@x = constant i7 srem(i7 1 , i7 126) -@y = constant i7 urem(i7 127, i7 -1) -@o = constant i7 trunc( i8 128 to i7 ) -@p = constant i7 trunc( i8 255 to i7 ) - diff --git a/test/Integer/a7.ll.out b/test/Integer/a7.ll.out deleted file mode 100644 index 250925d..0000000 --- a/test/Integer/a7.ll.out +++ /dev/null @@ -1,25 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i7 0 ; <i7*> [#uses=0] -@q = constant i7 63 ; <i7*> [#uses=0] -@c = constant i7 -2 ; <i7*> [#uses=0] -@d = constant i7 0 ; <i7*> [#uses=0] -@e = constant i7 -1 ; <i7*> [#uses=0] -@f = constant i7 1 ; <i7*> [#uses=0] -@g = constant i7 3 ; <i7*> [#uses=0] -@r = constant i7 5 ; <i7*> [#uses=0] -@s = constant i7 5 ; <i7*> [#uses=0] -@h = constant i7 undef ; <i7*> [#uses=0] -@i = constant i7 -64 ; <i7*> [#uses=0] -@j = constant i7 1 ; <i7*> [#uses=0] -@l = constant i7 -1 ; <i7*> [#uses=0] -@m2 = constant i7 -1 ; <i7*> [#uses=0] -@n = constant i7 -2 ; <i7*> [#uses=0] -@t = constant i7 -2 ; <i7*> [#uses=0] -@u = constant i7 -64 ; <i7*> [#uses=0] -@v = constant i7 0 ; <i7*> [#uses=0] -@w = constant i7 2 ; <i7*> [#uses=0] -@x = constant i7 1 ; <i7*> [#uses=0] -@y = constant i7 0 ; <i7*> [#uses=0] -@o = constant i7 0 ; <i7*> [#uses=0] -@p = constant i7 -1 ; <i7*> [#uses=0] diff --git a/test/Integer/a9.ll b/test/Integer/a9.ll deleted file mode 100644 index 711ec82..0000000 --- a/test/Integer/a9.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llvm-as %s -o - | llvm-dis > %t.ll -; RUN: diff %t.ll %s.out - -; test 9 bits -; -@b = constant i9 add(i9 511, i9 1) -@c = constant i9 add(i9 511, i9 511) -@d = constant i9 add(i9 504, i9 8) -@e = constant i9 sub(i9 0 , i9 1) -@f = constant i9 sub(i9 0 , i9 511) -@g = constant i9 sub(i9 2 , i9 511) - -@h = constant i9 shl(i9 1 , i9 9) -@i = constant i9 shl(i9 1 , i9 8) -@j = constant i9 lshr(i9 511 , i9 8) -@l = constant i9 ashr(i9 511 , i9 8) - -@n = constant i9 mul(i9 511, i9 2) -@q = constant i9 sdiv(i9 511, i9 2) -@r = constant i9 udiv(i9 511, i9 2) -@s = constant i9 urem(i9 511, i9 -1) -@t = constant i9 srem(i9 1, i9 510) -@o = constant i9 trunc( i10 512 to i9 ) -@p = constant i9 trunc( i10 511 to i9 ) - diff --git a/test/Integer/a9.ll.out b/test/Integer/a9.ll.out deleted file mode 100644 index 6e38062..0000000 --- a/test/Integer/a9.ll.out +++ /dev/null @@ -1,19 +0,0 @@ -; ModuleID = '<stdin>' - -@b = constant i9 0 ; <i9*> [#uses=0] -@c = constant i9 -2 ; <i9*> [#uses=0] -@d = constant i9 0 ; <i9*> [#uses=0] -@e = constant i9 -1 ; <i9*> [#uses=0] -@f = constant i9 1 ; <i9*> [#uses=0] -@g = constant i9 3 ; <i9*> [#uses=0] -@h = constant i9 undef ; <i9*> [#uses=0] -@i = constant i9 -256 ; <i9*> [#uses=0] -@j = constant i9 1 ; <i9*> [#uses=0] -@l = constant i9 -1 ; <i9*> [#uses=0] -@n = constant i9 -2 ; <i9*> [#uses=0] -@q = constant i9 0 ; <i9*> [#uses=0] -@r = constant i9 255 ; <i9*> [#uses=0] -@s = constant i9 0 ; <i9*> [#uses=0] -@t = constant i9 1 ; <i9*> [#uses=0] -@o = constant i9 0 ; <i9*> [#uses=0] -@p = constant i9 -1 ; <i9*> [#uses=0] diff --git a/test/LLVMC/Alias.td b/test/LLVMC/Alias.td new file mode 100644 index 0000000..5d37889 --- /dev/null +++ b/test/LLVMC/Alias.td @@ -0,0 +1,24 @@ +// Test alias generation. +// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t +// RUN: FileCheck -input-file %t %s +// RUN: %compile_cxx %t +// XFAIL: vg_leak + +include "llvm/CompilerDriver/Common.td" + +def OptList : OptionList<[ + +(switch_option "dummy1", (help "none")), +// CHECK: cl::alias Alias_dummy2 +(alias_option "dummy2", "dummy1") +]>; + +def dummy_tool : Tool<[ +(command "dummy_cmd"), +(in_language "dummy_lang"), +(out_language "dummy_lang"), +(actions (case + (switch_on "dummy1"), (forward "dummy1"))) +]>; + +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/AppendCmdHook.td b/test/LLVMC/AppendCmdHook.td index 254d5ea..c85f002 100644 --- a/test/LLVMC/AppendCmdHook.td +++ b/test/LLVMC/AppendCmdHook.td @@ -1,7 +1,7 @@ // Check that hooks can be invoked from 'append_cmd'. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -26,4 +26,4 @@ def dummy_tool : Tool<[ (switch_on "dummy2"), (append_cmd "-arg3 $CALL(MyHook)"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/EmptyCompilationGraph.td b/test/LLVMC/EmptyCompilationGraph.td index e5d5e9a..a52b8a8 100644 --- a/test/LLVMC/EmptyCompilationGraph.td +++ b/test/LLVMC/EmptyCompilationGraph.td @@ -1,6 +1,6 @@ // Check that the compilation graph can be empty. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/EnvParentheses.td b/test/LLVMC/EnvParentheses.td index 86091db..ce0cb82 100644 --- a/test/LLVMC/EnvParentheses.td +++ b/test/LLVMC/EnvParentheses.td @@ -2,7 +2,7 @@ // http://llvm.org/bugs/show_bug.cgi?id=4157 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: not grep {FOO")));} %t -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -13,6 +13,6 @@ def dummy_tool : Tool<[ (out_language "dummy") ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; def Graph : CompilationGraph<[]>; diff --git a/test/LLVMC/ExternOptions.td b/test/LLVMC/ExternOptions.td deleted file mode 100644 index d84ea84..0000000 --- a/test/LLVMC/ExternOptions.td +++ /dev/null @@ -1,26 +0,0 @@ -// Check that extern options work. -// The dummy tool and graph are required to silence warnings. -// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t -// XFAIL: vg_leak - -include "llvm/CompilerDriver/Common.td" - -// CHECK: extern cl::opt<bool> AutoGeneratedSwitch_Wall - -def OptList : OptionList<[(switch_option "Wall", (extern)), - (parameter_option "std", (extern)), - (prefix_list_option "L", (extern))]>; - -def dummy_tool : Tool<[ -(command "dummy_cmd"), -(in_language "dummy"), -(out_language "dummy"), -(actions (case - (switch_on "Wall"), (stop_compilation), - (not_empty "std"), (stop_compilation), - (not_empty "L"), (stop_compilation))) -]>; - -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; diff --git a/test/LLVMC/ForwardAs.td b/test/LLVMC/ForwardAs.td index 536b96a..99b240e 100644 --- a/test/LLVMC/ForwardAs.td +++ b/test/LLVMC/ForwardAs.td @@ -2,12 +2,12 @@ // http://llvm.org/bugs/show_bug.cgi?id=4159 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" -def OptList : OptionList<[(parameter_option "dummy", (extern))]>; +def OptList : OptionList<[(parameter_option "dummy", (help "dummmy"))]>; def dummy_tool : Tool<[ (command "dummy_cmd"), @@ -18,4 +18,4 @@ def dummy_tool : Tool<[ (not_empty "dummy"), (forward_as "dummy", "unique_name"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/ForwardTransformedValue.td b/test/LLVMC/ForwardTransformedValue.td index 5e0bf29..9184ede 100644 --- a/test/LLVMC/ForwardTransformedValue.td +++ b/test/LLVMC/ForwardTransformedValue.td @@ -2,13 +2,13 @@ // The dummy tool and graph are required to silence warnings. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" -def OptList : OptionList<[(parameter_option "a", (extern)), - (prefix_list_option "b", (extern))]>; +def OptList : OptionList<[(parameter_option "a", (help "dummy")), + (prefix_list_option "b", (help "dummy"))]>; // CHECK: std::string HookA // CHECK: std::string HookB @@ -18,10 +18,10 @@ def dummy_tool : Tool<[ (in_language "dummy"), (out_language "dummy"), (actions (case - // CHECK: HookA(AutoGeneratedParameter_a + // CHECK: HookA(autogenerated::Parameter_a (not_empty "a"), (forward_transformed_value "a", "HookA"), - // CHECK: HookB(AutoGeneratedList_b + // CHECK: HookB(autogenerated::List_b (not_empty "b"), (forward_transformed_value "b", "HookB"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/ForwardValue.td b/test/LLVMC/ForwardValue.td index 4c7a0ee..a42a3f0 100644 --- a/test/LLVMC/ForwardValue.td +++ b/test/LLVMC/ForwardValue.td @@ -2,23 +2,23 @@ // The dummy tool and graph are required to silence warnings. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" -def OptList : OptionList<[(parameter_option "a", (extern)), - (prefix_list_option "b", (extern))]>; +def OptList : OptionList<[(parameter_option "a", (help "dummy")), + (prefix_list_option "b", (help "dummy"))]>; def dummy_tool : Tool<[ (command "dummy_cmd"), (in_language "dummy"), (out_language "dummy"), (actions (case - // CHECK: , AutoGeneratedParameter_a)); + // CHECK: , autogenerated::Parameter_a)); (not_empty "a"), (forward_value "a"), - // CHECK: B = AutoGeneratedList_b.begin() + // CHECK: B = autogenerated::List_b.begin() (not_empty "b"), (forward_value "b"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/HookWithArguments.td b/test/LLVMC/HookWithArguments.td index 5ff96cd..bbba2e9 100644 --- a/test/LLVMC/HookWithArguments.td +++ b/test/LLVMC/HookWithArguments.td @@ -1,7 +1,7 @@ // Check that hooks with arguments work. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -17,4 +17,4 @@ def dummy_tool : Tool<[ (out_language "dummy") ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/HookWithInFile.td b/test/LLVMC/HookWithInFile.td index 9855dbc..ed08b53 100644 --- a/test/LLVMC/HookWithInFile.td +++ b/test/LLVMC/HookWithInFile.td @@ -1,7 +1,7 @@ // Check that a hook can be given $INFILE as an argument. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -13,4 +13,4 @@ def dummy_tool : Tool<[ (out_language "dummy") ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/Init.td b/test/LLVMC/Init.td index 05209bf..c384679 100644 --- a/test/LLVMC/Init.td +++ b/test/LLVMC/Init.td @@ -1,7 +1,7 @@ // Check that (init true/false) and (init "str") work. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -22,4 +22,4 @@ def dummy_tool : Tool<[ (not_empty "dummy2"), (forward "dummy2"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/LanguageMap.td b/test/LLVMC/LanguageMap.td new file mode 100644 index 0000000..a050214 --- /dev/null +++ b/test/LLVMC/LanguageMap.td @@ -0,0 +1,29 @@ +// Check that LanguageMap is processed properly. +// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t +// RUN: FileCheck -input-file %t %s +// RUN: %compile_cxx %t +// XFAIL: vg_leak + +include "llvm/CompilerDriver/Common.td" + +def OptList : OptionList<[ +(switch_option "dummy1", (help "none")) +]>; + +def dummy_tool : Tool<[ +(command "dummy_cmd"), +(in_language "dummy_lang"), +(out_language "dummy_lang"), +(actions (case + (switch_on "dummy1"), (forward "dummy1"))) +]>; + +def lang_map : LanguageMap<[ + // CHECK: langMap["dummy"] = "dummy_lang" + // CHECK: langMap["DUM"] = "dummy_lang" + (lang_to_suffixes "dummy_lang", ["dummy", "DUM"]), + // CHECK: langMap["DUM2"] = "dummy_lang_2" + (lang_to_suffixes "dummy_lang_2", "DUM2") +]>; + +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/MultiValuedOption.td b/test/LLVMC/MultiValuedOption.td index 73ccb63..08c7533 100644 --- a/test/LLVMC/MultiValuedOption.td +++ b/test/LLVMC/MultiValuedOption.td @@ -2,7 +2,7 @@ // The dummy tool and graph are required to silence warnings. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -10,7 +10,7 @@ include "llvm/CompilerDriver/Common.td" def OptList : OptionList<[ // CHECK: cl::multi_val(2) (prefix_list_option "foo", (multi_val 2)), - (parameter_list_option "baz", (multi_val 2), (extern))]>; + (parameter_list_option "baz", (multi_val 2))]>; def dummy_tool : Tool<[ (command "dummy_cmd"), @@ -21,4 +21,4 @@ def dummy_tool : Tool<[ (not_empty "baz"), (forward "baz"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/MultipleCompilationGraphs.td b/test/LLVMC/MultipleCompilationGraphs.td index 86cd613..b3746c0 100644 --- a/test/LLVMC/MultipleCompilationGraphs.td +++ b/test/LLVMC/MultipleCompilationGraphs.td @@ -1,6 +1,6 @@ // Check that multiple compilation graphs are allowed. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/MultiplePluginPriorities.td b/test/LLVMC/MultiplePluginPriorities.td deleted file mode 100644 index 2fe0645..0000000 --- a/test/LLVMC/MultiplePluginPriorities.td +++ /dev/null @@ -1,17 +0,0 @@ -// Check that multiple plugin priorities are not allowed. -// RUN: ignore tblgen -I %p/../../include --gen-llvmc %s |& grep "More than one 'PluginPriority' instance found" - -// Disable for Darwin PPC: <rdar://problem/7598390> -// XFAIL: powerpc-apple-darwin - -// Generally XFAIL'ed for now, this is (sometimes?) failing on x86_64-apple-darwin10. -// RUN: false -// XFAIL: * - -include "llvm/CompilerDriver/Common.td" - -def Graph : CompilationGraph<[]>; - -def Priority1 : PluginPriority<1>; - -def Priority2 : PluginPriority<2>; diff --git a/test/LLVMC/NoActions.td b/test/LLVMC/NoActions.td index a80bcfe..34b4440 100644 --- a/test/LLVMC/NoActions.td +++ b/test/LLVMC/NoActions.td @@ -1,7 +1,7 @@ // Check that tools without associated actions are accepted. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -13,4 +13,4 @@ def dummy_tool : Tool<[ (out_language "dummy") ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/NoCompilationGraph.td b/test/LLVMC/NoCompilationGraph.td index 69df701..4182882 100644 --- a/test/LLVMC/NoCompilationGraph.td +++ b/test/LLVMC/NoCompilationGraph.td @@ -1,6 +1,6 @@ // Check that the compilation graph is not required. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/OneOrMore.td b/test/LLVMC/OneOrMore.td index 37fbc87..54fa62d 100644 --- a/test/LLVMC/OneOrMore.td +++ b/test/LLVMC/OneOrMore.td @@ -2,7 +2,7 @@ // The dummy tool and graph are required to silence warnings. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -22,4 +22,4 @@ def dummy_tool : Tool<[ (not_empty "baz"), (forward "baz"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/OptionPreprocessor.td b/test/LLVMC/OptionPreprocessor.td index c2641be..8019c42 100644 --- a/test/LLVMC/OptionPreprocessor.td +++ b/test/LLVMC/OptionPreprocessor.td @@ -1,7 +1,7 @@ // Test for the OptionPreprocessor and related functionality. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t +// RUN: %compile_cxx %t // XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -63,5 +63,5 @@ def dummy : Tool< (not_empty "foo_l"), (error))) ]>; -def Graph : CompilationGraph<[Edge<"root", "dummy">]>; +def Graph : CompilationGraph<[(edge "root", "dummy")]>; diff --git a/test/LLVMC/OutputSuffixHook.td b/test/LLVMC/OutputSuffixHook.td index 4ecad23..1f5ecd1 100644 --- a/test/LLVMC/OutputSuffixHook.td +++ b/test/LLVMC/OutputSuffixHook.td @@ -1,8 +1,8 @@ // Check that hooks can be invoked from 'output_suffix'. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s -// RUN: %compile_cxx -fexceptions -x c++ %t -// XFAIL: * +// RUN: %compile_cxx %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" @@ -13,7 +13,7 @@ def OptList : OptionList<[ ]>; def dummy_tool : Tool<[ -(cmd_line "dummy_cmd $INFILE"), +(command "dummy_cmd"), (in_language "dummy_lang"), (out_language "dummy_lang"), (actions (case @@ -21,4 +21,4 @@ def dummy_tool : Tool<[ (switch_on "dummy1"), (output_suffix "$CALL(MyHook)"))) ]>; -def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; +def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>; diff --git a/test/LLVMC/TestWarnings.td b/test/LLVMC/TestWarnings.td index 0388cb0..b0f57e9 100644 --- a/test/LLVMC/TestWarnings.td +++ b/test/LLVMC/TestWarnings.td @@ -5,4 +5,4 @@ include "llvm/CompilerDriver/Common.td" -def OptList : OptionList<[(switch_option "Wall", (extern))]>; +def OptList : OptionList<[(switch_option "Wall", (help "dummy"))]>; diff --git a/test/Linker/metadata-a.ll b/test/Linker/metadata-a.ll new file mode 100644 index 0000000..5a9d2e4 --- /dev/null +++ b/test/Linker/metadata-a.ll @@ -0,0 +1,15 @@ +; RUN: llvm-link %s %p/metadata-b.ll -S -o - | FileCheck %s + +; CHECK: define void @foo(i32 %a) +; CHECK: ret void, !attach !0, !also !{i32 %a} +; CHECK: define void @goo(i32 %b) +; CHECK: ret void, !attach !1, !and !{i32 %b} +; CHECK: !0 = metadata !{i32 524334, void (i32)* @foo} +; CHECK: !1 = metadata !{i32 524334, void (i32)* @goo} + +define void @foo(i32 %a) nounwind { +entry: + ret void, !attach !0, !also !{ i32 %a } +} + +!0 = metadata !{i32 524334, void (i32)* @foo} diff --git a/test/Linker/metadata-b.ll b/test/Linker/metadata-b.ll new file mode 100644 index 0000000..ef0270a --- /dev/null +++ b/test/Linker/metadata-b.ll @@ -0,0 +1,9 @@ +; This file is for use with metadata-a.ll +; RUN: true + +define void @goo(i32 %b) nounwind { +entry: + ret void, !attach !0, !and !{ i32 %b } +} + +!0 = metadata !{i32 524334, void (i32)* @goo} diff --git a/test/MC/AsmParser/ARM/arm_instructions.s b/test/MC/AsmParser/ARM/arm_instructions.s new file mode 100644 index 0000000..8632cb0 --- /dev/null +++ b/test/MC/AsmParser/ARM/arm_instructions.s @@ -0,0 +1,8 @@ +@ RUN: llvm-mc -triple arm-unknown-unknown %s | FileCheck %s + +@ CHECK: nop + nop + +@ CHECK: nopeq + nopeq + diff --git a/test/MC/AsmParser/ELF/dg.exp b/test/MC/AsmParser/ELF/dg.exp new file mode 100644 index 0000000..ca6aefe --- /dev/null +++ b/test/MC/AsmParser/ELF/dg.exp @@ -0,0 +1,6 @@ +load_lib llvm.exp + +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]] +} + diff --git a/test/MC/AsmParser/ELF/directive_previous.s b/test/MC/AsmParser/ELF/directive_previous.s new file mode 100644 index 0000000..5db1eac --- /dev/null +++ b/test/MC/AsmParser/ELF/directive_previous.s @@ -0,0 +1,13 @@ +# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s + +.bss +# CHECK: .bss + +.text +# CHECK: .text + +.previous +# CHECK: .bss + +.previous +# CHECK: .text diff --git a/test/MC/AsmParser/ELF/directive_section.s b/test/MC/AsmParser/ELF/directive_section.s new file mode 100644 index 0000000..9531c02 --- /dev/null +++ b/test/MC/AsmParser/ELF/directive_section.s @@ -0,0 +1,23 @@ +# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s + + .bss +# CHECK: .bss + + .data.rel.ro +# CHECK: .data.rel.ro + + .data.rel +# CHECK: .data.rel + + .eh_frame +# CHECK: .eh_frame + + .rodata +# CHECK: .rodata + + .tbss +# CHECK: .tbss + + .tdata +# CHECK: .tdata + diff --git a/test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s b/test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s new file mode 100644 index 0000000..47bf980 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s @@ -0,0 +1,42 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vpclmulqdq $17, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x11] + vpclmulhqhqdq %xmm2, %xmm5, %xmm1 + +// CHECK: vpclmulqdq $17, (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x11] + vpclmulhqhqdq (%eax), %xmm5, %xmm3 + +// CHECK: vpclmulqdq $1, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x01] + vpclmulhqlqdq %xmm2, %xmm5, %xmm1 + +// CHECK: vpclmulqdq $1, (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x01] + vpclmulhqlqdq (%eax), %xmm5, %xmm3 + +// CHECK: vpclmulqdq $16, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x10] + vpclmullqhqdq %xmm2, %xmm5, %xmm1 + +// CHECK: vpclmulqdq $16, (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x10] + vpclmullqhqdq (%eax), %xmm5, %xmm3 + +// CHECK: vpclmulqdq $0, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x00] + vpclmullqlqdq %xmm2, %xmm5, %xmm1 + +// CHECK: vpclmulqdq $0, (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x00] + vpclmullqlqdq (%eax), %xmm5, %xmm3 + +// CHECK: vpclmulqdq $17, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x11] + vpclmulqdq $17, %xmm2, %xmm5, %xmm1 + +// CHECK: vpclmulqdq $17, (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x11] + vpclmulqdq $17, (%eax), %xmm5, %xmm3 + diff --git a/test/MC/AsmParser/X86/x86_32-avx-encoding.s b/test/MC/AsmParser/X86/x86_32-avx-encoding.s new file mode 100644 index 0000000..b7ade66 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_32-avx-encoding.s @@ -0,0 +1,3241 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vaddss %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0x58,0xd4] + vaddss %xmm4, %xmm6, %xmm2 + +// CHECK: vmulss %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0x59,0xd4] + vmulss %xmm4, %xmm6, %xmm2 + +// CHECK: vsubss %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0x5c,0xd4] + vsubss %xmm4, %xmm6, %xmm2 + +// CHECK: vdivss %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0x5e,0xd4] + vdivss %xmm4, %xmm6, %xmm2 + +// CHECK: vaddsd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0x58,0xd4] + vaddsd %xmm4, %xmm6, %xmm2 + +// CHECK: vmulsd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0x59,0xd4] + vmulsd %xmm4, %xmm6, %xmm2 + +// CHECK: vsubsd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0x5c,0xd4] + vsubsd %xmm4, %xmm6, %xmm2 + +// CHECK: vdivsd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0x5e,0xd4] + vdivsd %xmm4, %xmm6, %xmm2 + +// CHECK: vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] + vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] + vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] + vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] + vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] + vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] + vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] + vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vdivsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] + vdivsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vaddps %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0x58,0xd4] + vaddps %xmm4, %xmm6, %xmm2 + +// CHECK: vsubps %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0x5c,0xd4] + vsubps %xmm4, %xmm6, %xmm2 + +// CHECK: vmulps %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0x59,0xd4] + vmulps %xmm4, %xmm6, %xmm2 + +// CHECK: vdivps %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0x5e,0xd4] + vdivps %xmm4, %xmm6, %xmm2 + +// CHECK: vaddpd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0x58,0xd4] + vaddpd %xmm4, %xmm6, %xmm2 + +// CHECK: vsubpd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0x5c,0xd4] + vsubpd %xmm4, %xmm6, %xmm2 + +// CHECK: vmulpd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0x59,0xd4] + vmulpd %xmm4, %xmm6, %xmm2 + +// CHECK: vdivpd %xmm4, %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0x5e,0xd4] + vdivpd %xmm4, %xmm6, %xmm2 + +// CHECK: vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] + vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] + vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] + vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] + vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] + vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] + vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmulpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] + vmulpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] + vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: vmaxss %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xda,0x5f,0xf2] + vmaxss %xmm2, %xmm4, %xmm6 + +// CHECK: vmaxsd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xdb,0x5f,0xf2] + vmaxsd %xmm2, %xmm4, %xmm6 + +// CHECK: vminss %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xda,0x5d,0xf2] + vminss %xmm2, %xmm4, %xmm6 + +// CHECK: vminsd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xdb,0x5d,0xf2] + vminsd %xmm2, %xmm4, %xmm6 + +// CHECK: vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x5f,0x6c,0xcb,0xfc] + vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x5f,0x6c,0xcb,0xfc] + vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vminss -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x5d,0x6c,0xcb,0xfc] + vminss -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x5d,0x6c,0xcb,0xfc] + vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmaxps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x5f,0xf2] + vmaxps %xmm2, %xmm4, %xmm6 + +// CHECK: vmaxpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x5f,0xf2] + vmaxpd %xmm2, %xmm4, %xmm6 + +// CHECK: vminps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x5d,0xf2] + vminps %xmm2, %xmm4, %xmm6 + +// CHECK: vminpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x5d,0xf2] + vminpd %xmm2, %xmm4, %xmm6 + +// CHECK: vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x5f,0x6c,0xcb,0xfc] + vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x5f,0x6c,0xcb,0xfc] + vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vminps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x5d,0x6c,0xcb,0xfc] + vminps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x5d,0x6c,0xcb,0xfc] + vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vandps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x54,0xf2] + vandps %xmm2, %xmm4, %xmm6 + +// CHECK: vandpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x54,0xf2] + vandpd %xmm2, %xmm4, %xmm6 + +// CHECK: vandps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x54,0x6c,0xcb,0xfc] + vandps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x54,0x6c,0xcb,0xfc] + vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vorps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x56,0xf2] + vorps %xmm2, %xmm4, %xmm6 + +// CHECK: vorpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x56,0xf2] + vorpd %xmm2, %xmm4, %xmm6 + +// CHECK: vorps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x56,0x6c,0xcb,0xfc] + vorps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x56,0x6c,0xcb,0xfc] + vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vxorps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x57,0xf2] + vxorps %xmm2, %xmm4, %xmm6 + +// CHECK: vxorpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x57,0xf2] + vxorpd %xmm2, %xmm4, %xmm6 + +// CHECK: vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x57,0x6c,0xcb,0xfc] + vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x57,0x6c,0xcb,0xfc] + vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vandnps %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd8,0x55,0xf2] + vandnps %xmm2, %xmm4, %xmm6 + +// CHECK: vandnpd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0x55,0xf2] + vandnpd %xmm2, %xmm4, %xmm6 + +// CHECK: vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x55,0x6c,0xcb,0xfc] + vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x55,0x6c,0xcb,0xfc] + vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vmovss -4(%ebx,%ecx,8), %xmm5 +// CHECK: encoding: [0xc5,0xfa,0x10,0x6c,0xcb,0xfc] + vmovss -4(%ebx,%ecx,8), %xmm5 + +// CHECK: vmovss %xmm4, %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xea,0x10,0xec] + vmovss %xmm4, %xmm2, %xmm5 + +// CHECK: vmovsd -4(%ebx,%ecx,8), %xmm5 +// CHECK: encoding: [0xc5,0xfb,0x10,0x6c,0xcb,0xfc] + vmovsd -4(%ebx,%ecx,8), %xmm5 + +// CHECK: vmovsd %xmm4, %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xeb,0x10,0xec] + vmovsd %xmm4, %xmm2, %xmm5 + +// CHECK: vunpckhps %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe8,0x15,0xe1] + vunpckhps %xmm1, %xmm2, %xmm4 + +// CHECK: vunpckhpd %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe9,0x15,0xe1] + vunpckhpd %xmm1, %xmm2, %xmm4 + +// CHECK: vunpcklps %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe8,0x14,0xe1] + vunpcklps %xmm1, %xmm2, %xmm4 + +// CHECK: vunpcklpd %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe9,0x14,0xe1] + vunpcklpd %xmm1, %xmm2, %xmm4 + +// CHECK: vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x15,0x6c,0xcb,0xfc] + vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x15,0x6c,0xcb,0xfc] + vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x14,0x6c,0xcb,0xfc] + vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x14,0x6c,0xcb,0xfc] + vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vcmpps $0, %xmm0, %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x00] + vcmpps $0, %xmm0, %xmm6, %xmm1 + +// CHECK: vcmpps $0, (%eax), %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc8,0xc2,0x08,0x00] + vcmpps $0, (%eax), %xmm6, %xmm1 + +// CHECK: vcmpps $7, %xmm0, %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x07] + vcmpps $7, %xmm0, %xmm6, %xmm1 + +// CHECK: vcmppd $0, %xmm0, %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x00] + vcmppd $0, %xmm0, %xmm6, %xmm1 + +// CHECK: vcmppd $0, (%eax), %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc9,0xc2,0x08,0x00] + vcmppd $0, (%eax), %xmm6, %xmm1 + +// CHECK: vcmppd $7, %xmm0, %xmm6, %xmm1 +// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x07] + vcmppd $7, %xmm0, %xmm6, %xmm1 + +// CHECK: vshufps $8, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc6,0xd9,0x08] + vshufps $8, %xmm1, %xmm2, %xmm3 + +// CHECK: vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc6,0x5c,0xcb,0xfc,0x08] + vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vshufpd $8, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc6,0xd9,0x08] + vshufpd $8, %xmm1, %xmm2, %xmm3 + +// CHECK: vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc6,0x5c,0xcb,0xfc,0x08] + vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $0, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x00] + vcmpeqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $2, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x02] + vcmpleps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $1, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x01] + vcmpltps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $4, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x04] + vcmpneqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $6, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x06] + vcmpnleps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $5, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x05] + vcmpnltps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x07] + vcmpordps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $3, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x03] + vcmpunordps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x02] + vcmpleps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnleps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordps -4(%ebx,%ecx,8), %xmm6, %xmm2 + +// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordps -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $0, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x00] + vcmpeqpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $2, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x02] + vcmplepd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $1, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x01] + vcmpltpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $4, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x04] + vcmpneqpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $6, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x06] + vcmpnlepd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $5, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x05] + vcmpnltpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x07] + vcmpordpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $3, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x03] + vcmpunordpd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqpd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x02] + vcmplepd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltpd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqpd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnlepd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltpd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordpd -4(%ebx,%ecx,8), %xmm6, %xmm2 + +// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordpd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vmovmskps %xmm2, %eax +// CHECK: encoding: [0xc5,0xf8,0x50,0xc2] + vmovmskps %xmm2, %eax + +// CHECK: vmovmskpd %xmm2, %eax +// CHECK: encoding: [0xc5,0xf9,0x50,0xc2] + vmovmskpd %xmm2, %eax + +// CHECK: vcmpss $0, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x00] + vcmpeqss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $2, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x02] + vcmpless %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $1, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x01] + vcmpltss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $4, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x04] + vcmpneqss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $6, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x06] + vcmpnless %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $5, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x05] + vcmpnltss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x07] + vcmpordss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $3, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x03] + vcmpunordss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x02] + vcmpless -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnless -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordss -4(%ebx,%ecx,8), %xmm6, %xmm2 + +// CHECK: vcmpss $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $0, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x00] + vcmpeqsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $2, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x02] + vcmplesd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $1, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x01] + vcmpltsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $4, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x04] + vcmpneqsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $6, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x06] + vcmpnlesd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $5, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x05] + vcmpnltsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x07] + vcmpordsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $3, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x03] + vcmpunordsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x02] + vcmplesd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnlesd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordsd -4(%ebx,%ecx,8), %xmm6, %xmm2 + +// CHECK: vcmpsd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vucomiss %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2e,0xd1] + vucomiss %xmm1, %xmm2 + +// CHECK: vucomiss (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2e,0x10] + vucomiss (%eax), %xmm2 + +// CHECK: vcomiss %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2f,0xd1] + vcomiss %xmm1, %xmm2 + +// CHECK: vcomiss (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2f,0x10] + vcomiss (%eax), %xmm2 + +// CHECK: vucomisd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2e,0xd1] + vucomisd %xmm1, %xmm2 + +// CHECK: vucomisd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2e,0x10] + vucomisd (%eax), %xmm2 + +// CHECK: vcomisd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2f,0xd1] + vcomisd %xmm1, %xmm2 + +// CHECK: vcomisd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2f,0x10] + vcomisd (%eax), %xmm2 + +// CHECK: vcvttss2si %xmm1, %eax +// CHECK: encoding: [0xc5,0xfa,0x2c,0xc1] + vcvttss2si %xmm1, %eax + +// CHECK: vcvttss2si (%ecx), %eax +// CHECK: encoding: [0xc5,0xfa,0x2c,0x01] + vcvttss2si (%ecx), %eax + +// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf2,0x2a,0x10] + vcvtsi2ss (%eax), %xmm1, %xmm2 + +// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf2,0x2a,0x10] + vcvtsi2ss (%eax), %xmm1, %xmm2 + +// CHECK: vcvttsd2si %xmm1, %eax +// CHECK: encoding: [0xc5,0xfb,0x2c,0xc1] + vcvttsd2si %xmm1, %eax + +// CHECK: vcvttsd2si (%ecx), %eax +// CHECK: encoding: [0xc5,0xfb,0x2c,0x01] + vcvttsd2si (%ecx), %eax + +// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf3,0x2a,0x10] + vcvtsi2sd (%eax), %xmm1, %xmm2 + +// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf3,0x2a,0x10] + vcvtsi2sd (%eax), %xmm1, %xmm2 + +// CHECK: vmovaps (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x28,0x10] + vmovaps (%eax), %xmm2 + +// CHECK: vmovaps %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x28,0xd1] + vmovaps %xmm1, %xmm2 + +// CHECK: vmovaps %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf8,0x29,0x08] + vmovaps %xmm1, (%eax) + +// CHECK: vmovapd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x28,0x10] + vmovapd (%eax), %xmm2 + +// CHECK: vmovapd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x28,0xd1] + vmovapd %xmm1, %xmm2 + +// CHECK: vmovapd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x29,0x08] + vmovapd %xmm1, (%eax) + +// CHECK: vmovups (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x10,0x10] + vmovups (%eax), %xmm2 + +// CHECK: vmovups %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x10,0xd1] + vmovups %xmm1, %xmm2 + +// CHECK: vmovups %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf8,0x11,0x08] + vmovups %xmm1, (%eax) + +// CHECK: vmovupd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x10,0x10] + vmovupd (%eax), %xmm2 + +// CHECK: vmovupd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x10,0xd1] + vmovupd %xmm1, %xmm2 + +// CHECK: vmovupd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x11,0x08] + vmovupd %xmm1, (%eax) + +// CHECK: vmovlps %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf8,0x13,0x08] + vmovlps %xmm1, (%eax) + +// CHECK: vmovlps (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0x12,0x18] + vmovlps (%eax), %xmm2, %xmm3 + +// CHECK: vmovlpd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x13,0x08] + vmovlpd %xmm1, (%eax) + +// CHECK: vmovlpd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x12,0x18] + vmovlpd (%eax), %xmm2, %xmm3 + +// CHECK: vmovhps %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf8,0x17,0x08] + vmovhps %xmm1, (%eax) + +// CHECK: vmovhps (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0x16,0x18] + vmovhps (%eax), %xmm2, %xmm3 + +// CHECK: vmovhpd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x17,0x08] + vmovhpd %xmm1, (%eax) + +// CHECK: vmovhpd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x16,0x18] + vmovhpd (%eax), %xmm2, %xmm3 + +// CHECK: vmovlhps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0x16,0xd9] + vmovlhps %xmm1, %xmm2, %xmm3 + +// CHECK: vmovhlps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0x12,0xd9] + vmovhlps %xmm1, %xmm2, %xmm3 + +// CHECK: vcvtss2sil %xmm1, %eax +// CHECK: encoding: [0xc5,0xfa,0x2d,0xc1] + vcvtss2si %xmm1, %eax + +// CHECK: vcvtss2sil (%eax), %ebx +// CHECK: encoding: [0xc5,0xfa,0x2d,0x18] + vcvtss2si (%eax), %ebx + +// CHECK: vcvtdq2ps %xmm5, %xmm6 +// CHECK: encoding: [0xc5,0xf8,0x5b,0xf5] + vcvtdq2ps %xmm5, %xmm6 + +// CHECK: vcvtdq2ps (%eax), %xmm6 +// CHECK: encoding: [0xc5,0xf8,0x5b,0x30] + vcvtdq2ps (%eax), %xmm6 + +// CHECK: vcvtsd2ss %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xdb,0x5a,0xf2] + vcvtsd2ss %xmm2, %xmm4, %xmm6 + +// CHECK: vcvtsd2ss (%eax), %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xdb,0x5a,0x30] + vcvtsd2ss (%eax), %xmm4, %xmm6 + +// CHECK: vcvtps2dq %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xf9,0x5b,0xda] + vcvtps2dq %xmm2, %xmm3 + +// CHECK: vcvtps2dq (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xf9,0x5b,0x18] + vcvtps2dq (%eax), %xmm3 + +// CHECK: vcvtss2sd %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xda,0x5a,0xf2] + vcvtss2sd %xmm2, %xmm4, %xmm6 + +// CHECK: vcvtss2sd (%eax), %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xda,0x5a,0x30] + vcvtss2sd (%eax), %xmm4, %xmm6 + +// CHECK: vcvtdq2ps %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xf8,0x5b,0xf4] + vcvtdq2ps %xmm4, %xmm6 + +// CHECK: vcvtdq2ps (%ecx), %xmm4 +// CHECK: encoding: [0xc5,0xf8,0x5b,0x21] + vcvtdq2ps (%ecx), %xmm4 + +// CHECK: vcvttps2dq %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xfa,0x5b,0xda] + vcvttps2dq %xmm2, %xmm3 + +// CHECK: vcvttps2dq (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xfa,0x5b,0x18] + vcvttps2dq (%eax), %xmm3 + +// CHECK: vcvtps2pd %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xf8,0x5a,0xda] + vcvtps2pd %xmm2, %xmm3 + +// CHECK: vcvtps2pd (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xf8,0x5a,0x18] + vcvtps2pd (%eax), %xmm3 + +// CHECK: vcvtpd2ps %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xf9,0x5a,0xda] + vcvtpd2ps %xmm2, %xmm3 + +// CHECK: vsqrtpd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x51,0xd1] + vsqrtpd %xmm1, %xmm2 + +// CHECK: vsqrtpd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x51,0x10] + vsqrtpd (%eax), %xmm2 + +// CHECK: vsqrtps %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x51,0xd1] + vsqrtps %xmm1, %xmm2 + +// CHECK: vsqrtps (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x51,0x10] + vsqrtps (%eax), %xmm2 + +// CHECK: vsqrtsd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x51,0xd9] + vsqrtsd %xmm1, %xmm2, %xmm3 + +// CHECK: vsqrtsd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x51,0x18] + vsqrtsd (%eax), %xmm2, %xmm3 + +// CHECK: vsqrtss %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x51,0xd9] + vsqrtss %xmm1, %xmm2, %xmm3 + +// CHECK: vsqrtss (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x51,0x18] + vsqrtss (%eax), %xmm2, %xmm3 + +// CHECK: vrsqrtps %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x52,0xd1] + vrsqrtps %xmm1, %xmm2 + +// CHECK: vrsqrtps (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x52,0x10] + vrsqrtps (%eax), %xmm2 + +// CHECK: vrsqrtss %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x52,0xd9] + vrsqrtss %xmm1, %xmm2, %xmm3 + +// CHECK: vrsqrtss (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x52,0x18] + vrsqrtss (%eax), %xmm2, %xmm3 + +// CHECK: vrcpps %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x53,0xd1] + vrcpps %xmm1, %xmm2 + +// CHECK: vrcpps (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x53,0x10] + vrcpps (%eax), %xmm2 + +// CHECK: vrcpss %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x53,0xd9] + vrcpss %xmm1, %xmm2, %xmm3 + +// CHECK: vrcpss (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0x53,0x18] + vrcpss (%eax), %xmm2, %xmm3 + +// CHECK: vmovntdq %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0xe7,0x08] + vmovntdq %xmm1, (%eax) + +// CHECK: vmovntpd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x2b,0x08] + vmovntpd %xmm1, (%eax) + +// CHECK: vmovntps %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf8,0x2b,0x08] + vmovntps %xmm1, (%eax) + +// CHECK: vldmxcsr (%eax) +// CHECK: encoding: [0xc5,0xf8,0xae,0x10] + vldmxcsr (%eax) + +// CHECK: vstmxcsr (%eax) +// CHECK: encoding: [0xc5,0xf8,0xae,0x18] + vstmxcsr (%eax) + +// CHECK: vldmxcsr 3735928559 +// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xef,0xbe,0xad,0xde] + vldmxcsr 0xdeadbeef + +// CHECK: vstmxcsr 3735928559 +// CHECK: encoding: [0xc5,0xf8,0xae,0x1d,0xef,0xbe,0xad,0xde] + vstmxcsr 0xdeadbeef + +// CHECK: vpsubb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf8,0xd9] + vpsubb %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf8,0x18] + vpsubb (%eax), %xmm2, %xmm3 + +// CHECK: vpsubw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf9,0xd9] + vpsubw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf9,0x18] + vpsubw (%eax), %xmm2, %xmm3 + +// CHECK: vpsubd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfa,0xd9] + vpsubd %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfa,0x18] + vpsubd (%eax), %xmm2, %xmm3 + +// CHECK: vpsubq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfb,0xd9] + vpsubq %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfb,0x18] + vpsubq (%eax), %xmm2, %xmm3 + +// CHECK: vpsubsb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe8,0xd9] + vpsubsb %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubsb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe8,0x18] + vpsubsb (%eax), %xmm2, %xmm3 + +// CHECK: vpsubsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe9,0xd9] + vpsubsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe9,0x18] + vpsubsw (%eax), %xmm2, %xmm3 + +// CHECK: vpsubusb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd8,0xd9] + vpsubusb %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubusb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd8,0x18] + vpsubusb (%eax), %xmm2, %xmm3 + +// CHECK: vpsubusw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd9,0xd9] + vpsubusw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsubusw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd9,0x18] + vpsubusw (%eax), %xmm2, %xmm3 + +// CHECK: vpaddb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfc,0xd9] + vpaddb %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfc,0x18] + vpaddb (%eax), %xmm2, %xmm3 + +// CHECK: vpaddw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfd,0xd9] + vpaddw %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfd,0x18] + vpaddw (%eax), %xmm2, %xmm3 + +// CHECK: vpaddd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfe,0xd9] + vpaddd %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xfe,0x18] + vpaddd (%eax), %xmm2, %xmm3 + +// CHECK: vpaddq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd4,0xd9] + vpaddq %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd4,0x18] + vpaddq (%eax), %xmm2, %xmm3 + +// CHECK: vpaddsb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xec,0xd9] + vpaddsb %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddsb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xec,0x18] + vpaddsb (%eax), %xmm2, %xmm3 + +// CHECK: vpaddsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xed,0xd9] + vpaddsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xed,0x18] + vpaddsw (%eax), %xmm2, %xmm3 + +// CHECK: vpaddusb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdc,0xd9] + vpaddusb %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddusb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdc,0x18] + vpaddusb (%eax), %xmm2, %xmm3 + +// CHECK: vpaddusw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdd,0xd9] + vpaddusw %xmm1, %xmm2, %xmm3 + +// CHECK: vpaddusw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdd,0x18] + vpaddusw (%eax), %xmm2, %xmm3 + +// CHECK: vpmulhuw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe4,0xd9] + vpmulhuw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmulhuw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe4,0x18] + vpmulhuw (%eax), %xmm2, %xmm3 + +// CHECK: vpmulhw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe5,0xd9] + vpmulhw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmulhw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe5,0x18] + vpmulhw (%eax), %xmm2, %xmm3 + +// CHECK: vpmullw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd5,0xd9] + vpmullw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmullw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd5,0x18] + vpmullw (%eax), %xmm2, %xmm3 + +// CHECK: vpmuludq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf4,0xd9] + vpmuludq %xmm1, %xmm2, %xmm3 + +// CHECK: vpmuludq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf4,0x18] + vpmuludq (%eax), %xmm2, %xmm3 + +// CHECK: vpavgb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe0,0xd9] + vpavgb %xmm1, %xmm2, %xmm3 + +// CHECK: vpavgb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe0,0x18] + vpavgb (%eax), %xmm2, %xmm3 + +// CHECK: vpavgw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe3,0xd9] + vpavgw %xmm1, %xmm2, %xmm3 + +// CHECK: vpavgw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe3,0x18] + vpavgw (%eax), %xmm2, %xmm3 + +// CHECK: vpminsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xea,0xd9] + vpminsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpminsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xea,0x18] + vpminsw (%eax), %xmm2, %xmm3 + +// CHECK: vpminub %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xda,0xd9] + vpminub %xmm1, %xmm2, %xmm3 + +// CHECK: vpminub (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xda,0x18] + vpminub (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xee,0xd9] + vpmaxsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmaxsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xee,0x18] + vpmaxsw (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxub %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xde,0xd9] + vpmaxub %xmm1, %xmm2, %xmm3 + +// CHECK: vpmaxub (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xde,0x18] + vpmaxub (%eax), %xmm2, %xmm3 + +// CHECK: vpsadbw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf6,0xd9] + vpsadbw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsadbw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf6,0x18] + vpsadbw (%eax), %xmm2, %xmm3 + +// CHECK: vpsllw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf1,0xd9] + vpsllw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsllw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf1,0x18] + vpsllw (%eax), %xmm2, %xmm3 + +// CHECK: vpslld %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf2,0xd9] + vpslld %xmm1, %xmm2, %xmm3 + +// CHECK: vpslld (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf2,0x18] + vpslld (%eax), %xmm2, %xmm3 + +// CHECK: vpsllq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf3,0xd9] + vpsllq %xmm1, %xmm2, %xmm3 + +// CHECK: vpsllq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf3,0x18] + vpsllq (%eax), %xmm2, %xmm3 + +// CHECK: vpsraw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe1,0xd9] + vpsraw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsraw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe1,0x18] + vpsraw (%eax), %xmm2, %xmm3 + +// CHECK: vpsrad %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe2,0xd9] + vpsrad %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrad (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe2,0x18] + vpsrad (%eax), %xmm2, %xmm3 + +// CHECK: vpsrlw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd1,0xd9] + vpsrlw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrlw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd1,0x18] + vpsrlw (%eax), %xmm2, %xmm3 + +// CHECK: vpsrld %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd2,0xd9] + vpsrld %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrld (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd2,0x18] + vpsrld (%eax), %xmm2, %xmm3 + +// CHECK: vpsrlq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd3,0xd9] + vpsrlq %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrlq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd3,0x18] + vpsrlq (%eax), %xmm2, %xmm3 + +// CHECK: vpslld $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a] + vpslld $10, %xmm2, %xmm3 + +// CHECK: vpslldq $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xfa,0x0a] + vpslldq $10, %xmm2, %xmm3 + +// CHECK: vpsllq $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xf2,0x0a] + vpsllq $10, %xmm2, %xmm3 + +// CHECK: vpsllw $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x71,0xf2,0x0a] + vpsllw $10, %xmm2, %xmm3 + +// CHECK: vpsrad $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xe2,0x0a] + vpsrad $10, %xmm2, %xmm3 + +// CHECK: vpsraw $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x71,0xe2,0x0a] + vpsraw $10, %xmm2, %xmm3 + +// CHECK: vpsrld $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xd2,0x0a] + vpsrld $10, %xmm2, %xmm3 + +// CHECK: vpsrldq $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xda,0x0a] + vpsrldq $10, %xmm2, %xmm3 + +// CHECK: vpsrlq $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xd2,0x0a] + vpsrlq $10, %xmm2, %xmm3 + +// CHECK: vpsrlw $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x71,0xd2,0x0a] + vpsrlw $10, %xmm2, %xmm3 + +// CHECK: vpslld $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a] + vpslld $10, %xmm2, %xmm3 + +// CHECK: vpand %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdb,0xd9] + vpand %xmm1, %xmm2, %xmm3 + +// CHECK: vpand (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdb,0x18] + vpand (%eax), %xmm2, %xmm3 + +// CHECK: vpor %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xeb,0xd9] + vpor %xmm1, %xmm2, %xmm3 + +// CHECK: vpor (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xeb,0x18] + vpor (%eax), %xmm2, %xmm3 + +// CHECK: vpxor %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xef,0xd9] + vpxor %xmm1, %xmm2, %xmm3 + +// CHECK: vpxor (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xef,0x18] + vpxor (%eax), %xmm2, %xmm3 + +// CHECK: vpandn %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdf,0xd9] + vpandn %xmm1, %xmm2, %xmm3 + +// CHECK: vpandn (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdf,0x18] + vpandn (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpeqb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x74,0xd9] + vpcmpeqb %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpeqb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x74,0x18] + vpcmpeqb (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpeqw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x75,0xd9] + vpcmpeqw %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpeqw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x75,0x18] + vpcmpeqw (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpeqd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x76,0xd9] + vpcmpeqd %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpeqd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x76,0x18] + vpcmpeqd (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpgtb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x64,0xd9] + vpcmpgtb %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpgtb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x64,0x18] + vpcmpgtb (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpgtw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x65,0xd9] + vpcmpgtw %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpgtw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x65,0x18] + vpcmpgtw (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpgtd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x66,0xd9] + vpcmpgtd %xmm1, %xmm2, %xmm3 + +// CHECK: vpcmpgtd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x66,0x18] + vpcmpgtd (%eax), %xmm2, %xmm3 + +// CHECK: vpacksswb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x63,0xd9] + vpacksswb %xmm1, %xmm2, %xmm3 + +// CHECK: vpacksswb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x63,0x18] + vpacksswb (%eax), %xmm2, %xmm3 + +// CHECK: vpackssdw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6b,0xd9] + vpackssdw %xmm1, %xmm2, %xmm3 + +// CHECK: vpackssdw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6b,0x18] + vpackssdw (%eax), %xmm2, %xmm3 + +// CHECK: vpackuswb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x67,0xd9] + vpackuswb %xmm1, %xmm2, %xmm3 + +// CHECK: vpackuswb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x67,0x18] + vpackuswb (%eax), %xmm2, %xmm3 + +// CHECK: vpshufd $4, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xf9,0x70,0xda,0x04] + vpshufd $4, %xmm2, %xmm3 + +// CHECK: vpshufd $4, (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xf9,0x70,0x18,0x04] + vpshufd $4, (%eax), %xmm3 + +// CHECK: vpshufhw $4, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xfa,0x70,0xda,0x04] + vpshufhw $4, %xmm2, %xmm3 + +// CHECK: vpshufhw $4, (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xfa,0x70,0x18,0x04] + vpshufhw $4, (%eax), %xmm3 + +// CHECK: vpshuflw $4, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xfb,0x70,0xda,0x04] + vpshuflw $4, %xmm2, %xmm3 + +// CHECK: vpshuflw $4, (%eax), %xmm3 +// CHECK: encoding: [0xc5,0xfb,0x70,0x18,0x04] + vpshuflw $4, (%eax), %xmm3 + +// CHECK: vpunpcklbw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x60,0xd9] + vpunpcklbw %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpcklbw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x60,0x18] + vpunpcklbw (%eax), %xmm2, %xmm3 + +// CHECK: vpunpcklwd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x61,0xd9] + vpunpcklwd %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpcklwd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x61,0x18] + vpunpcklwd (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckldq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x62,0xd9] + vpunpckldq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckldq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x62,0x18] + vpunpckldq (%eax), %xmm2, %xmm3 + +// CHECK: vpunpcklqdq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6c,0xd9] + vpunpcklqdq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpcklqdq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6c,0x18] + vpunpcklqdq (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhbw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x68,0xd9] + vpunpckhbw %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhbw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x68,0x18] + vpunpckhbw (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhwd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x69,0xd9] + vpunpckhwd %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhwd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x69,0x18] + vpunpckhwd (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhdq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6a,0xd9] + vpunpckhdq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhdq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6a,0x18] + vpunpckhdq (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhqdq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6d,0xd9] + vpunpckhqdq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhqdq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6d,0x18] + vpunpckhqdq (%eax), %xmm2, %xmm3 + +// CHECK: vpinsrw $7, %eax, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc4,0xd8,0x07] + vpinsrw $7, %eax, %xmm2, %xmm3 + +// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xc4,0x18,0x07] + vpinsrw $7, (%eax), %xmm2, %xmm3 + +// CHECK: vpextrw $7, %xmm2, %eax +// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07] + vpextrw $7, %xmm2, %eax + +// CHECK: vpmovmskb %xmm1, %eax +// CHECK: encoding: [0xc5,0xf9,0xd7,0xc1] + vpmovmskb %xmm1, %eax + +// CHECK: vmaskmovdqu %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0xf7,0xd1] + vmaskmovdqu %xmm1, %xmm2 + +// CHECK: vmovd %xmm1, %eax +// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8] + vmovd %xmm1, %eax + +// CHECK: vmovd %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x7e,0x08] + vmovd %xmm1, (%eax) + +// CHECK: vmovd %eax, %xmm1 +// CHECK: encoding: [0xc5,0xf9,0x6e,0xc8] + vmovd %eax, %xmm1 + +// CHECK: vmovd (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xf9,0x6e,0x08] + vmovd (%eax), %xmm1 + +// CHECK: vmovq %xmm1, (%eax) +// CHECK: encoding: [0xc5,0xf9,0xd6,0x08] + vmovq %xmm1, (%eax) + +// CHECK: vmovq %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfa,0x7e,0xd1] + vmovq %xmm1, %xmm2 + +// CHECK: vmovq (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xfa,0x7e,0x08] + vmovq (%eax), %xmm1 + +// CHECK: vcvtpd2dq %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfb,0xe6,0xd1] + vcvtpd2dq %xmm1, %xmm2 + +// CHECK: vcvtdq2pd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfa,0xe6,0xd1] + vcvtdq2pd %xmm1, %xmm2 + +// CHECK: vcvtdq2pd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xfa,0xe6,0x10] + vcvtdq2pd (%eax), %xmm2 + +// CHECK: vmovshdup %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfa,0x16,0xd1] + vmovshdup %xmm1, %xmm2 + +// CHECK: vmovshdup (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xfa,0x16,0x10] + vmovshdup (%eax), %xmm2 + +// CHECK: vmovsldup %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfa,0x12,0xd1] + vmovsldup %xmm1, %xmm2 + +// CHECK: vmovsldup (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xfa,0x12,0x10] + vmovsldup (%eax), %xmm2 + +// CHECK: vmovddup %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xfb,0x12,0xd1] + vmovddup %xmm1, %xmm2 + +// CHECK: vmovddup (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xfb,0x12,0x10] + vmovddup (%eax), %xmm2 + +// CHECK: vaddsubps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xd0,0xd9] + vaddsubps %xmm1, %xmm2, %xmm3 + +// CHECK: vaddsubps (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf3,0xd0,0x10] + vaddsubps (%eax), %xmm1, %xmm2 + +// CHECK: vaddsubpd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd0,0xd9] + vaddsubpd %xmm1, %xmm2, %xmm3 + +// CHECK: vaddsubpd (%eax), %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf1,0xd0,0x10] + vaddsubpd (%eax), %xmm1, %xmm2 + +// CHECK: vhaddps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7c,0xd9] + vhaddps %xmm1, %xmm2, %xmm3 + +// CHECK: vhaddps (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7c,0x18] + vhaddps (%eax), %xmm2, %xmm3 + +// CHECK: vhaddpd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7c,0xd9] + vhaddpd %xmm1, %xmm2, %xmm3 + +// CHECK: vhaddpd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7c,0x18] + vhaddpd (%eax), %xmm2, %xmm3 + +// CHECK: vhsubps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7d,0xd9] + vhsubps %xmm1, %xmm2, %xmm3 + +// CHECK: vhsubps (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7d,0x18] + vhsubps (%eax), %xmm2, %xmm3 + +// CHECK: vhsubpd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7d,0xd9] + vhsubpd %xmm1, %xmm2, %xmm3 + +// CHECK: vhsubpd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7d,0x18] + vhsubpd (%eax), %xmm2, %xmm3 + +// CHECK: vpabsb %xmm1, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0xd1] + vpabsb %xmm1, %xmm2 + +// CHECK: vpabsb (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0x10] + vpabsb (%eax), %xmm2 + +// CHECK: vpabsw %xmm1, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0xd1] + vpabsw %xmm1, %xmm2 + +// CHECK: vpabsw (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0x10] + vpabsw (%eax), %xmm2 + +// CHECK: vpabsd %xmm1, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0xd1] + vpabsd %xmm1, %xmm2 + +// CHECK: vpabsd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0x10] + vpabsd (%eax), %xmm2 + +// CHECK: vphaddw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0xd9] + vphaddw %xmm1, %xmm2, %xmm3 + +// CHECK: vphaddw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0x18] + vphaddw (%eax), %xmm2, %xmm3 + +// CHECK: vphaddd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0xd9] + vphaddd %xmm1, %xmm2, %xmm3 + +// CHECK: vphaddd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0x18] + vphaddd (%eax), %xmm2, %xmm3 + +// CHECK: vphaddsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0xd9] + vphaddsw %xmm1, %xmm2, %xmm3 + +// CHECK: vphaddsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0x18] + vphaddsw (%eax), %xmm2, %xmm3 + +// CHECK: vphsubw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0xd9] + vphsubw %xmm1, %xmm2, %xmm3 + +// CHECK: vphsubw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0x18] + vphsubw (%eax), %xmm2, %xmm3 + +// CHECK: vphsubd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0xd9] + vphsubd %xmm1, %xmm2, %xmm3 + +// CHECK: vphsubd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0x18] + vphsubd (%eax), %xmm2, %xmm3 + +// CHECK: vphsubsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0xd9] + vphsubsw %xmm1, %xmm2, %xmm3 + +// CHECK: vphsubsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0x18] + vphsubsw (%eax), %xmm2, %xmm3 + +// CHECK: vpmaddubsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0xd9] + vpmaddubsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmaddubsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0x18] + vpmaddubsw (%eax), %xmm2, %xmm3 + +// CHECK: vpshufb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0xd9] + vpshufb %xmm1, %xmm2, %xmm3 + +// CHECK: vpshufb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0x18] + vpshufb (%eax), %xmm2, %xmm3 + +// CHECK: vpsignb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0xd9] + vpsignb %xmm1, %xmm2, %xmm3 + +// CHECK: vpsignb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0x18] + vpsignb (%eax), %xmm2, %xmm3 + +// CHECK: vpsignw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0xd9] + vpsignw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsignw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0x18] + vpsignw (%eax), %xmm2, %xmm3 + +// CHECK: vpsignd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0xd9] + vpsignd %xmm1, %xmm2, %xmm3 + +// CHECK: vpsignd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0x18] + vpsignd (%eax), %xmm2, %xmm3 + +// CHECK: vpmulhrsw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0xd9] + vpmulhrsw %xmm1, %xmm2, %xmm3 + +// CHECK: vpmulhrsw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0x18] + vpmulhrsw (%eax), %xmm2, %xmm3 + +// CHECK: vpalignr $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0xd9,0x07] + vpalignr $7, %xmm1, %xmm2, %xmm3 + +// CHECK: vpalignr $7, (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0x18,0x07] + vpalignr $7, (%eax), %xmm2, %xmm3 + +// CHECK: vroundsd $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0xd9,0x07] + vroundsd $7, %xmm1, %xmm2, %xmm3 + +// CHECK: vroundsd $7, (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0x18,0x07] + vroundsd $7, (%eax), %xmm2, %xmm3 + +// CHECK: vroundss $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0xd9,0x07] + vroundss $7, %xmm1, %xmm2, %xmm3 + +// CHECK: vroundss $7, (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0x18,0x07] + vroundss $7, (%eax), %xmm2, %xmm3 + +// CHECK: vroundpd $7, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0xda,0x07] + vroundpd $7, %xmm2, %xmm3 + +// CHECK: vroundpd $7, (%eax), %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0x18,0x07] + vroundpd $7, (%eax), %xmm3 + +// CHECK: vroundps $7, %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0xda,0x07] + vroundps $7, %xmm2, %xmm3 + +// CHECK: vroundps $7, (%eax), %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0x18,0x07] + vroundps $7, (%eax), %xmm3 + +// CHECK: vphminposuw %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0xda] + vphminposuw %xmm2, %xmm3 + +// CHECK: vphminposuw (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0x10] + vphminposuw (%eax), %xmm2 + +// CHECK: vpackusdw %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x2b,0xca] + vpackusdw %xmm2, %xmm3, %xmm1 + +// CHECK: vpackusdw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x2b,0x18] + vpackusdw (%eax), %xmm2, %xmm3 + +// CHECK: vpcmpeqq %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x29,0xca] + vpcmpeqq %xmm2, %xmm3, %xmm1 + +// CHECK: vpcmpeqq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x29,0x18] + vpcmpeqq (%eax), %xmm2, %xmm3 + +// CHECK: vpminsb %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x38,0xca] + vpminsb %xmm2, %xmm3, %xmm1 + +// CHECK: vpminsb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x38,0x18] + vpminsb (%eax), %xmm2, %xmm3 + +// CHECK: vpminsd %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x39,0xca] + vpminsd %xmm2, %xmm3, %xmm1 + +// CHECK: vpminsd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x39,0x18] + vpminsd (%eax), %xmm2, %xmm3 + +// CHECK: vpminud %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3b,0xca] + vpminud %xmm2, %xmm3, %xmm1 + +// CHECK: vpminud (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3b,0x18] + vpminud (%eax), %xmm2, %xmm3 + +// CHECK: vpminuw %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3a,0xca] + vpminuw %xmm2, %xmm3, %xmm1 + +// CHECK: vpminuw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3a,0x18] + vpminuw (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxsb %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3c,0xca] + vpmaxsb %xmm2, %xmm3, %xmm1 + +// CHECK: vpmaxsb (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3c,0x18] + vpmaxsb (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxsd %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3d,0xca] + vpmaxsd %xmm2, %xmm3, %xmm1 + +// CHECK: vpmaxsd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3d,0x18] + vpmaxsd (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxud %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3f,0xca] + vpmaxud %xmm2, %xmm3, %xmm1 + +// CHECK: vpmaxud (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3f,0x18] + vpmaxud (%eax), %xmm2, %xmm3 + +// CHECK: vpmaxuw %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x3e,0xca] + vpmaxuw %xmm2, %xmm3, %xmm1 + +// CHECK: vpmaxuw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x3e,0x18] + vpmaxuw (%eax), %xmm2, %xmm3 + +// CHECK: vpmuldq %xmm2, %xmm3, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x61,0x28,0xca] + vpmuldq %xmm2, %xmm3, %xmm1 + +// CHECK: vpmuldq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x69,0x28,0x18] + vpmuldq (%eax), %xmm2, %xmm3 + +// CHECK: vpmulld %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0xca] + vpmulld %xmm2, %xmm5, %xmm1 + +// CHECK: vpmulld (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0x18] + vpmulld (%eax), %xmm5, %xmm3 + +// CHECK: vblendps $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0xca,0x03] + vblendps $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vblendps $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0x08,0x03] + vblendps $3, (%eax), %xmm5, %xmm1 + +// CHECK: vblendpd $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0xca,0x03] + vblendpd $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vblendpd $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0x08,0x03] + vblendpd $3, (%eax), %xmm5, %xmm1 + +// CHECK: vpblendw $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0xca,0x03] + vpblendw $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vpblendw $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0x08,0x03] + vpblendw $3, (%eax), %xmm5, %xmm1 + +// CHECK: vmpsadbw $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0xca,0x03] + vmpsadbw $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vmpsadbw $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0x08,0x03] + vmpsadbw $3, (%eax), %xmm5, %xmm1 + +// CHECK: vdpps $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0xca,0x03] + vdpps $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vdpps $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0x08,0x03] + vdpps $3, (%eax), %xmm5, %xmm1 + +// CHECK: vdppd $3, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0xca,0x03] + vdppd $3, %xmm2, %xmm5, %xmm1 + +// CHECK: vdppd $3, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0x08,0x03] + vdppd $3, (%eax), %xmm5, %xmm1 + +// CHECK: vblendvpd %xmm2, %xmm5, %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0xdd,0x20] + vblendvpd %xmm2, %xmm5, %xmm1, %xmm3 + +// CHECK: vblendvpd %xmm2, (%eax), %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0x18,0x20] + vblendvpd %xmm2, (%eax), %xmm1, %xmm3 + +// CHECK: vblendvps %xmm2, %xmm5, %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0xdd,0x20] + vblendvps %xmm2, %xmm5, %xmm1, %xmm3 + +// CHECK: vblendvps %xmm2, (%eax), %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0x18,0x20] + vblendvps %xmm2, (%eax), %xmm1, %xmm3 + +// CHECK: vpblendvb %xmm2, %xmm5, %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0xdd,0x20] + vpblendvb %xmm2, %xmm5, %xmm1, %xmm3 + +// CHECK: vpblendvb %xmm2, (%eax), %xmm1, %xmm3 +// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0x18,0x20] + vpblendvb %xmm2, (%eax), %xmm1, %xmm3 + +// CHECK: vpmovsxbw %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0xea] + vpmovsxbw %xmm2, %xmm5 + +// CHECK: vpmovsxbw (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0x10] + vpmovsxbw (%eax), %xmm2 + +// CHECK: vpmovsxwd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0xea] + vpmovsxwd %xmm2, %xmm5 + +// CHECK: vpmovsxwd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0x10] + vpmovsxwd (%eax), %xmm2 + +// CHECK: vpmovsxdq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0xea] + vpmovsxdq %xmm2, %xmm5 + +// CHECK: vpmovsxdq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0x10] + vpmovsxdq (%eax), %xmm2 + +// CHECK: vpmovzxbw %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0xea] + vpmovzxbw %xmm2, %xmm5 + +// CHECK: vpmovzxbw (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0x10] + vpmovzxbw (%eax), %xmm2 + +// CHECK: vpmovzxwd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0xea] + vpmovzxwd %xmm2, %xmm5 + +// CHECK: vpmovzxwd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0x10] + vpmovzxwd (%eax), %xmm2 + +// CHECK: vpmovzxdq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0xea] + vpmovzxdq %xmm2, %xmm5 + +// CHECK: vpmovzxdq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0x10] + vpmovzxdq (%eax), %xmm2 + +// CHECK: vpmovsxbq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0xea] + vpmovsxbq %xmm2, %xmm5 + +// CHECK: vpmovsxbq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0x10] + vpmovsxbq (%eax), %xmm2 + +// CHECK: vpmovzxbq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0xea] + vpmovzxbq %xmm2, %xmm5 + +// CHECK: vpmovzxbq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0x10] + vpmovzxbq (%eax), %xmm2 + +// CHECK: vpmovsxbd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0xea] + vpmovsxbd %xmm2, %xmm5 + +// CHECK: vpmovsxbd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0x10] + vpmovsxbd (%eax), %xmm2 + +// CHECK: vpmovsxwq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0xea] + vpmovsxwq %xmm2, %xmm5 + +// CHECK: vpmovsxwq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0x10] + vpmovsxwq (%eax), %xmm2 + +// CHECK: vpmovzxbd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0xea] + vpmovzxbd %xmm2, %xmm5 + +// CHECK: vpmovzxbd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0x10] + vpmovzxbd (%eax), %xmm2 + +// CHECK: vpmovzxwq %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0xea] + vpmovzxwq %xmm2, %xmm5 + +// CHECK: vpmovzxwq (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0x10] + vpmovzxwq (%eax), %xmm2 + +// CHECK: vpextrw $7, %xmm2, %eax +// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07] + vpextrw $7, %xmm2, %eax + +// CHECK: vpextrw $7, %xmm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x79,0x15,0x10,0x07] + vpextrw $7, %xmm2, (%eax) + +// CHECK: vpextrd $7, %xmm2, %eax +// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0xd0,0x07] + vpextrd $7, %xmm2, %eax + +// CHECK: vpextrd $7, %xmm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0x10,0x07] + vpextrd $7, %xmm2, (%eax) + +// CHECK: vpextrb $7, %xmm2, %eax +// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xd0,0x07] + vpextrb $7, %xmm2, %eax + +// CHECK: vpextrb $7, %xmm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0x10,0x07] + vpextrb $7, %xmm2, (%eax) + +// CHECK: vextractps $7, %xmm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0x10,0x07] + vextractps $7, %xmm2, (%eax) + +// CHECK: vextractps $7, %xmm2, %eax +// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd0,0x07] + vextractps $7, %xmm2, %eax + +// CHECK: vpinsrw $7, %eax, %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0xc4,0xe8,0x07] + vpinsrw $7, %eax, %xmm2, %xmm5 + +// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0xc4,0x28,0x07] + vpinsrw $7, (%eax), %xmm2, %xmm5 + +// CHECK: vpinsrb $7, %eax, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0xe8,0x07] + vpinsrb $7, %eax, %xmm2, %xmm5 + +// CHECK: vpinsrb $7, (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0x28,0x07] + vpinsrb $7, (%eax), %xmm2, %xmm5 + +// CHECK: vpinsrd $7, %eax, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0xe8,0x07] + vpinsrd $7, %eax, %xmm2, %xmm5 + +// CHECK: vpinsrd $7, (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0x28,0x07] + vpinsrd $7, (%eax), %xmm2, %xmm5 + +// CHECK: vinsertps $7, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0xca,0x07] + vinsertps $7, %xmm2, %xmm5, %xmm1 + +// CHECK: vinsertps $7, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0x08,0x07] + vinsertps $7, (%eax), %xmm5, %xmm1 + +// CHECK: vptest %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0xea] + vptest %xmm2, %xmm5 + +// CHECK: vptest (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0x10] + vptest (%eax), %xmm2 + +// CHECK: vmovntdqa (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x2a,0x10] + vmovntdqa (%eax), %xmm2 + +// CHECK: vpcmpgtq %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0xca] + vpcmpgtq %xmm2, %xmm5, %xmm1 + +// CHECK: vpcmpgtq (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0x18] + vpcmpgtq (%eax), %xmm5, %xmm3 + +// CHECK: vpcmpistrm $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0xea,0x07] + vpcmpistrm $7, %xmm2, %xmm5 + +// CHECK: vpcmpistrm $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0x28,0x07] + vpcmpistrm $7, (%eax), %xmm5 + +// CHECK: vpcmpestrm $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0xea,0x07] + vpcmpestrm $7, %xmm2, %xmm5 + +// CHECK: vpcmpestrm $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0x28,0x07] + vpcmpestrm $7, (%eax), %xmm5 + +// CHECK: vpcmpistri $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0xea,0x07] + vpcmpistri $7, %xmm2, %xmm5 + +// CHECK: vpcmpistri $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0x28,0x07] + vpcmpistri $7, (%eax), %xmm5 + +// CHECK: vpcmpestri $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0xea,0x07] + vpcmpestri $7, %xmm2, %xmm5 + +// CHECK: vpcmpestri $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0x28,0x07] + vpcmpestri $7, (%eax), %xmm5 + +// CHECK: vaesimc %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0xea] + vaesimc %xmm2, %xmm5 + +// CHECK: vaesimc (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0x10] + vaesimc (%eax), %xmm2 + +// CHECK: vaesenc %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0xca] + vaesenc %xmm2, %xmm5, %xmm1 + +// CHECK: vaesenc (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0x18] + vaesenc (%eax), %xmm5, %xmm3 + +// CHECK: vaesenclast %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0xca] + vaesenclast %xmm2, %xmm5, %xmm1 + +// CHECK: vaesenclast (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0x18] + vaesenclast (%eax), %xmm5, %xmm3 + +// CHECK: vaesdec %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0xca] + vaesdec %xmm2, %xmm5, %xmm1 + +// CHECK: vaesdec (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0x18] + vaesdec (%eax), %xmm5, %xmm3 + +// CHECK: vaesdeclast %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0xca] + vaesdeclast %xmm2, %xmm5, %xmm1 + +// CHECK: vaesdeclast (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0x18] + vaesdeclast (%eax), %xmm5, %xmm3 + +// CHECK: vaeskeygenassist $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0xea,0x07] + vaeskeygenassist $7, %xmm2, %xmm5 + +// CHECK: vaeskeygenassist $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0x28,0x07] + vaeskeygenassist $7, (%eax), %xmm5 + +// CHECK: vcmpps $8, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x08] + vcmpeq_uqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $9, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x09] + vcmpngeps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $10, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0a] + vcmpngtps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $11, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0b] + vcmpfalseps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $12, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0c] + vcmpneq_oqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $13, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0d] + vcmpgeps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $14, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0e] + vcmpgtps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $15, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0f] + vcmptrueps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $16, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x10] + vcmpeq_osps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $17, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x11] + vcmplt_oqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $18, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x12] + vcmple_oqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $19, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x13] + vcmpunord_sps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $20, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x14] + vcmpneq_usps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $21, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x15] + vcmpnlt_uqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $22, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x16] + vcmpnle_uqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $23, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x17] + vcmpord_sps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $24, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x18] + vcmpeq_usps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $25, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x19] + vcmpnge_uqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $26, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1a] + vcmpngt_uqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $27, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1b] + vcmpfalse_osps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $28, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1c] + vcmpneq_osps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $29, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1d] + vcmpge_oqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $30, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1e] + vcmpgt_oqps %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpps $31, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1f] + vcmptrue_usps %xmm1, %xmm2, %xmm3 + +// CHECK: vmovaps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x28,0x10] + vmovaps (%eax), %ymm2 + +// CHECK: vmovaps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x28,0xd1] + vmovaps %ymm1, %ymm2 + +// CHECK: vmovaps %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfc,0x29,0x08] + vmovaps %ymm1, (%eax) + +// CHECK: vmovapd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x28,0x10] + vmovapd (%eax), %ymm2 + +// CHECK: vmovapd %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x28,0xd1] + vmovapd %ymm1, %ymm2 + +// CHECK: vmovapd %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfd,0x29,0x08] + vmovapd %ymm1, (%eax) + +// CHECK: vmovups (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x10,0x10] + vmovups (%eax), %ymm2 + +// CHECK: vmovups %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x10,0xd1] + vmovups %ymm1, %ymm2 + +// CHECK: vmovups %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfc,0x11,0x08] + vmovups %ymm1, (%eax) + +// CHECK: vmovupd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x10,0x10] + vmovupd (%eax), %ymm2 + +// CHECK: vmovupd %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x10,0xd1] + vmovupd %ymm1, %ymm2 + +// CHECK: vmovupd %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfd,0x11,0x08] + vmovupd %ymm1, (%eax) + +// CHECK: vunpckhps %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xec,0x15,0xe1] + vunpckhps %ymm1, %ymm2, %ymm4 + +// CHECK: vunpckhpd %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xed,0x15,0xe1] + vunpckhpd %ymm1, %ymm2, %ymm4 + +// CHECK: vunpcklps %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xec,0x14,0xe1] + vunpcklps %ymm1, %ymm2, %ymm4 + +// CHECK: vunpcklpd %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xed,0x14,0xe1] + vunpcklpd %ymm1, %ymm2, %ymm4 + +// CHECK: vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x15,0x6c,0xcb,0xfc] + vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x15,0x6c,0xcb,0xfc] + vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x14,0x6c,0xcb,0xfc] + vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x14,0x6c,0xcb,0xfc] + vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vmovntdq %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfd,0xe7,0x08] + vmovntdq %ymm1, (%eax) + +// CHECK: vmovntpd %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfd,0x2b,0x08] + vmovntpd %ymm1, (%eax) + +// CHECK: vmovntps %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfc,0x2b,0x08] + vmovntps %ymm1, (%eax) + +// CHECK: vmovmskps %xmm2, %eax +// CHECK: encoding: [0xc5,0xf8,0x50,0xc2] + vmovmskps %xmm2, %eax + +// CHECK: vmovmskpd %xmm2, %eax +// CHECK: encoding: [0xc5,0xf9,0x50,0xc2] + vmovmskpd %xmm2, %eax + +// CHECK: vmaxps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5f,0xf2] + vmaxps %ymm2, %ymm4, %ymm6 + +// CHECK: vmaxpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5f,0xf2] + vmaxpd %ymm2, %ymm4, %ymm6 + +// CHECK: vminps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5d,0xf2] + vminps %ymm2, %ymm4, %ymm6 + +// CHECK: vminpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5d,0xf2] + vminpd %ymm2, %ymm4, %ymm6 + +// CHECK: vsubps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5c,0xf2] + vsubps %ymm2, %ymm4, %ymm6 + +// CHECK: vsubpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5c,0xf2] + vsubpd %ymm2, %ymm4, %ymm6 + +// CHECK: vdivps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5e,0xf2] + vdivps %ymm2, %ymm4, %ymm6 + +// CHECK: vdivpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5e,0xf2] + vdivpd %ymm2, %ymm4, %ymm6 + +// CHECK: vaddps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x58,0xf2] + vaddps %ymm2, %ymm4, %ymm6 + +// CHECK: vaddpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x58,0xf2] + vaddpd %ymm2, %ymm4, %ymm6 + +// CHECK: vmulps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x59,0xf2] + vmulps %ymm2, %ymm4, %ymm6 + +// CHECK: vmulpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x59,0xf2] + vmulpd %ymm2, %ymm4, %ymm6 + +// CHECK: vmaxps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5f,0x30] + vmaxps (%eax), %ymm4, %ymm6 + +// CHECK: vmaxpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5f,0x30] + vmaxpd (%eax), %ymm4, %ymm6 + +// CHECK: vminps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5d,0x30] + vminps (%eax), %ymm4, %ymm6 + +// CHECK: vminpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5d,0x30] + vminpd (%eax), %ymm4, %ymm6 + +// CHECK: vsubps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5c,0x30] + vsubps (%eax), %ymm4, %ymm6 + +// CHECK: vsubpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5c,0x30] + vsubpd (%eax), %ymm4, %ymm6 + +// CHECK: vdivps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5e,0x30] + vdivps (%eax), %ymm4, %ymm6 + +// CHECK: vdivpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5e,0x30] + vdivpd (%eax), %ymm4, %ymm6 + +// CHECK: vaddps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x58,0x30] + vaddps (%eax), %ymm4, %ymm6 + +// CHECK: vaddpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x58,0x30] + vaddpd (%eax), %ymm4, %ymm6 + +// CHECK: vmulps (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x59,0x30] + vmulps (%eax), %ymm4, %ymm6 + +// CHECK: vmulpd (%eax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x59,0x30] + vmulpd (%eax), %ymm4, %ymm6 + +// CHECK: vsqrtpd %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x51,0xd1] + vsqrtpd %ymm1, %ymm2 + +// CHECK: vsqrtpd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x51,0x10] + vsqrtpd (%eax), %ymm2 + +// CHECK: vsqrtps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x51,0xd1] + vsqrtps %ymm1, %ymm2 + +// CHECK: vsqrtps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x51,0x10] + vsqrtps (%eax), %ymm2 + +// CHECK: vrsqrtps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x52,0xd1] + vrsqrtps %ymm1, %ymm2 + +// CHECK: vrsqrtps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x52,0x10] + vrsqrtps (%eax), %ymm2 + +// CHECK: vrcpps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x53,0xd1] + vrcpps %ymm1, %ymm2 + +// CHECK: vrcpps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x53,0x10] + vrcpps (%eax), %ymm2 + +// CHECK: vandps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x54,0xf2] + vandps %ymm2, %ymm4, %ymm6 + +// CHECK: vandpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x54,0xf2] + vandpd %ymm2, %ymm4, %ymm6 + +// CHECK: vandps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x54,0x6c,0xcb,0xfc] + vandps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x54,0x6c,0xcb,0xfc] + vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vorps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x56,0xf2] + vorps %ymm2, %ymm4, %ymm6 + +// CHECK: vorpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x56,0xf2] + vorpd %ymm2, %ymm4, %ymm6 + +// CHECK: vorps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x56,0x6c,0xcb,0xfc] + vorps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x56,0x6c,0xcb,0xfc] + vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vxorps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x57,0xf2] + vxorps %ymm2, %ymm4, %ymm6 + +// CHECK: vxorpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x57,0xf2] + vxorpd %ymm2, %ymm4, %ymm6 + +// CHECK: vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x57,0x6c,0xcb,0xfc] + vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x57,0x6c,0xcb,0xfc] + vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vandnps %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x55,0xf2] + vandnps %ymm2, %ymm4, %ymm6 + +// CHECK: vandnpd %ymm2, %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x55,0xf2] + vandnpd %ymm2, %ymm4, %ymm6 + +// CHECK: vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x55,0x6c,0xcb,0xfc] + vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x55,0x6c,0xcb,0xfc] + vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vcvtps2pd %xmm3, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x5a,0xd3] + vcvtps2pd %xmm3, %ymm2 + +// CHECK: vcvtps2pd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x5a,0x10] + vcvtps2pd (%eax), %ymm2 + +// CHECK: vcvtdq2pd %xmm3, %ymm2 +// CHECK: encoding: [0xc5,0xfe,0xe6,0xd3] + vcvtdq2pd %xmm3, %ymm2 + +// CHECK: vcvtdq2pd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfe,0xe6,0x10] + vcvtdq2pd (%eax), %ymm2 + +// CHECK: vcvtdq2ps %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfc,0x5b,0xea] + vcvtdq2ps %ymm2, %ymm5 + +// CHECK: vcvtdq2ps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x5b,0x10] + vcvtdq2ps (%eax), %ymm2 + +// CHECK: vcvtps2dq %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfd,0x5b,0xea] + vcvtps2dq %ymm2, %ymm5 + +// CHECK: vcvtps2dq (%eax), %ymm5 +// CHECK: encoding: [0xc5,0xfd,0x5b,0x28] + vcvtps2dq (%eax), %ymm5 + +// CHECK: vcvttps2dq %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x5b,0xea] + vcvttps2dq %ymm2, %ymm5 + +// CHECK: vcvttps2dq (%eax), %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x5b,0x28] + vcvttps2dq (%eax), %ymm5 + +// CHECK: vcvttpd2dq %xmm1, %xmm5 +// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9] + vcvttpd2dq %xmm1, %xmm5 + +// CHECK: vcvttpd2dq %ymm2, %xmm5 +// CHECK: encoding: [0xc5,0xfd,0xe6,0xea] + vcvttpd2dq %ymm2, %xmm5 + +// CHECK: vcvttpd2dqx %xmm1, %xmm5 +// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9] + vcvttpd2dqx %xmm1, %xmm5 + +// CHECK: vcvttpd2dqx (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xf9,0xe6,0x08] + vcvttpd2dqx (%eax), %xmm1 + +// CHECK: vcvttpd2dqy %ymm2, %xmm1 +// CHECK: encoding: [0xc5,0xfd,0xe6,0xca] + vcvttpd2dqy %ymm2, %xmm1 + +// CHECK: vcvttpd2dqy (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xfd,0xe6,0x08] + vcvttpd2dqy (%eax), %xmm1 + +// CHECK: vcvtpd2ps %ymm2, %xmm5 +// CHECK: encoding: [0xc5,0xfd,0x5a,0xea] + vcvtpd2ps %ymm2, %xmm5 + +// CHECK: vcvtpd2psx %xmm1, %xmm5 +// CHECK: encoding: [0xc5,0xf9,0x5a,0xe9] + vcvtpd2psx %xmm1, %xmm5 + +// CHECK: vcvtpd2psx (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xf9,0x5a,0x08] + vcvtpd2psx (%eax), %xmm1 + +// CHECK: vcvtpd2psy %ymm2, %xmm1 +// CHECK: encoding: [0xc5,0xfd,0x5a,0xca] + vcvtpd2psy %ymm2, %xmm1 + +// CHECK: vcvtpd2psy (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xfd,0x5a,0x08] + vcvtpd2psy (%eax), %xmm1 + +// CHECK: vcvtpd2dq %ymm2, %xmm5 +// CHECK: encoding: [0xc5,0xff,0xe6,0xea] + vcvtpd2dq %ymm2, %xmm5 + +// CHECK: vcvtpd2dqy %ymm2, %xmm1 +// CHECK: encoding: [0xc5,0xff,0xe6,0xca] + vcvtpd2dqy %ymm2, %xmm1 + +// CHECK: vcvtpd2dqy (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xff,0xe6,0x08] + vcvtpd2dqy (%eax), %xmm1 + +// CHECK: vcvtpd2dqx %xmm1, %xmm5 +// CHECK: encoding: [0xc5,0xfb,0xe6,0xe9] + vcvtpd2dqx %xmm1, %xmm5 + +// CHECK: vcvtpd2dqx (%eax), %xmm1 +// CHECK: encoding: [0xc5,0xfb,0xe6,0x08] + vcvtpd2dqx (%eax), %xmm1 + +// CHECK: vcmpps $0, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x00] + vcmpeqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $2, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x02] + vcmpleps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $1, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x01] + vcmpltps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $4, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x04] + vcmpneqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $6, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x06] + vcmpnleps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $5, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x05] + vcmpnltps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $7, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x07] + vcmpordps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $3, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x03] + vcmpunordps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x02] + vcmpleps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnleps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %ymm6, %ymm2 +// CHECK: encoding: [0xc5,0xcc,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordps -4(%ebx,%ecx,8), %ymm6, %ymm2 + +// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordps -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $0, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x00] + vcmpeqpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $2, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x02] + vcmplepd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $1, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x01] + vcmpltpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $4, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x04] + vcmpneqpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $6, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x06] + vcmpnlepd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $5, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x05] + vcmpnltpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $7, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x07] + vcmpordpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $3, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x03] + vcmpunordpd %ymm1, %ymm2, %ymm3 + +// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqpd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x02] + vcmplepd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltpd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqpd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnlepd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltpd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %ymm6, %ymm2 +// CHECK: encoding: [0xc5,0xcd,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordpd -4(%ebx,%ecx,8), %ymm6, %ymm2 + +// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordpd -4(%ebx,%ecx,8), %ymm2, %ymm3 + +// CHECK: vcmpps $8, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x08] + vcmpeq_uqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $9, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x09] + vcmpngeps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $10, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0a] + vcmpngtps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $11, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0b] + vcmpfalseps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $12, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0c] + vcmpneq_oqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $13, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0d] + vcmpgeps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $14, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0e] + vcmpgtps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $15, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0f] + vcmptrueps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $16, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x10] + vcmpeq_osps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $17, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x11] + vcmplt_oqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $18, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x12] + vcmple_oqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $19, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x13] + vcmpunord_sps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $20, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x14] + vcmpneq_usps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $21, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x15] + vcmpnlt_uqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $22, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x16] + vcmpnle_uqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $23, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x17] + vcmpord_sps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $24, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x18] + vcmpeq_usps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $25, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x19] + vcmpnge_uqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $26, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1a] + vcmpngt_uqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $27, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1b] + vcmpfalse_osps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $28, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1c] + vcmpneq_osps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $29, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1d] + vcmpge_oqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $30, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1e] + vcmpgt_oqps %ymm1, %ymm2, %ymm3 + +// CHECK: vcmpps $31, %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1f] + vcmptrue_usps %ymm1, %ymm2, %ymm3 + +// CHECK: vaddsubps %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xef,0xd0,0xd9] + vaddsubps %ymm1, %ymm2, %ymm3 + +// CHECK: vaddsubps (%eax), %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xf7,0xd0,0x10] + vaddsubps (%eax), %ymm1, %ymm2 + +// CHECK: vaddsubpd %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0xd0,0xd9] + vaddsubpd %ymm1, %ymm2, %ymm3 + +// CHECK: vaddsubpd (%eax), %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xf5,0xd0,0x10] + vaddsubpd (%eax), %ymm1, %ymm2 + +// CHECK: vhaddps %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xef,0x7c,0xd9] + vhaddps %ymm1, %ymm2, %ymm3 + +// CHECK: vhaddps (%eax), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xef,0x7c,0x18] + vhaddps (%eax), %ymm2, %ymm3 + +// CHECK: vhaddpd %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0x7c,0xd9] + vhaddpd %ymm1, %ymm2, %ymm3 + +// CHECK: vhaddpd (%eax), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0x7c,0x18] + vhaddpd (%eax), %ymm2, %ymm3 + +// CHECK: vhsubps %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xef,0x7d,0xd9] + vhsubps %ymm1, %ymm2, %ymm3 + +// CHECK: vhsubps (%eax), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xef,0x7d,0x18] + vhsubps (%eax), %ymm2, %ymm3 + +// CHECK: vhsubpd %ymm1, %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0x7d,0xd9] + vhsubpd %ymm1, %ymm2, %ymm3 + +// CHECK: vhsubpd (%eax), %ymm2, %ymm3 +// CHECK: encoding: [0xc5,0xed,0x7d,0x18] + vhsubpd (%eax), %ymm2, %ymm3 + +// CHECK: vblendps $3, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0xca,0x03] + vblendps $3, %ymm2, %ymm5, %ymm1 + +// CHECK: vblendps $3, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0x08,0x03] + vblendps $3, (%eax), %ymm5, %ymm1 + +// CHECK: vblendpd $3, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0xca,0x03] + vblendpd $3, %ymm2, %ymm5, %ymm1 + +// CHECK: vblendpd $3, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0x08,0x03] + vblendpd $3, (%eax), %ymm5, %ymm1 + +// CHECK: vdpps $3, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0xca,0x03] + vdpps $3, %ymm2, %ymm5, %ymm1 + +// CHECK: vdpps $3, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0x08,0x03] + vdpps $3, (%eax), %ymm5, %ymm1 + +// CHECK: vbroadcastf128 (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x1a,0x10] + vbroadcastf128 (%eax), %ymm2 + +// CHECK: vbroadcastsd (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x19,0x10] + vbroadcastsd (%eax), %ymm2 + +// CHECK: vbroadcastss (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x18,0x10] + vbroadcastss (%eax), %xmm2 + +// CHECK: vbroadcastss (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x18,0x10] + vbroadcastss (%eax), %ymm2 + +// CHECK: vinsertf128 $7, %xmm2, %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x6d,0x18,0xea,0x07] + vinsertf128 $7, %xmm2, %ymm2, %ymm5 + +// CHECK: vinsertf128 $7, (%eax), %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x6d,0x18,0x28,0x07] + vinsertf128 $7, (%eax), %ymm2, %ymm5 + +// CHECK: vextractf128 $7, %ymm2, %xmm2 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x19,0xd2,0x07] + vextractf128 $7, %ymm2, %xmm2 + +// CHECK: vextractf128 $7, %ymm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x7d,0x19,0x10,0x07] + vextractf128 $7, %ymm2, (%eax) + +// CHECK: vmaskmovpd %xmm2, %xmm5, (%eax) +// CHECK: encoding: [0xc4,0xe2,0x51,0x2f,0x10] + vmaskmovpd %xmm2, %xmm5, (%eax) + +// CHECK: vmaskmovpd %ymm2, %ymm5, (%eax) +// CHECK: encoding: [0xc4,0xe2,0x55,0x2f,0x10] + vmaskmovpd %ymm2, %ymm5, (%eax) + +// CHECK: vmaskmovpd (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x69,0x2d,0x28] + vmaskmovpd (%eax), %xmm2, %xmm5 + +// CHECK: vmaskmovpd (%eax), %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x6d,0x2d,0x28] + vmaskmovpd (%eax), %ymm2, %ymm5 + +// CHECK: vmaskmovps %xmm2, %xmm5, (%eax) +// CHECK: encoding: [0xc4,0xe2,0x51,0x2e,0x10] + vmaskmovps %xmm2, %xmm5, (%eax) + +// CHECK: vmaskmovps %ymm2, %ymm5, (%eax) +// CHECK: encoding: [0xc4,0xe2,0x55,0x2e,0x10] + vmaskmovps %ymm2, %ymm5, (%eax) + +// CHECK: vmaskmovps (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x69,0x2c,0x28] + vmaskmovps (%eax), %xmm2, %xmm5 + +// CHECK: vmaskmovps (%eax), %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x6d,0x2c,0x28] + vmaskmovps (%eax), %ymm2, %ymm5 + +// CHECK: vpermilps $7, %xmm1, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x04,0xe9,0x07] + vpermilps $7, %xmm1, %xmm5 + +// CHECK: vpermilps $7, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x04,0xcd,0x07] + vpermilps $7, %ymm5, %ymm1 + +// CHECK: vpermilps $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x04,0x28,0x07] + vpermilps $7, (%eax), %xmm5 + +// CHECK: vpermilps $7, (%eax), %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x04,0x28,0x07] + vpermilps $7, (%eax), %ymm5 + +// CHECK: vpermilps %xmm1, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x0c,0xc9] + vpermilps %xmm1, %xmm5, %xmm1 + +// CHECK: vpermilps %ymm1, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x0c,0xc9] + vpermilps %ymm1, %ymm5, %ymm1 + +// CHECK: vpermilps (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0x0c,0x18] + vpermilps (%eax), %xmm5, %xmm3 + +// CHECK: vpermilps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x0c,0x08] + vpermilps (%eax), %ymm5, %ymm1 + +// CHECK: vpermilpd $7, %xmm1, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x05,0xe9,0x07] + vpermilpd $7, %xmm1, %xmm5 + +// CHECK: vpermilpd $7, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x05,0xcd,0x07] + vpermilpd $7, %ymm5, %ymm1 + +// CHECK: vpermilpd $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x05,0x28,0x07] + vpermilpd $7, (%eax), %xmm5 + +// CHECK: vpermilpd $7, (%eax), %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x05,0x28,0x07] + vpermilpd $7, (%eax), %ymm5 + +// CHECK: vpermilpd %xmm1, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x0d,0xc9] + vpermilpd %xmm1, %xmm5, %xmm1 + +// CHECK: vpermilpd %ymm1, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x0d,0xc9] + vpermilpd %ymm1, %ymm5, %ymm1 + +// CHECK: vpermilpd (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0x0d,0x18] + vpermilpd (%eax), %xmm5, %xmm3 + +// CHECK: vpermilpd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x0d,0x08] + vpermilpd (%eax), %ymm5, %ymm1 + +// CHECK: vperm2f128 $7, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x06,0xca,0x07] + vperm2f128 $7, %ymm2, %ymm5, %ymm1 + +// CHECK: vperm2f128 $7, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x55,0x06,0x08,0x07] + vperm2f128 $7, (%eax), %ymm5, %ymm1 + +// CHECK: vzeroall +// CHECK: encoding: [0xc5,0xfc,0x77] + vzeroall + +// CHECK: vzeroupper +// CHECK: encoding: [0xc5,0xf8,0x77] + vzeroupper + +// CHECK: vcvtsd2si %xmm4, %ecx +// CHECK: encoding: [0xc5,0xfb,0x2d,0xcc] + vcvtsd2si %xmm4, %ecx + +// CHECK: vcvtsd2si (%ecx), %ecx +// CHECK: encoding: [0xc5,0xfb,0x2d,0x09] + vcvtsd2si (%ecx), %ecx + +// CHECK: vcvtsi2sdl (%ebp), %xmm0, %xmm7 +// CHECK: encoding: [0xc5,0xfb,0x2a,0x7d,0x00] + vcvtsi2sdl (%ebp), %xmm0, %xmm7 + +// CHECK: vcvtsi2sdl (%esp), %xmm0, %xmm7 +// CHECK: encoding: [0xc5,0xfb,0x2a,0x3c,0x24] + vcvtsi2sdl (%esp), %xmm0, %xmm7 + +// CHECK: vlddqu (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xff,0xf0,0x10] + vlddqu (%eax), %ymm2 + +// CHECK: vmovddup %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xff,0x12,0xea] + vmovddup %ymm2, %ymm5 + +// CHECK: vmovddup (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xff,0x12,0x10] + vmovddup (%eax), %ymm2 + +// CHECK: vmovdqa %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfd,0x6f,0xea] + vmovdqa %ymm2, %ymm5 + +// CHECK: vmovdqa %ymm2, (%eax) +// CHECK: encoding: [0xc5,0xfd,0x7f,0x10] + vmovdqa %ymm2, (%eax) + +// CHECK: vmovdqa (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x6f,0x10] + vmovdqa (%eax), %ymm2 + +// CHECK: vmovdqu %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x6f,0xea] + vmovdqu %ymm2, %ymm5 + +// CHECK: vmovdqu %ymm2, (%eax) +// CHECK: encoding: [0xc5,0xfe,0x7f,0x10] + vmovdqu %ymm2, (%eax) + +// CHECK: vmovdqu (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfe,0x6f,0x10] + vmovdqu (%eax), %ymm2 + +// CHECK: vmovshdup %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x16,0xea] + vmovshdup %ymm2, %ymm5 + +// CHECK: vmovshdup (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfe,0x16,0x10] + vmovshdup (%eax), %ymm2 + +// CHECK: vmovsldup %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x12,0xea] + vmovsldup %ymm2, %ymm5 + +// CHECK: vmovsldup (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfe,0x12,0x10] + vmovsldup (%eax), %ymm2 + +// CHECK: vptest %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0xea] + vptest %ymm2, %ymm5 + +// CHECK: vptest (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0x10] + vptest (%eax), %ymm2 + +// CHECK: vroundpd $7, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0xcd,0x07] + vroundpd $7, %ymm5, %ymm1 + +// CHECK: vroundpd $7, (%eax), %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0x28,0x07] + vroundpd $7, (%eax), %ymm5 + +// CHECK: vroundps $7, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0xcd,0x07] + vroundps $7, %ymm5, %ymm1 + +// CHECK: vroundps $7, (%eax), %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0x28,0x07] + vroundps $7, (%eax), %ymm5 + +// CHECK: vshufpd $7, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd5,0xc6,0xca,0x07] + vshufpd $7, %ymm2, %ymm5, %ymm1 + +// CHECK: vshufpd $7, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd5,0xc6,0x08,0x07] + vshufpd $7, (%eax), %ymm5, %ymm1 + +// CHECK: vshufps $7, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd4,0xc6,0xca,0x07] + vshufps $7, %ymm2, %ymm5, %ymm1 + +// CHECK: vshufps $7, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd4,0xc6,0x08,0x07] + vshufps $7, (%eax), %ymm5, %ymm1 + +// CHECK: vtestpd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0xea] + vtestpd %xmm2, %xmm5 + +// CHECK: vtestpd %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0xea] + vtestpd %ymm2, %ymm5 + +// CHECK: vtestpd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0x10] + vtestpd (%eax), %xmm2 + +// CHECK: vtestpd (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0x10] + vtestpd (%eax), %ymm2 + +// CHECK: vtestps %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0xea] + vtestps %xmm2, %xmm5 + +// CHECK: vtestps %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0xea] + vtestps %ymm2, %ymm5 + +// CHECK: vtestps (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0x10] + vtestps (%eax), %xmm2 + +// CHECK: vtestps (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0x10] + vtestps (%eax), %ymm2 + +// CHECK: vblendvpd %ymm0, 57005(%eax,%eiz), %ymm1, %ymm2 +// CHECK: encoding: [0xc4,0xe3,0x75,0x4b,0x94,0x20,0xad,0xde,0x00,0x00,0x00] + vblendvpd %ymm0, 0xdead(%eax,%eiz), %ymm1, %ymm2 + diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index ebafb11..ef77423 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -10047,2882 +10047,23 @@ // CHECK: encoding: [0xde,0x1d,0xed,0x7e,0x00,0x00] ficomps 32493 -// CHECK: vaddss %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0x58,0xd4] - vaddss %xmm4, %xmm6, %xmm2 +// CHECK: movl 57005(,%eiz), %ebx +// CHECK: encoding: [0x8b,0x1c,0x25,0xad,0xde,0x00,0x00] + movl 57005(,%eiz), %ebx -// CHECK: vmulss %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0x59,0xd4] - vmulss %xmm4, %xmm6, %xmm2 +// CHECK: movl 48879(,%eiz), %eax +// CHECK: encoding: [0x8b,0x04,0x25,0xef,0xbe,0x00,0x00] + movl 48879(,%eiz), %eax -// CHECK: vsubss %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0x5c,0xd4] - vsubss %xmm4, %xmm6, %xmm2 +// CHECK: movl -4(,%eiz,8), %eax +// CHECK: encoding: [0x8b,0x04,0xe5,0xfc,0xff,0xff,0xff] + movl -4(,%eiz,8), %eax -// CHECK: vdivss %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0x5e,0xd4] - vdivss %xmm4, %xmm6, %xmm2 +// CHECK: movl (%ecx,%eiz), %eax +// CHECK: encoding: [0x8b,0x04,0x21] + movl (%ecx,%eiz), %eax -// CHECK: vaddsd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0x58,0xd4] - vaddsd %xmm4, %xmm6, %xmm2 - -// CHECK: vmulsd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0x59,0xd4] - vmulsd %xmm4, %xmm6, %xmm2 - -// CHECK: vsubsd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0x5c,0xd4] - vsubsd %xmm4, %xmm6, %xmm2 - -// CHECK: vdivsd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0x5e,0xd4] - vdivsd %xmm4, %xmm6, %xmm2 - -// CHECK: vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] - vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] - vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] - vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] - vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] - vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] - vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] - vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vdivsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] - vdivsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vaddps %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0x58,0xd4] - vaddps %xmm4, %xmm6, %xmm2 - -// CHECK: vsubps %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0x5c,0xd4] - vsubps %xmm4, %xmm6, %xmm2 - -// CHECK: vmulps %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0x59,0xd4] - vmulps %xmm4, %xmm6, %xmm2 - -// CHECK: vdivps %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0x5e,0xd4] - vdivps %xmm4, %xmm6, %xmm2 - -// CHECK: vaddpd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0x58,0xd4] - vaddpd %xmm4, %xmm6, %xmm2 - -// CHECK: vsubpd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0x5c,0xd4] - vsubpd %xmm4, %xmm6, %xmm2 - -// CHECK: vmulpd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0x59,0xd4] - vmulpd %xmm4, %xmm6, %xmm2 - -// CHECK: vdivpd %xmm4, %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0x5e,0xd4] - vdivpd %xmm4, %xmm6, %xmm2 - -// CHECK: vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] - vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] - vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] - vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] - vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde] - vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde] - vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmulpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde] - vmulpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde] - vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: vmaxss %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xda,0x5f,0xf2] - vmaxss %xmm2, %xmm4, %xmm6 - -// CHECK: vmaxsd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xdb,0x5f,0xf2] - vmaxsd %xmm2, %xmm4, %xmm6 - -// CHECK: vminss %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xda,0x5d,0xf2] - vminss %xmm2, %xmm4, %xmm6 - -// CHECK: vminsd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xdb,0x5d,0xf2] - vminsd %xmm2, %xmm4, %xmm6 - -// CHECK: vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x5f,0x6c,0xcb,0xfc] - vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x5f,0x6c,0xcb,0xfc] - vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vminss -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x5d,0x6c,0xcb,0xfc] - vminss -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x5d,0x6c,0xcb,0xfc] - vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmaxps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x5f,0xf2] - vmaxps %xmm2, %xmm4, %xmm6 - -// CHECK: vmaxpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x5f,0xf2] - vmaxpd %xmm2, %xmm4, %xmm6 - -// CHECK: vminps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x5d,0xf2] - vminps %xmm2, %xmm4, %xmm6 - -// CHECK: vminpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x5d,0xf2] - vminpd %xmm2, %xmm4, %xmm6 - -// CHECK: vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x5f,0x6c,0xcb,0xfc] - vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x5f,0x6c,0xcb,0xfc] - vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vminps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x5d,0x6c,0xcb,0xfc] - vminps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x5d,0x6c,0xcb,0xfc] - vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vandps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x54,0xf2] - vandps %xmm2, %xmm4, %xmm6 - -// CHECK: vandpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x54,0xf2] - vandpd %xmm2, %xmm4, %xmm6 - -// CHECK: vandps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x54,0x6c,0xcb,0xfc] - vandps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x54,0x6c,0xcb,0xfc] - vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vorps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x56,0xf2] - vorps %xmm2, %xmm4, %xmm6 - -// CHECK: vorpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x56,0xf2] - vorpd %xmm2, %xmm4, %xmm6 - -// CHECK: vorps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x56,0x6c,0xcb,0xfc] - vorps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x56,0x6c,0xcb,0xfc] - vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vxorps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x57,0xf2] - vxorps %xmm2, %xmm4, %xmm6 - -// CHECK: vxorpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x57,0xf2] - vxorpd %xmm2, %xmm4, %xmm6 - -// CHECK: vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x57,0x6c,0xcb,0xfc] - vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x57,0x6c,0xcb,0xfc] - vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vandnps %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd8,0x55,0xf2] - vandnps %xmm2, %xmm4, %xmm6 - -// CHECK: vandnpd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xd9,0x55,0xf2] - vandnpd %xmm2, %xmm4, %xmm6 - -// CHECK: vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x55,0x6c,0xcb,0xfc] - vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x55,0x6c,0xcb,0xfc] - vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vmovss -4(%ebx,%ecx,8), %xmm5 -// CHECK: encoding: [0xc5,0xfa,0x10,0x6c,0xcb,0xfc] - vmovss -4(%ebx,%ecx,8), %xmm5 - -// CHECK: vmovss %xmm4, %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xea,0x10,0xec] - vmovss %xmm4, %xmm2, %xmm5 - -// CHECK: vmovsd -4(%ebx,%ecx,8), %xmm5 -// CHECK: encoding: [0xc5,0xfb,0x10,0x6c,0xcb,0xfc] - vmovsd -4(%ebx,%ecx,8), %xmm5 - -// CHECK: vmovsd %xmm4, %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xeb,0x10,0xec] - vmovsd %xmm4, %xmm2, %xmm5 - -// CHECK: vunpckhps %xmm1, %xmm2, %xmm4 -// CHECK: encoding: [0xc5,0xe8,0x15,0xe1] - vunpckhps %xmm1, %xmm2, %xmm4 - -// CHECK: vunpckhpd %xmm1, %xmm2, %xmm4 -// CHECK: encoding: [0xc5,0xe9,0x15,0xe1] - vunpckhpd %xmm1, %xmm2, %xmm4 - -// CHECK: vunpcklps %xmm1, %xmm2, %xmm4 -// CHECK: encoding: [0xc5,0xe8,0x14,0xe1] - vunpcklps %xmm1, %xmm2, %xmm4 - -// CHECK: vunpcklpd %xmm1, %xmm2, %xmm4 -// CHECK: encoding: [0xc5,0xe9,0x14,0xe1] - vunpcklpd %xmm1, %xmm2, %xmm4 - -// CHECK: vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x15,0x6c,0xcb,0xfc] - vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x15,0x6c,0xcb,0xfc] - vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe8,0x14,0x6c,0xcb,0xfc] - vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0x14,0x6c,0xcb,0xfc] - vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5 - -// CHECK: vcmpps $0, %xmm0, %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x00] - vcmpps $0, %xmm0, %xmm6, %xmm1 - -// CHECK: vcmpps $0, (%eax), %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc8,0xc2,0x08,0x00] - vcmpps $0, (%eax), %xmm6, %xmm1 - -// CHECK: vcmpps $7, %xmm0, %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x07] - vcmpps $7, %xmm0, %xmm6, %xmm1 - -// CHECK: vcmppd $0, %xmm0, %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x00] - vcmppd $0, %xmm0, %xmm6, %xmm1 - -// CHECK: vcmppd $0, (%eax), %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc9,0xc2,0x08,0x00] - vcmppd $0, (%eax), %xmm6, %xmm1 - -// CHECK: vcmppd $7, %xmm0, %xmm6, %xmm1 -// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x07] - vcmppd $7, %xmm0, %xmm6, %xmm1 - -// CHECK: vshufps $8, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc6,0xd9,0x08] - vshufps $8, %xmm1, %xmm2, %xmm3 - -// CHECK: vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc6,0x5c,0xcb,0xfc,0x08] - vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vshufpd $8, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc6,0xd9,0x08] - vshufpd $8, %xmm1, %xmm2, %xmm3 - -// CHECK: vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc6,0x5c,0xcb,0xfc,0x08] - vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $0, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x00] - vcmpeqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $2, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x02] - vcmpleps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $1, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x01] - vcmpltps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $4, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x04] - vcmpneqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $6, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x06] - vcmpnleps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $5, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x05] - vcmpnltps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x07] - vcmpordps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $3, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x03] - vcmpunordps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x02] - vcmpleps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnleps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordps -4(%ebx,%ecx,8), %xmm6, %xmm2 - -// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordps -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $0, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x00] - vcmpeqpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $2, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x02] - vcmplepd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $1, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x01] - vcmpltpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $4, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x04] - vcmpneqpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $6, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x06] - vcmpnlepd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $5, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x05] - vcmpnltpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x07] - vcmpordpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $3, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x03] - vcmpunordpd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqpd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x02] - vcmplepd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltpd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqpd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnlepd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltpd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordpd -4(%ebx,%ecx,8), %xmm6, %xmm2 - -// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordpd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vmovmskps %xmm2, %eax -// CHECK: encoding: [0xc5,0xf8,0x50,0xc2] - vmovmskps %xmm2, %eax - -// CHECK: vmovmskpd %xmm2, %eax -// CHECK: encoding: [0xc5,0xf9,0x50,0xc2] - vmovmskpd %xmm2, %eax - -// CHECK: vcmpss $0, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x00] - vcmpeqss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $2, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x02] - vcmpless %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $1, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x01] - vcmpltss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $4, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x04] - vcmpneqss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $6, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x06] - vcmpnless %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $5, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x05] - vcmpnltss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x07] - vcmpordss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $3, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x03] - vcmpunordss %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpss $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqss -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x02] - vcmpless -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltss -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqss -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnless -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltss -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpss $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordss -4(%ebx,%ecx,8), %xmm6, %xmm2 - -// CHECK: vcmpss $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordss -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $0, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x00] - vcmpeqsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $2, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x02] - vcmplesd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $1, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x01] - vcmpltsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $4, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x04] - vcmpneqsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $6, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x06] - vcmpnlesd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $5, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x05] - vcmpnltsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x07] - vcmpordsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $3, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x03] - vcmpunordsd %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpsd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqsd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x02] - vcmplesd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltsd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqsd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnlesd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltsd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vcmpsd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordsd -4(%ebx,%ecx,8), %xmm6, %xmm2 - -// CHECK: vcmpsd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordsd -4(%ebx,%ecx,8), %xmm2, %xmm3 - -// CHECK: vucomiss %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x2e,0xd1] - vucomiss %xmm1, %xmm2 - -// CHECK: vucomiss (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x2e,0x10] - vucomiss (%eax), %xmm2 - -// CHECK: vcomiss %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x2f,0xd1] - vcomiss %xmm1, %xmm2 - -// CHECK: vcomiss (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x2f,0x10] - vcomiss (%eax), %xmm2 - -// CHECK: vucomisd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x2e,0xd1] - vucomisd %xmm1, %xmm2 - -// CHECK: vucomisd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x2e,0x10] - vucomisd (%eax), %xmm2 - -// CHECK: vcomisd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x2f,0xd1] - vcomisd %xmm1, %xmm2 - -// CHECK: vcomisd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x2f,0x10] - vcomisd (%eax), %xmm2 - -// CHECK: vcvttss2si %xmm1, %eax -// CHECK: encoding: [0xc5,0xfa,0x2c,0xc1] - vcvttss2si %xmm1, %eax - -// CHECK: vcvttss2si (%ecx), %eax -// CHECK: encoding: [0xc5,0xfa,0x2c,0x01] - vcvttss2si (%ecx), %eax - -// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf2,0x2a,0x10] - vcvtsi2ss (%eax), %xmm1, %xmm2 - -// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf2,0x2a,0x10] - vcvtsi2ss (%eax), %xmm1, %xmm2 - -// CHECK: vcvttsd2si %xmm1, %eax -// CHECK: encoding: [0xc5,0xfb,0x2c,0xc1] - vcvttsd2si %xmm1, %eax - -// CHECK: vcvttsd2si (%ecx), %eax -// CHECK: encoding: [0xc5,0xfb,0x2c,0x01] - vcvttsd2si (%ecx), %eax - -// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf3,0x2a,0x10] - vcvtsi2sd (%eax), %xmm1, %xmm2 - -// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf3,0x2a,0x10] - vcvtsi2sd (%eax), %xmm1, %xmm2 - -// CHECK: vmovaps (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x28,0x10] - vmovaps (%eax), %xmm2 - -// CHECK: vmovaps %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x28,0xd1] - vmovaps %xmm1, %xmm2 - -// CHECK: vmovaps %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf8,0x29,0x08] - vmovaps %xmm1, (%eax) - -// CHECK: vmovapd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x28,0x10] - vmovapd (%eax), %xmm2 - -// CHECK: vmovapd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x28,0xd1] - vmovapd %xmm1, %xmm2 - -// CHECK: vmovapd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x29,0x08] - vmovapd %xmm1, (%eax) - -// CHECK: vmovups (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x10,0x10] - vmovups (%eax), %xmm2 - -// CHECK: vmovups %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x10,0xd1] - vmovups %xmm1, %xmm2 - -// CHECK: vmovups %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf8,0x11,0x08] - vmovups %xmm1, (%eax) - -// CHECK: vmovupd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x10,0x10] - vmovupd (%eax), %xmm2 - -// CHECK: vmovupd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x10,0xd1] - vmovupd %xmm1, %xmm2 - -// CHECK: vmovupd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x11,0x08] - vmovupd %xmm1, (%eax) - -// CHECK: vmovlps %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf8,0x13,0x08] - vmovlps %xmm1, (%eax) - -// CHECK: vmovlps (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0x12,0x18] - vmovlps (%eax), %xmm2, %xmm3 - -// CHECK: vmovlpd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x13,0x08] - vmovlpd %xmm1, (%eax) - -// CHECK: vmovlpd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x12,0x18] - vmovlpd (%eax), %xmm2, %xmm3 - -// CHECK: vmovhps %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf8,0x17,0x08] - vmovhps %xmm1, (%eax) - -// CHECK: vmovhps (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0x16,0x18] - vmovhps (%eax), %xmm2, %xmm3 - -// CHECK: vmovhpd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x17,0x08] - vmovhpd %xmm1, (%eax) - -// CHECK: vmovhpd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x16,0x18] - vmovhpd (%eax), %xmm2, %xmm3 - -// CHECK: vmovlhps %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0x16,0xd9] - vmovlhps %xmm1, %xmm2, %xmm3 - -// CHECK: vmovhlps %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0x12,0xd9] - vmovhlps %xmm1, %xmm2, %xmm3 - -// CHECK: vcvtss2sil %xmm1, %eax -// CHECK: encoding: [0xc5,0xfa,0x2d,0xc1] - vcvtss2si %xmm1, %eax - -// CHECK: vcvtss2sil (%eax), %ebx -// CHECK: encoding: [0xc5,0xfa,0x2d,0x18] - vcvtss2si (%eax), %ebx - -// CHECK: vcvtdq2ps %xmm5, %xmm6 -// CHECK: encoding: [0xc5,0xf8,0x5b,0xf5] - vcvtdq2ps %xmm5, %xmm6 - -// CHECK: vcvtdq2ps (%eax), %xmm6 -// CHECK: encoding: [0xc5,0xf8,0x5b,0x30] - vcvtdq2ps (%eax), %xmm6 - -// CHECK: vcvtsd2ss %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xdb,0x5a,0xf2] - vcvtsd2ss %xmm2, %xmm4, %xmm6 - -// CHECK: vcvtsd2ss (%eax), %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xdb,0x5a,0x30] - vcvtsd2ss (%eax), %xmm4, %xmm6 - -// CHECK: vcvtps2dq %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xf9,0x5b,0xda] - vcvtps2dq %xmm2, %xmm3 - -// CHECK: vcvtps2dq (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xf9,0x5b,0x18] - vcvtps2dq (%eax), %xmm3 - -// CHECK: vcvtss2sd %xmm2, %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xda,0x5a,0xf2] - vcvtss2sd %xmm2, %xmm4, %xmm6 - -// CHECK: vcvtss2sd (%eax), %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xda,0x5a,0x30] - vcvtss2sd (%eax), %xmm4, %xmm6 - -// CHECK: vcvtdq2ps %xmm4, %xmm6 -// CHECK: encoding: [0xc5,0xf8,0x5b,0xf4] - vcvtdq2ps %xmm4, %xmm6 - -// CHECK: vcvtdq2ps (%ecx), %xmm4 -// CHECK: encoding: [0xc5,0xf8,0x5b,0x21] - vcvtdq2ps (%ecx), %xmm4 - -// CHECK: vcvttps2dq %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xfa,0x5b,0xda] - vcvttps2dq %xmm2, %xmm3 - -// CHECK: vcvttps2dq (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xfa,0x5b,0x18] - vcvttps2dq (%eax), %xmm3 - -// CHECK: vcvtps2pd %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xf8,0x5a,0xda] - vcvtps2pd %xmm2, %xmm3 - -// CHECK: vcvtps2pd (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xf8,0x5a,0x18] - vcvtps2pd (%eax), %xmm3 - -// CHECK: vcvtpd2ps %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xf9,0x5a,0xda] - vcvtpd2ps %xmm2, %xmm3 - -// CHECK: vsqrtpd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x51,0xd1] - vsqrtpd %xmm1, %xmm2 - -// CHECK: vsqrtpd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf9,0x51,0x10] - vsqrtpd (%eax), %xmm2 - -// CHECK: vsqrtps %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x51,0xd1] - vsqrtps %xmm1, %xmm2 - -// CHECK: vsqrtps (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x51,0x10] - vsqrtps (%eax), %xmm2 - -// CHECK: vsqrtsd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x51,0xd9] - vsqrtsd %xmm1, %xmm2, %xmm3 - -// CHECK: vsqrtsd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x51,0x18] - vsqrtsd (%eax), %xmm2, %xmm3 - -// CHECK: vsqrtss %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x51,0xd9] - vsqrtss %xmm1, %xmm2, %xmm3 - -// CHECK: vsqrtss (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x51,0x18] - vsqrtss (%eax), %xmm2, %xmm3 - -// CHECK: vrsqrtps %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x52,0xd1] - vrsqrtps %xmm1, %xmm2 - -// CHECK: vrsqrtps (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x52,0x10] - vrsqrtps (%eax), %xmm2 - -// CHECK: vrsqrtss %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x52,0xd9] - vrsqrtss %xmm1, %xmm2, %xmm3 - -// CHECK: vrsqrtss (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x52,0x18] - vrsqrtss (%eax), %xmm2, %xmm3 - -// CHECK: vrcpps %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x53,0xd1] - vrcpps %xmm1, %xmm2 - -// CHECK: vrcpps (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xf8,0x53,0x10] - vrcpps (%eax), %xmm2 - -// CHECK: vrcpss %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x53,0xd9] - vrcpss %xmm1, %xmm2, %xmm3 - -// CHECK: vrcpss (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xea,0x53,0x18] - vrcpss (%eax), %xmm2, %xmm3 - -// CHECK: vmovntdq %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0xe7,0x08] - vmovntdq %xmm1, (%eax) - -// CHECK: vmovntpd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x2b,0x08] - vmovntpd %xmm1, (%eax) - -// CHECK: vmovntps %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf8,0x2b,0x08] - vmovntps %xmm1, (%eax) - -// CHECK: vldmxcsr (%eax) -// CHECK: encoding: [0xc5,0xf8,0xae,0x10] - vldmxcsr (%eax) - -// CHECK: vstmxcsr (%eax) -// CHECK: encoding: [0xc5,0xf8,0xae,0x18] - vstmxcsr (%eax) - -// CHECK: vldmxcsr 3735928559 -// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xef,0xbe,0xad,0xde] - vldmxcsr 0xdeadbeef - -// CHECK: vstmxcsr 3735928559 -// CHECK: encoding: [0xc5,0xf8,0xae,0x1d,0xef,0xbe,0xad,0xde] - vstmxcsr 0xdeadbeef - -// CHECK: vpsubb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf8,0xd9] - vpsubb %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf8,0x18] - vpsubb (%eax), %xmm2, %xmm3 - -// CHECK: vpsubw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf9,0xd9] - vpsubw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf9,0x18] - vpsubw (%eax), %xmm2, %xmm3 - -// CHECK: vpsubd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfa,0xd9] - vpsubd %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfa,0x18] - vpsubd (%eax), %xmm2, %xmm3 - -// CHECK: vpsubq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfb,0xd9] - vpsubq %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfb,0x18] - vpsubq (%eax), %xmm2, %xmm3 - -// CHECK: vpsubsb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe8,0xd9] - vpsubsb %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubsb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe8,0x18] - vpsubsb (%eax), %xmm2, %xmm3 - -// CHECK: vpsubsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe9,0xd9] - vpsubsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe9,0x18] - vpsubsw (%eax), %xmm2, %xmm3 - -// CHECK: vpsubusb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd8,0xd9] - vpsubusb %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubusb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd8,0x18] - vpsubusb (%eax), %xmm2, %xmm3 - -// CHECK: vpsubusw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd9,0xd9] - vpsubusw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsubusw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd9,0x18] - vpsubusw (%eax), %xmm2, %xmm3 - -// CHECK: vpaddb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfc,0xd9] - vpaddb %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfc,0x18] - vpaddb (%eax), %xmm2, %xmm3 - -// CHECK: vpaddw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfd,0xd9] - vpaddw %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfd,0x18] - vpaddw (%eax), %xmm2, %xmm3 - -// CHECK: vpaddd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfe,0xd9] - vpaddd %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xfe,0x18] - vpaddd (%eax), %xmm2, %xmm3 - -// CHECK: vpaddq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd4,0xd9] - vpaddq %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd4,0x18] - vpaddq (%eax), %xmm2, %xmm3 - -// CHECK: vpaddsb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xec,0xd9] - vpaddsb %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddsb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xec,0x18] - vpaddsb (%eax), %xmm2, %xmm3 - -// CHECK: vpaddsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xed,0xd9] - vpaddsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xed,0x18] - vpaddsw (%eax), %xmm2, %xmm3 - -// CHECK: vpaddusb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdc,0xd9] - vpaddusb %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddusb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdc,0x18] - vpaddusb (%eax), %xmm2, %xmm3 - -// CHECK: vpaddusw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdd,0xd9] - vpaddusw %xmm1, %xmm2, %xmm3 - -// CHECK: vpaddusw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdd,0x18] - vpaddusw (%eax), %xmm2, %xmm3 - -// CHECK: vpmulhuw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe4,0xd9] - vpmulhuw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmulhuw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe4,0x18] - vpmulhuw (%eax), %xmm2, %xmm3 - -// CHECK: vpmulhw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe5,0xd9] - vpmulhw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmulhw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe5,0x18] - vpmulhw (%eax), %xmm2, %xmm3 - -// CHECK: vpmullw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd5,0xd9] - vpmullw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmullw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd5,0x18] - vpmullw (%eax), %xmm2, %xmm3 - -// CHECK: vpmuludq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf4,0xd9] - vpmuludq %xmm1, %xmm2, %xmm3 - -// CHECK: vpmuludq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf4,0x18] - vpmuludq (%eax), %xmm2, %xmm3 - -// CHECK: vpavgb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe0,0xd9] - vpavgb %xmm1, %xmm2, %xmm3 - -// CHECK: vpavgb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe0,0x18] - vpavgb (%eax), %xmm2, %xmm3 - -// CHECK: vpavgw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe3,0xd9] - vpavgw %xmm1, %xmm2, %xmm3 - -// CHECK: vpavgw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe3,0x18] - vpavgw (%eax), %xmm2, %xmm3 - -// CHECK: vpminsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xea,0xd9] - vpminsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpminsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xea,0x18] - vpminsw (%eax), %xmm2, %xmm3 - -// CHECK: vpminub %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xda,0xd9] - vpminub %xmm1, %xmm2, %xmm3 - -// CHECK: vpminub (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xda,0x18] - vpminub (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xee,0xd9] - vpmaxsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmaxsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xee,0x18] - vpmaxsw (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxub %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xde,0xd9] - vpmaxub %xmm1, %xmm2, %xmm3 - -// CHECK: vpmaxub (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xde,0x18] - vpmaxub (%eax), %xmm2, %xmm3 - -// CHECK: vpsadbw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf6,0xd9] - vpsadbw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsadbw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf6,0x18] - vpsadbw (%eax), %xmm2, %xmm3 - -// CHECK: vpsllw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf1,0xd9] - vpsllw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsllw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf1,0x18] - vpsllw (%eax), %xmm2, %xmm3 - -// CHECK: vpslld %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf2,0xd9] - vpslld %xmm1, %xmm2, %xmm3 - -// CHECK: vpslld (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf2,0x18] - vpslld (%eax), %xmm2, %xmm3 - -// CHECK: vpsllq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf3,0xd9] - vpsllq %xmm1, %xmm2, %xmm3 - -// CHECK: vpsllq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xf3,0x18] - vpsllq (%eax), %xmm2, %xmm3 - -// CHECK: vpsraw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe1,0xd9] - vpsraw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsraw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe1,0x18] - vpsraw (%eax), %xmm2, %xmm3 - -// CHECK: vpsrad %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe2,0xd9] - vpsrad %xmm1, %xmm2, %xmm3 - -// CHECK: vpsrad (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xe2,0x18] - vpsrad (%eax), %xmm2, %xmm3 - -// CHECK: vpsrlw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd1,0xd9] - vpsrlw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsrlw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd1,0x18] - vpsrlw (%eax), %xmm2, %xmm3 - -// CHECK: vpsrld %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd2,0xd9] - vpsrld %xmm1, %xmm2, %xmm3 - -// CHECK: vpsrld (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd2,0x18] - vpsrld (%eax), %xmm2, %xmm3 - -// CHECK: vpsrlq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd3,0xd9] - vpsrlq %xmm1, %xmm2, %xmm3 - -// CHECK: vpsrlq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd3,0x18] - vpsrlq (%eax), %xmm2, %xmm3 - -// CHECK: vpslld $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a] - vpslld $10, %xmm2, %xmm3 - -// CHECK: vpslldq $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x73,0xfa,0x0a] - vpslldq $10, %xmm2, %xmm3 - -// CHECK: vpsllq $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x73,0xf2,0x0a] - vpsllq $10, %xmm2, %xmm3 - -// CHECK: vpsllw $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x71,0xf2,0x0a] - vpsllw $10, %xmm2, %xmm3 - -// CHECK: vpsrad $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x72,0xe2,0x0a] - vpsrad $10, %xmm2, %xmm3 - -// CHECK: vpsraw $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x71,0xe2,0x0a] - vpsraw $10, %xmm2, %xmm3 - -// CHECK: vpsrld $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x72,0xd2,0x0a] - vpsrld $10, %xmm2, %xmm3 - -// CHECK: vpsrldq $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x73,0xda,0x0a] - vpsrldq $10, %xmm2, %xmm3 - -// CHECK: vpsrlq $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x73,0xd2,0x0a] - vpsrlq $10, %xmm2, %xmm3 - -// CHECK: vpsrlw $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x71,0xd2,0x0a] - vpsrlw $10, %xmm2, %xmm3 - -// CHECK: vpslld $10, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a] - vpslld $10, %xmm2, %xmm3 - -// CHECK: vpand %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdb,0xd9] - vpand %xmm1, %xmm2, %xmm3 - -// CHECK: vpand (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdb,0x18] - vpand (%eax), %xmm2, %xmm3 - -// CHECK: vpor %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xeb,0xd9] - vpor %xmm1, %xmm2, %xmm3 - -// CHECK: vpor (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xeb,0x18] - vpor (%eax), %xmm2, %xmm3 - -// CHECK: vpxor %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xef,0xd9] - vpxor %xmm1, %xmm2, %xmm3 - -// CHECK: vpxor (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xef,0x18] - vpxor (%eax), %xmm2, %xmm3 - -// CHECK: vpandn %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdf,0xd9] - vpandn %xmm1, %xmm2, %xmm3 - -// CHECK: vpandn (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xdf,0x18] - vpandn (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpeqb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x74,0xd9] - vpcmpeqb %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpeqb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x74,0x18] - vpcmpeqb (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpeqw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x75,0xd9] - vpcmpeqw %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpeqw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x75,0x18] - vpcmpeqw (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpeqd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x76,0xd9] - vpcmpeqd %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpeqd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x76,0x18] - vpcmpeqd (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpgtb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x64,0xd9] - vpcmpgtb %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpgtb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x64,0x18] - vpcmpgtb (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpgtw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x65,0xd9] - vpcmpgtw %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpgtw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x65,0x18] - vpcmpgtw (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpgtd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x66,0xd9] - vpcmpgtd %xmm1, %xmm2, %xmm3 - -// CHECK: vpcmpgtd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x66,0x18] - vpcmpgtd (%eax), %xmm2, %xmm3 - -// CHECK: vpacksswb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x63,0xd9] - vpacksswb %xmm1, %xmm2, %xmm3 - -// CHECK: vpacksswb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x63,0x18] - vpacksswb (%eax), %xmm2, %xmm3 - -// CHECK: vpackssdw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6b,0xd9] - vpackssdw %xmm1, %xmm2, %xmm3 - -// CHECK: vpackssdw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6b,0x18] - vpackssdw (%eax), %xmm2, %xmm3 - -// CHECK: vpackuswb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x67,0xd9] - vpackuswb %xmm1, %xmm2, %xmm3 - -// CHECK: vpackuswb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x67,0x18] - vpackuswb (%eax), %xmm2, %xmm3 - -// CHECK: vpshufd $4, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xf9,0x70,0xda,0x04] - vpshufd $4, %xmm2, %xmm3 - -// CHECK: vpshufd $4, (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xf9,0x70,0x18,0x04] - vpshufd $4, (%eax), %xmm3 - -// CHECK: vpshufhw $4, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xfa,0x70,0xda,0x04] - vpshufhw $4, %xmm2, %xmm3 - -// CHECK: vpshufhw $4, (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xfa,0x70,0x18,0x04] - vpshufhw $4, (%eax), %xmm3 - -// CHECK: vpshuflw $4, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xfb,0x70,0xda,0x04] - vpshuflw $4, %xmm2, %xmm3 - -// CHECK: vpshuflw $4, (%eax), %xmm3 -// CHECK: encoding: [0xc5,0xfb,0x70,0x18,0x04] - vpshuflw $4, (%eax), %xmm3 - -// CHECK: vpunpcklbw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x60,0xd9] - vpunpcklbw %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpcklbw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x60,0x18] - vpunpcklbw (%eax), %xmm2, %xmm3 - -// CHECK: vpunpcklwd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x61,0xd9] - vpunpcklwd %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpcklwd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x61,0x18] - vpunpcklwd (%eax), %xmm2, %xmm3 - -// CHECK: vpunpckldq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x62,0xd9] - vpunpckldq %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpckldq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x62,0x18] - vpunpckldq (%eax), %xmm2, %xmm3 - -// CHECK: vpunpcklqdq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6c,0xd9] - vpunpcklqdq %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpcklqdq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6c,0x18] - vpunpcklqdq (%eax), %xmm2, %xmm3 - -// CHECK: vpunpckhbw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x68,0xd9] - vpunpckhbw %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpckhbw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x68,0x18] - vpunpckhbw (%eax), %xmm2, %xmm3 - -// CHECK: vpunpckhwd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x69,0xd9] - vpunpckhwd %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpckhwd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x69,0x18] - vpunpckhwd (%eax), %xmm2, %xmm3 - -// CHECK: vpunpckhdq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6a,0xd9] - vpunpckhdq %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpckhdq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6a,0x18] - vpunpckhdq (%eax), %xmm2, %xmm3 - -// CHECK: vpunpckhqdq %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6d,0xd9] - vpunpckhqdq %xmm1, %xmm2, %xmm3 - -// CHECK: vpunpckhqdq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x6d,0x18] - vpunpckhqdq (%eax), %xmm2, %xmm3 - -// CHECK: vpinsrw $7, %eax, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc4,0xd8,0x07] - vpinsrw $7, %eax, %xmm2, %xmm3 - -// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xc4,0x18,0x07] - vpinsrw $7, (%eax), %xmm2, %xmm3 - -// CHECK: vpextrw $7, %xmm2, %eax -// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07] - vpextrw $7, %xmm2, %eax - -// CHECK: vpmovmskb %xmm1, %eax -// CHECK: encoding: [0xc5,0xf9,0xd7,0xc1] - vpmovmskb %xmm1, %eax - -// CHECK: vmaskmovdqu %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf9,0xf7,0xd1] - vmaskmovdqu %xmm1, %xmm2 - -// CHECK: vmovd %xmm1, %eax -// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8] - vmovd %xmm1, %eax - -// CHECK: vmovd %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0x7e,0x08] - vmovd %xmm1, (%eax) - -// CHECK: vmovd %eax, %xmm1 -// CHECK: encoding: [0xc5,0xf9,0x6e,0xc8] - vmovd %eax, %xmm1 - -// CHECK: vmovd (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xf9,0x6e,0x08] - vmovd (%eax), %xmm1 - -// CHECK: vmovq %xmm1, (%eax) -// CHECK: encoding: [0xc5,0xf9,0xd6,0x08] - vmovq %xmm1, (%eax) - -// CHECK: vmovq %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfa,0x7e,0xd1] - vmovq %xmm1, %xmm2 - -// CHECK: vmovq (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xfa,0x7e,0x08] - vmovq (%eax), %xmm1 - -// CHECK: vcvtpd2dq %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfb,0xe6,0xd1] - vcvtpd2dq %xmm1, %xmm2 - -// CHECK: vcvtdq2pd %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfa,0xe6,0xd1] - vcvtdq2pd %xmm1, %xmm2 - -// CHECK: vcvtdq2pd (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xfa,0xe6,0x10] - vcvtdq2pd (%eax), %xmm2 - -// CHECK: vmovshdup %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfa,0x16,0xd1] - vmovshdup %xmm1, %xmm2 - -// CHECK: vmovshdup (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xfa,0x16,0x10] - vmovshdup (%eax), %xmm2 - -// CHECK: vmovsldup %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfa,0x12,0xd1] - vmovsldup %xmm1, %xmm2 - -// CHECK: vmovsldup (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xfa,0x12,0x10] - vmovsldup (%eax), %xmm2 - -// CHECK: vmovddup %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xfb,0x12,0xd1] - vmovddup %xmm1, %xmm2 - -// CHECK: vmovddup (%eax), %xmm2 -// CHECK: encoding: [0xc5,0xfb,0x12,0x10] - vmovddup (%eax), %xmm2 - -// CHECK: vaddsubps %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0xd0,0xd9] - vaddsubps %xmm1, %xmm2, %xmm3 - -// CHECK: vaddsubps (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf3,0xd0,0x10] - vaddsubps (%eax), %xmm1, %xmm2 - -// CHECK: vaddsubpd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0xd0,0xd9] - vaddsubpd %xmm1, %xmm2, %xmm3 - -// CHECK: vaddsubpd (%eax), %xmm1, %xmm2 -// CHECK: encoding: [0xc5,0xf1,0xd0,0x10] - vaddsubpd (%eax), %xmm1, %xmm2 - -// CHECK: vhaddps %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x7c,0xd9] - vhaddps %xmm1, %xmm2, %xmm3 - -// CHECK: vhaddps (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x7c,0x18] - vhaddps (%eax), %xmm2, %xmm3 - -// CHECK: vhaddpd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x7c,0xd9] - vhaddpd %xmm1, %xmm2, %xmm3 - -// CHECK: vhaddpd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x7c,0x18] - vhaddpd (%eax), %xmm2, %xmm3 - -// CHECK: vhsubps %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x7d,0xd9] - vhsubps %xmm1, %xmm2, %xmm3 - -// CHECK: vhsubps (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xeb,0x7d,0x18] - vhsubps (%eax), %xmm2, %xmm3 - -// CHECK: vhsubpd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x7d,0xd9] - vhsubpd %xmm1, %xmm2, %xmm3 - -// CHECK: vhsubpd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe9,0x7d,0x18] - vhsubpd (%eax), %xmm2, %xmm3 - -// CHECK: vpabsb %xmm1, %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0xd1] - vpabsb %xmm1, %xmm2 - -// CHECK: vpabsb (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0x10] - vpabsb (%eax), %xmm2 - -// CHECK: vpabsw %xmm1, %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0xd1] - vpabsw %xmm1, %xmm2 - -// CHECK: vpabsw (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0x10] - vpabsw (%eax), %xmm2 - -// CHECK: vpabsd %xmm1, %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0xd1] - vpabsd %xmm1, %xmm2 - -// CHECK: vpabsd (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0x10] - vpabsd (%eax), %xmm2 - -// CHECK: vphaddw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0xd9] - vphaddw %xmm1, %xmm2, %xmm3 - -// CHECK: vphaddw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0x18] - vphaddw (%eax), %xmm2, %xmm3 - -// CHECK: vphaddd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0xd9] - vphaddd %xmm1, %xmm2, %xmm3 - -// CHECK: vphaddd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0x18] - vphaddd (%eax), %xmm2, %xmm3 - -// CHECK: vphaddsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0xd9] - vphaddsw %xmm1, %xmm2, %xmm3 - -// CHECK: vphaddsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0x18] - vphaddsw (%eax), %xmm2, %xmm3 - -// CHECK: vphsubw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0xd9] - vphsubw %xmm1, %xmm2, %xmm3 - -// CHECK: vphsubw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0x18] - vphsubw (%eax), %xmm2, %xmm3 - -// CHECK: vphsubd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0xd9] - vphsubd %xmm1, %xmm2, %xmm3 - -// CHECK: vphsubd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0x18] - vphsubd (%eax), %xmm2, %xmm3 - -// CHECK: vphsubsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0xd9] - vphsubsw %xmm1, %xmm2, %xmm3 - -// CHECK: vphsubsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0x18] - vphsubsw (%eax), %xmm2, %xmm3 - -// CHECK: vpmaddubsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0xd9] - vpmaddubsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmaddubsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0x18] - vpmaddubsw (%eax), %xmm2, %xmm3 - -// CHECK: vpshufb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0xd9] - vpshufb %xmm1, %xmm2, %xmm3 - -// CHECK: vpshufb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0x18] - vpshufb (%eax), %xmm2, %xmm3 - -// CHECK: vpsignb %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0xd9] - vpsignb %xmm1, %xmm2, %xmm3 - -// CHECK: vpsignb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0x18] - vpsignb (%eax), %xmm2, %xmm3 - -// CHECK: vpsignw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0xd9] - vpsignw %xmm1, %xmm2, %xmm3 - -// CHECK: vpsignw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0x18] - vpsignw (%eax), %xmm2, %xmm3 - -// CHECK: vpsignd %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0xd9] - vpsignd %xmm1, %xmm2, %xmm3 - -// CHECK: vpsignd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0x18] - vpsignd (%eax), %xmm2, %xmm3 - -// CHECK: vpmulhrsw %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0xd9] - vpmulhrsw %xmm1, %xmm2, %xmm3 - -// CHECK: vpmulhrsw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0x18] - vpmulhrsw (%eax), %xmm2, %xmm3 - -// CHECK: vpalignr $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0xd9,0x07] - vpalignr $7, %xmm1, %xmm2, %xmm3 - -// CHECK: vpalignr $7, (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0x18,0x07] - vpalignr $7, (%eax), %xmm2, %xmm3 - -// CHECK: vroundsd $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0xd9,0x07] - vroundsd $7, %xmm1, %xmm2, %xmm3 - -// CHECK: vroundsd $7, (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0x18,0x07] - vroundsd $7, (%eax), %xmm2, %xmm3 - -// CHECK: vroundss $7, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0xd9,0x07] - vroundss $7, %xmm1, %xmm2, %xmm3 - -// CHECK: vroundss $7, (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0x18,0x07] - vroundss $7, (%eax), %xmm2, %xmm3 - -// CHECK: vroundpd $7, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0xda,0x07] - vroundpd $7, %xmm2, %xmm3 - -// CHECK: vroundpd $7, (%eax), %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0x18,0x07] - vroundpd $7, (%eax), %xmm3 - -// CHECK: vroundps $7, %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0xda,0x07] - vroundps $7, %xmm2, %xmm3 - -// CHECK: vroundps $7, (%eax), %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0x18,0x07] - vroundps $7, (%eax), %xmm3 - -// CHECK: vphminposuw %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0xda] - vphminposuw %xmm2, %xmm3 - -// CHECK: vphminposuw (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0x10] - vphminposuw (%eax), %xmm2 - -// CHECK: vpackusdw %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x2b,0xca] - vpackusdw %xmm2, %xmm3, %xmm1 - -// CHECK: vpackusdw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x2b,0x18] - vpackusdw (%eax), %xmm2, %xmm3 - -// CHECK: vpcmpeqq %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x29,0xca] - vpcmpeqq %xmm2, %xmm3, %xmm1 - -// CHECK: vpcmpeqq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x29,0x18] - vpcmpeqq (%eax), %xmm2, %xmm3 - -// CHECK: vpminsb %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x38,0xca] - vpminsb %xmm2, %xmm3, %xmm1 - -// CHECK: vpminsb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x38,0x18] - vpminsb (%eax), %xmm2, %xmm3 - -// CHECK: vpminsd %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x39,0xca] - vpminsd %xmm2, %xmm3, %xmm1 - -// CHECK: vpminsd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x39,0x18] - vpminsd (%eax), %xmm2, %xmm3 - -// CHECK: vpminud %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3b,0xca] - vpminud %xmm2, %xmm3, %xmm1 - -// CHECK: vpminud (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3b,0x18] - vpminud (%eax), %xmm2, %xmm3 - -// CHECK: vpminuw %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3a,0xca] - vpminuw %xmm2, %xmm3, %xmm1 - -// CHECK: vpminuw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3a,0x18] - vpminuw (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxsb %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3c,0xca] - vpmaxsb %xmm2, %xmm3, %xmm1 - -// CHECK: vpmaxsb (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3c,0x18] - vpmaxsb (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxsd %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3d,0xca] - vpmaxsd %xmm2, %xmm3, %xmm1 - -// CHECK: vpmaxsd (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3d,0x18] - vpmaxsd (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxud %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3f,0xca] - vpmaxud %xmm2, %xmm3, %xmm1 - -// CHECK: vpmaxud (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3f,0x18] - vpmaxud (%eax), %xmm2, %xmm3 - -// CHECK: vpmaxuw %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x3e,0xca] - vpmaxuw %xmm2, %xmm3, %xmm1 - -// CHECK: vpmaxuw (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x3e,0x18] - vpmaxuw (%eax), %xmm2, %xmm3 - -// CHECK: vpmuldq %xmm2, %xmm3, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x61,0x28,0xca] - vpmuldq %xmm2, %xmm3, %xmm1 - -// CHECK: vpmuldq (%eax), %xmm2, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x69,0x28,0x18] - vpmuldq (%eax), %xmm2, %xmm3 - -// CHECK: vpmulld %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0xca] - vpmulld %xmm2, %xmm5, %xmm1 - -// CHECK: vpmulld (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0x18] - vpmulld (%eax), %xmm5, %xmm3 - -// CHECK: vblendps $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0xca,0x03] - vblendps $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vblendps $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0x08,0x03] - vblendps $3, (%eax), %xmm5, %xmm1 - -// CHECK: vblendpd $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0xca,0x03] - vblendpd $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vblendpd $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0x08,0x03] - vblendpd $3, (%eax), %xmm5, %xmm1 - -// CHECK: vpblendw $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0xca,0x03] - vpblendw $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vpblendw $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0x08,0x03] - vpblendw $3, (%eax), %xmm5, %xmm1 - -// CHECK: vmpsadbw $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0xca,0x03] - vmpsadbw $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vmpsadbw $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0x08,0x03] - vmpsadbw $3, (%eax), %xmm5, %xmm1 - -// CHECK: vdpps $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0xca,0x03] - vdpps $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vdpps $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0x08,0x03] - vdpps $3, (%eax), %xmm5, %xmm1 - -// CHECK: vdppd $3, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0xca,0x03] - vdppd $3, %xmm2, %xmm5, %xmm1 - -// CHECK: vdppd $3, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0x08,0x03] - vdppd $3, (%eax), %xmm5, %xmm1 - -// CHECK: vblendvpd %xmm2, %xmm5, %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0xdd,0x20] - vblendvpd %xmm2, %xmm5, %xmm1, %xmm3 - -// CHECK: vblendvpd %xmm2, (%eax), %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0x18,0x20] - vblendvpd %xmm2, (%eax), %xmm1, %xmm3 - -// CHECK: vblendvps %xmm2, %xmm5, %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0xdd,0x20] - vblendvps %xmm2, %xmm5, %xmm1, %xmm3 - -// CHECK: vblendvps %xmm2, (%eax), %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0x18,0x20] - vblendvps %xmm2, (%eax), %xmm1, %xmm3 - -// CHECK: vpblendvb %xmm2, %xmm5, %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0xdd,0x20] - vpblendvb %xmm2, %xmm5, %xmm1, %xmm3 - -// CHECK: vpblendvb %xmm2, (%eax), %xmm1, %xmm3 -// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0x18,0x20] - vpblendvb %xmm2, (%eax), %xmm1, %xmm3 - -// CHECK: vpmovsxbw %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0xea] - vpmovsxbw %xmm2, %xmm5 - -// CHECK: vpmovsxbw (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0x10] - vpmovsxbw (%eax), %xmm2 - -// CHECK: vpmovsxwd %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0xea] - vpmovsxwd %xmm2, %xmm5 - -// CHECK: vpmovsxwd (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0x10] - vpmovsxwd (%eax), %xmm2 - -// CHECK: vpmovsxdq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0xea] - vpmovsxdq %xmm2, %xmm5 - -// CHECK: vpmovsxdq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0x10] - vpmovsxdq (%eax), %xmm2 - -// CHECK: vpmovzxbw %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0xea] - vpmovzxbw %xmm2, %xmm5 - -// CHECK: vpmovzxbw (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0x10] - vpmovzxbw (%eax), %xmm2 - -// CHECK: vpmovzxwd %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0xea] - vpmovzxwd %xmm2, %xmm5 - -// CHECK: vpmovzxwd (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0x10] - vpmovzxwd (%eax), %xmm2 - -// CHECK: vpmovzxdq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0xea] - vpmovzxdq %xmm2, %xmm5 - -// CHECK: vpmovzxdq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0x10] - vpmovzxdq (%eax), %xmm2 - -// CHECK: vpmovsxbq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0xea] - vpmovsxbq %xmm2, %xmm5 - -// CHECK: vpmovsxbq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0x10] - vpmovsxbq (%eax), %xmm2 - -// CHECK: vpmovzxbq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0xea] - vpmovzxbq %xmm2, %xmm5 - -// CHECK: vpmovzxbq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0x10] - vpmovzxbq (%eax), %xmm2 - -// CHECK: vpmovsxbd %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0xea] - vpmovsxbd %xmm2, %xmm5 - -// CHECK: vpmovsxbd (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0x10] - vpmovsxbd (%eax), %xmm2 - -// CHECK: vpmovsxwq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0xea] - vpmovsxwq %xmm2, %xmm5 - -// CHECK: vpmovsxwq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0x10] - vpmovsxwq (%eax), %xmm2 - -// CHECK: vpmovzxbd %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0xea] - vpmovzxbd %xmm2, %xmm5 - -// CHECK: vpmovzxbd (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0x10] - vpmovzxbd (%eax), %xmm2 - -// CHECK: vpmovzxwq %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0xea] - vpmovzxwq %xmm2, %xmm5 - -// CHECK: vpmovzxwq (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0x10] - vpmovzxwq (%eax), %xmm2 - -// CHECK: vpextrw $7, %xmm2, %eax -// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07] - vpextrw $7, %xmm2, %eax - -// CHECK: vpextrw $7, %xmm2, (%eax) -// CHECK: encoding: [0xc4,0xe3,0x79,0x15,0x10,0x07] - vpextrw $7, %xmm2, (%eax) - -// CHECK: vpextrd $7, %xmm2, %eax -// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0xd0,0x07] - vpextrd $7, %xmm2, %eax - -// CHECK: vpextrd $7, %xmm2, (%eax) -// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0x10,0x07] - vpextrd $7, %xmm2, (%eax) - -// CHECK: vpextrb $7, %xmm2, %eax -// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xd0,0x07] - vpextrb $7, %xmm2, %eax - -// CHECK: vpextrb $7, %xmm2, (%eax) -// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0x10,0x07] - vpextrb $7, %xmm2, (%eax) - -// CHECK: vextractps $7, %xmm2, (%eax) -// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0x10,0x07] - vextractps $7, %xmm2, (%eax) - -// CHECK: vextractps $7, %xmm2, %eax -// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd0,0x07] - vextractps $7, %xmm2, %eax - -// CHECK: vpinsrw $7, %eax, %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0xc4,0xe8,0x07] - vpinsrw $7, %eax, %xmm2, %xmm5 - -// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm5 -// CHECK: encoding: [0xc5,0xe9,0xc4,0x28,0x07] - vpinsrw $7, (%eax), %xmm2, %xmm5 - -// CHECK: vpinsrb $7, %eax, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0xe8,0x07] - vpinsrb $7, %eax, %xmm2, %xmm5 - -// CHECK: vpinsrb $7, (%eax), %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0x28,0x07] - vpinsrb $7, (%eax), %xmm2, %xmm5 - -// CHECK: vpinsrd $7, %eax, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0xe8,0x07] - vpinsrd $7, %eax, %xmm2, %xmm5 - -// CHECK: vpinsrd $7, (%eax), %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0x28,0x07] - vpinsrd $7, (%eax), %xmm2, %xmm5 - -// CHECK: vinsertps $7, %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0xca,0x07] - vinsertps $7, %xmm2, %xmm5, %xmm1 - -// CHECK: vinsertps $7, (%eax), %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0x08,0x07] - vinsertps $7, (%eax), %xmm5, %xmm1 - -// CHECK: vptest %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0xea] - vptest %xmm2, %xmm5 - -// CHECK: vptest (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0x10] - vptest (%eax), %xmm2 - -// CHECK: vmovntdqa (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0x2a,0x10] - vmovntdqa (%eax), %xmm2 - -// CHECK: vpcmpgtq %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0xca] - vpcmpgtq %xmm2, %xmm5, %xmm1 - -// CHECK: vpcmpgtq (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0x18] - vpcmpgtq (%eax), %xmm5, %xmm3 - -// CHECK: vpcmpistrm $7, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0xea,0x07] - vpcmpistrm $7, %xmm2, %xmm5 - -// CHECK: vpcmpistrm $7, (%eax), %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0x28,0x07] - vpcmpistrm $7, (%eax), %xmm5 - -// CHECK: vpcmpestrm $7, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0xea,0x07] - vpcmpestrm $7, %xmm2, %xmm5 - -// CHECK: vpcmpestrm $7, (%eax), %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0x28,0x07] - vpcmpestrm $7, (%eax), %xmm5 - -// CHECK: vpcmpistri $7, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0xea,0x07] - vpcmpistri $7, %xmm2, %xmm5 - -// CHECK: vpcmpistri $7, (%eax), %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0x28,0x07] - vpcmpistri $7, (%eax), %xmm5 - -// CHECK: vpcmpestri $7, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0xea,0x07] - vpcmpestri $7, %xmm2, %xmm5 - -// CHECK: vpcmpestri $7, (%eax), %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0x28,0x07] - vpcmpestri $7, (%eax), %xmm5 - -// CHECK: vaesimc %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0xea] - vaesimc %xmm2, %xmm5 - -// CHECK: vaesimc (%eax), %xmm2 -// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0x10] - vaesimc (%eax), %xmm2 - -// CHECK: vaesenc %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0xca] - vaesenc %xmm2, %xmm5, %xmm1 - -// CHECK: vaesenc (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0x18] - vaesenc (%eax), %xmm5, %xmm3 - -// CHECK: vaesenclast %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0xca] - vaesenclast %xmm2, %xmm5, %xmm1 - -// CHECK: vaesenclast (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0x18] - vaesenclast (%eax), %xmm5, %xmm3 - -// CHECK: vaesdec %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0xca] - vaesdec %xmm2, %xmm5, %xmm1 - -// CHECK: vaesdec (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0x18] - vaesdec (%eax), %xmm5, %xmm3 - -// CHECK: vaesdeclast %xmm2, %xmm5, %xmm1 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0xca] - vaesdeclast %xmm2, %xmm5, %xmm1 - -// CHECK: vaesdeclast (%eax), %xmm5, %xmm3 -// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0x18] - vaesdeclast (%eax), %xmm5, %xmm3 - -// CHECK: vaeskeygenassist $7, %xmm2, %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0xea,0x07] - vaeskeygenassist $7, %xmm2, %xmm5 - -// CHECK: vaeskeygenassist $7, (%eax), %xmm5 -// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0x28,0x07] - vaeskeygenassist $7, (%eax), %xmm5 - -// CHECK: vcmpps $8, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x08] - vcmpeq_uqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $9, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x09] - vcmpngeps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $10, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0a] - vcmpngtps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $11, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0b] - vcmpfalseps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $12, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0c] - vcmpneq_oqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $13, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0d] - vcmpgeps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $14, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0e] - vcmpgtps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $15, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0f] - vcmptrueps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $16, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x10] - vcmpeq_osps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $17, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x11] - vcmplt_oqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $18, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x12] - vcmple_oqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $19, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x13] - vcmpunord_sps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $20, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x14] - vcmpneq_usps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $21, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x15] - vcmpnlt_uqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $22, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x16] - vcmpnle_uqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $23, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x17] - vcmpord_sps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $24, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x18] - vcmpeq_usps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $25, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x19] - vcmpnge_uqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $26, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1a] - vcmpngt_uqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $27, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1b] - vcmpfalse_osps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $28, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1c] - vcmpneq_osps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $29, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1d] - vcmpge_oqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $30, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1e] - vcmpgt_oqps %xmm1, %xmm2, %xmm3 - -// CHECK: vcmpps $31, %xmm1, %xmm2, %xmm3 -// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1f] - vcmptrue_usps %xmm1, %xmm2, %xmm3 - -// CHECK: vmovaps (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x28,0x10] - vmovaps (%eax), %ymm2 - -// CHECK: vmovaps %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x28,0xd1] - vmovaps %ymm1, %ymm2 - -// CHECK: vmovaps %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfc,0x29,0x08] - vmovaps %ymm1, (%eax) - -// CHECK: vmovapd (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x28,0x10] - vmovapd (%eax), %ymm2 - -// CHECK: vmovapd %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x28,0xd1] - vmovapd %ymm1, %ymm2 - -// CHECK: vmovapd %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfd,0x29,0x08] - vmovapd %ymm1, (%eax) - -// CHECK: vmovups (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x10,0x10] - vmovups (%eax), %ymm2 - -// CHECK: vmovups %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x10,0xd1] - vmovups %ymm1, %ymm2 - -// CHECK: vmovups %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfc,0x11,0x08] - vmovups %ymm1, (%eax) - -// CHECK: vmovupd (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x10,0x10] - vmovupd (%eax), %ymm2 - -// CHECK: vmovupd %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x10,0xd1] - vmovupd %ymm1, %ymm2 - -// CHECK: vmovupd %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfd,0x11,0x08] - vmovupd %ymm1, (%eax) - -// CHECK: vunpckhps %ymm1, %ymm2, %ymm4 -// CHECK: encoding: [0xc5,0xec,0x15,0xe1] - vunpckhps %ymm1, %ymm2, %ymm4 - -// CHECK: vunpckhpd %ymm1, %ymm2, %ymm4 -// CHECK: encoding: [0xc5,0xed,0x15,0xe1] - vunpckhpd %ymm1, %ymm2, %ymm4 - -// CHECK: vunpcklps %ymm1, %ymm2, %ymm4 -// CHECK: encoding: [0xc5,0xec,0x14,0xe1] - vunpcklps %ymm1, %ymm2, %ymm4 - -// CHECK: vunpcklpd %ymm1, %ymm2, %ymm4 -// CHECK: encoding: [0xc5,0xed,0x14,0xe1] - vunpcklpd %ymm1, %ymm2, %ymm4 - -// CHECK: vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x15,0x6c,0xcb,0xfc] - vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x15,0x6c,0xcb,0xfc] - vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x14,0x6c,0xcb,0xfc] - vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x14,0x6c,0xcb,0xfc] - vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vmovntdq %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfd,0xe7,0x08] - vmovntdq %ymm1, (%eax) - -// CHECK: vmovntpd %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfd,0x2b,0x08] - vmovntpd %ymm1, (%eax) - -// CHECK: vmovntps %ymm1, (%eax) -// CHECK: encoding: [0xc5,0xfc,0x2b,0x08] - vmovntps %ymm1, (%eax) - -// CHECK: vmovmskps %xmm2, %eax -// CHECK: encoding: [0xc5,0xf8,0x50,0xc2] - vmovmskps %xmm2, %eax - -// CHECK: vmovmskpd %xmm2, %eax -// CHECK: encoding: [0xc5,0xf9,0x50,0xc2] - vmovmskpd %xmm2, %eax - -// CHECK: vmaxps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5f,0xf2] - vmaxps %ymm2, %ymm4, %ymm6 - -// CHECK: vmaxpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5f,0xf2] - vmaxpd %ymm2, %ymm4, %ymm6 - -// CHECK: vminps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5d,0xf2] - vminps %ymm2, %ymm4, %ymm6 - -// CHECK: vminpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5d,0xf2] - vminpd %ymm2, %ymm4, %ymm6 - -// CHECK: vsubps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5c,0xf2] - vsubps %ymm2, %ymm4, %ymm6 - -// CHECK: vsubpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5c,0xf2] - vsubpd %ymm2, %ymm4, %ymm6 - -// CHECK: vdivps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5e,0xf2] - vdivps %ymm2, %ymm4, %ymm6 - -// CHECK: vdivpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5e,0xf2] - vdivpd %ymm2, %ymm4, %ymm6 - -// CHECK: vaddps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x58,0xf2] - vaddps %ymm2, %ymm4, %ymm6 - -// CHECK: vaddpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x58,0xf2] - vaddpd %ymm2, %ymm4, %ymm6 - -// CHECK: vmulps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x59,0xf2] - vmulps %ymm2, %ymm4, %ymm6 - -// CHECK: vmulpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x59,0xf2] - vmulpd %ymm2, %ymm4, %ymm6 - -// CHECK: vmaxps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5f,0x30] - vmaxps (%eax), %ymm4, %ymm6 - -// CHECK: vmaxpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5f,0x30] - vmaxpd (%eax), %ymm4, %ymm6 - -// CHECK: vminps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5d,0x30] - vminps (%eax), %ymm4, %ymm6 - -// CHECK: vminpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5d,0x30] - vminpd (%eax), %ymm4, %ymm6 - -// CHECK: vsubps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5c,0x30] - vsubps (%eax), %ymm4, %ymm6 - -// CHECK: vsubpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5c,0x30] - vsubpd (%eax), %ymm4, %ymm6 - -// CHECK: vdivps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5e,0x30] - vdivps (%eax), %ymm4, %ymm6 - -// CHECK: vdivpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5e,0x30] - vdivpd (%eax), %ymm4, %ymm6 - -// CHECK: vaddps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x58,0x30] - vaddps (%eax), %ymm4, %ymm6 - -// CHECK: vaddpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x58,0x30] - vaddpd (%eax), %ymm4, %ymm6 - -// CHECK: vmulps (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x59,0x30] - vmulps (%eax), %ymm4, %ymm6 - -// CHECK: vmulpd (%eax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x59,0x30] - vmulpd (%eax), %ymm4, %ymm6 - -// CHECK: vsqrtpd %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x51,0xd1] - vsqrtpd %ymm1, %ymm2 - -// CHECK: vsqrtpd (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfd,0x51,0x10] - vsqrtpd (%eax), %ymm2 - -// CHECK: vsqrtps %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x51,0xd1] - vsqrtps %ymm1, %ymm2 - -// CHECK: vsqrtps (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x51,0x10] - vsqrtps (%eax), %ymm2 - -// CHECK: vrsqrtps %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x52,0xd1] - vrsqrtps %ymm1, %ymm2 - -// CHECK: vrsqrtps (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x52,0x10] - vrsqrtps (%eax), %ymm2 - -// CHECK: vrcpps %ymm1, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x53,0xd1] - vrcpps %ymm1, %ymm2 - -// CHECK: vrcpps (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x53,0x10] - vrcpps (%eax), %ymm2 - -// CHECK: vandps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x54,0xf2] - vandps %ymm2, %ymm4, %ymm6 - -// CHECK: vandpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x54,0xf2] - vandpd %ymm2, %ymm4, %ymm6 - -// CHECK: vandps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x54,0x6c,0xcb,0xfc] - vandps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x54,0x6c,0xcb,0xfc] - vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vorps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x56,0xf2] - vorps %ymm2, %ymm4, %ymm6 - -// CHECK: vorpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x56,0xf2] - vorpd %ymm2, %ymm4, %ymm6 - -// CHECK: vorps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x56,0x6c,0xcb,0xfc] - vorps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x56,0x6c,0xcb,0xfc] - vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vxorps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x57,0xf2] - vxorps %ymm2, %ymm4, %ymm6 - -// CHECK: vxorpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x57,0xf2] - vxorpd %ymm2, %ymm4, %ymm6 - -// CHECK: vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x57,0x6c,0xcb,0xfc] - vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x57,0x6c,0xcb,0xfc] - vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vandnps %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x55,0xf2] - vandnps %ymm2, %ymm4, %ymm6 - -// CHECK: vandnpd %ymm2, %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x55,0xf2] - vandnpd %ymm2, %ymm4, %ymm6 - -// CHECK: vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xec,0x55,0x6c,0xcb,0xfc] - vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xed,0x55,0x6c,0xcb,0xfc] - vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5 - -// CHECK: vcvtps2pd %xmm3, %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x5a,0xd3] - vcvtps2pd %xmm3, %ymm2 - -// CHECK: vcvtps2pd (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x5a,0x10] - vcvtps2pd (%eax), %ymm2 - -// CHECK: vcvtdq2pd %xmm3, %ymm2 -// CHECK: encoding: [0xc5,0xfe,0xe6,0xd3] - vcvtdq2pd %xmm3, %ymm2 - -// CHECK: vcvtdq2pd (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfe,0xe6,0x10] - vcvtdq2pd (%eax), %ymm2 - -// CHECK: vcvtdq2ps %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xfc,0x5b,0xea] - vcvtdq2ps %ymm2, %ymm5 - -// CHECK: vcvtdq2ps (%eax), %ymm2 -// CHECK: encoding: [0xc5,0xfc,0x5b,0x10] - vcvtdq2ps (%eax), %ymm2 - -// CHECK: vcvtps2dq %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xfd,0x5b,0xea] - vcvtps2dq %ymm2, %ymm5 - -// CHECK: vcvtps2dq (%eax), %ymm5 -// CHECK: encoding: [0xc5,0xfd,0x5b,0x28] - vcvtps2dq (%eax), %ymm5 - -// CHECK: vcvttps2dq %ymm2, %ymm5 -// CHECK: encoding: [0xc5,0xfe,0x5b,0xea] - vcvttps2dq %ymm2, %ymm5 - -// CHECK: vcvttps2dq (%eax), %ymm5 -// CHECK: encoding: [0xc5,0xfe,0x5b,0x28] - vcvttps2dq (%eax), %ymm5 - -// CHECK: vcvttpd2dq %xmm1, %xmm5 -// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9] - vcvttpd2dq %xmm1, %xmm5 - -// CHECK: vcvttpd2dq %ymm2, %xmm5 -// CHECK: encoding: [0xc5,0xfd,0xe6,0xea] - vcvttpd2dq %ymm2, %xmm5 - -// CHECK: vcvttpd2dqx %xmm1, %xmm5 -// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9] - vcvttpd2dqx %xmm1, %xmm5 - -// CHECK: vcvttpd2dqx (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xf9,0xe6,0x08] - vcvttpd2dqx (%eax), %xmm1 - -// CHECK: vcvttpd2dqy %ymm2, %xmm1 -// CHECK: encoding: [0xc5,0xfd,0xe6,0xca] - vcvttpd2dqy %ymm2, %xmm1 - -// CHECK: vcvttpd2dqy (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xfd,0xe6,0x08] - vcvttpd2dqy (%eax), %xmm1 - -// CHECK: vcvtpd2ps %ymm2, %xmm5 -// CHECK: encoding: [0xc5,0xfd,0x5a,0xea] - vcvtpd2ps %ymm2, %xmm5 - -// CHECK: vcvtpd2psx %xmm1, %xmm5 -// CHECK: encoding: [0xc5,0xf9,0x5a,0xe9] - vcvtpd2psx %xmm1, %xmm5 - -// CHECK: vcvtpd2psx (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xf9,0x5a,0x08] - vcvtpd2psx (%eax), %xmm1 - -// CHECK: vcvtpd2psy %ymm2, %xmm1 -// CHECK: encoding: [0xc5,0xfd,0x5a,0xca] - vcvtpd2psy %ymm2, %xmm1 - -// CHECK: vcvtpd2psy (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xfd,0x5a,0x08] - vcvtpd2psy (%eax), %xmm1 - -// CHECK: vcvtpd2dq %ymm2, %xmm5 -// CHECK: encoding: [0xc5,0xff,0xe6,0xea] - vcvtpd2dq %ymm2, %xmm5 - -// CHECK: vcvtpd2dqy %ymm2, %xmm1 -// CHECK: encoding: [0xc5,0xff,0xe6,0xca] - vcvtpd2dqy %ymm2, %xmm1 - -// CHECK: vcvtpd2dqy (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xff,0xe6,0x08] - vcvtpd2dqy (%eax), %xmm1 - -// CHECK: vcvtpd2dqx %xmm1, %xmm5 -// CHECK: encoding: [0xc5,0xfb,0xe6,0xe9] - vcvtpd2dqx %xmm1, %xmm5 - -// CHECK: vcvtpd2dqx (%eax), %xmm1 -// CHECK: encoding: [0xc5,0xfb,0xe6,0x08] - vcvtpd2dqx (%eax), %xmm1 - -// CHECK: vcmpps $0, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x00] - vcmpeqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $2, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x02] - vcmpleps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $1, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x01] - vcmpltps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $4, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x04] - vcmpneqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $6, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x06] - vcmpnleps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $5, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x05] - vcmpnltps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $7, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x07] - vcmpordps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $3, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x03] - vcmpunordps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x02] - vcmpleps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnleps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %ymm6, %ymm2 -// CHECK: encoding: [0xc5,0xcc,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordps -4(%ebx,%ecx,8), %ymm6, %ymm2 - -// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordps -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $0, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x00] - vcmpeqpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $2, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x02] - vcmplepd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $1, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x01] - vcmpltpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $4, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x04] - vcmpneqpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $6, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x06] - vcmpnlepd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $5, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x05] - vcmpnltpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $7, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x07] - vcmpordpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $3, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x03] - vcmpunordpd %ymm1, %ymm2, %ymm3 - -// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x00] - vcmpeqpd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x02] - vcmplepd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x01] - vcmpltpd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x04] - vcmpneqpd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x06] - vcmpnlepd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x05] - vcmpnltpd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %ymm6, %ymm2 -// CHECK: encoding: [0xc5,0xcd,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordpd -4(%ebx,%ecx,8), %ymm6, %ymm2 - -// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x03] - vcmpunordpd -4(%ebx,%ecx,8), %ymm2, %ymm3 - -// CHECK: vcmpps $8, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x08] - vcmpeq_uqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $9, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x09] - vcmpngeps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $10, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0a] - vcmpngtps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $11, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0b] - vcmpfalseps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $12, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0c] - vcmpneq_oqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $13, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0d] - vcmpgeps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $14, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0e] - vcmpgtps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $15, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0f] - vcmptrueps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $16, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x10] - vcmpeq_osps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $17, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x11] - vcmplt_oqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $18, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x12] - vcmple_oqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $19, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x13] - vcmpunord_sps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $20, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x14] - vcmpneq_usps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $21, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x15] - vcmpnlt_uqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $22, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x16] - vcmpnle_uqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $23, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x17] - vcmpord_sps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $24, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x18] - vcmpeq_usps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $25, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x19] - vcmpnge_uqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $26, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1a] - vcmpngt_uqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $27, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1b] - vcmpfalse_osps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $28, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1c] - vcmpneq_osps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $29, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1d] - vcmpge_oqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $30, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1e] - vcmpgt_oqps %ymm1, %ymm2, %ymm3 - -// CHECK: vcmpps $31, %ymm1, %ymm2, %ymm3 -// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1f] - vcmptrue_usps %ymm1, %ymm2, %ymm3 +// CHECK: movl (%ecx,%eiz,8), %eax +// CHECK: encoding: [0x8b,0x04,0xe1] + movl (%ecx,%eiz,8), %eax diff --git a/test/MC/AsmParser/X86/x86_32-fma3-encoding.s b/test/MC/AsmParser/X86/x86_32-fma3-encoding.s new file mode 100644 index 0000000..db7efec --- /dev/null +++ b/test/MC/AsmParser/X86/x86_32-fma3-encoding.s @@ -0,0 +1,674 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vfmadd132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0xca] + vfmadd132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0x08] + vfmadd132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0xca] + vfmadd132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0x08] + vfmadd132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0xca] + vfmadd213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0x08] + vfmadd213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0xca] + vfmadd213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0x08] + vfmadd213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0xca] + vfmadd231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0x08] + vfmadd231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0xca] + vfmadd231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0x08] + vfmadd231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0xca] + vfmadd132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0x08] + vfmadd132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0xca] + vfmadd132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0x08] + vfmadd132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0xca] + vfmadd213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0x08] + vfmadd213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0xca] + vfmadd213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0x08] + vfmadd213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0xca] + vfmadd231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0x08] + vfmadd231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0xca] + vfmadd231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0x08] + vfmadd231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0xca] + vfmadd132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0x08] + vfmadd132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0xca] + vfmadd132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0x08] + vfmadd132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0xca] + vfmadd213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0x08] + vfmadd213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0xca] + vfmadd213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0x08] + vfmadd213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0xca] + vfmadd231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0x08] + vfmadd231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0xca] + vfmadd231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmadd231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0x08] + vfmadd231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x96,0xca] + vfmaddsub132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x96,0x08] + vfmaddsub132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x96,0xca] + vfmaddsub132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x96,0x08] + vfmaddsub132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa6,0xca] + vfmaddsub213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa6,0x08] + vfmaddsub213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa6,0xca] + vfmaddsub213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa6,0x08] + vfmaddsub213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb6,0xca] + vfmaddsub231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb6,0x08] + vfmaddsub231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmaddsub231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb6,0xca] + vfmaddsub231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmaddsub231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb6,0x08] + vfmaddsub231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x97,0xca] + vfmsubadd132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x97,0x08] + vfmsubadd132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x97,0xca] + vfmsubadd132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x97,0x08] + vfmsubadd132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa7,0xca] + vfmsubadd213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xa7,0x08] + vfmsubadd213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa7,0xca] + vfmsubadd213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xa7,0x08] + vfmsubadd213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb7,0xca] + vfmsubadd231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xb7,0x08] + vfmsubadd231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsubadd231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb7,0xca] + vfmsubadd231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsubadd231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xb7,0x08] + vfmsubadd231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9a,0xca] + vfmsub132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9a,0x08] + vfmsub132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9a,0xca] + vfmsub132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9a,0x08] + vfmsub132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xaa,0xca] + vfmsub213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xaa,0x08] + vfmsub213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xaa,0xca] + vfmsub213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xaa,0x08] + vfmsub213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xba,0xca] + vfmsub231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xba,0x08] + vfmsub231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfmsub231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xba,0xca] + vfmsub231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfmsub231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xba,0x08] + vfmsub231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9c,0xca] + vfnmadd132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9c,0x08] + vfnmadd132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9c,0xca] + vfnmadd132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9c,0x08] + vfnmadd132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xac,0xca] + vfnmadd213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xac,0x08] + vfnmadd213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xac,0xca] + vfnmadd213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xac,0x08] + vfnmadd213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xbc,0xca] + vfnmadd231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xbc,0x08] + vfnmadd231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmadd231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xbc,0xca] + vfnmadd231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmadd231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xbc,0x08] + vfnmadd231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub132pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9e,0xca] + vfnmsub132pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub132pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0x9e,0x08] + vfnmsub132pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub132ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9e,0xca] + vfnmsub132ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub132ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x9e,0x08] + vfnmsub132ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub213pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xae,0xca] + vfnmsub213pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub213pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xae,0x08] + vfnmsub213pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub213ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xae,0xca] + vfnmsub213ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub213ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xae,0x08] + vfnmsub213ps (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub231pd %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xbe,0xca] + vfnmsub231pd %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub231pd (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0xd1,0xbe,0x08] + vfnmsub231pd (%eax), %xmm5, %xmm1 + +// CHECK: vfnmsub231ps %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xbe,0xca] + vfnmsub231ps %xmm2, %xmm5, %xmm1 + +// CHECK: vfnmsub231ps (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0xbe,0x08] + vfnmsub231ps (%eax), %xmm5, %xmm1 + +// CHECK: vfmadd132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0xca] + vfmadd132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0x08] + vfmadd132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0xca] + vfmadd132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0x08] + vfmadd132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0xca] + vfmadd213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0x08] + vfmadd213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0xca] + vfmadd213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0x08] + vfmadd213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0xca] + vfmadd231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0x08] + vfmadd231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmadd231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0xca] + vfmadd231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmadd231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0x08] + vfmadd231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x96,0xca] + vfmaddsub132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x96,0x08] + vfmaddsub132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x96,0xca] + vfmaddsub132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x96,0x08] + vfmaddsub132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa6,0xca] + vfmaddsub213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa6,0x08] + vfmaddsub213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa6,0xca] + vfmaddsub213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa6,0x08] + vfmaddsub213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb6,0xca] + vfmaddsub231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb6,0x08] + vfmaddsub231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmaddsub231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb6,0xca] + vfmaddsub231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmaddsub231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb6,0x08] + vfmaddsub231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x97,0xca] + vfmsubadd132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x97,0x08] + vfmsubadd132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x97,0xca] + vfmsubadd132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x97,0x08] + vfmsubadd132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa7,0xca] + vfmsubadd213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xa7,0x08] + vfmsubadd213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa7,0xca] + vfmsubadd213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xa7,0x08] + vfmsubadd213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb7,0xca] + vfmsubadd231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xb7,0x08] + vfmsubadd231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsubadd231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb7,0xca] + vfmsubadd231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsubadd231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xb7,0x08] + vfmsubadd231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9a,0xca] + vfmsub132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9a,0x08] + vfmsub132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9a,0xca] + vfmsub132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9a,0x08] + vfmsub132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xaa,0xca] + vfmsub213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xaa,0x08] + vfmsub213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xaa,0xca] + vfmsub213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xaa,0x08] + vfmsub213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xba,0xca] + vfmsub231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xba,0x08] + vfmsub231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfmsub231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xba,0xca] + vfmsub231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfmsub231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xba,0x08] + vfmsub231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9c,0xca] + vfnmadd132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9c,0x08] + vfnmadd132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9c,0xca] + vfnmadd132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9c,0x08] + vfnmadd132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xac,0xca] + vfnmadd213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xac,0x08] + vfnmadd213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xac,0xca] + vfnmadd213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xac,0x08] + vfnmadd213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xbc,0xca] + vfnmadd231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xbc,0x08] + vfnmadd231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmadd231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xbc,0xca] + vfnmadd231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmadd231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xbc,0x08] + vfnmadd231ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub132pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9e,0xca] + vfnmsub132pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub132pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0x9e,0x08] + vfnmsub132pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub132ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9e,0xca] + vfnmsub132ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub132ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0x9e,0x08] + vfnmsub132ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub213pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xae,0xca] + vfnmsub213pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub213pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xae,0x08] + vfnmsub213pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub213ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xae,0xca] + vfnmsub213ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub213ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xae,0x08] + vfnmsub213ps (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub231pd %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xbe,0xca] + vfnmsub231pd %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub231pd (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0xd5,0xbe,0x08] + vfnmsub231pd (%eax), %ymm5, %ymm1 + +// CHECK: vfnmsub231ps %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xbe,0xca] + vfnmsub231ps %ymm2, %ymm5, %ymm1 + +// CHECK: vfnmsub231ps (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe2,0x55,0xbe,0x08] + vfnmsub231ps (%eax), %ymm5, %ymm1 + diff --git a/test/MC/AsmParser/X86/x86_32-new-encoder.s b/test/MC/AsmParser/X86/x86_32-new-encoder.s index e97e494..e3aa188 100644 --- a/test/MC/AsmParser/X86/x86_32-new-encoder.s +++ b/test/MC/AsmParser/X86/x86_32-new-encoder.s @@ -415,3 +415,11 @@ retl // CHECK: encoding: [0x61] popal +// CHECK: jmpl *8(%eax) +// CHECK: encoding: [0xff,0x60,0x08] + jmp *8(%eax) + +// PR7465 +// CHECK: lcalll $2, $4660 +// CHECK: encoding: [0x9a,0x34,0x12,0x00,0x00,0x02,0x00] +lcalll $0x2, $0x1234 diff --git a/test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s b/test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s new file mode 100644 index 0000000..67e82c6 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s @@ -0,0 +1,42 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vpclmulqdq $17, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x11] + vpclmulhqhqdq %xmm12, %xmm10, %xmm11 + +// CHECK: vpclmulqdq $17, (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x11] + vpclmulhqhqdq (%rax), %xmm10, %xmm13 + +// CHECK: vpclmulqdq $1, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x01] + vpclmulhqlqdq %xmm12, %xmm10, %xmm11 + +// CHECK: vpclmulqdq $1, (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x01] + vpclmulhqlqdq (%rax), %xmm10, %xmm13 + +// CHECK: vpclmulqdq $16, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x10] + vpclmullqhqdq %xmm12, %xmm10, %xmm11 + +// CHECK: vpclmulqdq $16, (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x10] + vpclmullqhqdq (%rax), %xmm10, %xmm13 + +// CHECK: vpclmulqdq $0, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x00] + vpclmullqlqdq %xmm12, %xmm10, %xmm11 + +// CHECK: vpclmulqdq $0, (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x00] + vpclmullqlqdq (%rax), %xmm10, %xmm13 + +// CHECK: vpclmulqdq $17, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x11] + vpclmulqdq $17, %xmm12, %xmm10, %xmm11 + +// CHECK: vpclmulqdq $17, (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x11] + vpclmulqdq $17, (%rax), %xmm10, %xmm13 + diff --git a/test/MC/AsmParser/X86/x86_64-avx-encoding.s b/test/MC/AsmParser/X86/x86_64-avx-encoding.s new file mode 100644 index 0000000..7a96bb5 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_64-avx-encoding.s @@ -0,0 +1,3318 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vaddss %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x32,0x58,0xd0] +vaddss %xmm8, %xmm9, %xmm10 + +// CHECK: vmulss %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x32,0x59,0xd0] +vmulss %xmm8, %xmm9, %xmm10 + +// CHECK: vsubss %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x32,0x5c,0xd0] +vsubss %xmm8, %xmm9, %xmm10 + +// CHECK: vdivss %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x32,0x5e,0xd0] +vdivss %xmm8, %xmm9, %xmm10 + +// CHECK: vaddsd %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x33,0x58,0xd0] +vaddsd %xmm8, %xmm9, %xmm10 + +// CHECK: vmulsd %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x33,0x59,0xd0] +vmulsd %xmm8, %xmm9, %xmm10 + +// CHECK: vsubsd %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x33,0x5c,0xd0] +vsubsd %xmm8, %xmm9, %xmm10 + +// CHECK: vdivsd %xmm8, %xmm9, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x33,0x5e,0xd0] +vdivsd %xmm8, %xmm9, %xmm10 + +// CHECK: vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2a,0x58,0x5c,0xd9,0xfc] +vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2a,0x5c,0x5c,0xd9,0xfc] +vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2a,0x59,0x5c,0xd9,0xfc] +vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2a,0x5e,0x5c,0xd9,0xfc] +vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2b,0x58,0x5c,0xd9,0xfc] +vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2b,0x5c,0x5c,0xd9,0xfc] +vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2b,0x59,0x5c,0xd9,0xfc] +vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x2b,0x5e,0x5c,0xd9,0xfc] +vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vaddps %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x20,0x58,0xfa] +vaddps %xmm10, %xmm11, %xmm15 + +// CHECK: vsubps %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x20,0x5c,0xfa] +vsubps %xmm10, %xmm11, %xmm15 + +// CHECK: vmulps %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x20,0x59,0xfa] +vmulps %xmm10, %xmm11, %xmm15 + +// CHECK: vdivps %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x20,0x5e,0xfa] +vdivps %xmm10, %xmm11, %xmm15 + +// CHECK: vaddpd %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x21,0x58,0xfa] +vaddpd %xmm10, %xmm11, %xmm15 + +// CHECK: vsubpd %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x21,0x5c,0xfa] +vsubpd %xmm10, %xmm11, %xmm15 + +// CHECK: vmulpd %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x21,0x59,0xfa] +vmulpd %xmm10, %xmm11, %xmm15 + +// CHECK: vdivpd %xmm10, %xmm11, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x21,0x5e,0xfa] +vdivpd %xmm10, %xmm11, %xmm15 + +// CHECK: vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x28,0x58,0x5c,0xd9,0xfc] +vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x28,0x5c,0x5c,0xd9,0xfc] +vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x28,0x59,0x5c,0xd9,0xfc] +vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x28,0x5e,0x5c,0xd9,0xfc] +vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x29,0x58,0x5c,0xd9,0xfc] +vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x29,0x5c,0x5c,0xd9,0xfc] +vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x29,0x59,0x5c,0xd9,0xfc] +vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 +// CHECK: encoding: [0xc5,0x29,0x5e,0x5c,0xd9,0xfc] +vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 + +// CHECK: vmaxss %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x0a,0x5f,0xe2] + vmaxss %xmm10, %xmm14, %xmm12 + +// CHECK: vmaxsd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x0b,0x5f,0xe2] + vmaxsd %xmm10, %xmm14, %xmm12 + +// CHECK: vminss %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x0a,0x5d,0xe2] + vminss %xmm10, %xmm14, %xmm12 + +// CHECK: vminsd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x0b,0x5d,0xe2] + vminsd %xmm10, %xmm14, %xmm12 + +// CHECK: vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x5f,0x54,0xcb,0xfc] + vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vmaxsd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1b,0x5f,0x54,0xcb,0xfc] + vmaxsd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vminss -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x5d,0x54,0xcb,0xfc] + vminss -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1b,0x5d,0x54,0xcb,0xfc] + vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vmaxps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x5f,0xe2] + vmaxps %xmm10, %xmm14, %xmm12 + +// CHECK: vmaxpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x5f,0xe2] + vmaxpd %xmm10, %xmm14, %xmm12 + +// CHECK: vminps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x5d,0xe2] + vminps %xmm10, %xmm14, %xmm12 + +// CHECK: vminpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x5d,0xe2] + vminpd %xmm10, %xmm14, %xmm12 + +// CHECK: vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x5f,0x54,0xcb,0xfc] + vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x5f,0x54,0xcb,0xfc] + vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vminps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x5d,0x54,0xcb,0xfc] + vminps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x5d,0x54,0xcb,0xfc] + vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vandps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x54,0xe2] + vandps %xmm10, %xmm14, %xmm12 + +// CHECK: vandpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x54,0xe2] + vandpd %xmm10, %xmm14, %xmm12 + +// CHECK: vandps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x54,0x54,0xcb,0xfc] + vandps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x54,0x54,0xcb,0xfc] + vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vorps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x56,0xe2] + vorps %xmm10, %xmm14, %xmm12 + +// CHECK: vorpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x56,0xe2] + vorpd %xmm10, %xmm14, %xmm12 + +// CHECK: vorps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x56,0x54,0xcb,0xfc] + vorps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x56,0x54,0xcb,0xfc] + vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vxorps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x57,0xe2] + vxorps %xmm10, %xmm14, %xmm12 + +// CHECK: vxorpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x57,0xe2] + vxorpd %xmm10, %xmm14, %xmm12 + +// CHECK: vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x57,0x54,0xcb,0xfc] + vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x57,0x54,0xcb,0xfc] + vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vandnps %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x08,0x55,0xe2] + vandnps %xmm10, %xmm14, %xmm12 + +// CHECK: vandnpd %xmm10, %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x09,0x55,0xe2] + vandnpd %xmm10, %xmm14, %xmm12 + +// CHECK: vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x18,0x55,0x54,0xcb,0xfc] + vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0x55,0x54,0xcb,0xfc] + vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10 + +// CHECK: vmovss -4(%rbx,%rcx,8), %xmm10 +// CHECK: encoding: [0xc5,0x7a,0x10,0x54,0xcb,0xfc] + vmovss -4(%rbx,%rcx,8), %xmm10 + +// CHECK: vmovss %xmm14, %xmm10, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x2a,0x10,0xfe] + vmovss %xmm14, %xmm10, %xmm15 + +// CHECK: vmovsd -4(%rbx,%rcx,8), %xmm10 +// CHECK: encoding: [0xc5,0x7b,0x10,0x54,0xcb,0xfc] + vmovsd -4(%rbx,%rcx,8), %xmm10 + +// CHECK: vmovsd %xmm14, %xmm10, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x2b,0x10,0xfe] + vmovsd %xmm14, %xmm10, %xmm15 + +// CHECK: vunpckhps %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0x15,0xef] + vunpckhps %xmm15, %xmm12, %xmm13 + +// CHECK: vunpckhpd %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x15,0xef] + vunpckhpd %xmm15, %xmm12, %xmm13 + +// CHECK: vunpcklps %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0x14,0xef] + vunpcklps %xmm15, %xmm12, %xmm13 + +// CHECK: vunpcklpd %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x14,0xef] + vunpcklpd %xmm15, %xmm12, %xmm13 + +// CHECK: vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x18,0x15,0x7c,0xcb,0xfc] + vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15 + +// CHECK: vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x19,0x15,0x7c,0xcb,0xfc] + vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15 + +// CHECK: vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x18,0x14,0x7c,0xcb,0xfc] + vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15 + +// CHECK: vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x19,0x14,0x7c,0xcb,0xfc] + vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15 + +// CHECK: vcmpps $0, %xmm10, %xmm12, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x00] + vcmpps $0, %xmm10, %xmm12, %xmm15 + +// CHECK: vcmpps $0, (%rax), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x18,0xc2,0x38,0x00] + vcmpps $0, (%rax), %xmm12, %xmm15 + +// CHECK: vcmpps $7, %xmm10, %xmm12, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x07] + vcmpps $7, %xmm10, %xmm12, %xmm15 + +// CHECK: vcmppd $0, %xmm10, %xmm12, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x00] + vcmppd $0, %xmm10, %xmm12, %xmm15 + +// CHECK: vcmppd $0, (%rax), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x19,0xc2,0x38,0x00] + vcmppd $0, (%rax), %xmm12, %xmm15 + +// CHECK: vcmppd $7, %xmm10, %xmm12, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x07] + vcmppd $7, %xmm10, %xmm12, %xmm15 + +// CHECK: vshufps $8, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc6,0xeb,0x08] + vshufps $8, %xmm11, %xmm12, %xmm13 + +// CHECK: vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc6,0x6c,0xcb,0xfc,0x08] + vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vshufpd $8, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc6,0xeb,0x08] + vshufpd $8, %xmm11, %xmm12, %xmm13 + +// CHECK: vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc6,0x6c,0xcb,0xfc,0x08] + vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $0, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x00] + vcmpeqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $2, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x02] + vcmpleps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $1, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x01] + vcmpltps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $4, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x04] + vcmpneqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $6, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x06] + vcmpnleps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $5, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x05] + vcmpnltps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x07] + vcmpordps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $3, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x03] + vcmpunordps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x02] + vcmpleps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnleps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordps -4(%rbx,%rcx,8), %xmm6, %xmm2 + +// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordps -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $0, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x00] + vcmpeqpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $2, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x02] + vcmplepd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $1, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x01] + vcmpltpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $4, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x04] + vcmpneqpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $6, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x06] + vcmpnlepd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $5, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x05] + vcmpnltpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x07] + vcmpordpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $3, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x03] + vcmpunordpd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqpd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x02] + vcmplepd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltpd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqpd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnlepd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltpd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordpd -4(%rbx,%rcx,8), %xmm6, %xmm2 + +// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordpd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $0, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x00] + vcmpeqss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $2, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x02] + vcmpless %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $1, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x01] + vcmpltss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $4, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x04] + vcmpneqss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $6, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x06] + vcmpnless %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $5, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x05] + vcmpnltss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x07] + vcmpordss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $3, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x03] + vcmpunordss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x02] + vcmpless -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnless -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordss -4(%rbx,%rcx,8), %xmm6, %xmm2 + +// CHECK: vcmpss $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $0, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x00] + vcmpeqsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $2, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x02] + vcmplesd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $1, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x01] + vcmpltsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $4, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x04] + vcmpneqsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $6, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x06] + vcmpnlesd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $5, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x05] + vcmpnltsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x07] + vcmpordsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $3, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x03] + vcmpunordsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x02] + vcmplesd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnlesd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordsd -4(%rbx,%rcx,8), %xmm6, %xmm2 + +// CHECK: vcmpsd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vucomiss %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x2e,0xe3] + vucomiss %xmm11, %xmm12 + +// CHECK: vucomiss (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x2e,0x20] + vucomiss (%rax), %xmm12 + +// CHECK: vcomiss %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x2f,0xe3] + vcomiss %xmm11, %xmm12 + +// CHECK: vcomiss (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x2f,0x20] + vcomiss (%rax), %xmm12 + +// CHECK: vucomisd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xe3] + vucomisd %xmm11, %xmm12 + +// CHECK: vucomisd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x2e,0x20] + vucomisd (%rax), %xmm12 + +// CHECK: vcomisd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x2f,0xe3] + vcomisd %xmm11, %xmm12 + +// CHECK: vcomisd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x2f,0x20] + vcomisd (%rax), %xmm12 + +// CHECK: vcvttss2si (%rcx), %eax +// CHECK: encoding: [0xc5,0xfa,0x2c,0x01] + vcvttss2si (%rcx), %eax + +// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x22,0x2a,0x20] + vcvtsi2ss (%rax), %xmm11, %xmm12 + +// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x22,0x2a,0x20] + vcvtsi2ss (%rax), %xmm11, %xmm12 + +// CHECK: vcvttsd2si (%rcx), %eax +// CHECK: encoding: [0xc5,0xfb,0x2c,0x01] + vcvttsd2si (%rcx), %eax + +// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x23,0x2a,0x20] + vcvtsi2sd (%rax), %xmm11, %xmm12 + +// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x23,0x2a,0x20] + vcvtsi2sd (%rax), %xmm11, %xmm12 + +// CHECK: vmovaps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x28,0x20] + vmovaps (%rax), %xmm12 + +// CHECK: vmovaps %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x28,0xe3] + vmovaps %xmm11, %xmm12 + +// CHECK: vmovaps %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x78,0x29,0x18] + vmovaps %xmm11, (%rax) + +// CHECK: vmovapd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x28,0x20] + vmovapd (%rax), %xmm12 + +// CHECK: vmovapd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x28,0xe3] + vmovapd %xmm11, %xmm12 + +// CHECK: vmovapd %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0x29,0x18] + vmovapd %xmm11, (%rax) + +// CHECK: vmovups (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x10,0x20] + vmovups (%rax), %xmm12 + +// CHECK: vmovups %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x10,0xe3] + vmovups %xmm11, %xmm12 + +// CHECK: vmovups %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x78,0x11,0x18] + vmovups %xmm11, (%rax) + +// CHECK: vmovupd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x10,0x20] + vmovupd (%rax), %xmm12 + +// CHECK: vmovupd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x10,0xe3] + vmovupd %xmm11, %xmm12 + +// CHECK: vmovupd %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0x11,0x18] + vmovupd %xmm11, (%rax) + +// CHECK: vmovlps %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x78,0x13,0x18] + vmovlps %xmm11, (%rax) + +// CHECK: vmovlps (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0x12,0x28] + vmovlps (%rax), %xmm12, %xmm13 + +// CHECK: vmovlpd %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0x13,0x18] + vmovlpd %xmm11, (%rax) + +// CHECK: vmovlpd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x12,0x28] + vmovlpd (%rax), %xmm12, %xmm13 + +// CHECK: vmovhps %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x78,0x17,0x18] + vmovhps %xmm11, (%rax) + +// CHECK: vmovhps (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x18,0x16,0x28] + vmovhps (%rax), %xmm12, %xmm13 + +// CHECK: vmovhpd %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0x17,0x18] + vmovhpd %xmm11, (%rax) + +// CHECK: vmovhpd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x16,0x28] + vmovhpd (%rax), %xmm12, %xmm13 + +// CHECK: vmovlhps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0x16,0xeb] + vmovlhps %xmm11, %xmm12, %xmm13 + +// CHECK: vmovhlps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0x12,0xeb] + vmovhlps %xmm11, %xmm12, %xmm13 + +// CHECK: vcvtss2sil %xmm11, %eax +// CHECK: encoding: [0xc4,0xc1,0x7a,0x2d,0xc3] + vcvtss2si %xmm11, %eax + +// CHECK: vcvtss2sil (%rax), %ebx +// CHECK: encoding: [0xc5,0xfa,0x2d,0x18] + vcvtss2si (%rax), %ebx + +// CHECK: vcvtdq2ps %xmm10, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xe2] + vcvtdq2ps %xmm10, %xmm12 + +// CHECK: vcvtdq2ps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x5b,0x20] + vcvtdq2ps (%rax), %xmm12 + +// CHECK: vcvtsd2ss %xmm12, %xmm13, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x13,0x5a,0xd4] + vcvtsd2ss %xmm12, %xmm13, %xmm10 + +// CHECK: vcvtsd2ss (%rax), %xmm13, %xmm10 +// CHECK: encoding: [0xc5,0x13,0x5a,0x10] + vcvtsd2ss (%rax), %xmm13, %xmm10 + +// CHECK: vcvtps2dq %xmm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x79,0x5b,0xdc] + vcvtps2dq %xmm12, %xmm11 + +// CHECK: vcvtps2dq (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x79,0x5b,0x18] + vcvtps2dq (%rax), %xmm11 + +// CHECK: vcvtss2sd %xmm12, %xmm13, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x12,0x5a,0xd4] + vcvtss2sd %xmm12, %xmm13, %xmm10 + +// CHECK: vcvtss2sd (%rax), %xmm13, %xmm10 +// CHECK: encoding: [0xc5,0x12,0x5a,0x10] + vcvtss2sd (%rax), %xmm13, %xmm10 + +// CHECK: vcvtdq2ps %xmm13, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xd5] + vcvtdq2ps %xmm13, %xmm10 + +// CHECK: vcvtdq2ps (%ecx), %xmm13 +// CHECK: encoding: [0xc5,0x78,0x5b,0x29] + vcvtdq2ps (%ecx), %xmm13 + +// CHECK: vcvttps2dq %xmm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x7a,0x5b,0xdc] + vcvttps2dq %xmm12, %xmm11 + +// CHECK: vcvttps2dq (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x7a,0x5b,0x18] + vcvttps2dq (%rax), %xmm11 + +// CHECK: vcvtps2pd %xmm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x78,0x5a,0xdc] + vcvtps2pd %xmm12, %xmm11 + +// CHECK: vcvtps2pd (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x78,0x5a,0x18] + vcvtps2pd (%rax), %xmm11 + +// CHECK: vcvtpd2ps %xmm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xdc] + vcvtpd2ps %xmm12, %xmm11 + +// CHECK: vsqrtpd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x51,0xe3] + vsqrtpd %xmm11, %xmm12 + +// CHECK: vsqrtpd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x51,0x20] + vsqrtpd (%rax), %xmm12 + +// CHECK: vsqrtps %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x51,0xe3] + vsqrtps %xmm11, %xmm12 + +// CHECK: vsqrtps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x51,0x20] + vsqrtps (%rax), %xmm12 + +// CHECK: vsqrtsd %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1b,0x51,0xd3] + vsqrtsd %xmm11, %xmm12, %xmm10 + +// CHECK: vsqrtsd (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1b,0x51,0x10] + vsqrtsd (%rax), %xmm12, %xmm10 + +// CHECK: vsqrtss %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1a,0x51,0xd3] + vsqrtss %xmm11, %xmm12, %xmm10 + +// CHECK: vsqrtss (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x51,0x10] + vsqrtss (%rax), %xmm12, %xmm10 + +// CHECK: vrsqrtps %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x52,0xe3] + vrsqrtps %xmm11, %xmm12 + +// CHECK: vrsqrtps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x52,0x20] + vrsqrtps (%rax), %xmm12 + +// CHECK: vrsqrtss %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1a,0x52,0xd3] + vrsqrtss %xmm11, %xmm12, %xmm10 + +// CHECK: vrsqrtss (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x52,0x10] + vrsqrtss (%rax), %xmm12, %xmm10 + +// CHECK: vrcpps %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x53,0xe3] + vrcpps %xmm11, %xmm12 + +// CHECK: vrcpps (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x53,0x20] + vrcpps (%rax), %xmm12 + +// CHECK: vrcpss %xmm11, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x1a,0x53,0xd3] + vrcpss %xmm11, %xmm12, %xmm10 + +// CHECK: vrcpss (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x1a,0x53,0x10] + vrcpss (%rax), %xmm12, %xmm10 + +// CHECK: vmovntdq %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0xe7,0x18] + vmovntdq %xmm11, (%rax) + +// CHECK: vmovntpd %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x79,0x2b,0x18] + vmovntpd %xmm11, (%rax) + +// CHECK: vmovntps %xmm11, (%rax) +// CHECK: encoding: [0xc5,0x78,0x2b,0x18] + vmovntps %xmm11, (%rax) + +// CHECK: vldmxcsr -4(%rip) +// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xfc,0xff,0xff,0xff] + vldmxcsr -4(%rip) + +// CHECK: vstmxcsr -4(%rsp) +// CHECK: encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc] + vstmxcsr -4(%rsp) + +// CHECK: vpsubb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf8,0xeb] + vpsubb %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf8,0x28] + vpsubb (%rax), %xmm12, %xmm13 + +// CHECK: vpsubw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf9,0xeb] + vpsubw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf9,0x28] + vpsubw (%rax), %xmm12, %xmm13 + +// CHECK: vpsubd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xfa,0xeb] + vpsubd %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xfa,0x28] + vpsubd (%rax), %xmm12, %xmm13 + +// CHECK: vpsubq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xfb,0xeb] + vpsubq %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xfb,0x28] + vpsubq (%rax), %xmm12, %xmm13 + +// CHECK: vpsubsb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe8,0xeb] + vpsubsb %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubsb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe8,0x28] + vpsubsb (%rax), %xmm12, %xmm13 + +// CHECK: vpsubsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe9,0xeb] + vpsubsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe9,0x28] + vpsubsw (%rax), %xmm12, %xmm13 + +// CHECK: vpsubusb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd8,0xeb] + vpsubusb %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubusb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd8,0x28] + vpsubusb (%rax), %xmm12, %xmm13 + +// CHECK: vpsubusw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd9,0xeb] + vpsubusw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsubusw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd9,0x28] + vpsubusw (%rax), %xmm12, %xmm13 + +// CHECK: vpaddb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xfc,0xeb] + vpaddb %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xfc,0x28] + vpaddb (%rax), %xmm12, %xmm13 + +// CHECK: vpaddw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xfd,0xeb] + vpaddw %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xfd,0x28] + vpaddw (%rax), %xmm12, %xmm13 + +// CHECK: vpaddd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xfe,0xeb] + vpaddd %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xfe,0x28] + vpaddd (%rax), %xmm12, %xmm13 + +// CHECK: vpaddq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd4,0xeb] + vpaddq %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd4,0x28] + vpaddq (%rax), %xmm12, %xmm13 + +// CHECK: vpaddsb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xec,0xeb] + vpaddsb %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddsb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xec,0x28] + vpaddsb (%rax), %xmm12, %xmm13 + +// CHECK: vpaddsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xed,0xeb] + vpaddsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xed,0x28] + vpaddsw (%rax), %xmm12, %xmm13 + +// CHECK: vpaddusb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xdc,0xeb] + vpaddusb %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddusb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xdc,0x28] + vpaddusb (%rax), %xmm12, %xmm13 + +// CHECK: vpaddusw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xdd,0xeb] + vpaddusw %xmm11, %xmm12, %xmm13 + +// CHECK: vpaddusw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xdd,0x28] + vpaddusw (%rax), %xmm12, %xmm13 + +// CHECK: vpmulhuw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe4,0xeb] + vpmulhuw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmulhuw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe4,0x28] + vpmulhuw (%rax), %xmm12, %xmm13 + +// CHECK: vpmulhw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe5,0xeb] + vpmulhw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmulhw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe5,0x28] + vpmulhw (%rax), %xmm12, %xmm13 + +// CHECK: vpmullw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd5,0xeb] + vpmullw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmullw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd5,0x28] + vpmullw (%rax), %xmm12, %xmm13 + +// CHECK: vpmuludq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf4,0xeb] + vpmuludq %xmm11, %xmm12, %xmm13 + +// CHECK: vpmuludq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf4,0x28] + vpmuludq (%rax), %xmm12, %xmm13 + +// CHECK: vpavgb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe0,0xeb] + vpavgb %xmm11, %xmm12, %xmm13 + +// CHECK: vpavgb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe0,0x28] + vpavgb (%rax), %xmm12, %xmm13 + +// CHECK: vpavgw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe3,0xeb] + vpavgw %xmm11, %xmm12, %xmm13 + +// CHECK: vpavgw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe3,0x28] + vpavgw (%rax), %xmm12, %xmm13 + +// CHECK: vpminsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xea,0xeb] + vpminsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpminsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xea,0x28] + vpminsw (%rax), %xmm12, %xmm13 + +// CHECK: vpminub %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xda,0xeb] + vpminub %xmm11, %xmm12, %xmm13 + +// CHECK: vpminub (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xda,0x28] + vpminub (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xee,0xeb] + vpmaxsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmaxsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xee,0x28] + vpmaxsw (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxub %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xde,0xeb] + vpmaxub %xmm11, %xmm12, %xmm13 + +// CHECK: vpmaxub (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xde,0x28] + vpmaxub (%rax), %xmm12, %xmm13 + +// CHECK: vpsadbw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf6,0xeb] + vpsadbw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsadbw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf6,0x28] + vpsadbw (%rax), %xmm12, %xmm13 + +// CHECK: vpsllw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf1,0xeb] + vpsllw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsllw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf1,0x28] + vpsllw (%rax), %xmm12, %xmm13 + +// CHECK: vpslld %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf2,0xeb] + vpslld %xmm11, %xmm12, %xmm13 + +// CHECK: vpslld (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf2,0x28] + vpslld (%rax), %xmm12, %xmm13 + +// CHECK: vpsllq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf3,0xeb] + vpsllq %xmm11, %xmm12, %xmm13 + +// CHECK: vpsllq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf3,0x28] + vpsllq (%rax), %xmm12, %xmm13 + +// CHECK: vpsraw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe1,0xeb] + vpsraw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsraw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe1,0x28] + vpsraw (%rax), %xmm12, %xmm13 + +// CHECK: vpsrad %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe2,0xeb] + vpsrad %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrad (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe2,0x28] + vpsrad (%rax), %xmm12, %xmm13 + +// CHECK: vpsrlw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd1,0xeb] + vpsrlw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrlw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd1,0x28] + vpsrlw (%rax), %xmm12, %xmm13 + +// CHECK: vpsrld %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd2,0xeb] + vpsrld %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrld (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd2,0x28] + vpsrld (%rax), %xmm12, %xmm13 + +// CHECK: vpsrlq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd3,0xeb] + vpsrlq %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrlq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd3,0x28] + vpsrlq (%rax), %xmm12, %xmm13 + +// CHECK: vpslld $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a] + vpslld $10, %xmm12, %xmm13 + +// CHECK: vpslldq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xfc,0x0a] + vpslldq $10, %xmm12, %xmm13 + +// CHECK: vpsllq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xf4,0x0a] + vpsllq $10, %xmm12, %xmm13 + +// CHECK: vpsllw $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xf4,0x0a] + vpsllw $10, %xmm12, %xmm13 + +// CHECK: vpsrad $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xe4,0x0a] + vpsrad $10, %xmm12, %xmm13 + +// CHECK: vpsraw $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xe4,0x0a] + vpsraw $10, %xmm12, %xmm13 + +// CHECK: vpsrld $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xd4,0x0a] + vpsrld $10, %xmm12, %xmm13 + +// CHECK: vpsrldq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xdc,0x0a] + vpsrldq $10, %xmm12, %xmm13 + +// CHECK: vpsrlq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xd4,0x0a] + vpsrlq $10, %xmm12, %xmm13 + +// CHECK: vpsrlw $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xd4,0x0a] + vpsrlw $10, %xmm12, %xmm13 + +// CHECK: vpslld $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a] + vpslld $10, %xmm12, %xmm13 + +// CHECK: vpand %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xdb,0xeb] + vpand %xmm11, %xmm12, %xmm13 + +// CHECK: vpand (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xdb,0x28] + vpand (%rax), %xmm12, %xmm13 + +// CHECK: vpor %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xeb,0xeb] + vpor %xmm11, %xmm12, %xmm13 + +// CHECK: vpor (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xeb,0x28] + vpor (%rax), %xmm12, %xmm13 + +// CHECK: vpxor %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xef,0xeb] + vpxor %xmm11, %xmm12, %xmm13 + +// CHECK: vpxor (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xef,0x28] + vpxor (%rax), %xmm12, %xmm13 + +// CHECK: vpandn %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xdf,0xeb] + vpandn %xmm11, %xmm12, %xmm13 + +// CHECK: vpandn (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xdf,0x28] + vpandn (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpeqb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x74,0xeb] + vpcmpeqb %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpeqb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x74,0x28] + vpcmpeqb (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpeqw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x75,0xeb] + vpcmpeqw %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpeqw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x75,0x28] + vpcmpeqw (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpeqd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x76,0xeb] + vpcmpeqd %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpeqd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x76,0x28] + vpcmpeqd (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpgtb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x64,0xeb] + vpcmpgtb %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpgtb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x64,0x28] + vpcmpgtb (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpgtw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x65,0xeb] + vpcmpgtw %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpgtw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x65,0x28] + vpcmpgtw (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpgtd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x66,0xeb] + vpcmpgtd %xmm11, %xmm12, %xmm13 + +// CHECK: vpcmpgtd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x66,0x28] + vpcmpgtd (%rax), %xmm12, %xmm13 + +// CHECK: vpacksswb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x63,0xeb] + vpacksswb %xmm11, %xmm12, %xmm13 + +// CHECK: vpacksswb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x63,0x28] + vpacksswb (%rax), %xmm12, %xmm13 + +// CHECK: vpackssdw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x6b,0xeb] + vpackssdw %xmm11, %xmm12, %xmm13 + +// CHECK: vpackssdw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x6b,0x28] + vpackssdw (%rax), %xmm12, %xmm13 + +// CHECK: vpackuswb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x67,0xeb] + vpackuswb %xmm11, %xmm12, %xmm13 + +// CHECK: vpackuswb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x67,0x28] + vpackuswb (%rax), %xmm12, %xmm13 + +// CHECK: vpshufd $4, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x79,0x70,0xec,0x04] + vpshufd $4, %xmm12, %xmm13 + +// CHECK: vpshufd $4, (%rax), %xmm13 +// CHECK: encoding: [0xc5,0x79,0x70,0x28,0x04] + vpshufd $4, (%rax), %xmm13 + +// CHECK: vpshufhw $4, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x7a,0x70,0xec,0x04] + vpshufhw $4, %xmm12, %xmm13 + +// CHECK: vpshufhw $4, (%rax), %xmm13 +// CHECK: encoding: [0xc5,0x7a,0x70,0x28,0x04] + vpshufhw $4, (%rax), %xmm13 + +// CHECK: vpshuflw $4, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x7b,0x70,0xec,0x04] + vpshuflw $4, %xmm12, %xmm13 + +// CHECK: vpshuflw $4, (%rax), %xmm13 +// CHECK: encoding: [0xc5,0x7b,0x70,0x28,0x04] + vpshuflw $4, (%rax), %xmm13 + +// CHECK: vpunpcklbw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x60,0xeb] + vpunpcklbw %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpcklbw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x60,0x28] + vpunpcklbw (%rax), %xmm12, %xmm13 + +// CHECK: vpunpcklwd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x61,0xeb] + vpunpcklwd %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpcklwd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x61,0x28] + vpunpcklwd (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckldq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x62,0xeb] + vpunpckldq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckldq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x62,0x28] + vpunpckldq (%rax), %xmm12, %xmm13 + +// CHECK: vpunpcklqdq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x6c,0xeb] + vpunpcklqdq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpcklqdq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x6c,0x28] + vpunpcklqdq (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhbw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x68,0xeb] + vpunpckhbw %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhbw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x68,0x28] + vpunpckhbw (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhwd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x69,0xeb] + vpunpckhwd %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhwd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x69,0x28] + vpunpckhwd (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhdq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x6a,0xeb] + vpunpckhdq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhdq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x6a,0x28] + vpunpckhdq (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhqdq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x6d,0xeb] + vpunpckhqdq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhqdq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x6d,0x28] + vpunpckhqdq (%rax), %xmm12, %xmm13 + +// CHECK: vpinsrw $7, %eax, %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc4,0xe8,0x07] + vpinsrw $7, %eax, %xmm12, %xmm13 + +// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xc4,0x28,0x07] + vpinsrw $7, (%rax), %xmm12, %xmm13 + +// CHECK: vpextrw $7, %xmm12, %eax +// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07] + vpextrw $7, %xmm12, %eax + +// CHECK: vpmovmskb %xmm12, %eax +// CHECK: encoding: [0xc4,0xc1,0x79,0xd7,0xc4] + vpmovmskb %xmm12, %eax + +// CHECK: vmaskmovdqu %xmm14, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x79,0xf7,0xfe] + vmaskmovdqu %xmm14, %xmm15 + +// CHECK: vmovd %eax, %xmm14 +// CHECK: encoding: [0xc5,0x79,0x6e,0xf0] + vmovd %eax, %xmm14 + +// CHECK: vmovd (%rax), %xmm14 +// CHECK: encoding: [0xc5,0x79,0x6e,0x30] + vmovd (%rax), %xmm14 + +// CHECK: vmovd %xmm14, (%rax) +// CHECK: encoding: [0xc5,0x79,0x7e,0x30] + vmovd %xmm14, (%rax) + +// CHECK: vmovd %rax, %xmm14 +// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0] + vmovd %rax, %xmm14 + +// CHECK: vmovq %xmm14, (%rax) +// CHECK: encoding: [0xc5,0x79,0xd6,0x30] + vmovq %xmm14, (%rax) + +// CHECK: vmovq %xmm14, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7a,0x7e,0xe6] + vmovq %xmm14, %xmm12 + +// CHECK: vmovq (%rax), %xmm14 +// CHECK: encoding: [0xc5,0x7a,0x7e,0x30] + vmovq (%rax), %xmm14 + +// CHECK: vmovq %rax, %xmm14 +// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0] + vmovq %rax, %xmm14 + +// CHECK: vmovq %xmm14, %rax +// CHECK: encoding: [0xc4,0x61,0xf9,0x7e,0xf0] + vmovq %xmm14, %rax + +// CHECK: vcvtpd2dq %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xe3] + vcvtpd2dq %xmm11, %xmm12 + +// CHECK: vcvtdq2pd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7a,0xe6,0xe3] + vcvtdq2pd %xmm11, %xmm12 + +// CHECK: vcvtdq2pd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x7a,0xe6,0x20] + vcvtdq2pd (%rax), %xmm12 + +// CHECK: vmovshdup %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7a,0x16,0xe3] + vmovshdup %xmm11, %xmm12 + +// CHECK: vmovshdup (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x7a,0x16,0x20] + vmovshdup (%rax), %xmm12 + +// CHECK: vmovsldup %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7a,0x12,0xe3] + vmovsldup %xmm11, %xmm12 + +// CHECK: vmovsldup (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x7a,0x12,0x20] + vmovsldup (%rax), %xmm12 + +// CHECK: vmovddup %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x7b,0x12,0xe3] + vmovddup %xmm11, %xmm12 + +// CHECK: vmovddup (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x7b,0x12,0x20] + vmovddup (%rax), %xmm12 + +// CHECK: vaddsubps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xd0,0xeb] + vaddsubps %xmm11, %xmm12, %xmm13 + +// CHECK: vaddsubps (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x23,0xd0,0x20] + vaddsubps (%rax), %xmm11, %xmm12 + +// CHECK: vaddsubpd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd0,0xeb] + vaddsubpd %xmm11, %xmm12, %xmm13 + +// CHECK: vaddsubpd (%rax), %xmm11, %xmm12 +// CHECK: encoding: [0xc5,0x21,0xd0,0x20] + vaddsubpd (%rax), %xmm11, %xmm12 + +// CHECK: vhaddps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0x7c,0xeb] + vhaddps %xmm11, %xmm12, %xmm13 + +// CHECK: vhaddps (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0x7c,0x28] + vhaddps (%rax), %xmm12, %xmm13 + +// CHECK: vhaddpd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x7c,0xeb] + vhaddpd %xmm11, %xmm12, %xmm13 + +// CHECK: vhaddpd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x7c,0x28] + vhaddpd (%rax), %xmm12, %xmm13 + +// CHECK: vhsubps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0x7d,0xeb] + vhsubps %xmm11, %xmm12, %xmm13 + +// CHECK: vhsubps (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0x7d,0x28] + vhsubps (%rax), %xmm12, %xmm13 + +// CHECK: vhsubpd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x7d,0xeb] + vhsubpd %xmm11, %xmm12, %xmm13 + +// CHECK: vhsubpd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x7d,0x28] + vhsubpd (%rax), %xmm12, %xmm13 + +// CHECK: vpabsb %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x42,0x79,0x1c,0xe3] + vpabsb %xmm11, %xmm12 + +// CHECK: vpabsb (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x1c,0x20] + vpabsb (%rax), %xmm12 + +// CHECK: vpabsw %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x42,0x79,0x1d,0xe3] + vpabsw %xmm11, %xmm12 + +// CHECK: vpabsw (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x1d,0x20] + vpabsw (%rax), %xmm12 + +// CHECK: vpabsd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x42,0x79,0x1e,0xe3] + vpabsd %xmm11, %xmm12 + +// CHECK: vpabsd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x1e,0x20] + vpabsd (%rax), %xmm12 + +// CHECK: vphaddw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x01,0xeb] + vphaddw %xmm11, %xmm12, %xmm13 + +// CHECK: vphaddw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x01,0x28] + vphaddw (%rax), %xmm12, %xmm13 + +// CHECK: vphaddd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x02,0xeb] + vphaddd %xmm11, %xmm12, %xmm13 + +// CHECK: vphaddd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x02,0x28] + vphaddd (%rax), %xmm12, %xmm13 + +// CHECK: vphaddsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x03,0xeb] + vphaddsw %xmm11, %xmm12, %xmm13 + +// CHECK: vphaddsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x03,0x28] + vphaddsw (%rax), %xmm12, %xmm13 + +// CHECK: vphsubw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x05,0xeb] + vphsubw %xmm11, %xmm12, %xmm13 + +// CHECK: vphsubw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x05,0x28] + vphsubw (%rax), %xmm12, %xmm13 + +// CHECK: vphsubd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x06,0xeb] + vphsubd %xmm11, %xmm12, %xmm13 + +// CHECK: vphsubd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x06,0x28] + vphsubd (%rax), %xmm12, %xmm13 + +// CHECK: vphsubsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x07,0xeb] + vphsubsw %xmm11, %xmm12, %xmm13 + +// CHECK: vphsubsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x07,0x28] + vphsubsw (%rax), %xmm12, %xmm13 + +// CHECK: vpmaddubsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x04,0xeb] + vpmaddubsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmaddubsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x04,0x28] + vpmaddubsw (%rax), %xmm12, %xmm13 + +// CHECK: vpshufb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x00,0xeb] + vpshufb %xmm11, %xmm12, %xmm13 + +// CHECK: vpshufb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x00,0x28] + vpshufb (%rax), %xmm12, %xmm13 + +// CHECK: vpsignb %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x08,0xeb] + vpsignb %xmm11, %xmm12, %xmm13 + +// CHECK: vpsignb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x08,0x28] + vpsignb (%rax), %xmm12, %xmm13 + +// CHECK: vpsignw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x09,0xeb] + vpsignw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsignw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x09,0x28] + vpsignw (%rax), %xmm12, %xmm13 + +// CHECK: vpsignd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x0a,0xeb] + vpsignd %xmm11, %xmm12, %xmm13 + +// CHECK: vpsignd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x0a,0x28] + vpsignd (%rax), %xmm12, %xmm13 + +// CHECK: vpmulhrsw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x19,0x0b,0xeb] + vpmulhrsw %xmm11, %xmm12, %xmm13 + +// CHECK: vpmulhrsw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x0b,0x28] + vpmulhrsw (%rax), %xmm12, %xmm13 + +// CHECK: vpalignr $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x43,0x19,0x0f,0xeb,0x07] + vpalignr $7, %xmm11, %xmm12, %xmm13 + +// CHECK: vpalignr $7, (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x19,0x0f,0x28,0x07] + vpalignr $7, (%rax), %xmm12, %xmm13 + +// CHECK: vroundsd $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x43,0x19,0x0b,0xeb,0x07] + vroundsd $7, %xmm11, %xmm12, %xmm13 + +// CHECK: vroundsd $7, (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x19,0x0b,0x28,0x07] + vroundsd $7, (%rax), %xmm12, %xmm13 + +// CHECK: vroundss $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x43,0x19,0x0a,0xeb,0x07] + vroundss $7, %xmm11, %xmm12, %xmm13 + +// CHECK: vroundss $7, (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x19,0x0a,0x28,0x07] + vroundss $7, (%rax), %xmm12, %xmm13 + +// CHECK: vroundpd $7, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x43,0x79,0x09,0xec,0x07] + vroundpd $7, %xmm12, %xmm13 + +// CHECK: vroundpd $7, (%rax), %xmm13 +// CHECK: encoding: [0xc4,0x63,0x79,0x09,0x28,0x07] + vroundpd $7, (%rax), %xmm13 + +// CHECK: vroundps $7, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x43,0x79,0x08,0xec,0x07] + vroundps $7, %xmm12, %xmm13 + +// CHECK: vroundps $7, (%rax), %xmm13 +// CHECK: encoding: [0xc4,0x63,0x79,0x08,0x28,0x07] + vroundps $7, (%rax), %xmm13 + +// CHECK: vphminposuw %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x42,0x79,0x41,0xec] + vphminposuw %xmm12, %xmm13 + +// CHECK: vphminposuw (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x41,0x20] + vphminposuw (%rax), %xmm12 + +// CHECK: vpackusdw %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x2b,0xdc] + vpackusdw %xmm12, %xmm13, %xmm11 + +// CHECK: vpackusdw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x2b,0x28] + vpackusdw (%rax), %xmm12, %xmm13 + +// CHECK: vpcmpeqq %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x29,0xdc] + vpcmpeqq %xmm12, %xmm13, %xmm11 + +// CHECK: vpcmpeqq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x29,0x28] + vpcmpeqq (%rax), %xmm12, %xmm13 + +// CHECK: vpminsb %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x38,0xdc] + vpminsb %xmm12, %xmm13, %xmm11 + +// CHECK: vpminsb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x38,0x28] + vpminsb (%rax), %xmm12, %xmm13 + +// CHECK: vpminsd %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x39,0xdc] + vpminsd %xmm12, %xmm13, %xmm11 + +// CHECK: vpminsd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x39,0x28] + vpminsd (%rax), %xmm12, %xmm13 + +// CHECK: vpminud %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3b,0xdc] + vpminud %xmm12, %xmm13, %xmm11 + +// CHECK: vpminud (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3b,0x28] + vpminud (%rax), %xmm12, %xmm13 + +// CHECK: vpminuw %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3a,0xdc] + vpminuw %xmm12, %xmm13, %xmm11 + +// CHECK: vpminuw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3a,0x28] + vpminuw (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxsb %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3c,0xdc] + vpmaxsb %xmm12, %xmm13, %xmm11 + +// CHECK: vpmaxsb (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3c,0x28] + vpmaxsb (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxsd %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3d,0xdc] + vpmaxsd %xmm12, %xmm13, %xmm11 + +// CHECK: vpmaxsd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3d,0x28] + vpmaxsd (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxud %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3f,0xdc] + vpmaxud %xmm12, %xmm13, %xmm11 + +// CHECK: vpmaxud (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3f,0x28] + vpmaxud (%rax), %xmm12, %xmm13 + +// CHECK: vpmaxuw %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x3e,0xdc] + vpmaxuw %xmm12, %xmm13, %xmm11 + +// CHECK: vpmaxuw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x3e,0x28] + vpmaxuw (%rax), %xmm12, %xmm13 + +// CHECK: vpmuldq %xmm12, %xmm13, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x11,0x28,0xdc] + vpmuldq %xmm12, %xmm13, %xmm11 + +// CHECK: vpmuldq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x19,0x28,0x28] + vpmuldq (%rax), %xmm12, %xmm13 + +// CHECK: vpmulld %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x51,0x40,0xdc] + vpmulld %xmm12, %xmm5, %xmm11 + +// CHECK: vpmulld (%rax), %xmm5, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x51,0x40,0x28] + vpmulld (%rax), %xmm5, %xmm13 + +// CHECK: vblendps $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x0c,0xdc,0x03] + vblendps $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vblendps $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x0c,0x18,0x03] + vblendps $3, (%rax), %xmm5, %xmm11 + +// CHECK: vblendpd $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x0d,0xdc,0x03] + vblendpd $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vblendpd $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x0d,0x18,0x03] + vblendpd $3, (%rax), %xmm5, %xmm11 + +// CHECK: vpblendw $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x0e,0xdc,0x03] + vpblendw $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vpblendw $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x0e,0x18,0x03] + vpblendw $3, (%rax), %xmm5, %xmm11 + +// CHECK: vmpsadbw $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x42,0xdc,0x03] + vmpsadbw $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vmpsadbw $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x42,0x18,0x03] + vmpsadbw $3, (%rax), %xmm5, %xmm11 + +// CHECK: vdpps $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x40,0xdc,0x03] + vdpps $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vdpps $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x40,0x18,0x03] + vdpps $3, (%rax), %xmm5, %xmm11 + +// CHECK: vdppd $3, %xmm12, %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x51,0x41,0xdc,0x03] + vdppd $3, %xmm12, %xmm5, %xmm11 + +// CHECK: vdppd $3, (%rax), %xmm5, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x51,0x41,0x18,0x03] + vdppd $3, (%rax), %xmm5, %xmm11 + +// CHECK: vblendvpd %xmm12, %xmm5, %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0xed,0xc0] + vblendvpd %xmm12, %xmm5, %xmm11, %xmm13 + +// CHECK: vblendvpd %xmm12, (%rax), %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0x28,0xc0] + vblendvpd %xmm12, (%rax), %xmm11, %xmm13 + +// CHECK: vblendvps %xmm12, %xmm5, %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0xed,0xc0] + vblendvps %xmm12, %xmm5, %xmm11, %xmm13 + +// CHECK: vblendvps %xmm12, (%rax), %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0x28,0xc0] + vblendvps %xmm12, (%rax), %xmm11, %xmm13 + +// CHECK: vpblendvb %xmm12, %xmm5, %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0xed,0xc0] + vpblendvb %xmm12, %xmm5, %xmm11, %xmm13 + +// CHECK: vpblendvb %xmm12, (%rax), %xmm11, %xmm13 +// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0x28,0xc0] + vpblendvb %xmm12, (%rax), %xmm11, %xmm13 + +// CHECK: vpmovsxbw %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x20,0xd4] + vpmovsxbw %xmm12, %xmm10 + +// CHECK: vpmovsxbw (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x20,0x20] + vpmovsxbw (%rax), %xmm12 + +// CHECK: vpmovsxwd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x23,0xd4] + vpmovsxwd %xmm12, %xmm10 + +// CHECK: vpmovsxwd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x23,0x20] + vpmovsxwd (%rax), %xmm12 + +// CHECK: vpmovsxdq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x25,0xd4] + vpmovsxdq %xmm12, %xmm10 + +// CHECK: vpmovsxdq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x25,0x20] + vpmovsxdq (%rax), %xmm12 + +// CHECK: vpmovzxbw %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x30,0xd4] + vpmovzxbw %xmm12, %xmm10 + +// CHECK: vpmovzxbw (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x30,0x20] + vpmovzxbw (%rax), %xmm12 + +// CHECK: vpmovzxwd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x33,0xd4] + vpmovzxwd %xmm12, %xmm10 + +// CHECK: vpmovzxwd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x33,0x20] + vpmovzxwd (%rax), %xmm12 + +// CHECK: vpmovzxdq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x35,0xd4] + vpmovzxdq %xmm12, %xmm10 + +// CHECK: vpmovzxdq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x35,0x20] + vpmovzxdq (%rax), %xmm12 + +// CHECK: vpmovsxbq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x22,0xd4] + vpmovsxbq %xmm12, %xmm10 + +// CHECK: vpmovsxbq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x22,0x20] + vpmovsxbq (%rax), %xmm12 + +// CHECK: vpmovzxbq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x32,0xd4] + vpmovzxbq %xmm12, %xmm10 + +// CHECK: vpmovzxbq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x32,0x20] + vpmovzxbq (%rax), %xmm12 + +// CHECK: vpmovsxbd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x21,0xd4] + vpmovsxbd %xmm12, %xmm10 + +// CHECK: vpmovsxbd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x21,0x20] + vpmovsxbd (%rax), %xmm12 + +// CHECK: vpmovsxwq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x24,0xd4] + vpmovsxwq %xmm12, %xmm10 + +// CHECK: vpmovsxwq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x24,0x20] + vpmovsxwq (%rax), %xmm12 + +// CHECK: vpmovzxbd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x31,0xd4] + vpmovzxbd %xmm12, %xmm10 + +// CHECK: vpmovzxbd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x31,0x20] + vpmovzxbd (%rax), %xmm12 + +// CHECK: vpmovzxwq %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x34,0xd4] + vpmovzxwq %xmm12, %xmm10 + +// CHECK: vpmovzxwq (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x34,0x20] + vpmovzxwq (%rax), %xmm12 + +// CHECK: vpextrw $7, %xmm12, %eax +// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07] + vpextrw $7, %xmm12, %eax + +// CHECK: vpextrw $7, %xmm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x79,0x15,0x20,0x07] + vpextrw $7, %xmm12, (%rax) + +// CHECK: vpextrd $7, %xmm12, %eax +// CHECK: encoding: [0xc4,0x63,0x79,0x16,0xe0,0x07] + vpextrd $7, %xmm12, %eax + +// CHECK: vpextrd $7, %xmm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x79,0x16,0x20,0x07] + vpextrd $7, %xmm12, (%rax) + +// CHECK: vpextrb $7, %xmm12, %eax +// CHECK: encoding: [0xc4,0x63,0x79,0x14,0xe0,0x07] + vpextrb $7, %xmm12, %eax + +// CHECK: vpextrb $7, %xmm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x79,0x14,0x20,0x07] + vpextrb $7, %xmm12, (%rax) + +// CHECK: vpextrq $7, %xmm12, %rcx +// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0xe1,0x07] + vpextrq $7, %xmm12, %rcx + +// CHECK: vpextrq $7, %xmm12, (%rcx) +// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0x21,0x07] + vpextrq $7, %xmm12, (%rcx) + +// CHECK: vextractps $7, %xmm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x79,0x17,0x20,0x07] + vextractps $7, %xmm12, (%rax) + +// CHECK: vextractps $7, %xmm12, %eax +// CHECK: encoding: [0xc4,0x63,0x79,0x17,0xe0,0x07] + vextractps $7, %xmm12, %eax + +// CHECK: vpinsrw $7, %eax, %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0xc4,0xd0,0x07] + vpinsrw $7, %eax, %xmm12, %xmm10 + +// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0xc4,0x10,0x07] + vpinsrw $7, (%rax), %xmm12, %xmm10 + +// CHECK: vpinsrb $7, %eax, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x20,0xd0,0x07] + vpinsrb $7, %eax, %xmm12, %xmm10 + +// CHECK: vpinsrb $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x20,0x10,0x07] + vpinsrb $7, (%rax), %xmm12, %xmm10 + +// CHECK: vpinsrd $7, %eax, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x22,0xd0,0x07] + vpinsrd $7, %eax, %xmm12, %xmm10 + +// CHECK: vpinsrd $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x22,0x10,0x07] + vpinsrd $7, (%rax), %xmm12, %xmm10 + +// CHECK: vpinsrq $7, %rax, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x99,0x22,0xd0,0x07] + vpinsrq $7, %rax, %xmm12, %xmm10 + +// CHECK: vpinsrq $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x99,0x22,0x10,0x07] + vpinsrq $7, (%rax), %xmm12, %xmm10 + +// CHECK: vinsertps $7, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x21,0xdc,0x07] + vinsertps $7, %xmm12, %xmm10, %xmm11 + +// CHECK: vinsertps $7, (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x29,0x21,0x18,0x07] + vinsertps $7, (%rax), %xmm10, %xmm11 + +// CHECK: vptest %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x17,0xd4] + vptest %xmm12, %xmm10 + +// CHECK: vptest (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x17,0x20] + vptest (%rax), %xmm12 + +// CHECK: vmovntdqa (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x2a,0x20] + vmovntdqa (%rax), %xmm12 + +// CHECK: vpcmpgtq %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x37,0xdc] + vpcmpgtq %xmm12, %xmm10, %xmm11 + +// CHECK: vpcmpgtq (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0x37,0x28] + vpcmpgtq (%rax), %xmm10, %xmm13 + +// CHECK: vpcmpistrm $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x62,0xd4,0x07] + vpcmpistrm $7, %xmm12, %xmm10 + +// CHECK: vpcmpistrm $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x62,0x10,0x07] + vpcmpistrm $7, (%rax), %xmm10 + +// CHECK: vpcmpestrm $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x60,0xd4,0x07] + vpcmpestrm $7, %xmm12, %xmm10 + +// CHECK: vpcmpestrm $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x60,0x10,0x07] + vpcmpestrm $7, (%rax), %xmm10 + +// CHECK: vpcmpistri $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x63,0xd4,0x07] + vpcmpistri $7, %xmm12, %xmm10 + +// CHECK: vpcmpistri $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x63,0x10,0x07] + vpcmpistri $7, (%rax), %xmm10 + +// CHECK: vpcmpestri $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x61,0xd4,0x07] + vpcmpestri $7, %xmm12, %xmm10 + +// CHECK: vpcmpestri $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x61,0x10,0x07] + vpcmpestri $7, (%rax), %xmm10 + +// CHECK: vaesimc %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0xdb,0xd4] + vaesimc %xmm12, %xmm10 + +// CHECK: vaesimc (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0xdb,0x20] + vaesimc (%rax), %xmm12 + +// CHECK: vaesenc %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xdc,0xdc] + vaesenc %xmm12, %xmm10, %xmm11 + +// CHECK: vaesenc (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0xdc,0x28] + vaesenc (%rax), %xmm10, %xmm13 + +// CHECK: vaesenclast %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xdd,0xdc] + vaesenclast %xmm12, %xmm10, %xmm11 + +// CHECK: vaesenclast (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0xdd,0x28] + vaesenclast (%rax), %xmm10, %xmm13 + +// CHECK: vaesdec %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xde,0xdc] + vaesdec %xmm12, %xmm10, %xmm11 + +// CHECK: vaesdec (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0xde,0x28] + vaesdec (%rax), %xmm10, %xmm13 + +// CHECK: vaesdeclast %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xdf,0xdc] + vaesdeclast %xmm12, %xmm10, %xmm11 + +// CHECK: vaesdeclast (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0xdf,0x28] + vaesdeclast (%rax), %xmm10, %xmm13 + +// CHECK: vaeskeygenassist $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0xdf,0xd4,0x07] + vaeskeygenassist $7, %xmm12, %xmm10 + +// CHECK: vaeskeygenassist $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0xdf,0x10,0x07] + vaeskeygenassist $7, (%rax), %xmm10 + +// CHECK: vcmpps $8, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x08] + vcmpeq_uqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $9, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x09] + vcmpngeps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $10, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0a] + vcmpngtps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $11, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0b] + vcmpfalseps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $12, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0c] + vcmpneq_oqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $13, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0d] + vcmpgeps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $14, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0e] + vcmpgtps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $15, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0f] + vcmptrueps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $16, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x10] + vcmpeq_osps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $17, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x11] + vcmplt_oqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $18, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x12] + vcmple_oqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $19, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x13] + vcmpunord_sps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $20, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x14] + vcmpneq_usps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $21, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x15] + vcmpnlt_uqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $22, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x16] + vcmpnle_uqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $23, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x17] + vcmpord_sps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $24, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x18] + vcmpeq_usps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $25, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x19] + vcmpnge_uqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $26, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1a] + vcmpngt_uqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $27, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1b] + vcmpfalse_osps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $28, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1c] + vcmpneq_osps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $29, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1d] + vcmpge_oqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $30, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1e] + vcmpgt_oqps %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpps $31, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1f] + vcmptrue_usps %xmm11, %xmm12, %xmm13 + +// CHECK: vmovaps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x28,0x20] + vmovaps (%rax), %ymm12 + +// CHECK: vmovaps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x28,0xe3] + vmovaps %ymm11, %ymm12 + +// CHECK: vmovaps %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7c,0x29,0x18] + vmovaps %ymm11, (%rax) + +// CHECK: vmovapd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x28,0x20] + vmovapd (%rax), %ymm12 + +// CHECK: vmovapd %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7d,0x28,0xe3] + vmovapd %ymm11, %ymm12 + +// CHECK: vmovapd %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7d,0x29,0x18] + vmovapd %ymm11, (%rax) + +// CHECK: vmovups (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x10,0x20] + vmovups (%rax), %ymm12 + +// CHECK: vmovups %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x10,0xe3] + vmovups %ymm11, %ymm12 + +// CHECK: vmovups %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7c,0x11,0x18] + vmovups %ymm11, (%rax) + +// CHECK: vmovupd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x10,0x20] + vmovupd (%rax), %ymm12 + +// CHECK: vmovupd %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7d,0x10,0xe3] + vmovupd %ymm11, %ymm12 + +// CHECK: vmovupd %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7d,0x11,0x18] + vmovupd %ymm11, (%rax) + +// CHECK: vunpckhps %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1c,0x15,0xe3] + vunpckhps %ymm11, %ymm12, %ymm4 + +// CHECK: vunpckhpd %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1d,0x15,0xe3] + vunpckhpd %ymm11, %ymm12, %ymm4 + +// CHECK: vunpcklps %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1c,0x14,0xe3] + vunpcklps %ymm11, %ymm12, %ymm4 + +// CHECK: vunpcklpd %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1d,0x14,0xe3] + vunpcklpd %ymm11, %ymm12, %ymm4 + +// CHECK: vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x15,0x54,0xcb,0xfc] + vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x15,0x54,0xcb,0xfc] + vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x14,0x54,0xcb,0xfc] + vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x14,0x54,0xcb,0xfc] + vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vmovntdq %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7d,0xe7,0x18] + vmovntdq %ymm11, (%rax) + +// CHECK: vmovntpd %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7d,0x2b,0x18] + vmovntpd %ymm11, (%rax) + +// CHECK: vmovntps %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7c,0x2b,0x18] + vmovntps %ymm11, (%rax) + +// CHECK: vmovmskps %xmm12, %eax +// CHECK: encoding: [0xc4,0xc1,0x78,0x50,0xc4] + vmovmskps %xmm12, %eax + +// CHECK: vmovmskpd %xmm12, %eax +// CHECK: encoding: [0xc4,0xc1,0x79,0x50,0xc4] + vmovmskpd %xmm12, %eax + +// CHECK: vmaxps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x5f,0xf4] + vmaxps %ymm12, %ymm4, %ymm6 + +// CHECK: vmaxpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x5f,0xf4] + vmaxpd %ymm12, %ymm4, %ymm6 + +// CHECK: vminps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x5d,0xf4] + vminps %ymm12, %ymm4, %ymm6 + +// CHECK: vminpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x5d,0xf4] + vminpd %ymm12, %ymm4, %ymm6 + +// CHECK: vsubps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x5c,0xf4] + vsubps %ymm12, %ymm4, %ymm6 + +// CHECK: vsubpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x5c,0xf4] + vsubpd %ymm12, %ymm4, %ymm6 + +// CHECK: vdivps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x5e,0xf4] + vdivps %ymm12, %ymm4, %ymm6 + +// CHECK: vdivpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x5e,0xf4] + vdivpd %ymm12, %ymm4, %ymm6 + +// CHECK: vaddps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x58,0xf4] + vaddps %ymm12, %ymm4, %ymm6 + +// CHECK: vaddpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x58,0xf4] + vaddpd %ymm12, %ymm4, %ymm6 + +// CHECK: vmulps %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5c,0x59,0xf4] + vmulps %ymm12, %ymm4, %ymm6 + +// CHECK: vmulpd %ymm12, %ymm4, %ymm6 +// CHECK: encoding: [0xc4,0xc1,0x5d,0x59,0xf4] + vmulpd %ymm12, %ymm4, %ymm6 + +// CHECK: vmaxps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5f,0x30] + vmaxps (%rax), %ymm4, %ymm6 + +// CHECK: vmaxpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5f,0x30] + vmaxpd (%rax), %ymm4, %ymm6 + +// CHECK: vminps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5d,0x30] + vminps (%rax), %ymm4, %ymm6 + +// CHECK: vminpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5d,0x30] + vminpd (%rax), %ymm4, %ymm6 + +// CHECK: vsubps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5c,0x30] + vsubps (%rax), %ymm4, %ymm6 + +// CHECK: vsubpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5c,0x30] + vsubpd (%rax), %ymm4, %ymm6 + +// CHECK: vdivps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x5e,0x30] + vdivps (%rax), %ymm4, %ymm6 + +// CHECK: vdivpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x5e,0x30] + vdivpd (%rax), %ymm4, %ymm6 + +// CHECK: vaddps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x58,0x30] + vaddps (%rax), %ymm4, %ymm6 + +// CHECK: vaddpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x58,0x30] + vaddpd (%rax), %ymm4, %ymm6 + +// CHECK: vmulps (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdc,0x59,0x30] + vmulps (%rax), %ymm4, %ymm6 + +// CHECK: vmulpd (%rax), %ymm4, %ymm6 +// CHECK: encoding: [0xc5,0xdd,0x59,0x30] + vmulpd (%rax), %ymm4, %ymm6 + +// CHECK: vsqrtpd %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7d,0x51,0xe3] + vsqrtpd %ymm11, %ymm12 + +// CHECK: vsqrtpd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x51,0x20] + vsqrtpd (%rax), %ymm12 + +// CHECK: vsqrtps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x51,0xe3] + vsqrtps %ymm11, %ymm12 + +// CHECK: vsqrtps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x51,0x20] + vsqrtps (%rax), %ymm12 + +// CHECK: vrsqrtps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x52,0xe3] + vrsqrtps %ymm11, %ymm12 + +// CHECK: vrsqrtps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x52,0x20] + vrsqrtps (%rax), %ymm12 + +// CHECK: vrcpps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x53,0xe3] + vrcpps %ymm11, %ymm12 + +// CHECK: vrcpps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x53,0x20] + vrcpps (%rax), %ymm12 + +// CHECK: vandps %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0c,0x54,0xdc] + vandps %ymm12, %ymm14, %ymm11 + +// CHECK: vandpd %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0d,0x54,0xdc] + vandpd %ymm12, %ymm14, %ymm11 + +// CHECK: vandps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x54,0x54,0xcb,0xfc] + vandps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x54,0x54,0xcb,0xfc] + vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vorps %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0c,0x56,0xdc] + vorps %ymm12, %ymm14, %ymm11 + +// CHECK: vorpd %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0d,0x56,0xdc] + vorpd %ymm12, %ymm14, %ymm11 + +// CHECK: vorps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x56,0x54,0xcb,0xfc] + vorps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x56,0x54,0xcb,0xfc] + vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vxorps %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0c,0x57,0xdc] + vxorps %ymm12, %ymm14, %ymm11 + +// CHECK: vxorpd %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0d,0x57,0xdc] + vxorpd %ymm12, %ymm14, %ymm11 + +// CHECK: vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x57,0x54,0xcb,0xfc] + vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x57,0x54,0xcb,0xfc] + vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vandnps %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0c,0x55,0xdc] + vandnps %ymm12, %ymm14, %ymm11 + +// CHECK: vandnpd %ymm12, %ymm14, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x0d,0x55,0xdc] + vandnpd %ymm12, %ymm14, %ymm11 + +// CHECK: vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x55,0x54,0xcb,0xfc] + vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x55,0x54,0xcb,0xfc] + vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vcvtps2pd %xmm13, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x5a,0xe5] + vcvtps2pd %xmm13, %ymm12 + +// CHECK: vcvtps2pd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x5a,0x20] + vcvtps2pd (%rax), %ymm12 + +// CHECK: vcvtdq2pd %xmm13, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7e,0xe6,0xe5] + vcvtdq2pd %xmm13, %ymm12 + +// CHECK: vcvtdq2pd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7e,0xe6,0x20] + vcvtdq2pd (%rax), %ymm12 + +// CHECK: vcvtdq2ps %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7c,0x5b,0xd4] + vcvtdq2ps %ymm12, %ymm10 + +// CHECK: vcvtdq2ps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x5b,0x20] + vcvtdq2ps (%rax), %ymm12 + +// CHECK: vcvtps2dq %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7d,0x5b,0xd4] + vcvtps2dq %ymm12, %ymm10 + +// CHECK: vcvtps2dq (%rax), %ymm10 +// CHECK: encoding: [0xc5,0x7d,0x5b,0x10] + vcvtps2dq (%rax), %ymm10 + +// CHECK: vcvttps2dq %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7e,0x5b,0xd4] + vcvttps2dq %ymm12, %ymm10 + +// CHECK: vcvttps2dq (%rax), %ymm10 +// CHECK: encoding: [0xc5,0x7e,0x5b,0x10] + vcvttps2dq (%rax), %ymm10 + +// CHECK: vcvttpd2dq %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3] + vcvttpd2dq %xmm11, %xmm10 + +// CHECK: vcvttpd2dq %ymm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xd4] + vcvttpd2dq %ymm12, %xmm10 + +// CHECK: vcvttpd2dqx %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3] + vcvttpd2dqx %xmm11, %xmm10 + +// CHECK: vcvttpd2dqx (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x79,0xe6,0x18] + vcvttpd2dqx (%rax), %xmm11 + +// CHECK: vcvttpd2dqy %ymm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xdc] + vcvttpd2dqy %ymm12, %xmm11 + +// CHECK: vcvttpd2dqy (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x7d,0xe6,0x18] + vcvttpd2dqy (%rax), %xmm11 + +// CHECK: vcvtpd2ps %ymm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xd4] + vcvtpd2ps %ymm12, %xmm10 + +// CHECK: vcvtpd2psx %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xd3] + vcvtpd2psx %xmm11, %xmm10 + +// CHECK: vcvtpd2psx (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x79,0x5a,0x18] + vcvtpd2psx (%rax), %xmm11 + +// CHECK: vcvtpd2psy %ymm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xdc] + vcvtpd2psy %ymm12, %xmm11 + +// CHECK: vcvtpd2psy (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x7d,0x5a,0x18] + vcvtpd2psy (%rax), %xmm11 + +// CHECK: vcvtpd2dq %ymm12, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xd4] + vcvtpd2dq %ymm12, %xmm10 + +// CHECK: vcvtpd2dqy %ymm12, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xdc] + vcvtpd2dqy %ymm12, %xmm11 + +// CHECK: vcvtpd2dqy (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x7f,0xe6,0x18] + vcvtpd2dqy (%rax), %xmm11 + +// CHECK: vcvtpd2dqx %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xd3] + vcvtpd2dqx %xmm11, %xmm10 + +// CHECK: vcvtpd2dqx (%rax), %xmm11 +// CHECK: encoding: [0xc5,0x7b,0xe6,0x18] + vcvtpd2dqx (%rax), %xmm11 + +// CHECK: vcmpps $0, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x00] + vcmpeqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $2, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x02] + vcmpleps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $1, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x01] + vcmpltps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $4, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x04] + vcmpneqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $6, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x06] + vcmpnleps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $5, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x05] + vcmpnltps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $7, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x07] + vcmpordps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $3, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x03] + vcmpunordps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x02] + vcmpleps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnleps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %ymm6, %ymm12 +// CHECK: encoding: [0xc5,0x4c,0xc2,0x64,0xcb,0xfc,0x07] + vcmpordps -4(%rbx,%rcx,8), %ymm6, %ymm12 + +// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordps -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $0, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x00] + vcmpeqpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $2, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x02] + vcmplepd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $1, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x01] + vcmpltpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $4, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x04] + vcmpneqpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $6, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x06] + vcmpnlepd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $5, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x05] + vcmpnltpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $7, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x07] + vcmpordpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $3, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x03] + vcmpunordpd %ymm11, %ymm12, %ymm13 + +// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqpd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x02] + vcmplepd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltpd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqpd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnlepd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltpd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %ymm6, %ymm12 +// CHECK: encoding: [0xc5,0x4d,0xc2,0x64,0xcb,0xfc,0x07] + vcmpordpd -4(%rbx,%rcx,8), %ymm6, %ymm12 + +// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordpd -4(%rbx,%rcx,8), %ymm12, %ymm13 + +// CHECK: vcmpps $8, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x08] + vcmpeq_uqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $9, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x09] + vcmpngeps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $10, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0a] + vcmpngtps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $11, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0b] + vcmpfalseps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $12, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0c] + vcmpneq_oqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $13, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0d] + vcmpgeps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $14, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0e] + vcmpgtps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $15, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0f] + vcmptrueps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $16, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x10] + vcmpeq_osps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $17, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x11] + vcmplt_oqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $18, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x12] + vcmple_oqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $19, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x13] + vcmpunord_sps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $20, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x14] + vcmpneq_usps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $21, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x15] + vcmpnlt_uqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $22, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x16] + vcmpnle_uqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $23, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x17] + vcmpord_sps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $24, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x18] + vcmpeq_usps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $25, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x19] + vcmpnge_uqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $26, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1a] + vcmpngt_uqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $27, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1b] + vcmpfalse_osps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $28, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1c] + vcmpneq_osps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $29, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1d] + vcmpge_oqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $30, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1e] + vcmpgt_oqps %ymm11, %ymm12, %ymm13 + +// CHECK: vcmpps $31, %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1f] + vcmptrue_usps %ymm11, %ymm12, %ymm13 + +// CHECK: vaddsubps %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1f,0xd0,0xeb] + vaddsubps %ymm11, %ymm12, %ymm13 + +// CHECK: vaddsubps (%rax), %ymm11, %ymm12 +// CHECK: encoding: [0xc5,0x27,0xd0,0x20] + vaddsubps (%rax), %ymm11, %ymm12 + +// CHECK: vaddsubpd %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0xd0,0xeb] + vaddsubpd %ymm11, %ymm12, %ymm13 + +// CHECK: vaddsubpd (%rax), %ymm11, %ymm12 +// CHECK: encoding: [0xc5,0x25,0xd0,0x20] + vaddsubpd (%rax), %ymm11, %ymm12 + +// CHECK: vhaddps %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1f,0x7c,0xeb] + vhaddps %ymm11, %ymm12, %ymm13 + +// CHECK: vhaddps (%rax), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1f,0x7c,0x28] + vhaddps (%rax), %ymm12, %ymm13 + +// CHECK: vhaddpd %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0x7c,0xeb] + vhaddpd %ymm11, %ymm12, %ymm13 + +// CHECK: vhaddpd (%rax), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0x7c,0x28] + vhaddpd (%rax), %ymm12, %ymm13 + +// CHECK: vhsubps %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1f,0x7d,0xeb] + vhsubps %ymm11, %ymm12, %ymm13 + +// CHECK: vhsubps (%rax), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1f,0x7d,0x28] + vhsubps (%rax), %ymm12, %ymm13 + +// CHECK: vhsubpd %ymm11, %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x41,0x1d,0x7d,0xeb] + vhsubpd %ymm11, %ymm12, %ymm13 + +// CHECK: vhsubpd (%rax), %ymm12, %ymm13 +// CHECK: encoding: [0xc5,0x1d,0x7d,0x28] + vhsubpd (%rax), %ymm12, %ymm13 + +// CHECK: vblendps $3, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x2d,0x0c,0xdc,0x03] + vblendps $3, %ymm12, %ymm10, %ymm11 + +// CHECK: vblendps $3, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x63,0x2d,0x0c,0x18,0x03] + vblendps $3, (%rax), %ymm10, %ymm11 + +// CHECK: vblendpd $3, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x2d,0x0d,0xdc,0x03] + vblendpd $3, %ymm12, %ymm10, %ymm11 + +// CHECK: vblendpd $3, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x63,0x2d,0x0d,0x18,0x03] + vblendpd $3, (%rax), %ymm10, %ymm11 + +// CHECK: vdpps $3, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x2d,0x40,0xdc,0x03] + vdpps $3, %ymm12, %ymm10, %ymm11 + +// CHECK: vdpps $3, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x63,0x2d,0x40,0x18,0x03] + vdpps $3, (%rax), %ymm10, %ymm11 + +// CHECK: vbroadcastf128 (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x1a,0x20] + vbroadcastf128 (%rax), %ymm12 + +// CHECK: vbroadcastsd (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x19,0x20] + vbroadcastsd (%rax), %ymm12 + +// CHECK: vbroadcastss (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x18,0x20] + vbroadcastss (%rax), %xmm12 + +// CHECK: vbroadcastss (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x18,0x20] + vbroadcastss (%rax), %ymm12 + +// CHECK: vinsertf128 $7, %xmm12, %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x43,0x1d,0x18,0xd4,0x07] + vinsertf128 $7, %xmm12, %ymm12, %ymm10 + +// CHECK: vinsertf128 $7, (%rax), %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x63,0x1d,0x18,0x10,0x07] + vinsertf128 $7, (%rax), %ymm12, %ymm10 + +// CHECK: vextractf128 $7, %ymm12, %xmm12 +// CHECK: encoding: [0xc4,0x43,0x7d,0x19,0xe4,0x07] + vextractf128 $7, %ymm12, %xmm12 + +// CHECK: vextractf128 $7, %ymm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x7d,0x19,0x20,0x07] + vextractf128 $7, %ymm12, (%rax) + +// CHECK: vmaskmovpd %xmm12, %xmm10, (%rax) +// CHECK: encoding: [0xc4,0x62,0x29,0x2f,0x20] + vmaskmovpd %xmm12, %xmm10, (%rax) + +// CHECK: vmaskmovpd %ymm12, %ymm10, (%rax) +// CHECK: encoding: [0xc4,0x62,0x2d,0x2f,0x20] + vmaskmovpd %ymm12, %ymm10, (%rax) + +// CHECK: vmaskmovpd (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x62,0x19,0x2d,0x10] + vmaskmovpd (%rax), %xmm12, %xmm10 + +// CHECK: vmaskmovpd (%rax), %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x62,0x1d,0x2d,0x10] + vmaskmovpd (%rax), %ymm12, %ymm10 + +// CHECK: vmaskmovps %xmm12, %xmm10, (%rax) +// CHECK: encoding: [0xc4,0x62,0x29,0x2e,0x20] + vmaskmovps %xmm12, %xmm10, (%rax) + +// CHECK: vmaskmovps %ymm12, %ymm10, (%rax) +// CHECK: encoding: [0xc4,0x62,0x2d,0x2e,0x20] + vmaskmovps %ymm12, %ymm10, (%rax) + +// CHECK: vmaskmovps (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x62,0x19,0x2c,0x10] + vmaskmovps (%rax), %xmm12, %xmm10 + +// CHECK: vmaskmovps (%rax), %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x62,0x1d,0x2c,0x10] + vmaskmovps (%rax), %ymm12, %ymm10 + +// CHECK: vpermilps $7, %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x04,0xd3,0x07] + vpermilps $7, %xmm11, %xmm10 + +// CHECK: vpermilps $7, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x7d,0x04,0xda,0x07] + vpermilps $7, %ymm10, %ymm11 + +// CHECK: vpermilps $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x04,0x10,0x07] + vpermilps $7, (%rax), %xmm10 + +// CHECK: vpermilps $7, (%rax), %ymm10 +// CHECK: encoding: [0xc4,0x63,0x7d,0x04,0x10,0x07] + vpermilps $7, (%rax), %ymm10 + +// CHECK: vpermilps %xmm11, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x0c,0xdb] + vpermilps %xmm11, %xmm10, %xmm11 + +// CHECK: vpermilps %ymm11, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x0c,0xdb] + vpermilps %ymm11, %ymm10, %ymm11 + +// CHECK: vpermilps (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0x0c,0x28] + vpermilps (%rax), %xmm10, %xmm13 + +// CHECK: vpermilps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x0c,0x18] + vpermilps (%rax), %ymm10, %ymm11 + +// CHECK: vpermilpd $7, %xmm11, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x05,0xd3,0x07] + vpermilpd $7, %xmm11, %xmm10 + +// CHECK: vpermilpd $7, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x7d,0x05,0xda,0x07] + vpermilpd $7, %ymm10, %ymm11 + +// CHECK: vpermilpd $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x05,0x10,0x07] + vpermilpd $7, (%rax), %xmm10 + +// CHECK: vpermilpd $7, (%rax), %ymm10 +// CHECK: encoding: [0xc4,0x63,0x7d,0x05,0x10,0x07] + vpermilpd $7, (%rax), %ymm10 + +// CHECK: vpermilpd %xmm11, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x0d,0xdb] + vpermilpd %xmm11, %xmm10, %xmm11 + +// CHECK: vpermilpd %ymm11, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x0d,0xdb] + vpermilpd %ymm11, %ymm10, %ymm11 + +// CHECK: vpermilpd (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0x0d,0x28] + vpermilpd (%rax), %xmm10, %xmm13 + +// CHECK: vpermilpd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x0d,0x18] + vpermilpd (%rax), %ymm10, %ymm11 + +// CHECK: vperm2f128 $7, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x2d,0x06,0xdc,0x07] + vperm2f128 $7, %ymm12, %ymm10, %ymm11 + +// CHECK: vperm2f128 $7, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x63,0x2d,0x06,0x18,0x07] + vperm2f128 $7, (%rax), %ymm10, %ymm11 + +// CHECK: vcvtsd2si %xmm8, %r8d +// CHECK: encoding: [0xc4,0x41,0x7b,0x2d,0xc0] + vcvtsd2si %xmm8, %r8d + +// CHECK: vcvtsd2si (%rcx), %ecx +// CHECK: encoding: [0xc5,0xfb,0x2d,0x09] + vcvtsd2si (%rcx), %ecx + +// CHECK: vcvtss2si %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe1,0xfa,0x2d,0xcc] + vcvtss2si %xmm4, %rcx + +// CHECK: vcvtss2si (%rcx), %r8 +// CHECK: encoding: [0xc4,0x61,0xfa,0x2d,0x01] + vcvtss2si (%rcx), %r8 + +// CHECK: vcvtsi2sdl %r8d, %xmm8, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x3b,0x2a,0xf8] + vcvtsi2sdl %r8d, %xmm8, %xmm15 + +// CHECK: vcvtsi2sdl (%rbp), %xmm8, %xmm15 +// CHECK: encoding: [0xc5,0x3b,0x2a,0x7d,0x00] + vcvtsi2sdl (%rbp), %xmm8, %xmm15 + +// CHECK: vcvtsi2sdq %rcx, %xmm4, %xmm6 +// CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0xf1] + vcvtsi2sdq %rcx, %xmm4, %xmm6 + +// CHECK: vcvtsi2sdq (%rcx), %xmm4, %xmm6 +// CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0x31] + vcvtsi2sdq (%rcx), %xmm4, %xmm6 + +// CHECK: vcvtsi2ssq %rcx, %xmm4, %xmm6 +// CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0xf1] + vcvtsi2ssq %rcx, %xmm4, %xmm6 + +// CHECK: vcvtsi2ssq (%rcx), %xmm4, %xmm6 +// CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0x31] + vcvtsi2ssq (%rcx), %xmm4, %xmm6 + +// CHECK: vcvttsd2si %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0xcc] + vcvttsd2si %xmm4, %rcx + +// CHECK: vcvttsd2si (%rcx), %rcx +// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0x09] + vcvttsd2si (%rcx), %rcx + +// CHECK: vcvttss2si %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0xcc] + vcvttss2si %xmm4, %rcx + +// CHECK: vcvttss2si (%rcx), %rcx +// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0x09] + vcvttss2si (%rcx), %rcx + +// CHECK: vlddqu (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7f,0xf0,0x20] + vlddqu (%rax), %ymm12 + +// CHECK: vmovddup %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7f,0x12,0xd4] + vmovddup %ymm12, %ymm10 + +// CHECK: vmovddup (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7f,0x12,0x20] + vmovddup (%rax), %ymm12 + +// CHECK: vmovdqa %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7d,0x6f,0xd4] + vmovdqa %ymm12, %ymm10 + +// CHECK: vmovdqa %ymm12, (%rax) +// CHECK: encoding: [0xc5,0x7d,0x7f,0x20] + vmovdqa %ymm12, (%rax) + +// CHECK: vmovdqa (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x6f,0x20] + vmovdqa (%rax), %ymm12 + +// CHECK: vmovdqu %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7e,0x6f,0xd4] + vmovdqu %ymm12, %ymm10 + +// CHECK: vmovdqu %ymm12, (%rax) +// CHECK: encoding: [0xc5,0x7e,0x7f,0x20] + vmovdqu %ymm12, (%rax) + +// CHECK: vmovdqu (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7e,0x6f,0x20] + vmovdqu (%rax), %ymm12 + +// CHECK: vmovshdup %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7e,0x16,0xd4] + vmovshdup %ymm12, %ymm10 + +// CHECK: vmovshdup (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7e,0x16,0x20] + vmovshdup (%rax), %ymm12 + +// CHECK: vmovsldup %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7e,0x12,0xd4] + vmovsldup %ymm12, %ymm10 + +// CHECK: vmovsldup (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7e,0x12,0x20] + vmovsldup (%rax), %ymm12 + +// CHECK: vptest %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x42,0x7d,0x17,0xd4] + vptest %ymm12, %ymm10 + +// CHECK: vptest (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x17,0x20] + vptest (%rax), %ymm12 + +// CHECK: vroundpd $7, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x7d,0x09,0xda,0x07] + vroundpd $7, %ymm10, %ymm11 + +// CHECK: vroundpd $7, (%rax), %ymm10 +// CHECK: encoding: [0xc4,0x63,0x7d,0x09,0x10,0x07] + vroundpd $7, (%rax), %ymm10 + +// CHECK: vroundps $7, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x7d,0x08,0xda,0x07] + vroundps $7, %ymm10, %ymm11 + +// CHECK: vroundps $7, (%rax), %ymm10 +// CHECK: encoding: [0xc4,0x63,0x7d,0x08,0x10,0x07] + vroundps $7, (%rax), %ymm10 + +// CHECK: vshufpd $7, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x2d,0xc6,0xdc,0x07] + vshufpd $7, %ymm12, %ymm10, %ymm11 + +// CHECK: vshufpd $7, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc5,0x2d,0xc6,0x18,0x07] + vshufpd $7, (%rax), %ymm10, %ymm11 + +// CHECK: vshufps $7, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x2c,0xc6,0xdc,0x07] + vshufps $7, %ymm12, %ymm10, %ymm11 + +// CHECK: vshufps $7, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc5,0x2c,0xc6,0x18,0x07] + vshufps $7, (%rax), %ymm10, %ymm11 + +// CHECK: vtestpd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x0f,0xd4] + vtestpd %xmm12, %xmm10 + +// CHECK: vtestpd %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x42,0x7d,0x0f,0xd4] + vtestpd %ymm12, %ymm10 + +// CHECK: vtestpd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x0f,0x20] + vtestpd (%rax), %xmm12 + +// CHECK: vtestpd (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x0f,0x20] + vtestpd (%rax), %ymm12 + +// CHECK: vtestps %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x0e,0xd4] + vtestps %xmm12, %xmm10 + +// CHECK: vtestps %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x42,0x7d,0x0e,0xd4] + vtestps %ymm12, %ymm10 + +// CHECK: vtestps (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x0e,0x20] + vtestps (%rax), %xmm12 + +// CHECK: vtestps (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x0e,0x20] + vtestps (%rax), %ymm12 + +// CHECK: vextractps $10, %xmm8, %r8 +// CHECK: encoding: [0xc4,0x43,0x79,0x17,0xc0,0x0a] + vextractps $10, %xmm8, %r8 + +// CHECK: vextractps $7, %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xe1,0x07] + vextractps $7, %xmm4, %rcx + +// CHECK: vmovd %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xe1] + vmovd %xmm4, %rcx + +// CHECK: vmovmskpd %xmm4, %rcx +// CHECK: encoding: [0xc5,0xf9,0x50,0xcc] + vmovmskpd %xmm4, %rcx + +// CHECK: vmovmskpd %ymm4, %rcx +// CHECK: encoding: [0xc5,0xfd,0x50,0xcc] + vmovmskpd %ymm4, %rcx + +// CHECK: vmovmskps %xmm4, %rcx +// CHECK: encoding: [0xc5,0xf8,0x50,0xcc] + vmovmskps %xmm4, %rcx + +// CHECK: vmovmskps %ymm4, %rcx +// CHECK: encoding: [0xc5,0xfc,0x50,0xcc] + vmovmskps %ymm4, %rcx + +// CHECK: vpextrb $7, %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xe1,0x07] + vpextrb $7, %xmm4, %rcx + +// CHECK: vpinsrw $7, %r8, %xmm15, %xmm8 +// CHECK: encoding: [0xc4,0x41,0x01,0xc4,0xc0,0x07] + vpinsrw $7, %r8, %xmm15, %xmm8 + +// CHECK: vpinsrw $7, %rcx, %xmm4, %xmm6 +// CHECK: encoding: [0xc5,0xd9,0xc4,0xf1,0x07] + vpinsrw $7, %rcx, %xmm4, %xmm6 + +// CHECK: vpmovmskb %xmm4, %rcx +// CHECK: encoding: [0xc5,0xf9,0xd7,0xcc] + vpmovmskb %xmm4, %rcx + +// CHECK: vblendvpd %ymm11, 57005(%rax,%riz), %ymm12, %ymm13 +// CHECK: encoding: [0xc4,0x63,0x1d,0x4b,0xac,0x20,0xad,0xde,0x00,0x00,0xb0] + vblendvpd %ymm11, 0xdead(%rax,%riz), %ymm12, %ymm13 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index 0fce592..f45b0a2 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -4,7 +4,7 @@ // CHECK: callw 42 // CHECK: encoding: [0x66,0xe8,A,A] callw 42 - + // rdar://8127102 // CHECK: movq %gs:(%rdi), %rax // CHECK: encoding: [0x65,0x48,0x8b,0x07] @@ -114,2889 +114,29 @@ movd %mm1, %rdx // CHECK: encoding: [0x0f,0x7e,0xca] movd %mm1, %edx -// CHECK: vaddss %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x32,0x58,0xd0] -vaddss %xmm8, %xmm9, %xmm10 - -// CHECK: vmulss %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x32,0x59,0xd0] -vmulss %xmm8, %xmm9, %xmm10 - -// CHECK: vsubss %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x32,0x5c,0xd0] -vsubss %xmm8, %xmm9, %xmm10 - -// CHECK: vdivss %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x32,0x5e,0xd0] -vdivss %xmm8, %xmm9, %xmm10 - -// CHECK: vaddsd %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x33,0x58,0xd0] -vaddsd %xmm8, %xmm9, %xmm10 - -// CHECK: vmulsd %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x33,0x59,0xd0] -vmulsd %xmm8, %xmm9, %xmm10 - -// CHECK: vsubsd %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x33,0x5c,0xd0] -vsubsd %xmm8, %xmm9, %xmm10 - -// CHECK: vdivsd %xmm8, %xmm9, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x33,0x5e,0xd0] -vdivsd %xmm8, %xmm9, %xmm10 - -// CHECK: vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2a,0x58,0x5c,0xd9,0xfc] -vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2a,0x5c,0x5c,0xd9,0xfc] -vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2a,0x59,0x5c,0xd9,0xfc] -vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2a,0x5e,0x5c,0xd9,0xfc] -vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2b,0x58,0x5c,0xd9,0xfc] -vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2b,0x5c,0x5c,0xd9,0xfc] -vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2b,0x59,0x5c,0xd9,0xfc] -vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x2b,0x5e,0x5c,0xd9,0xfc] -vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vaddps %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x20,0x58,0xfa] -vaddps %xmm10, %xmm11, %xmm15 - -// CHECK: vsubps %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x20,0x5c,0xfa] -vsubps %xmm10, %xmm11, %xmm15 - -// CHECK: vmulps %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x20,0x59,0xfa] -vmulps %xmm10, %xmm11, %xmm15 - -// CHECK: vdivps %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x20,0x5e,0xfa] -vdivps %xmm10, %xmm11, %xmm15 - -// CHECK: vaddpd %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x21,0x58,0xfa] -vaddpd %xmm10, %xmm11, %xmm15 - -// CHECK: vsubpd %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x21,0x5c,0xfa] -vsubpd %xmm10, %xmm11, %xmm15 - -// CHECK: vmulpd %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x21,0x59,0xfa] -vmulpd %xmm10, %xmm11, %xmm15 - -// CHECK: vdivpd %xmm10, %xmm11, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x21,0x5e,0xfa] -vdivpd %xmm10, %xmm11, %xmm15 - -// CHECK: vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x28,0x58,0x5c,0xd9,0xfc] -vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x28,0x5c,0x5c,0xd9,0xfc] -vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x28,0x59,0x5c,0xd9,0xfc] -vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x28,0x5e,0x5c,0xd9,0xfc] -vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x29,0x58,0x5c,0xd9,0xfc] -vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x29,0x5c,0x5c,0xd9,0xfc] -vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x29,0x59,0x5c,0xd9,0xfc] -vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 -// CHECK: encoding: [0xc5,0x29,0x5e,0x5c,0xd9,0xfc] -vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 - -// CHECK: vmaxss %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x0a,0x5f,0xe2] - vmaxss %xmm10, %xmm14, %xmm12 - -// CHECK: vmaxsd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x0b,0x5f,0xe2] - vmaxsd %xmm10, %xmm14, %xmm12 - -// CHECK: vminss %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x0a,0x5d,0xe2] - vminss %xmm10, %xmm14, %xmm12 - -// CHECK: vminsd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x0b,0x5d,0xe2] - vminsd %xmm10, %xmm14, %xmm12 - -// CHECK: vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1a,0x5f,0x54,0xcb,0xfc] - vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vmaxsd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1b,0x5f,0x54,0xcb,0xfc] - vmaxsd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vminss -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1a,0x5d,0x54,0xcb,0xfc] - vminss -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1b,0x5d,0x54,0xcb,0xfc] - vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vmaxps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x5f,0xe2] - vmaxps %xmm10, %xmm14, %xmm12 - -// CHECK: vmaxpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x5f,0xe2] - vmaxpd %xmm10, %xmm14, %xmm12 - -// CHECK: vminps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x5d,0xe2] - vminps %xmm10, %xmm14, %xmm12 - -// CHECK: vminpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x5d,0xe2] - vminpd %xmm10, %xmm14, %xmm12 - -// CHECK: vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x5f,0x54,0xcb,0xfc] - vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x5f,0x54,0xcb,0xfc] - vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vminps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x5d,0x54,0xcb,0xfc] - vminps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x5d,0x54,0xcb,0xfc] - vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vandps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x54,0xe2] - vandps %xmm10, %xmm14, %xmm12 - -// CHECK: vandpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x54,0xe2] - vandpd %xmm10, %xmm14, %xmm12 - -// CHECK: vandps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x54,0x54,0xcb,0xfc] - vandps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x54,0x54,0xcb,0xfc] - vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vorps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x56,0xe2] - vorps %xmm10, %xmm14, %xmm12 - -// CHECK: vorpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x56,0xe2] - vorpd %xmm10, %xmm14, %xmm12 - -// CHECK: vorps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x56,0x54,0xcb,0xfc] - vorps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x56,0x54,0xcb,0xfc] - vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vxorps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x57,0xe2] - vxorps %xmm10, %xmm14, %xmm12 - -// CHECK: vxorpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x57,0xe2] - vxorpd %xmm10, %xmm14, %xmm12 - -// CHECK: vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x57,0x54,0xcb,0xfc] - vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x57,0x54,0xcb,0xfc] - vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vandnps %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x08,0x55,0xe2] - vandnps %xmm10, %xmm14, %xmm12 - -// CHECK: vandnpd %xmm10, %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x09,0x55,0xe2] - vandnpd %xmm10, %xmm14, %xmm12 - -// CHECK: vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x18,0x55,0x54,0xcb,0xfc] - vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0x55,0x54,0xcb,0xfc] - vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10 - -// CHECK: vmovss -4(%rbx,%rcx,8), %xmm10 -// CHECK: encoding: [0xc5,0x7a,0x10,0x54,0xcb,0xfc] - vmovss -4(%rbx,%rcx,8), %xmm10 - -// CHECK: vmovss %xmm14, %xmm10, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x2a,0x10,0xfe] - vmovss %xmm14, %xmm10, %xmm15 - -// CHECK: vmovsd -4(%rbx,%rcx,8), %xmm10 -// CHECK: encoding: [0xc5,0x7b,0x10,0x54,0xcb,0xfc] - vmovsd -4(%rbx,%rcx,8), %xmm10 - -// CHECK: vmovsd %xmm14, %xmm10, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x2b,0x10,0xfe] - vmovsd %xmm14, %xmm10, %xmm15 - // rdar://7840289 // CHECK: pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0x66,0x0f,0x38,0x00,0x0d,A,A,A,A] // CHECK: fixup A - offset: 5, value: CPI1_0-4 pshufb CPI1_0(%rip), %xmm1 -// CHECK: vunpckhps %xmm15, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0x15,0xef] - vunpckhps %xmm15, %xmm12, %xmm13 - -// CHECK: vunpckhpd %xmm15, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x15,0xef] - vunpckhpd %xmm15, %xmm12, %xmm13 - -// CHECK: vunpcklps %xmm15, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0x14,0xef] - vunpcklps %xmm15, %xmm12, %xmm13 - -// CHECK: vunpcklpd %xmm15, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x14,0xef] - vunpcklpd %xmm15, %xmm12, %xmm13 - -// CHECK: vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x18,0x15,0x7c,0xcb,0xfc] - vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15 - -// CHECK: vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x19,0x15,0x7c,0xcb,0xfc] - vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15 - -// CHECK: vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x18,0x14,0x7c,0xcb,0xfc] - vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15 - -// CHECK: vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x19,0x14,0x7c,0xcb,0xfc] - vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15 - -// CHECK: vcmpps $0, %xmm10, %xmm12, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x00] - vcmpps $0, %xmm10, %xmm12, %xmm15 - -// CHECK: vcmpps $0, (%rax), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x18,0xc2,0x38,0x00] - vcmpps $0, (%rax), %xmm12, %xmm15 - -// CHECK: vcmpps $7, %xmm10, %xmm12, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x07] - vcmpps $7, %xmm10, %xmm12, %xmm15 - -// CHECK: vcmppd $0, %xmm10, %xmm12, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x00] - vcmppd $0, %xmm10, %xmm12, %xmm15 - -// CHECK: vcmppd $0, (%rax), %xmm12, %xmm15 -// CHECK: encoding: [0xc5,0x19,0xc2,0x38,0x00] - vcmppd $0, (%rax), %xmm12, %xmm15 - -// CHECK: vcmppd $7, %xmm10, %xmm12, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x07] - vcmppd $7, %xmm10, %xmm12, %xmm15 - -// CHECK: vshufps $8, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc6,0xeb,0x08] - vshufps $8, %xmm11, %xmm12, %xmm13 - -// CHECK: vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc6,0x6c,0xcb,0xfc,0x08] - vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vshufpd $8, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc6,0xeb,0x08] - vshufpd $8, %xmm11, %xmm12, %xmm13 - -// CHECK: vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc6,0x6c,0xcb,0xfc,0x08] - vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $0, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x00] - vcmpeqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $2, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x02] - vcmpleps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $1, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x01] - vcmpltps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $4, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x04] - vcmpneqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $6, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x06] - vcmpnleps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $5, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x05] - vcmpnltps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x07] - vcmpordps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $3, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x03] - vcmpunordps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x02] - vcmpleps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnleps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordps -4(%rbx,%rcx,8), %xmm6, %xmm2 - -// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordps -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $0, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x00] - vcmpeqpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $2, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x02] - vcmplepd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $1, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x01] - vcmpltpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $4, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x04] - vcmpneqpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $6, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x06] - vcmpnlepd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $5, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x05] - vcmpnltpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x07] - vcmpordpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $3, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x03] - vcmpunordpd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqpd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x02] - vcmplepd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltpd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqpd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnlepd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltpd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordpd -4(%rbx,%rcx,8), %xmm6, %xmm2 - -// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordpd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $0, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x00] - vcmpeqss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $2, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x02] - vcmpless %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $1, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x01] - vcmpltss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $4, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x04] - vcmpneqss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $6, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x06] - vcmpnless %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $5, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x05] - vcmpnltss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x07] - vcmpordss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $3, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x03] - vcmpunordss %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpss $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqss -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x02] - vcmpless -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltss -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqss -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnless -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltss -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpss $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordss -4(%rbx,%rcx,8), %xmm6, %xmm2 - -// CHECK: vcmpss $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordss -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $0, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x00] - vcmpeqsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $2, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x02] - vcmplesd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $1, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x01] - vcmpltsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $4, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x04] - vcmpneqsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $6, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x06] - vcmpnlesd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $5, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x05] - vcmpnltsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x07] - vcmpordsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $3, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x03] - vcmpunordsd %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpsd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqsd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x02] - vcmplesd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltsd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqsd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnlesd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltsd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vcmpsd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 -// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07] - vcmpordsd -4(%rbx,%rcx,8), %xmm6, %xmm2 - -// CHECK: vcmpsd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordsd -4(%rbx,%rcx,8), %xmm12, %xmm13 - -// CHECK: vucomiss %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x2e,0xe3] - vucomiss %xmm11, %xmm12 - -// CHECK: vucomiss (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x2e,0x20] - vucomiss (%rax), %xmm12 - -// CHECK: vcomiss %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x2f,0xe3] - vcomiss %xmm11, %xmm12 - -// CHECK: vcomiss (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x2f,0x20] - vcomiss (%rax), %xmm12 - -// CHECK: vucomisd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xe3] - vucomisd %xmm11, %xmm12 - -// CHECK: vucomisd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x79,0x2e,0x20] - vucomisd (%rax), %xmm12 - -// CHECK: vcomisd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x79,0x2f,0xe3] - vcomisd %xmm11, %xmm12 - -// CHECK: vcomisd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x79,0x2f,0x20] - vcomisd (%rax), %xmm12 - -// CHECK: vcvttss2si (%rcx), %eax -// CHECK: encoding: [0xc5,0xfa,0x2c,0x01] - vcvttss2si (%rcx), %eax - -// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x22,0x2a,0x20] - vcvtsi2ss (%rax), %xmm11, %xmm12 - -// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x22,0x2a,0x20] - vcvtsi2ss (%rax), %xmm11, %xmm12 - -// CHECK: vcvttsd2si (%rcx), %eax -// CHECK: encoding: [0xc5,0xfb,0x2c,0x01] - vcvttsd2si (%rcx), %eax - -// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x23,0x2a,0x20] - vcvtsi2sd (%rax), %xmm11, %xmm12 - -// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x23,0x2a,0x20] - vcvtsi2sd (%rax), %xmm11, %xmm12 - -// CHECK: vmovaps (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x28,0x20] - vmovaps (%rax), %xmm12 - -// CHECK: vmovaps %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x28,0xe3] - vmovaps %xmm11, %xmm12 - -// CHECK: vmovaps %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x78,0x29,0x18] - vmovaps %xmm11, (%rax) - -// CHECK: vmovapd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x79,0x28,0x20] - vmovapd (%rax), %xmm12 - -// CHECK: vmovapd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x79,0x28,0xe3] - vmovapd %xmm11, %xmm12 - -// CHECK: vmovapd %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0x29,0x18] - vmovapd %xmm11, (%rax) - -// CHECK: vmovups (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x10,0x20] - vmovups (%rax), %xmm12 - -// CHECK: vmovups %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x10,0xe3] - vmovups %xmm11, %xmm12 - -// CHECK: vmovups %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x78,0x11,0x18] - vmovups %xmm11, (%rax) - -// CHECK: vmovupd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x79,0x10,0x20] - vmovupd (%rax), %xmm12 - -// CHECK: vmovupd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x79,0x10,0xe3] - vmovupd %xmm11, %xmm12 - -// CHECK: vmovupd %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0x11,0x18] - vmovupd %xmm11, (%rax) - -// CHECK: vmovlps %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x78,0x13,0x18] - vmovlps %xmm11, (%rax) - -// CHECK: vmovlps (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0x12,0x28] - vmovlps (%rax), %xmm12, %xmm13 - -// CHECK: vmovlpd %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0x13,0x18] - vmovlpd %xmm11, (%rax) - -// CHECK: vmovlpd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x12,0x28] - vmovlpd (%rax), %xmm12, %xmm13 - -// CHECK: vmovhps %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x78,0x17,0x18] - vmovhps %xmm11, (%rax) - -// CHECK: vmovhps (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x18,0x16,0x28] - vmovhps (%rax), %xmm12, %xmm13 - -// CHECK: vmovhpd %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0x17,0x18] - vmovhpd %xmm11, (%rax) - -// CHECK: vmovhpd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x16,0x28] - vmovhpd (%rax), %xmm12, %xmm13 - -// CHECK: vmovlhps %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0x16,0xeb] - vmovlhps %xmm11, %xmm12, %xmm13 - -// CHECK: vmovhlps %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0x12,0xeb] - vmovhlps %xmm11, %xmm12, %xmm13 - -// CHECK: vcvtss2sil %xmm11, %eax -// CHECK: encoding: [0xc4,0xc1,0x7a,0x2d,0xc3] - vcvtss2si %xmm11, %eax - -// CHECK: vcvtss2sil (%rax), %ebx -// CHECK: encoding: [0xc5,0xfa,0x2d,0x18] - vcvtss2si (%rax), %ebx - -// CHECK: vcvtdq2ps %xmm10, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xe2] - vcvtdq2ps %xmm10, %xmm12 - -// CHECK: vcvtdq2ps (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x5b,0x20] - vcvtdq2ps (%rax), %xmm12 - -// CHECK: vcvtsd2ss %xmm12, %xmm13, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x13,0x5a,0xd4] - vcvtsd2ss %xmm12, %xmm13, %xmm10 - -// CHECK: vcvtsd2ss (%rax), %xmm13, %xmm10 -// CHECK: encoding: [0xc5,0x13,0x5a,0x10] - vcvtsd2ss (%rax), %xmm13, %xmm10 - -// CHECK: vcvtps2dq %xmm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x79,0x5b,0xdc] - vcvtps2dq %xmm12, %xmm11 - -// CHECK: vcvtps2dq (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x79,0x5b,0x18] - vcvtps2dq (%rax), %xmm11 - -// CHECK: vcvtss2sd %xmm12, %xmm13, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x12,0x5a,0xd4] - vcvtss2sd %xmm12, %xmm13, %xmm10 - -// CHECK: vcvtss2sd (%rax), %xmm13, %xmm10 -// CHECK: encoding: [0xc5,0x12,0x5a,0x10] - vcvtss2sd (%rax), %xmm13, %xmm10 - -// CHECK: vcvtdq2ps %xmm13, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xd5] - vcvtdq2ps %xmm13, %xmm10 - -// CHECK: vcvtdq2ps (%ecx), %xmm13 -// CHECK: encoding: [0xc5,0x78,0x5b,0x29] - vcvtdq2ps (%ecx), %xmm13 - -// CHECK: vcvttps2dq %xmm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x7a,0x5b,0xdc] - vcvttps2dq %xmm12, %xmm11 - -// CHECK: vcvttps2dq (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x7a,0x5b,0x18] - vcvttps2dq (%rax), %xmm11 - -// CHECK: vcvtps2pd %xmm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x78,0x5a,0xdc] - vcvtps2pd %xmm12, %xmm11 - -// CHECK: vcvtps2pd (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x78,0x5a,0x18] - vcvtps2pd (%rax), %xmm11 - -// CHECK: vcvtpd2ps %xmm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xdc] - vcvtpd2ps %xmm12, %xmm11 - -// CHECK: vsqrtpd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x79,0x51,0xe3] - vsqrtpd %xmm11, %xmm12 - -// CHECK: vsqrtpd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x79,0x51,0x20] - vsqrtpd (%rax), %xmm12 - -// CHECK: vsqrtps %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x51,0xe3] - vsqrtps %xmm11, %xmm12 - -// CHECK: vsqrtps (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x51,0x20] - vsqrtps (%rax), %xmm12 - -// CHECK: vsqrtsd %xmm11, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x1b,0x51,0xd3] - vsqrtsd %xmm11, %xmm12, %xmm10 - -// CHECK: vsqrtsd (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1b,0x51,0x10] - vsqrtsd (%rax), %xmm12, %xmm10 - -// CHECK: vsqrtss %xmm11, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x1a,0x51,0xd3] - vsqrtss %xmm11, %xmm12, %xmm10 - -// CHECK: vsqrtss (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1a,0x51,0x10] - vsqrtss (%rax), %xmm12, %xmm10 - -// CHECK: vrsqrtps %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x52,0xe3] - vrsqrtps %xmm11, %xmm12 - -// CHECK: vrsqrtps (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x52,0x20] - vrsqrtps (%rax), %xmm12 - -// CHECK: vrsqrtss %xmm11, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x1a,0x52,0xd3] - vrsqrtss %xmm11, %xmm12, %xmm10 - -// CHECK: vrsqrtss (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1a,0x52,0x10] - vrsqrtss (%rax), %xmm12, %xmm10 - -// CHECK: vrcpps %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x78,0x53,0xe3] - vrcpps %xmm11, %xmm12 - -// CHECK: vrcpps (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x78,0x53,0x20] - vrcpps (%rax), %xmm12 - -// CHECK: vrcpss %xmm11, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x1a,0x53,0xd3] - vrcpss %xmm11, %xmm12, %xmm10 - -// CHECK: vrcpss (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x1a,0x53,0x10] - vrcpss (%rax), %xmm12, %xmm10 - -// CHECK: vmovntdq %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0xe7,0x18] - vmovntdq %xmm11, (%rax) - -// CHECK: vmovntpd %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x79,0x2b,0x18] - vmovntpd %xmm11, (%rax) - -// CHECK: vmovntps %xmm11, (%rax) -// CHECK: encoding: [0xc5,0x78,0x2b,0x18] - vmovntps %xmm11, (%rax) - -// CHECK: vldmxcsr -4(%rip) -// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xfc,0xff,0xff,0xff] - vldmxcsr -4(%rip) - -// CHECK: vstmxcsr -4(%rsp) -// CHECK: encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc] - vstmxcsr -4(%rsp) - -// CHECK: vpsubb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf8,0xeb] - vpsubb %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf8,0x28] - vpsubb (%rax), %xmm12, %xmm13 - -// CHECK: vpsubw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf9,0xeb] - vpsubw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf9,0x28] - vpsubw (%rax), %xmm12, %xmm13 - -// CHECK: vpsubd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xfa,0xeb] - vpsubd %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xfa,0x28] - vpsubd (%rax), %xmm12, %xmm13 - -// CHECK: vpsubq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xfb,0xeb] - vpsubq %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xfb,0x28] - vpsubq (%rax), %xmm12, %xmm13 - -// CHECK: vpsubsb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe8,0xeb] - vpsubsb %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubsb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe8,0x28] - vpsubsb (%rax), %xmm12, %xmm13 - -// CHECK: vpsubsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe9,0xeb] - vpsubsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe9,0x28] - vpsubsw (%rax), %xmm12, %xmm13 - -// CHECK: vpsubusb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd8,0xeb] - vpsubusb %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubusb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd8,0x28] - vpsubusb (%rax), %xmm12, %xmm13 - -// CHECK: vpsubusw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd9,0xeb] - vpsubusw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsubusw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd9,0x28] - vpsubusw (%rax), %xmm12, %xmm13 - -// CHECK: vpaddb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xfc,0xeb] - vpaddb %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xfc,0x28] - vpaddb (%rax), %xmm12, %xmm13 - -// CHECK: vpaddw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xfd,0xeb] - vpaddw %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xfd,0x28] - vpaddw (%rax), %xmm12, %xmm13 - -// CHECK: vpaddd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xfe,0xeb] - vpaddd %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xfe,0x28] - vpaddd (%rax), %xmm12, %xmm13 - -// CHECK: vpaddq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd4,0xeb] - vpaddq %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd4,0x28] - vpaddq (%rax), %xmm12, %xmm13 - -// CHECK: vpaddsb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xec,0xeb] - vpaddsb %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddsb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xec,0x28] - vpaddsb (%rax), %xmm12, %xmm13 - -// CHECK: vpaddsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xed,0xeb] - vpaddsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xed,0x28] - vpaddsw (%rax), %xmm12, %xmm13 - -// CHECK: vpaddusb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xdc,0xeb] - vpaddusb %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddusb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xdc,0x28] - vpaddusb (%rax), %xmm12, %xmm13 - -// CHECK: vpaddusw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xdd,0xeb] - vpaddusw %xmm11, %xmm12, %xmm13 - -// CHECK: vpaddusw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xdd,0x28] - vpaddusw (%rax), %xmm12, %xmm13 - -// CHECK: vpmulhuw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe4,0xeb] - vpmulhuw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmulhuw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe4,0x28] - vpmulhuw (%rax), %xmm12, %xmm13 - -// CHECK: vpmulhw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe5,0xeb] - vpmulhw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmulhw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe5,0x28] - vpmulhw (%rax), %xmm12, %xmm13 - -// CHECK: vpmullw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd5,0xeb] - vpmullw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmullw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd5,0x28] - vpmullw (%rax), %xmm12, %xmm13 - -// CHECK: vpmuludq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf4,0xeb] - vpmuludq %xmm11, %xmm12, %xmm13 - -// CHECK: vpmuludq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf4,0x28] - vpmuludq (%rax), %xmm12, %xmm13 - -// CHECK: vpavgb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe0,0xeb] - vpavgb %xmm11, %xmm12, %xmm13 - -// CHECK: vpavgb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe0,0x28] - vpavgb (%rax), %xmm12, %xmm13 - -// CHECK: vpavgw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe3,0xeb] - vpavgw %xmm11, %xmm12, %xmm13 - -// CHECK: vpavgw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe3,0x28] - vpavgw (%rax), %xmm12, %xmm13 - -// CHECK: vpminsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xea,0xeb] - vpminsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpminsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xea,0x28] - vpminsw (%rax), %xmm12, %xmm13 - -// CHECK: vpminub %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xda,0xeb] - vpminub %xmm11, %xmm12, %xmm13 - -// CHECK: vpminub (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xda,0x28] - vpminub (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xee,0xeb] - vpmaxsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmaxsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xee,0x28] - vpmaxsw (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxub %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xde,0xeb] - vpmaxub %xmm11, %xmm12, %xmm13 - -// CHECK: vpmaxub (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xde,0x28] - vpmaxub (%rax), %xmm12, %xmm13 - -// CHECK: vpsadbw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf6,0xeb] - vpsadbw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsadbw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf6,0x28] - vpsadbw (%rax), %xmm12, %xmm13 - -// CHECK: vpsllw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf1,0xeb] - vpsllw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsllw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf1,0x28] - vpsllw (%rax), %xmm12, %xmm13 - -// CHECK: vpslld %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf2,0xeb] - vpslld %xmm11, %xmm12, %xmm13 - -// CHECK: vpslld (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf2,0x28] - vpslld (%rax), %xmm12, %xmm13 - -// CHECK: vpsllq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xf3,0xeb] - vpsllq %xmm11, %xmm12, %xmm13 - -// CHECK: vpsllq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xf3,0x28] - vpsllq (%rax), %xmm12, %xmm13 - -// CHECK: vpsraw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe1,0xeb] - vpsraw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsraw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe1,0x28] - vpsraw (%rax), %xmm12, %xmm13 - -// CHECK: vpsrad %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xe2,0xeb] - vpsrad %xmm11, %xmm12, %xmm13 - -// CHECK: vpsrad (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xe2,0x28] - vpsrad (%rax), %xmm12, %xmm13 - -// CHECK: vpsrlw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd1,0xeb] - vpsrlw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsrlw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd1,0x28] - vpsrlw (%rax), %xmm12, %xmm13 - -// CHECK: vpsrld %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd2,0xeb] - vpsrld %xmm11, %xmm12, %xmm13 - -// CHECK: vpsrld (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd2,0x28] - vpsrld (%rax), %xmm12, %xmm13 - -// CHECK: vpsrlq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd3,0xeb] - vpsrlq %xmm11, %xmm12, %xmm13 - -// CHECK: vpsrlq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xd3,0x28] - vpsrlq (%rax), %xmm12, %xmm13 - -// CHECK: vpslld $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a] - vpslld $10, %xmm12, %xmm13 - -// CHECK: vpslldq $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xfc,0x0a] - vpslldq $10, %xmm12, %xmm13 - -// CHECK: vpsllq $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xf4,0x0a] - vpsllq $10, %xmm12, %xmm13 - -// CHECK: vpsllw $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xf4,0x0a] - vpsllw $10, %xmm12, %xmm13 - -// CHECK: vpsrad $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xe4,0x0a] - vpsrad $10, %xmm12, %xmm13 - -// CHECK: vpsraw $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xe4,0x0a] - vpsraw $10, %xmm12, %xmm13 - -// CHECK: vpsrld $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xd4,0x0a] - vpsrld $10, %xmm12, %xmm13 - -// CHECK: vpsrldq $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xdc,0x0a] - vpsrldq $10, %xmm12, %xmm13 - -// CHECK: vpsrlq $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xd4,0x0a] - vpsrlq $10, %xmm12, %xmm13 - -// CHECK: vpsrlw $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xd4,0x0a] - vpsrlw $10, %xmm12, %xmm13 - -// CHECK: vpslld $10, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a] - vpslld $10, %xmm12, %xmm13 - -// CHECK: vpand %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xdb,0xeb] - vpand %xmm11, %xmm12, %xmm13 - -// CHECK: vpand (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xdb,0x28] - vpand (%rax), %xmm12, %xmm13 - -// CHECK: vpor %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xeb,0xeb] - vpor %xmm11, %xmm12, %xmm13 - -// CHECK: vpor (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xeb,0x28] - vpor (%rax), %xmm12, %xmm13 - -// CHECK: vpxor %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xef,0xeb] - vpxor %xmm11, %xmm12, %xmm13 - -// CHECK: vpxor (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xef,0x28] - vpxor (%rax), %xmm12, %xmm13 - -// CHECK: vpandn %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xdf,0xeb] - vpandn %xmm11, %xmm12, %xmm13 - -// CHECK: vpandn (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xdf,0x28] - vpandn (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpeqb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x74,0xeb] - vpcmpeqb %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpeqb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x74,0x28] - vpcmpeqb (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpeqw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x75,0xeb] - vpcmpeqw %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpeqw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x75,0x28] - vpcmpeqw (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpeqd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x76,0xeb] - vpcmpeqd %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpeqd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x76,0x28] - vpcmpeqd (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpgtb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x64,0xeb] - vpcmpgtb %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpgtb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x64,0x28] - vpcmpgtb (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpgtw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x65,0xeb] - vpcmpgtw %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpgtw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x65,0x28] - vpcmpgtw (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpgtd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x66,0xeb] - vpcmpgtd %xmm11, %xmm12, %xmm13 - -// CHECK: vpcmpgtd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x66,0x28] - vpcmpgtd (%rax), %xmm12, %xmm13 - -// CHECK: vpacksswb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x63,0xeb] - vpacksswb %xmm11, %xmm12, %xmm13 - -// CHECK: vpacksswb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x63,0x28] - vpacksswb (%rax), %xmm12, %xmm13 - -// CHECK: vpackssdw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x6b,0xeb] - vpackssdw %xmm11, %xmm12, %xmm13 - -// CHECK: vpackssdw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x6b,0x28] - vpackssdw (%rax), %xmm12, %xmm13 - -// CHECK: vpackuswb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x67,0xeb] - vpackuswb %xmm11, %xmm12, %xmm13 - -// CHECK: vpackuswb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x67,0x28] - vpackuswb (%rax), %xmm12, %xmm13 - -// CHECK: vpshufd $4, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x79,0x70,0xec,0x04] - vpshufd $4, %xmm12, %xmm13 - -// CHECK: vpshufd $4, (%rax), %xmm13 -// CHECK: encoding: [0xc5,0x79,0x70,0x28,0x04] - vpshufd $4, (%rax), %xmm13 - -// CHECK: vpshufhw $4, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x7a,0x70,0xec,0x04] - vpshufhw $4, %xmm12, %xmm13 - -// CHECK: vpshufhw $4, (%rax), %xmm13 -// CHECK: encoding: [0xc5,0x7a,0x70,0x28,0x04] - vpshufhw $4, (%rax), %xmm13 - -// CHECK: vpshuflw $4, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x7b,0x70,0xec,0x04] - vpshuflw $4, %xmm12, %xmm13 - -// CHECK: vpshuflw $4, (%rax), %xmm13 -// CHECK: encoding: [0xc5,0x7b,0x70,0x28,0x04] - vpshuflw $4, (%rax), %xmm13 - -// CHECK: vpunpcklbw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x60,0xeb] - vpunpcklbw %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpcklbw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x60,0x28] - vpunpcklbw (%rax), %xmm12, %xmm13 - -// CHECK: vpunpcklwd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x61,0xeb] - vpunpcklwd %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpcklwd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x61,0x28] - vpunpcklwd (%rax), %xmm12, %xmm13 - -// CHECK: vpunpckldq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x62,0xeb] - vpunpckldq %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpckldq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x62,0x28] - vpunpckldq (%rax), %xmm12, %xmm13 - -// CHECK: vpunpcklqdq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x6c,0xeb] - vpunpcklqdq %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpcklqdq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x6c,0x28] - vpunpcklqdq (%rax), %xmm12, %xmm13 - -// CHECK: vpunpckhbw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x68,0xeb] - vpunpckhbw %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpckhbw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x68,0x28] - vpunpckhbw (%rax), %xmm12, %xmm13 - -// CHECK: vpunpckhwd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x69,0xeb] - vpunpckhwd %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpckhwd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x69,0x28] - vpunpckhwd (%rax), %xmm12, %xmm13 - -// CHECK: vpunpckhdq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x6a,0xeb] - vpunpckhdq %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpckhdq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x6a,0x28] - vpunpckhdq (%rax), %xmm12, %xmm13 - -// CHECK: vpunpckhqdq %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x6d,0xeb] - vpunpckhqdq %xmm11, %xmm12, %xmm13 - -// CHECK: vpunpckhqdq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x6d,0x28] - vpunpckhqdq (%rax), %xmm12, %xmm13 - -// CHECK: vpinsrw $7, %eax, %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc4,0xe8,0x07] - vpinsrw $7, %eax, %xmm12, %xmm13 - -// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0xc4,0x28,0x07] - vpinsrw $7, (%rax), %xmm12, %xmm13 - -// CHECK: vpextrw $7, %xmm12, %eax -// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07] - vpextrw $7, %xmm12, %eax - -// CHECK: vpmovmskb %xmm12, %eax -// CHECK: encoding: [0xc4,0xc1,0x79,0xd7,0xc4] - vpmovmskb %xmm12, %eax - -// CHECK: vmaskmovdqu %xmm14, %xmm15 -// CHECK: encoding: [0xc4,0x41,0x79,0xf7,0xfe] - vmaskmovdqu %xmm14, %xmm15 - -// CHECK: vmovd %eax, %xmm14 -// CHECK: encoding: [0xc5,0x79,0x6e,0xf0] - vmovd %eax, %xmm14 - -// CHECK: vmovd (%rax), %xmm14 -// CHECK: encoding: [0xc5,0x79,0x6e,0x30] - vmovd (%rax), %xmm14 - -// CHECK: vmovd %xmm14, (%rax) -// CHECK: encoding: [0xc5,0x79,0x7e,0x30] - vmovd %xmm14, (%rax) - -// CHECK: vmovd %rax, %xmm14 -// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0] - vmovd %rax, %xmm14 - -// CHECK: vmovq %xmm14, (%rax) -// CHECK: encoding: [0xc5,0x79,0xd6,0x30] - vmovq %xmm14, (%rax) - -// CHECK: vmovq %xmm14, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7a,0x7e,0xe6] - vmovq %xmm14, %xmm12 - -// CHECK: vmovq (%rax), %xmm14 -// CHECK: encoding: [0xc5,0x7a,0x7e,0x30] - vmovq (%rax), %xmm14 - -// CHECK: vmovq %rax, %xmm14 -// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0] - vmovq %rax, %xmm14 - -// CHECK: vmovq %xmm14, %rax -// CHECK: encoding: [0xc4,0x61,0xf9,0x7e,0xf0] - vmovq %xmm14, %rax - -// CHECK: vcvtpd2dq %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xe3] - vcvtpd2dq %xmm11, %xmm12 - -// CHECK: vcvtdq2pd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7a,0xe6,0xe3] - vcvtdq2pd %xmm11, %xmm12 - -// CHECK: vcvtdq2pd (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x7a,0xe6,0x20] - vcvtdq2pd (%rax), %xmm12 - -// CHECK: vmovshdup %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7a,0x16,0xe3] - vmovshdup %xmm11, %xmm12 - -// CHECK: vmovshdup (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x7a,0x16,0x20] - vmovshdup (%rax), %xmm12 - -// CHECK: vmovsldup %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7a,0x12,0xe3] - vmovsldup %xmm11, %xmm12 - -// CHECK: vmovsldup (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x7a,0x12,0x20] - vmovsldup (%rax), %xmm12 - -// CHECK: vmovddup %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x41,0x7b,0x12,0xe3] - vmovddup %xmm11, %xmm12 - -// CHECK: vmovddup (%rax), %xmm12 -// CHECK: encoding: [0xc5,0x7b,0x12,0x20] - vmovddup (%rax), %xmm12 - -// CHECK: vaddsubps %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0xd0,0xeb] - vaddsubps %xmm11, %xmm12, %xmm13 - -// CHECK: vaddsubps (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x23,0xd0,0x20] - vaddsubps (%rax), %xmm11, %xmm12 - -// CHECK: vaddsubpd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0xd0,0xeb] - vaddsubpd %xmm11, %xmm12, %xmm13 - -// CHECK: vaddsubpd (%rax), %xmm11, %xmm12 -// CHECK: encoding: [0xc5,0x21,0xd0,0x20] - vaddsubpd (%rax), %xmm11, %xmm12 - -// CHECK: vhaddps %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0x7c,0xeb] - vhaddps %xmm11, %xmm12, %xmm13 - -// CHECK: vhaddps (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0x7c,0x28] - vhaddps (%rax), %xmm12, %xmm13 - -// CHECK: vhaddpd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x7c,0xeb] - vhaddpd %xmm11, %xmm12, %xmm13 - -// CHECK: vhaddpd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x7c,0x28] - vhaddpd (%rax), %xmm12, %xmm13 - -// CHECK: vhsubps %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x1b,0x7d,0xeb] - vhsubps %xmm11, %xmm12, %xmm13 - -// CHECK: vhsubps (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x1b,0x7d,0x28] - vhsubps (%rax), %xmm12, %xmm13 - -// CHECK: vhsubpd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x19,0x7d,0xeb] - vhsubpd %xmm11, %xmm12, %xmm13 - -// CHECK: vhsubpd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc5,0x19,0x7d,0x28] - vhsubpd (%rax), %xmm12, %xmm13 - -// CHECK: vpabsb %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x42,0x79,0x1c,0xe3] - vpabsb %xmm11, %xmm12 - -// CHECK: vpabsb (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x1c,0x20] - vpabsb (%rax), %xmm12 - -// CHECK: vpabsw %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x42,0x79,0x1d,0xe3] - vpabsw %xmm11, %xmm12 - -// CHECK: vpabsw (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x1d,0x20] - vpabsw (%rax), %xmm12 - -// CHECK: vpabsd %xmm11, %xmm12 -// CHECK: encoding: [0xc4,0x42,0x79,0x1e,0xe3] - vpabsd %xmm11, %xmm12 - -// CHECK: vpabsd (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x1e,0x20] - vpabsd (%rax), %xmm12 - -// CHECK: vphaddw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x01,0xeb] - vphaddw %xmm11, %xmm12, %xmm13 - -// CHECK: vphaddw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x01,0x28] - vphaddw (%rax), %xmm12, %xmm13 - -// CHECK: vphaddd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x02,0xeb] - vphaddd %xmm11, %xmm12, %xmm13 - -// CHECK: vphaddd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x02,0x28] - vphaddd (%rax), %xmm12, %xmm13 - -// CHECK: vphaddsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x03,0xeb] - vphaddsw %xmm11, %xmm12, %xmm13 - -// CHECK: vphaddsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x03,0x28] - vphaddsw (%rax), %xmm12, %xmm13 - -// CHECK: vphsubw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x05,0xeb] - vphsubw %xmm11, %xmm12, %xmm13 - -// CHECK: vphsubw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x05,0x28] - vphsubw (%rax), %xmm12, %xmm13 - -// CHECK: vphsubd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x06,0xeb] - vphsubd %xmm11, %xmm12, %xmm13 - -// CHECK: vphsubd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x06,0x28] - vphsubd (%rax), %xmm12, %xmm13 - -// CHECK: vphsubsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x07,0xeb] - vphsubsw %xmm11, %xmm12, %xmm13 - -// CHECK: vphsubsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x07,0x28] - vphsubsw (%rax), %xmm12, %xmm13 - -// CHECK: vpmaddubsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x04,0xeb] - vpmaddubsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmaddubsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x04,0x28] - vpmaddubsw (%rax), %xmm12, %xmm13 - -// CHECK: vpshufb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x00,0xeb] - vpshufb %xmm11, %xmm12, %xmm13 - -// CHECK: vpshufb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x00,0x28] - vpshufb (%rax), %xmm12, %xmm13 - -// CHECK: vpsignb %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x08,0xeb] - vpsignb %xmm11, %xmm12, %xmm13 - -// CHECK: vpsignb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x08,0x28] - vpsignb (%rax), %xmm12, %xmm13 - -// CHECK: vpsignw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x09,0xeb] - vpsignw %xmm11, %xmm12, %xmm13 - -// CHECK: vpsignw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x09,0x28] - vpsignw (%rax), %xmm12, %xmm13 - -// CHECK: vpsignd %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x0a,0xeb] - vpsignd %xmm11, %xmm12, %xmm13 - -// CHECK: vpsignd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x0a,0x28] - vpsignd (%rax), %xmm12, %xmm13 - -// CHECK: vpmulhrsw %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x19,0x0b,0xeb] - vpmulhrsw %xmm11, %xmm12, %xmm13 - -// CHECK: vpmulhrsw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x0b,0x28] - vpmulhrsw (%rax), %xmm12, %xmm13 - -// CHECK: vpalignr $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x43,0x19,0x0f,0xeb,0x07] - vpalignr $7, %xmm11, %xmm12, %xmm13 - -// CHECK: vpalignr $7, (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x19,0x0f,0x28,0x07] - vpalignr $7, (%rax), %xmm12, %xmm13 - -// CHECK: vroundsd $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x43,0x19,0x0b,0xeb,0x07] - vroundsd $7, %xmm11, %xmm12, %xmm13 - -// CHECK: vroundsd $7, (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x19,0x0b,0x28,0x07] - vroundsd $7, (%rax), %xmm12, %xmm13 - -// CHECK: vroundss $7, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x43,0x19,0x0a,0xeb,0x07] - vroundss $7, %xmm11, %xmm12, %xmm13 - -// CHECK: vroundss $7, (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x19,0x0a,0x28,0x07] - vroundss $7, (%rax), %xmm12, %xmm13 - -// CHECK: vroundpd $7, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x43,0x79,0x09,0xec,0x07] - vroundpd $7, %xmm12, %xmm13 - -// CHECK: vroundpd $7, (%rax), %xmm13 -// CHECK: encoding: [0xc4,0x63,0x79,0x09,0x28,0x07] - vroundpd $7, (%rax), %xmm13 - -// CHECK: vroundps $7, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x43,0x79,0x08,0xec,0x07] - vroundps $7, %xmm12, %xmm13 - -// CHECK: vroundps $7, (%rax), %xmm13 -// CHECK: encoding: [0xc4,0x63,0x79,0x08,0x28,0x07] - vroundps $7, (%rax), %xmm13 - -// CHECK: vphminposuw %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x42,0x79,0x41,0xec] - vphminposuw %xmm12, %xmm13 - -// CHECK: vphminposuw (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x41,0x20] - vphminposuw (%rax), %xmm12 - -// CHECK: vpackusdw %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x2b,0xdc] - vpackusdw %xmm12, %xmm13, %xmm11 - -// CHECK: vpackusdw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x2b,0x28] - vpackusdw (%rax), %xmm12, %xmm13 - -// CHECK: vpcmpeqq %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x29,0xdc] - vpcmpeqq %xmm12, %xmm13, %xmm11 - -// CHECK: vpcmpeqq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x29,0x28] - vpcmpeqq (%rax), %xmm12, %xmm13 - -// CHECK: vpminsb %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x38,0xdc] - vpminsb %xmm12, %xmm13, %xmm11 - -// CHECK: vpminsb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x38,0x28] - vpminsb (%rax), %xmm12, %xmm13 - -// CHECK: vpminsd %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x39,0xdc] - vpminsd %xmm12, %xmm13, %xmm11 - -// CHECK: vpminsd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x39,0x28] - vpminsd (%rax), %xmm12, %xmm13 - -// CHECK: vpminud %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3b,0xdc] - vpminud %xmm12, %xmm13, %xmm11 - -// CHECK: vpminud (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3b,0x28] - vpminud (%rax), %xmm12, %xmm13 - -// CHECK: vpminuw %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3a,0xdc] - vpminuw %xmm12, %xmm13, %xmm11 - -// CHECK: vpminuw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3a,0x28] - vpminuw (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxsb %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3c,0xdc] - vpmaxsb %xmm12, %xmm13, %xmm11 - -// CHECK: vpmaxsb (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3c,0x28] - vpmaxsb (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxsd %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3d,0xdc] - vpmaxsd %xmm12, %xmm13, %xmm11 - -// CHECK: vpmaxsd (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3d,0x28] - vpmaxsd (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxud %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3f,0xdc] - vpmaxud %xmm12, %xmm13, %xmm11 - -// CHECK: vpmaxud (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3f,0x28] - vpmaxud (%rax), %xmm12, %xmm13 - -// CHECK: vpmaxuw %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x3e,0xdc] - vpmaxuw %xmm12, %xmm13, %xmm11 - -// CHECK: vpmaxuw (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x3e,0x28] - vpmaxuw (%rax), %xmm12, %xmm13 - -// CHECK: vpmuldq %xmm12, %xmm13, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x11,0x28,0xdc] - vpmuldq %xmm12, %xmm13, %xmm11 - -// CHECK: vpmuldq (%rax), %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x19,0x28,0x28] - vpmuldq (%rax), %xmm12, %xmm13 - -// CHECK: vpmulld %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x51,0x40,0xdc] - vpmulld %xmm12, %xmm5, %xmm11 - -// CHECK: vpmulld (%rax), %xmm5, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x51,0x40,0x28] - vpmulld (%rax), %xmm5, %xmm13 - -// CHECK: vblendps $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x0c,0xdc,0x03] - vblendps $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vblendps $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x0c,0x18,0x03] - vblendps $3, (%rax), %xmm5, %xmm11 - -// CHECK: vblendpd $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x0d,0xdc,0x03] - vblendpd $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vblendpd $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x0d,0x18,0x03] - vblendpd $3, (%rax), %xmm5, %xmm11 - -// CHECK: vpblendw $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x0e,0xdc,0x03] - vpblendw $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vpblendw $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x0e,0x18,0x03] - vpblendw $3, (%rax), %xmm5, %xmm11 - -// CHECK: vmpsadbw $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x42,0xdc,0x03] - vmpsadbw $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vmpsadbw $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x42,0x18,0x03] - vmpsadbw $3, (%rax), %xmm5, %xmm11 - -// CHECK: vdpps $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x40,0xdc,0x03] - vdpps $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vdpps $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x40,0x18,0x03] - vdpps $3, (%rax), %xmm5, %xmm11 - -// CHECK: vdppd $3, %xmm12, %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x51,0x41,0xdc,0x03] - vdppd $3, %xmm12, %xmm5, %xmm11 - -// CHECK: vdppd $3, (%rax), %xmm5, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x51,0x41,0x18,0x03] - vdppd $3, (%rax), %xmm5, %xmm11 - -// CHECK: vblendvpd %xmm12, %xmm5, %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0xed,0xc0] - vblendvpd %xmm12, %xmm5, %xmm11, %xmm13 - -// CHECK: vblendvpd %xmm12, (%rax), %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0x28,0xc0] - vblendvpd %xmm12, (%rax), %xmm11, %xmm13 - -// CHECK: vblendvps %xmm12, %xmm5, %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0xed,0xc0] - vblendvps %xmm12, %xmm5, %xmm11, %xmm13 - -// CHECK: vblendvps %xmm12, (%rax), %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0x28,0xc0] - vblendvps %xmm12, (%rax), %xmm11, %xmm13 - -// CHECK: vpblendvb %xmm12, %xmm5, %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0xed,0xc0] - vpblendvb %xmm12, %xmm5, %xmm11, %xmm13 - -// CHECK: vpblendvb %xmm12, (%rax), %xmm11, %xmm13 -// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0x28,0xc0] - vpblendvb %xmm12, (%rax), %xmm11, %xmm13 - -// CHECK: vpmovsxbw %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x20,0xd4] - vpmovsxbw %xmm12, %xmm10 - -// CHECK: vpmovsxbw (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x20,0x20] - vpmovsxbw (%rax), %xmm12 - -// CHECK: vpmovsxwd %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x23,0xd4] - vpmovsxwd %xmm12, %xmm10 - -// CHECK: vpmovsxwd (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x23,0x20] - vpmovsxwd (%rax), %xmm12 - -// CHECK: vpmovsxdq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x25,0xd4] - vpmovsxdq %xmm12, %xmm10 - -// CHECK: vpmovsxdq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x25,0x20] - vpmovsxdq (%rax), %xmm12 - -// CHECK: vpmovzxbw %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x30,0xd4] - vpmovzxbw %xmm12, %xmm10 - -// CHECK: vpmovzxbw (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x30,0x20] - vpmovzxbw (%rax), %xmm12 - -// CHECK: vpmovzxwd %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x33,0xd4] - vpmovzxwd %xmm12, %xmm10 - -// CHECK: vpmovzxwd (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x33,0x20] - vpmovzxwd (%rax), %xmm12 - -// CHECK: vpmovzxdq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x35,0xd4] - vpmovzxdq %xmm12, %xmm10 - -// CHECK: vpmovzxdq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x35,0x20] - vpmovzxdq (%rax), %xmm12 - -// CHECK: vpmovsxbq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x22,0xd4] - vpmovsxbq %xmm12, %xmm10 - -// CHECK: vpmovsxbq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x22,0x20] - vpmovsxbq (%rax), %xmm12 - -// CHECK: vpmovzxbq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x32,0xd4] - vpmovzxbq %xmm12, %xmm10 - -// CHECK: vpmovzxbq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x32,0x20] - vpmovzxbq (%rax), %xmm12 - -// CHECK: vpmovsxbd %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x21,0xd4] - vpmovsxbd %xmm12, %xmm10 - -// CHECK: vpmovsxbd (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x21,0x20] - vpmovsxbd (%rax), %xmm12 - -// CHECK: vpmovsxwq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x24,0xd4] - vpmovsxwq %xmm12, %xmm10 - -// CHECK: vpmovsxwq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x24,0x20] - vpmovsxwq (%rax), %xmm12 - -// CHECK: vpmovzxbd %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x31,0xd4] - vpmovzxbd %xmm12, %xmm10 - -// CHECK: vpmovzxbd (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x31,0x20] - vpmovzxbd (%rax), %xmm12 - -// CHECK: vpmovzxwq %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x34,0xd4] - vpmovzxwq %xmm12, %xmm10 - -// CHECK: vpmovzxwq (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x34,0x20] - vpmovzxwq (%rax), %xmm12 - -// CHECK: vpextrw $7, %xmm12, %eax -// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07] - vpextrw $7, %xmm12, %eax - -// CHECK: vpextrw $7, %xmm12, (%rax) -// CHECK: encoding: [0xc4,0x63,0x79,0x15,0x20,0x07] - vpextrw $7, %xmm12, (%rax) - -// CHECK: vpextrd $7, %xmm12, %eax -// CHECK: encoding: [0xc4,0x63,0x79,0x16,0xe0,0x07] - vpextrd $7, %xmm12, %eax - -// CHECK: vpextrd $7, %xmm12, (%rax) -// CHECK: encoding: [0xc4,0x63,0x79,0x16,0x20,0x07] - vpextrd $7, %xmm12, (%rax) - -// CHECK: vpextrb $7, %xmm12, %eax -// CHECK: encoding: [0xc4,0x63,0x79,0x14,0xe0,0x07] - vpextrb $7, %xmm12, %eax - -// CHECK: vpextrb $7, %xmm12, (%rax) -// CHECK: encoding: [0xc4,0x63,0x79,0x14,0x20,0x07] - vpextrb $7, %xmm12, (%rax) - -// CHECK: vpextrq $7, %xmm12, %rcx -// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0xe1,0x07] - vpextrq $7, %xmm12, %rcx - -// CHECK: vpextrq $7, %xmm12, (%rcx) -// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0x21,0x07] - vpextrq $7, %xmm12, (%rcx) - -// CHECK: vextractps $7, %xmm12, (%rax) -// CHECK: encoding: [0xc4,0x63,0x79,0x17,0x20,0x07] - vextractps $7, %xmm12, (%rax) - -// CHECK: vextractps $7, %xmm12, %eax -// CHECK: encoding: [0xc4,0x63,0x79,0x17,0xe0,0x07] - vextractps $7, %xmm12, %eax - -// CHECK: vpinsrw $7, %eax, %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0xc4,0xd0,0x07] - vpinsrw $7, %eax, %xmm12, %xmm10 - -// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc5,0x19,0xc4,0x10,0x07] - vpinsrw $7, (%rax), %xmm12, %xmm10 - -// CHECK: vpinsrb $7, %eax, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x19,0x20,0xd0,0x07] - vpinsrb $7, %eax, %xmm12, %xmm10 - -// CHECK: vpinsrb $7, (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x19,0x20,0x10,0x07] - vpinsrb $7, (%rax), %xmm12, %xmm10 - -// CHECK: vpinsrd $7, %eax, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x19,0x22,0xd0,0x07] - vpinsrd $7, %eax, %xmm12, %xmm10 - -// CHECK: vpinsrd $7, (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x19,0x22,0x10,0x07] - vpinsrd $7, (%rax), %xmm12, %xmm10 - -// CHECK: vpinsrq $7, %rax, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x99,0x22,0xd0,0x07] - vpinsrq $7, %rax, %xmm12, %xmm10 - -// CHECK: vpinsrq $7, (%rax), %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x63,0x99,0x22,0x10,0x07] - vpinsrq $7, (%rax), %xmm12, %xmm10 - -// CHECK: vinsertps $7, %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x43,0x29,0x21,0xdc,0x07] - vinsertps $7, %xmm12, %xmm10, %xmm11 - -// CHECK: vinsertps $7, (%rax), %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x63,0x29,0x21,0x18,0x07] - vinsertps $7, (%rax), %xmm10, %xmm11 - -// CHECK: vptest %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0x17,0xd4] - vptest %xmm12, %xmm10 - -// CHECK: vptest (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x17,0x20] - vptest (%rax), %xmm12 - -// CHECK: vmovntdqa (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0x2a,0x20] - vmovntdqa (%rax), %xmm12 - -// CHECK: vpcmpgtq %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x29,0x37,0xdc] - vpcmpgtq %xmm12, %xmm10, %xmm11 - -// CHECK: vpcmpgtq (%rax), %xmm10, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x29,0x37,0x28] - vpcmpgtq (%rax), %xmm10, %xmm13 - -// CHECK: vpcmpistrm $7, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x43,0x79,0x62,0xd4,0x07] - vpcmpistrm $7, %xmm12, %xmm10 - -// CHECK: vpcmpistrm $7, (%rax), %xmm10 -// CHECK: encoding: [0xc4,0x63,0x79,0x62,0x10,0x07] - vpcmpistrm $7, (%rax), %xmm10 - -// CHECK: vpcmpestrm $7, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x43,0x79,0x60,0xd4,0x07] - vpcmpestrm $7, %xmm12, %xmm10 - -// CHECK: vpcmpestrm $7, (%rax), %xmm10 -// CHECK: encoding: [0xc4,0x63,0x79,0x60,0x10,0x07] - vpcmpestrm $7, (%rax), %xmm10 - -// CHECK: vpcmpistri $7, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x43,0x79,0x63,0xd4,0x07] - vpcmpistri $7, %xmm12, %xmm10 - -// CHECK: vpcmpistri $7, (%rax), %xmm10 -// CHECK: encoding: [0xc4,0x63,0x79,0x63,0x10,0x07] - vpcmpistri $7, (%rax), %xmm10 - -// CHECK: vpcmpestri $7, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x43,0x79,0x61,0xd4,0x07] - vpcmpestri $7, %xmm12, %xmm10 - -// CHECK: vpcmpestri $7, (%rax), %xmm10 -// CHECK: encoding: [0xc4,0x63,0x79,0x61,0x10,0x07] - vpcmpestri $7, (%rax), %xmm10 - -// CHECK: vaesimc %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x42,0x79,0xdb,0xd4] - vaesimc %xmm12, %xmm10 - -// CHECK: vaesimc (%rax), %xmm12 -// CHECK: encoding: [0xc4,0x62,0x79,0xdb,0x20] - vaesimc (%rax), %xmm12 - -// CHECK: vaesenc %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x29,0xdc,0xdc] - vaesenc %xmm12, %xmm10, %xmm11 - -// CHECK: vaesenc (%rax), %xmm10, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x29,0xdc,0x28] - vaesenc (%rax), %xmm10, %xmm13 - -// CHECK: vaesenclast %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x29,0xdd,0xdc] - vaesenclast %xmm12, %xmm10, %xmm11 - -// CHECK: vaesenclast (%rax), %xmm10, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x29,0xdd,0x28] - vaesenclast (%rax), %xmm10, %xmm13 - -// CHECK: vaesdec %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x29,0xde,0xdc] - vaesdec %xmm12, %xmm10, %xmm11 - -// CHECK: vaesdec (%rax), %xmm10, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x29,0xde,0x28] - vaesdec (%rax), %xmm10, %xmm13 - -// CHECK: vaesdeclast %xmm12, %xmm10, %xmm11 -// CHECK: encoding: [0xc4,0x42,0x29,0xdf,0xdc] - vaesdeclast %xmm12, %xmm10, %xmm11 - -// CHECK: vaesdeclast (%rax), %xmm10, %xmm13 -// CHECK: encoding: [0xc4,0x62,0x29,0xdf,0x28] - vaesdeclast (%rax), %xmm10, %xmm13 - -// CHECK: vaeskeygenassist $7, %xmm12, %xmm10 -// CHECK: encoding: [0xc4,0x43,0x79,0xdf,0xd4,0x07] - vaeskeygenassist $7, %xmm12, %xmm10 - -// CHECK: vaeskeygenassist $7, (%rax), %xmm10 -// CHECK: encoding: [0xc4,0x63,0x79,0xdf,0x10,0x07] - vaeskeygenassist $7, (%rax), %xmm10 - -// CHECK: vcmpps $8, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x08] - vcmpeq_uqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $9, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x09] - vcmpngeps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $10, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0a] - vcmpngtps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $11, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0b] - vcmpfalseps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $12, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0c] - vcmpneq_oqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $13, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0d] - vcmpgeps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $14, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0e] - vcmpgtps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $15, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0f] - vcmptrueps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $16, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x10] - vcmpeq_osps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $17, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x11] - vcmplt_oqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $18, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x12] - vcmple_oqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $19, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x13] - vcmpunord_sps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $20, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x14] - vcmpneq_usps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $21, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x15] - vcmpnlt_uqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $22, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x16] - vcmpnle_uqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $23, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x17] - vcmpord_sps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $24, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x18] - vcmpeq_usps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $25, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x19] - vcmpnge_uqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $26, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1a] - vcmpngt_uqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $27, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1b] - vcmpfalse_osps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $28, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1c] - vcmpneq_osps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $29, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1d] - vcmpge_oqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $30, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1e] - vcmpgt_oqps %xmm11, %xmm12, %xmm13 - -// CHECK: vcmpps $31, %xmm11, %xmm12, %xmm13 -// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1f] - vcmptrue_usps %xmm11, %xmm12, %xmm13 - -// CHECK: vmovaps (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x28,0x20] - vmovaps (%rax), %ymm12 - -// CHECK: vmovaps %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x28,0xe3] - vmovaps %ymm11, %ymm12 - -// CHECK: vmovaps %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7c,0x29,0x18] - vmovaps %ymm11, (%rax) - -// CHECK: vmovapd (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7d,0x28,0x20] - vmovapd (%rax), %ymm12 - -// CHECK: vmovapd %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7d,0x28,0xe3] - vmovapd %ymm11, %ymm12 - -// CHECK: vmovapd %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7d,0x29,0x18] - vmovapd %ymm11, (%rax) - -// CHECK: vmovups (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x10,0x20] - vmovups (%rax), %ymm12 - -// CHECK: vmovups %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x10,0xe3] - vmovups %ymm11, %ymm12 - -// CHECK: vmovups %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7c,0x11,0x18] - vmovups %ymm11, (%rax) - -// CHECK: vmovupd (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7d,0x10,0x20] - vmovupd (%rax), %ymm12 - -// CHECK: vmovupd %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7d,0x10,0xe3] - vmovupd %ymm11, %ymm12 - -// CHECK: vmovupd %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7d,0x11,0x18] - vmovupd %ymm11, (%rax) - -// CHECK: vunpckhps %ymm11, %ymm12, %ymm4 -// CHECK: encoding: [0xc4,0xc1,0x1c,0x15,0xe3] - vunpckhps %ymm11, %ymm12, %ymm4 - -// CHECK: vunpckhpd %ymm11, %ymm12, %ymm4 -// CHECK: encoding: [0xc4,0xc1,0x1d,0x15,0xe3] - vunpckhpd %ymm11, %ymm12, %ymm4 - -// CHECK: vunpcklps %ymm11, %ymm12, %ymm4 -// CHECK: encoding: [0xc4,0xc1,0x1c,0x14,0xe3] - vunpcklps %ymm11, %ymm12, %ymm4 - -// CHECK: vunpcklpd %ymm11, %ymm12, %ymm4 -// CHECK: encoding: [0xc4,0xc1,0x1d,0x14,0xe3] - vunpcklpd %ymm11, %ymm12, %ymm4 - -// CHECK: vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x15,0x54,0xcb,0xfc] - vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x15,0x54,0xcb,0xfc] - vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x14,0x54,0xcb,0xfc] - vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x14,0x54,0xcb,0xfc] - vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vmovntdq %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7d,0xe7,0x18] - vmovntdq %ymm11, (%rax) - -// CHECK: vmovntpd %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7d,0x2b,0x18] - vmovntpd %ymm11, (%rax) - -// CHECK: vmovntps %ymm11, (%rax) -// CHECK: encoding: [0xc5,0x7c,0x2b,0x18] - vmovntps %ymm11, (%rax) - -// CHECK: vmovmskps %xmm12, %eax -// CHECK: encoding: [0xc4,0xc1,0x78,0x50,0xc4] - vmovmskps %xmm12, %eax - -// CHECK: vmovmskpd %xmm12, %eax -// CHECK: encoding: [0xc4,0xc1,0x79,0x50,0xc4] - vmovmskpd %xmm12, %eax - -// CHECK: vmaxps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x5f,0xf4] - vmaxps %ymm12, %ymm4, %ymm6 - -// CHECK: vmaxpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x5f,0xf4] - vmaxpd %ymm12, %ymm4, %ymm6 - -// CHECK: vminps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x5d,0xf4] - vminps %ymm12, %ymm4, %ymm6 - -// CHECK: vminpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x5d,0xf4] - vminpd %ymm12, %ymm4, %ymm6 - -// CHECK: vsubps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x5c,0xf4] - vsubps %ymm12, %ymm4, %ymm6 - -// CHECK: vsubpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x5c,0xf4] - vsubpd %ymm12, %ymm4, %ymm6 - -// CHECK: vdivps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x5e,0xf4] - vdivps %ymm12, %ymm4, %ymm6 - -// CHECK: vdivpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x5e,0xf4] - vdivpd %ymm12, %ymm4, %ymm6 - -// CHECK: vaddps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x58,0xf4] - vaddps %ymm12, %ymm4, %ymm6 - -// CHECK: vaddpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x58,0xf4] - vaddpd %ymm12, %ymm4, %ymm6 - -// CHECK: vmulps %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5c,0x59,0xf4] - vmulps %ymm12, %ymm4, %ymm6 - -// CHECK: vmulpd %ymm12, %ymm4, %ymm6 -// CHECK: encoding: [0xc4,0xc1,0x5d,0x59,0xf4] - vmulpd %ymm12, %ymm4, %ymm6 - -// CHECK: vmaxps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5f,0x30] - vmaxps (%rax), %ymm4, %ymm6 - -// CHECK: vmaxpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5f,0x30] - vmaxpd (%rax), %ymm4, %ymm6 - -// CHECK: vminps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5d,0x30] - vminps (%rax), %ymm4, %ymm6 - -// CHECK: vminpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5d,0x30] - vminpd (%rax), %ymm4, %ymm6 - -// CHECK: vsubps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5c,0x30] - vsubps (%rax), %ymm4, %ymm6 - -// CHECK: vsubpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5c,0x30] - vsubpd (%rax), %ymm4, %ymm6 - -// CHECK: vdivps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x5e,0x30] - vdivps (%rax), %ymm4, %ymm6 - -// CHECK: vdivpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x5e,0x30] - vdivpd (%rax), %ymm4, %ymm6 - -// CHECK: vaddps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x58,0x30] - vaddps (%rax), %ymm4, %ymm6 - -// CHECK: vaddpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x58,0x30] - vaddpd (%rax), %ymm4, %ymm6 - -// CHECK: vmulps (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdc,0x59,0x30] - vmulps (%rax), %ymm4, %ymm6 - -// CHECK: vmulpd (%rax), %ymm4, %ymm6 -// CHECK: encoding: [0xc5,0xdd,0x59,0x30] - vmulpd (%rax), %ymm4, %ymm6 - -// CHECK: vsqrtpd %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7d,0x51,0xe3] - vsqrtpd %ymm11, %ymm12 - -// CHECK: vsqrtpd (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7d,0x51,0x20] - vsqrtpd (%rax), %ymm12 - -// CHECK: vsqrtps %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x51,0xe3] - vsqrtps %ymm11, %ymm12 - -// CHECK: vsqrtps (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x51,0x20] - vsqrtps (%rax), %ymm12 - -// CHECK: vrsqrtps %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x52,0xe3] - vrsqrtps %ymm11, %ymm12 - -// CHECK: vrsqrtps (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x52,0x20] - vrsqrtps (%rax), %ymm12 - -// CHECK: vrcpps %ymm11, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x53,0xe3] - vrcpps %ymm11, %ymm12 - -// CHECK: vrcpps (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x53,0x20] - vrcpps (%rax), %ymm12 - -// CHECK: vandps %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0c,0x54,0xdc] - vandps %ymm12, %ymm14, %ymm11 - -// CHECK: vandpd %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0d,0x54,0xdc] - vandpd %ymm12, %ymm14, %ymm11 - -// CHECK: vandps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x54,0x54,0xcb,0xfc] - vandps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x54,0x54,0xcb,0xfc] - vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vorps %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0c,0x56,0xdc] - vorps %ymm12, %ymm14, %ymm11 - -// CHECK: vorpd %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0d,0x56,0xdc] - vorpd %ymm12, %ymm14, %ymm11 - -// CHECK: vorps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x56,0x54,0xcb,0xfc] - vorps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x56,0x54,0xcb,0xfc] - vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vxorps %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0c,0x57,0xdc] - vxorps %ymm12, %ymm14, %ymm11 - -// CHECK: vxorpd %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0d,0x57,0xdc] - vxorpd %ymm12, %ymm14, %ymm11 - -// CHECK: vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x57,0x54,0xcb,0xfc] - vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x57,0x54,0xcb,0xfc] - vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vandnps %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0c,0x55,0xdc] - vandnps %ymm12, %ymm14, %ymm11 - -// CHECK: vandnpd %ymm12, %ymm14, %ymm11 -// CHECK: encoding: [0xc4,0x41,0x0d,0x55,0xdc] - vandnpd %ymm12, %ymm14, %ymm11 - -// CHECK: vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1c,0x55,0x54,0xcb,0xfc] - vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10 -// CHECK: encoding: [0xc5,0x1d,0x55,0x54,0xcb,0xfc] - vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10 - -// CHECK: vcvtps2pd %xmm13, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7c,0x5a,0xe5] - vcvtps2pd %xmm13, %ymm12 - -// CHECK: vcvtps2pd (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x5a,0x20] - vcvtps2pd (%rax), %ymm12 - -// CHECK: vcvtdq2pd %xmm13, %ymm12 -// CHECK: encoding: [0xc4,0x41,0x7e,0xe6,0xe5] - vcvtdq2pd %xmm13, %ymm12 - -// CHECK: vcvtdq2pd (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7e,0xe6,0x20] - vcvtdq2pd (%rax), %ymm12 - -// CHECK: vcvtdq2ps %ymm12, %ymm10 -// CHECK: encoding: [0xc4,0x41,0x7c,0x5b,0xd4] - vcvtdq2ps %ymm12, %ymm10 - -// CHECK: vcvtdq2ps (%rax), %ymm12 -// CHECK: encoding: [0xc5,0x7c,0x5b,0x20] - vcvtdq2ps (%rax), %ymm12 - -// CHECK: vcvtps2dq %ymm12, %ymm10 -// CHECK: encoding: [0xc4,0x41,0x7d,0x5b,0xd4] - vcvtps2dq %ymm12, %ymm10 - -// CHECK: vcvtps2dq (%rax), %ymm10 -// CHECK: encoding: [0xc5,0x7d,0x5b,0x10] - vcvtps2dq (%rax), %ymm10 - -// CHECK: vcvttps2dq %ymm12, %ymm10 -// CHECK: encoding: [0xc4,0x41,0x7e,0x5b,0xd4] - vcvttps2dq %ymm12, %ymm10 - -// CHECK: vcvttps2dq (%rax), %ymm10 -// CHECK: encoding: [0xc5,0x7e,0x5b,0x10] - vcvttps2dq (%rax), %ymm10 - -// CHECK: vcvttpd2dq %xmm11, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3] - vcvttpd2dq %xmm11, %xmm10 - -// CHECK: vcvttpd2dq %ymm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xd4] - vcvttpd2dq %ymm12, %xmm10 - -// CHECK: vcvttpd2dqx %xmm11, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3] - vcvttpd2dqx %xmm11, %xmm10 - -// CHECK: vcvttpd2dqx (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x79,0xe6,0x18] - vcvttpd2dqx (%rax), %xmm11 - -// CHECK: vcvttpd2dqy %ymm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xdc] - vcvttpd2dqy %ymm12, %xmm11 - -// CHECK: vcvttpd2dqy (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x7d,0xe6,0x18] - vcvttpd2dqy (%rax), %xmm11 - -// CHECK: vcvtpd2ps %ymm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xd4] - vcvtpd2ps %ymm12, %xmm10 - -// CHECK: vcvtpd2psx %xmm11, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xd3] - vcvtpd2psx %xmm11, %xmm10 - -// CHECK: vcvtpd2psx (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x79,0x5a,0x18] - vcvtpd2psx (%rax), %xmm11 - -// CHECK: vcvtpd2psy %ymm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xdc] - vcvtpd2psy %ymm12, %xmm11 - -// CHECK: vcvtpd2psy (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x7d,0x5a,0x18] - vcvtpd2psy (%rax), %xmm11 - -// CHECK: vcvtpd2dq %ymm12, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xd4] - vcvtpd2dq %ymm12, %xmm10 - -// CHECK: vcvtpd2dqy %ymm12, %xmm11 -// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xdc] - vcvtpd2dqy %ymm12, %xmm11 - -// CHECK: vcvtpd2dqy (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x7f,0xe6,0x18] - vcvtpd2dqy (%rax), %xmm11 - -// CHECK: vcvtpd2dqx %xmm11, %xmm10 -// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xd3] - vcvtpd2dqx %xmm11, %xmm10 - -// CHECK: vcvtpd2dqx (%rax), %xmm11 -// CHECK: encoding: [0xc5,0x7b,0xe6,0x18] - vcvtpd2dqx (%rax), %xmm11 - -// CHECK: vcmpps $0, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x00] - vcmpeqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $2, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x02] - vcmpleps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $1, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x01] - vcmpltps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $4, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x04] - vcmpneqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $6, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x06] - vcmpnleps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $5, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x05] - vcmpnltps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $7, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x07] - vcmpordps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $3, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x03] - vcmpunordps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x02] - vcmpleps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnleps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %ymm6, %ymm12 -// CHECK: encoding: [0xc5,0x4c,0xc2,0x64,0xcb,0xfc,0x07] - vcmpordps -4(%rbx,%rcx,8), %ymm6, %ymm12 - -// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordps -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $0, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x00] - vcmpeqpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $2, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x02] - vcmplepd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $1, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x01] - vcmpltpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $4, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x04] - vcmpneqpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $6, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x06] - vcmpnlepd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $5, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x05] - vcmpnltpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $7, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x07] - vcmpordpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $3, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x03] - vcmpunordpd %ymm11, %ymm12, %ymm13 - -// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x00] - vcmpeqpd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x02] - vcmplepd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x01] - vcmpltpd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x04] - vcmpneqpd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x06] - vcmpnlepd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x05] - vcmpnltpd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %ymm6, %ymm12 -// CHECK: encoding: [0xc5,0x4d,0xc2,0x64,0xcb,0xfc,0x07] - vcmpordpd -4(%rbx,%rcx,8), %ymm6, %ymm12 - -// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %ymm12, %ymm13 -// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x03] - vcmpunordpd -4(%rbx,%rcx,8), %ymm12, %ymm13 - -// CHECK: vcmpps $8, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x08] - vcmpeq_uqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $9, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x09] - vcmpngeps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $10, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0a] - vcmpngtps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $11, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0b] - vcmpfalseps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $12, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0c] - vcmpneq_oqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $13, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0d] - vcmpgeps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $14, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0e] - vcmpgtps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $15, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0f] - vcmptrueps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $16, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x10] - vcmpeq_osps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $17, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x11] - vcmplt_oqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $18, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x12] - vcmple_oqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $19, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x13] - vcmpunord_sps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $20, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x14] - vcmpneq_usps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $21, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x15] - vcmpnlt_uqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $22, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x16] - vcmpnle_uqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $23, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x17] - vcmpord_sps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $24, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x18] - vcmpeq_usps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $25, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x19] - vcmpnge_uqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $26, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1a] - vcmpngt_uqps %ymm11, %ymm12, %ymm13 - -// CHECK: vcmpps $27, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1b] - vcmpfalse_osps %ymm11, %ymm12, %ymm13 +// CHECK: movq 57005(,%riz), %rbx +// CHECK: encoding: [0x48,0x8b,0x1c,0x25,0xad,0xde,0x00,0x00] + movq 57005(,%riz), %rbx -// CHECK: vcmpps $28, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1c] - vcmpneq_osps %ymm11, %ymm12, %ymm13 +// CHECK: movq 48879(,%riz), %rax +// CHECK: encoding: [0x48,0x8b,0x04,0x25,0xef,0xbe,0x00,0x00] + movq 48879(,%riz), %rax -// CHECK: vcmpps $29, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1d] - vcmpge_oqps %ymm11, %ymm12, %ymm13 +// CHECK: movq -4(,%riz,8), %rax +// CHECK: encoding: [0x48,0x8b,0x04,0xe5,0xfc,0xff,0xff,0xff] + movq -4(,%riz,8), %rax -// CHECK: vcmpps $30, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1e] - vcmpgt_oqps %ymm11, %ymm12, %ymm13 +// CHECK: movq (%rcx,%riz), %rax +// CHECK: encoding: [0x48,0x8b,0x04,0x21] + movq (%rcx,%riz), %rax -// CHECK: vcmpps $31, %ymm11, %ymm12, %ymm13 -// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1f] - vcmptrue_usps %ymm11, %ymm12, %ymm13 +// CHECK: movq (%rcx,%riz,8), %rax +// CHECK: encoding: [0x48,0x8b,0x04,0xe1] + movq (%rcx,%riz,8), %rax diff --git a/test/MC/AsmParser/X86/x86_64-fma3-encoding.s b/test/MC/AsmParser/X86/x86_64-fma3-encoding.s new file mode 100644 index 0000000..d08a732 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_64-fma3-encoding.s @@ -0,0 +1,674 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vfmadd132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x98,0xdc] + vfmadd132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x98,0x18] + vfmadd132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x98,0xdc] + vfmadd132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x98,0x18] + vfmadd132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xa8,0xdc] + vfmadd213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xa8,0x18] + vfmadd213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xa8,0xdc] + vfmadd213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xa8,0x18] + vfmadd213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xb8,0xdc] + vfmadd231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xb8,0x18] + vfmadd231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xb8,0xdc] + vfmadd231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xb8,0x18] + vfmadd231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x98,0xdc] + vfmadd132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x98,0x18] + vfmadd132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x98,0xdc] + vfmadd132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x98,0x18] + vfmadd132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xa8,0xdc] + vfmadd213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xa8,0x18] + vfmadd213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xa8,0xdc] + vfmadd213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xa8,0x18] + vfmadd213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xb8,0xdc] + vfmadd231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xb8,0x18] + vfmadd231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xb8,0xdc] + vfmadd231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xb8,0x18] + vfmadd231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x98,0xdc] + vfmadd132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x98,0x18] + vfmadd132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x98,0xdc] + vfmadd132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x98,0x18] + vfmadd132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xa8,0xdc] + vfmadd213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xa8,0x18] + vfmadd213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xa8,0xdc] + vfmadd213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xa8,0x18] + vfmadd213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xb8,0xdc] + vfmadd231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xb8,0x18] + vfmadd231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xb8,0xdc] + vfmadd231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmadd231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xb8,0x18] + vfmadd231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x96,0xdc] + vfmaddsub132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x96,0x18] + vfmaddsub132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x96,0xdc] + vfmaddsub132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x96,0x18] + vfmaddsub132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xa6,0xdc] + vfmaddsub213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xa6,0x18] + vfmaddsub213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xa6,0xdc] + vfmaddsub213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xa6,0x18] + vfmaddsub213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xb6,0xdc] + vfmaddsub231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xb6,0x18] + vfmaddsub231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmaddsub231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xb6,0xdc] + vfmaddsub231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmaddsub231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xb6,0x18] + vfmaddsub231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x97,0xdc] + vfmsubadd132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x97,0x18] + vfmsubadd132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x97,0xdc] + vfmsubadd132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x97,0x18] + vfmsubadd132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xa7,0xdc] + vfmsubadd213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xa7,0x18] + vfmsubadd213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xa7,0xdc] + vfmsubadd213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xa7,0x18] + vfmsubadd213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xb7,0xdc] + vfmsubadd231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xb7,0x18] + vfmsubadd231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsubadd231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xb7,0xdc] + vfmsubadd231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsubadd231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xb7,0x18] + vfmsubadd231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x9a,0xdc] + vfmsub132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x9a,0x18] + vfmsub132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x9a,0xdc] + vfmsub132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x9a,0x18] + vfmsub132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xaa,0xdc] + vfmsub213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xaa,0x18] + vfmsub213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xaa,0xdc] + vfmsub213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xaa,0x18] + vfmsub213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xba,0xdc] + vfmsub231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xba,0x18] + vfmsub231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfmsub231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xba,0xdc] + vfmsub231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfmsub231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xba,0x18] + vfmsub231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x9c,0xdc] + vfnmadd132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x9c,0x18] + vfnmadd132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x9c,0xdc] + vfnmadd132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x9c,0x18] + vfnmadd132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xac,0xdc] + vfnmadd213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xac,0x18] + vfnmadd213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xac,0xdc] + vfnmadd213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xac,0x18] + vfnmadd213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xbc,0xdc] + vfnmadd231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xbc,0x18] + vfnmadd231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmadd231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xbc,0xdc] + vfnmadd231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmadd231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xbc,0x18] + vfnmadd231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub132pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0x9e,0xdc] + vfnmsub132pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub132pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0x9e,0x18] + vfnmsub132pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub132ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x9e,0xdc] + vfnmsub132ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub132ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0x9e,0x18] + vfnmsub132ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub213pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xae,0xdc] + vfnmsub213pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub213pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xae,0x18] + vfnmsub213pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub213ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xae,0xdc] + vfnmsub213ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub213ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xae,0x18] + vfnmsub213ps (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub231pd %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0xa9,0xbe,0xdc] + vfnmsub231pd %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub231pd (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0xa9,0xbe,0x18] + vfnmsub231pd (%rax), %xmm10, %xmm11 + +// CHECK: vfnmsub231ps %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0xbe,0xdc] + vfnmsub231ps %xmm12, %xmm10, %xmm11 + +// CHECK: vfnmsub231ps (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x62,0x29,0xbe,0x18] + vfnmsub231ps (%rax), %xmm10, %xmm11 + +// CHECK: vfmadd132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x98,0xdc] + vfmadd132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x98,0x18] + vfmadd132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x98,0xdc] + vfmadd132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x98,0x18] + vfmadd132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xa8,0xdc] + vfmadd213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xa8,0x18] + vfmadd213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xa8,0xdc] + vfmadd213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xa8,0x18] + vfmadd213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xb8,0xdc] + vfmadd231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xb8,0x18] + vfmadd231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmadd231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xb8,0xdc] + vfmadd231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmadd231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xb8,0x18] + vfmadd231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x96,0xdc] + vfmaddsub132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x96,0x18] + vfmaddsub132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x96,0xdc] + vfmaddsub132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x96,0x18] + vfmaddsub132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xa6,0xdc] + vfmaddsub213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xa6,0x18] + vfmaddsub213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xa6,0xdc] + vfmaddsub213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xa6,0x18] + vfmaddsub213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xb6,0xdc] + vfmaddsub231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xb6,0x18] + vfmaddsub231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmaddsub231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xb6,0xdc] + vfmaddsub231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmaddsub231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xb6,0x18] + vfmaddsub231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x97,0xdc] + vfmsubadd132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x97,0x18] + vfmsubadd132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x97,0xdc] + vfmsubadd132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x97,0x18] + vfmsubadd132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xa7,0xdc] + vfmsubadd213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xa7,0x18] + vfmsubadd213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xa7,0xdc] + vfmsubadd213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xa7,0x18] + vfmsubadd213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xb7,0xdc] + vfmsubadd231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xb7,0x18] + vfmsubadd231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsubadd231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xb7,0xdc] + vfmsubadd231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsubadd231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xb7,0x18] + vfmsubadd231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x9a,0xdc] + vfmsub132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x9a,0x18] + vfmsub132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x9a,0xdc] + vfmsub132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x9a,0x18] + vfmsub132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xaa,0xdc] + vfmsub213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xaa,0x18] + vfmsub213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xaa,0xdc] + vfmsub213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xaa,0x18] + vfmsub213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xba,0xdc] + vfmsub231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xba,0x18] + vfmsub231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfmsub231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xba,0xdc] + vfmsub231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfmsub231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xba,0x18] + vfmsub231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x9c,0xdc] + vfnmadd132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x9c,0x18] + vfnmadd132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x9c,0xdc] + vfnmadd132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x9c,0x18] + vfnmadd132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xac,0xdc] + vfnmadd213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xac,0x18] + vfnmadd213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xac,0xdc] + vfnmadd213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xac,0x18] + vfnmadd213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xbc,0xdc] + vfnmadd231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xbc,0x18] + vfnmadd231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmadd231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xbc,0xdc] + vfnmadd231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmadd231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xbc,0x18] + vfnmadd231ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub132pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0x9e,0xdc] + vfnmsub132pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub132pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0x9e,0x18] + vfnmsub132pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub132ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0x9e,0xdc] + vfnmsub132ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub132ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0x9e,0x18] + vfnmsub132ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub213pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xae,0xdc] + vfnmsub213pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub213pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xae,0x18] + vfnmsub213pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub213ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xae,0xdc] + vfnmsub213ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub213ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xae,0x18] + vfnmsub213ps (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub231pd %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0xad,0xbe,0xdc] + vfnmsub231pd %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub231pd (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0xad,0xbe,0x18] + vfnmsub231pd (%rax), %ymm10, %ymm11 + +// CHECK: vfnmsub231ps %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x42,0x2d,0xbe,0xdc] + vfnmsub231ps %ymm12, %ymm10, %ymm11 + +// CHECK: vfnmsub231ps (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x62,0x2d,0xbe,0x18] + vfnmsub231ps (%rax), %ymm10, %ymm11 + diff --git a/test/MC/AsmParser/X86/x86_64-new-encoder.s b/test/MC/AsmParser/X86/x86_64-new-encoder.s index 1858441..9f94d84 100644 --- a/test/MC/AsmParser/X86/x86_64-new-encoder.s +++ b/test/MC/AsmParser/X86/x86_64-new-encoder.s @@ -72,9 +72,9 @@ stosl // Not moffset forms of moves, they are x86-32 only! rdar://7947184 -movb 0, %al // CHECK: movb 0, %al # encoding: [0x8a,0x04,0x25,A,A,A,A] -movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0x8b,0x04,0x25,A,A,A,A] -movl 0, %eax // CHECK: movl 0, %eax # encoding: [0x8b,0x04,0x25,A,A,A,A] +movb 0, %al // CHECK: movb 0, %al # encoding: [0x8a,0x04,0x25,0x00,0x00,0x00,0x00] +movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0x8b,0x04,0x25,0x00,0x00,0x00,0x00] +movl 0, %eax // CHECK: movl 0, %eax # encoding: [0x8b,0x04,0x25,0x00,0x00,0x00,0x00] // CHECK: pushfq # encoding: [0x9c] pushf @@ -150,3 +150,10 @@ btq $0x01,%rdx // CHECK: [0x65,0x8b,0x04,0x25,0x7c,0x00,0x00,0x00] movl %gs:124, %eax +// CHECK: jmpq *8(%rax) +// CHECK: encoding: [0xff,0x60,0x08] + jmp *8(%rax) + +// CHECK: btq $61, -216(%rbp) +// CHECK: encoding: [0x48,0x0f,0xba,0xa5,0x28,0xff,0xff,0xff,0x3d] + btq $61, -216(%rbp) diff --git a/test/MC/AsmParser/X86/x86_instruction_errors.s b/test/MC/AsmParser/X86/x86_instruction_errors.s new file mode 100644 index 0000000..183306b --- /dev/null +++ b/test/MC/AsmParser/X86/x86_instruction_errors.s @@ -0,0 +1,5 @@ +// RUN: not llvm-mc -triple x86_64-unknown-unknown %s 2> %t.err +// RUN: FileCheck < %t.err %s + +// CHECK: error: ambiguous instructions require an explicit suffix (could be 'cmpb', 'cmpw', 'cmpl', or 'cmpq') +cmp $0, 0(%eax) diff --git a/test/MC/AsmParser/X86/x86_instructions.s b/test/MC/AsmParser/X86/x86_instructions.s index 4bc8a4b..a82d2a1 100644 --- a/test/MC/AsmParser/X86/x86_instructions.s +++ b/test/MC/AsmParser/X86/x86_instructions.s @@ -1,4 +1,6 @@ -// RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s +// RUN: llvm-mc -triple x86_64-unknown-unknown %s > %t 2> %t.err +// RUN: FileCheck < %t %s +// RUN: FileCheck --check-prefix=CHECK-STDERR < %t.err %s // CHECK: subb %al, %al subb %al, %al @@ -56,7 +58,7 @@ subl %eax, %ebx // FIXME: Check that this matches the correct instruction. -// CHECK: call *%rax +// CHECK: callq *%rax call *%rax // FIXME: Check that this matches the correct instruction. @@ -151,3 +153,23 @@ fadd %st(7) // CHECK: int3 INT3 + +// Allow scale factor without index register. +// CHECK: movaps %xmm3, (%esi) +// CHECK-STDERR: warning: scale factor without index register is ignored +movaps %xmm3, (%esi, 2) + +// CHECK: imull $12, %eax, %eax +imul $12, %eax + +// CHECK: imull %ecx, %eax +imull %ecx, %eax + +// PR8114 +// CHECK: outb %al, %dx +// CHECK: outw %ax, %dx +// CHECK: outl %eax, %dx + +out %al, (%dx) +out %ax, (%dx) +outl %eax, (%dx) diff --git a/test/MC/AsmParser/X86/x86_operands.s b/test/MC/AsmParser/X86/x86_operands.s index bf958d8..ddadf79 100644 --- a/test/MC/AsmParser/X86/x86_operands.s +++ b/test/MC/AsmParser/X86/x86_operands.s @@ -1,5 +1,3 @@ -// FIXME: Actually test that we get the expected results. - // RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s # Immediates diff --git a/test/MC/AsmParser/dg.exp b/test/MC/AsmParser/dg.exp index 64cb75b..a6d81da 100644 --- a/test/MC/AsmParser/dg.exp +++ b/test/MC/AsmParser/dg.exp @@ -1,4 +1,5 @@ load_lib llvm.exp -RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]] - +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]] +} diff --git a/test/MC/AsmParser/directive_abort.s b/test/MC/AsmParser/directive_abort.s index 3eb8e96..1fd1f6e 100644 --- a/test/MC/AsmParser/directive_abort.s +++ b/test/MC/AsmParser/directive_abort.s @@ -1,6 +1,6 @@ # RUN: llvm-mc -triple i386-unknown-unknown %s 2> %t # RUN: FileCheck -input-file %t %s -# CHECK: .abort "please stop assembing" -TEST0: - .abort "please stop assembing" +# CHECK: error: .abort 'please stop assembing' +TEST0: + .abort please stop assembing diff --git a/test/MC/AsmParser/directive_elf_size.s b/test/MC/AsmParser/directive_elf_size.s new file mode 100644 index 0000000..af35ae0 --- /dev/null +++ b/test/MC/AsmParser/directive_elf_size.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s + +a: + ret +.Lt: +# CHECK: .size a, .Lt-a + .size a, .Lt-a + diff --git a/test/MC/AsmParser/directive_values.s b/test/MC/AsmParser/directive_values.s index beac69a..c7617a3 100644 --- a/test/MC/AsmParser/directive_values.s +++ b/test/MC/AsmParser/directive_values.s @@ -19,3 +19,20 @@ TEST2: # CHECK: .quad 9 TEST3: .quad 9 + + +# rdar://7997827 +TEST4: + .quad 0b0100 + .quad 4294967295 + .quad 4294967295+1 + .quad 4294967295LL+1 + .quad 0b10LL + 07ULL + 0x42AULL +# CHECK: TEST4 +# CHECK: .quad 4 +# CHECK: .quad 4294967295 +# CHECK: .quad 4294967296 +# CHECK: .quad 4294967296 +# CHECK: .quad 1075 + + diff --git a/test/MC/AsmParser/dollars-in-identifiers.s b/test/MC/AsmParser/dollars-in-identifiers.s new file mode 100644 index 0000000..e5695906 --- /dev/null +++ b/test/MC/AsmParser/dollars-in-identifiers.s @@ -0,0 +1,7 @@ +# RUN: llvm-mc -triple i386-unknown-unknown %s > %t +# RUN: FileCheck < %t %s + +// CHECK: .globl $foo +.globl $foo +// CHECK: .long ($foo) +.long ($foo) diff --git a/test/MC/AsmParser/macro-def-in-instantiation.s b/test/MC/AsmParser/macro-def-in-instantiation.s new file mode 100644 index 0000000..b6483b3 --- /dev/null +++ b/test/MC/AsmParser/macro-def-in-instantiation.s @@ -0,0 +1,13 @@ +// RUN: llvm-mc -triple x86_64-apple-darwin10 %s | FileCheck %s + +.macro .make_macro +$0 $1 +$2 $3 +$4 +.endmacro + +.make_macro .macro,.mybyte,.byte,$0,.endmacro + +.data +// CHECK: .byte 10 +.mybyte 10 diff --git a/test/MC/AsmParser/macros-parsing.s b/test/MC/AsmParser/macros-parsing.s new file mode 100644 index 0000000..65f6454 --- /dev/null +++ b/test/MC/AsmParser/macros-parsing.s @@ -0,0 +1,23 @@ +// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err +// RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err + +.macro .test0 +.endmacro + +.macros_off +// CHECK-ERRORS: 9:1: warning: ignoring directive for now +.test0 +.macros_on + +.test0 + +// CHECK-ERRORS: macro '.test0' is already defined +.macro .test0 +.endmacro + +// CHECK-ERRORS: unexpected '.endmacro' in file +.endmacro + +// CHECK-ERRORS: no matching '.endmacro' in definition +.macro dummy + diff --git a/test/MC/AsmParser/macros.s b/test/MC/AsmParser/macros.s new file mode 100644 index 0000000..214274d --- /dev/null +++ b/test/MC/AsmParser/macros.s @@ -0,0 +1,39 @@ +// RUN: llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err | FileCheck %s +// RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err + +.macro .test0 +.macrobody0 +.endmacro +.macro .test1 +.test0 +.endmacro + +.test1 +// CHECK-ERRORS: <instantiation>:1:1: warning: ignoring directive for now +// CHECK-ERRORS-NEXT: macrobody0 +// CHECK-ERRORS-NEXT: ^ +// CHECK-ERRORS: <instantiation>:1:1: note: while in macro instantiation +// CHECK-ERRORS-NEXT: .test0 +// CHECK-ERRORS-NEXT: ^ +// CHECK-ERRORS: 11:1: note: while in macro instantiation +// CHECK-ERRORS-NEXT: .test1 +// CHECK-ERRORS-NEXT: ^ + +.macro test2 +.byte $0 +.endmacro +test2 10 + +.macro test3 +.globl "$0 $1 $2 $$3 $n" +.endmacro + +// CHECK: .globl "1 23 $3 2" +test3 1,2 3 + +.macro test4 +.globl "$0 -- $1" +.endmacro + +// CHECK: .globl "ab)(,) -- (cd)" +test4 a b)(,),(cd) diff --git a/test/MC/COFF/basic-coff.ll b/test/MC/COFF/basic-coff.ll new file mode 100644 index 0000000..1e67db0 --- /dev/null +++ b/test/MC/COFF/basic-coff.ll @@ -0,0 +1,136 @@ +; This test checks that the COFF object emitter works for the most basic +; programs. + +; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o %t +; RUN: coff-dump.py %abs_tmp | FileCheck %s +; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o %t + +@.str = private constant [12 x i8] c"Hello World\00" ; <[12 x i8]*> [#uses=1] + +define i32 @main() nounwind { +entry: + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0)) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind + +; CHECK: { +; CHECK: MachineType = IMAGE_FILE_MACHINE_I386 (0x14C) +; CHECK: NumberOfSections = 2 +; CHECK: TimeDateStamp = {{[0-9]+}} +; CHECK: PointerToSymbolTable = 0x99 +; CHECK: NumberOfSymbols = 7 +; CHECK: SizeOfOptionalHeader = 0 +; CHECK: Characteristics = 0x0 +; CHECK: Sections = [ +; CHECK: 0 = { +; CHECK: Name = .text +; CHECK: VirtualSize = 0 +; CHECK: VirtualAddress = 0 +; CHECK: SizeOfRawData = 21 +; CHECK: PointerToRawData = 0x64 +; CHECK: PointerToRelocations = 0x79 +; CHECK: PointerToLineNumbers = 0x0 +; CHECK: NumberOfRelocations = 2 +; CHECK: NumberOfLineNumbers = 0 +; CHECK: Charateristics = 0x60500020 +; CHECK: IMAGE_SCN_CNT_CODE +; CHECK: IMAGE_SCN_ALIGN_16BYTES +; CHECK: IMAGE_SCN_MEM_EXECUTE +; CHECK: IMAGE_SCN_MEM_READ +; CHECK: SectionData = +; CHECK: 83 EC 04 C7 04 24 00 00 - 00 00 E8 00 00 00 00 31 |.....$.........1| +; CHECK: C0 83 C4 04 C3 |.....| +; CHECK: Relocations = [ +; CHECK: 0 = { +; CHECK: VirtualAddress = 0x6 +; CHECK: SymbolTableIndex = 5 +; CHECK: Type = IMAGE_REL_I386_DIR32 (6) +; CHECK: SymbolName = _main +; CHECK: } +; CHECK: 1 = { +; CHECK: VirtualAddress = 0xB +; CHECK: SymbolTableIndex = 6 +; CHECK: Type = IMAGE_REL_I386_REL32 (20) +; CHECK: SymbolName = L_.str +; CHECK: } +; CHECK: ] +; CHECK: } +; CHECK: 1 = { +; CHECK: Name = .data +; CHECK: VirtualSize = 0 +; CHECK: VirtualAddress = 0 +; CHECK: SizeOfRawData = 12 +; CHECK: PointerToRawData = 0x8D +; CHECK: PointerToRelocations = 0x0 +; CHECK: PointerToLineNumbers = 0x0 +; CHECK: NumberOfRelocations = 0 +; CHECK: NumberOfLineNumbers = 0 +; CHECK: Charateristics = 0xC0100040 +; CHECK: IMAGE_SCN_CNT_INITIALIZED_DATA +; CHECK: IMAGE_SCN_ALIGN_1BYTES +; CHECK: IMAGE_SCN_MEM_READ +; CHECK: IMAGE_SCN_MEM_WRITE +; CHECK: SectionData = +; CHECK: 48 65 6C 6C 6F 20 57 6F - 72 6C 64 00 |Hello World.| +; CHECK: Relocations = None +; CHECK: } +; CHECK: ] +; CHECK: Symbols = [ +; CHECK: 0 = { +; CHECK: Name = .text +; CHECK: Value = 0 +; CHECK: SectionNumber = 1 +; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0) +; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0) +; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3) +; CHECK: NumberOfAuxSymbols = 1 +; CHECK: AuxillaryData = +; CHECK: 15 00 00 00 02 00 00 00 - 00 00 00 00 01 00 00 00 |................| +; CHECK: 00 00 |..| +; CHECK: } +; CHECK: 1 = { +; CHECK: Name = .data +; CHECK: Value = 0 +; CHECK: SectionNumber = 2 +; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0) +; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0) +; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3) +; CHECK: NumberOfAuxSymbols = 1 +; CHECK: AuxillaryData = +; CHECK: 0C 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |................| +; CHECK: 00 00 |..| +; CHECK: } +; CHECK: 2 = { +; CHECK: Name = _main +; CHECK: Value = 0 +; CHECK: SectionNumber = 1 +; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0) +; CHECK: ComplexType = IMAGE_SYM_DTYPE_FUNCTION (2) +; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2) +; CHECK: NumberOfAuxSymbols = 0 +; CHECK: AuxillaryData = +; CHECK: } +; CHECK: 3 = { +; CHECK: Name = L_.str +; CHECK: Value = 0 +; CHECK: SectionNumber = 2 +; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0) +; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0) +; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3) +; CHECK: NumberOfAuxSymbols = 0 +; CHECK: AuxillaryData = +; CHECK: } +; CHECK: 4 = { +; CHECK: Name = _printf +; CHECK: Value = 0 +; CHECK: SectionNumber = 0 +; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0) +; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0) +; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2) +; CHECK: NumberOfAuxSymbols = 0 +; CHECK: AuxillaryData = +; CHECK: } +; CHECK: ] +; CHECK: } diff --git a/test/MC/COFF/dg.exp b/test/MC/COFF/dg.exp new file mode 100644 index 0000000..7b7bd4e --- /dev/null +++ b/test/MC/COFF/dg.exp @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]] +} diff --git a/test/MC/COFF/switch-relocations.ll b/test/MC/COFF/switch-relocations.ll new file mode 100644 index 0000000..300c107 --- /dev/null +++ b/test/MC/COFF/switch-relocations.ll @@ -0,0 +1,34 @@ +; The purpose of this test is to see if the COFF object writer can properly +; relax the fixups that are created for jump tables on x86-64. See PR7960. + +; This test case was reduced from Lua/lapi.c. + +; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o %t +; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o %t + +define void @lua_gc(i32 %what) nounwind { +entry: + switch i32 %what, label %sw.epilog [ + i32 0, label %sw.bb + i32 1, label %sw.bb + i32 2, label %sw.bb + i32 3, label %sw.bb14 + i32 4, label %sw.bb18 + i32 6, label %sw.bb57 + ] + +sw.bb: ; preds = %entry, %entry, %entry + ret void + +sw.bb14: ; preds = %entry + ret void + +sw.bb18: ; preds = %entry + ret void + +sw.bb57: ; preds = %entry + ret void + +sw.epilog: ; preds = %entry + ret void +} diff --git a/test/MC/COFF/symbol-fragment-offset.ll b/test/MC/COFF/symbol-fragment-offset.ll new file mode 100644 index 0000000..af7ace1 --- /dev/null +++ b/test/MC/COFF/symbol-fragment-offset.ll @@ -0,0 +1,182 @@ +; The purpose of this test is to see if the COFF object writer is emitting the
+; proper relocations for multiple pieces of data in a single data fragment.
+
+; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o %t
+; RUN: coff-dump.py %abs_tmp | FileCheck %s
+; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o %t
+
+@.str = private constant [7 x i8] c"Hello \00" ; <[7 x i8]*> [#uses=1]
+@str = internal constant [7 x i8] c"World!\00" ; <[7 x i8]*> [#uses=1]
+
+define i32 @main() nounwind {
+entry:
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @str, i32 0, i32 0)) ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare i32 @puts(i8* nocapture) nounwind
+
+; CHECK: {
+; CHECK: MachineType = IMAGE_FILE_MACHINE_I386 (0x14C)
+; CHECK: NumberOfSections = 2
+; CHECK: TimeDateStamp = {{[0-9]+}}
+; CHECK: PointerToSymbolTable = 0xBB
+; CHECK: NumberOfSymbols = 9
+; CHECK: SizeOfOptionalHeader = 0
+; CHECK: Characteristics = 0x0
+; CHECK: Sections = [
+; CHECK: 0 = {
+; CHECK: Name = .text
+; CHECK: VirtualSize = 0
+; CHECK: VirtualAddress = 0
+; CHECK: SizeOfRawData = 33
+; CHECK: PointerToRawData = 0x64
+; CHECK: PointerToRelocations = 0x85
+; CHECK: PointerToLineNumbers = 0x0
+; CHECK: NumberOfRelocations = 4
+; CHECK: NumberOfLineNumbers = 0
+; CHECK: Charateristics = 0x60500020
+; CHECK: IMAGE_SCN_CNT_CODE
+; CHECK: IMAGE_SCN_ALIGN_16BYTES
+; CHECK: IMAGE_SCN_MEM_EXECUTE
+; CHECK: IMAGE_SCN_MEM_READ
+; CHECK: SectionData =
+; CHECK: 83 EC 04 C7 04 24 00 00 - 00 00 E8 00 00 00 00 C7 |.....$..........|
+; CHECK: 04 24 00 00 00 00 E8 00 - 00 00 00 31 C0 83 C4 04 |.$.........1....|
+; CHECK: C3 |.|
+
+; CHECK: Relocations = [
+; CHECK: 0 = {
+; CHECK: VirtualAddress = 0x6
+; CHECK: SymbolTableIndex = 5
+; CHECK: Type = IMAGE_REL_I386_DIR32 (6)
+; CHECK: SymbolName = _main
+; CHECK: }
+; CHECK: 1 = {
+; CHECK: VirtualAddress = 0xB
+; CHECK: SymbolTableIndex = 6
+; CHECK: Type = IMAGE_REL_I386_REL32 (20)
+; CHECK: SymbolName = L_.str
+; CHECK: }
+; CHECK: 2 = {
+; CHECK: VirtualAddress = 0x12
+; CHECK: SymbolTableIndex = 7
+; CHECK: Type = IMAGE_REL_I386_DIR32 (6)
+; CHECK: SymbolName = _printf
+; CHECK: }
+; CHECK: 3 = {
+; CHECK: VirtualAddress = 0x17
+; CHECK: SymbolTableIndex = 8
+; CHECK: Type = IMAGE_REL_I386_REL32 (20)
+; CHECK: SymbolName = _str
+; CHECK: }
+; CHECK: ]
+; CHECK: }
+; CHECK: 1 = {
+; CHECK: Name = .data
+; CHECK: VirtualSize = 0
+; CHECK: VirtualAddress = 0
+; CHECK: SizeOfRawData = 14
+; CHECK: PointerToRawData = 0xAD
+; CHECK: PointerToRelocations = 0x0
+; CHECK: PointerToLineNumbers = 0x0
+; CHECK: NumberOfRelocations = 0
+; CHECK: NumberOfLineNumbers = 0
+; CHECK: Charateristics = 0xC0100040
+; CHECK: IMAGE_SCN_CNT_INITIALIZED_DATA
+; CHECK: IMAGE_SCN_ALIGN_1BYTES
+; CHECK: IMAGE_SCN_MEM_READ
+; CHECK: IMAGE_SCN_MEM_WRITE
+; CHECK: SectionData =
+; CHECK: 48 65 6C 6C 6F 20 00 57 - 6F 72 6C 64 21 00 |Hello .World!.|
+
+; CHECK: Relocations = None
+; CHECK: }
+; CHECK: ]
+; CHECK: Symbols = [
+; CHECK: 0 = {
+; CHECK: Name = .text
+; CHECK: Value = 0
+; CHECK: SectionNumber = 1
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 1
+; CHECK: AuxillaryData =
+; CHECK: 21 00 00 00 04 00 00 00 - 00 00 00 00 01 00 00 00 |!...............|
+; CHECK: 00 00 |..|
+
+; CHECK: }
+; CHECK: 1 = {
+; CHECK: Name = .data
+; CHECK: Value = 0
+; CHECK: SectionNumber = 2
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 1
+; CHECK: AuxillaryData =
+; CHECK: 0E 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |................|
+; CHECK: 00 00 |..|
+
+; CHECK: }
+; CHECK: 2 = {
+; CHECK: Name = _main
+; CHECK: Value = 0
+; CHECK: SectionNumber = 1
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_FUNCTION (2)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 3 = {
+; CHECK: Name = L_.str
+; CHECK: Value = 0
+; CHECK: SectionNumber = 2
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 4 = {
+; CHECK: Name = _printf
+; CHECK: Value = 0
+; CHECK: SectionNumber = 0
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 5 = {
+; CHECK: Name = _str
+; CHECK: Value = 7
+; CHECK: SectionNumber = 2
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 6 = {
+; CHECK: Name = _puts
+; CHECK: Value = 0
+; CHECK: SectionNumber = 0
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: ]
+; CHECK: }
diff --git a/test/MC/Disassembler/arm-tests.txt b/test/MC/Disassembler/arm-tests.txt index a1e229c..0b4c297 100644 --- a/test/MC/Disassembler/arm-tests.txt +++ b/test/MC/Disassembler/arm-tests.txt @@ -12,9 +12,21 @@ # CHECK: cmn r0, #1 0x01 0x00 0x70 0xe3 +# CHECK: dmb +0x5f 0xf0 0x7f 0xf5 + # CHECK: dmb nshst 0x56 0xf0 0x7f 0xf5 +# CHECK: dsb +0x4f 0xf0 0x7f 0xf5 + +# CHECK: dsb st +0x4e 0xf0 0x7f 0xf5 + +# CHECK: isb +0x6f 0xf0 0x7f 0xf5 + # CHECK: ldclvc p5, cr15, [r8], #-0 0x00 0xf5 0x78 0x7c @@ -42,9 +54,17 @@ # CHECK: mvnpls r7, #245, 2 0xf5 0x71 0xf0 0x53 +# CHECK-NOT: orr r7, r8, r7, rrx #0 +# CHECK: orr r7, r8, r7, rrx +0x67 0x70 0x88 0xe1 + # CHECK: pkhbt r8, r9, r10, lsl #4 0x1a 0x82 0x89 0xe6 +# CHECK-NOT: pkhbtls pc, r11, r11, lsl #0 +# CHECK: pkhbtls pc, r11, r11 +0x1b 0xf0 0x8b 0x96 + # CHECK: pop {r0, r2, r4, r6, r8, r10} 0x55 0x05 0xbd 0xe8 @@ -57,6 +77,14 @@ # CHECK: rfedb r0! 0x00 0x0a 0x30 0xf9 +# CHECK-NOT: rsbeq r0, r2, r0, lsl #0 +# CHECK: rsbeq r0, r2, r0 +0x00 0x00 0x62 0x00 + +# CHECK-NOT: rsceqs r0, r0, r1, lsl #0 +# CHECK: rsceqs r0, r0, r1 +0x01 0x00 0xf0 0x00 + # CHECK: sbcs r0, pc, #1 0x01 0x00 0xdf 0xe2 @@ -66,6 +94,10 @@ # CHECK: ssat r8, #1, r10, lsl #8 0x1a 0x84 0xa0 0xe6 +# CHECK-NOT: ssatmi r0, #17, r12, lsl #0 +# CHECK: ssatmi r0, #17, r12 +0x1c 0x00 0xb0 0x46 + # CHECK: stmdb r10!, {r4, r5, r6, r7, lr} 0xf0 0x40 0x2a 0xe9 @@ -75,3 +107,5 @@ # CHECK: ubfx r0, r0, #16, #1 0x50 0x08 0xe0 0xe7 +# CHECK: usat r8, #0, r10, asr #32 +0x5a 0x80 0xe0 0xe6 diff --git a/test/MC/Disassembler/neon-tests.txt b/test/MC/Disassembler/neon-tests.txt index 51b31e7..826ff22 100644 --- a/test/MC/Disassembler/neon-tests.txt +++ b/test/MC/Disassembler/neon-tests.txt @@ -25,6 +25,9 @@ # CHECK: vmov.i64 q6, #0xFF00FF00FF 0x75 0xce 0x81 0xf2 +# CHECK: vmvn.i32 d0, #0x0 +0x30 0x00 0x80 0xf2 + # CHECK: vmul.f32 d0, d0, d6 0x16 0x0d 0x00 0xf3 diff --git a/test/MC/Disassembler/thumb-tests.txt b/test/MC/Disassembler/thumb-tests.txt index 14e9129..06d12fe 100644 --- a/test/MC/Disassembler/thumb-tests.txt +++ b/test/MC/Disassembler/thumb-tests.txt @@ -42,6 +42,10 @@ # CHECK: pkhtb r2, r4, r6, asr #16 0xc4 0xea 0x26 0x42 +# CHECK-NOT: pkhbt r2, r4, r6, lsl #0 +# CHECK: pkhbt r2, r4, r6 +0xc4 0xea 0x06 0x02 + # CHECK: pop {r2, r4, r6, r8, r10, r12} 0xbd 0xe8 0x54 0x15 @@ -51,6 +55,14 @@ # CHECK: rsbs r0, r0, #0 0x40 0x42 +# CHECK-NOT: rsb r0, r2, r0, lsl #0 +# CHECK: rsb r0, r2, r0 +0xc2 0xeb 0x00 0x00 + +# CHECK-NOT: ssat r0, #17, r12, lsl #0 +# CHECK: ssat r0, #17, r12 +0x0c 0xf3 0x10 0x00 + # CHECK: strd r0, [r7, #64] 0xc7 0xe9 0x10 0x01 diff --git a/test/MC/ELF/bss.ll b/test/MC/ELF/bss.ll new file mode 100644 index 0000000..5112d2c --- /dev/null +++ b/test/MC/ELF/bss.ll @@ -0,0 +1,8 @@ +; RUN: llc -filetype=obj %s -o %t +; FIXME: Add ELF dumping tool to check results. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +@g0 = global i8* null, align 4 ; <i8**> [#uses=0] + diff --git a/test/MC/ELF/dg.exp b/test/MC/ELF/dg.exp new file mode 100644 index 0000000..7b7bd4e --- /dev/null +++ b/test/MC/ELF/dg.exp @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]] +} diff --git a/test/Makefile b/test/Makefile index f6830e6..7ca46be 100644 --- a/test/Makefile +++ b/test/Makefile @@ -10,11 +10,11 @@ LEVEL = .. DIRS = -# -# Make Dejagnu the default for testing -# all:: check-local +# 'lit' is the default test runner. +check-local:: check-local-lit + # Include other test rules include Makefile.tests @@ -84,18 +84,18 @@ else # !SunOS ifeq ($(HOST_OS),AuroraUX) ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ; else # !AuroraUX -# Fedora 13 x86-64 python fails with -v 51200 -ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 768000 ; +# Fedora 13 x86-64 python fails with -v 76800 +ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 1024000 ; endif # AuroraUX endif # SunOS ifneq ($(RUNTEST),) -check-local:: site.exp +check-local-dg:: site.exp ( $(ULIMIT) \ PATH="$(LLVMToolDir):$(LLVM_SRC_ROOT)/test/Scripts:$(LLVMGCCDIR)/bin:$(PATH)" \ $(RUNTEST) $(RUNTESTFLAGS) ) else -check-local:: site.exp +check-local-dg:: site.exp @echo "*** dejagnu not found. Make sure 'runtest' is in your PATH, then reconfigure LLVM." endif @@ -107,26 +107,6 @@ check-local-all:: lit.site.cfg Unit/lit.site.cfg extra-lit-site-cfgs ( $(ULIMIT) \ $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_ALL_TESTSUITES) ) -ifdef TESTONE -CLEANED_TESTONE := $(patsubst %/,%,$(TESTONE)) -CLEANED_TESTONE := $(patsubst test/%,%,$(CLEANED_TESTONE)) -SUBDIR := $(shell dirname $(CLEANED_TESTONE)) -TESTPATH := $(LLVM_SRC_ROOT)/test/$(CLEANED_TESTONE) -check-one: site.exp $(TCLSH) - $(Verb)( echo "source $(LLVM_OBJ_ROOT)/test/site.exp" ; \ - echo "set subdir $(SUBDIR)" ; \ - echo "proc pass { msg } { puts \"PASS: \$$msg\" } "; \ - echo "proc fail { msg } { puts \"FAIL: \$$msg\" }" ; \ - echo "proc xfail { msg } { puts \"XFAIL: \$$msg\" }" ; \ - echo "proc xpass { msg } { puts \"XPASS: \$$msg\" }" ; \ - echo "proc verbose args { }" ; \ - echo "source $(LLVM_SRC_ROOT)/test/lib/llvm.exp" ; \ - echo "RunLLVMTests $(TESTPATH)" ) | \ - ( $(ULIMIT) \ - PATH="$(LLVMToolDir):$(LLVM_SRC_ROOT)/test/Scripts:$(PATH)" \ - $(TCLSH) ) -endif - clean:: $(RM) -rf `find $(LLVM_OBJ_ROOT)/test -name Output -type d -print` @@ -166,7 +146,7 @@ site.exp: FORCE @echo 'set gccpath "$(CC)"' >>site.tmp @echo 'set gxxpath "$(CXX)"' >>site.tmp @echo 'set compile_c "' $(CC) $(CPP.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c '"' >>site.tmp - @echo 'set compile_cxx "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c '"' >> site.tmp + @echo 'set compile_cxx "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c -x c++ '"' >> site.tmp @echo 'set link "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) '"' >>site.tmp @echo 'set llvmgcc "$(LLVMGCC) $(TargetCommonOpts) $(EXTRA_OPTIONS)"' >> site.tmp @echo 'set llvmgxx "$(LLVMGCC) $(TargetCommonOpts) $(EXTRA_OPTIONS)"' >> site.tmp @@ -203,6 +183,3 @@ Unit/lit.site.cfg: $(PROJ_OBJ_DIR)/Unit/.dir FORCE -e "s#@SHLIBPATH_VAR@#$(SHLIBPATH_VAR)#g" \ $(PROJ_SRC_DIR)/Unit/lit.site.cfg.in > $@ -# Daniel hates Chris. -chris-lit: - make check-lit LIT_ARGS='-j16 -s' diff --git a/test/Other/close-stderr.ll b/test/Other/close-stderr.ll new file mode 100644 index 0000000..60fab3d --- /dev/null +++ b/test/Other/close-stderr.ll @@ -0,0 +1,9 @@ +; RUN: sh -c "\ +; RUN: opt --reject-this-option 2>&-; echo \$?; \ +; RUN: opt -o /dev/null /dev/null 2>&-; echo \$?; \ +; RUN: " | FileCheck %s +; CHECK: {{^1$}} +; CHECK: {{^0$}} + +; Test that the error handling when writing to stderr fails exits the +; program cleanly rather than aborting. diff --git a/test/Other/constant-fold-gep.ll b/test/Other/constant-fold-gep.ll index ecef9c4..926bdbc 100644 --- a/test/Other/constant-fold-gep.ll +++ b/test/Other/constant-fold-gep.ll @@ -71,8 +71,6 @@ ; PLAIN: @g = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) ; PLAIN: @h = constant i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64) ; PLAIN: @i = constant i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) -; PLAIN: @j = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) -; PLAIN: @k = constant i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) ; OPT: @a = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) ; OPT: @b = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) ; OPT: @c = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2) @@ -82,8 +80,6 @@ ; OPT: @g = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) ; OPT: @h = constant i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64) ; OPT: @i = constant i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) -; OPT: @j = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) -; OPT: @k = constant i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) ; TO: @a = constant i64 18480 ; TO: @b = constant i64 8 ; TO: @c = constant i64 16 @@ -93,8 +89,6 @@ ; TO: @g = constant i64 8 ; TO: @h = constant i64 8 ; TO: @i = constant i64 8 -; TO: @j = constant i64 8 -; TO: @k = constant i64 8 @a = constant i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) @b = constant i64 ptrtoint ([13 x double]* getelementptr ({i1, [13 x double]}* null, i64 0, i32 1) to i64) @@ -105,8 +99,6 @@ @g = constant i64 ptrtoint ({double, double}* getelementptr ({i1, {double, double}}* null, i64 0, i32 1) to i64) @h = constant i64 ptrtoint (double** getelementptr (double** null, i64 1) to i64) @i = constant i64 ptrtoint (double** getelementptr ({i1, double*}* null, i64 0, i32 1) to i64) -@j = constant i64 ptrtoint (union {double, double}* getelementptr ({i1, union {double, double}}* null, i64 0, i32 1) to i64) -@k = constant i64 ptrtoint (union {double, double}* getelementptr (union {double, double}* null, i64 1) to i64) ; The target-dependent folder should cast GEP indices to integer-sized pointers. @@ -275,14 +267,6 @@ define i1* @hoo1() nounwind { ; PLAIN: %t = bitcast i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) to i64 ; PLAIN: ret i64 %t ; PLAIN: } -; PLAIN: define i64 @fj() nounwind { -; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64 -; PLAIN: ret i64 %t -; PLAIN: } -; PLAIN: define i64 @fk() nounwind { -; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) to i64 -; PLAIN: ret i64 %t -; PLAIN: } ; OPT: define i64 @fa() nounwind { ; OPT: ret i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) ; OPT: } @@ -310,12 +294,6 @@ define i1* @hoo1() nounwind { ; OPT: define i64 @fi() nounwind { ; OPT: ret i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) ; OPT: } -; OPT: define i64 @fj() nounwind { -; OPT: ret i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) -; OPT: } -; OPT: define i64 @fk() nounwind { -; OPT: ret i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) -; OPT: } ; TO: define i64 @fa() nounwind { ; TO: ret i64 18480 ; TO: } @@ -343,12 +321,6 @@ define i1* @hoo1() nounwind { ; TO: define i64 @fi() nounwind { ; TO: ret i64 8 ; TO: } -; TO: define i64 @fj() nounwind { -; TO: ret i64 8 -; TO: } -; TO: define i64 @fk() nounwind { -; TO: ret i64 8 -; TO: } ; SCEV: Classifying expressions for: @fa ; SCEV: %t = bitcast i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) to i64 ; SCEV: --> (2310 * sizeof(double)) @@ -376,12 +348,6 @@ define i1* @hoo1() nounwind { ; SCEV: Classifying expressions for: @fi ; SCEV: %t = bitcast i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) to i64 ; SCEV: --> alignof(i1*) -; SCEV: Classifying expressions for: @fj -; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64 -; SCEV: --> alignof(double) -; SCEV: Classifying expressions for: @fk -; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) to i64 -; SCEV: --> sizeof(double) define i64 @fa() nounwind { %t = bitcast i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) to i64 @@ -419,14 +385,6 @@ define i64 @fi() nounwind { %t = bitcast i64 ptrtoint (double** getelementptr ({i1, double*}* null, i64 0, i32 1) to i64) to i64 ret i64 %t } -define i64 @fj() nounwind { - %t = bitcast i64 ptrtoint (union {double, double}* getelementptr ({i1, union {double, double}}* null, i64 0, i32 1) to i64) to i64 - ret i64 %t -} -define i64 @fk() nounwind { - %t = bitcast i64 ptrtoint (union {double, double}* getelementptr (union {double, double}* null, i64 1) to i64) to i64 - ret i64 %t -} ; PLAIN: define i64* @fM() nounwind { ; PLAIN: %t = bitcast i64* getelementptr (i64* null, i32 1) to i64* diff --git a/test/Other/inline-asm-newline-terminator.ll b/test/Other/inline-asm-newline-terminator.ll index f6cc5c1..af93cc0 100644 --- a/test/Other/inline-asm-newline-terminator.ll +++ b/test/Other/inline-asm-newline-terminator.ll @@ -1,5 +1,4 @@ ; RUN: llc -filetype=obj -o - < %s -; XFAIL: vg_leak ; ModuleID = 't.c' target triple = "x86_64-apple-darwin10.0.0" diff --git a/test/Other/lint.ll b/test/Other/lint.ll index dee3d11..fcef7ee 100644 --- a/test/Other/lint.ll +++ b/test/Other/lint.ll @@ -161,5 +161,7 @@ declare i32 @nonstruct_callee() nounwind define void @struct_caller() nounwind { entry: call %struct bitcast (i32 ()* @foo to %struct ()*)() - ret void + + ; CHECK: Undefined behavior: indirectbr with no destinations + indirectbr i8* null, [] } diff --git a/test/Scripts/coff-dump.py b/test/Scripts/coff-dump.py new file mode 100755 index 0000000..0af3d36 --- /dev/null +++ b/test/Scripts/coff-dump.py @@ -0,0 +1,566 @@ +#!/usr/bin/env python +#===-- coff-dump.py - COFF object file dump utility-------------------------===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +# +# COFF File Definition +# + +def string_table_entry (offset): + return ('ptr', '+ + PointerToSymbolTable * NumberOfSymbols 18 %s' % offset, ('scalar', 'cstr', '%s')) + +def secname(value): + if value[0] == '/': + return string_table_entry (value [1:].rstrip('\0')) + else: + return '%s' + +def symname(value): + parts = struct.unpack("<2L", value) + if parts [0] == 0: + return string_table_entry (parts [1]) + else: + return '%s' + +file = ('struct', [ + ('MachineType', ('enum', '<H', '0x%X', { + 0x0: 'IMAGE_FILE_MACHINE_UNKNOWN', + 0x1d3: 'IMAGE_FILE_MACHINE_AM33', + 0x8664: 'IMAGE_FILE_MACHINE_AMD64', + 0x1c0: 'IMAGE_FILE_MACHINE_ARM', + 0xebc: 'IMAGE_FILE_MACHINE_EBC', + 0x14c: 'IMAGE_FILE_MACHINE_I386', + 0x200: 'IMAGE_FILE_MACHINE_IA64', + 0x904: 'IMAGE_FILE_MACHINE_M32R', + 0x266: 'IMAGE_FILE_MACHINE_MIPS16', + 0x366: 'IMAGE_FILE_MACHINE_MIPSFPU', + 0x466: 'IMAGE_FILE_MACHINE_MIPSFPU16', + 0x1f0: 'IMAGE_FILE_MACHINE_POWERPC', + 0x1f1: 'IMAGE_FILE_MACHINE_POWERPCFP', + 0x166: 'IMAGE_FILE_MACHINE_R4000', + 0x1a2: 'IMAGE_FILE_MACHINE_SH3', + 0x1a3: 'IMAGE_FILE_MACHINE_SH3DSP', + 0x1a6: 'IMAGE_FILE_MACHINE_SH4', + 0x1a8: 'IMAGE_FILE_MACHINE_SH5', + 0x1c2: 'IMAGE_FILE_MACHINE_THUMB', + 0x169: 'IMAGE_FILE_MACHINE_WCEMIPSV2', + })), + ('NumberOfSections', ('scalar', '<H', '%d')), + ('TimeDateStamp', ('scalar', '<L', '%d')), + ('PointerToSymbolTable', ('scalar', '<L', '0x%0X')), + ('NumberOfSymbols', ('scalar', '<L', '%d')), + ('SizeOfOptionalHeader', ('scalar', '<H', '%d')), + ('Characteristics', ('flags', '<H', '0x%x', [ + (0x0001, 'IMAGE_FILE_RELOCS_STRIPPED', ), + (0x0002, 'IMAGE_FILE_EXECUTABLE_IMAGE', ), + (0x0004, 'IMAGE_FILE_LINE_NUMS_STRIPPED', ), + (0x0008, 'IMAGE_FILE_LOCAL_SYMS_STRIPPED', ), + (0x0010, 'IMAGE_FILE_AGGRESSIVE_WS_TRIM', ), + (0x0020, 'IMAGE_FILE_LARGE_ADDRESS_AWARE', ), + (0x0080, 'IMAGE_FILE_BYTES_REVERSED_LO', ), + (0x0100, 'IMAGE_FILE_32BIT_MACHINE', ), + (0x0200, 'IMAGE_FILE_DEBUG_STRIPPED', ), + (0x0400, 'IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP', ), + (0x0800, 'IMAGE_FILE_NET_RUN_FROM_SWAP', ), + (0x1000, 'IMAGE_FILE_SYSTEM', ), + (0x2000, 'IMAGE_FILE_DLL', ), + (0x4000, 'IMAGE_FILE_UP_SYSTEM_ONLY', ), + (0x8000, 'IMAGE_FILE_BYTES_REVERSED_HI', ), + ])), + ('Sections', ('array', 'NumberOfSections', ('struct', [ + ('Name', ('scalar', '<8s', secname)), + ('VirtualSize', ('scalar', '<L', '%d' )), + ('VirtualAddress', ('scalar', '<L', '%d' )), + ('SizeOfRawData', ('scalar', '<L', '%d' )), + ('PointerToRawData', ('scalar', '<L', '0x%X' )), + ('PointerToRelocations', ('scalar', '<L', '0x%X' )), + ('PointerToLineNumbers', ('scalar', '<L', '0x%X' )), + ('NumberOfRelocations', ('scalar', '<H', '%d' )), + ('NumberOfLineNumbers', ('scalar', '<H', '%d' )), + ('Charateristics', ('flags', '<L', '0x%X', [ + (0x00000008, 'IMAGE_SCN_TYPE_NO_PAD'), + (0x00000020, 'IMAGE_SCN_CNT_CODE'), + (0x00000040, 'IMAGE_SCN_CNT_INITIALIZED_DATA'), + (0x00000080, 'IMAGE_SCN_CNT_UNINITIALIZED_DATA'), + (0x00000100, 'IMAGE_SCN_LNK_OTHER'), + (0x00000200, 'IMAGE_SCN_LNK_INFO'), + (0x00000800, 'IMAGE_SCN_LNK_REMOVE'), + (0x00001000, 'IMAGE_SCN_LNK_COMDAT'), + (0x00008000, 'IMAGE_SCN_GPREL'), + (0x00020000, 'IMAGE_SCN_MEM_PURGEABLE'), + (0x00020000, 'IMAGE_SCN_MEM_16BIT'), + (0x00040000, 'IMAGE_SCN_MEM_LOCKED'), + (0x00080000, 'IMAGE_SCN_MEM_PRELOAD'), + (0x00F00000, 'IMAGE_SCN_ALIGN', { + 0x00100000: 'IMAGE_SCN_ALIGN_1BYTES', + 0x00200000: 'IMAGE_SCN_ALIGN_2BYTES', + 0x00300000: 'IMAGE_SCN_ALIGN_4BYTES', + 0x00400000: 'IMAGE_SCN_ALIGN_8BYTES', + 0x00500000: 'IMAGE_SCN_ALIGN_16BYTES', + 0x00600000: 'IMAGE_SCN_ALIGN_32BYTES', + 0x00700000: 'IMAGE_SCN_ALIGN_64BYTES', + 0x00800000: 'IMAGE_SCN_ALIGN_128BYTES', + 0x00900000: 'IMAGE_SCN_ALIGN_256BYTES', + 0x00A00000: 'IMAGE_SCN_ALIGN_512BYTES', + 0x00B00000: 'IMAGE_SCN_ALIGN_1024BYTES', + 0x00C00000: 'IMAGE_SCN_ALIGN_2048BYTES', + 0x00D00000: 'IMAGE_SCN_ALIGN_4096BYTES', + 0x00E00000: 'IMAGE_SCN_ALIGN_8192BYTES', + }), + (0x01000000, 'IMAGE_SCN_LNK_NRELOC_OVFL'), + (0x02000000, 'IMAGE_SCN_MEM_DISCARDABLE'), + (0x04000000, 'IMAGE_SCN_MEM_NOT_CACHED'), + (0x08000000, 'IMAGE_SCN_MEM_NOT_PAGED'), + (0x10000000, 'IMAGE_SCN_MEM_SHARED'), + (0x20000000, 'IMAGE_SCN_MEM_EXECUTE'), + (0x40000000, 'IMAGE_SCN_MEM_READ'), + (0x80000000, 'IMAGE_SCN_MEM_WRITE'), + ])), + ('SectionData', ('ptr', 'PointerToRawData', ('blob', 'SizeOfRawData'))), + ('Relocations', ('ptr', 'PointerToRelocations', ('array', 'NumberOfRelocations', ('struct', [ + ('VirtualAddress', ('scalar', '<L', '0x%X')), + ('SymbolTableIndex', ('scalar', '<L', '%d' )), + ('Type', ('enum', '<H', '%d', ('MachineType', { + 0x14c: { + 0x0000: 'IMAGE_REL_I386_ABSOLUTE', + 0x0001: 'IMAGE_REL_I386_DIR16', + 0x0002: 'IMAGE_REL_I386_REL16', + 0x0006: 'IMAGE_REL_I386_DIR32', + 0x0007: 'IMAGE_REL_I386_DIR32NB', + 0x0009: 'IMAGE_REL_I386_SEG12', + 0x000A: 'IMAGE_REL_I386_SECTION', + 0x000B: 'IMAGE_REL_I386_SECREL', + 0x000C: 'IMAGE_REL_I386_TOKEN', + 0x000D: 'IMAGE_REL_I386_SECREL7', + 0x0014: 'IMAGE_REL_I386_REL32', + }, + 0x8664: { + 0x0000: 'IMAGE_REL_AMD64_ABSOLUTE', + 0x0001: 'IMAGE_REL_AMD64_ADDR64', + 0x0002: 'IMAGE_REL_AMD64_ADDR32', + 0x0003: 'IMAGE_REL_AMD64_ADDR32NB', + 0x0004: 'IMAGE_REL_AMD64_REL32', + 0x0005: 'IMAGE_REL_AMD64_REL32_1', + 0x0006: 'IMAGE_REL_AMD64_REL32_2', + 0x0007: 'IMAGE_REL_AMD64_REL32_3', + 0x0008: 'IMAGE_REL_AMD64_REL32_4', + 0x0009: 'IMAGE_REL_AMD64_REL32_5', + 0x000A: 'IMAGE_REL_AMD64_SECTION', + 0x000B: 'IMAGE_REL_AMD64_SECREL', + 0x000C: 'IMAGE_REL_AMD64_SECREL7', + 0x000D: 'IMAGE_REL_AMD64_TOKEN', + 0x000E: 'IMAGE_REL_AMD64_SREL32', + 0x000F: 'IMAGE_REL_AMD64_PAIR', + 0x0010: 'IMAGE_REL_AMD64_SSPAN32', + }, + }))), + ('SymbolName', ('ptr', '+ PointerToSymbolTable * - SymbolTableIndex 1 18', ('scalar', '<8s', symname))) + ])))), + ]))), + ('Symbols', ('ptr', 'PointerToSymbolTable', ('byte-array', '* NumberOfSymbols 18', ('struct', [ + ('Name', ('scalar', '<8s', symname)), + ('Value', ('scalar', '<L', '%d' )), + ('SectionNumber', ('scalar', '<H', '%d' )), + ('SimpleType', ('enum', '<B', '%d', { + 0: 'IMAGE_SYM_TYPE_NULL', + 1: 'IMAGE_SYM_TYPE_VOID', + 2: 'IMAGE_SYM_TYPE_CHAR', + 3: 'IMAGE_SYM_TYPE_SHORT', + 4: 'IMAGE_SYM_TYPE_INT', + 5: 'IMAGE_SYM_TYPE_LONG', + 6: 'IMAGE_SYM_TYPE_FLOAT', + 7: 'IMAGE_SYM_TYPE_DOUBLE', + 8: 'IMAGE_SYM_TYPE_STRUCT', + 9: 'IMAGE_SYM_TYPE_UNION', + 10: 'IMAGE_SYM_TYPE_ENUM', + 11: 'IMAGE_SYM_TYPE_MOE', + 12: 'IMAGE_SYM_TYPE_BYTE', + 13: 'IMAGE_SYM_TYPE_WORD', + 14: 'IMAGE_SYM_TYPE_UINT', + 15: 'IMAGE_SYM_TYPE_DWORD', + })), + ('ComplexType', ('enum', '<B', '%d', { + 0: 'IMAGE_SYM_DTYPE_NULL', + 1: 'IMAGE_SYM_DTYPE_POINTER', + 2: 'IMAGE_SYM_DTYPE_FUNCTION', + 3: 'IMAGE_SYM_DTYPE_ARRAY', + })), + ('StorageClass', ('enum', '<B', '%d', { + -1: 'IMAGE_SYM_CLASS_END_OF_FUNCTION', + 0: 'IMAGE_SYM_CLASS_NULL', + 1: 'IMAGE_SYM_CLASS_AUTOMATIC', + 2: 'IMAGE_SYM_CLASS_EXTERNAL', + 3: 'IMAGE_SYM_CLASS_STATIC', + 4: 'IMAGE_SYM_CLASS_REGISTER', + 5: 'IMAGE_SYM_CLASS_EXTERNAL_DEF', + 6: 'IMAGE_SYM_CLASS_LABEL', + 7: 'IMAGE_SYM_CLASS_UNDEFINED_LABEL', + 8: 'IMAGE_SYM_CLASS_MEMBER_OF_STRUCT', + 9: 'IMAGE_SYM_CLASS_ARGUMENT', + 10: 'IMAGE_SYM_CLASS_STRUCT_TAG', + 11: 'IMAGE_SYM_CLASS_MEMBER_OF_UNION', + 12: 'IMAGE_SYM_CLASS_UNION_TAG', + 13: 'IMAGE_SYM_CLASS_TYPE_DEFINITION', + 14: 'IMAGE_SYM_CLASS_UNDEFINED_STATIC', + 15: 'IMAGE_SYM_CLASS_ENUM_TAG', + 16: 'IMAGE_SYM_CLASS_MEMBER_OF_ENUM', + 17: 'IMAGE_SYM_CLASS_REGISTER_PARAM', + 18: 'IMAGE_SYM_CLASS_BIT_FIELD', + 100: 'IMAGE_SYM_CLASS_BLOCK', + 101: 'IMAGE_SYM_CLASS_FUNCTION', + 102: 'IMAGE_SYM_CLASS_END_OF_STRUCT', + 103: 'IMAGE_SYM_CLASS_FILE', + 104: 'IMAGE_SYM_CLASS_SECTION', + 105: 'IMAGE_SYM_CLASS_WEAK_EXTERNAL', + 107: 'IMAGE_SYM_CLASS_CLR_TOKEN', + })), + ('NumberOfAuxSymbols', ('scalar', '<B', '%d' )), + ('AuxillaryData', ('blob', '* NumberOfAuxSymbols 18')), + ])))), +]) + +# +# Definition Interpreter +# + +import sys, types, struct, re + +Input = None +Stack = [] +Fields = {} + +Indent = 0 +NewLine = True + +def indent(): + global Indent + Indent += 1 + +def dedent(): + global Indent + Indent -= 1 + +def write(input): + global NewLine + output = "" + + for char in input: + + if NewLine: + output += Indent * ' ' + NewLine = False + + output += char + + if char == '\n': + NewLine = True + + sys.stdout.write (output) + +def read(format): + return struct.unpack (format, Input.read(struct.calcsize(format))) + +def read_cstr (): + output = "" + while True: + char = Input.read (1) + if len (char) == 0: + raise RuntimeError ("EOF while reading cstr") + if char == '\0': + break + output += char + return output + +def push_pos(seek_to = None): + Stack [0:0] = [Input.tell ()] + if seek_to: + Input.seek (seek_to) + +def pop_pos(): + assert(len (Stack) > 0) + Input.seek (Stack [0]) + del Stack [0] + +def print_binary_data(size): + value = "" + while size > 0: + if size >= 16: + data = Input.read(16) + size -= 16 + else: + data = Input.read(size) + size = 0 + value += data + bytes = "" + text = "" + for index in xrange (16): + if index < len (data): + if index == 8: + bytes += "- " + ch = ord (data [index]) + bytes += "%02X " % ch + if ch >= 0x20 and ch <= 0x7F: + text += data [index] + else: + text += "." + else: + if index == 8: + bytes += " " + bytes += " " + + write ("%s|%s|\n" % (bytes, text)) + return value + +idlit = re.compile ("[a-zA-Z][a-zA-Z0-9_-]*") +numlit = re.compile ("[0-9]+") + +def read_value(expr): + + input = iter (expr.split ()) + + def eval(): + + token = input.next () + + if expr == 'cstr': + return read_cstr () + if expr == 'true': + return True + if expr == 'false': + return False + + if len (token) > 1 and token [0] in ('=', '@', '<', '!', '>'): + val = read(expr) + assert (len (val) == 1) + return val [0] + + if token == '+': + return eval () + eval () + if token == '-': + return eval () - eval () + if token == '*': + return eval () * eval () + if token == '/': + return eval () / eval () + + if idlit.match (token): + return Fields [token] + if numlit.match (token): + return int (token) + + raise RuntimeError ("unexpected token %s" % repr(token)) + + value = eval () + + try: + input.next () + except StopIteration: + return value + raise RuntimeError("unexpected input at end of expression") + +def write_value(format,value): + format_type = type (format) + if format_type is types.StringType: + write (format%value) + elif format_type is types.FunctionType: + write_value (format (value), value) + elif format_type is types.TupleType: + Fields ['this'] = value + handle_element (format) + else: + raise RuntimeError("unexpected type: %s" % repr(format_type)) + +def handle_scalar(entry): + iformat = entry [1] + oformat = entry [2] + + value = read_value (iformat) + + write_value (oformat, value) + + return value + +def handle_enum(entry): + iformat = entry [1] + oformat = entry [2] + definitions = entry [3] + + value = read_value (iformat) + + if type (definitions) is types.TupleType: + selector = read_value (definitions [0]) + definitions = definitions [1] [selector] + + if value in definitions: + description = definitions[value] + else: + description = "unknown" + + write ("%s (" % description) + write_value (oformat, value) + write (")") + + return value + +def handle_flags(entry): + iformat = entry [1] + oformat = entry [2] + definitions = entry [3] + + value = read_value (iformat) + + write_value (oformat, value) + + indent () + for entry in definitions: + mask = entry [0] + name = entry [1] + if len (entry) == 3: + map = entry [2] + selection = value & mask + if selection in map: + write("\n%s" % map[selection]) + else: + write("\n%s <%d>" % (name, selection)) + elif len (entry) == 2: + if value & mask != 0: + write("\n%s" % name) + dedent () + + return value + +def handle_struct(entry): + global Fields + members = entry [1] + + newFields = {} + + write ("{\n"); + indent () + + for member in members: + name = member [0] + type = member [1] + + write("%s = "%name.ljust(24)) + + value = handle_element(type) + + write("\n") + + Fields [name] = value + newFields [name] = value + + dedent () + write ("}") + + return newFields + +def handle_array(entry): + length = entry [1] + element = entry [2] + + newItems = [] + + write ("[\n") + indent () + + value = read_value (length) + + for index in xrange (value): + write ("%d = "%index) + value = handle_element(element) + write ("\n") + newItems.append (value) + + dedent () + write ("]") + + return newItems + +def handle_byte_array(entry): + length = entry [1] + element = entry [2] + + newItems = [] + + write ("[\n") + indent () + + value = read_value (length) + end_of_array = Input.tell () + value + + index = 0 + while Input.tell () < end_of_array: + write ("%d = "%index) + value = handle_element(element) + write ("\n") + newItems.append (value) + index += 1 + + dedent () + write ("]") + + return newItems + +def handle_ptr(entry): + offset = entry[1] + element = entry [2] + + value = None + offset = read_value (offset) + + if offset != 0: + + push_pos (offset) + + value = handle_element (element) + + pop_pos () + + else: + write ("None") + + return value + +def handle_blob(entry): + length = entry [1] + + write ("\n") + indent () + + value = print_binary_data (read_value (length)) + + dedent () + + return value + +def handle_element(entry): + handlers = { + 'struct': handle_struct, + 'scalar': handle_scalar, + 'enum': handle_enum, + 'flags': handle_flags, + 'ptr': handle_ptr, + 'blob': handle_blob, + 'array': handle_array, + 'byte-array': handle_byte_array, + } + + if not entry [0] in handlers: + raise RuntimeError ("unexpected type '%s'" % str (entry[0])) + + return handlers [entry [0]] (entry) + +Input = open (sys.argv [1], "rb") +try: + handle_element (file) +finally: + Input.close () + Input = None diff --git a/test/Scripts/coff-dump.py.bat b/test/Scripts/coff-dump.py.bat new file mode 100644 index 0000000..cc83eba --- /dev/null +++ b/test/Scripts/coff-dump.py.bat @@ -0,0 +1,4 @@ +@echo off + +%PYTHON_EXECUTABLE% %LLVM_SRC_ROOT%\test\Scripts\coff-dump.py %1 %2 %3 %4 %5 %6 %7 %8 %9 + diff --git a/test/TableGen/FieldAccess.td b/test/TableGen/FieldAccess.td new file mode 100644 index 0000000..ad652e7 --- /dev/null +++ b/test/TableGen/FieldAccess.td @@ -0,0 +1,14 @@ +// RUN: tblgen %s +class Bla<string t> +{ + string blu = t; +} + +class Bli<Bla t> +{ + Bla bla = t; +} + +def a : Bli<Bla<"">>; +def b : Bla<!cast<Bla>(a.bla).blu>; // works +def c : Bla<a.bla.blu>; // doesn't work: Cannot access field 'blu' of value 'a.bla' diff --git a/test/TableGen/ListManip.td b/test/TableGen/ListManip.td new file mode 100644 index 0000000..c221bb1 --- /dev/null +++ b/test/TableGen/ListManip.td @@ -0,0 +1,10 @@ +// RUN: tblgen %s +class Bli<string _t> +{ + string t = _t; +} + +class Bla<list<Bli> _bli> +: Bli<!car(_bli).t> +{ +} diff --git a/test/TestRunner.sh b/test/TestRunner.sh index 4f04d81..ab50856 100755 --- a/test/TestRunner.sh +++ b/test/TestRunner.sh @@ -1,36 +1,5 @@ #!/bin/sh -# -# TestRunner.sh - This script is used to run the deja-gnu tests exactly like -# deja-gnu does, by executing the Tcl script specified in the test case's -# RUN: lines. This is made possible by a simple make target supported by the -# test/Makefile. All this script does is invoke that make target. -# -# Usage: -# TestRunner.sh {script_names} -# -# This script is typically used by cd'ing to a test directory and then -# running TestRunner.sh with a list of test file names you want to run. -# -TESTPATH=`pwd` -SUBDIR="" -if test `dirname $1` = "." ; then - while test `basename $TESTPATH` != "test" -a ! -z "$TESTPATH" ; do - tmp=`basename $TESTPATH` - SUBDIR="$tmp/$SUBDIR" - TESTPATH=`dirname $TESTPATH` - done -fi +# Deprecated, use 'llvm-lit'. -for TESTFILE in "$@" ; do - if test `dirname $TESTFILE` = . ; then - if test -d "$TESTPATH" ; then - cd $TESTPATH - make check-one TESTONE="$SUBDIR$TESTFILE" - cd $PWD - else - echo "Can't find llvm/test directory in " `pwd` - fi - else - make check-one TESTONE=$TESTFILE - fi -done +echo "warning: '$0' is deprecated, use 'llvm-lit' instead." +exec llvm-lit "$@" diff --git a/test/Transforms/ABCD/basic.ll b/test/Transforms/ABCD/basic.ll deleted file mode 100644 index f2ce1b9..0000000 --- a/test/Transforms/ABCD/basic.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: opt < %s -abcd -S | FileCheck %s - -define void @test() { -; CHECK: @test -; CHECK-NOT: br i1 %tmp95 -; CHECK: ret void -entry: - br label %bb19 - -bb: - br label %bb1 - -bb1: - %tmp7 = icmp sgt i32 %tmp94, 1 - br i1 %tmp7, label %bb.i.i, label %return - -bb.i.i: - br label %return - -bb19: - %tmp94 = ashr i32 undef, 3 - %tmp95 = icmp sgt i32 %tmp94, 16 - br i1 %tmp95, label %bb, label %return - -return: - ret void -} diff --git a/test/Transforms/ConstProp/constant-expr.ll b/test/Transforms/ConstProp/constant-expr.ll index 9963032..556ed1f 100644 --- a/test/Transforms/ConstProp/constant-expr.ll +++ b/test/Transforms/ConstProp/constant-expr.ll @@ -16,9 +16,9 @@ @E = global i1 udiv (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z)) ; CHECK: @E = global i1 icmp ult (i8* @X, i8* @Y) @F = global i1 srem (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z)) -; CHECK: @F = global i1 false ; <i1*> [#uses=0] +; CHECK: @F = global i1 false @G = global i1 urem (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z)) -; CHECK: @G = global i1 false ; <i1*> [#uses=0] +; CHECK: @G = global i1 false @H = global i1 icmp ule (i32* bitcast (i8* @X to i32*), i32* bitcast (i8* @Y to i32*)) ; CHECK: @H = global i1 icmp ule (i8* @X, i8* @Y) diff --git a/test/Transforms/ConstantMerge/dont-merge.ll b/test/Transforms/ConstantMerge/dont-merge.ll index 877cf8d..e5337df 100644 --- a/test/Transforms/ConstantMerge/dont-merge.ll +++ b/test/Transforms/ConstantMerge/dont-merge.ll @@ -28,3 +28,17 @@ define void @test2(i32** %P1, i32 addrspace(30)** %P2) { store i32 addrspace(30)* @T2b, i32 addrspace(30)** %P2 ret void } + +; PR8144 - Don't merge globals marked attribute(used) +; CHECK: @T3A = +; CHECK: @T3B = + +@T3A = internal constant i32 0 +@T3B = internal constant i32 0 +@llvm.used = appending global [2 x i32*] [i32* @T3A, i32* @T3B], section +"llvm.metadata" + +define void @test3() { + call void asm sideeffect "T3A, T3B",""() ; invisible use of T3A and T3B + ret void +} diff --git a/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll b/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll new file mode 100644 index 0000000..fef5b85 --- /dev/null +++ b/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll @@ -0,0 +1,25 @@ +; RUN: opt -S < %s -correlated-propagation | FileCheck %s + +; CHECK: @test +define i16 @test(i32 %a, i1 %b) { +entry: + %c = icmp eq i32 %a, 0 + br i1 %c, label %left, label %right + +right: + %d = trunc i32 %a to i1 + br label %merge + +left: + br i1 %b, label %merge, label %other + +other: + ret i16 23 + +merge: + %f = phi i1 [%b, %left], [%d, %right] +; CHECK: select i1 %f, i16 1, i16 0 + %h = select i1 %f, i16 1, i16 0 +; CHECK: ret i16 %h + ret i16 %h +}
\ No newline at end of file diff --git a/test/Transforms/CorrelatedValuePropagation/basic.ll b/test/Transforms/CorrelatedValuePropagation/basic.ll new file mode 100644 index 0000000..24666e9 --- /dev/null +++ b/test/Transforms/CorrelatedValuePropagation/basic.ll @@ -0,0 +1,83 @@ +; RUN: opt < %s -correlated-propagation -S | FileCheck %s +; PR2581 + +; CHECK: @test1 +define i32 @test1(i1 %C) nounwind { + br i1 %C, label %exit, label %body + +body: ; preds = %0 +; CHECK-NOT: select + %A = select i1 %C, i32 10, i32 11 ; <i32> [#uses=1] +; CHECK: ret i32 11 + ret i32 %A + +exit: ; preds = %0 +; CHECK: ret i32 10 + ret i32 10 +} + +; PR4420 +declare i1 @ext() +; CHECK: @test2 +define i1 @test2() { +entry: + %cond = tail call i1 @ext() ; <i1> [#uses=2] + br i1 %cond, label %bb1, label %bb2 + +bb1: ; preds = %entry + %cond2 = tail call i1 @ext() ; <i1> [#uses=1] + br i1 %cond2, label %bb3, label %bb2 + +bb2: ; preds = %bb1, %entry +; CHECK-NOT: phi i1 + %cond_merge = phi i1 [ %cond, %entry ], [ false, %bb1 ] ; <i1> [#uses=1] +; CHECK: ret i1 false + ret i1 %cond_merge + +bb3: ; preds = %bb1 + %res = tail call i1 @ext() ; <i1> [#uses=1] +; CHECK: ret i1 %res + ret i1 %res +} + +; PR4855 +@gv = internal constant i8 7 +; CHECK: @test3 +define i8 @test3(i8* %a) nounwind { +entry: + %cond = icmp eq i8* %a, @gv + br i1 %cond, label %bb2, label %bb + +bb: ; preds = %entry + ret i8 0 + +bb2: ; preds = %entry +; CHECK-NOT: load i8* %a + %should_be_const = load i8* %a +; CHECK: ret i8 7 + ret i8 %should_be_const +} + +; PR1757 +; CHECK: @test4 +define i32 @test4(i32) { +EntryBlock: +; CHECK: icmp sgt i32 %0, 2 + %.demorgan = icmp sgt i32 %0, 2 + br i1 %.demorgan, label %GreaterThanTwo, label %LessThanOrEqualToTwo + +GreaterThanTwo: +; CHECK-NOT: icmp eq i32 %0, 2 + icmp eq i32 %0, 2 +; CHECK: br i1 false + br i1 %1, label %Impossible, label %NotTwoAndGreaterThanTwo + +NotTwoAndGreaterThanTwo: + ret i32 2 + +Impossible: + ret i32 1 + +LessThanOrEqualToTwo: + ret i32 0 +}
\ No newline at end of file diff --git a/test/Transforms/CorrelatedValuePropagation/dg.exp b/test/Transforms/CorrelatedValuePropagation/dg.exp new file mode 100644 index 0000000..de42dad --- /dev/null +++ b/test/Transforms/CorrelatedValuePropagation/dg.exp @@ -0,0 +1,3 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]] diff --git a/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll b/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll index 641e920..f079108 100644 --- a/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll +++ b/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -gvn | llvm-dis ; PR4256 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" -target triple = "i386-linux-gnu" +target triple = "i386-pc-linux-gnu" %llvm.dbg.anchor.type = type { i32, i32 } %struct.cset = type { i8*, i8, i8, i32, i8* } %struct.lmat = type { %struct.re_guts*, i32, %llvm.dbg.anchor.type*, i8*, i8*, i8*, i8*, i8**, i32, i8*, i8*, i8*, i8*, i8* } diff --git a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll index 5e64f80..390e77a 100644 --- a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll +++ b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll @@ -4,7 +4,7 @@ ; RUN: opt < %s -globalopt -S > %t ; Check that the new global values still have their address space -; RUN: cat %t | grep global.*addrspace +; RUN: cat %t | grep addrspace.*global @struct = internal addrspace(1) global { i32, i32 } zeroinitializer @array = internal addrspace(1) global [ 2 x i32 ] zeroinitializer diff --git a/test/Transforms/GlobalOpt/crash.ll b/test/Transforms/GlobalOpt/crash.ll index 701472c..bb1fc84 100644 --- a/test/Transforms/GlobalOpt/crash.ll +++ b/test/Transforms/GlobalOpt/crash.ll @@ -40,3 +40,18 @@ xx: } declare noalias i8* @malloc(i64) nounwind + + +; PR8063 +@permute_bitrev.bitrev = internal global i32* null, align 8 +define void @permute_bitrev() nounwind { +entry: + %tmp = load i32** @permute_bitrev.bitrev, align 8 + %conv = sext i32 0 to i64 + %mul = mul i64 %conv, 4 + %call = call i8* @malloc(i64 %mul) + %0 = bitcast i8* %call to i32* + store i32* %0, i32** @permute_bitrev.bitrev, align 8 + ret void +} + diff --git a/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll b/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll index f4bab35..bd174a8 100644 --- a/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll +++ b/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll @@ -21,10 +21,10 @@ define internal i32 @vfu2(%struct.MYstr* byval align 4 %u) nounwind readonly { entry: %0 = getelementptr %struct.MYstr* %u, i32 0, i32 1 ; <i32*> [#uses=1] %1 = load i32* %0 -; CHECK: load i32* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 1) ; <i32> [#uses=1] +; CHECK: load i32* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 1) %2 = getelementptr %struct.MYstr* %u, i32 0, i32 0 ; <i8*> [#uses=1] %3 = load i8* %2 -; CHECK: load i8* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 0) ; <i8> [#uses=1] +; CHECK: load i8* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 0) %4 = zext i8 %3 to i32 %5 = add i32 %4, %1 ret i32 %5 diff --git a/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll b/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll deleted file mode 100644 index c8f97e3..0000000 --- a/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll +++ /dev/null @@ -1,25 +0,0 @@ -; The induction variable canonicalization pass shouldn't leave dead -; instructions laying around! -; -; RUN: opt < %s -indvars -S | \ -; RUN: not grep {#uses=0} - -define i32 @mul(i32 %x, i32 %y) { -entry: - br label %tailrecurse - -tailrecurse: ; preds = %endif, %entry - %accumulator.tr = phi i32 [ %x, %entry ], [ %tmp.9, %endif ] ; <i32> [#uses=2] - %y.tr = phi i32 [ %y, %entry ], [ %tmp.8, %endif ] ; <i32> [#uses=2] - %tmp.1 = icmp eq i32 %y.tr, 0 ; <i1> [#uses=1] - br i1 %tmp.1, label %return, label %endif - -endif: ; preds = %tailrecurse - %tmp.8 = add i32 %y.tr, -1 ; <i32> [#uses=1] - %tmp.9 = add i32 %accumulator.tr, %x ; <i32> [#uses=1] - br label %tailrecurse - -return: ; preds = %tailrecurse - ret i32 %accumulator.tr -} - diff --git a/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll b/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll index d73eee8..d211e3b 100644 --- a/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll +++ b/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -indvars ; PR4258 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" -target triple = "i386-linux-gnu" +target triple = "i386-pc-linux-gnu" define void @0(i32*, i32*, i32, i32) nounwind { br i1 false, label %bb.nph1.preheader, label %.outer._crit_edge diff --git a/test/Transforms/IndVarSimplify/crash.ll b/test/Transforms/IndVarSimplify/crash.ll index ab43833..516fd80 100644 --- a/test/Transforms/IndVarSimplify/crash.ll +++ b/test/Transforms/IndVarSimplify/crash.ll @@ -1,4 +1,5 @@ ; RUN: opt -indvars %s -disable-output +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" declare i32 @putchar(i8) nounwind @@ -17,3 +18,38 @@ define void @t2(i1* %P) nounwind { ; <label>:6 ; preds = %1 ret void } + +; PR7562 +define void @fannkuch() nounwind { +entry: ; preds = %entry + br label %bb12 + +bb12: ; preds = %bb29, %entry + %i.1 = phi i32 [ undef, %entry ], [ %i.0, %bb29 ] ; <i32> [#uses=2] + %r.1 = phi i32 [ undef, %entry ], [ %r.0, %bb29 ] ; <i32> [#uses=2] + br i1 undef, label %bb13, label %bb24 + +bb13: ; preds = %bb12 + br label %bb24 + +bb24: ; preds = %bb30, %bb13, %bb12 + %i.2 = phi i32 [ %i.1, %bb13 ], [ %i.0, %bb30 ], [ %i.1, %bb12 ] ; <i32> [#uses=1] + %r.0 = phi i32 [ %r.1, %bb13 ], [ %2, %bb30 ], [ %r.1, %bb12 ] ; <i32> [#uses=3] + br label %bb28 + +bb27: ; preds = %bb28 + %0 = add nsw i32 %i.0, 1 ; <i32> [#uses=1] + br label %bb28 + +bb28: ; preds = %bb27, %bb26 + %i.0 = phi i32 [ %i.2, %bb24 ], [ %0, %bb27 ] ; <i32> [#uses=4] + %1 = icmp slt i32 %i.0, %r.0 ; <i1> [#uses=1] + br i1 %1, label %bb27, label %bb29 + +bb29: ; preds = %bb28 + br i1 undef, label %bb12, label %bb30 + +bb30: ; preds = %bb29 + %2 = add nsw i32 %r.0, 1 ; <i32> [#uses=1] + br label %bb24 +} diff --git a/test/Transforms/IndVarSimplify/loop_evaluate10.ll b/test/Transforms/IndVarSimplify/loop_evaluate10.ll index 4ec4aca..269478a 100644 --- a/test/Transforms/IndVarSimplify/loop_evaluate10.ll +++ b/test/Transforms/IndVarSimplify/loop_evaluate10.ll @@ -5,7 +5,7 @@ ; exit is taken. Indvars should correctly compute the exit values. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-linux-gnu" +target triple = "x86_64-pc-linux-gnu" %struct..0anon = type <{ i8, [3 x i8] }> define i32 @main() nounwind { diff --git a/test/Transforms/IndVarSimplify/uglygep.ll b/test/Transforms/IndVarSimplify/uglygep.ll new file mode 100644 index 0000000..0014b68 --- /dev/null +++ b/test/Transforms/IndVarSimplify/uglygep.ll @@ -0,0 +1,40 @@ +; RUN: opt -indvars -S < %s | not grep uglygep +; rdar://8197217 + +; Indvars should be able to emit a clean GEP here, not an uglygep. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin11.0" + +@numf2s = external global i32 ; <i32*> [#uses=1] +@numf1s = external global i32 ; <i32*> [#uses=1] +@tds = external global double** ; <double***> [#uses=1] + +define void @init_td(i32 %tmp7) nounwind { +entry: + br label %bb4 + +bb4: ; preds = %bb3, %entry + %i.0 = phi i32 [ 0, %entry ], [ %tmp9, %bb3 ] ; <i32> [#uses=3] + br label %bb + +bb: ; preds = %bb4 + br label %bb2 + +bb2: ; preds = %bb1, %bb + %j.0 = phi i32 [ 0, %bb ], [ %tmp6, %bb1 ] ; <i32> [#uses=3] + %tmp8 = icmp slt i32 %j.0, %tmp7 ; <i1> [#uses=1] + br i1 %tmp8, label %bb1, label %bb3 + +bb1: ; preds = %bb2 + %tmp = load double*** @tds, align 8 ; <double**> [#uses=1] + %tmp1 = sext i32 %i.0 to i64 ; <i64> [#uses=1] + %tmp2 = getelementptr inbounds double** %tmp, i64 %tmp1 ; <double**> [#uses=1] + %tmp3 = load double** %tmp2, align 1 ; <double*> [#uses=1] + %tmp6 = add nsw i32 %j.0, 1 ; <i32> [#uses=1] + br label %bb2 + +bb3: ; preds = %bb2 + %tmp9 = add nsw i32 %i.0, 1 ; <i32> [#uses=1] + br label %bb4 +} diff --git a/test/Transforms/InstCombine/align-addr.ll b/test/Transforms/InstCombine/align-addr.ll index d8ad5a9..27916b9 100644 --- a/test/Transforms/InstCombine/align-addr.ll +++ b/test/Transforms/InstCombine/align-addr.ll @@ -1,10 +1,13 @@ -; RUN: opt < %s -instcombine -S | grep {align 16} | count 1 +; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; Instcombine should be able to prove vector alignment in the ; presence of a few mild address computation tricks. -define void @foo(i8* %b, i64 %n, i64 %u, i64 %y) nounwind { +; CHECK: @test0( +; CHECK: align 16 + +define void @test0(i8* %b, i64 %n, i64 %u, i64 %y) nounwind { entry: %c = ptrtoint i8* %b to i64 %d = and i64 %c, -16 @@ -29,3 +32,29 @@ return: ret void } +; When we see a unaligned load from an insufficiently aligned global or +; alloca, increase the alignment of the load, turning it into an aligned load. + +; CHECK: @test1( +; CHECK: tmp = load +; CHECK: GLOBAL{{.*}}align 16 + +@GLOBAL = internal global [4 x i32] zeroinitializer + +define <16 x i8> @test1(<2 x i64> %x) { +entry: + %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1 + ret <16 x i8> %tmp +} + +; When a load or store lacks an explicit alignment, add one. + +; CHECK: @test2( +; CHECK: load double* %p, align 8 +; CHECK: store double %n, double* %p, align 8 + +define double @test2(double* %p, double %n) nounwind { + %t = load double* %p + store double %n, double* %p + ret double %t +} diff --git a/test/Transforms/InstCombine/align-inc.ll b/test/Transforms/InstCombine/align-inc.ll deleted file mode 100644 index 71512b3..0000000 --- a/test/Transforms/InstCombine/align-inc.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: opt < %s -instcombine -S | grep {GLOBAL.*align 16} -; RUN: opt < %s -instcombine -S | grep {tmp = load} -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" - -@GLOBAL = internal global [4 x i32] zeroinitializer - -define <16 x i8> @foo(<2 x i64> %x) { -entry: - %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1 - ret <16 x i8> %tmp -} - diff --git a/test/Transforms/InstCombine/bit-checks.ll b/test/Transforms/InstCombine/bit-checks.ll index f97fb45..d774c09 100644 --- a/test/Transforms/InstCombine/bit-checks.ll +++ b/test/Transforms/InstCombine/bit-checks.ll @@ -13,3 +13,14 @@ entry: %retval.0 = select i1 %or.cond, i32 2, i32 1 ; <i32> [#uses=1] ret i32 %retval.0 } + +define i32 @main2(i32 %argc, i8** nocapture %argv) nounwind readnone ssp { +entry: + %and = and i32 %argc, 1 ; <i32> [#uses=1] + %tobool = icmp eq i32 %and, 0 ; <i1> [#uses=1] + %and2 = and i32 %argc, 2 ; <i32> [#uses=1] + %tobool3 = icmp eq i32 %and2, 0 ; <i1> [#uses=1] + %or.cond = or i1 %tobool, %tobool3 ; <i1> [#uses=1] + %storemerge = select i1 %or.cond, i32 0, i32 1 ; <i32> [#uses=1] + ret i32 %storemerge +}
\ No newline at end of file diff --git a/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll b/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll deleted file mode 100644 index 4e9dfbb..0000000 --- a/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: opt < %s -instcombine -S | grep {ret i32 0} -; PR4487 - -; Bitcasts between vectors and scalars are valid, despite being ill-advised. - -define i32 @test(i64 %a) { -bb20: - %t1 = bitcast i64 %a to <2 x i32> - %t2 = bitcast i64 %a to <2 x i32> - %t3 = xor <2 x i32> %t1, %t2 - %t4 = extractelement <2 x i32> %t3, i32 0 - ret i32 %t4 -} - diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll new file mode 100644 index 0000000..0718b8a --- /dev/null +++ b/test/Transforms/InstCombine/bitcast.ll @@ -0,0 +1,105 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +; Bitcasts between vectors and scalars are valid. +; PR4487 +define i32 @test1(i64 %a) { + %t1 = bitcast i64 %a to <2 x i32> + %t2 = bitcast i64 %a to <2 x i32> + %t3 = xor <2 x i32> %t1, %t2 + %t4 = extractelement <2 x i32> %t3, i32 0 + ret i32 %t4 + +; CHECK: @test1 +; CHECK: ret i32 0 +} + +; Optimize bitcasts that are extracting low element of vector. This happens +; because of SRoA. +; rdar://7892780 +define float @test2(<2 x float> %A, <2 x i32> %B) { + %tmp28 = bitcast <2 x float> %A to i64 ; <i64> [#uses=2] + %tmp23 = trunc i64 %tmp28 to i32 ; <i32> [#uses=1] + %tmp24 = bitcast i32 %tmp23 to float ; <float> [#uses=1] + + %tmp = bitcast <2 x i32> %B to i64 + %tmp2 = trunc i64 %tmp to i32 ; <i32> [#uses=1] + %tmp4 = bitcast i32 %tmp2 to float ; <float> [#uses=1] + + %add = fadd float %tmp24, %tmp4 + ret float %add + +; CHECK: @test2 +; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 0 +; CHECK-NEXT: bitcast <2 x i32> %B to <2 x float> +; CHECK-NEXT: %tmp4 = extractelement <2 x float> {{.*}}, i32 0 +; CHECK-NEXT: %add = fadd float %tmp24, %tmp4 +; CHECK-NEXT: ret float %add +} + +; Optimize bitcasts that are extracting other elements of a vector. This +; happens because of SRoA. +; rdar://7892780 +define float @test3(<2 x float> %A, <2 x i64> %B) { + %tmp28 = bitcast <2 x float> %A to i64 + %tmp29 = lshr i64 %tmp28, 32 + %tmp23 = trunc i64 %tmp29 to i32 + %tmp24 = bitcast i32 %tmp23 to float + + %tmp = bitcast <2 x i64> %B to i128 + %tmp1 = lshr i128 %tmp, 64 + %tmp2 = trunc i128 %tmp1 to i32 + %tmp4 = bitcast i32 %tmp2 to float + + %add = fadd float %tmp24, %tmp4 + ret float %add + +; CHECK: @test3 +; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 1 +; CHECK-NEXT: bitcast <2 x i64> %B to <4 x float> +; CHECK-NEXT: %tmp4 = extractelement <4 x float> {{.*}}, i32 2 +; CHECK-NEXT: %add = fadd float %tmp24, %tmp4 +; CHECK-NEXT: ret float %add +} + + +define <2 x i32> @test4(i32 %A, i32 %B){ + %tmp38 = zext i32 %A to i64 + %tmp32 = zext i32 %B to i64 + %tmp33 = shl i64 %tmp32, 32 + %ins35 = or i64 %tmp33, %tmp38 + %tmp43 = bitcast i64 %ins35 to <2 x i32> + ret <2 x i32> %tmp43 + ; CHECK: @test4 + ; CHECK-NEXT: insertelement <2 x i32> undef, i32 %A, i32 0 + ; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %B, i32 1 + ; CHECK-NEXT: ret <2 x i32> + +} + +; rdar://8360454 +define <2 x float> @test5(float %A, float %B) { + %tmp37 = bitcast float %A to i32 + %tmp38 = zext i32 %tmp37 to i64 + %tmp31 = bitcast float %B to i32 + %tmp32 = zext i32 %tmp31 to i64 + %tmp33 = shl i64 %tmp32, 32 + %ins35 = or i64 %tmp33, %tmp38 + %tmp43 = bitcast i64 %ins35 to <2 x float> + ret <2 x float> %tmp43 + ; CHECK: @test5 + ; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0 + ; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %B, i32 1 + ; CHECK-NEXT: ret <2 x float> +} + +define <2 x float> @test6(float %A){ + %tmp23 = bitcast float %A to i32 ; <i32> [#uses=1] + %tmp24 = zext i32 %tmp23 to i64 ; <i64> [#uses=1] + %tmp25 = shl i64 %tmp24, 32 ; <i64> [#uses=1] + %mask20 = or i64 %tmp25, 1109917696 ; <i64> [#uses=1] + %tmp35 = bitcast i64 %mask20 to <2 x float> ; <<2 x float>> [#uses=1] + ret <2 x float> %tmp35 +; CHECK: @test6 +; CHECK-NEXT: insertelement <2 x float> <float 4.200000e+01, float undef>, float %A, i32 1 +; CHECK: ret +} diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll index 08dcfa7..d672d8c 100644 --- a/test/Transforms/InstCombine/intrinsics.ll +++ b/test/Transforms/InstCombine/intrinsics.ll @@ -10,16 +10,16 @@ declare i32 @llvm.ctlz.i32(i32) nounwind readnone declare i32 @llvm.ctpop.i32(i32) nounwind readnone declare i8 @llvm.ctlz.i8(i8) nounwind readnone -define i8 @test1(i8 %A, i8 %B) { +define i8 @uaddtest1(i8 %A, i8 %B) { %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 %B) %y = extractvalue %overflow.result %x, 0 ret i8 %y -; CHECK: @test1 +; CHECK: @uaddtest1 ; CHECK-NEXT: %y = add i8 %A, %B ; CHECK-NEXT: ret i8 %y } -define i8 @test2(i8 %A, i8 %B, i1* %overflowPtr) { +define i8 @uaddtest2(i8 %A, i8 %B, i1* %overflowPtr) { %and.A = and i8 %A, 127 %and.B = and i8 %B, 127 %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %and.A, i8 %and.B) @@ -27,7 +27,7 @@ define i8 @test2(i8 %A, i8 %B, i1* %overflowPtr) { %z = extractvalue %overflow.result %x, 1 store i1 %z, i1* %overflowPtr ret i8 %y -; CHECK: @test2 +; CHECK: @uaddtest2 ; CHECK-NEXT: %and.A = and i8 %A, 127 ; CHECK-NEXT: %and.B = and i8 %B, 127 ; CHECK-NEXT: %1 = add nuw i8 %and.A, %and.B @@ -35,7 +35,7 @@ define i8 @test2(i8 %A, i8 %B, i1* %overflowPtr) { ; CHECK-NEXT: ret i8 %1 } -define i8 @test3(i8 %A, i8 %B, i1* %overflowPtr) { +define i8 @uaddtest3(i8 %A, i8 %B, i1* %overflowPtr) { %or.A = or i8 %A, -128 %or.B = or i8 %B, -128 %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %or.A, i8 %or.B) @@ -43,7 +43,7 @@ define i8 @test3(i8 %A, i8 %B, i1* %overflowPtr) { %z = extractvalue %overflow.result %x, 1 store i1 %z, i1* %overflowPtr ret i8 %y -; CHECK: @test3 +; CHECK: @uaddtest3 ; CHECK-NEXT: %or.A = or i8 %A, -128 ; CHECK-NEXT: %or.B = or i8 %B, -128 ; CHECK-NEXT: %1 = add i8 %or.A, %or.B @@ -51,34 +51,44 @@ define i8 @test3(i8 %A, i8 %B, i1* %overflowPtr) { ; CHECK-NEXT: ret i8 %1 } -define i8 @test4(i8 %A, i1* %overflowPtr) { +define i8 @uaddtest4(i8 %A, i1* %overflowPtr) { %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 undef, i8 %A) %y = extractvalue %overflow.result %x, 0 %z = extractvalue %overflow.result %x, 1 store i1 %z, i1* %overflowPtr ret i8 %y -; CHECK: @test4 +; CHECK: @uaddtest4 ; CHECK-NEXT: ret i8 undef } -define i8 @test5(i8 %A, i1* %overflowPtr) { +define i8 @uaddtest5(i8 %A, i1* %overflowPtr) { + %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 0, i8 %A) + %y = extractvalue %overflow.result %x, 0 + %z = extractvalue %overflow.result %x, 1 + store i1 %z, i1* %overflowPtr + ret i8 %y +; CHECK: @uaddtest5 +; CHECK: ret i8 %A +} + +define i8 @umultest1(i8 %A, i1* %overflowPtr) { %x = call %overflow.result @llvm.umul.with.overflow.i8(i8 0, i8 %A) %y = extractvalue %overflow.result %x, 0 %z = extractvalue %overflow.result %x, 1 store i1 %z, i1* %overflowPtr ret i8 %y -; CHECK: @test5 +; CHECK: @umultest1 ; CHECK-NEXT: store i1 false, i1* %overflowPtr ; CHECK-NEXT: ret i8 0 } -define i8 @test6(i8 %A, i1* %overflowPtr) { +define i8 @umultest2(i8 %A, i1* %overflowPtr) { %x = call %overflow.result @llvm.umul.with.overflow.i8(i8 1, i8 %A) %y = extractvalue %overflow.result %x, 0 %z = extractvalue %overflow.result %x, 1 store i1 %z, i1* %overflowPtr ret i8 %y -; CHECK: @test6 +; CHECK: @umultest2 ; CHECK-NEXT: store i1 false, i1* %overflowPtr ; CHECK-NEXT: ret i8 %A } diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll index fc321e9..c6c3f2f 100644 --- a/test/Transforms/InstCombine/phi.ll +++ b/test/Transforms/InstCombine/phi.ll @@ -402,3 +402,24 @@ if.else: ; preds = %entry store i32 %tmp5, i32* %res br label %if.end } + +; PR4413 +declare i32 @ext() +; CHECK: @test17 +define i32 @test17(i1 %a) { +entry: + br i1 %a, label %bb1, label %bb2 + +bb1: ; preds = %entry + %0 = tail call i32 @ext() ; <i32> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1, %entry + %cond = phi i1 [ true, %bb1 ], [ false, %entry ] ; <i1> [#uses=1] +; CHECK-NOT: %val = phi i32 [ %0, %bb1 ], [ 0, %entry ] + %val = phi i32 [ %0, %bb1 ], [ 0, %entry ] ; <i32> [#uses=1] + %res = select i1 %cond, i32 %val, i32 0 ; <i32> [#uses=1] +; CHECK: ret i32 %cond + ret i32 %res +} + diff --git a/test/Transforms/InstCombine/shift-simplify.ll b/test/Transforms/InstCombine/shift-simplify.ll deleted file mode 100644 index e5cc705..0000000 --- a/test/Transforms/InstCombine/shift-simplify.ll +++ /dev/null @@ -1,42 +0,0 @@ -; RUN: opt < %s -instcombine -S | \ -; RUN: egrep {shl|lshr|ashr} | count 3 - -define i32 @test0(i32 %A, i32 %B, i32 %C) { - %X = shl i32 %A, %C - %Y = shl i32 %B, %C - %Z = and i32 %X, %Y - ret i32 %Z -} - -define i32 @test1(i32 %A, i32 %B, i32 %C) { - %X = lshr i32 %A, %C - %Y = lshr i32 %B, %C - %Z = or i32 %X, %Y - ret i32 %Z -} - -define i32 @test2(i32 %A, i32 %B, i32 %C) { - %X = ashr i32 %A, %C - %Y = ashr i32 %B, %C - %Z = xor i32 %X, %Y - ret i32 %Z -} - -define i1 @test3(i32 %X) { - %tmp1 = shl i32 %X, 7 - %tmp2 = icmp slt i32 %tmp1, 0 - ret i1 %tmp2 -} - -define i1 @test4(i32 %X) { - %tmp1 = lshr i32 %X, 7 - %tmp2 = icmp slt i32 %tmp1, 0 - ret i1 %tmp2 -} - -define i1 @test5(i32 %X) { - %tmp1 = ashr i32 %X, 7 - %tmp2 = icmp slt i32 %tmp1, 0 - ret i1 %tmp2 -} - diff --git a/test/Transforms/InstCombine/shift-trunc-shift.ll b/test/Transforms/InstCombine/shift-trunc-shift.ll deleted file mode 100644 index 7133d29..0000000 --- a/test/Transforms/InstCombine/shift-trunc-shift.ll +++ /dev/null @@ -1,10 +0,0 @@ -; RUN: opt < %s -instcombine -S | grep lshr.*63 - -define i32 @t1(i64 %d18) { -entry: - %tmp916 = lshr i64 %d18, 32 ; <i64> [#uses=1] - %tmp917 = trunc i64 %tmp916 to i32 ; <i32> [#uses=1] - %tmp10 = lshr i32 %tmp917, 31 ; <i32> [#uses=1] - ret i32 %tmp10 -} - diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll index feed37b..871e9fe 100644 --- a/test/Transforms/InstCombine/shift.ll +++ b/test/Transforms/InstCombine/shift.ll @@ -130,8 +130,8 @@ define i8 @test13(i8 %A) { ;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4) define i32 @test14(i32 %A) { ; CHECK: @test14 -; CHECK-NEXT: or i32 %A, 19744 -; CHECK-NEXT: and i32 +; CHECK-NEXT: %B = and i32 %A, -19760 +; CHECK-NEXT: or i32 %B, 19744 ; CHECK-NEXT: ret i32 %B = lshr i32 %A, 4 ; <i32> [#uses=1] %C = or i32 %B, 1234 ; <i32> [#uses=1] @@ -343,3 +343,101 @@ bb2: } +define i32 @test29(i64 %d18) { +entry: + %tmp916 = lshr i64 %d18, 32 + %tmp917 = trunc i64 %tmp916 to i32 + %tmp10 = lshr i32 %tmp917, 31 + ret i32 %tmp10 +; CHECK: @test29 +; CHECK: %tmp916 = lshr i64 %d18, 63 +; CHECK: %tmp10 = trunc i64 %tmp916 to i32 +} + + +define i32 @test30(i32 %A, i32 %B, i32 %C) { + %X = shl i32 %A, %C + %Y = shl i32 %B, %C + %Z = and i32 %X, %Y + ret i32 %Z +; CHECK: @test30 +; CHECK: %X1 = and i32 %A, %B +; CHECK: %Z = shl i32 %X1, %C +} + +define i32 @test31(i32 %A, i32 %B, i32 %C) { + %X = lshr i32 %A, %C + %Y = lshr i32 %B, %C + %Z = or i32 %X, %Y + ret i32 %Z +; CHECK: @test31 +; CHECK: %X1 = or i32 %A, %B +; CHECK: %Z = lshr i32 %X1, %C +} + +define i32 @test32(i32 %A, i32 %B, i32 %C) { + %X = ashr i32 %A, %C + %Y = ashr i32 %B, %C + %Z = xor i32 %X, %Y + ret i32 %Z +; CHECK: @test32 +; CHECK: %X1 = xor i32 %A, %B +; CHECK: %Z = ashr i32 %X1, %C +; CHECK: ret i32 %Z +} + +define i1 @test33(i32 %X) { + %tmp1 = shl i32 %X, 7 + %tmp2 = icmp slt i32 %tmp1, 0 + ret i1 %tmp2 +; CHECK: @test33 +; CHECK: %tmp1.mask = and i32 %X, 16777216 +; CHECK: %tmp2 = icmp ne i32 %tmp1.mask, 0 +} + +define i1 @test34(i32 %X) { + %tmp1 = lshr i32 %X, 7 + %tmp2 = icmp slt i32 %tmp1, 0 + ret i1 %tmp2 +; CHECK: @test34 +; CHECK: ret i1 false +} + +define i1 @test35(i32 %X) { + %tmp1 = ashr i32 %X, 7 + %tmp2 = icmp slt i32 %tmp1, 0 + ret i1 %tmp2 +; CHECK: @test35 +; CHECK: %tmp2 = icmp slt i32 %X, 0 +; CHECK: ret i1 %tmp2 +} + +define i128 @test36(i128 %A, i128 %B) { +entry: + %tmp27 = shl i128 %A, 64 + %tmp23 = shl i128 %B, 64 + %ins = or i128 %tmp23, %tmp27 + %tmp45 = lshr i128 %ins, 64 + ret i128 %tmp45 + +; CHECK: @test36 +; CHECK: %tmp231 = or i128 %B, %A +; CHECK: %ins = and i128 %tmp231, 18446744073709551615 +; CHECK: ret i128 %ins +} + +define i64 @test37(i128 %A, i32 %B) { +entry: + %tmp27 = shl i128 %A, 64 + %tmp22 = zext i32 %B to i128 + %tmp23 = shl i128 %tmp22, 96 + %ins = or i128 %tmp23, %tmp27 + %tmp45 = lshr i128 %ins, 64 + %tmp46 = trunc i128 %tmp45 to i64 + ret i64 %tmp46 + +; CHECK: @test37 +; CHECK: %tmp23 = shl i128 %tmp22, 32 +; CHECK: %ins = or i128 %tmp23, %A +; CHECK: %tmp46 = trunc i128 %ins to i64 +} diff --git a/test/Transforms/InstCombine/sqrt.ll b/test/Transforms/InstCombine/sqrt.ll new file mode 100644 index 0000000..69e511b --- /dev/null +++ b/test/Transforms/InstCombine/sqrt.ll @@ -0,0 +1,32 @@ +; RUN: opt -S -instcombine %s | FileCheck %s + +define float @test1(float %x) nounwind readnone ssp { +entry: +; CHECK: @test1 +; CHECK-NOT: fpext +; CHECK-NOT: sqrt( +; CHECK: sqrtf( +; CHECK-NOT: fptrunc + %conv = fpext float %x to double ; <double> [#uses=1] + %call = tail call double @sqrt(double %conv) readnone nounwind ; <double> [#uses=1] + %conv1 = fptrunc double %call to float ; <float> [#uses=1] +; CHECK: ret float + ret float %conv1 +} + +declare double @sqrt(double) + +; PR8096 +define float @test2(float %x) nounwind readnone ssp { +entry: +; CHECK: @test2 +; CHECK-NOT: fpext +; CHECK-NOT: sqrt( +; CHECK: sqrtf( +; CHECK-NOT: fptrunc + %conv = fpext float %x to double ; <double> [#uses=1] + %call = tail call double @sqrt(double %conv) nounwind ; <double> [#uses=1] + %conv1 = fptrunc double %call to float ; <float> [#uses=1] +; CHECK: ret float + ret float %conv1 +} diff --git a/test/Transforms/InstCombine/trunc-mask-ext.ll b/test/Transforms/InstCombine/trunc-mask-ext.ll deleted file mode 100644 index 93e3753..0000000 --- a/test/Transforms/InstCombine/trunc-mask-ext.ll +++ /dev/null @@ -1,38 +0,0 @@ -; RUN: opt < %s -instcombine -S > %t -; RUN: not grep zext %t -; RUN: not grep sext %t - -; Instcombine should be able to eliminate all of these ext casts. - -declare void @use(i32) - -define i64 @foo(i64 %a) { - %b = trunc i64 %a to i32 - %c = and i32 %b, 15 - %d = zext i32 %c to i64 - call void @use(i32 %b) - ret i64 %d -} -define i64 @bar(i64 %a) { - %b = trunc i64 %a to i32 - %c = shl i32 %b, 4 - %q = ashr i32 %c, 4 - %d = sext i32 %q to i64 - call void @use(i32 %b) - ret i64 %d -} -define i64 @goo(i64 %a) { - %b = trunc i64 %a to i32 - %c = and i32 %b, 8 - %d = zext i32 %c to i64 - call void @use(i32 %b) - ret i64 %d -} -define i64 @hoo(i64 %a) { - %b = trunc i64 %a to i32 - %c = and i32 %b, 8 - %x = xor i32 %c, 8 - %d = zext i32 %x to i64 - call void @use(i32 %b) - ret i64 %d -} diff --git a/test/Transforms/InstCombine/trunc.ll b/test/Transforms/InstCombine/trunc.ll new file mode 100644 index 0000000..f98bfd9 --- /dev/null +++ b/test/Transforms/InstCombine/trunc.ll @@ -0,0 +1,99 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; Instcombine should be able to eliminate all of these ext casts. + +declare void @use(i32) + +define i64 @test1(i64 %a) { + %b = trunc i64 %a to i32 + %c = and i32 %b, 15 + %d = zext i32 %c to i64 + call void @use(i32 %b) + ret i64 %d +; CHECK: @test1 +; CHECK: %d = and i64 %a, 15 +; CHECK: ret i64 %d +} +define i64 @test2(i64 %a) { + %b = trunc i64 %a to i32 + %c = shl i32 %b, 4 + %q = ashr i32 %c, 4 + %d = sext i32 %q to i64 + call void @use(i32 %b) + ret i64 %d +; CHECK: @test2 +; CHECK: shl i64 %a, 36 +; CHECK: %d = ashr i64 {{.*}}, 36 +; CHECK: ret i64 %d +} +define i64 @test3(i64 %a) { + %b = trunc i64 %a to i32 + %c = and i32 %b, 8 + %d = zext i32 %c to i64 + call void @use(i32 %b) + ret i64 %d +; CHECK: @test3 +; CHECK: %d = and i64 %a, 8 +; CHECK: ret i64 %d +} +define i64 @test4(i64 %a) { + %b = trunc i64 %a to i32 + %c = and i32 %b, 8 + %x = xor i32 %c, 8 + %d = zext i32 %x to i64 + call void @use(i32 %b) + ret i64 %d +; CHECK: @test4 +; CHECK: = and i64 %a, 8 +; CHECK: %d = xor i64 {{.*}}, 8 +; CHECK: ret i64 %d +} + +define i32 @test5(i32 %A) { + %B = zext i32 %A to i128 + %C = lshr i128 %B, 16 + %D = trunc i128 %C to i32 + ret i32 %D +; CHECK: @test5 +; CHECK: %C = lshr i32 %A, 16 +; CHECK: ret i32 %C +} + +define i32 @test6(i64 %A) { + %B = zext i64 %A to i128 + %C = lshr i128 %B, 32 + %D = trunc i128 %C to i32 + ret i32 %D +; CHECK: @test6 +; CHECK: %C = lshr i64 %A, 32 +; CHECK: %D = trunc i64 %C to i32 +; CHECK: ret i32 %D +} + +define i92 @test7(i64 %A) { + %B = zext i64 %A to i128 + %C = lshr i128 %B, 32 + %D = trunc i128 %C to i92 + ret i92 %D +; CHECK: @test7 +; CHECK: %B = zext i64 %A to i92 +; CHECK: %C = lshr i92 %B, 32 +; CHECK: ret i92 %C +} + +define i64 @test8(i32 %A, i32 %B) { + %tmp38 = zext i32 %A to i128 + %tmp32 = zext i32 %B to i128 + %tmp33 = shl i128 %tmp32, 32 + %ins35 = or i128 %tmp33, %tmp38 + %tmp42 = trunc i128 %ins35 to i64 + ret i64 %tmp42 +; CHECK: @test8 +; CHECK: %tmp38 = zext i32 %A to i64 +; CHECK: %tmp32 = zext i32 %B to i64 +; CHECK: %tmp33 = shl i64 %tmp32, 32 +; CHECK: %ins35 = or i64 %tmp33, %tmp38 +; CHECK: ret i64 %ins35 +} + diff --git a/test/Transforms/InstCombine/urem-simplify-bug.ll b/test/Transforms/InstCombine/urem-simplify-bug.ll index 7c2b4b0..229f1a8 100644 --- a/test/Transforms/InstCombine/urem-simplify-bug.ll +++ b/test/Transforms/InstCombine/urem-simplify-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {= or i32 %x, -5 } +; RUN: opt < %s -instcombine -S | grep {= or i32 %x, -5} @.str = internal constant [5 x i8] c"foo\0A\00" ; <[5 x i8]*> [#uses=1] @.str1 = internal constant [5 x i8] c"bar\0A\00" ; <[5 x i8]*> [#uses=1] diff --git a/test/Transforms/JumpThreading/2010-08-26-and.ll b/test/Transforms/JumpThreading/2010-08-26-and.ll new file mode 100644 index 0000000..17a0aba --- /dev/null +++ b/test/Transforms/JumpThreading/2010-08-26-and.ll @@ -0,0 +1,162 @@ +; RUN: opt -jump-threading -enable-jump-threading-lvi -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +%class.StringSwitch = type { i8*, i32, i32, i8 } + +@.str = private constant [4 x i8] c"red\00" ; <[4 x i8]*> [#uses=1] +@.str1 = private constant [7 x i8] c"orange\00" ; <[7 x i8]*> [#uses=1] +@.str2 = private constant [7 x i8] c"yellow\00" ; <[7 x i8]*> [#uses=1] +@.str3 = private constant [6 x i8] c"green\00" ; <[6 x i8]*> [#uses=1] +@.str4 = private constant [5 x i8] c"blue\00" ; <[5 x i8]*> [#uses=1] +@.str5 = private constant [7 x i8] c"indigo\00" ; <[7 x i8]*> [#uses=1] +@.str6 = private constant [7 x i8] c"violet\00" ; <[7 x i8]*> [#uses=1] +@.str7 = private constant [12 x i8] c"Color = %d\0A\00" ; <[12 x i8]*> [#uses=1] + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { +entry: + %cmp142 = icmp sgt i32 %argc, 1 ; <i1> [#uses=1] + br i1 %cmp142, label %bb.nph, label %for.end + +bb.nph: ; preds = %entry + %tmp = add i32 %argc, -2 ; <i32> [#uses=1] + %tmp144 = zext i32 %tmp to i64 ; <i64> [#uses=1] + %tmp145 = add i64 %tmp144, 1 ; <i64> [#uses=1] + br label %land.lhs.true.i + +land.lhs.true.i: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134, %bb.nph + %retval.0.i.pre161 = phi i32 [ undef, %bb.nph ], [ %retval.0.i.pre, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 ] ; <i32> [#uses=3] + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp146, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 ] ; <i64> [#uses=1] + %tmp146 = add i64 %indvar, 1 ; <i64> [#uses=3] + %arrayidx = getelementptr i8** %argv, i64 %tmp146 ; <i8**> [#uses=1] + %tmp6 = load i8** %arrayidx, align 8 ; <i8*> [#uses=8] + %call.i.i = call i64 @strlen(i8* %tmp6) nounwind ; <i64> [#uses=1] + %conv.i.i = trunc i64 %call.i.i to i32 ; <i32> [#uses=6]\ +; CHECK: switch i32 %conv.i.i +; CHECK-NOT: if.then.i40 +; CHECK: } + switch i32 %conv.i.i, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit [ + i32 3, label %land.lhs.true5.i + i32 6, label %land.lhs.true5.i37 + ] + +land.lhs.true5.i: ; preds = %land.lhs.true.i + %call.i = call i32 @memcmp(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* %tmp6, i64 4) nounwind ; <i32> [#uses=1] + %cmp9.i = icmp eq i32 %call.i, 0 ; <i1> [#uses=1] + br i1 %cmp9.i, label %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + +_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit: ; preds = %land.lhs.true5.i + br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + +land.lhs.true5.i37: ; preds = %land.lhs.true.i + %call.i35 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str1, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1] + %cmp9.i36 = icmp eq i32 %call.i35, 0 ; <i1> [#uses=1] + br i1 %cmp9.i36, label %if.then.i40, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + +if.then.i40: ; preds = %land.lhs.true5.i37 + br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + +_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit: ; preds = %if.then.i40, %land.lhs.true5.i37, %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit, %land.lhs.true5.i, %land.lhs.true.i + %retval.0.i.pre159 = phi i32 [ 1, %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre161, %land.lhs.true5.i37 ], [ 2, %if.then.i40 ], [ %retval.0.i.pre161, %land.lhs.true5.i ], [ %retval.0.i.pre161, %land.lhs.true.i ] ; <i32> [#uses=2] + %tmp2.i44 = phi i8 [ 1, %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit ], [ 0, %land.lhs.true5.i37 ], [ 1, %if.then.i40 ], [ 0, %land.lhs.true5.i ], [ 0, %land.lhs.true.i ] ; <i8> [#uses=3] + %tobool.i46 = icmp eq i8 %tmp2.i44, 0 ; <i1> [#uses=1] + %cmp.i49 = icmp eq i32 %conv.i.i, 6 ; <i1> [#uses=1] + %or.cond = and i1 %tobool.i46, %cmp.i49 ; <i1> [#uses=1] + br i1 %or.cond, label %land.lhs.true5.i55, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 + +land.lhs.true5.i55: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + %call.i53 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str2, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1] + %cmp9.i54 = icmp eq i32 %call.i53, 0 ; <i1> [#uses=1] + br i1 %cmp9.i54, label %if.then.i58, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 + +if.then.i58: ; preds = %land.lhs.true5.i55 + br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 + +_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60: ; preds = %if.then.i58, %land.lhs.true5.i55, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit + %retval.0.i.pre158 = phi i32 [ %retval.0.i.pre159, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre159, %land.lhs.true5.i55 ], [ 3, %if.then.i58 ] ; <i32> [#uses=2] + %tmp2.i63 = phi i8 [ %tmp2.i44, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit ], [ %tmp2.i44, %land.lhs.true5.i55 ], [ 1, %if.then.i58 ] ; <i8> [#uses=3] + %tmp14.i64 = and i8 %tmp2.i63, 1 ; <i8> [#uses=1] + %tobool.i65 = icmp eq i8 %tmp14.i64, 0 ; <i1> [#uses=1] + %cmp.i68 = icmp eq i32 %conv.i.i, 5 ; <i1> [#uses=1] + %or.cond168 = and i1 %tobool.i65, %cmp.i68 ; <i1> [#uses=1] + br i1 %or.cond168, label %land.lhs.true5.i74, label %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit + +land.lhs.true5.i74: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 + %call.i72 = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8]* @.str3, i64 0, i64 0), i8* %tmp6, i64 6) nounwind ; <i32> [#uses=1] + %cmp9.i73 = icmp eq i32 %call.i72, 0 ; <i1> [#uses=1] + br i1 %cmp9.i73, label %if.then.i77, label %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit + +if.then.i77: ; preds = %land.lhs.true5.i74 + br label %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit + +_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit: ; preds = %if.then.i77, %land.lhs.true5.i74, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 + %retval.0.i.pre157 = phi i32 [ %retval.0.i.pre158, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 ], [ %retval.0.i.pre158, %land.lhs.true5.i74 ], [ 4, %if.then.i77 ] ; <i32> [#uses=2] + %tmp2.i81 = phi i8 [ %tmp2.i63, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 ], [ %tmp2.i63, %land.lhs.true5.i74 ], [ 1, %if.then.i77 ] ; <i8> [#uses=3] + %tmp14.i82 = and i8 %tmp2.i81, 1 ; <i8> [#uses=1] + %tobool.i83 = icmp eq i8 %tmp14.i82, 0 ; <i1> [#uses=1] + %cmp.i86 = icmp eq i32 %conv.i.i, 4 ; <i1> [#uses=1] + %or.cond169 = and i1 %tobool.i83, %cmp.i86 ; <i1> [#uses=1] + br i1 %or.cond169, label %land.lhs.true5.i92, label %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit + +land.lhs.true5.i92: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit + %call.i90 = call i32 @memcmp(i8* getelementptr inbounds ([5 x i8]* @.str4, i64 0, i64 0), i8* %tmp6, i64 5) nounwind ; <i32> [#uses=1] + %cmp9.i91 = icmp eq i32 %call.i90, 0 ; <i1> [#uses=1] + br i1 %cmp9.i91, label %if.then.i95, label %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit + +if.then.i95: ; preds = %land.lhs.true5.i92 + br label %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit + +_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit: ; preds = %if.then.i95, %land.lhs.true5.i92, %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit + %retval.0.i.pre156 = phi i32 [ %retval.0.i.pre157, %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre157, %land.lhs.true5.i92 ], [ 5, %if.then.i95 ] ; <i32> [#uses=2] + %tmp2.i99 = phi i8 [ %tmp2.i81, %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit ], [ %tmp2.i81, %land.lhs.true5.i92 ], [ 1, %if.then.i95 ] ; <i8> [#uses=3] + %tmp14.i100 = and i8 %tmp2.i99, 1 ; <i8> [#uses=1] + %tobool.i101 = icmp eq i8 %tmp14.i100, 0 ; <i1> [#uses=1] + %cmp.i104 = icmp eq i32 %conv.i.i, 6 ; <i1> [#uses=1] + %or.cond170 = and i1 %tobool.i101, %cmp.i104 ; <i1> [#uses=1] + br i1 %or.cond170, label %land.lhs.true5.i110, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 + +land.lhs.true5.i110: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit + %call.i108 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str5, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1] + %cmp9.i109 = icmp eq i32 %call.i108, 0 ; <i1> [#uses=1] + br i1 %cmp9.i109, label %if.then.i113, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 + +if.then.i113: ; preds = %land.lhs.true5.i110 + br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 + +_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115: ; preds = %if.then.i113, %land.lhs.true5.i110, %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit + %retval.0.i.pre155 = phi i32 [ %retval.0.i.pre156, %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre156, %land.lhs.true5.i110 ], [ 6, %if.then.i113 ] ; <i32> [#uses=2] + %tmp2.i118 = phi i8 [ %tmp2.i99, %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit ], [ %tmp2.i99, %land.lhs.true5.i110 ], [ 1, %if.then.i113 ] ; <i8> [#uses=3] + %tmp14.i119 = and i8 %tmp2.i118, 1 ; <i8> [#uses=1] + %tobool.i120 = icmp eq i8 %tmp14.i119, 0 ; <i1> [#uses=1] + %cmp.i123 = icmp eq i32 %conv.i.i, 6 ; <i1> [#uses=1] + %or.cond171 = and i1 %tobool.i120, %cmp.i123 ; <i1> [#uses=1] + br i1 %or.cond171, label %land.lhs.true5.i129, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 + +land.lhs.true5.i129: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 + %call.i127 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str6, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1] + %cmp9.i128 = icmp eq i32 %call.i127, 0 ; <i1> [#uses=1] + br i1 %cmp9.i128, label %if.then.i132, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 + +if.then.i132: ; preds = %land.lhs.true5.i129 + br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 + +_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134: ; preds = %if.then.i132, %land.lhs.true5.i129, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 + %retval.0.i.pre = phi i32 [ %retval.0.i.pre155, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 ], [ %retval.0.i.pre155, %land.lhs.true5.i129 ], [ 7, %if.then.i132 ] ; <i32> [#uses=2] + %tmp2.i137 = phi i8 [ %tmp2.i118, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 ], [ %tmp2.i118, %land.lhs.true5.i129 ], [ 1, %if.then.i132 ] ; <i8> [#uses=1] + %tmp7.i138 = and i8 %tmp2.i137, 1 ; <i8> [#uses=1] + %tobool.i139 = icmp eq i8 %tmp7.i138, 0 ; <i1> [#uses=1] + %retval.0.i = select i1 %tobool.i139, i32 0, i32 %retval.0.i.pre ; <i32> [#uses=1] + %call22 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str7, i64 0, i64 0), i32 %retval.0.i) ; <i32> [#uses=0] + %exitcond = icmp eq i64 %tmp146, %tmp145 ; <i1> [#uses=1] + br i1 %exitcond, label %for.end, label %land.lhs.true.i + +for.end: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134, %entry + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind + +declare i32 @memcmp(i8* nocapture, i8* nocapture, i64) nounwind readonly + +declare i64 @strlen(i8* nocapture) nounwind readonly diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll index 503d301..cd274e7 100644 --- a/test/Transforms/JumpThreading/basic.ll +++ b/test/Transforms/JumpThreading/basic.ll @@ -147,11 +147,17 @@ define i32 @test6(i32 %A) { ; CHECK: @test6 %tmp455 = icmp eq i32 %A, 42 br i1 %tmp455, label %BB1, label %BB2 - -BB2: + +; CHECK: call i32 @f2() +; CHECK-NEXT: ret i32 3 + ; CHECK: call i32 @f1() -; CHECK-NEXT: call void @f3() -; CHECK-NEXT: ret i32 4 +; CHECK-NOT: br +; CHECK: call void @f3() +; CHECK-NOT: br +; CHECK: ret i32 4 + +BB2: call i32 @f1() br label %BB1 @@ -415,4 +421,58 @@ F2: ; CHECK-NEXT: br i1 %N, label %T2, label %F2 } +; CHECK: @test14 +define i32 @test14(i32 %in) { +entry: + %A = icmp eq i32 %in, 0 +; CHECK: br i1 %A, label %right_ret, label %merge + br i1 %A, label %left, label %right + +; CHECK-NOT: left: +left: + br label %merge + +; CHECK-NOT: right: +right: + %B = call i32 @f1() + br label %merge + +merge: +; CHECK-NOT: %C = phi i32 [%in, %left], [%B, %right] + %C = phi i32 [%in, %left], [%B, %right] + %D = add i32 %C, 1 + %E = icmp eq i32 %D, 2 + br i1 %E, label %left_ret, label %right_ret + +; CHECK: left_ret: +left_ret: + ret i32 0 + +right_ret: + ret i32 1 +} + +; PR5652 +; CHECK: @test15 +define i32 @test15(i32 %len) { +entry: +; CHECK: icmp ult i32 %len, 13 + %tmp = icmp ult i32 %len, 13 + br i1 %tmp, label %check, label %exit0 + +exit0: + ret i32 0 + +check: + %tmp9 = icmp ult i32 %len, 21 + br i1 %tmp9, label %exit1, label %exit2 + +exit2: +; CHECK-NOT: ret i32 2 + ret i32 2 + +exit1: + ret i32 1 +; CHECK: } +} diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll index f0fc61e..751bc65 100644 --- a/test/Transforms/JumpThreading/crash.ll +++ b/test/Transforms/JumpThreading/crash.ll @@ -216,6 +216,9 @@ bb61: ; PR5698 define void @test7(i32 %x) { +entry: + br label %tailrecurse + tailrecurse: switch i32 %x, label %return [ i32 2, label %bb2 @@ -433,4 +436,51 @@ for.cond1040: ; preds = %for.body1044, %for. ret void } +; PR7755 +define void @test16(i1 %c, i1 %c2, i1 %c3, i1 %c4) nounwind ssp { +entry: + %cmp = icmp sgt i32 undef, 1 ; <i1> [#uses=1] + br i1 %c, label %land.end, label %land.rhs + +land.rhs: ; preds = %entry + br i1 %c2, label %lor.lhs.false.i, label %land.end + +lor.lhs.false.i: ; preds = %land.rhs + br i1 %c3, label %land.end, label %land.end + +land.end: + %0 = phi i1 [ true, %entry ], [ false, %land.rhs ], [false, %lor.lhs.false.i], [false, %lor.lhs.false.i] ; <i1> [#uses=1] + %cmp12 = and i1 %cmp, %0 + %xor1 = xor i1 %cmp12, %c4 + br i1 %xor1, label %if.then, label %if.end + +if.then: + ret void + +if.end: + ret void +} + +define void @test17() { +entry: + br i1 undef, label %bb269.us.us, label %bb269.us.us.us + +bb269.us.us.us: + %indvar = phi i64 [ %indvar.next, %bb287.us.us.us ], [ 0, %entry ] + %0 = icmp eq i16 undef, 0 + br i1 %0, label %bb287.us.us.us, label %bb286.us.us.us + +bb287.us.us.us: + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 4 + br i1 %exitcond, label %bb288.bb289.loopexit_crit_edge, label %bb269.us.us.us +bb286.us.us.us: + unreachable + +bb269.us.us: + unreachable + +bb288.bb289.loopexit_crit_edge: + unreachable +} diff --git a/test/Transforms/JumpThreading/lvi-load.ll b/test/Transforms/JumpThreading/lvi-load.ll new file mode 100644 index 0000000..0bf4187 --- /dev/null +++ b/test/Transforms/JumpThreading/lvi-load.ll @@ -0,0 +1,49 @@ +; RUN: opt -S -jump-threading -enable-jump-threading-lvi -dce < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.4" + +%"struct.llvm::PATypeHolder" = type { %"struct.llvm::Type"* } +%"struct.llvm::PointerIntPair<llvm::Use**,2u,llvm::Use::PrevPtrTag,llvm::PointerLikeTypeTraits<llvm::Use**> >" = type { i64 } +%"struct.llvm::Type" = type opaque +%"struct.llvm::Use" = type { %"struct.llvm::Value"*, %"struct.llvm::Use"*, %"struct.llvm::PointerIntPair<llvm::Use**,2u,llvm::Use::PrevPtrTag,llvm::PointerLikeTypeTraits<llvm::Use**> >" } +%"struct.llvm::Value" = type { i32 (...)**, i8, i8, i16, %"struct.llvm::PATypeHolder", %"struct.llvm::Use"*, %"struct.llvm::ValueName"* } +%"struct.llvm::ValueName" = type opaque + +@_ZZN4llvm4castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_E8__func__ = internal constant [5 x i8] c"cast\00", align 8 ; <[5 x i8]*> [#uses=1] +@.str = private constant [31 x i8] c"include/llvm/Support/Casting.h\00", align 8 ; <[31 x i8]*> [#uses=1] +@.str1 = private constant [59 x i8] c"isa<X>(Val) && \22cast<Ty>() argument of incompatible type!\22\00", align 8 ; <[59 x i8]*> [#uses=1] + +; CHECK: Z3fooPN4llvm5ValueE +define zeroext i8 @_Z3fooPN4llvm5ValueE(%"struct.llvm::Value"* %V) ssp { +entry: + %0 = getelementptr inbounds %"struct.llvm::Value"* %V, i64 0, i32 1 ; <i8*> [#uses=1] + %1 = load i8* %0, align 8 ; <i8> [#uses=2] + %2 = icmp ugt i8 %1, 20 ; <i1> [#uses=1] + br i1 %2, label %bb.i, label %bb2 + +bb.i: ; preds = %entry + %toBoolnot.i.i = icmp ult i8 %1, 21 ; <i1> [#uses=1] + br i1 %toBoolnot.i.i, label %bb6.i.i, label %_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit + +; CHECK-NOT: assert +bb6.i.i: ; preds = %bb.i + tail call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8]* @_ZZN4llvm4castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_E8__func__, i64 0, i64 0), i8* getelementptr inbounds ([31 x i8]* @.str, i64 0, i64 0), i32 202, i8* getelementptr inbounds ([59 x i8]* @.str1, i64 0, i64 0)) noreturn + unreachable + +_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit: ; preds = %bb.i +; CHECK-NOT: null + %3 = icmp eq %"struct.llvm::Value"* %V, null ; <i1> [#uses=1] + br i1 %3, label %bb2, label %bb + +bb: ; preds = %_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit + tail call void @_ZNK4llvm5Value4dumpEv(%"struct.llvm::Value"* %V) +; CHECK: ret + ret i8 1 + +bb2: ; preds = %entry, %_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit + ret i8 0 +} + +declare void @__assert_rtn(i8*, i8*, i32, i8*) noreturn + +declare void @_ZNK4llvm5Value4dumpEv(%"struct.llvm::Value"*) diff --git a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll index 7545641..5381c88 100644 --- a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll +++ b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll @@ -1,6 +1,6 @@ -; RUN: opt < %s -lcssa -S | \ +; RUN: opt < %s -loopsimplify -lcssa -S | \ ; RUN: grep {%%SJE.0.0.lcssa = phi .struct.SetJmpMapEntry} -; RUN: opt < %s -lcssa -S | \ +; RUN: opt < %s -loopsimplify -lcssa -S | \ ; RUN: grep {%%SJE.0.0.lcssa1 = phi .struct.SetJmpMapEntry} %struct.SetJmpMapEntry = type { i8*, i32, %struct.SetJmpMapEntry* } diff --git a/test/Transforms/LICM/crash.ll b/test/Transforms/LICM/crash.ll new file mode 100644 index 0000000..88be5c4 --- /dev/null +++ b/test/Transforms/LICM/crash.ll @@ -0,0 +1,61 @@ +; RUN: opt -licm %s -disable-output + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + + +; PR8068 +@g_12 = external global i8, align 1 +define void @test1() nounwind ssp { +entry: + br label %for.body + +for.body: ; preds = %for.cond, %bb.nph + store i8 0, i8* @g_12, align 1 + %tmp6 = load i8* @g_12, align 1 + br label %for.cond + +for.cond: ; preds = %for.body + store i8 %tmp6, i8* @g_12, align 1 + br i1 false, label %for.cond.for.end10_crit_edge, label %for.body + +for.cond.for.end10_crit_edge: ; preds = %for.cond + br label %for.end10 + +for.end10: ; preds = %for.cond.for.end10_crit_edge, %entry + ret void +} + +; PR8067 +@g_8 = external global i32, align 4 + +define void @test2() noreturn nounwind ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %tmp7 = load i32* @g_8, align 4 + store i32* @g_8, i32** undef, align 16 + store i32 undef, i32* @g_8, align 4 + br label %for.body +} + +; PR8102 +define void @test3() { +entry: + %__first = alloca { i32* } + br i1 undef, label %for.cond, label %for.end + +for.cond: ; preds = %for.cond, %entry + %tmp1 = getelementptr { i32*}* %__first, i32 0, i32 0 + %tmp2 = load i32** %tmp1, align 4 + %call = tail call i32* @test3helper(i32* %tmp2) + %tmp3 = getelementptr { i32*}* %__first, i32 0, i32 0 + store i32* %call, i32** %tmp3, align 4 + br i1 false, label %for.cond, label %for.end + +for.end: ; preds = %for.cond, %entry + ret void +} + +declare i32* @test3helper(i32*) diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll index e7d36af..6f28d53 100644 --- a/test/Transforms/LICM/hoisting.ll +++ b/test/Transforms/LICM/hoisting.ll @@ -48,3 +48,19 @@ Out: ; preds = %Loop %C = sub i32 %A, %B ; <i32> [#uses=1] ret i32 %C } + + +; This loop invariant instruction should be constant folded, not hoisted. +define i32 @test3(i1 %c) { +; CHECK: define i32 @test3 +; CHECK: call void @foo2(i32 6) + %A = load i32* @X ; <i32> [#uses=2] + br label %Loop +Loop: + %B = add i32 4, 2 ; <i32> [#uses=2] + call void @foo2( i32 %B ) + br i1 %c, label %Loop, label %Out +Out: ; preds = %Loop + %C = sub i32 %A, %B ; <i32> [#uses=1] + ret i32 %C +} diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll index ef28c38..c1d2b24 100644 --- a/test/Transforms/LICM/scalar_promote.ll +++ b/test/Transforms/LICM/scalar_promote.ll @@ -1,4 +1,6 @@ ; RUN: opt < %s -licm -S | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + @X = global i32 7 ; <i32*> [#uses=4] define void @test1(i32 %i) { @@ -32,23 +34,21 @@ Entry: br label %Loop ; CHECK: @test2 ; CHECK: Entry: -; CHECK-NEXT: %X1 = getelementptr i32* @X, i64 0 -; CHECK-NEXT: %X2 = getelementptr i32* @X, i64 0 -; CHECK-NEXT: %X1.promoted = load i32* %X1 +; CHECK-NEXT: %.promoted = load i32* getelementptr inbounds (i32* @X, i64 1) ; CHECK-NEXT: br label %Loop Loop: ; preds = %Loop, %0 - %X1 = getelementptr i32* @X, i64 0 ; <i32*> [#uses=1] + %X1 = getelementptr i32* @X, i64 1 ; <i32*> [#uses=1] %A = load i32* %X1 ; <i32> [#uses=1] %V = add i32 %A, 1 ; <i32> [#uses=1] - %X2 = getelementptr i32* @X, i64 0 ; <i32*> [#uses=1] + %X2 = getelementptr i32* @X, i64 1 ; <i32*> [#uses=1] store i32 %V, i32* %X2 br i1 false, label %Loop, label %Exit Exit: ; preds = %Loop ret void ; CHECK: Exit: -; CHECK-NEXT: store i32 %V, i32* %X1 +; CHECK-NEXT: store i32 %V, i32* getelementptr inbounds (i32* @X, i64 1) ; CHECK-NEXT: ret void } @@ -71,3 +71,50 @@ Out: ; preds = %Loop ret void } +; PR8041 +define void @test4(i8* %x, i8 %n) { +; CHECK: @test4 + %handle1 = alloca i8* + %handle2 = alloca i8* + store i8* %x, i8** %handle1 + br label %loop + +loop: + %tmp = getelementptr i8* %x, i64 8 + store i8* %tmp, i8** %handle2 + br label %subloop + +subloop: + %count = phi i8 [ 0, %loop ], [ %nextcount, %subloop ] + %offsetx2 = load i8** %handle2 + store i8 %n, i8* %offsetx2 + %newoffsetx2 = getelementptr i8* %offsetx2, i64 -1 + store i8* %newoffsetx2, i8** %handle2 + %nextcount = add i8 %count, 1 + %innerexitcond = icmp sge i8 %nextcount, 8 + br i1 %innerexitcond, label %innerexit, label %subloop + +; Should have promoted 'handle2' accesses. +; CHECK: subloop: +; CHECK-NEXT: phi i8* [ +; CHECK-NEXT: %count = phi i8 [ +; CHECK-NEXT: store i8 %n +; CHECK-NOT: store +; CHECK: br i1 + +innerexit: + %offsetx1 = load i8** %handle1 + %val = load i8* %offsetx1 + %cond = icmp eq i8 %val, %n + br i1 %cond, label %exit, label %loop + +; Should not have promoted offsetx1 loads. +; CHECK: innerexit: +; CHECK: %val = load i8* %offsetx1 +; CHECK: %cond = icmp eq i8 %val, %n +; CHECK: br i1 %cond, label %exit, label %loop + +exit: + ret void +} + diff --git a/test/Transforms/LICM/sinking.ll b/test/Transforms/LICM/sinking.ll index 11112eb..68e4b64 100644 --- a/test/Transforms/LICM/sinking.ll +++ b/test/Transforms/LICM/sinking.ll @@ -233,3 +233,17 @@ Out: ; preds = %Loop ; CHECK-NEXT: ret i32 %tmp.6 } +; Should delete, not sink, dead instructions. +define void @test11() { + br label %Loop +Loop: + %dead = getelementptr %Ty* @X2, i64 0, i32 0 + br i1 false, label %Loop, label %Out +Out: + ret void +; CHECK: @test11 +; CHECK: Out: +; CHECK-NEXT: ret void +} + + diff --git a/test/Transforms/LoopRotate/phi-duplicate.ll b/test/Transforms/LoopRotate/phi-duplicate.ll index 9a64e2a..5403e72 100644 --- a/test/Transforms/LoopRotate/phi-duplicate.ll +++ b/test/Transforms/LoopRotate/phi-duplicate.ll @@ -27,9 +27,21 @@ for.body: ; preds = %for.cond for.end: ; preds = %for.cond ret void } -; Should only end up with one phi. -; CHECK: for.body: -; CHECK-NEXT: %j.02 = phi i64 -; CHECK-NOT: phi -; CHECK: ret void +; Should only end up with one phi. Also, the original for.cond block should +; be moved to the end of the loop so that the new loop header pleasantly +; ends up at the top. + +; CHECK: define void @test +; CHECK-NEXT: entry: +; CHECK-NEXT: icmp slt i64 +; CHECK-NEXT: br i1 +; CHECK-NOT: : +; CHECK: bb.nph: +; CHECK-NEXT: br label %for.body +; CHECK-NOT: : +; CHECK: for.body: +; CHECK-NEXT: %j.02 = phi i64 +; CHECK-NOT: phi +; CHECK: ret void +; CHECK-NEXT: } diff --git a/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll b/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll new file mode 100644 index 0000000..2a1ee7d --- /dev/null +++ b/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll @@ -0,0 +1,20 @@ +; RUN: opt < %s -domfrontier -loopsimplify -domfrontier -verify-dom-info -analyze + + +define void @a() nounwind { +entry: + br i1 undef, label %bb37, label %bb1.i + +bb1.i: ; preds = %bb1.i, %bb + %indvar = phi i64 [ %indvar.next, %bb1.i ], [ 0, %entry ] ; <i64> [#uses=1] + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next, 576 ; <i1> [#uses=1] + br i1 %exitcond, label %bb37, label %bb1.i + +bb37: ; preds = %bb1.i, %bb + br label %return + + +return: ; preds = %bb39 + ret void +} diff --git a/test/Transforms/LoopSimplify/indirectbr-backedge.ll b/test/Transforms/LoopSimplify/indirectbr-backedge.ll new file mode 100644 index 0000000..ca6e47f --- /dev/null +++ b/test/Transforms/LoopSimplify/indirectbr-backedge.ll @@ -0,0 +1,35 @@ +; RUN: opt -loopsimplify -S < %s | FileCheck %s + +; LoopSimplify shouldn't split loop backedges that use indirectbr. + +; CHECK: bb1: ; preds = %bb5, %bb +; CHECK-NEXT: indirectbr + +; CHECK: bb5: ; preds = %bb1 +; CHECK-NEXT: br label %bb1{{$}} + +define void @foo(i8* %p) nounwind { +bb: + br label %bb1 + +bb1: ; preds = %bb5, %bb1, %bb + indirectbr i8* %p, [label %bb6, label %bb7, label %bb1, label %bb2, label %bb3, label %bb5, label %bb4] + +bb2: ; preds = %bb1 + ret void + +bb3: ; preds = %bb1 + ret void + +bb4: ; preds = %bb1 + ret void + +bb5: ; preds = %bb1 + br label %bb1 + +bb6: ; preds = %bb1 + ret void + +bb7: ; preds = %bb1 + ret void +} diff --git a/test/Transforms/LoopSimplify/preserve-scev.ll b/test/Transforms/LoopSimplify/preserve-scev.ll new file mode 100644 index 0000000..017a0d2 --- /dev/null +++ b/test/Transforms/LoopSimplify/preserve-scev.ll @@ -0,0 +1,50 @@ +; RUN: opt -S < %s -indvars | opt -analyze -iv-users | grep {%cmp = icmp slt i32} | grep {= \{%\\.ph,+,1\}<%for.cond>} +; PR8079 + +; LoopSimplify should invalidate indvars when splitting out the +; inner loop. + +@maxStat = external global i32 + +define i32 @test() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %if.then5, %if.end, %entry + %cuts.1 = phi i32 [ 0, %entry ], [ %inc, %if.then5 ], [ %cuts.1, %if.end ] + %0 = phi i32 [ 0, %entry ], [ %add, %if.end ], [ %add, %if.then5 ] + %add = add i32 %0, 1 + %cmp = icmp slt i32 %0, 1 + %tmp1 = load i32* @maxStat, align 4 + br i1 %cmp, label %for.body, label %for.cond14.preheader + +for.cond14.preheader: ; preds = %for.cond + %cmp1726 = icmp sgt i32 %tmp1, 0 + br i1 %cmp1726, label %for.body18, label %return + +for.body: ; preds = %for.cond + %cmp2 = icmp sgt i32 %tmp1, 100 + br i1 %cmp2, label %return, label %if.end + +if.end: ; preds = %for.body + %cmp4 = icmp sgt i32 %tmp1, -1 + br i1 %cmp4, label %if.then5, label %for.cond + +if.then5: ; preds = %if.end + call void @foo() nounwind + %inc = add i32 %cuts.1, 1 + br label %for.cond + +for.body18: ; preds = %for.body18, %for.cond14.preheader + %i13.027 = phi i32 [ %1, %for.body18 ], [ 0, %for.cond14.preheader ] + call void @foo() nounwind + %1 = add nsw i32 %i13.027, 1 + %tmp16 = load i32* @maxStat, align 4 + %cmp17 = icmp slt i32 %1, %tmp16 + br i1 %cmp17, label %for.body18, label %return + +return: ; preds = %for.body18, %for.body, %for.cond14.preheader + ret i32 0 +} + +declare void @foo() nounwind diff --git a/test/Transforms/LoopStrengthReduce/pr3571.ll b/test/Transforms/LoopStrengthReduce/pr3571.ll index 9ad27d5..a23e4db 100644 --- a/test/Transforms/LoopStrengthReduce/pr3571.ll +++ b/test/Transforms/LoopStrengthReduce/pr3571.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -loop-reduce | llvm-dis ; PR3571 -target triple = "i386-mingw32" +target triple = "i386-pc-mingw32" define void @_ZNK18qdesigner_internal10TreeWidget12drawBranchesEP8QPainterRK5QRectRK11QModelIndex() nounwind { entry: br label %_ZNK11QModelIndex7isValidEv.exit.i diff --git a/test/Transforms/LoopStrengthReduce/uglygep.ll b/test/Transforms/LoopStrengthReduce/uglygep.ll index dca97e9..8af5cf1 100644 --- a/test/Transforms/LoopStrengthReduce/uglygep.ll +++ b/test/Transforms/LoopStrengthReduce/uglygep.ll @@ -4,7 +4,6 @@ ; should be able to form pretty GEPs. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" define void @Z4() nounwind { bb: diff --git a/test/Transforms/LoopUnswitch/infinite-loop.ll b/test/Transforms/LoopUnswitch/infinite-loop.ll new file mode 100644 index 0000000..73391ca --- /dev/null +++ b/test/Transforms/LoopUnswitch/infinite-loop.ll @@ -0,0 +1,53 @@ +; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s +; RUN: opt -loop-unswitch -simplifycfg -S < %s | FileCheck %s +; PR5373 + +; Loop unswitching shouldn't trivially unswitch the true case of condition %a +; in the code here because it leads to an infinite loop. While this doesn't +; contain any instructions with side effects, it's still a kind of side effect. +; It can trivially unswitch on the false cas of condition %a though. + +; STATS: 2 loop-unswitch - Number of branches unswitched +; STATS: 1 loop-unswitch - Number of unswitches that are trivial + +; CHECK: @func_16 +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split + +; CHECK: entry.split: +; CHECK-NEXT: br i1 %b, label %cond.end.us, label %abort1 + +; CHECK: cond.end.us: +; CHECK-NEXT: br label %cond.end.us + +; CHECK: abort0.split: +; CHECK-NEXT: call void @end0() noreturn nounwind +; CHECK-NEXT: unreachable + +; CHECK: abort1: +; CHECK-NEXT: call void @end1() noreturn nounwind +; CHECK-NEXT: unreachable + +; CHECK: } + +define void @func_16(i1 %a, i1 %b) nounwind { +entry: + br label %for.body + +for.body: + br i1 %a, label %cond.end, label %abort0 + +cond.end: + br i1 %b, label %for.body, label %abort1 + +abort0: + call void @end0() noreturn nounwind + unreachable + +abort1: + call void @end1() noreturn nounwind + unreachable +} + +declare void @end0() noreturn +declare void @end1() noreturn diff --git a/test/Transforms/LowerAtomic/atomic-load.ll b/test/Transforms/LowerAtomic/atomic-load.ll new file mode 100644 index 0000000..5b110d6 --- /dev/null +++ b/test/Transforms/LowerAtomic/atomic-load.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -loweratomic -S | FileCheck %s + +declare i8 @llvm.atomic.load.add.i8.p0i8(i8* %ptr, i8 %delta) +declare i8 @llvm.atomic.load.nand.i8.p0i8(i8* %ptr, i8 %delta) +declare i8 @llvm.atomic.load.min.i8.p0i8(i8* %ptr, i8 %delta) + +define i8 @add() { +; CHECK: @add + %i = alloca i8 + %j = call i8 @llvm.atomic.load.add.i8.p0i8(i8* %i, i8 42) +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: add +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} + +define i8 @nand() { +; CHECK: @nand + %i = alloca i8 + %j = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* %i, i8 42) +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: and +; CHECK-NEXT: xor +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} + +define i8 @min() { +; CHECK: @min + %i = alloca i8 + %j = call i8 @llvm.atomic.load.min.i8.p0i8(i8* %i, i8 42) +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: icmp +; CHECK-NEXT: select +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} diff --git a/test/Transforms/LowerAtomic/atomic-swap.ll b/test/Transforms/LowerAtomic/atomic-swap.ll new file mode 100644 index 0000000..0a59c85 --- /dev/null +++ b/test/Transforms/LowerAtomic/atomic-swap.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -loweratomic -S | FileCheck %s + +declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* %ptr, i8 %cmp, i8 %val) +declare i8 @llvm.atomic.swap.i8.p0i8(i8* %ptr, i8 %val) + +define i8 @cmpswap() { +; CHECK: @cmpswap + %i = alloca i8 + %j = call i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* %i, i8 0, i8 42) +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: icmp +; CHECK-NEXT: select +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} + +define i8 @swap() { +; CHECK: @swap + %i = alloca i8 + %j = call i8 @llvm.atomic.swap.i8.p0i8(i8* %i, i8 42) +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} diff --git a/test/Transforms/LowerAtomic/barrier.ll b/test/Transforms/LowerAtomic/barrier.ll new file mode 100644 index 0000000..218c5ba --- /dev/null +++ b/test/Transforms/LowerAtomic/barrier.ll @@ -0,0 +1,10 @@ +; RUN: opt < %s -loweratomic -S | FileCheck %s + +declare void @llvm.memory.barrier(i1 %ll, i1 %ls, i1 %sl, i1 %ss, i1 %device) + +define void @barrier() { +; CHECK: @barrier + call void @llvm.memory.barrier(i1 0, i1 0, i1 0, i1 0, i1 0) +; CHECK-NEXT: ret + ret void +} diff --git a/test/Transforms/SSI/dg.exp b/test/Transforms/LowerAtomic/dg.exp index f200589..f200589 100644 --- a/test/Transforms/SSI/dg.exp +++ b/test/Transforms/LowerAtomic/dg.exp diff --git a/test/Transforms/MergeFunc/vectors-and-arrays.ll b/test/Transforms/MergeFunc/vectors-and-arrays.ll new file mode 100644 index 0000000..dc64a08 --- /dev/null +++ b/test/Transforms/MergeFunc/vectors-and-arrays.ll @@ -0,0 +1,18 @@ +; RUN: opt -mergefunc < %s -disable-output -stats | not grep merged +; This used to crash with an assert. + +define <2 x i8> @v1(<2 x i8> %x) { + ret <2 x i8> %x +} + +define <4 x i8> @v2(<4 x i8> %x) { + ret <4 x i8> %x +} + +define [2 x i8] @a1([2 x i8] %x) { + ret [2 x i8] %x +} + +define [4 x i8] @a2([4 x i8] %x) { + ret [4 x i8] %x +} diff --git a/test/Transforms/PartialSpecialize/two-specializations.ll b/test/Transforms/PartialSpecialize/two-specializations.ll index c85ddb7..bc3da22 100644 --- a/test/Transforms/PartialSpecialize/two-specializations.ll +++ b/test/Transforms/PartialSpecialize/two-specializations.ll @@ -1,8 +1,8 @@ ; If there are two specializations of a function, make sure each callsite ; calls the right one. ; -; RN: opt -S -partialspecialization %s | FileCheck %s -; RUN: true +; RUN: opt -S -partialspecialization -disable-inlining %s | opt -S -inline | FileCheck %s -check-prefix=CORRECT +; RUN: opt -S -partialspecialization -disable-inlining %s | FileCheck %s declare void @callback1() declare void @callback2() @@ -14,14 +14,18 @@ define internal void @UseCallback(void()* %pCallback) { define void @foo(void()* %pNonConstCallback) { Entry: +; CORRECT: Entry +; CORRECT-NEXT: call void @callback1() +; CORRECT-NEXT: call void @callback1() +; CORRECT-NEXT: call void @callback2() +; CORRECT-NEXT: call void %pNonConstCallback() +; CORRECT-NEXT: call void @callback1() +; CORRECT-NEXT: call void @callback2() +; CORRECT-NEXT: call void @callback2() ; CHECK: Entry -; CHECK-NEXT: call void @callback1() -; CHECK-NEXT: call void @callback1() -; CHECK-NEXT: call void @callback2() -; CHECK-NEXT: call void %pNonConstCallback() -; CHECK-NEXT: call void @callback1() -; CHECK-NEXT: call void @callback2() -; CHECK-NEXT: call void @callback2() +; CHECK-NOT: call void @UseCallback(void ()* @callback1) +; CHECK-NOT: call void @UseCallback(void ()* @callback2) +; CHECK: ret void call void @UseCallback(void()* @callback1) call void @UseCallback(void()* @callback1) call void @UseCallback(void()* @callback2) diff --git a/test/Transforms/SCCP/ipsccp-addr-taken.ll b/test/Transforms/SCCP/ipsccp-addr-taken.ll new file mode 100644 index 0000000..c6572fa --- /dev/null +++ b/test/Transforms/SCCP/ipsccp-addr-taken.ll @@ -0,0 +1,28 @@ +; RUN: opt %s -ipsccp -S | FileCheck %s +; PR7876 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define internal i32 @foo() nounwind noinline ssp { +entry: + ret i32 0 +; CHECK: @foo +; CHECK: entry: +; CHECK: ret i32 0 +} + +declare i32 @bar() + +define internal i32 @test(i32 %c) nounwind noinline ssp { +bb: + %tmp1 = icmp ne i32 %c, 0 ; <i1> [#uses=1] + %tmp2 = select i1 %tmp1, i32 ()* @foo, i32 ()* @bar ; <i32 ()*> [#uses=1] + %tmp3 = tail call i32 %tmp2() nounwind ; <i32> [#uses=1] + ret i32 %tmp3 +} + +define i32 @main() nounwind ssp { +bb: + %tmp = tail call i32 @test(i32 1) ; <i32> [#uses=1] + ret i32 %tmp +} diff --git a/test/Transforms/SSI/2009-07-09-Invoke.ll b/test/Transforms/SSI/2009-07-09-Invoke.ll deleted file mode 100644 index 20a2217..0000000 --- a/test/Transforms/SSI/2009-07-09-Invoke.ll +++ /dev/null @@ -1,71 +0,0 @@ -; RUN: opt < %s -ssi-everything -disable-output -; PR4511 - - %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" } - %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* } - %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" } - %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* } - %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" } - %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" = type { i32, i32, i32 } - %"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" } - -declare void @_Unwind_Resume(i8*) - -declare fastcc %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* @_ZSt24__uninitialized_copy_auxIPSsS0_ET0_T_S2_S1_St12__false_type(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*) - -define fastcc void @_ZNSt6vectorISsSaISsEE9push_backERKSs(%"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >"* nocapture %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* nocapture %__x) { -entry: - br i1 undef, label %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i, label %bb - -bb: ; preds = %entry - ret void - -_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i: ; preds = %entry - %0 = invoke fastcc %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* @_ZSt24__uninitialized_copy_auxIPSsS0_ET0_T_S2_S1_St12__false_type(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef) - to label %invcont14.i unwind label %ppad81.i ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=3] - -invcont14.i: ; preds = %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i - %1 = icmp eq %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %0, null ; <i1> [#uses=1] - br i1 %1, label %bb19.i, label %bb.i17.i - -bb.i17.i: ; preds = %invcont14.i - %2 = invoke fastcc i8* @_ZNSs4_Rep8_M_cloneERKSaIcEj(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep"* undef, i32 0) - to label %bb2.i25.i unwind label %ppad.i.i.i23.i ; <i8*> [#uses=0] - -ppad.i.i.i23.i: ; preds = %bb.i17.i - invoke void @_Unwind_Resume(i8* undef) - to label %.noexc.i24.i unwind label %lpad.i29.i - -.noexc.i24.i: ; preds = %ppad.i.i.i23.i - unreachable - -bb2.i25.i: ; preds = %bb.i17.i - unreachable - -lpad.i29.i: ; preds = %ppad.i.i.i23.i - invoke void @_Unwind_Resume(i8* undef) - to label %.noexc.i9 unwind label %ppad81.i - -.noexc.i9: ; preds = %lpad.i29.i - unreachable - -bb19.i: ; preds = %invcont14.i - %3 = getelementptr %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %0, i32 1 ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=2] - %4 = invoke fastcc %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* @_ZSt24__uninitialized_copy_auxIPSsS0_ET0_T_S2_S1_St12__false_type(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %3) - to label %invcont20.i unwind label %ppad81.i ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=0] - -invcont20.i: ; preds = %bb19.i - unreachable - -invcont32.i: ; preds = %ppad81.i - unreachable - -ppad81.i: ; preds = %bb19.i, %lpad.i29.i, %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i - %__new_finish.0.i = phi %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* [ %0, %lpad.i29.i ], [ undef, %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i ], [ %3, %bb19.i ] ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=0] - br i1 undef, label %invcont32.i, label %bb.i.i.i.i - -bb.i.i.i.i: ; preds = %bb.i.i.i.i, %ppad81.i - br label %bb.i.i.i.i -} - -declare fastcc i8* @_ZNSs4_Rep8_M_cloneERKSaIcEj(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep"* nocapture, i32) diff --git a/test/Transforms/SSI/2009-08-15-UnreachableBB.ll b/test/Transforms/SSI/2009-08-15-UnreachableBB.ll deleted file mode 100644 index 0fe37ec..0000000 --- a/test/Transforms/SSI/2009-08-15-UnreachableBB.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: opt < %s -ssi-everything -disable-output - -declare fastcc i32 @ras_Empty(i8** nocapture) nounwind readonly - -define i32 @cc_Tautology() nounwind { -entry: - unreachable - -cc_InitData.exit: ; No predecessors! - %0 = call fastcc i32 @ras_Empty(i8** undef) nounwind ; <i32> [#uses=1] - %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] - br i1 %1, label %bb2, label %bb6 - -bb2: ; preds = %cc_InitData.exit - unreachable - -bb6: ; preds = %cc_InitData.exit - ret i32 undef -} diff --git a/test/Transforms/SSI/2009-08-17-CritEdge.ll b/test/Transforms/SSI/2009-08-17-CritEdge.ll deleted file mode 100644 index 61bd2dc..0000000 --- a/test/Transforms/SSI/2009-08-17-CritEdge.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: opt < %s -ssi-everything -disable-output - -define void @test(i32 %x) { -entry: - br label %label1 -label1: - %A = phi i32 [ 0, %entry ], [ %A.1, %label2 ] - %B = icmp slt i32 %A, %x - br i1 %B, label %label2, label %label2 -label2: - %A.1 = add i32 %A, 1 - br label %label1 -label3: ; No predecessors! - ret void -} diff --git a/test/Transforms/SSI/2009-08-19-UnreachableBB2.ll b/test/Transforms/SSI/2009-08-19-UnreachableBB2.ll deleted file mode 100644 index 64bed19..0000000 --- a/test/Transforms/SSI/2009-08-19-UnreachableBB2.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: opt < %s -ssi-everything -disable-output - -define void @foo() { -entry: - %tmp0 = load i64* undef, align 4 ; <i64> [#uses=3] - br i1 undef, label %end_stmt_playback, label %bb16 - -readJournalHdr.exit: ; No predecessors! - br label %end_stmt_playback - -bb16: ; preds = %bb7 - %tmp1 = icmp slt i64 0, %tmp0 ; <i1> [#uses=1] - br i1 %tmp1, label %bb16, label %bb17 - -bb17: ; preds = %bb16 - store i64 %tmp0, i64* undef, align 4 - br label %end_stmt_playback - -end_stmt_playback: ; preds = %bb17, %readJournalHdr.exit, %bb6, %bb2 - store i64 %tmp0, i64* undef, align 4 - ret void -} diff --git a/test/Transforms/SSI/ssiphi.ll b/test/Transforms/SSI/ssiphi.ll deleted file mode 100644 index a42b70c..0000000 --- a/test/Transforms/SSI/ssiphi.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: opt < %s -ssi-everything -S | FileCheck %s - -declare void @use(i32) -declare i32 @create() - -define i32 @foo() { -entry: - %x = call i32 @create() - %y = icmp slt i32 %x, 10 - br i1 %y, label %T, label %F -T: -; CHECK: SSI_sigma - call void @use(i32 %x) - br label %join -F: -; CHECK: SSI_sigma - call void @use(i32 %x) - br label %join -join: -; CHECK: SSI_phi - ret i32 %x -} diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll index 4f875b0..fe55426 100644 --- a/test/Transforms/ScalarRepl/vector_promote.ll +++ b/test/Transforms/ScalarRepl/vector_promote.ll @@ -1,8 +1,8 @@ -; RUN: opt < %s -scalarrepl -S | not grep alloca -; RUN: opt < %s -scalarrepl -S | grep {load <4 x float>} +; RUN: opt < %s -scalarrepl -S | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "x86_64-apple-darwin10.0.0" -define void @test(<4 x float>* %F, float %f) { +define void @test1(<4 x float>* %F, float %f) { entry: %G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3] %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2] @@ -14,6 +14,11 @@ entry: %tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1] store <4 x float> %tmp6, <4 x float>* %F ret void +; CHECK: @test1 +; CHECK-NOT: alloca +; CHECK: %tmp = load <4 x float>* %F +; CHECK: fadd <4 x float> %tmp, %tmp +; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 0 } define void @test2(<4 x float>* %F, float %f) { @@ -28,6 +33,11 @@ entry: %tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1] store <4 x float> %tmp6, <4 x float>* %F ret void +; CHECK: @test2 +; CHECK-NOT: alloca +; CHECK: %tmp = load <4 x float>* %F +; CHECK: fadd <4 x float> %tmp, %tmp +; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 2 } define void @test3(<4 x float>* %F, float* %f) { @@ -40,6 +50,11 @@ entry: %tmp.upgrd.4 = load float* %tmp.upgrd.3 ; <float> [#uses=1] store float %tmp.upgrd.4, float* %f ret void +; CHECK: @test3 +; CHECK-NOT: alloca +; CHECK: %tmp = load <4 x float>* %F +; CHECK: fadd <4 x float> %tmp, %tmp +; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 2 } define void @test4(<4 x float>* %F, float* %f) { @@ -52,6 +67,11 @@ entry: %tmp.upgrd.6 = load float* %G.upgrd.5 ; <float> [#uses=1] store float %tmp.upgrd.6, float* %f ret void +; CHECK: @test4 +; CHECK-NOT: alloca +; CHECK: %tmp = load <4 x float>* %F +; CHECK: fadd <4 x float> %tmp, %tmp +; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 0 } define i32 @test5(float %X) { ;; should turn into bitcast. @@ -61,5 +81,22 @@ define i32 @test5(float %X) { ;; should turn into bitcast. %a = bitcast float* %X1 to i32* %tmp = load i32* %a ret i32 %tmp +; CHECK: @test5 +; CHECK-NEXT: bitcast float %X to i32 +; CHECK-NEXT: ret i32 +} + + +;; should not turn into <1 x i64> - It is a banned MMX datatype. +;; rdar://8380055 +define i64 @test6(<2 x float> %X) { + %X_addr = alloca <2 x float> + store <2 x float> %X, <2 x float>* %X_addr + %P = bitcast <2 x float>* %X_addr to i64* + %tmp = load i64* %P + ret i64 %tmp +; CHECK: @test6 +; CHECK-NEXT: bitcast <2 x float> %X to i64 +; CHECK-NEXT: ret i64 } diff --git a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll index ba33d84..9c15efc 100644 --- a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll +++ b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -simplifycfg -disable-output ; PR2256 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-mingw32" +target triple = "x86_64-pc-mingw32" define { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval %Z, i1 %cond) nounwind { bb: ; preds = %entry diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll index 83a9fa7..7315ff6 100644 --- a/test/Transforms/SimplifyCFG/basictest.ll +++ b/test/Transforms/SimplifyCFG/basictest.ll @@ -54,6 +54,5 @@ bb1: ; preds = %entry return: ; preds = %entry ret void ; CHECK: @test5 -; CHECK-NEXT: bb: ; CHECK-NEXT: ret void } diff --git a/test/Transforms/SimplifyCFG/indirectbr.ll b/test/Transforms/SimplifyCFG/indirectbr.ll new file mode 100644 index 0000000..de4f5b6 --- /dev/null +++ b/test/Transforms/SimplifyCFG/indirectbr.ll @@ -0,0 +1,64 @@ +; RUN: opt -S -simplifycfg < %s | FileCheck %s + +; SimplifyCFG should eliminate redundant indirectbr edges. + +; CHECK: indbrtest0 +; CHECK: indirectbr i8* %t, [label %BB0, label %BB1, label %BB2] +; CHECK: %x = phi i32 [ 0, %BB0 ], [ 1, %entry ] + +declare void @foo() +declare void @A() +declare void @B(i32) +declare void @C() + +define void @indbrtest0(i8** %P, i8** %Q) { +entry: + store i8* blockaddress(@indbrtest0, %BB0), i8** %P + store i8* blockaddress(@indbrtest0, %BB1), i8** %P + store i8* blockaddress(@indbrtest0, %BB2), i8** %P + call void @foo() + %t = load i8** %Q + indirectbr i8* %t, [label %BB0, label %BB1, label %BB2, label %BB0, label %BB1, label %BB2] +BB0: + call void @A() + br label %BB1 +BB1: + %x = phi i32 [ 0, %BB0 ], [ 1, %entry ], [ 1, %entry ] + call void @B(i32 %x) + ret void +BB2: + call void @C() + ret void +} + +; SimplifyCFG should convert the indirectbr into a directbr. It would be even +; better if it removed the branch altogether, but simplifycfdg currently misses +; that because the predecessor is the entry block. + +; CHECK: indbrtest1 +; CHECK: br label %BB0 + +define void @indbrtest1(i8** %P, i8** %Q) { +entry: + store i8* blockaddress(@indbrtest1, %BB0), i8** %P + call void @foo() + %t = load i8** %Q + indirectbr i8* %t, [label %BB0, label %BB0] +BB0: + call void @A() + ret void +} + +; SimplifyCFG should notice that BB0 does not have its address taken and +; remove it from entry's successor list. + +; CHECK: indbrtest2 +; CHECK: entry: +; CHECK-NEXT: unreachable + +define void @indbrtest2(i8* %t) { +entry: + indirectbr i8* %t, [label %BB0, label %BB0] +BB0: + ret void +} diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll new file mode 100644 index 0000000..3965c37 --- /dev/null +++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll @@ -0,0 +1,19 @@ +; RUN: opt -strip-dead-debug-info -disable-output %s +define i32 @foo() nounwind ssp { +entry: + ret i32 0, !dbg !8 +} + +!llvm.dbg.sp = !{!0} +!llvm.dbg.gv = !{!6} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524329, metadata !"/tmp/a.c", metadata !"/Volumes/Lalgate/clean/D.CW", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"/tmp/a.c", metadata !"/Volumes/Lalgate/clean/D.CW", metadata !"clang version 2.8 (trunk 112062)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 524340, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"i", metadata !1, i32 2, metadata !7, i1 true, i1 true, i32 0} ; [ DW_TAG_variable ] +!7 = metadata !{i32 524326, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !5} ; [ DW_TAG_const_type ] +!8 = metadata !{i32 3, i32 13, metadata !9, null} +!9 = metadata !{i32 524299, metadata !0, i32 3, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] diff --git a/test/Transforms/TailCallElim/accum_recursion.ll b/test/Transforms/TailCallElim/accum_recursion.ll index b2a9ed2..9475f87 100644 --- a/test/Transforms/TailCallElim/accum_recursion.ll +++ b/test/Transforms/TailCallElim/accum_recursion.ll @@ -1,15 +1,74 @@ -; RUN: opt < %s -tailcallelim -S | not grep call +; RUN: opt < %s -tailcallelim -S | FileCheck %s -define i32 @factorial(i32 %x) { +define i32 @test1_factorial(i32 %x) { entry: %tmp.1 = icmp sgt i32 %x, 0 ; <i1> [#uses=1] br i1 %tmp.1, label %then, label %else then: ; preds = %entry %tmp.6 = add i32 %x, -1 ; <i32> [#uses=1] - %tmp.4 = call i32 @factorial( i32 %tmp.6 ) ; <i32> [#uses=1] + %tmp.4 = call i32 @test1_factorial( i32 %tmp.6 ) ; <i32> [#uses=1] %tmp.7 = mul i32 %tmp.4, %x ; <i32> [#uses=1] ret i32 %tmp.7 else: ; preds = %entry ret i32 1 } +; CHECK: define i32 @test1_factorial +; CHECK: phi i32 +; CHECK-NOT: call i32 +; CHECK: else: + +; This is a more aggressive form of accumulator recursion insertion, which +; requires noticing that X doesn't change as we perform the tailcall. + +define i32 @test2_mul(i32 %x, i32 %y) { +entry: + %tmp.1 = icmp eq i32 %y, 0 ; <i1> [#uses=1] + br i1 %tmp.1, label %return, label %endif +endif: ; preds = %entry + %tmp.8 = add i32 %y, -1 ; <i32> [#uses=1] + %tmp.5 = call i32 @test2_mul( i32 %x, i32 %tmp.8 ) ; <i32> [#uses=1] + %tmp.9 = add i32 %tmp.5, %x ; <i32> [#uses=1] + ret i32 %tmp.9 +return: ; preds = %entry + ret i32 %x +} + +; CHECK: define i32 @test2_mul +; CHECK: phi i32 +; CHECK-NOT: call i32 +; CHECK: return: + + +define i64 @test3_fib(i64 %n) nounwind readnone { +; CHECK: @test3_fib +entry: +; CHECK: tailrecurse: +; CHECK: %accumulator.tr = phi i64 [ %n, %entry ], [ %3, %bb1 ] +; CHECK: %n.tr = phi i64 [ %n, %entry ], [ %2, %bb1 ] + switch i64 %n, label %bb1 [ +; CHECK: switch i64 %n.tr, label %bb1 [ + i64 0, label %bb2 + i64 1, label %bb2 + ] + +bb1: +; CHECK: bb1: + %0 = add i64 %n, -1 +; CHECK: %0 = add i64 %n.tr, -1 + %1 = tail call i64 @test3_fib(i64 %0) nounwind +; CHECK: %1 = tail call i64 @test3_fib(i64 %0) + %2 = add i64 %n, -2 +; CHECK: %2 = add i64 %n.tr, -2 + %3 = tail call i64 @test3_fib(i64 %2) nounwind +; CHECK-NOT: tail call i64 @test3_fib + %4 = add nsw i64 %3, %1 +; CHECK: add nsw i64 %accumulator.tr, %1 + ret i64 %4 +; CHECK: br label %tailrecurse + +bb2: +; CHECK: bb2: + ret i64 %n +; CHECK: ret i64 %accumulator.tr +} diff --git a/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll b/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll deleted file mode 100644 index 2a90cf3..0000000 --- a/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll +++ /dev/null @@ -1,20 +0,0 @@ -; This is a more aggressive form of accumulator recursion insertion, which -; requires noticing that X doesn't change as we perform the tailcall. Thanks -; go out to the anonymous users of the demo script for "suggesting" -; optimizations that should be done. :) - -; RUN: opt < %s -tailcallelim -S | not grep call - -define i32 @mul(i32 %x, i32 %y) { -entry: - %tmp.1 = icmp eq i32 %y, 0 ; <i1> [#uses=1] - br i1 %tmp.1, label %return, label %endif -endif: ; preds = %entry - %tmp.8 = add i32 %y, -1 ; <i32> [#uses=1] - %tmp.5 = call i32 @mul( i32 %x, i32 %tmp.8 ) ; <i32> [#uses=1] - %tmp.9 = add i32 %tmp.5, %x ; <i32> [#uses=1] - ret i32 %tmp.9 -return: ; preds = %entry - ret i32 %x -} - diff --git a/test/Transforms/TailCallElim/switch.ll b/test/Transforms/TailCallElim/switch.ll deleted file mode 100644 index 3388431..0000000 --- a/test/Transforms/TailCallElim/switch.ll +++ /dev/null @@ -1,34 +0,0 @@ -; RUN: opt %s -tailcallelim -S | FileCheck %s - -define i64 @fib(i64 %n) nounwind readnone { -; CHECK: @fib -entry: -; CHECK: tailrecurse: -; CHECK: %accumulator.tr = phi i64 [ %n, %entry ], [ %3, %bb1 ] -; CHECK: %n.tr = phi i64 [ %n, %entry ], [ %2, %bb1 ] - switch i64 %n, label %bb1 [ -; CHECK: switch i64 %n.tr, label %bb1 [ - i64 0, label %bb2 - i64 1, label %bb2 - ] - -bb1: -; CHECK: bb1: - %0 = add i64 %n, -1 -; CHECK: %0 = add i64 %n.tr, -1 - %1 = tail call i64 @fib(i64 %0) nounwind -; CHECK: %1 = tail call i64 @fib(i64 %0) - %2 = add i64 %n, -2 -; CHECK: %2 = add i64 %n.tr, -2 - %3 = tail call i64 @fib(i64 %2) nounwind -; CHECK-NOT: tail call i64 @fib - %4 = add nsw i64 %3, %1 -; CHECK: add nsw i64 %accumulator.tr, %1 - ret i64 %4 -; CHECK: br label %tailrecurse - -bb2: -; CHECK: bb2: - ret i64 %n -; CHECK: ret i64 %accumulator.tr -} diff --git a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll index 88a5656..03e99bc 100644 --- a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll +++ b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output | not grep tailduplicate +; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output |& not grep tailduplicate ; XFAIL: * define i32 @foo(i32 %l) nounwind { diff --git a/test/Verifier/2010-08-07-PointerIntrinsic.ll b/test/Verifier/2010-08-07-PointerIntrinsic.ll new file mode 100644 index 0000000..bf5563d --- /dev/null +++ b/test/Verifier/2010-08-07-PointerIntrinsic.ll @@ -0,0 +1,21 @@ +; RUN: not llvm-as < %s 2> %t +; RUN: grep {Broken module} %t +; PR7316 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32" +target triple = "x86-unknown-unknown" +@aa = global [32 x i8] zeroinitializer, align 1 +@bb = global [16 x i8] zeroinitializer, align 1 +define void @x() nounwind { +L.0: + %0 = getelementptr [32 x i8]* @aa, i32 0, i32 4 + %1 = bitcast i8* %0 to [16 x i8]* + %2 = bitcast [16 x i8]* %1 to [0 x i8]* + %3 = getelementptr [16 x i8]* @bb + %4 = bitcast [16 x i8]* %3 to [0 x i8]* + call void @llvm.memcpy.i32([0 x i8]* %2, [0 x i8]* %4, i32 16, i32 1) + br label %return +return: + ret void +} +declare void @llvm.memcpy.i32([0 x i8]*, [0 x i8]*, i32, i32) nounwind diff --git a/test/lit.cfg b/test/lit.cfg index 5e7e0e4..f15777c 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -46,7 +46,16 @@ if llvm_obj_root is not None: config.environment['PATH'] = path # Propogate 'HOME' through the environment. -config.environment['HOME'] = os.environ['HOME'] +if 'HOME' in os.environ: + config.environment['HOME'] = os.environ['HOME'] + +# Propogate 'INCLUDE' through the environment. +if 'INCLUDE' in os.environ: + config.environment['INCLUDE'] = os.environ['INCLUDE'] + +# Propogate 'LIB' through the environment. +if 'LIB' in os.environ: + config.environment['LIB'] = os.environ['LIB'] # Propogate LLVM_SRC_ROOT into the environment. config.environment['LLVM_SRC_ROOT'] = getattr(config, 'llvm_src_root', '') @@ -110,7 +119,7 @@ import re site_exp = {} # FIXME: Implement lit.site.cfg. for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')): - m = re.match('set ([^ ]+) "([^"]*)"', line) + m = re.match('set ([^ ]+) "(.*)"', line) if m: site_exp[m.group(1)] = m.group(2) @@ -147,13 +156,13 @@ def llvm_supports_target(name): def llvm_supports_darwin_and_target(name): return 'darwin' in config.target_triple and llvm_supports_target(name) -langs = set(site_exp['llvmgcc_langs'].split(',')) +langs = set([s.strip() for s in site_exp['llvmgcc_langs'].split(',')]) def llvm_gcc_supports(name): - return name in langs + return name.strip() in langs -bindings = set(site_exp['llvm_bindings'].split(',')) +bindings = set([s.strip() for s in site_exp['llvm_bindings'].split(',')]) def llvm_supports_binding(name): - return name in bindings + return name.strip() in bindings # Provide on_clone hook for reading 'dg.exp'. import os |