; (code-viewer header removed: "Home | History | Annotate | Download | only in X86" — this test lives under test/CodeGen/X86)
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X32
      3 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X64
      4 
; The load feeds the FIRST source of the intrinsic; folding it into VPCOM's
; memory operand requires commuting the two vector sources, which flips the
; signed-byte predicate: imm 0 ("lt") is emitted as "gt".
define <16 x i8> @commute_fold_vpcomb(<16 x i8>* %a0, <16 x i8> %a1) {
; X32-LABEL: commute_fold_vpcomb:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomgtb (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomb:
; X64:       # %bb.0:
; X64-NEXT:    vpcomgtb (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a0
  %2 = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %1, <16 x i8> %a1, i8 0) ; vpcomltb
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
     21 
; Folding the load (first intrinsic source) commutes the operands and flips
; the signed-dword predicate: imm 1 ("le") is emitted as "ge".
define <4 x i32> @commute_fold_vpcomd(<4 x i32>* %a0, <4 x i32> %a1) {
; X32-LABEL: commute_fold_vpcomd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomged (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomd:
; X64:       # %bb.0:
; X64-NEXT:    vpcomged (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %1, <4 x i32> %a1, i8 1) ; vpcomled
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
     38 
; Folding the load (first intrinsic source) commutes the operands and flips
; the signed-qword predicate: imm 2 ("gt") is emitted as "lt".
define <2 x i64> @commute_fold_vpcomq(<2 x i64>* %a0, <2 x i64> %a1) {
; X32-LABEL: commute_fold_vpcomq:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomltq (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomq:
; X64:       # %bb.0:
; X64-NEXT:    vpcomltq (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <2 x i64>, <2 x i64>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %1, <2 x i64> %a1, i8 2) ; vpcomgtq
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
     55 
; Folding the load (first intrinsic source) commutes the operands and flips
; the unsigned-byte predicate: imm 3 ("ge") is emitted as "le".
define <16 x i8> @commute_fold_vpcomub(<16 x i8>* %a0, <16 x i8> %a1) {
; X32-LABEL: commute_fold_vpcomub:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomleub (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomub:
; X64:       # %bb.0:
; X64-NEXT:    vpcomleub (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a0
  %2 = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %1, <16 x i8> %a1, i8 3) ; vpcomgeub
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
     72 
; "eq" (imm 4) is symmetric, so commuting the operands to fold the load
; leaves the predicate unchanged in the emitted vpcomequd.
define <4 x i32> @commute_fold_vpcomud(<4 x i32>* %a0, <4 x i32> %a1) {
; X32-LABEL: commute_fold_vpcomud:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomequd (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomud:
; X64:       # %bb.0:
; X64-NEXT:    vpcomequd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %1, <4 x i32> %a1, i8 4) ; vpcomequd
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
     89 
; "neq" (imm 5) is symmetric, so commuting the operands to fold the load
; leaves the predicate unchanged in the emitted vpcomnequq.
define <2 x i64> @commute_fold_vpcomuq(<2 x i64>* %a0, <2 x i64> %a1) {
; X32-LABEL: commute_fold_vpcomuq:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomnequq (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomuq:
; X64:       # %bb.0:
; X64-NEXT:    vpcomnequq (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <2 x i64>, <2 x i64>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %1, <2 x i64> %a1, i8 5) ; vpcomnequq
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
    106 
; "false" (imm 6) is operand-independent, so commuting the operands to fold
; the load leaves the predicate unchanged in the emitted vpcomfalseuw.
define <8 x i16> @commute_fold_vpcomuw(<8 x i16>* %a0, <8 x i16> %a1) {
; X32-LABEL: commute_fold_vpcomuw:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomfalseuw (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomuw:
; X64:       # %bb.0:
; X64-NEXT:    vpcomfalseuw (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %1, <8 x i16> %a1, i8 6) ; vpcomfalseuw
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
    123 
; "true" (imm 7) is operand-independent, so commuting the operands to fold
; the load leaves the predicate unchanged in the emitted vpcomtruew.
define <8 x i16> @commute_fold_vpcomw(<8 x i16>* %a0, <8 x i16> %a1) {
; X32-LABEL: commute_fold_vpcomw:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomtruew (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomw:
; X64:       # %bb.0:
; X64-NEXT:    vpcomtruew (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %1, <8 x i16> %a1, i8 7) ; vpcomtruew
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
    140 
; The load is the FIRST multiplicand of the multiply-accumulate; the two
; multiplicands commute, so the load folds into the second source slot
; (register %xmm1 is printed first, the memory operand second).
define <4 x i32> @commute_fold_vpmacsdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
; X32-LABEL: commute_fold_vpmacsdd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacsdd %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacsdd:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsdd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %1, <4 x i32> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
    157 
; The loaded first multiplicand commutes with the second so it can be
; folded into the memory-operand slot of vpmacsdqh.
define <2 x i64> @commute_fold_vpmacsdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
; X32-LABEL: commute_fold_vpmacsdqh:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacsdqh %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacsdqh:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsdqh %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    174 
; The loaded first multiplicand commutes with the second so it can be
; folded into the memory-operand slot of vpmacsdql.
define <2 x i64> @commute_fold_vpmacsdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
; X32-LABEL: commute_fold_vpmacsdql:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacsdql %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacsdql:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsdql %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    191 
; The loaded first multiplicand commutes with the second so it can be
; folded into the memory-operand slot of vpmacssdd.
define <4 x i32> @commute_fold_vpmacssdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
; X32-LABEL: commute_fold_vpmacssdd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacssdd %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacssdd:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssdd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %1, <4 x i32> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
    208 
; The loaded first multiplicand commutes with the second so it can be
; folded into the memory-operand slot of vpmacssdqh.
define <2 x i64> @commute_fold_vpmacssdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
; X32-LABEL: commute_fold_vpmacssdqh:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacssdqh %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacssdqh:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssdqh %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    225 
; The loaded first multiplicand commutes with the second so it can be
; folded into the memory-operand slot of vpmacssdql.
define <2 x i64> @commute_fold_vpmacssdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
; X32-LABEL: commute_fold_vpmacssdql:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacssdql %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacssdql:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssdql %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    242 
; The loaded first multiplicand commutes with the second so it can be
; folded into the memory-operand slot of vpmacsswd.
define <4 x i32> @commute_fold_vpmacsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
; X32-LABEL: commute_fold_vpmacsswd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacsswd %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacsswd:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsswd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
    259 
; The loaded first multiplicand commutes with the second so it can be
; folded into the memory-operand slot of vpmacssww.
define <8 x i16> @commute_fold_vpmacssww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
; X32-LABEL: commute_fold_vpmacssww:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacssww %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacssww:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssww %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %1, <8 x i16> %a1, <8 x i16> %a2)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
    276 
; The loaded first multiplicand commutes with the second so it can be
; folded into the memory-operand slot of vpmacswd.
define <4 x i32> @commute_fold_vpmacswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
; X32-LABEL: commute_fold_vpmacswd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacswd %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacswd:
; X64:       # %bb.0:
; X64-NEXT:    vpmacswd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
    293 
; The loaded first multiplicand commutes with the second so it can be
; folded into the memory-operand slot of vpmacsww.
define <8 x i16> @commute_fold_vpmacsww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
; X32-LABEL: commute_fold_vpmacsww:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacsww %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacsww:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsww %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %1, <8 x i16> %a1, <8 x i16> %a2)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
    310 
; The loaded first multiplicand commutes with the second so it can be
; folded into the memory-operand slot of vpmadcsswd.
define <4 x i32> @commute_fold_vpmadcsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
; X32-LABEL: commute_fold_vpmadcsswd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmadcsswd %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmadcsswd:
; X64:       # %bb.0:
; X64-NEXT:    vpmadcsswd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
    327 
; The loaded first multiplicand commutes with the second so it can be
; folded into the memory-operand slot of vpmadcswd.
define <4 x i32> @commute_fold_vpmadcswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
; X32-LABEL: commute_fold_vpmadcswd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmadcswd %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmadcswd:
; X64:       # %bb.0:
; X64-NEXT:    vpmadcswd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
    344