; RUN: llc -O3 -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx,+xop < %s | FileCheck %s

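; These tests verify that a load feeding the first source operand of a
; commutable XOP intrinsic gets commuted so the load folds into the
; instruction's memory operand (the (%rdi) operand in each CHECK line).
;
; For the VPCOM comparisons the condition is selected by the immediate:
;   0 = lt, 1 = le, 2 = gt, 3 = ge, 4 = eq, 5 = ne, 6 = false, 7 = true
; Commuting the operands mirrors an ordered condition (lt <-> gt, le <-> ge),
; while eq, ne, false and true are symmetric, so the CHECK lines expect the
; mirrored mnemonic only for the ordered comparisons.
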
define <16 x i8> @commute_fold_vpcomb(<16 x i8>* %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: commute_fold_vpcomb
  ;CHECK:       vpcomgtb (%rdi), %xmm0, %xmm0
  %1 = load <16 x i8>, <16 x i8>* %a0
  %2 = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %1, <16 x i8> %a1, i8 0) ; vpcomltb
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <4 x i32> @commute_fold_vpcomd(<4 x i32>* %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: commute_fold_vpcomd
  ;CHECK:       vpcomged (%rdi), %xmm0, %xmm0
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %1, <4 x i32> %a1, i8 1) ; vpcomled
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone

define <2 x i64> @commute_fold_vpcomq(<2 x i64>* %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: commute_fold_vpcomq
  ;CHECK:       vpcomltq (%rdi), %xmm0, %xmm0
  %1 = load <2 x i64>, <2 x i64>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %1, <2 x i64> %a1, i8 2) ; vpcomgtq
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone

define <16 x i8> @commute_fold_vpcomub(<16 x i8>* %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: commute_fold_vpcomub
  ;CHECK:       vpcomleub (%rdi), %xmm0, %xmm0
  %1 = load <16 x i8>, <16 x i8>* %a0
  %2 = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %1, <16 x i8> %a1, i8 3) ; vpcomgeub
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <4 x i32> @commute_fold_vpcomud(<4 x i32>* %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: commute_fold_vpcomud
  ;CHECK:       vpcomequd (%rdi), %xmm0, %xmm0
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %1, <4 x i32> %a1, i8 4) ; vpcomequd
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone

define <2 x i64> @commute_fold_vpcomuq(<2 x i64>* %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: commute_fold_vpcomuq
  ;CHECK:       vpcomnequq (%rdi), %xmm0, %xmm0
  %1 = load <2 x i64>, <2 x i64>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %1, <2 x i64> %a1, i8 5) ; vpcomnequq
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone

define <8 x i16> @commute_fold_vpcomuw(<8 x i16>* %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: commute_fold_vpcomuw
  ;CHECK:       vpcomfalseuw (%rdi), %xmm0, %xmm0
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %1, <8 x i16> %a1, i8 6) ; vpcomfalseuw
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone

define <8 x i16> @commute_fold_vpcomw(<8 x i16>* %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: commute_fold_vpcomw
  ;CHECK:       vpcomtruew (%rdi), %xmm0, %xmm0
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %1, <8 x i16> %a1, i8 7) ; vpcomtruew
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone

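; The VPMACS* multiply-accumulate intrinsics compute (src1 * src2) + src3
; (the ss forms saturate the result). The two multiplicands commute, so a
; load of the first one can still be folded, as the (%rdi) memory operand
; in each CHECK line below shows.
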
define <4 x i32> @commute_fold_vpmacsdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
  ;CHECK-LABEL: commute_fold_vpmacsdd
  ;CHECK:       vpmacsdd %xmm1, (%rdi), %xmm0, %xmm0
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %1, <4 x i32> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @commute_fold_vpmacsdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
  ;CHECK-LABEL: commute_fold_vpmacsdqh
  ;CHECK:       vpmacsdqh %xmm1, (%rdi), %xmm0, %xmm0
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @commute_fold_vpmacsdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
  ;CHECK-LABEL: commute_fold_vpmacsdql
  ;CHECK:       vpmacsdql %xmm1, (%rdi), %xmm0, %xmm0
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <4 x i32> @commute_fold_vpmacssdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
  ;CHECK-LABEL: commute_fold_vpmacssdd
  ;CHECK:       vpmacssdd %xmm1, (%rdi), %xmm0, %xmm0
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %1, <4 x i32> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @commute_fold_vpmacssdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
  ;CHECK-LABEL: commute_fold_vpmacssdqh
  ;CHECK:       vpmacssdqh %xmm1, (%rdi), %xmm0, %xmm0
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @commute_fold_vpmacssdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
  ;CHECK-LABEL: commute_fold_vpmacssdql
  ;CHECK:       vpmacssdql %xmm1, (%rdi), %xmm0, %xmm0
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <4 x i32> @commute_fold_vpmacsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
  ;CHECK-LABEL: commute_fold_vpmacsswd
  ;CHECK:       vpmacsswd %xmm1, (%rdi), %xmm0, %xmm0
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <8 x i16> @commute_fold_vpmacssww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
  ;CHECK-LABEL: commute_fold_vpmacssww
  ;CHECK:       vpmacssww %xmm1, (%rdi), %xmm0, %xmm0
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %1, <8 x i16> %a1, <8 x i16> %a2)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @commute_fold_vpmacswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
  ;CHECK-LABEL: commute_fold_vpmacswd
  ;CHECK:       vpmacswd %xmm1, (%rdi), %xmm0, %xmm0
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <8 x i16> @commute_fold_vpmacsww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
  ;CHECK-LABEL: commute_fold_vpmacsww
  ;CHECK:       vpmacsww %xmm1, (%rdi), %xmm0, %xmm0
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %1, <8 x i16> %a1, <8 x i16> %a2)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

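; The VPMADCS*WD intrinsics are the horizontal variants: they multiply the
; signed words of the first two sources, add adjacent product pairs, and
; accumulate into the doubleword elements of the third source (saturating
; for the ss form). Their multiplicands commute as well, permitting the
; same load fold.
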
define <4 x i32> @commute_fold_vpmadcsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
  ;CHECK-LABEL: commute_fold_vpmadcsswd
  ;CHECK:       vpmadcsswd %xmm1, (%rdi), %xmm0, %xmm0
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <4 x i32> @commute_fold_vpmadcswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
  ;CHECK-LABEL: commute_fold_vpmadcswd
  ;CHECK:       vpmadcswd %xmm1, (%rdi), %xmm0, %xmm0
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone