Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
      2 
      3 ; We don't check any vinsertf128 variant with immediate 0 because that's just a blend. 
      4 
      5 define <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) {
      6 ; CHECK-LABEL:       test_x86_avx_vinsertf128_pd_256_1: 
      7 ; CHECK:             vinsertf128 $1, %xmm1, %ymm0, %ymm0
      8   %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 1)
      9   ret <4 x double> %res
     10 }
     11 declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
     12 
     13 define <8 x float> @test_x86_avx_vinsertf128_ps_256_1(<8 x float> %a0, <4 x float> %a1) {
     14 ; CHECK-LABEL:      test_x86_avx_vinsertf128_ps_256_1: 
     15 ; CHECK:            vinsertf128 $1, %xmm1, %ymm0, %ymm0
     16   %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 1)
     17   ret <8 x float> %res
     18 }
     19 declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
     20 
     21 define <8 x i32> @test_x86_avx_vinsertf128_si_256_1(<8 x i32> %a0, <4 x i32> %a1) {
     22 ; CHECK-LABEL:    test_x86_avx_vinsertf128_si_256_1: 
     23 ; CHECK:          vinsertf128 $1, %xmm1, %ymm0, %ymm0
     24   %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 1)
     25   ret <8 x i32> %res
     26 }
     27 
     28 ; Verify that high bits of the immediate are masked off. This should be the equivalent
     29 ; of a vinsertf128 $0 which should be optimized into a blend, so just check that it's
     30 ; not a vinsertf128 $1.
     31 define <8 x i32> @test_x86_avx_vinsertf128_si_256_2(<8 x i32> %a0, <4 x i32> %a1) {
     32 ; CHECK-LABEL:    test_x86_avx_vinsertf128_si_256_2: 
     33 ; CHECK-NOT:      vinsertf128 $1, %xmm1, %ymm0, %ymm0
     34   %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2)
     35   ret <8 x i32> %res
     36 }
     37 declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
     38 
     39 ; We don't check any vextractf128 variant with immediate 0 because that's just a move. 
     40 
     41 define <2 x double> @test_x86_avx_vextractf128_pd_256_1(<4 x double> %a0) {
     42 ; CHECK-LABEL:       test_x86_avx_vextractf128_pd_256_1: 
     43 ; CHECK:             vextractf128 $1, %ymm0, %xmm0
     44   %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 1)
     45   ret <2 x double> %res
     46 }
     47 declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
     48 
     49 define <4 x float> @test_x86_avx_vextractf128_ps_256_1(<8 x float> %a0) {
     50 ; CHECK-LABEL:       test_x86_avx_vextractf128_ps_256_1: 
     51 ; CHECK:             vextractf128 $1, %ymm0, %xmm0
     52   %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 1)
     53   ret <4 x float> %res
     54 }
     55 declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
     56 
     57 define <4 x i32> @test_x86_avx_vextractf128_si_256_1(<8 x i32> %a0) {
     58 ; CHECK-LABEL:    test_x86_avx_vextractf128_si_256_1: 
     59 ; CHECK:          vextractf128 $1, %ymm0, %xmm0
     60   %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 1)
     61   ret <4 x i32> %res
     62 }
     63 declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
     64 
     65 ; Verify that high bits of the immediate are masked off. This should be the equivalent
     66 ; of a vextractf128 $0 which should be optimized away, so just check that it's
     67 ; not a vextractf128 of any kind.
     68 define <2 x double> @test_x86_avx_extractf128_pd_256_2(<4 x double> %a0) {
     69 ; CHECK-LABEL:       test_x86_avx_extractf128_pd_256_2: 
     70 ; CHECK-NOT:         vextractf128
     71   %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 2)
     72   ret <2 x double> %res
     73 }
     74 
     75 
     76 define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
     77 ; CHECK-LABEL:       test_x86_avx_blend_pd_256: 
     78 ; CHECK:             vblendpd
     79   %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
     80   ret <4 x double> %res
     81 }
     82 declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
     83 
     84 
     85 define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
     86 ; CHECK-LABEL:      test_x86_avx_blend_ps_256: 
     87 ; CHECK:            vblendps
     88   %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
     89   ret <8 x float> %res
     90 }
     91 declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
     92 
     93 
     94 define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
     95 ; CHECK-LABEL:      test_x86_avx_dp_ps_256: 
     96 ; CHECK:            vdpps
     97   %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
     98   ret <8 x float> %res
     99 }
    100 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
    101 
    102 
    103 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
    104 ; CHECK-LABEL:    test_x86_sse2_psll_dq: 
    105 ; CHECK:          vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    106   %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
    107   ret <2 x i64> %res
    108 }
    109 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
    110 
    111 
    112 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
    113 ; CHECK-LABEL:    test_x86_sse2_psrl_dq: 
    114 ; CHECK:          vpsrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
    115   %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
    116   ret <2 x i64> %res
    117 }
    118 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
    119 
    120 
    121 define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
    122 ; CHECK-LABEL:       test_x86_sse41_blendpd: 
    123 ; CHECK:             vblendpd
    124   %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 2) ; <<2 x double>> [#uses=1]
    125   ret <2 x double> %res
    126 }
    127 declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
    128 
    129 
    130 define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
    131 ; CHECK-LABEL:      test_x86_sse41_blendps: 
    132 ; CHECK:            vblendps
    133   %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
    134   ret <4 x float> %res
    135 }
    136 declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
    137 
    138 
    139 define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
    140 ; CHECK-LABEL:    test_x86_sse41_pblendw: 
    141 ; CHECK:          vpblendw
    142   %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
    143   ret <8 x i16> %res
    144 }
    145 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
    146 
    147 
    148 define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
    149 ; CHECK-LABEL: test_x86_sse41_pmovsxbd:
    150 ; CHECK:       # BB#0:
    151 ; CHECK-NEXT:    vpmovsxbd %xmm0, %xmm0
    152 ; CHECK-NEXT:    retl
    153   %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
    154   ret <4 x i32> %res
    155 }
    156 declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
    157 
    158 
    159 define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
    160 ; CHECK-LABEL: test_x86_sse41_pmovsxbq:
    161 ; CHECK:       # BB#0:
    162 ; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm0
    163 ; CHECK-NEXT:    retl
    164   %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
    165   ret <2 x i64> %res
    166 }
    167 declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
    168 
    169 
    170 define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
    171 ; CHECK-LABEL: test_x86_sse41_pmovsxbw:
    172 ; CHECK:       # BB#0:
    173 ; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm0
    174 ; CHECK-NEXT:    retl
    175   %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
    176   ret <8 x i16> %res
    177 }
    178 declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
    179 
    180 
    181 define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
    182 ; CHECK-LABEL: test_x86_sse41_pmovsxdq:
    183 ; CHECK:       # BB#0:
    184 ; CHECK-NEXT:    vpmovsxdq %xmm0, %xmm0
    185 ; CHECK-NEXT:    retl
    186   %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
    187   ret <2 x i64> %res
    188 }
    189 declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
    190 
    191 
    192 define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
    193 ; CHECK-LABEL: test_x86_sse41_pmovsxwd:
    194 ; CHECK:       # BB#0:
    195 ; CHECK-NEXT:    vpmovsxwd %xmm0, %xmm0
    196 ; CHECK-NEXT:    retl
    197   %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
    198   ret <4 x i32> %res
    199 }
    200 declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
    201 
    202 
    203 define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
    204 ; CHECK-LABEL: test_x86_sse41_pmovsxwq:
    205 ; CHECK:       # BB#0:
    206 ; CHECK-NEXT:    vpmovsxwq %xmm0, %xmm0
    207 ; CHECK-NEXT:    retl
    208   %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
    209   ret <2 x i64> %res
    210 }
    211 declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
    212