; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
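; corei7 enables SSE4.2 (Nehalem), corei7-avx enables AVX (Sandy Bridge), and
; core-avx2 enables AVX2 (Haswell), so the SSE/AVX/AVX2 check prefixes select
; the expected instruction form for each feature level.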

; Make sure that we generate non-temporal stores for the test cases below.
; All of the zeroing is done with xorps, a float-domain instruction, so the
; execution-domain information is no longer available and every zero store
; defaults to (v)movntps.

define void @test_zero_v4f32(<4 x float>* %dst) {
; CHECK-LABEL: test_zero_v4f32:
; SSE: movntps
; AVX: vmovntps
  store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v4i32(<4 x i32>* %dst) {
; CHECK-LABEL: test_zero_v4i32:
; SSE: movntps
; AVX: vmovntps
  store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v2f64(<2 x double>* %dst) {
; CHECK-LABEL: test_zero_v2f64:
; SSE: movntps
; AVX: vmovntps
  store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v2i64(<2 x i64>* %dst) {
; CHECK-LABEL: test_zero_v2i64:
; SSE: movntps
; AVX: vmovntps
  store <2 x i64> zeroinitializer, <2 x i64>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v8i16(<8 x i16>* %dst) {
; CHECK-LABEL: test_zero_v8i16:
; SSE: movntps
; AVX: vmovntps
  store <8 x i16> zeroinitializer, <8 x i16>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v16i8(<16 x i8>* %dst) {
; CHECK-LABEL: test_zero_v16i8:
; SSE: movntps
; AVX: vmovntps
  store <16 x i8> zeroinitializer, <16 x i8>* %dst, align 16, !nontemporal !1
  ret void
}

; And now YMM versions.

define void @test_zero_v8f32(<8 x float>* %dst) {
; CHECK-LABEL: test_zero_v8f32:
; AVX: vmovntps %ymm
  store <8 x float> zeroinitializer, <8 x float>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_zero_v8i32(<8 x i32>* %dst) {
; CHECK-LABEL: test_zero_v8i32:
; AVX2: vmovntps %ymm
  store <8 x i32> zeroinitializer, <8 x i32>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_zero_v4f64(<4 x double>* %dst) {
; CHECK-LABEL: test_zero_v4f64:
; AVX: vmovntps %ymm
  store <4 x double> zeroinitializer, <4 x double>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_zero_v4i64(<4 x i64>* %dst) {
; CHECK-LABEL: test_zero_v4i64:
; AVX2: vmovntps %ymm
  store <4 x i64> zeroinitializer, <4 x i64>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_zero_v16i16(<16 x i16>* %dst) {
; CHECK-LABEL: test_zero_v16i16:
; AVX2: vmovntps %ymm
  store <16 x i16> zeroinitializer, <16 x i16>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_zero_v32i8(<32 x i8>* %dst) {
; CHECK-LABEL: test_zero_v32i8:
; AVX2: vmovntps %ymm
  store <32 x i8> zeroinitializer, <32 x i8>* %dst, align 32, !nontemporal !1
  ret void
}


; Check that we also handle arguments. Here the type survives longer.

define void @test_arg_v4f32(<4 x float> %arg, <4 x float>* %dst) {
; CHECK-LABEL: test_arg_v4f32:
; SSE: movntps
; AVX: vmovntps
  store <4 x float> %arg, <4 x float>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_arg_v4i32(<4 x i32> %arg, <4 x i32>* %dst) {
; CHECK-LABEL: test_arg_v4i32:
; SSE: movntps
; AVX: vmovntps
  store <4 x i32> %arg, <4 x i32>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_arg_v2f64(<2 x double> %arg, <2 x double>* %dst) {
; CHECK-LABEL: test_arg_v2f64:
; SSE: movntps
; AVX: vmovntps
  store <2 x double> %arg, <2 x double>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_arg_v2i64(<2 x i64> %arg, <2 x i64>* %dst) {
; CHECK-LABEL: test_arg_v2i64:
; SSE: movntps
; AVX: vmovntps
  store <2 x i64> %arg, <2 x i64>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_arg_v8i16(<8 x i16> %arg, <8 x i16>* %dst) {
; CHECK-LABEL: test_arg_v8i16:
; SSE: movntps
; AVX: vmovntps
  store <8 x i16> %arg, <8 x i16>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_arg_v16i8(<16 x i8> %arg, <16 x i8>* %dst) {
; CHECK-LABEL: test_arg_v16i8:
; SSE: movntps
; AVX: vmovntps
  store <16 x i8> %arg, <16 x i8>* %dst, align 16, !nontemporal !1
  ret void
}

; And now YMM versions.

define void @test_arg_v8f32(<8 x float> %arg, <8 x float>* %dst) {
; CHECK-LABEL: test_arg_v8f32:
; AVX: vmovntps %ymm
  store <8 x float> %arg, <8 x float>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_arg_v8i32(<8 x i32> %arg, <8 x i32>* %dst) {
; CHECK-LABEL: test_arg_v8i32:
; AVX2: vmovntps %ymm
  store <8 x i32> %arg, <8 x i32>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_arg_v4f64(<4 x double> %arg, <4 x double>* %dst) {
; CHECK-LABEL: test_arg_v4f64:
; AVX: vmovntps %ymm
  store <4 x double> %arg, <4 x double>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %dst) {
; CHECK-LABEL: test_arg_v4i64:
; AVX2: vmovntps %ymm
  store <4 x i64> %arg, <4 x i64>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %dst) {
; CHECK-LABEL: test_arg_v16i16:
; AVX2: vmovntps %ymm
  store <16 x i16> %arg, <16 x i16>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %dst) {
; CHECK-LABEL: test_arg_v32i8:
; AVX2: vmovntps %ymm
  store <32 x i8> %arg, <32 x i8>* %dst, align 32, !nontemporal !1
  ret void
}


; Now check that if the execution domain is trivially visible, we use it.
; We use an add to make the type survive all the way to the MOVNT.

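; With the domain visible, float data is stored with (v)movntps, double data
; with (v)movntpd, and integer data with (v)movntdq.
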
define void @test_op_v4f32(<4 x float> %a, <4 x float> %b, <4 x float>* %dst) {
; CHECK-LABEL: test_op_v4f32:
; SSE: movntps
; AVX: vmovntps
  %r = fadd <4 x float> %a, %b
  store <4 x float> %r, <4 x float>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_op_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32>* %dst) {
; CHECK-LABEL: test_op_v4i32:
; SSE: movntdq
; AVX: vmovntdq
  %r = add <4 x i32> %a, %b
  store <4 x i32> %r, <4 x i32>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_op_v2f64(<2 x double> %a, <2 x double> %b, <2 x double>* %dst) {
; CHECK-LABEL: test_op_v2f64:
; SSE: movntpd
; AVX: vmovntpd
  %r = fadd <2 x double> %a, %b
  store <2 x double> %r, <2 x double>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_op_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64>* %dst) {
; CHECK-LABEL: test_op_v2i64:
; SSE: movntdq
; AVX: vmovntdq
  %r = add <2 x i64> %a, %b
  store <2 x i64> %r, <2 x i64>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_op_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16>* %dst) {
; CHECK-LABEL: test_op_v8i16:
; SSE: movntdq
; AVX: vmovntdq
  %r = add <8 x i16> %a, %b
  store <8 x i16> %r, <8 x i16>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_op_v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8>* %dst) {
; CHECK-LABEL: test_op_v16i8:
; SSE: movntdq
; AVX: vmovntdq
  %r = add <16 x i8> %a, %b
  store <16 x i8> %r, <16 x i8>* %dst, align 16, !nontemporal !1
  ret void
}

; And now YMM versions.

define void @test_op_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>* %dst) {
; CHECK-LABEL: test_op_v8f32:
; AVX: vmovntps %ymm
  %r = fadd <8 x float> %a, %b
  store <8 x float> %r, <8 x float>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_op_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32>* %dst) {
; CHECK-LABEL: test_op_v8i32:
; AVX2: vmovntdq %ymm
  %r = add <8 x i32> %a, %b
  store <8 x i32> %r, <8 x i32>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_op_v4f64(<4 x double> %a, <4 x double> %b, <4 x double>* %dst) {
; CHECK-LABEL: test_op_v4f64:
; AVX: vmovntpd %ymm
  %r = fadd <4 x double> %a, %b
  store <4 x double> %r, <4 x double>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_op_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %dst) {
; CHECK-LABEL: test_op_v4i64:
; AVX2: vmovntdq %ymm
  %r = add <4 x i64> %a, %b
  store <4 x i64> %r, <4 x i64>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_op_v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16>* %dst) {
; CHECK-LABEL: test_op_v16i16:
; AVX2: vmovntdq %ymm
  %r = add <16 x i16> %a, %b
  store <16 x i16> %r, <16 x i16>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_op_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8>* %dst) {
; CHECK-LABEL: test_op_v32i8:
; AVX2: vmovntdq %ymm
  %r = add <32 x i8> %a, %b
  store <32 x i8> %r, <32 x i8>* %dst, align 32, !nontemporal !1
  ret void
}

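; The !nontemporal metadata must reference a node holding a single i32 value
; of 1, as required by the LLVM LangRef.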
!1 = !{i32 1}
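
; A minimal sketch (not part of the checked tests) of how stores like those
; above can be produced from C with Clang's __builtin_nontemporal_store; the
; function name zero_nt is illustrative only:
;
;   typedef float v4sf __attribute__((vector_size(16)));
;   void zero_nt(v4sf *dst) {
;     /* Clang lowers this builtin to a store with !nontemporal metadata. */
;     __builtin_nontemporal_store((v4sf){0.0f, 0.0f, 0.0f, 0.0f}, dst);
;   }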