; NEON move tests for the ARM backend: vmov/vmvn immediate splats, vmovl
; widening, vmovn truncation and vqmovn/vqmovun narrowing intrinsics.
      1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
      2 
      3 define <8 x i8> @v_movi8() nounwind {
      4 ;CHECK: v_movi8:
      5 ;CHECK: vmov.i8 d{{.*}}, #0x8
      6 	ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
      7 }
      8 
      9 define <4 x i16> @v_movi16a() nounwind {
     10 ;CHECK: v_movi16a:
     11 ;CHECK: vmov.i16 d{{.*}}, #0x10
     12 	ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 >
     13 }
     14 
     15 define <4 x i16> @v_movi16b() nounwind {
     16 ;CHECK: v_movi16b:
     17 ;CHECK: vmov.i16 d{{.*}}, #0x1000
     18 	ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
     19 }
     20 
     21 define <4 x i16> @v_mvni16a() nounwind {
     22 ;CHECK: v_mvni16a:
     23 ;CHECK: vmvn.i16 d{{.*}}, #0x10
     24 	ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 >
     25 }
     26 
     27 define <4 x i16> @v_mvni16b() nounwind {
     28 ;CHECK: v_mvni16b:
     29 ;CHECK: vmvn.i16 d{{.*}}, #0x1000
     30 	ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 >
     31 }
     32 
     33 define <2 x i32> @v_movi32a() nounwind {
     34 ;CHECK: v_movi32a:
     35 ;CHECK: vmov.i32 d{{.*}}, #0x20
     36 	ret <2 x i32> < i32 32, i32 32 >
     37 }
     38 
     39 define <2 x i32> @v_movi32b() nounwind {
     40 ;CHECK: v_movi32b:
     41 ;CHECK: vmov.i32 d{{.*}}, #0x2000
     42 	ret <2 x i32> < i32 8192, i32 8192 >
     43 }
     44 
     45 define <2 x i32> @v_movi32c() nounwind {
     46 ;CHECK: v_movi32c:
     47 ;CHECK: vmov.i32 d{{.*}}, #0x200000
     48 	ret <2 x i32> < i32 2097152, i32 2097152 >
     49 }
     50 
     51 define <2 x i32> @v_movi32d() nounwind {
     52 ;CHECK: v_movi32d:
     53 ;CHECK: vmov.i32 d{{.*}}, #0x20000000
     54 	ret <2 x i32> < i32 536870912, i32 536870912 >
     55 }
     56 
     57 define <2 x i32> @v_movi32e() nounwind {
     58 ;CHECK: v_movi32e:
     59 ;CHECK: vmov.i32 d{{.*}}, #0x20ff
     60 	ret <2 x i32> < i32 8447, i32 8447 >
     61 }
     62 
     63 define <2 x i32> @v_movi32f() nounwind {
     64 ;CHECK: v_movi32f:
     65 ;CHECK: vmov.i32 d{{.*}}, #0x20ffff
     66 	ret <2 x i32> < i32 2162687, i32 2162687 >
     67 }
     68 
     69 define <2 x i32> @v_mvni32a() nounwind {
     70 ;CHECK: v_mvni32a:
     71 ;CHECK: vmvn.i32 d{{.*}}, #0x20
     72 	ret <2 x i32> < i32 4294967263, i32 4294967263 >
     73 }
     74 
     75 define <2 x i32> @v_mvni32b() nounwind {
     76 ;CHECK: v_mvni32b:
     77 ;CHECK: vmvn.i32 d{{.*}}, #0x2000
     78 	ret <2 x i32> < i32 4294959103, i32 4294959103 >
     79 }
     80 
     81 define <2 x i32> @v_mvni32c() nounwind {
     82 ;CHECK: v_mvni32c:
     83 ;CHECK: vmvn.i32 d{{.*}}, #0x200000
     84 	ret <2 x i32> < i32 4292870143, i32 4292870143 >
     85 }
     86 
     87 define <2 x i32> @v_mvni32d() nounwind {
     88 ;CHECK: v_mvni32d:
     89 ;CHECK: vmvn.i32 d{{.*}}, #0x20000000
     90 	ret <2 x i32> < i32 3758096383, i32 3758096383 >
     91 }
     92 
     93 define <2 x i32> @v_mvni32e() nounwind {
     94 ;CHECK: v_mvni32e:
     95 ;CHECK: vmvn.i32 d{{.*}}, #0x20ff
     96 	ret <2 x i32> < i32 4294958848, i32 4294958848 >
     97 }
     98 
     99 define <2 x i32> @v_mvni32f() nounwind {
    100 ;CHECK: v_mvni32f:
    101 ;CHECK: vmvn.i32 d{{.*}}, #0x20ffff
    102 	ret <2 x i32> < i32 4292804608, i32 4292804608 >
    103 }
    104 
    105 define <1 x i64> @v_movi64() nounwind {
    106 ;CHECK: v_movi64:
    107 ;CHECK: vmov.i64 d{{.*}}, #0xff0000ff0000ffff
    108 	ret <1 x i64> < i64 18374687574888349695 >
    109 }
    110 
    111 define <16 x i8> @v_movQi8() nounwind {
    112 ;CHECK: v_movQi8:
    113 ;CHECK: vmov.i8 q{{.*}}, #0x8
    114 	ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
    115 }
    116 
    117 define <8 x i16> @v_movQi16a() nounwind {
    118 ;CHECK: v_movQi16a:
    119 ;CHECK: vmov.i16 q{{.*}}, #0x10
    120 	ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
    121 }
    122 
    123 define <8 x i16> @v_movQi16b() nounwind {
    124 ;CHECK: v_movQi16b:
    125 ;CHECK: vmov.i16 q{{.*}}, #0x1000
    126 	ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 >
    127 }
    128 
    129 define <4 x i32> @v_movQi32a() nounwind {
    130 ;CHECK: v_movQi32a:
    131 ;CHECK: vmov.i32 q{{.*}}, #0x20
    132 	ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 >
    133 }
    134 
    135 define <4 x i32> @v_movQi32b() nounwind {
    136 ;CHECK: v_movQi32b:
    137 ;CHECK: vmov.i32 q{{.*}}, #0x2000
    138 	ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 >
    139 }
    140 
    141 define <4 x i32> @v_movQi32c() nounwind {
    142 ;CHECK: v_movQi32c:
    143 ;CHECK: vmov.i32 q{{.*}}, #0x200000
    144 	ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 >
    145 }
    146 
    147 define <4 x i32> @v_movQi32d() nounwind {
    148 ;CHECK: v_movQi32d:
    149 ;CHECK: vmov.i32 q{{.*}}, #0x20000000
    150 	ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 >
    151 }
    152 
    153 define <4 x i32> @v_movQi32e() nounwind {
    154 ;CHECK: v_movQi32e:
    155 ;CHECK: vmov.i32 q{{.*}}, #0x20ff
    156 	ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
    157 }
    158 
    159 define <4 x i32> @v_movQi32f() nounwind {
    160 ;CHECK: v_movQi32f:
    161 ;CHECK: vmov.i32 q{{.*}}, #0x20ffff
    162 	ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
    163 }
    164 
    165 define <2 x i64> @v_movQi64() nounwind {
    166 ;CHECK: v_movQi64:
    167 ;CHECK: vmov.i64 q{{.*}}, #0xff0000ff0000ffff
    168 	ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
    169 }
    170 
    171 ; Check for correct assembler printing for immediate values.
    172 %struct.int8x8_t = type { <8 x i8> }
    173 define void @vdupn128(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
    174 entry:
    175 ;CHECK: vdupn128:
    176 ;CHECK: vmov.i8 d{{.*}}, #0x80
    177   %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
    178   store <8 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>, <8 x i8>* %0, align 8
    179   ret void
    180 }
    181 
    182 define void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
    183 entry:
    184 ;CHECK: vdupnneg75:
    185 ;CHECK: vmov.i8 d{{.*}}, #0xb5
    186   %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
    187   store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8
    188   ret void
    189 }
    190 
    191 define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
    192 ;CHECK: vmovls8:
    193 ;CHECK: vmovl.s8
    194 	%tmp1 = load <8 x i8>* %A
    195 	%tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
    196 	ret <8 x i16> %tmp2
    197 }
    198 
    199 define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
    200 ;CHECK: vmovls16:
    201 ;CHECK: vmovl.s16
    202 	%tmp1 = load <4 x i16>* %A
    203 	%tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
    204 	ret <4 x i32> %tmp2
    205 }
    206 
    207 define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
    208 ;CHECK: vmovls32:
    209 ;CHECK: vmovl.s32
    210 	%tmp1 = load <2 x i32>* %A
    211 	%tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
    212 	ret <2 x i64> %tmp2
    213 }
    214 
    215 define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
    216 ;CHECK: vmovlu8:
    217 ;CHECK: vmovl.u8
    218 	%tmp1 = load <8 x i8>* %A
    219 	%tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
    220 	ret <8 x i16> %tmp2
    221 }
    222 
    223 define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
    224 ;CHECK: vmovlu16:
    225 ;CHECK: vmovl.u16
    226 	%tmp1 = load <4 x i16>* %A
    227 	%tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
    228 	ret <4 x i32> %tmp2
    229 }
    230 
    231 define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
    232 ;CHECK: vmovlu32:
    233 ;CHECK: vmovl.u32
    234 	%tmp1 = load <2 x i32>* %A
    235 	%tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
    236 	ret <2 x i64> %tmp2
    237 }
    238 
    239 define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
    240 ;CHECK: vmovni16:
    241 ;CHECK: vmovn.i16
    242 	%tmp1 = load <8 x i16>* %A
    243 	%tmp2 = trunc <8 x i16> %tmp1 to <8 x i8>
    244 	ret <8 x i8> %tmp2
    245 }
    246 
    247 define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
    248 ;CHECK: vmovni32:
    249 ;CHECK: vmovn.i32
    250 	%tmp1 = load <4 x i32>* %A
    251 	%tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
    252 	ret <4 x i16> %tmp2
    253 }
    254 
    255 define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
    256 ;CHECK: vmovni64:
    257 ;CHECK: vmovn.i64
    258 	%tmp1 = load <2 x i64>* %A
    259 	%tmp2 = trunc <2 x i64> %tmp1 to <2 x i32>
    260 	ret <2 x i32> %tmp2
    261 }
    262 
    263 define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
    264 ;CHECK: vqmovns16:
    265 ;CHECK: vqmovn.s16
    266 	%tmp1 = load <8 x i16>* %A
    267 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
    268 	ret <8 x i8> %tmp2
    269 }
    270 
    271 define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
    272 ;CHECK: vqmovns32:
    273 ;CHECK: vqmovn.s32
    274 	%tmp1 = load <4 x i32>* %A
    275 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
    276 	ret <4 x i16> %tmp2
    277 }
    278 
    279 define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
    280 ;CHECK: vqmovns64:
    281 ;CHECK: vqmovn.s64
    282 	%tmp1 = load <2 x i64>* %A
    283 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
    284 	ret <2 x i32> %tmp2
    285 }
    286 
    287 define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
    288 ;CHECK: vqmovnu16:
    289 ;CHECK: vqmovn.u16
    290 	%tmp1 = load <8 x i16>* %A
    291 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
    292 	ret <8 x i8> %tmp2
    293 }
    294 
    295 define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
    296 ;CHECK: vqmovnu32:
    297 ;CHECK: vqmovn.u32
    298 	%tmp1 = load <4 x i32>* %A
    299 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
    300 	ret <4 x i16> %tmp2
    301 }
    302 
    303 define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
    304 ;CHECK: vqmovnu64:
    305 ;CHECK: vqmovn.u64
    306 	%tmp1 = load <2 x i64>* %A
    307 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
    308 	ret <2 x i32> %tmp2
    309 }
    310 
    311 define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
    312 ;CHECK: vqmovuns16:
    313 ;CHECK: vqmovun.s16
    314 	%tmp1 = load <8 x i16>* %A
    315 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
    316 	ret <8 x i8> %tmp2
    317 }
    318 
    319 define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
    320 ;CHECK: vqmovuns32:
    321 ;CHECK: vqmovun.s32
    322 	%tmp1 = load <4 x i32>* %A
    323 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
    324 	ret <4 x i16> %tmp2
    325 }
    326 
    327 define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind {
    328 ;CHECK: vqmovuns64:
    329 ;CHECK: vqmovun.s64
    330 	%tmp1 = load <2 x i64>* %A
    331 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
    332 	ret <2 x i32> %tmp2
    333 }
    334 
    335 declare <8 x i8>  @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) nounwind readnone
    336 declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) nounwind readnone
    337 declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) nounwind readnone
    338 
    339 declare <8 x i8>  @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
    340 declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) nounwind readnone
    341 declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) nounwind readnone
    342 
    343 declare <8 x i8>  @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) nounwind readnone
    344 declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) nounwind readnone
    345 declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone
    346 
    347 ; Truncating vector stores are not supported.  The following should not crash.
    348 ; Radar 8598391.
    349 define void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind {
    350 ;CHECK: vmovn
    351   %tmp1 = load <4 x i32>* %a, align 16
    352   %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
    353   store <4 x i16> %tmp2, <4 x i16>* %b, align 8
    354   ret void
    355 }
    356 
    357 ; Use vmov.f32 to materialize f32 immediate splats
    358 ; rdar://10437054
    359 define void @v_mov_v2f32(<2 x float>* nocapture %p) nounwind {
    360 entry:
    361 ;CHECK: v_mov_v2f32:
    362 ;CHECK: vmov.f32 d{{.*}}, #-1.600000e+01
    363   store <2 x float> <float -1.600000e+01, float -1.600000e+01>, <2 x float>* %p, align 4
    364   ret void
    365 }
    366 
    367 define void @v_mov_v4f32(<4 x float>* nocapture %p) nounwind {
    368 entry:
    369 ;CHECK: v_mov_v4f32:
    370 ;CHECK: vmov.f32 q{{.*}}, #3.100000e+01
    371   store <4 x float> <float 3.100000e+01, float 3.100000e+01, float 3.100000e+01, float 3.100000e+01>, <4 x float>* %p, align 4
    372   ret void
    373 }
    374 
    375 define void @v_mov_v4f32_undef(<4 x float> * nocapture %p) nounwind {
    376 entry:
    377 ;CHECK: v_mov_v4f32_undef:
    378 ;CHECK: vmov.f32 q{{.*}}, #1.000000e+00
    379   %a = load <4 x float> *%p
    380   %b = fadd <4 x float> %a, <float undef, float 1.0, float 1.0, float 1.0>
    381   store <4 x float> %b, <4 x float> *%p
    382   ret void
    383 }
    384 
    385 ; Vector any_extends must be selected as either vmovl.u or vmovl.s.
    386 ; rdar://10723651
    387 define void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp {
    388 entry:
    389 ;CHECK: any_extend
    390 ;CHECK: vmovl
    391   %and.i186 = zext <4 x i1> %x to <4 x i32>
    392   %add.i185 = sub <4 x i32> %and.i186, %y
    393   %sub.i = sub <4 x i32> %add.i185, zeroinitializer
    394   %add.i = add <4 x i32> %sub.i, zeroinitializer
    395   %vmovn.i = trunc <4 x i32> %add.i to <4 x i16>
    396   tail call void @llvm.arm.neon.vst1.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2)
    397   unreachable
    398 }
    399 
    400 declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind
    401