Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
      3 
      4 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
        ; Splat of byte element 5 of %a across all 32 lanes of a <32 x i8>.
        ; Expected assembly (the RUN line enables only -mattr=+avx): vpshufb
        ; replicates byte 5 across xmm0, then vinsertf128 $1 copies that
        ; 128-bit result into the upper half of ymm0.
      5 ; CHECK-LABEL: funcA:
      6 ; CHECK:       ## BB#0: ## %entry
      7 ; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
      8 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
      9 ; CHECK-NEXT:    retq
     10 entry:
     11   %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
     12   ret <32 x i8> %shuffle
     13 }
     14 
     15 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
        ; Splat of 16-bit element 5 of %a across all 16 lanes of a <16 x i16>.
        ; Expected assembly: vpshufhw replicates word 5 across the high four
        ; words of xmm0, vpshufd [2,2,3,3] spreads those high dwords over the
        ; whole register, and vinsertf128 $1 copies xmm0 into the upper half
        ; of ymm0.
     16 ; CHECK-LABEL: funcB:
     17 ; CHECK:       ## BB#0: ## %entry
     18 ; CHECK-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
     19 ; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
     20 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     21 ; CHECK-NEXT:    retq
     22 entry:
     23   %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
     24   ret <16 x i16> %shuffle
     25 }
     26 
     27 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
        ; Builds a <4 x i64> whose four lanes all hold the scalar %q, using a
        ; chain of insertelement instructions (lanes 0 through 3).
        ; Expected assembly: vmovq moves %rdi into xmm0, vpshufd [0,1,0,1]
        ; duplicates the low 64 bits into both halves of xmm0, and
        ; vinsertf128 $1 copies xmm0 into the upper half of ymm0.
     28 ; CHECK-LABEL: funcC:
     29 ; CHECK:       ## BB#0: ## %entry
     30 ; CHECK-NEXT:    vmovq %rdi, %xmm0
     31 ; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
     32 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     33 ; CHECK-NEXT:    retq
     34 entry:
     35   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
     36   %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
     37   %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
     38   %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
     39   ret <4 x i64> %vecinit6.i
     40 }
     41 
     42 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
        ; Builds a <4 x double> whose four lanes all hold the scalar %q, using
        ; a chain of insertelement instructions (lanes 0 through 3).
        ; Expected assembly: vmovddup duplicates element 0 of xmm0 into both
        ; 64-bit lanes, then vinsertf128 $1 copies xmm0 into the upper half
        ; of ymm0.
     43 ; CHECK-LABEL: funcD:
     44 ; CHECK:       ## BB#0: ## %entry
     45 ; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
     46 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     47 ; CHECK-NEXT:    retq
     48 entry:
     49   %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
     50   %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
     51   %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
     52   %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
     53   ret <4 x double> %vecinit6.i
     54 }
     55 
     56 ; Test this turns into a broadcast:
     57 ;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
     58 ;
     59 define <8 x float> @funcE() nounwind {
        ; Control flow: an alloca of [18 x [18 x float]] (align 32), a
        ; self-looping preheader and a conditional branch, both on undef
        ; conditions. Only block %load.i1247 does real work: it loads the
        ; 32-bit value at element [1][1] of the array, inserts it into lanes 6
        ; and 7 of an otherwise-undef <8 x i32>, and bitcasts to <8 x float>.
        ; The other phi input at the exit block is undef.
        ; Expected assembly: the load and the two inserts collapse into one
        ; vbroadcastss from a stack slot into ymm0, emitted inside a frame
        ; that is set up only on the %load.i1247 path.
        ; NOTE(review): the shuffle form in the header comment presumably
        ; describes the node the backend sees after lowering, not the IR as
        ; written here -- confirm.
     60 ; CHECK-LABEL: funcE:
     61 ; CHECK:       ## BB#0: ## %for_exit499
     62 ; CHECK-NEXT:    xorl %eax, %eax
     63 ; CHECK-NEXT:    ## implicit-def: %YMM0
     64 ; CHECK-NEXT:    testb %al, %al
     65 ; CHECK-NEXT:    jne LBB4_2
     66 ; CHECK-NEXT:  ## BB#1: ## %load.i1247
     67 ; CHECK-NEXT:    pushq %rbp
     68 ; CHECK-NEXT:    movq %rsp, %rbp
     69 ; CHECK-NEXT:    andq $-32, %rsp
     70 ; CHECK-NEXT:    subq $1312, %rsp ## imm = 0x520
     71 ; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %ymm0
     72 ; CHECK-NEXT:    movq %rbp, %rsp
     73 ; CHECK-NEXT:    popq %rbp
     74 ; CHECK-NEXT:  LBB4_2: ## %__load_and_broadcast_32.exit1249
     75 ; CHECK-NEXT:    retq
     76 allocas:
     77   %udx495 = alloca [18 x [18 x float]], align 32
     78   br label %for_test505.preheader
     79 
     80 for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
     81   br i1 undef, label %for_exit499, label %for_test505.preheader
     82 
     83 for_exit499:                                      ; preds = %for_test505.preheader
     84   br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247
     85 
     86 load.i1247:                                       ; preds = %for_exit499
     87   %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
     88   %ptr.i1237 = bitcast float* %ptr1227 to i32*
     89   %val.i1238 = load i32, i32* %ptr.i1237, align 4
     90   %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
     91   %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
     92   %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
     93   br label %__load_and_broadcast_32.exit1249
     94 
     95 __load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
     96   %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
     97   ret <8 x float> %load_broadcast12281250
     98 }
     99 
    100 define <8 x float> @funcF(i32 %val) nounwind {
        ; Inserts the scalar argument %val into lanes 6 and 7 of an otherwise
        ; undef <8 x i32> and returns it bitcast to <8 x float>. Lanes 0-5 are
        ; left undef by the IR.
        ; Expected assembly: vmovd moves %edi into xmm0, vpshufd [0,1,0,0]
        ; places the value in the top two dwords of xmm0, and vinsertf128 $1
        ; copies xmm0 into the upper half of ymm0 (where lanes 6 and 7 live).
    101 ; CHECK-LABEL: funcF:
    102 ; CHECK:       ## BB#0:
    103 ; CHECK-NEXT:    vmovd %edi, %xmm0
    104 ; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
    105 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    106 ; CHECK-NEXT:    retq
    107   %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
    108   %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
    109   %tmp = bitcast <8 x i32> %ret7 to <8 x float>
    110   ret <8 x float> %tmp
    111 }
    112 
    113 define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
        ; Splat of float element 0 of %a across all 8 lanes of an <8 x float>.
        ; Expected assembly: vpermilps [0,0,0,0] replicates element 0 across
        ; xmm0, then vinsertf128 $1 copies xmm0 into the upper half of ymm0.
    114 ; CHECK-LABEL: funcG:
    115 ; CHECK:       ## BB#0: ## %entry
    116 ; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
    117 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    118 ; CHECK-NEXT:    retq
    119 entry:
    120   %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    121   ret <8 x float> %shuffle
    122 }
    123 
    124 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
        ; Splat of float element 5 of %a across all 8 lanes of an <8 x float>.
        ; Element 5 sits in the upper 128-bit half of the source vector.
        ; Expected assembly: vpermilps [1,1,1,1,5,5,5,5] replicates element 1
        ; of each 128-bit half within that half, then vperm2f128 [2,3,2,3]
        ; copies the upper half of ymm0 into both halves.
    125 ; CHECK-LABEL: funcH:
    126 ; CHECK:       ## BB#0: ## %entry
    127 ; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
    128 ; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
    129 ; CHECK-NEXT:    retq
    130 entry:
    131   %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    132   ret <8 x float> %shuffle
    133 }
    134 
    135 define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
        ; Loads a <2 x double> from %ptr and splats its element 1 into both
        ; lanes.
        ; Expected assembly: the load folds into a single vmovddup with a
        ; memory operand. The address operand is swallowed by the {{.*#+}}
        ; pattern, so this test does not pin the byte offset used.
    136 ; CHECK-LABEL: splat_load_2f64_11:
    137 ; CHECK:       ## BB#0:
    138 ; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
    139 ; CHECK-NEXT:    retq
    140   %x = load <2 x double>, <2 x double>* %ptr
    141   %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
    142   ret <2 x double> %x1
    143 }
    144 
    145 define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
        ; Loads a <4 x double> from %ptr and splats its element 2 into all
        ; four lanes.
        ; Expected assembly: the load and shuffle fold into one vbroadcastsd
        ; reading from 16(%rdi), i.e. element 2 at 8 bytes per double.
    146 ; CHECK-LABEL: splat_load_4f64_2222:
    147 ; CHECK:       ## BB#0:
    148 ; CHECK-NEXT:    vbroadcastsd 16(%rdi), %ymm0
    149 ; CHECK-NEXT:    retq
    150   %x = load <4 x double>, <4 x double>* %ptr
    151   %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    152   ret <4 x double> %x1
    153 }
    154 
    155 define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
        ; Loads a <4 x float> from %ptr and splats its element 0 into all four
        ; lanes.
        ; Expected assembly: the load and shuffle fold into one 128-bit
        ; vbroadcastss reading directly from (%rdi).
    156 ; CHECK-LABEL: splat_load_4f32_0000:
    157 ; CHECK:       ## BB#0:
    158 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
    159 ; CHECK-NEXT:    retq
    160   %x = load <4 x float>, <4 x float>* %ptr
    161   %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    162   ret <4 x float> %x1
    163 }
    164 
    165 define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
        ; Loads an <8 x float> from %ptr and splats its element 7 into all
        ; eight lanes.
        ; Expected assembly: the load and shuffle fold into one 256-bit
        ; vbroadcastss reading from 28(%rdi), i.e. element 7 at 4 bytes per
        ; float.
    166 ; CHECK-LABEL: splat_load_8f32_77777777:
    167 ; CHECK:       ## BB#0:
    168 ; CHECK-NEXT:    vbroadcastss 28(%rdi), %ymm0
    169 ; CHECK-NEXT:    retq
    170   %x = load <8 x float>, <8 x float>* %ptr
    171   %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
    172   ret <8 x float> %x1
    173 }
    174